]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - block/genhd.c
block: implement CONFIG_DEBUG_BLOCK_EXT_DEVT
[mirror_ubuntu-bionic-kernel.git] / block / genhd.c
1 /*
2 * gendisk handling
3 */
4
5 #include <linux/module.h>
6 #include <linux/fs.h>
7 #include <linux/genhd.h>
8 #include <linux/kdev_t.h>
9 #include <linux/kernel.h>
10 #include <linux/blkdev.h>
11 #include <linux/init.h>
12 #include <linux/spinlock.h>
13 #include <linux/seq_file.h>
14 #include <linux/slab.h>
15 #include <linux/kmod.h>
16 #include <linux/kobj_map.h>
17 #include <linux/buffer_head.h>
18 #include <linux/mutex.h>
19 #include <linux/idr.h>
20
21 #include "blk.h"
22
23 static DEFINE_MUTEX(block_class_lock);
24 #ifndef CONFIG_SYSFS_DEPRECATED
25 struct kobject *block_depr;
26 #endif
27
28 /* for extended dynamic devt allocation, currently only one major is used */
29 #define MAX_EXT_DEVT (1 << MINORBITS)
30
31 /* For extended devt allocation. ext_devt_mutex prevents look up
32 * results from going away underneath its user.
33 */
34 static DEFINE_MUTEX(ext_devt_mutex);
35 static DEFINE_IDR(ext_devt_idr);
36
37 static struct device_type disk_type;
38
39 /**
40 * disk_get_part - get partition
41 * @disk: disk to look partition from
42 * @partno: partition number
43 *
44 * Look for partition @partno from @disk. If found, increment
45 * reference count and return it.
46 *
47 * CONTEXT:
48 * Don't care.
49 *
50 * RETURNS:
51 * Pointer to the found partition on success, NULL if not found.
52 */
53 struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
54 {
55 struct hd_struct *part;
56
57 if (unlikely(partno < 1 || partno > disk_max_parts(disk)))
58 return NULL;
59 rcu_read_lock();
60 part = rcu_dereference(disk->__part[partno - 1]);
61 if (part)
62 get_device(&part->dev);
63 rcu_read_unlock();
64
65 return part;
66 }
67 EXPORT_SYMBOL_GPL(disk_get_part);
68
69 /**
70 * disk_part_iter_init - initialize partition iterator
71 * @piter: iterator to initialize
72 * @disk: disk to iterate over
73 * @flags: DISK_PITER_* flags
74 *
75 * Initialize @piter so that it iterates over partitions of @disk.
76 *
77 * CONTEXT:
78 * Don't care.
79 */
80 void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
81 unsigned int flags)
82 {
83 piter->disk = disk;
84 piter->part = NULL;
85
86 if (flags & DISK_PITER_REVERSE)
87 piter->idx = disk_max_parts(piter->disk) - 1;
88 else
89 piter->idx = 0;
90
91 piter->flags = flags;
92 }
93 EXPORT_SYMBOL_GPL(disk_part_iter_init);
94
95 /**
96 * disk_part_iter_next - proceed iterator to the next partition and return it
97 * @piter: iterator of interest
98 *
99 * Proceed @piter to the next partition and return it.
100 *
101 * CONTEXT:
102 * Don't care.
103 */
104 struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
105 {
106 int inc, end;
107
108 /* put the last partition */
109 disk_put_part(piter->part);
110 piter->part = NULL;
111
112 rcu_read_lock();
113
114 /* determine iteration parameters */
115 if (piter->flags & DISK_PITER_REVERSE) {
116 inc = -1;
117 end = -1;
118 } else {
119 inc = 1;
120 end = disk_max_parts(piter->disk);
121 }
122
123 /* iterate to the next partition */
124 for (; piter->idx != end; piter->idx += inc) {
125 struct hd_struct *part;
126
127 part = rcu_dereference(piter->disk->__part[piter->idx]);
128 if (!part)
129 continue;
130 if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
131 continue;
132
133 get_device(&part->dev);
134 piter->part = part;
135 piter->idx += inc;
136 break;
137 }
138
139 rcu_read_unlock();
140
141 return piter->part;
142 }
143 EXPORT_SYMBOL_GPL(disk_part_iter_next);
144
145 /**
146 * disk_part_iter_exit - finish up partition iteration
147 * @piter: iter of interest
148 *
149 * Called when iteration is over. Cleans up @piter.
150 *
151 * CONTEXT:
152 * Don't care.
153 */
154 void disk_part_iter_exit(struct disk_part_iter *piter)
155 {
156 disk_put_part(piter->part);
157 piter->part = NULL;
158 }
159 EXPORT_SYMBOL_GPL(disk_part_iter_exit);
160
161 /**
162 * disk_map_sector_rcu - map sector to partition
163 * @disk: gendisk of interest
164 * @sector: sector to map
165 *
166 * Find out which partition @sector maps to on @disk. This is
167 * primarily used for stats accounting.
168 *
169 * CONTEXT:
170 * RCU read locked. The returned partition pointer is valid only
171 * while preemption is disabled.
172 *
173 * RETURNS:
174 * Found partition on success, NULL if there's no matching partition.
175 */
176 struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
177 {
178 int i;
179
180 for (i = 0; i < disk_max_parts(disk); i++) {
181 struct hd_struct *part = rcu_dereference(disk->__part[i]);
182
183 if (part && part->start_sect <= sector &&
184 sector < part->start_sect + part->nr_sects)
185 return part;
186 }
187 return NULL;
188 }
189 EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
190
/*
 * Registered block majors, kept as hash chains indexed through
 * major_to_index().
 *
 * Can be deleted altogether.  Later.
 */
struct blk_major_name {
	struct blk_major_name *next;	/* next entry in the same bucket */
	int major;
	char name[16];
};

static struct blk_major_name *major_names[BLKDEV_MAJOR_HASH_SIZE];
200
201 /* index in the above - for now: assume no multimajor ranges */
202 static inline int major_to_index(int major)
203 {
204 return major % BLKDEV_MAJOR_HASH_SIZE;
205 }
206
207 #ifdef CONFIG_PROC_FS
208 void blkdev_show(struct seq_file *seqf, off_t offset)
209 {
210 struct blk_major_name *dp;
211
212 if (offset < BLKDEV_MAJOR_HASH_SIZE) {
213 mutex_lock(&block_class_lock);
214 for (dp = major_names[offset]; dp; dp = dp->next)
215 seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
216 mutex_unlock(&block_class_lock);
217 }
218 }
219 #endif /* CONFIG_PROC_FS */
220
221 int register_blkdev(unsigned int major, const char *name)
222 {
223 struct blk_major_name **n, *p;
224 int index, ret = 0;
225
226 mutex_lock(&block_class_lock);
227
228 /* temporary */
229 if (major == 0) {
230 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
231 if (major_names[index] == NULL)
232 break;
233 }
234
235 if (index == 0) {
236 printk("register_blkdev: failed to get major for %s\n",
237 name);
238 ret = -EBUSY;
239 goto out;
240 }
241 major = index;
242 ret = major;
243 }
244
245 p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
246 if (p == NULL) {
247 ret = -ENOMEM;
248 goto out;
249 }
250
251 p->major = major;
252 strlcpy(p->name, name, sizeof(p->name));
253 p->next = NULL;
254 index = major_to_index(major);
255
256 for (n = &major_names[index]; *n; n = &(*n)->next) {
257 if ((*n)->major == major)
258 break;
259 }
260 if (!*n)
261 *n = p;
262 else
263 ret = -EBUSY;
264
265 if (ret < 0) {
266 printk("register_blkdev: cannot get major %d for %s\n",
267 major, name);
268 kfree(p);
269 }
270 out:
271 mutex_unlock(&block_class_lock);
272 return ret;
273 }
274
275 EXPORT_SYMBOL(register_blkdev);
276
277 void unregister_blkdev(unsigned int major, const char *name)
278 {
279 struct blk_major_name **n;
280 struct blk_major_name *p = NULL;
281 int index = major_to_index(major);
282
283 mutex_lock(&block_class_lock);
284 for (n = &major_names[index]; *n; n = &(*n)->next)
285 if ((*n)->major == major)
286 break;
287 if (!*n || strcmp((*n)->name, name)) {
288 WARN_ON(1);
289 } else {
290 p = *n;
291 *n = p->next;
292 }
293 mutex_unlock(&block_class_lock);
294 kfree(p);
295 }
296
297 EXPORT_SYMBOL(unregister_blkdev);
298
/* devt -> kobject map for block devices; set up in genhd_device_init(). */
static struct kobj_map *bdev_map;
300
/**
 * blk_mangle_minor - scatter minor numbers apart
 * @minor: minor number to mangle
 *
 * With CONFIG_DEBUG_BLOCK_EXT_DEVT enabled, reverse the order of the
 * low MINORBITS bits of @minor so that consecutively allocated minors
 * end up far apart.  Mangling twice gives the original value.  Without
 * the option @minor is returned unchanged.
 *
 * RETURNS:
 * Mangled value.
 *
 * CONTEXT:
 * Don't care.
 */
static int blk_mangle_minor(int minor)
{
#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
	int lo_pos, hi_pos;

	/* swap bit pairs, working from the outside towards the middle */
	for (lo_pos = 0, hi_pos = MINORBITS - 1; lo_pos < MINORBITS / 2;
	     lo_pos++, hi_pos--) {
		int lo_bit = minor & (1 << lo_pos);
		int hi_bit = minor & (1 << hi_pos);
		int span = hi_pos - lo_pos;

		/* clear both positions, then set each with the other's value */
		minor &= ~(lo_bit | hi_bit);
		minor |= (lo_bit << span) | (hi_bit >> span);
	}
#endif
	return minor;
}
332
333 /**
334 * blk_alloc_devt - allocate a dev_t for a partition
335 * @part: partition to allocate dev_t for
336 * @gfp_mask: memory allocation flag
337 * @devt: out parameter for resulting dev_t
338 *
339 * Allocate a dev_t for block device.
340 *
341 * RETURNS:
342 * 0 on success, allocated dev_t is returned in *@devt. -errno on
343 * failure.
344 *
345 * CONTEXT:
346 * Might sleep.
347 */
348 int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
349 {
350 struct gendisk *disk = part_to_disk(part);
351 int idx, rc;
352
353 /* in consecutive minor range? */
354 if (part->partno < disk->minors) {
355 *devt = MKDEV(disk->major, disk->first_minor + part->partno);
356 return 0;
357 }
358
359 /* allocate ext devt */
360 do {
361 if (!idr_pre_get(&ext_devt_idr, GFP_KERNEL))
362 return -ENOMEM;
363 rc = idr_get_new(&ext_devt_idr, part, &idx);
364 } while (rc == -EAGAIN);
365
366 if (rc)
367 return rc;
368
369 if (idx > MAX_EXT_DEVT) {
370 idr_remove(&ext_devt_idr, idx);
371 return -EBUSY;
372 }
373
374 *devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
375 return 0;
376 }
377
378 /**
379 * blk_free_devt - free a dev_t
380 * @devt: dev_t to free
381 *
382 * Free @devt which was allocated using blk_alloc_devt().
383 *
384 * CONTEXT:
385 * Might sleep.
386 */
387 void blk_free_devt(dev_t devt)
388 {
389 might_sleep();
390
391 if (devt == MKDEV(0, 0))
392 return;
393
394 if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
395 mutex_lock(&ext_devt_mutex);
396 idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
397 mutex_unlock(&ext_devt_mutex);
398 }
399 }
400
401 static char *bdevt_str(dev_t devt, char *buf)
402 {
403 if (MAJOR(devt) <= 0xff && MINOR(devt) <= 0xff) {
404 char tbuf[BDEVT_SIZE];
405 snprintf(tbuf, BDEVT_SIZE, "%02x%02x", MAJOR(devt), MINOR(devt));
406 snprintf(buf, BDEVT_SIZE, "%-9s", tbuf);
407 } else
408 snprintf(buf, BDEVT_SIZE, "%03x:%05x", MAJOR(devt), MINOR(devt));
409
410 return buf;
411 }
412
413 /*
414 * Register device numbers dev..(dev+range-1)
415 * range must be nonzero
416 * The hash chain is sorted on range, so that subranges can override.
417 */
418 void blk_register_region(dev_t devt, unsigned long range, struct module *module,
419 struct kobject *(*probe)(dev_t, int *, void *),
420 int (*lock)(dev_t, void *), void *data)
421 {
422 kobj_map(bdev_map, devt, range, module, probe, lock, data);
423 }
424
425 EXPORT_SYMBOL(blk_register_region);
426
427 void blk_unregister_region(dev_t devt, unsigned long range)
428 {
429 kobj_unmap(bdev_map, devt, range);
430 }
431
432 EXPORT_SYMBOL(blk_unregister_region);
433
434 static struct kobject *exact_match(dev_t devt, int *partno, void *data)
435 {
436 struct gendisk *p = data;
437
438 return &p->dev.kobj;
439 }
440
441 static int exact_lock(dev_t devt, void *data)
442 {
443 struct gendisk *p = data;
444
445 if (!get_disk(p))
446 return -1;
447 return 0;
448 }
449
450 /**
451 * add_disk - add partitioning information to kernel list
452 * @disk: per-device partitioning information
453 *
454 * This function registers the partitioning information in @disk
455 * with the kernel.
456 */
457 void add_disk(struct gendisk *disk)
458 {
459 struct backing_dev_info *bdi;
460 int retval;
461
462 disk->flags |= GENHD_FL_UP;
463 disk->dev.devt = MKDEV(disk->major, disk->first_minor);
464 blk_register_region(disk_devt(disk), disk->minors, NULL,
465 exact_match, exact_lock, disk);
466 register_disk(disk);
467 blk_register_queue(disk);
468
469 bdi = &disk->queue->backing_dev_info;
470 bdi_register_dev(bdi, disk_devt(disk));
471 retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi");
472 WARN_ON(retval);
473 }
474
475 EXPORT_SYMBOL(add_disk);
476 EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
477
478 void unlink_gendisk(struct gendisk *disk)
479 {
480 sysfs_remove_link(&disk->dev.kobj, "bdi");
481 bdi_unregister(&disk->queue->backing_dev_info);
482 blk_unregister_queue(disk);
483 blk_unregister_region(disk_devt(disk), disk->minors);
484 }
485
486 /**
487 * get_gendisk - get partitioning information for a given device
488 * @devt: device to get partitioning information for
489 * @part: returned partition index
490 *
491 * This function gets the structure containing partitioning
492 * information for the given device @devt.
493 */
494 struct gendisk *get_gendisk(dev_t devt, int *partno)
495 {
496 struct gendisk *disk = NULL;
497
498 if (MAJOR(devt) != BLOCK_EXT_MAJOR) {
499 struct kobject *kobj;
500
501 kobj = kobj_lookup(bdev_map, devt, partno);
502 if (kobj)
503 disk = dev_to_disk(kobj_to_dev(kobj));
504 } else {
505 struct hd_struct *part;
506
507 mutex_lock(&ext_devt_mutex);
508 part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
509 if (part && get_disk(part_to_disk(part))) {
510 *partno = part->partno;
511 disk = part_to_disk(part);
512 }
513 mutex_unlock(&ext_devt_mutex);
514 }
515
516 return disk;
517 }
518
519 /**
520 * bdget_disk - do bdget() by gendisk and partition number
521 * @disk: gendisk of interest
522 * @partno: partition number
523 *
524 * Find partition @partno from @disk, do bdget() on it.
525 *
526 * CONTEXT:
527 * Don't care.
528 *
529 * RETURNS:
530 * Resulting block_device on success, NULL on failure.
531 */
532 extern struct block_device *bdget_disk(struct gendisk *disk, int partno)
533 {
534 dev_t devt = MKDEV(0, 0);
535
536 if (partno == 0)
537 devt = disk_devt(disk);
538 else {
539 struct hd_struct *part;
540
541 part = disk_get_part(disk, partno);
542 if (part && part->nr_sects)
543 devt = part_devt(part);
544 disk_put_part(part);
545 }
546
547 if (likely(devt != MKDEV(0, 0)))
548 return bdget(devt);
549 return NULL;
550 }
551 EXPORT_SYMBOL(bdget_disk);
552
553 /*
554 * print a full list of all partitions - intended for places where the root
555 * filesystem can't be mounted and thus to give the victim some idea of what
556 * went wrong
557 */
558 void __init printk_all_partitions(void)
559 {
560 struct class_dev_iter iter;
561 struct device *dev;
562
563 class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
564 while ((dev = class_dev_iter_next(&iter))) {
565 struct gendisk *disk = dev_to_disk(dev);
566 struct disk_part_iter piter;
567 struct hd_struct *part;
568 char name_buf[BDEVNAME_SIZE];
569 char devt_buf[BDEVT_SIZE];
570
571 /*
572 * Don't show empty devices or things that have been
573 * surpressed
574 */
575 if (get_capacity(disk) == 0 ||
576 (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
577 continue;
578
579 /*
580 * Note, unlike /proc/partitions, I am showing the
581 * numbers in hex - the same format as the root=
582 * option takes.
583 */
584 printk("%s %10llu %s",
585 bdevt_str(disk_devt(disk), devt_buf),
586 (unsigned long long)get_capacity(disk) >> 1,
587 disk_name(disk, 0, name_buf));
588 if (disk->driverfs_dev != NULL &&
589 disk->driverfs_dev->driver != NULL)
590 printk(" driver: %s\n",
591 disk->driverfs_dev->driver->name);
592 else
593 printk(" (driver?)\n");
594
595 /* now show the partitions */
596 disk_part_iter_init(&piter, disk, 0);
597 while ((part = disk_part_iter_next(&piter)))
598 printk(" %s %10llu %s\n",
599 bdevt_str(part_devt(part), devt_buf),
600 (unsigned long long)part->nr_sects >> 1,
601 disk_name(disk, part->partno, name_buf));
602 disk_part_iter_exit(&piter);
603 }
604 class_dev_iter_exit(&iter);
605 }
606
607 #ifdef CONFIG_PROC_FS
608 /* iterator */
609 static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
610 {
611 loff_t skip = *pos;
612 struct class_dev_iter *iter;
613 struct device *dev;
614
615 iter = kmalloc(GFP_KERNEL, sizeof(*iter));
616 if (!iter)
617 return ERR_PTR(-ENOMEM);
618
619 seqf->private = iter;
620 class_dev_iter_init(iter, &block_class, NULL, &disk_type);
621 do {
622 dev = class_dev_iter_next(iter);
623 if (!dev)
624 return NULL;
625 } while (skip--);
626
627 return dev_to_disk(dev);
628 }
629
630 static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
631 {
632 struct device *dev;
633
634 (*pos)++;
635 dev = class_dev_iter_next(seqf->private);
636 if (dev)
637 return dev_to_disk(dev);
638
639 return NULL;
640 }
641
642 static void disk_seqf_stop(struct seq_file *seqf, void *v)
643 {
644 struct class_dev_iter *iter = seqf->private;
645
646 /* stop is called even after start failed :-( */
647 if (iter) {
648 class_dev_iter_exit(iter);
649 kfree(iter);
650 }
651 }
652
653 static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
654 {
655 static void *p;
656
657 p = disk_seqf_start(seqf, pos);
658 if (!IS_ERR(p) && p)
659 seq_puts(seqf, "major minor #blocks name\n\n");
660 return p;
661 }
662
663 static int show_partition(struct seq_file *seqf, void *v)
664 {
665 struct gendisk *sgp = v;
666 struct disk_part_iter piter;
667 struct hd_struct *part;
668 char buf[BDEVNAME_SIZE];
669
670 /* Don't show non-partitionable removeable devices or empty devices */
671 if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
672 (sgp->flags & GENHD_FL_REMOVABLE)))
673 return 0;
674 if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
675 return 0;
676
677 /* show the full disk and all non-0 size partitions of it */
678 seq_printf(seqf, "%4d %7d %10llu %s\n",
679 MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)),
680 (unsigned long long)get_capacity(sgp) >> 1,
681 disk_name(sgp, 0, buf));
682
683 disk_part_iter_init(&piter, sgp, 0);
684 while ((part = disk_part_iter_next(&piter)))
685 seq_printf(seqf, "%4d %7d %10llu %s\n",
686 MAJOR(part_devt(part)), MINOR(part_devt(part)),
687 (unsigned long long)part->nr_sects >> 1,
688 disk_name(sgp, part->partno, buf));
689 disk_part_iter_exit(&piter);
690
691 return 0;
692 }
693
694 const struct seq_operations partitions_op = {
695 .start = show_partition_start,
696 .next = disk_seqf_next,
697 .stop = disk_seqf_stop,
698 .show = show_partition
699 };
700 #endif
701
702
703 static struct kobject *base_probe(dev_t devt, int *partno, void *data)
704 {
705 if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
706 /* Make old-style 2.4 aliases work */
707 request_module("block-major-%d", MAJOR(devt));
708 return NULL;
709 }
710
711 static int __init genhd_device_init(void)
712 {
713 int error;
714
715 block_class.dev_kobj = sysfs_dev_block_kobj;
716 error = class_register(&block_class);
717 if (unlikely(error))
718 return error;
719 bdev_map = kobj_map_init(base_probe, &block_class_lock);
720 blk_dev_init();
721
722 #ifndef CONFIG_SYSFS_DEPRECATED
723 /* create top-level block dir */
724 block_depr = kobject_create_and_add("block", NULL);
725 #endif
726 return 0;
727 }
728
729 subsys_initcall(genhd_device_init);
730
731 static ssize_t disk_range_show(struct device *dev,
732 struct device_attribute *attr, char *buf)
733 {
734 struct gendisk *disk = dev_to_disk(dev);
735
736 return sprintf(buf, "%d\n", disk->minors);
737 }
738
739 static ssize_t disk_ext_range_show(struct device *dev,
740 struct device_attribute *attr, char *buf)
741 {
742 struct gendisk *disk = dev_to_disk(dev);
743
744 return sprintf(buf, "%d\n", disk_max_parts(disk) + 1);
745 }
746
747 static ssize_t disk_removable_show(struct device *dev,
748 struct device_attribute *attr, char *buf)
749 {
750 struct gendisk *disk = dev_to_disk(dev);
751
752 return sprintf(buf, "%d\n",
753 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
754 }
755
756 static ssize_t disk_ro_show(struct device *dev,
757 struct device_attribute *attr, char *buf)
758 {
759 struct gendisk *disk = dev_to_disk(dev);
760
761 return sprintf(buf, "%d\n", disk->policy ? 1 : 0);
762 }
763
764 static ssize_t disk_size_show(struct device *dev,
765 struct device_attribute *attr, char *buf)
766 {
767 struct gendisk *disk = dev_to_disk(dev);
768
769 return sprintf(buf, "%llu\n", (unsigned long long)get_capacity(disk));
770 }
771
772 static ssize_t disk_capability_show(struct device *dev,
773 struct device_attribute *attr, char *buf)
774 {
775 struct gendisk *disk = dev_to_disk(dev);
776
777 return sprintf(buf, "%x\n", disk->flags);
778 }
779
780 static ssize_t disk_stat_show(struct device *dev,
781 struct device_attribute *attr, char *buf)
782 {
783 struct gendisk *disk = dev_to_disk(dev);
784 int cpu;
785
786 cpu = disk_stat_lock();
787 disk_round_stats(cpu, disk);
788 disk_stat_unlock();
789 return sprintf(buf,
790 "%8lu %8lu %8llu %8u "
791 "%8lu %8lu %8llu %8u "
792 "%8u %8u %8u"
793 "\n",
794 disk_stat_read(disk, ios[READ]),
795 disk_stat_read(disk, merges[READ]),
796 (unsigned long long)disk_stat_read(disk, sectors[READ]),
797 jiffies_to_msecs(disk_stat_read(disk, ticks[READ])),
798 disk_stat_read(disk, ios[WRITE]),
799 disk_stat_read(disk, merges[WRITE]),
800 (unsigned long long)disk_stat_read(disk, sectors[WRITE]),
801 jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])),
802 disk->in_flight,
803 jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
804 jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
805 }
806
807 #ifdef CONFIG_FAIL_MAKE_REQUEST
808 static ssize_t disk_fail_show(struct device *dev,
809 struct device_attribute *attr, char *buf)
810 {
811 struct gendisk *disk = dev_to_disk(dev);
812
813 return sprintf(buf, "%d\n", disk->flags & GENHD_FL_FAIL ? 1 : 0);
814 }
815
816 static ssize_t disk_fail_store(struct device *dev,
817 struct device_attribute *attr,
818 const char *buf, size_t count)
819 {
820 struct gendisk *disk = dev_to_disk(dev);
821 int i;
822
823 if (count > 0 && sscanf(buf, "%d", &i) > 0) {
824 if (i == 0)
825 disk->flags &= ~GENHD_FL_FAIL;
826 else
827 disk->flags |= GENHD_FL_FAIL;
828 }
829
830 return count;
831 }
832
833 #endif
834
/* Read-only sysfs attributes of a disk device. */
static DEVICE_ATTR(range,	S_IRUGO, disk_range_show,	NULL);
static DEVICE_ATTR(ext_range,	S_IRUGO, disk_ext_range_show,	NULL);
static DEVICE_ATTR(removable,	S_IRUGO, disk_removable_show,	NULL);
static DEVICE_ATTR(ro,		S_IRUGO, disk_ro_show,		NULL);
static DEVICE_ATTR(size,	S_IRUGO, disk_size_show,	NULL);
static DEVICE_ATTR(capability,	S_IRUGO, disk_capability_show,	NULL);
static DEVICE_ATTR(stat,	S_IRUGO, disk_stat_show,	NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
/* Spelled out with __ATTR because the attribute name contains dashes. */
static struct device_attribute dev_attr_fail =
	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, disk_fail_show, disk_fail_store);
#endif
846
847 static struct attribute *disk_attrs[] = {
848 &dev_attr_range.attr,
849 &dev_attr_ext_range.attr,
850 &dev_attr_removable.attr,
851 &dev_attr_ro.attr,
852 &dev_attr_size.attr,
853 &dev_attr_capability.attr,
854 &dev_attr_stat.attr,
855 #ifdef CONFIG_FAIL_MAKE_REQUEST
856 &dev_attr_fail.attr,
857 #endif
858 NULL
859 };
860
861 static struct attribute_group disk_attr_group = {
862 .attrs = disk_attrs,
863 };
864
865 static struct attribute_group *disk_attr_groups[] = {
866 &disk_attr_group,
867 NULL
868 };
869
870 static void disk_release(struct device *dev)
871 {
872 struct gendisk *disk = dev_to_disk(dev);
873
874 kfree(disk->random);
875 kfree(disk->__part);
876 free_disk_stats(disk);
877 kfree(disk);
878 }
879 struct class block_class = {
880 .name = "block",
881 };
882
883 static struct device_type disk_type = {
884 .name = "disk",
885 .groups = disk_attr_groups,
886 .release = disk_release,
887 };
888
889 #ifdef CONFIG_PROC_FS
890 /*
891 * aggregate disk stat collector. Uses the same stats that the sysfs
892 * entries do, above, but makes them available through one seq_file.
893 *
894 * The output looks suspiciously like /proc/partitions with a bunch of
895 * extra fields.
896 */
897 static int diskstats_show(struct seq_file *seqf, void *v)
898 {
899 struct gendisk *gp = v;
900 struct disk_part_iter piter;
901 struct hd_struct *hd;
902 char buf[BDEVNAME_SIZE];
903 int cpu;
904
905 /*
906 if (&gp->dev.kobj.entry == block_class.devices.next)
907 seq_puts(seqf, "major minor name"
908 " rio rmerge rsect ruse wio wmerge "
909 "wsect wuse running use aveq"
910 "\n\n");
911 */
912
913 cpu = disk_stat_lock();
914 disk_round_stats(cpu, gp);
915 disk_stat_unlock();
916 seq_printf(seqf, "%4d %7d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n",
917 MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)),
918 disk_name(gp, 0, buf),
919 disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]),
920 (unsigned long long)disk_stat_read(gp, sectors[0]),
921 jiffies_to_msecs(disk_stat_read(gp, ticks[0])),
922 disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]),
923 (unsigned long long)disk_stat_read(gp, sectors[1]),
924 jiffies_to_msecs(disk_stat_read(gp, ticks[1])),
925 gp->in_flight,
926 jiffies_to_msecs(disk_stat_read(gp, io_ticks)),
927 jiffies_to_msecs(disk_stat_read(gp, time_in_queue)));
928
929 /* now show all non-0 size partitions of it */
930 disk_part_iter_init(&piter, gp, 0);
931 while ((hd = disk_part_iter_next(&piter))) {
932 cpu = disk_stat_lock();
933 part_round_stats(cpu, hd);
934 disk_stat_unlock();
935 seq_printf(seqf, "%4d %7d %s %lu %lu %llu "
936 "%u %lu %lu %llu %u %u %u %u\n",
937 MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
938 disk_name(gp, hd->partno, buf),
939 part_stat_read(hd, ios[0]),
940 part_stat_read(hd, merges[0]),
941 (unsigned long long)part_stat_read(hd, sectors[0]),
942 jiffies_to_msecs(part_stat_read(hd, ticks[0])),
943 part_stat_read(hd, ios[1]),
944 part_stat_read(hd, merges[1]),
945 (unsigned long long)part_stat_read(hd, sectors[1]),
946 jiffies_to_msecs(part_stat_read(hd, ticks[1])),
947 hd->in_flight,
948 jiffies_to_msecs(part_stat_read(hd, io_ticks)),
949 jiffies_to_msecs(part_stat_read(hd, time_in_queue))
950 );
951 }
952 disk_part_iter_exit(&piter);
953
954 return 0;
955 }
956
957 const struct seq_operations diskstats_op = {
958 .start = disk_seqf_start,
959 .next = disk_seqf_next,
960 .stop = disk_seqf_stop,
961 .show = diskstats_show
962 };
963 #endif /* CONFIG_PROC_FS */
964
965 static void media_change_notify_thread(struct work_struct *work)
966 {
967 struct gendisk *gd = container_of(work, struct gendisk, async_notify);
968 char event[] = "MEDIA_CHANGE=1";
969 char *envp[] = { event, NULL };
970
971 /*
972 * set enviroment vars to indicate which event this is for
973 * so that user space will know to go check the media status.
974 */
975 kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp);
976 put_device(gd->driverfs_dev);
977 }
978
979 #if 0
980 void genhd_media_change_notify(struct gendisk *disk)
981 {
982 get_device(disk->driverfs_dev);
983 schedule_work(&disk->async_notify);
984 }
985 EXPORT_SYMBOL_GPL(genhd_media_change_notify);
986 #endif /* 0 */
987
988 dev_t blk_lookup_devt(const char *name, int partno)
989 {
990 dev_t devt = MKDEV(0, 0);
991 struct class_dev_iter iter;
992 struct device *dev;
993
994 class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
995 while ((dev = class_dev_iter_next(&iter))) {
996 struct gendisk *disk = dev_to_disk(dev);
997
998 if (strcmp(dev->bus_id, name))
999 continue;
1000 if (partno < 0 || partno > disk_max_parts(disk))
1001 continue;
1002
1003 if (partno == 0)
1004 devt = disk_devt(disk);
1005 else {
1006 struct hd_struct *part;
1007
1008 part = disk_get_part(disk, partno);
1009 if (!part || !part->nr_sects) {
1010 disk_put_part(part);
1011 continue;
1012 }
1013
1014 devt = part_devt(part);
1015 disk_put_part(part);
1016 }
1017 break;
1018 }
1019 class_dev_iter_exit(&iter);
1020 return devt;
1021 }
1022 EXPORT_SYMBOL(blk_lookup_devt);
1023
/* Allocate a gendisk with @minors minors, passing -1 as the node id. */
struct gendisk *alloc_disk(int minors)
{
	const int any_node = -1;

	return alloc_disk_node(minors, any_node);
}
1028
/* Allocate a gendisk with @minors minors and no extended minors on @node_id. */
struct gendisk *alloc_disk_node(int minors, int node_id)
{
	const int no_ext_minors = 0;

	return alloc_disk_ext_node(minors, no_ext_minors, node_id);
}
1033
/*
 * Allocate a gendisk with @minors regular and @ext_minors extended
 * minors, passing -1 as the node id.
 */
struct gendisk *alloc_disk_ext(int minors, int ext_minors)
{
	const int any_node = -1;

	return alloc_disk_ext_node(minors, ext_minors, any_node);
}
1038
1039 struct gendisk *alloc_disk_ext_node(int minors, int ext_minors, int node_id)
1040 {
1041 struct gendisk *disk;
1042
1043 disk = kmalloc_node(sizeof(struct gendisk),
1044 GFP_KERNEL | __GFP_ZERO, node_id);
1045 if (disk) {
1046 int tot_minors = minors + ext_minors;
1047
1048 if (!init_disk_stats(disk)) {
1049 kfree(disk);
1050 return NULL;
1051 }
1052 if (tot_minors > 1) {
1053 int size = (tot_minors - 1) * sizeof(struct hd_struct *);
1054 disk->__part = kmalloc_node(size,
1055 GFP_KERNEL | __GFP_ZERO, node_id);
1056 if (!disk->__part) {
1057 free_disk_stats(disk);
1058 kfree(disk);
1059 return NULL;
1060 }
1061 }
1062 disk->minors = minors;
1063 disk->ext_minors = ext_minors;
1064 rand_initialize_disk(disk);
1065 disk->dev.class = &block_class;
1066 disk->dev.type = &disk_type;
1067 device_initialize(&disk->dev);
1068 INIT_WORK(&disk->async_notify,
1069 media_change_notify_thread);
1070 }
1071 return disk;
1072 }
1073
1074 EXPORT_SYMBOL(alloc_disk);
1075 EXPORT_SYMBOL(alloc_disk_node);
1076 EXPORT_SYMBOL(alloc_disk_ext);
1077 EXPORT_SYMBOL(alloc_disk_ext_node);
1078
1079 struct kobject *get_disk(struct gendisk *disk)
1080 {
1081 struct module *owner;
1082 struct kobject *kobj;
1083
1084 if (!disk->fops)
1085 return NULL;
1086 owner = disk->fops->owner;
1087 if (owner && !try_module_get(owner))
1088 return NULL;
1089 kobj = kobject_get(&disk->dev.kobj);
1090 if (kobj == NULL) {
1091 module_put(owner);
1092 return NULL;
1093 }
1094 return kobj;
1095
1096 }
1097
1098 EXPORT_SYMBOL(get_disk);
1099
1100 void put_disk(struct gendisk *disk)
1101 {
1102 if (disk)
1103 kobject_put(&disk->dev.kobj);
1104 }
1105
1106 EXPORT_SYMBOL(put_disk);
1107
1108 void set_device_ro(struct block_device *bdev, int flag)
1109 {
1110 if (bdev->bd_contains != bdev)
1111 bdev->bd_part->policy = flag;
1112 else
1113 bdev->bd_disk->policy = flag;
1114 }
1115
1116 EXPORT_SYMBOL(set_device_ro);
1117
1118 void set_disk_ro(struct gendisk *disk, int flag)
1119 {
1120 struct disk_part_iter piter;
1121 struct hd_struct *part;
1122
1123 disk->policy = flag;
1124 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
1125 while ((part = disk_part_iter_next(&piter)))
1126 part->policy = flag;
1127 disk_part_iter_exit(&piter);
1128 }
1129
1130 EXPORT_SYMBOL(set_disk_ro);
1131
1132 int bdev_read_only(struct block_device *bdev)
1133 {
1134 if (!bdev)
1135 return 0;
1136 else if (bdev->bd_contains != bdev)
1137 return bdev->bd_part->policy;
1138 else
1139 return bdev->bd_disk->policy;
1140 }
1141
1142 EXPORT_SYMBOL(bdev_read_only);
1143
/*
 * Sync and invalidate the block device behind partition @partno of
 * @disk.  Returns the result of __invalidate_device(), or 0 if no
 * block device could be obtained.
 */
int invalidate_partition(struct gendisk *disk, int partno)
{
	struct block_device *bdev = bdget_disk(disk, partno);
	int res;

	if (!bdev)
		return 0;

	fsync_bdev(bdev);
	res = __invalidate_device(bdev);
	bdput(bdev);

	return res;
}

EXPORT_SYMBOL(invalidate_partition);