]> git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blame - block/genhd.c
block: fix disk->part[] dereferencing race
[mirror_ubuntu-bionic-kernel.git] / block / genhd.c
CommitLineData
1da177e4
LT
1/*
2 * gendisk handling
3 */
4
1da177e4
LT
5#include <linux/module.h>
6#include <linux/fs.h>
7#include <linux/genhd.h>
b446b60e 8#include <linux/kdev_t.h>
1da177e4
LT
9#include <linux/kernel.h>
10#include <linux/blkdev.h>
11#include <linux/init.h>
12#include <linux/spinlock.h>
13#include <linux/seq_file.h>
14#include <linux/slab.h>
15#include <linux/kmod.h>
16#include <linux/kobj_map.h>
2ef41634 17#include <linux/buffer_head.h>
58383af6 18#include <linux/mutex.h>
1da177e4 19
ff88972c
AB
20#include "blk.h"
21
edfaa7c3
KS
22static DEFINE_MUTEX(block_class_lock);
23#ifndef CONFIG_SYSFS_DEPRECATED
24struct kobject *block_depr;
25#endif
1da177e4 26
1826eadf
AB
27static struct device_type disk_type;
28
e71bf0d0
TH
29/**
30 * disk_get_part - get partition
31 * @disk: disk to look partition from
32 * @partno: partition number
33 *
34 * Look for partition @partno from @disk. If found, increment
35 * reference count and return it.
36 *
37 * CONTEXT:
38 * Don't care.
39 *
40 * RETURNS:
41 * Pointer to the found partition on success, NULL if not found.
42 */
43struct hd_struct *disk_get_part(struct gendisk *disk, int partno)
44{
45 struct hd_struct *part;
46
47 if (unlikely(partno < 1 || partno > disk_max_parts(disk)))
48 return NULL;
49 rcu_read_lock();
50 part = rcu_dereference(disk->__part[partno - 1]);
51 if (part)
52 get_device(&part->dev);
53 rcu_read_unlock();
54
55 return part;
56}
57EXPORT_SYMBOL_GPL(disk_get_part);
58
59/**
60 * disk_part_iter_init - initialize partition iterator
61 * @piter: iterator to initialize
62 * @disk: disk to iterate over
63 * @flags: DISK_PITER_* flags
64 *
65 * Initialize @piter so that it iterates over partitions of @disk.
66 *
67 * CONTEXT:
68 * Don't care.
69 */
70void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk,
71 unsigned int flags)
72{
73 piter->disk = disk;
74 piter->part = NULL;
75
76 if (flags & DISK_PITER_REVERSE)
77 piter->idx = disk_max_parts(piter->disk) - 1;
78 else
79 piter->idx = 0;
80
81 piter->flags = flags;
82}
83EXPORT_SYMBOL_GPL(disk_part_iter_init);
84
85/**
86 * disk_part_iter_next - proceed iterator to the next partition and return it
87 * @piter: iterator of interest
88 *
89 * Proceed @piter to the next partition and return it.
90 *
91 * CONTEXT:
92 * Don't care.
93 */
94struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
95{
96 int inc, end;
97
98 /* put the last partition */
99 disk_put_part(piter->part);
100 piter->part = NULL;
101
102 rcu_read_lock();
103
104 /* determine iteration parameters */
105 if (piter->flags & DISK_PITER_REVERSE) {
106 inc = -1;
107 end = -1;
108 } else {
109 inc = 1;
110 end = disk_max_parts(piter->disk);
111 }
112
113 /* iterate to the next partition */
114 for (; piter->idx != end; piter->idx += inc) {
115 struct hd_struct *part;
116
117 part = rcu_dereference(piter->disk->__part[piter->idx]);
118 if (!part)
119 continue;
120 if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects)
121 continue;
122
123 get_device(&part->dev);
124 piter->part = part;
125 piter->idx += inc;
126 break;
127 }
128
129 rcu_read_unlock();
130
131 return piter->part;
132}
133EXPORT_SYMBOL_GPL(disk_part_iter_next);
134
135/**
136 * disk_part_iter_exit - finish up partition iteration
137 * @piter: iter of interest
138 *
139 * Called when iteration is over. Cleans up @piter.
140 *
141 * CONTEXT:
142 * Don't care.
143 */
144void disk_part_iter_exit(struct disk_part_iter *piter)
145{
146 disk_put_part(piter->part);
147 piter->part = NULL;
148}
149EXPORT_SYMBOL_GPL(disk_part_iter_exit);
150
151/**
152 * disk_map_sector_rcu - map sector to partition
153 * @disk: gendisk of interest
154 * @sector: sector to map
155 *
156 * Find out which partition @sector maps to on @disk. This is
157 * primarily used for stats accounting.
158 *
159 * CONTEXT:
160 * RCU read locked. The returned partition pointer is valid only
161 * while preemption is disabled.
162 *
163 * RETURNS:
164 * Found partition on success, NULL if there's no matching partition.
165 */
166struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, sector_t sector)
167{
168 int i;
169
170 for (i = 0; i < disk_max_parts(disk); i++) {
171 struct hd_struct *part = rcu_dereference(disk->__part[i]);
172
173 if (part && part->start_sect <= sector &&
174 sector < part->start_sect + part->nr_sects)
175 return part;
176 }
177 return NULL;
178}
179EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
180
1da177e4
LT
/*
 * Registered block majors: an array of buckets, each a singly linked
 * chain of (major, name) entries.
 *
 * Can be deleted altogether.  Later.
 */
struct blk_major_name {
	struct blk_major_name *next;
	int major;
	char name[16];
};

static struct blk_major_name *major_names[BLKDEV_MAJOR_HASH_SIZE];
1da177e4
LT
190
191/* index in the above - for now: assume no multimajor ranges */
192static inline int major_to_index(int major)
193{
68eef3b4 194 return major % BLKDEV_MAJOR_HASH_SIZE;
7170be5f
NH
195}
196
#ifdef CONFIG_PROC_FS
/*
 * Print every registered major in bucket @offset of major_names[] to
 * @seqf, one "major name" pair per line.  Offsets at or beyond
 * BLKDEV_MAJOR_HASH_SIZE print nothing.
 */
void blkdev_show(struct seq_file *seqf, off_t offset)
{
	struct blk_major_name *entry;

	if (offset >= BLKDEV_MAJOR_HASH_SIZE)
		return;

	mutex_lock(&block_class_lock);
	entry = major_names[offset];
	while (entry) {
		seq_printf(seqf, "%3d %s\n", entry->major, entry->name);
		entry = entry->next;
	}
	mutex_unlock(&block_class_lock);
}
#endif /* CONFIG_PROC_FS */
1da177e4
LT
210
211int register_blkdev(unsigned int major, const char *name)
212{
213 struct blk_major_name **n, *p;
214 int index, ret = 0;
215
edfaa7c3 216 mutex_lock(&block_class_lock);
1da177e4
LT
217
218 /* temporary */
219 if (major == 0) {
220 for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
221 if (major_names[index] == NULL)
222 break;
223 }
224
225 if (index == 0) {
226 printk("register_blkdev: failed to get major for %s\n",
227 name);
228 ret = -EBUSY;
229 goto out;
230 }
231 major = index;
232 ret = major;
233 }
234
235 p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
236 if (p == NULL) {
237 ret = -ENOMEM;
238 goto out;
239 }
240
241 p->major = major;
242 strlcpy(p->name, name, sizeof(p->name));
243 p->next = NULL;
244 index = major_to_index(major);
245
246 for (n = &major_names[index]; *n; n = &(*n)->next) {
247 if ((*n)->major == major)
248 break;
249 }
250 if (!*n)
251 *n = p;
252 else
253 ret = -EBUSY;
254
255 if (ret < 0) {
256 printk("register_blkdev: cannot get major %d for %s\n",
257 major, name);
258 kfree(p);
259 }
260out:
edfaa7c3 261 mutex_unlock(&block_class_lock);
1da177e4
LT
262 return ret;
263}
264
265EXPORT_SYMBOL(register_blkdev);
266
f4480240 267void unregister_blkdev(unsigned int major, const char *name)
1da177e4
LT
268{
269 struct blk_major_name **n;
270 struct blk_major_name *p = NULL;
271 int index = major_to_index(major);
1da177e4 272
edfaa7c3 273 mutex_lock(&block_class_lock);
1da177e4
LT
274 for (n = &major_names[index]; *n; n = &(*n)->next)
275 if ((*n)->major == major)
276 break;
294462a5
AM
277 if (!*n || strcmp((*n)->name, name)) {
278 WARN_ON(1);
294462a5 279 } else {
1da177e4
LT
280 p = *n;
281 *n = p->next;
282 }
edfaa7c3 283 mutex_unlock(&block_class_lock);
1da177e4 284 kfree(p);
1da177e4
LT
285}
286
287EXPORT_SYMBOL(unregister_blkdev);
288
289static struct kobj_map *bdev_map;
290
291/*
292 * Register device numbers dev..(dev+range-1)
293 * range must be nonzero
294 * The hash chain is sorted on range, so that subranges can override.
295 */
edfaa7c3 296void blk_register_region(dev_t devt, unsigned long range, struct module *module,
1da177e4
LT
297 struct kobject *(*probe)(dev_t, int *, void *),
298 int (*lock)(dev_t, void *), void *data)
299{
edfaa7c3 300 kobj_map(bdev_map, devt, range, module, probe, lock, data);
1da177e4
LT
301}
302
303EXPORT_SYMBOL(blk_register_region);
304
edfaa7c3 305void blk_unregister_region(dev_t devt, unsigned long range)
1da177e4 306{
edfaa7c3 307 kobj_unmap(bdev_map, devt, range);
1da177e4
LT
308}
309
310EXPORT_SYMBOL(blk_unregister_region);
311
cf771cb5 312static struct kobject *exact_match(dev_t devt, int *partno, void *data)
1da177e4
LT
313{
314 struct gendisk *p = data;
edfaa7c3
KS
315
316 return &p->dev.kobj;
1da177e4
LT
317}
318
edfaa7c3 319static int exact_lock(dev_t devt, void *data)
1da177e4
LT
320{
321 struct gendisk *p = data;
322
323 if (!get_disk(p))
324 return -1;
325 return 0;
326}
327
328/**
329 * add_disk - add partitioning information to kernel list
330 * @disk: per-device partitioning information
331 *
332 * This function registers the partitioning information in @disk
333 * with the kernel.
334 */
335void add_disk(struct gendisk *disk)
336{
cf0ca9fe 337 struct backing_dev_info *bdi;
6ffeea77 338 int retval;
cf0ca9fe 339
1da177e4 340 disk->flags |= GENHD_FL_UP;
f331c029
TH
341 disk->dev.devt = MKDEV(disk->major, disk->first_minor);
342 blk_register_region(disk_devt(disk), disk->minors, NULL,
343 exact_match, exact_lock, disk);
1da177e4
LT
344 register_disk(disk);
345 blk_register_queue(disk);
cf0ca9fe
PZ
346
347 bdi = &disk->queue->backing_dev_info;
f331c029 348 bdi_register_dev(bdi, disk_devt(disk));
6ffeea77
GKH
349 retval = sysfs_create_link(&disk->dev.kobj, &bdi->dev->kobj, "bdi");
350 WARN_ON(retval);
1da177e4
LT
351}
352
353EXPORT_SYMBOL(add_disk);
354EXPORT_SYMBOL(del_gendisk); /* in partitions/check.c */
355
356void unlink_gendisk(struct gendisk *disk)
357{
cf0ca9fe
PZ
358 sysfs_remove_link(&disk->dev.kobj, "bdi");
359 bdi_unregister(&disk->queue->backing_dev_info);
1da177e4 360 blk_unregister_queue(disk);
f331c029 361 blk_unregister_region(disk_devt(disk), disk->minors);
1da177e4
LT
362}
363
1da177e4
LT
364/**
365 * get_gendisk - get partitioning information for a given device
710027a4
RD
366 * @devt: device to get partitioning information for
367 * @part: returned partition index
1da177e4
LT
368 *
369 * This function gets the structure containing partitioning
710027a4 370 * information for the given device @devt.
1da177e4 371 */
cf771cb5 372struct gendisk *get_gendisk(dev_t devt, int *partno)
1da177e4 373{
cf771cb5 374 struct kobject *kobj = kobj_lookup(bdev_map, devt, partno);
edfaa7c3
KS
375 struct device *dev = kobj_to_dev(kobj);
376
377 return kobj ? dev_to_disk(dev) : NULL;
1da177e4
LT
378}
379
f331c029
TH
380/**
381 * bdget_disk - do bdget() by gendisk and partition number
382 * @disk: gendisk of interest
383 * @partno: partition number
384 *
385 * Find partition @partno from @disk, do bdget() on it.
386 *
387 * CONTEXT:
388 * Don't care.
389 *
390 * RETURNS:
391 * Resulting block_device on success, NULL on failure.
392 */
393extern struct block_device *bdget_disk(struct gendisk *disk, int partno)
394{
395 dev_t devt = MKDEV(0, 0);
396
397 if (partno == 0)
398 devt = disk_devt(disk);
399 else {
e71bf0d0 400 struct hd_struct *part;
f331c029 401
e71bf0d0 402 part = disk_get_part(disk, partno);
f331c029
TH
403 if (part && part->nr_sects)
404 devt = part_devt(part);
e71bf0d0 405 disk_put_part(part);
f331c029
TH
406 }
407
408 if (likely(devt != MKDEV(0, 0)))
409 return bdget(devt);
410 return NULL;
411}
412EXPORT_SYMBOL(bdget_disk);
413
5c6f35c5
GKH
414/*
415 * print a full list of all partitions - intended for places where the root
416 * filesystem can't be mounted and thus to give the victim some idea of what
417 * went wrong
418 */
419void __init printk_all_partitions(void)
420{
def4e38d
TH
421 struct class_dev_iter iter;
422 struct device *dev;
423
424 class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
425 while ((dev = class_dev_iter_next(&iter))) {
426 struct gendisk *disk = dev_to_disk(dev);
e71bf0d0
TH
427 struct disk_part_iter piter;
428 struct hd_struct *part;
def4e38d 429 char buf[BDEVNAME_SIZE];
def4e38d
TH
430
431 /*
432 * Don't show empty devices or things that have been
433 * surpressed
434 */
435 if (get_capacity(disk) == 0 ||
436 (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
437 continue;
438
439 /*
440 * Note, unlike /proc/partitions, I am showing the
441 * numbers in hex - the same format as the root=
442 * option takes.
443 */
444 printk("%02x%02x %10llu %s",
f331c029 445 MAJOR(disk_devt(disk)), MINOR(disk_devt(disk)),
def4e38d
TH
446 (unsigned long long)get_capacity(disk) >> 1,
447 disk_name(disk, 0, buf));
448 if (disk->driverfs_dev != NULL &&
449 disk->driverfs_dev->driver != NULL)
450 printk(" driver: %s\n",
451 disk->driverfs_dev->driver->name);
452 else
453 printk(" (driver?)\n");
454
455 /* now show the partitions */
e71bf0d0
TH
456 disk_part_iter_init(&piter, disk, 0);
457 while ((part = disk_part_iter_next(&piter)))
def4e38d 458 printk(" %02x%02x %10llu %s\n",
f331c029
TH
459 MAJOR(part_devt(part)), MINOR(part_devt(part)),
460 (unsigned long long)part->nr_sects >> 1,
461 disk_name(disk, part->partno, buf));
e71bf0d0 462 disk_part_iter_exit(&piter);
def4e38d
TH
463 }
464 class_dev_iter_exit(&iter);
dd2a345f
DG
465}
466
1da177e4
LT
467#ifdef CONFIG_PROC_FS
468/* iterator */
def4e38d 469static void *disk_seqf_start(struct seq_file *seqf, loff_t *pos)
68c4d4a7 470{
def4e38d
TH
471 loff_t skip = *pos;
472 struct class_dev_iter *iter;
473 struct device *dev;
68c4d4a7 474
def4e38d
TH
475 iter = kmalloc(GFP_KERNEL, sizeof(*iter));
476 if (!iter)
477 return ERR_PTR(-ENOMEM);
478
479 seqf->private = iter;
480 class_dev_iter_init(iter, &block_class, NULL, &disk_type);
481 do {
482 dev = class_dev_iter_next(iter);
483 if (!dev)
484 return NULL;
485 } while (skip--);
486
487 return dev_to_disk(dev);
68c4d4a7
GKH
488}
489
def4e38d 490static void *disk_seqf_next(struct seq_file *seqf, void *v, loff_t *pos)
1da177e4 491{
edfaa7c3 492 struct device *dev;
1da177e4 493
def4e38d
TH
494 (*pos)++;
495 dev = class_dev_iter_next(seqf->private);
2ac3cee5 496 if (dev)
68c4d4a7 497 return dev_to_disk(dev);
2ac3cee5 498
1da177e4
LT
499 return NULL;
500}
501
def4e38d 502static void disk_seqf_stop(struct seq_file *seqf, void *v)
27f30251 503{
def4e38d 504 struct class_dev_iter *iter = seqf->private;
27f30251 505
def4e38d
TH
506 /* stop is called even after start failed :-( */
507 if (iter) {
508 class_dev_iter_exit(iter);
509 kfree(iter);
5c0ef6d0 510 }
1da177e4
LT
511}
512
def4e38d 513static void *show_partition_start(struct seq_file *seqf, loff_t *pos)
1da177e4 514{
def4e38d
TH
515 static void *p;
516
517 p = disk_seqf_start(seqf, pos);
518 if (!IS_ERR(p) && p)
519 seq_puts(seqf, "major minor #blocks name\n\n");
520 return p;
1da177e4
LT
521}
522
cf771cb5 523static int show_partition(struct seq_file *seqf, void *v)
1da177e4
LT
524{
525 struct gendisk *sgp = v;
e71bf0d0
TH
526 struct disk_part_iter piter;
527 struct hd_struct *part;
1da177e4
LT
528 char buf[BDEVNAME_SIZE];
529
1da177e4 530 /* Don't show non-partitionable removeable devices or empty devices */
f331c029
TH
531 if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
532 (sgp->flags & GENHD_FL_REMOVABLE)))
1da177e4
LT
533 return 0;
534 if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
535 return 0;
536
537 /* show the full disk and all non-0 size partitions of it */
cf771cb5 538 seq_printf(seqf, "%4d %4d %10llu %s\n",
f331c029 539 MAJOR(disk_devt(sgp)), MINOR(disk_devt(sgp)),
1da177e4
LT
540 (unsigned long long)get_capacity(sgp) >> 1,
541 disk_name(sgp, 0, buf));
e71bf0d0
TH
542
543 disk_part_iter_init(&piter, sgp, 0);
544 while ((part = disk_part_iter_next(&piter)))
cf771cb5 545 seq_printf(seqf, "%4d %4d %10llu %s\n",
f331c029
TH
546 MAJOR(part_devt(part)), MINOR(part_devt(part)),
547 (unsigned long long)part->nr_sects >> 1,
548 disk_name(sgp, part->partno, buf));
e71bf0d0 549 disk_part_iter_exit(&piter);
1da177e4
LT
550
551 return 0;
552}
553
12f32bb3 554const struct seq_operations partitions_op = {
def4e38d
TH
555 .start = show_partition_start,
556 .next = disk_seqf_next,
557 .stop = disk_seqf_stop,
edfaa7c3 558 .show = show_partition
1da177e4
LT
559};
560#endif
561
562
cf771cb5 563static struct kobject *base_probe(dev_t devt, int *partno, void *data)
1da177e4 564{
edfaa7c3 565 if (request_module("block-major-%d-%d", MAJOR(devt), MINOR(devt)) > 0)
1da177e4 566 /* Make old-style 2.4 aliases work */
edfaa7c3 567 request_module("block-major-%d", MAJOR(devt));
1da177e4
LT
568 return NULL;
569}
570
571static int __init genhd_device_init(void)
572{
e105b8bf
DW
573 int error;
574
575 block_class.dev_kobj = sysfs_dev_block_kobj;
576 error = class_register(&block_class);
ee27a558
RM
577 if (unlikely(error))
578 return error;
edfaa7c3 579 bdev_map = kobj_map_init(base_probe, &block_class_lock);
1da177e4 580 blk_dev_init();
edfaa7c3
KS
581
582#ifndef CONFIG_SYSFS_DEPRECATED
583 /* create top-level block dir */
584 block_depr = kobject_create_and_add("block", NULL);
585#endif
830d3cfb 586 return 0;
1da177e4
LT
587}
588
589subsys_initcall(genhd_device_init);
590
edfaa7c3
KS
591static ssize_t disk_range_show(struct device *dev,
592 struct device_attribute *attr, char *buf)
1da177e4 593{
edfaa7c3 594 struct gendisk *disk = dev_to_disk(dev);
1da177e4 595
edfaa7c3 596 return sprintf(buf, "%d\n", disk->minors);
1da177e4
LT
597}
598
edfaa7c3
KS
599static ssize_t disk_removable_show(struct device *dev,
600 struct device_attribute *attr, char *buf)
a7fd6706 601{
edfaa7c3 602 struct gendisk *disk = dev_to_disk(dev);
a7fd6706 603
edfaa7c3
KS
604 return sprintf(buf, "%d\n",
605 (disk->flags & GENHD_FL_REMOVABLE ? 1 : 0));
a7fd6706
KS
606}
607
1c9ce527
KS
608static ssize_t disk_ro_show(struct device *dev,
609 struct device_attribute *attr, char *buf)
610{
611 struct gendisk *disk = dev_to_disk(dev);
612
613 return sprintf(buf, "%d\n", disk->policy ? 1 : 0);
614}
615
edfaa7c3
KS
616static ssize_t disk_size_show(struct device *dev,
617 struct device_attribute *attr, char *buf)
a7fd6706 618{
edfaa7c3 619 struct gendisk *disk = dev_to_disk(dev);
1da177e4 620
edfaa7c3 621 return sprintf(buf, "%llu\n", (unsigned long long)get_capacity(disk));
1da177e4 622}
edfaa7c3
KS
623
624static ssize_t disk_capability_show(struct device *dev,
625 struct device_attribute *attr, char *buf)
86ce18d7 626{
edfaa7c3
KS
627 struct gendisk *disk = dev_to_disk(dev);
628
629 return sprintf(buf, "%x\n", disk->flags);
86ce18d7 630}
edfaa7c3
KS
631
632static ssize_t disk_stat_show(struct device *dev,
633 struct device_attribute *attr, char *buf)
1da177e4 634{
edfaa7c3
KS
635 struct gendisk *disk = dev_to_disk(dev);
636
1da177e4
LT
637 preempt_disable();
638 disk_round_stats(disk);
639 preempt_enable();
edfaa7c3 640 return sprintf(buf,
837c7878
BW
641 "%8lu %8lu %8llu %8u "
642 "%8lu %8lu %8llu %8u "
1da177e4
LT
643 "%8u %8u %8u"
644 "\n",
47a00410
JA
645 disk_stat_read(disk, ios[READ]),
646 disk_stat_read(disk, merges[READ]),
647 (unsigned long long)disk_stat_read(disk, sectors[READ]),
648 jiffies_to_msecs(disk_stat_read(disk, ticks[READ])),
649 disk_stat_read(disk, ios[WRITE]),
650 disk_stat_read(disk, merges[WRITE]),
651 (unsigned long long)disk_stat_read(disk, sectors[WRITE]),
652 jiffies_to_msecs(disk_stat_read(disk, ticks[WRITE])),
1da177e4
LT
653 disk->in_flight,
654 jiffies_to_msecs(disk_stat_read(disk, io_ticks)),
655 jiffies_to_msecs(disk_stat_read(disk, time_in_queue)));
656}
1da177e4 657
c17bb495 658#ifdef CONFIG_FAIL_MAKE_REQUEST
edfaa7c3
KS
659static ssize_t disk_fail_show(struct device *dev,
660 struct device_attribute *attr, char *buf)
661{
662 struct gendisk *disk = dev_to_disk(dev);
663
664 return sprintf(buf, "%d\n", disk->flags & GENHD_FL_FAIL ? 1 : 0);
665}
c17bb495 666
edfaa7c3
KS
667static ssize_t disk_fail_store(struct device *dev,
668 struct device_attribute *attr,
c17bb495
AM
669 const char *buf, size_t count)
670{
edfaa7c3 671 struct gendisk *disk = dev_to_disk(dev);
c17bb495
AM
672 int i;
673
674 if (count > 0 && sscanf(buf, "%d", &i) > 0) {
675 if (i == 0)
676 disk->flags &= ~GENHD_FL_FAIL;
677 else
678 disk->flags |= GENHD_FL_FAIL;
679 }
680
681 return count;
682}
c17bb495
AM
683
684#endif
685
edfaa7c3
KS
/* Read-only per-disk sysfs attributes, each backed by a *_show() above. */
static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
static DEVICE_ATTR(size, S_IRUGO, disk_size_show, NULL);
static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
static DEVICE_ATTR(stat, S_IRUGO, disk_stat_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
/* Spelled out with __ATTR(): the file name "make-it-fail" contains dashes. */
static struct device_attribute dev_attr_fail =
	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, disk_fail_show, disk_fail_store);
#endif
edfaa7c3
KS
696
697static struct attribute *disk_attrs[] = {
698 &dev_attr_range.attr,
699 &dev_attr_removable.attr,
1c9ce527 700 &dev_attr_ro.attr,
edfaa7c3
KS
701 &dev_attr_size.attr,
702 &dev_attr_capability.attr,
703 &dev_attr_stat.attr,
704#ifdef CONFIG_FAIL_MAKE_REQUEST
705 &dev_attr_fail.attr,
706#endif
707 NULL
708};
709
710static struct attribute_group disk_attr_group = {
711 .attrs = disk_attrs,
712};
713
714static struct attribute_group *disk_attr_groups[] = {
715 &disk_attr_group,
716 NULL
1da177e4
LT
717};
718
edfaa7c3 719static void disk_release(struct device *dev)
1da177e4 720{
edfaa7c3
KS
721 struct gendisk *disk = dev_to_disk(dev);
722
1da177e4 723 kfree(disk->random);
e71bf0d0 724 kfree(disk->__part);
1da177e4
LT
725 free_disk_stats(disk);
726 kfree(disk);
727}
edfaa7c3
KS
728struct class block_class = {
729 .name = "block",
1da177e4
LT
730};
731
1826eadf 732static struct device_type disk_type = {
edfaa7c3
KS
733 .name = "disk",
734 .groups = disk_attr_groups,
735 .release = disk_release,
1da177e4
LT
736};
737
a6e2ba88 738#ifdef CONFIG_PROC_FS
cf771cb5
TH
739/*
740 * aggregate disk stat collector. Uses the same stats that the sysfs
741 * entries do, above, but makes them available through one seq_file.
742 *
743 * The output looks suspiciously like /proc/partitions with a bunch of
744 * extra fields.
745 */
746static int diskstats_show(struct seq_file *seqf, void *v)
1da177e4
LT
747{
748 struct gendisk *gp = v;
e71bf0d0
TH
749 struct disk_part_iter piter;
750 struct hd_struct *hd;
1da177e4 751 char buf[BDEVNAME_SIZE];
1da177e4
LT
752
753 /*
edfaa7c3 754 if (&gp->dev.kobj.entry == block_class.devices.next)
cf771cb5 755 seq_puts(seqf, "major minor name"
1da177e4
LT
756 " rio rmerge rsect ruse wio wmerge "
757 "wsect wuse running use aveq"
758 "\n\n");
759 */
760
761 preempt_disable();
762 disk_round_stats(gp);
763 preempt_enable();
cf771cb5 764 seq_printf(seqf, "%4d %4d %s %lu %lu %llu %u %lu %lu %llu %u %u %u %u\n",
f331c029
TH
765 MAJOR(disk_devt(gp)), MINOR(disk_devt(gp)),
766 disk_name(gp, 0, buf),
a362357b
JA
767 disk_stat_read(gp, ios[0]), disk_stat_read(gp, merges[0]),
768 (unsigned long long)disk_stat_read(gp, sectors[0]),
769 jiffies_to_msecs(disk_stat_read(gp, ticks[0])),
770 disk_stat_read(gp, ios[1]), disk_stat_read(gp, merges[1]),
771 (unsigned long long)disk_stat_read(gp, sectors[1]),
772 jiffies_to_msecs(disk_stat_read(gp, ticks[1])),
1da177e4
LT
773 gp->in_flight,
774 jiffies_to_msecs(disk_stat_read(gp, io_ticks)),
775 jiffies_to_msecs(disk_stat_read(gp, time_in_queue)));
776
777 /* now show all non-0 size partitions of it */
e71bf0d0
TH
778 disk_part_iter_init(&piter, gp, 0);
779 while ((hd = disk_part_iter_next(&piter))) {
28f39d55
JM
780 preempt_disable();
781 part_round_stats(hd);
782 preempt_enable();
cf771cb5 783 seq_printf(seqf, "%4d %4d %s %lu %lu %llu "
28f39d55 784 "%u %lu %lu %llu %u %u %u %u\n",
f331c029
TH
785 MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
786 disk_name(gp, hd->partno, buf),
28f39d55
JM
787 part_stat_read(hd, ios[0]),
788 part_stat_read(hd, merges[0]),
789 (unsigned long long)part_stat_read(hd, sectors[0]),
790 jiffies_to_msecs(part_stat_read(hd, ticks[0])),
791 part_stat_read(hd, ios[1]),
792 part_stat_read(hd, merges[1]),
793 (unsigned long long)part_stat_read(hd, sectors[1]),
794 jiffies_to_msecs(part_stat_read(hd, ticks[1])),
795 hd->in_flight,
796 jiffies_to_msecs(part_stat_read(hd, io_ticks)),
797 jiffies_to_msecs(part_stat_read(hd, time_in_queue))
798 );
1da177e4 799 }
e71bf0d0 800 disk_part_iter_exit(&piter);
1da177e4
LT
801
802 return 0;
803}
804
12f32bb3 805const struct seq_operations diskstats_op = {
def4e38d
TH
806 .start = disk_seqf_start,
807 .next = disk_seqf_next,
808 .stop = disk_seqf_stop,
1da177e4
LT
809 .show = diskstats_show
810};
a6e2ba88 811#endif /* CONFIG_PROC_FS */
1da177e4 812
8ce7ad7b
KCA
813static void media_change_notify_thread(struct work_struct *work)
814{
815 struct gendisk *gd = container_of(work, struct gendisk, async_notify);
816 char event[] = "MEDIA_CHANGE=1";
817 char *envp[] = { event, NULL };
818
819 /*
820 * set enviroment vars to indicate which event this is for
821 * so that user space will know to go check the media status.
822 */
edfaa7c3 823 kobject_uevent_env(&gd->dev.kobj, KOBJ_CHANGE, envp);
8ce7ad7b
KCA
824 put_device(gd->driverfs_dev);
825}
826
#if 0
/*
 * Compiled out.  Takes a reference on the disk's driverfs_dev and
 * schedules the async_notify work item.
 */
void genhd_media_change_notify(struct gendisk *disk)
{
	get_device(disk->driverfs_dev);
	schedule_work(&disk->async_notify);
}
EXPORT_SYMBOL_GPL(genhd_media_change_notify);
#endif /* 0 */
8ce7ad7b 835
cf771cb5 836dev_t blk_lookup_devt(const char *name, int partno)
a142be85 837{
def4e38d
TH
838 dev_t devt = MKDEV(0, 0);
839 struct class_dev_iter iter;
840 struct device *dev;
a142be85 841
def4e38d
TH
842 class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
843 while ((dev = class_dev_iter_next(&iter))) {
a142be85 844 struct gendisk *disk = dev_to_disk(dev);
a142be85 845
f331c029
TH
846 if (strcmp(dev->bus_id, name))
847 continue;
848 if (partno < 0 || partno > disk_max_parts(disk))
849 continue;
850
851 if (partno == 0)
852 devt = disk_devt(disk);
853 else {
e71bf0d0 854 struct hd_struct *part;
f331c029 855
e71bf0d0
TH
856 part = disk_get_part(disk, partno);
857 if (!part || !part->nr_sects) {
858 disk_put_part(part);
f331c029 859 continue;
e71bf0d0 860 }
f331c029
TH
861
862 devt = part_devt(part);
e71bf0d0 863 disk_put_part(part);
def4e38d 864 }
f331c029 865 break;
5c0ef6d0 866 }
def4e38d 867 class_dev_iter_exit(&iter);
edfaa7c3
KS
868 return devt;
869}
edfaa7c3
KS
870EXPORT_SYMBOL(blk_lookup_devt);
871
1da177e4
LT
/*
 * Allocate a gendisk with @minors minors by calling alloc_disk_node()
 * with a node id of -1.
 */
struct gendisk *alloc_disk(int minors)
{
	return alloc_disk_node(minors, -1);
}
876
877struct gendisk *alloc_disk_node(int minors, int node_id)
878{
879 struct gendisk *disk;
880
94f6030c
CL
881 disk = kmalloc_node(sizeof(struct gendisk),
882 GFP_KERNEL | __GFP_ZERO, node_id);
1da177e4 883 if (disk) {
1da177e4
LT
884 if (!init_disk_stats(disk)) {
885 kfree(disk);
886 return NULL;
887 }
888 if (minors > 1) {
889 int size = (minors - 1) * sizeof(struct hd_struct *);
e71bf0d0 890 disk->__part = kmalloc_node(size,
94f6030c 891 GFP_KERNEL | __GFP_ZERO, node_id);
e71bf0d0 892 if (!disk->__part) {
c7674030 893 free_disk_stats(disk);
1da177e4
LT
894 kfree(disk);
895 return NULL;
896 }
1da177e4
LT
897 }
898 disk->minors = minors;
1da177e4 899 rand_initialize_disk(disk);
edfaa7c3
KS
900 disk->dev.class = &block_class;
901 disk->dev.type = &disk_type;
902 device_initialize(&disk->dev);
8ce7ad7b
KCA
903 INIT_WORK(&disk->async_notify,
904 media_change_notify_thread);
1da177e4
LT
905 }
906 return disk;
907}
908
909EXPORT_SYMBOL(alloc_disk);
1946089a 910EXPORT_SYMBOL(alloc_disk_node);
1da177e4
LT
911
912struct kobject *get_disk(struct gendisk *disk)
913{
914 struct module *owner;
915 struct kobject *kobj;
916
917 if (!disk->fops)
918 return NULL;
919 owner = disk->fops->owner;
920 if (owner && !try_module_get(owner))
921 return NULL;
edfaa7c3 922 kobj = kobject_get(&disk->dev.kobj);
1da177e4
LT
923 if (kobj == NULL) {
924 module_put(owner);
925 return NULL;
926 }
927 return kobj;
928
929}
930
931EXPORT_SYMBOL(get_disk);
932
933void put_disk(struct gendisk *disk)
934{
935 if (disk)
edfaa7c3 936 kobject_put(&disk->dev.kobj);
1da177e4
LT
937}
938
939EXPORT_SYMBOL(put_disk);
940
941void set_device_ro(struct block_device *bdev, int flag)
942{
943 if (bdev->bd_contains != bdev)
944 bdev->bd_part->policy = flag;
945 else
946 bdev->bd_disk->policy = flag;
947}
948
949EXPORT_SYMBOL(set_device_ro);
950
951void set_disk_ro(struct gendisk *disk, int flag)
952{
e71bf0d0
TH
953 struct disk_part_iter piter;
954 struct hd_struct *part;
955
1da177e4 956 disk->policy = flag;
e71bf0d0
TH
957 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
958 while ((part = disk_part_iter_next(&piter)))
959 part->policy = flag;
960 disk_part_iter_exit(&piter);
1da177e4
LT
961}
962
963EXPORT_SYMBOL(set_disk_ro);
964
965int bdev_read_only(struct block_device *bdev)
966{
967 if (!bdev)
968 return 0;
969 else if (bdev->bd_contains != bdev)
970 return bdev->bd_part->policy;
971 else
972 return bdev->bd_disk->policy;
973}
974
975EXPORT_SYMBOL(bdev_read_only);
976
/*
 * Sync and invalidate the block device for partition @partno of @disk.
 *
 * Returns the result of __invalidate_device(), or 0 when bdget_disk()
 * yields no block device for that partition.
 */
int invalidate_partition(struct gendisk *disk, int partno)
{
	struct block_device *bdev = bdget_disk(disk, partno);
	int res;

	if (!bdev)
		return 0;

	fsync_bdev(bdev);
	res = __invalidate_device(bdev);
	bdput(bdev);

	return res;
}
988
989EXPORT_SYMBOL(invalidate_partition);