1 // SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1991-1998 Linus Torvalds
 * Re-organised Feb 1998 Russell King
 */
7 #include <linux/slab.h>
8 #include <linux/ctype.h>
9 #include <linux/genhd.h>
10 #include <linux/vmalloc.h>
11 #include <linux/blktrace_api.h>
12 #include <linux/raid/detect.h>
/*
 * NOTE(review): ordered table of partition-table parser callbacks tried by
 * check_partition().  This extraction is garbled: each line carries a stray
 * leading number, statements are split mid-token, and most entries, their
 * matching #endif lines and the NULL terminator / closing brace are missing.
 * Restore this table from the upstream file; do not hand-edit the fragment.
 * The visible "must come before msdos" comments show ordering is significant.
 */
15 static int (*check_part
[])(struct parsed_partitions
*) = {
17 * Probe partition formats with tables at disk address 0
18 * that also have an ADFS boot block at 0xdc0.
20 #ifdef CONFIG_ACORN_PARTITION_ICS
23 #ifdef CONFIG_ACORN_PARTITION_POWERTEC
24 adfspart_check_POWERTEC
,
26 #ifdef CONFIG_ACORN_PARTITION_EESOX
31 * Now move on to formats that only have partition info at
32 * disk address 0xdc0. Since these may also have stale
33 * PC/BIOS partition tables, they need to come before
36 #ifdef CONFIG_ACORN_PARTITION_CUMANA
37 adfspart_check_CUMANA
,
39 #ifdef CONFIG_ACORN_PARTITION_ADFS
43 #ifdef CONFIG_CMDLINE_PARTITION
46 #ifdef CONFIG_EFI_PARTITION
47 efi_partition
, /* this must come before msdos */
49 #ifdef CONFIG_SGI_PARTITION
52 #ifdef CONFIG_LDM_PARTITION
53 ldm_partition
, /* this must come before msdos */
55 #ifdef CONFIG_MSDOS_PARTITION
58 #ifdef CONFIG_OSF_PARTITION
61 #ifdef CONFIG_SUN_PARTITION
64 #ifdef CONFIG_AMIGA_PARTITION
67 #ifdef CONFIG_ATARI_PARTITION
70 #ifdef CONFIG_MAC_PARTITION
73 #ifdef CONFIG_ULTRIX_PARTITION
76 #ifdef CONFIG_IBM_PARTITION
79 #ifdef CONFIG_KARMA_PARTITION
82 #ifdef CONFIG_SYSV68_PARTITION
/*
 * allocate_partitions - allocate a parsed_partitions state for @hd.
 * Allocates the state struct itself (kzalloc) and a zeroed parts array
 * sized by disk_max_parts(hd) (vzalloc + array_size to avoid overflow).
 *
 * NOTE(review): truncated extraction — the opening/closing braces, the
 * declaration of 'nr', the NULL checks after both allocations, the
 * 'state->limit = nr;' style bookkeeping and the return statement are
 * missing here.  Recover from upstream before compiling.
 */
88 static struct parsed_partitions
*allocate_partitions(struct gendisk
*hd
)
90 struct parsed_partitions
*state
;
93 state
= kzalloc(sizeof(*state
), GFP_KERNEL
);
97 nr
= disk_max_parts(hd
);
98 state
->parts
= vzalloc(array_size(nr
, sizeof(state
->parts
[0])));
/*
 * free_partitions - counterpart of allocate_partitions().
 * NOTE(review): body elided by the extraction; presumably frees
 * state->parts (vfree) and the state itself (kfree) — confirm upstream.
 */
109 static void free_partitions(struct parsed_partitions
*state
)
/*
 * check_partition - run every compiled-in parser in check_part[] against
 * @hd/@bdev until one succeeds, accumulating a log line in a page-sized
 * buffer (state->pp_buf) that is printed and freed before returning.
 *
 * NOTE(review): truncated extraction — braces, the NULL check after
 * allocate_partitions(), the declarations/initialization of 'res' and 'i',
 * the error-recording branch hinted at by the visible comment, and the
 * return statements are all missing.  Recover from upstream.
 */
115 static struct parsed_partitions
*check_partition(struct gendisk
*hd
,
116 struct block_device
*bdev
)
118 struct parsed_partitions
*state
;
121 state
= allocate_partitions(hd
);
/* page-sized buffer used to build a single human-readable log line */
124 state
->pp_buf
= (char *)__get_free_page(GFP_KERNEL
);
125 if (!state
->pp_buf
) {
126 free_partitions(state
);
129 state
->pp_buf
[0] = '\0';
/* "sda" -> log prefix; disks ending in a digit get a 'p' partition infix */
132 disk_name(hd
, 0, state
->name
);
133 snprintf(state
->pp_buf
, PAGE_SIZE
, " %s:", state
->name
);
134 if (isdigit(state
->name
[strlen(state
->name
)-1]))
135 sprintf(state
->name
, "p");
/* try each parser in order until one claims the disk or errors */
138 while (!res
&& check_part
[i
]) {
139 memset(state
->parts
, 0, state
->limit
* sizeof(state
->parts
[0]));
140 res
= check_part
[i
++](state
);
143 * We have hit an I/O error which we don't report now.
144 * But record it, and let the others do their job.
/* success path: print the accumulated log line and free the buffer */
152 printk(KERN_INFO
"%s", state
->pp_buf
);
154 free_page((unsigned long)state
->pp_buf
);
157 if (state
->access_beyond_eod
)
160 * The partition is unrecognized. So report I/O errors if there were any
165 strlcat(state
->pp_buf
,
166 " unable to read partition table\n", PAGE_SIZE
);
167 printk(KERN_INFO
"%s", state
->pp_buf
);
170 free_page((unsigned long)state
->pp_buf
);
171 free_partitions(state
);
175 static ssize_t
part_partition_show(struct device
*dev
,
176 struct device_attribute
*attr
, char *buf
)
178 struct hd_struct
*p
= dev_to_part(dev
);
180 return sprintf(buf
, "%d\n", p
->partno
);
183 static ssize_t
part_start_show(struct device
*dev
,
184 struct device_attribute
*attr
, char *buf
)
186 struct hd_struct
*p
= dev_to_part(dev
);
188 return sprintf(buf
, "%llu\n",(unsigned long long)p
->start_sect
);
191 static ssize_t
part_ro_show(struct device
*dev
,
192 struct device_attribute
*attr
, char *buf
)
194 struct hd_struct
*p
= dev_to_part(dev
);
195 return sprintf(buf
, "%d\n", p
->policy
? 1 : 0);
/*
 * sysfs "alignment_offset" attribute for a partition.
 * NOTE(review): truncated extraction — the final argument to
 * queue_limit_alignment_offset() (presumably p->start_sect — confirm
 * upstream) and the closing parentheses/braces are missing.
 */
198 static ssize_t
part_alignment_offset_show(struct device
*dev
,
199 struct device_attribute
*attr
, char *buf
)
201 struct hd_struct
*p
= dev_to_part(dev
);
203 return sprintf(buf
, "%u\n",
204 queue_limit_alignment_offset(&part_to_disk(p
)->queue
->limits
,
/*
 * sysfs "discard_alignment" attribute for a partition.
 * NOTE(review): truncated extraction — the final argument to
 * queue_limit_discard_alignment() (presumably p->start_sect — confirm
 * upstream) and the closing parentheses/braces are missing.
 */
208 static ssize_t
part_discard_alignment_show(struct device
*dev
,
209 struct device_attribute
*attr
, char *buf
)
211 struct hd_struct
*p
= dev_to_part(dev
);
213 return sprintf(buf
, "%u\n",
214 queue_limit_discard_alignment(&part_to_disk(p
)->queue
->limits
,
218 static DEVICE_ATTR(partition
, 0444, part_partition_show
, NULL
);
219 static DEVICE_ATTR(start
, 0444, part_start_show
, NULL
);
220 static DEVICE_ATTR(size
, 0444, part_size_show
, NULL
);
221 static DEVICE_ATTR(ro
, 0444, part_ro_show
, NULL
);
222 static DEVICE_ATTR(alignment_offset
, 0444, part_alignment_offset_show
, NULL
);
223 static DEVICE_ATTR(discard_alignment
, 0444, part_discard_alignment_show
, NULL
);
224 static DEVICE_ATTR(stat
, 0444, part_stat_show
, NULL
);
225 static DEVICE_ATTR(inflight
, 0444, part_inflight_show
, NULL
);
226 #ifdef CONFIG_FAIL_MAKE_REQUEST
227 static struct device_attribute dev_attr_fail
=
228 __ATTR(make
-it
-fail
, 0644, part_fail_show
, part_fail_store
);
/*
 * Attribute arrays/groups wiring the DEVICE_ATTRs above into sysfs.
 * NOTE(review): truncated extraction — several entries (size, ro, stat,
 * the fail attribute under its #ifdef), the NULL terminators, the
 * '.attrs = part_attrs' member, the '&part_attr_group' entry, matching
 * #endif lines and closing braces are missing.  Recover from upstream.
 */
231 static struct attribute
*part_attrs
[] = {
232 &dev_attr_partition
.attr
,
233 &dev_attr_start
.attr
,
236 &dev_attr_alignment_offset
.attr
,
237 &dev_attr_discard_alignment
.attr
,
239 &dev_attr_inflight
.attr
,
240 #ifdef CONFIG_FAIL_MAKE_REQUEST
246 static struct attribute_group part_attr_group
= {
250 static const struct attribute_group
*part_attr_groups
[] = {
252 #ifdef CONFIG_BLK_DEV_IO_TRACE
253 &blk_trace_attr_group
,
/*
 * part_release - device-model release callback for a partition device.
 * Frees the devt; NOTE(review): the tail of the body (freeing the
 * hd_struct and its members) is elided by the extraction — confirm the
 * exact free sequence upstream.
 */
258 static void part_release(struct device
*dev
)
260 struct hd_struct
*p
= dev_to_part(dev
);
261 blk_free_devt(dev
->devt
);
266 static int part_uevent(struct device
*dev
, struct kobj_uevent_env
*env
)
268 struct hd_struct
*part
= dev_to_part(dev
);
270 add_uevent_var(env
, "PARTN=%u", part
->partno
);
271 if (part
->info
&& part
->info
->volname
[0])
272 add_uevent_var(env
, "PARTNAME=%s", part
->info
->volname
);
276 struct device_type part_type
= {
278 .groups
= part_attr_groups
,
279 .release
= part_release
,
280 .uevent
= part_uevent
,
/*
 * hd_struct_free_work - process-context half of partition teardown,
 * scheduled from hd_struct_free() via an RCU work item.  Drops the disk
 * reference taken in delete_partition(), resets the partition's start
 * sector and statistics, then drops the final device reference.
 *
 * NOTE(review): truncated extraction — braces are missing and the line
 * between original lines 297 and 299 is elided (presumably a matching
 * 'part->nr_sects = 0;' reset — confirm upstream).
 */
283 static void hd_struct_free_work(struct work_struct
*work
)
285 struct hd_struct
*part
=
286 container_of(to_rcu_work(work
), struct hd_struct
, rcu_work
);
287 struct gendisk
*disk
= part_to_disk(part
);
290 * Release the disk reference acquired in delete_partition here.
291 * We can't release it in hd_struct_free because the final put_device
292 * needs process context and thus can't be run directly from a
293 * percpu_ref ->release handler.
295 put_device(disk_to_dev(disk
));
297 part
->start_sect
= 0;
299 part_stat_set_all(part
, 0);
300 put_device(part_to_dev(part
));
303 static void hd_struct_free(struct percpu_ref
*ref
)
305 struct hd_struct
*part
= container_of(ref
, struct hd_struct
, ref
);
306 struct gendisk
*disk
= part_to_disk(part
);
307 struct disk_part_tbl
*ptbl
=
308 rcu_dereference_protected(disk
->part_tbl
, 1);
310 rcu_assign_pointer(ptbl
->last_lookup
, NULL
);
312 INIT_RCU_WORK(&part
->rcu_work
, hd_struct_free_work
);
313 queue_rcu_work(system_wq
, &part
->rcu_work
);
316 int hd_ref_init(struct hd_struct
*part
)
318 if (percpu_ref_init(&part
->ref
, hd_struct_free
, 0, GFP_KERNEL
))
324 * Must be called either with bd_mutex held, before a disk can be opened or
325 * after all disk users are gone.
327 void delete_partition(struct hd_struct
*part
)
329 struct gendisk
*disk
= part_to_disk(part
);
330 struct disk_part_tbl
*ptbl
=
331 rcu_dereference_protected(disk
->part_tbl
, 1);
334 * ->part_tbl is referenced in this part's release handler, so
335 * we have to hold the disk device
337 get_device(disk_to_dev(disk
));
338 rcu_assign_pointer(ptbl
->part
[part
->partno
], NULL
);
339 kobject_put(part
->holder_dir
);
340 device_del(part_to_dev(part
));
343 * Remove the block device from the inode hash, so that it cannot be
344 * looked up any more even when openers still hold references.
346 remove_inode_hash(part
->bdev
->bd_inode
);
348 percpu_ref_kill(&part
->ref
);
351 static ssize_t
whole_disk_show(struct device
*dev
,
352 struct device_attribute
*attr
, char *buf
)
356 static DEVICE_ATTR(whole_disk
, 0444, whole_disk_show
, NULL
);
/*
 * add_partition - create and register partition @partno on @disk covering
 * sectors [start, start + len), publish it in disk->part_tbl and emit a
 * KOBJ_ADD uevent unless the disk suppresses uevents.
 *
 * NOTE(review): heavily truncated extraction — braces, the 'p'/'pdev'/
 * 'dname' declarations, switch case labels, NULL/error checks after each
 * allocation, several assignments, the goto labels and the whole error
 * unwinding ladder at the end are missing.  Recover from upstream; the
 * fragments below only show the surviving statements in order.
 */
359 * Must be called either with bd_mutex held, before a disk can be opened or
360 * after all disk users are gone.
362 static struct hd_struct
*add_partition(struct gendisk
*disk
, int partno
,
363 sector_t start
, sector_t len
, int flags
,
364 struct partition_meta_info
*info
)
367 dev_t devt
= MKDEV(0, 0);
368 struct device
*ddev
= disk_to_dev(disk
);
370 struct block_device
*bdev
;
371 struct disk_part_tbl
*ptbl
;
/* reject/downgrade zoned devices: no partitions on host-managed zones */
376 * Partitions are not supported on zoned block devices that are used as
379 switch (disk
->queue
->limits
.zoned
) {
381 pr_warn("%s: partitions not supported on host managed zoned block device\n",
383 return ERR_PTR(-ENXIO
);
385 pr_info("%s: disabling host aware zoned block device support due to partitions\n",
387 disk
->queue
->limits
.zoned
= BLK_ZONED_NONE
;
/* make room in the partition table and check the slot is free */
393 err
= disk_expand_part_tbl(disk
, partno
);
396 ptbl
= rcu_dereference_protected(disk
->part_tbl
, 1);
398 if (ptbl
->part
[partno
])
399 return ERR_PTR(-EBUSY
);
/* allocate the hd_struct, its per-cpu stats and the block_device */
401 p
= kzalloc(sizeof(*p
), GFP_KERNEL
);
403 return ERR_PTR(-EBUSY
);
406 p
->dkstats
= alloc_percpu(struct disk_stats
);
410 bdev
= bdev_alloc(disk
, partno
);
415 hd_sects_seq_init(p
);
416 pdev
= part_to_dev(p
);
418 p
->start_sect
= start
;
421 p
->policy
= get_disk_ro(disk
);
/* duplicate caller-provided metadata (volume name etc.), if any */
424 struct partition_meta_info
*pinfo
;
426 pinfo
= kzalloc_node(sizeof(*pinfo
), GFP_KERNEL
, disk
->node_id
);
429 memcpy(pinfo
, info
, sizeof(*info
));
/* "sda" -> "sda1", but "mmcblk0" -> "mmcblk0p1" (digit-terminated names) */
433 dname
= dev_name(ddev
);
434 if (isdigit(dname
[strlen(dname
) - 1]))
435 dev_set_name(pdev
, "%sp%d", dname
, partno
);
437 dev_set_name(pdev
, "%s%d", dname
, partno
);
439 device_initialize(pdev
);
440 pdev
->class = &block_class
;
441 pdev
->type
= &part_type
;
444 err
= blk_alloc_devt(p
, &devt
);
449 /* delay uevent until 'holders' subdir is created */
450 dev_set_uevent_suppress(pdev
, 1);
451 err
= device_add(pdev
);
456 p
->holder_dir
= kobject_create_and_add("holders", &pdev
->kobj
);
460 dev_set_uevent_suppress(pdev
, 0);
461 if (flags
& ADDPART_FLAG_WHOLEDISK
) {
462 err
= device_create_file(pdev
, &dev_attr_whole_disk
);
467 err
= hd_ref_init(p
);
469 if (flags
& ADDPART_FLAG_WHOLEDISK
)
470 goto out_remove_file
;
474 /* everything is up and running, commence */
475 bdev_add(bdev
, devt
);
476 rcu_assign_pointer(ptbl
->part
[partno
], p
);
478 /* suppress uevent if the disk suppresses it */
479 if (!dev_get_uevent_suppress(ddev
))
480 kobject_uevent(&pdev
->kobj
, KOBJ_ADD
);
/* error unwinding fragments (labels elided by the extraction) */
488 free_percpu(p
->dkstats
);
493 device_remove_file(pdev
, &dev_attr_whole_disk
);
495 kobject_put(p
->holder_dir
);
502 static bool partition_overlaps(struct gendisk
*disk
, sector_t start
,
503 sector_t length
, int skip_partno
)
505 struct disk_part_iter piter
;
506 struct hd_struct
*part
;
507 bool overlap
= false;
509 disk_part_iter_init(&piter
, disk
, DISK_PITER_INCL_EMPTY
);
510 while ((part
= disk_part_iter_next(&piter
))) {
511 if (part
->partno
== skip_partno
||
512 start
>= part
->start_sect
+ part
->nr_sects
||
513 start
+ length
<= part
->start_sect
)
519 disk_part_iter_exit(&piter
);
523 int bdev_add_partition(struct block_device
*bdev
, int partno
,
524 sector_t start
, sector_t length
)
526 struct hd_struct
*part
;
528 mutex_lock(&bdev
->bd_mutex
);
529 if (partition_overlaps(bdev
->bd_disk
, start
, length
, -1)) {
530 mutex_unlock(&bdev
->bd_mutex
);
534 part
= add_partition(bdev
->bd_disk
, partno
, start
, length
,
535 ADDPART_FLAG_NONE
, NULL
);
536 mutex_unlock(&bdev
->bd_mutex
);
537 return PTR_ERR_OR_ZERO(part
);
/*
 * bdev_del_partition - delete partition @partno of @bdev's disk, taking
 * the partition's bd_mutex first and the whole-disk bd_mutex nested.
 * Refuses while the partition is open; flushes and invalidates its
 * block device before deletion.
 *
 * NOTE(review): truncated extraction — the 'ret' variable, the NULL
 * checks after bdget_disk()/disk_get_part(), the goto labels, the
 * unlock/bdput/disk_put_part tail and the return are missing.  The
 * visible lock order (bdevp then bdev, nested) must be preserved when
 * recovering from upstream.
 */
540 int bdev_del_partition(struct block_device
*bdev
, int partno
)
542 struct block_device
*bdevp
;
543 struct hd_struct
*part
= NULL
;
546 bdevp
= bdget_disk(bdev
->bd_disk
, partno
);
550 mutex_lock(&bdevp
->bd_mutex
);
551 mutex_lock_nested(&bdev
->bd_mutex
, 1);
554 part
= disk_get_part(bdev
->bd_disk
, partno
);
/* refuse while someone still has the partition open */
559 if (bdevp
->bd_openers
)
562 sync_blockdev(bdevp
);
563 invalidate_bdev(bdevp
);
565 delete_partition(part
);
568 mutex_unlock(&bdev
->bd_mutex
);
569 mutex_unlock(&bdevp
->bd_mutex
);
/*
 * bdev_resize_partition - change the length of partition @partno to
 * @length sectors.  The start sector must not move, and the new range
 * must not overlap any other partition.
 *
 * NOTE(review): truncated extraction — the 'ret' variable, NULL checks
 * after disk_get_part()/bdget_part(), the error returns for the two
 * visible validation 'if's, the goto labels and the unlock/put tail are
 * missing.  Recover from upstream.
 */
576 int bdev_resize_partition(struct block_device
*bdev
, int partno
,
577 sector_t start
, sector_t length
)
579 struct block_device
*bdevp
;
580 struct hd_struct
*part
;
583 part
= disk_get_part(bdev
->bd_disk
, partno
);
588 bdevp
= bdget_part(part
);
592 mutex_lock(&bdevp
->bd_mutex
);
593 mutex_lock_nested(&bdev
->bd_mutex
, 1);
/* only resizing in place is supported, and no overlap may result */
596 if (start
!= part
->start_sect
)
600 if (partition_overlaps(bdev
->bd_disk
, start
, length
, partno
))
603 part_nr_sects_write(part
, length
);
604 bd_set_nr_sectors(bdevp
, length
);
608 mutex_unlock(&bdevp
->bd_mutex
);
609 mutex_unlock(&bdev
->bd_mutex
);
616 static bool disk_unlock_native_capacity(struct gendisk
*disk
)
618 const struct block_device_operations
*bdops
= disk
->fops
;
620 if (bdops
->unlock_native_capacity
&&
621 !(disk
->flags
& GENHD_FL_NATIVE_CAPACITY
)) {
622 printk(KERN_CONT
"enabling native capacity\n");
623 bdops
->unlock_native_capacity(disk
);
624 disk
->flags
|= GENHD_FL_NATIVE_CAPACITY
;
627 printk(KERN_CONT
"truncated\n");
632 int blk_drop_partitions(struct block_device
*bdev
)
634 struct disk_part_iter piter
;
635 struct hd_struct
*part
;
637 if (bdev
->bd_part_count
)
641 invalidate_bdev(bdev
);
643 disk_part_iter_init(&piter
, bdev
->bd_disk
, DISK_PITER_INCL_EMPTY
);
644 while ((part
= disk_part_iter_next(&piter
)))
645 delete_partition(part
);
646 disk_part_iter_exit(&piter
);
651 /* for historic reasons in the DASD driver */
652 EXPORT_SYMBOL_GPL(blk_drop_partitions
);
/*
 * blk_add_partition - validate and register one parsed partition entry
 * (state->parts[p]) on @disk.  Entries starting beyond EOD are skipped;
 * entries extending beyond EOD are clamped to the disk end; both cases
 * first try disk_unlock_native_capacity() to recover hidden capacity.
 *
 * NOTE(review): truncated extraction — braces, the printk() calls whose
 * format strings are visible below, the true/false return statements and
 * the size==0 / -ENXIO handling are missing.  Recover from upstream.
 */
655 static bool blk_add_partition(struct gendisk
*disk
, struct block_device
*bdev
,
656 struct parsed_partitions
*state
, int p
)
658 sector_t size
= state
->parts
[p
].size
;
659 sector_t from
= state
->parts
[p
].from
;
660 struct hd_struct
*part
;
/* partition starts past end-of-disk: try unlocking native capacity */
665 if (from
>= get_capacity(disk
)) {
667 "%s: p%d start %llu is beyond EOD, ",
668 disk
->disk_name
, p
, (unsigned long long) from
);
669 if (disk_unlock_native_capacity(disk
))
/* partition extends past end-of-disk: unlock or clamp */
674 if (from
+ size
> get_capacity(disk
)) {
676 "%s: p%d size %llu extends beyond EOD, ",
677 disk
->disk_name
, p
, (unsigned long long) size
);
679 if (disk_unlock_native_capacity(disk
))
683 * We can not ignore partitions of broken tables created by for
684 * example camera firmware, but we limit them to the end of the
685 * disk to avoid creating invalid block devices.
687 size
= get_capacity(disk
) - from
;
690 part
= add_partition(disk
, p
, from
, size
, state
->parts
[p
].flags
,
691 &state
->parts
[p
].info
);
692 if (IS_ERR(part
) && PTR_ERR(part
) != -ENXIO
) {
693 printk(KERN_ERR
" %s: p%d could not be added: %ld\n",
694 disk
->disk_name
, p
, -PTR_ERR(part
));
/* hand RAID-flagged partitions to md autodetection when built in */
698 if (IS_BUILTIN(CONFIG_BLK_DEV_MD
) &&
699 (state
->parts
[p
].flags
& ADDPART_FLAG_RAID
))
700 md_autodetect_dev(part_to_dev(part
)->devt
);
/*
 * blk_add_partitions - scan @bdev for a partition table and register all
 * partitions found on @disk.  Emits KOBJ_CHANGE so userspace rescans.
 *
 * NOTE(review): truncated extraction — braces, the early returns (scan
 * disabled, NULL/IS_ERR state), the 'goto out_free_state' paths, the
 * disk_name()/printk() argument lines for several messages, 'highest = p'
 * in the preallocation loop, 'ret = 0' and the final return are missing.
 * Recover from upstream; fragments below show the surviving statements.
 */
705 int blk_add_partitions(struct gendisk
*disk
, struct block_device
*bdev
)
707 struct parsed_partitions
*state
;
708 int ret
= -EAGAIN
, p
, highest
;
710 if (!disk_part_scan_enabled(disk
))
713 state
= check_partition(disk
, bdev
);
718 * I/O error reading the partition table. If we tried to read
719 * beyond EOD, retry after unlocking the native capacity.
721 if (PTR_ERR(state
) == -ENOSPC
) {
722 printk(KERN_WARNING
"%s: partition table beyond EOD, ",
724 if (disk_unlock_native_capacity(disk
))
731 * Partitions are not supported on host managed zoned block devices.
733 if (disk
->queue
->limits
.zoned
== BLK_ZONED_HM
) {
734 pr_warn("%s: ignoring partition table on host managed zoned block device\n",
741 * If we read beyond EOD, try unlocking native capacity even if the
742 * partition table was successfully read as we could be missing some
745 if (state
->access_beyond_eod
) {
747 "%s: partition table partially beyond EOD, ",
749 if (disk_unlock_native_capacity(disk
))
753 /* tell userspace that the media / partition table may have changed */
754 kobject_uevent(&disk_to_dev(disk
)->kobj
, KOBJ_CHANGE
);
757 * Detect the highest partition number and preallocate disk->part_tbl.
758 * This is an optimization and not strictly necessary.
760 for (p
= 1, highest
= 0; p
< state
->limit
; p
++)
761 if (state
->parts
[p
].size
)
763 disk_expand_part_tbl(disk
, highest
);
765 for (p
= 1; p
< state
->limit
; p
++)
766 if (!blk_add_partition(disk
, bdev
, state
, p
))
771 free_partitions(state
);
/*
 * read_part_sector - read 512-byte sector @n of the disk being scanned
 * through the block device's page cache, returning a pointer into the
 * page (and recording the page in *@p so the caller can release it).
 * Reads past the device capacity set state->access_beyond_eod instead.
 *
 * NOTE(review): truncated extraction — braces, the 'struct page *page'
 * declaration, the NULL return on EOD, the IS_ERR/PageError checks with
 * their cleanup paths and the '*p' bookkeeping are missing.  The visible
 * shift arithmetic converts between 512-byte sectors and page offsets
 * (PAGE_SHIFT - 9).  Recover from upstream.
 */
775 void *read_part_sector(struct parsed_partitions
*state
, sector_t n
, Sector
*p
)
777 struct address_space
*mapping
= state
->bdev
->bd_inode
->i_mapping
;
780 if (n
>= get_capacity(state
->bdev
->bd_disk
)) {
781 state
->access_beyond_eod
= true;
785 page
= read_mapping_page(mapping
,
786 (pgoff_t
)(n
>> (PAGE_SHIFT
- 9)), NULL
);
793 return (unsigned char *)page_address(page
) +
794 ((n
& ((1 << (PAGE_SHIFT
- 9)) - 1)) << SECTOR_SHIFT
);