]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | // SPDX-License-Identifier: GPL-2.0 |
94ea4158 AV |
2 | /* |
3 | * Code extracted from drivers/block/genhd.c | |
4 | * Copyright (C) 1991-1998 Linus Torvalds | |
5 | * Re-organised Feb 1998 Russell King | |
6 | * | |
7 | * We now have independent partition support from the | |
8 | * block drivers, which allows all the partition code to | |
9 | * be grouped in one location, and it to be mostly self | |
10 | * contained. | |
11 | */ | |
12 | ||
13 | #include <linux/init.h> | |
14 | #include <linux/module.h> | |
15 | #include <linux/fs.h> | |
16 | #include <linux/slab.h> | |
17 | #include <linux/kmod.h> | |
18 | #include <linux/ctype.h> | |
19 | #include <linux/genhd.h> | |
20 | #include <linux/blktrace_api.h> | |
3ad5cee5 | 21 | #include "blk.h" |
94ea4158 AV |
22 | |
23 | #include "partitions/check.h" | |
24 | ||
25 | #ifdef CONFIG_BLK_DEV_MD | |
26 | extern void md_autodetect_dev(dev_t dev); | |
27 | #endif | |
28 | ||
94ea4158 AV |
29 | static ssize_t part_partition_show(struct device *dev, |
30 | struct device_attribute *attr, char *buf) | |
31 | { | |
32 | struct hd_struct *p = dev_to_part(dev); | |
33 | ||
34 | return sprintf(buf, "%d\n", p->partno); | |
35 | } | |
36 | ||
37 | static ssize_t part_start_show(struct device *dev, | |
38 | struct device_attribute *attr, char *buf) | |
39 | { | |
40 | struct hd_struct *p = dev_to_part(dev); | |
41 | ||
42 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); | |
43 | } | |
44 | ||
94ea4158 AV |
45 | static ssize_t part_ro_show(struct device *dev, |
46 | struct device_attribute *attr, char *buf) | |
47 | { | |
48 | struct hd_struct *p = dev_to_part(dev); | |
49 | return sprintf(buf, "%d\n", p->policy ? 1 : 0); | |
50 | } | |
51 | ||
52 | static ssize_t part_alignment_offset_show(struct device *dev, | |
53 | struct device_attribute *attr, char *buf) | |
54 | { | |
55 | struct hd_struct *p = dev_to_part(dev); | |
56 | return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); | |
57 | } | |
58 | ||
59 | static ssize_t part_discard_alignment_show(struct device *dev, | |
60 | struct device_attribute *attr, char *buf) | |
61 | { | |
62 | struct hd_struct *p = dev_to_part(dev); | |
63 | return sprintf(buf, "%u\n", p->discard_alignment); | |
64 | } | |
65 | ||
5657a819 JP |
66 | static DEVICE_ATTR(partition, 0444, part_partition_show, NULL); |
67 | static DEVICE_ATTR(start, 0444, part_start_show, NULL); | |
68 | static DEVICE_ATTR(size, 0444, part_size_show, NULL); | |
69 | static DEVICE_ATTR(ro, 0444, part_ro_show, NULL); | |
70 | static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL); | |
71 | static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL); | |
72 | static DEVICE_ATTR(stat, 0444, part_stat_show, NULL); | |
73 | static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL); | |
94ea4158 AV |
74 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
75 | static struct device_attribute dev_attr_fail = | |
5657a819 | 76 | __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store); |
94ea4158 AV |
77 | #endif |
78 | ||
79 | static struct attribute *part_attrs[] = { | |
80 | &dev_attr_partition.attr, | |
81 | &dev_attr_start.attr, | |
82 | &dev_attr_size.attr, | |
83 | &dev_attr_ro.attr, | |
84 | &dev_attr_alignment_offset.attr, | |
85 | &dev_attr_discard_alignment.attr, | |
86 | &dev_attr_stat.attr, | |
87 | &dev_attr_inflight.attr, | |
88 | #ifdef CONFIG_FAIL_MAKE_REQUEST | |
89 | &dev_attr_fail.attr, | |
90 | #endif | |
91 | NULL | |
92 | }; | |
93 | ||
94 | static struct attribute_group part_attr_group = { | |
95 | .attrs = part_attrs, | |
96 | }; | |
97 | ||
98 | static const struct attribute_group *part_attr_groups[] = { | |
99 | &part_attr_group, | |
100 | #ifdef CONFIG_BLK_DEV_IO_TRACE | |
101 | &blk_trace_attr_group, | |
102 | #endif | |
103 | NULL | |
104 | }; | |
105 | ||
106 | static void part_release(struct device *dev) | |
107 | { | |
108 | struct hd_struct *p = dev_to_part(dev); | |
2da78092 | 109 | blk_free_devt(dev->devt); |
b54e5ed8 | 110 | hd_free_part(p); |
94ea4158 AV |
111 | kfree(p); |
112 | } | |
113 | ||
0d9c51a6 SM |
114 | static int part_uevent(struct device *dev, struct kobj_uevent_env *env) |
115 | { | |
116 | struct hd_struct *part = dev_to_part(dev); | |
117 | ||
118 | add_uevent_var(env, "PARTN=%u", part->partno); | |
119 | if (part->info && part->info->volname[0]) | |
120 | add_uevent_var(env, "PARTNAME=%s", part->info->volname); | |
121 | return 0; | |
122 | } | |
123 | ||
94ea4158 AV |
124 | struct device_type part_type = { |
125 | .name = "partition", | |
126 | .groups = part_attr_groups, | |
127 | .release = part_release, | |
0d9c51a6 | 128 | .uevent = part_uevent, |
94ea4158 AV |
129 | }; |
130 | ||
94a2c3a3 | 131 | static void delete_partition_work_fn(struct work_struct *work) |
94ea4158 | 132 | { |
94a2c3a3 YY |
133 | struct hd_struct *part = container_of(to_rcu_work(work), struct hd_struct, |
134 | rcu_work); | |
94ea4158 AV |
135 | |
136 | part->start_sect = 0; | |
137 | part->nr_sects = 0; | |
138 | part_stat_set_all(part, 0); | |
139 | put_device(part_to_dev(part)); | |
140 | } | |
141 | ||
6c71013e | 142 | void __delete_partition(struct percpu_ref *ref) |
94ea4158 | 143 | { |
6c71013e | 144 | struct hd_struct *part = container_of(ref, struct hd_struct, ref); |
94a2c3a3 YY |
145 | INIT_RCU_WORK(&part->rcu_work, delete_partition_work_fn); |
146 | queue_rcu_work(system_wq, &part->rcu_work); | |
94ea4158 AV |
147 | } |
148 | ||
6d2cf6f2 BVA |
149 | /* |
150 | * Must be called either with bd_mutex held, before a disk can be opened or | |
151 | * after all disk users are gone. | |
152 | */ | |
94ea4158 AV |
153 | void delete_partition(struct gendisk *disk, int partno) |
154 | { | |
6d2cf6f2 BVA |
155 | struct disk_part_tbl *ptbl = |
156 | rcu_dereference_protected(disk->part_tbl, 1); | |
94ea4158 AV |
157 | struct hd_struct *part; |
158 | ||
159 | if (partno >= ptbl->len) | |
160 | return; | |
161 | ||
6d2cf6f2 | 162 | part = rcu_dereference_protected(ptbl->part[partno], 1); |
94ea4158 AV |
163 | if (!part) |
164 | return; | |
165 | ||
94ea4158 AV |
166 | rcu_assign_pointer(ptbl->part[partno], NULL); |
167 | rcu_assign_pointer(ptbl->last_lookup, NULL); | |
168 | kobject_put(part->holder_dir); | |
169 | device_del(part_to_dev(part)); | |
170 | ||
6fcc44d1 YY |
171 | /* |
172 | * Remove gendisk pointer from idr so that it cannot be looked up | |
173 | * while RCU period before freeing gendisk is running to prevent | |
174 | * use-after-free issues. Note that the device number stays | |
175 | * "in-use" until we really free the gendisk. | |
176 | */ | |
177 | blk_invalidate_devt(part_devt(part)); | |
6c71013e | 178 | hd_struct_kill(part); |
94ea4158 AV |
179 | } |
180 | ||
181 | static ssize_t whole_disk_show(struct device *dev, | |
182 | struct device_attribute *attr, char *buf) | |
183 | { | |
184 | return 0; | |
185 | } | |
5657a819 | 186 | static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL); |
94ea4158 | 187 | |
6d2cf6f2 BVA |
188 | /* |
189 | * Must be called either with bd_mutex held, before a disk can be opened or | |
190 | * after all disk users are gone. | |
191 | */ | |
94ea4158 AV |
192 | struct hd_struct *add_partition(struct gendisk *disk, int partno, |
193 | sector_t start, sector_t len, int flags, | |
194 | struct partition_meta_info *info) | |
195 | { | |
196 | struct hd_struct *p; | |
197 | dev_t devt = MKDEV(0, 0); | |
198 | struct device *ddev = disk_to_dev(disk); | |
199 | struct device *pdev; | |
200 | struct disk_part_tbl *ptbl; | |
201 | const char *dname; | |
202 | int err; | |
203 | ||
b7205307 CH |
204 | /* |
205 | * Partitions are not supported on zoned block devices that are used as | |
206 | * such. | |
207 | */ | |
208 | switch (disk->queue->limits.zoned) { | |
209 | case BLK_ZONED_HM: | |
210 | pr_warn("%s: partitions not supported on host managed zoned block device\n", | |
211 | disk->disk_name); | |
212 | return ERR_PTR(-ENXIO); | |
213 | case BLK_ZONED_HA: | |
214 | pr_info("%s: disabling host aware zoned block device support due to partitions\n", | |
215 | disk->disk_name); | |
216 | disk->queue->limits.zoned = BLK_ZONED_NONE; | |
217 | break; | |
218 | case BLK_ZONED_NONE: | |
219 | break; | |
220 | } | |
221 | ||
94ea4158 AV |
222 | err = disk_expand_part_tbl(disk, partno); |
223 | if (err) | |
224 | return ERR_PTR(err); | |
6d2cf6f2 | 225 | ptbl = rcu_dereference_protected(disk->part_tbl, 1); |
94ea4158 AV |
226 | |
227 | if (ptbl->part[partno]) | |
228 | return ERR_PTR(-EBUSY); | |
229 | ||
230 | p = kzalloc(sizeof(*p), GFP_KERNEL); | |
231 | if (!p) | |
232 | return ERR_PTR(-EBUSY); | |
233 | ||
234 | if (!init_part_stats(p)) { | |
235 | err = -ENOMEM; | |
236 | goto out_free; | |
237 | } | |
c83f6bf9 VG |
238 | |
239 | seqcount_init(&p->nr_sects_seq); | |
94ea4158 AV |
240 | pdev = part_to_dev(p); |
241 | ||
242 | p->start_sect = start; | |
243 | p->alignment_offset = | |
244 | queue_limit_alignment_offset(&disk->queue->limits, start); | |
245 | p->discard_alignment = | |
246 | queue_limit_discard_alignment(&disk->queue->limits, start); | |
247 | p->nr_sects = len; | |
248 | p->partno = partno; | |
249 | p->policy = get_disk_ro(disk); | |
250 | ||
251 | if (info) { | |
f17c21c1 CH |
252 | struct partition_meta_info *pinfo; |
253 | ||
254 | pinfo = kzalloc_node(sizeof(*pinfo), GFP_KERNEL, disk->node_id); | |
7bd897cf DC |
255 | if (!pinfo) { |
256 | err = -ENOMEM; | |
94ea4158 | 257 | goto out_free_stats; |
7bd897cf | 258 | } |
94ea4158 AV |
259 | memcpy(pinfo, info, sizeof(*info)); |
260 | p->info = pinfo; | |
261 | } | |
262 | ||
263 | dname = dev_name(ddev); | |
264 | if (isdigit(dname[strlen(dname) - 1])) | |
265 | dev_set_name(pdev, "%sp%d", dname, partno); | |
266 | else | |
267 | dev_set_name(pdev, "%s%d", dname, partno); | |
268 | ||
269 | device_initialize(pdev); | |
270 | pdev->class = &block_class; | |
271 | pdev->type = &part_type; | |
272 | pdev->parent = ddev; | |
273 | ||
274 | err = blk_alloc_devt(p, &devt); | |
275 | if (err) | |
276 | goto out_free_info; | |
277 | pdev->devt = devt; | |
278 | ||
279 | /* delay uevent until 'holders' subdir is created */ | |
280 | dev_set_uevent_suppress(pdev, 1); | |
281 | err = device_add(pdev); | |
282 | if (err) | |
283 | goto out_put; | |
284 | ||
285 | err = -ENOMEM; | |
286 | p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); | |
287 | if (!p->holder_dir) | |
288 | goto out_del; | |
289 | ||
290 | dev_set_uevent_suppress(pdev, 0); | |
291 | if (flags & ADDPART_FLAG_WHOLEDISK) { | |
292 | err = device_create_file(pdev, &dev_attr_whole_disk); | |
293 | if (err) | |
294 | goto out_del; | |
295 | } | |
296 | ||
b30a337c ML |
297 | err = hd_ref_init(p); |
298 | if (err) { | |
299 | if (flags & ADDPART_FLAG_WHOLEDISK) | |
300 | goto out_remove_file; | |
301 | goto out_del; | |
302 | } | |
303 | ||
94ea4158 AV |
304 | /* everything is up and running, commence */ |
305 | rcu_assign_pointer(ptbl->part[partno], p); | |
306 | ||
307 | /* suppress uevent if the disk suppresses it */ | |
308 | if (!dev_get_uevent_suppress(ddev)) | |
309 | kobject_uevent(&pdev->kobj, KOBJ_ADD); | |
b30a337c | 310 | return p; |
94ea4158 AV |
311 | |
312 | out_free_info: | |
f17c21c1 | 313 | kfree(p->info); |
94ea4158 AV |
314 | out_free_stats: |
315 | free_part_stats(p); | |
316 | out_free: | |
317 | kfree(p); | |
318 | return ERR_PTR(err); | |
b30a337c ML |
319 | out_remove_file: |
320 | device_remove_file(pdev, &dev_attr_whole_disk); | |
94ea4158 AV |
321 | out_del: |
322 | kobject_put(p->holder_dir); | |
323 | device_del(pdev); | |
324 | out_put: | |
325 | put_device(pdev); | |
94ea4158 AV |
326 | return ERR_PTR(err); |
327 | } | |
328 | ||
329 | static bool disk_unlock_native_capacity(struct gendisk *disk) | |
330 | { | |
331 | const struct block_device_operations *bdops = disk->fops; | |
332 | ||
333 | if (bdops->unlock_native_capacity && | |
334 | !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) { | |
335 | printk(KERN_CONT "enabling native capacity\n"); | |
336 | bdops->unlock_native_capacity(disk); | |
337 | disk->flags |= GENHD_FL_NATIVE_CAPACITY; | |
338 | return true; | |
339 | } else { | |
340 | printk(KERN_CONT "truncated\n"); | |
341 | return false; | |
342 | } | |
343 | } | |
344 | ||
a1548b67 | 345 | int blk_drop_partitions(struct gendisk *disk, struct block_device *bdev) |
94ea4158 | 346 | { |
94ea4158 AV |
347 | struct disk_part_iter piter; |
348 | struct hd_struct *part; | |
fe316bf2 | 349 | int res; |
94ea4158 | 350 | |
142fe8f4 CH |
351 | if (!disk_part_scan_enabled(disk)) |
352 | return 0; | |
77032ca6 | 353 | if (bdev->bd_part_count || bdev->bd_super) |
94ea4158 AV |
354 | return -EBUSY; |
355 | res = invalidate_partition(disk, 0); | |
356 | if (res) | |
357 | return res; | |
358 | ||
359 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); | |
360 | while ((part = disk_part_iter_next(&piter))) | |
361 | delete_partition(disk, part->partno); | |
362 | disk_part_iter_exit(&piter); | |
363 | ||
fe316bf2 JN |
364 | return 0; |
365 | } | |
366 | ||
f902b026 CH |
367 | static bool blk_add_partition(struct gendisk *disk, struct block_device *bdev, |
368 | struct parsed_partitions *state, int p) | |
fe316bf2 | 369 | { |
f902b026 CH |
370 | sector_t size = state->parts[p].size; |
371 | sector_t from = state->parts[p].from; | |
fe316bf2 | 372 | struct hd_struct *part; |
f902b026 CH |
373 | |
374 | if (!size) | |
375 | return true; | |
376 | ||
377 | if (from >= get_capacity(disk)) { | |
378 | printk(KERN_WARNING | |
379 | "%s: p%d start %llu is beyond EOD, ", | |
380 | disk->disk_name, p, (unsigned long long) from); | |
381 | if (disk_unlock_native_capacity(disk)) | |
382 | return false; | |
383 | return true; | |
fe316bf2 JN |
384 | } |
385 | ||
f902b026 CH |
386 | if (from + size > get_capacity(disk)) { |
387 | printk(KERN_WARNING | |
388 | "%s: p%d size %llu extends beyond EOD, ", | |
389 | disk->disk_name, p, (unsigned long long) size); | |
fe316bf2 | 390 | |
f902b026 CH |
391 | if (disk_unlock_native_capacity(disk)) |
392 | return false; | |
393 | ||
394 | /* | |
395 | * We can not ignore partitions of broken tables created by for | |
396 | * example camera firmware, but we limit them to the end of the | |
397 | * disk to avoid creating invalid block devices. | |
398 | */ | |
399 | size = get_capacity(disk) - from; | |
400 | } | |
401 | ||
402 | part = add_partition(disk, p, from, size, state->parts[p].flags, | |
403 | &state->parts[p].info); | |
b7205307 | 404 | if (IS_ERR(part) && PTR_ERR(part) != -ENXIO) { |
f902b026 CH |
405 | printk(KERN_ERR " %s: p%d could not be added: %ld\n", |
406 | disk->disk_name, p, -PTR_ERR(part)); | |
407 | return true; | |
408 | } | |
409 | ||
410 | #ifdef CONFIG_BLK_DEV_MD | |
411 | if (state->parts[p].flags & ADDPART_FLAG_RAID) | |
412 | md_autodetect_dev(part_to_dev(part)->devt); | |
413 | #endif | |
414 | return true; | |
415 | } | |
416 | ||
a1548b67 | 417 | int blk_add_partitions(struct gendisk *disk, struct block_device *bdev) |
f902b026 CH |
418 | { |
419 | struct parsed_partitions *state; | |
420 | int ret = -EAGAIN, p, highest; | |
fe316bf2 | 421 | |
142fe8f4 CH |
422 | if (!disk_part_scan_enabled(disk)) |
423 | return 0; | |
424 | ||
f902b026 CH |
425 | state = check_partition(disk, bdev); |
426 | if (!state) | |
94ea4158 AV |
427 | return 0; |
428 | if (IS_ERR(state)) { | |
429 | /* | |
f902b026 CH |
430 | * I/O error reading the partition table. If we tried to read |
431 | * beyond EOD, retry after unlocking the native capacity. | |
94ea4158 AV |
432 | */ |
433 | if (PTR_ERR(state) == -ENOSPC) { | |
434 | printk(KERN_WARNING "%s: partition table beyond EOD, ", | |
435 | disk->disk_name); | |
436 | if (disk_unlock_native_capacity(disk)) | |
f902b026 | 437 | return -EAGAIN; |
94ea4158 AV |
438 | } |
439 | return -EIO; | |
440 | } | |
5eac3eb3 | 441 | |
f902b026 | 442 | /* |
b7205307 | 443 | * Partitions are not supported on host managed zoned block devices. |
f902b026 | 444 | */ |
b7205307 CH |
445 | if (disk->queue->limits.zoned == BLK_ZONED_HM) { |
446 | pr_warn("%s: ignoring partition table on host managed zoned block device\n", | |
5eac3eb3 | 447 | disk->disk_name); |
f902b026 CH |
448 | ret = 0; |
449 | goto out_free_state; | |
5eac3eb3 DLM |
450 | } |
451 | ||
94ea4158 | 452 | /* |
f902b026 CH |
453 | * If we read beyond EOD, try unlocking native capacity even if the |
454 | * partition table was successfully read as we could be missing some | |
455 | * partitions. | |
94ea4158 AV |
456 | */ |
457 | if (state->access_beyond_eod) { | |
458 | printk(KERN_WARNING | |
459 | "%s: partition table partially beyond EOD, ", | |
460 | disk->disk_name); | |
461 | if (disk_unlock_native_capacity(disk)) | |
f902b026 | 462 | goto out_free_state; |
94ea4158 AV |
463 | } |
464 | ||
465 | /* tell userspace that the media / partition table may have changed */ | |
466 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); | |
467 | ||
f902b026 CH |
468 | /* |
469 | * Detect the highest partition number and preallocate disk->part_tbl. | |
470 | * This is an optimization and not strictly necessary. | |
94ea4158 AV |
471 | */ |
472 | for (p = 1, highest = 0; p < state->limit; p++) | |
473 | if (state->parts[p].size) | |
474 | highest = p; | |
94ea4158 AV |
475 | disk_expand_part_tbl(disk, highest); |
476 | ||
f902b026 CH |
477 | for (p = 1; p < state->limit; p++) |
478 | if (!blk_add_partition(disk, bdev, state, p)) | |
479 | goto out_free_state; | |
94ea4158 | 480 | |
f902b026 CH |
481 | ret = 0; |
482 | out_free_state: | |
ac2e5327 | 483 | free_partitions(state); |
f902b026 | 484 | return ret; |
fe316bf2 JN |
485 | } |
486 | ||
d1a5f2b4 DW |
487 | unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) |
488 | { | |
a41fe02b | 489 | struct address_space *mapping = bdev->bd_inode->i_mapping; |
94ea4158 AV |
490 | struct page *page; |
491 | ||
a41fe02b | 492 | page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_SHIFT-9)), NULL); |
94ea4158 AV |
493 | if (!IS_ERR(page)) { |
494 | if (PageError(page)) | |
495 | goto fail; | |
496 | p->v = page; | |
09cbfeaf | 497 | return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_SHIFT - 9)) - 1)) << 9); |
94ea4158 | 498 | fail: |
09cbfeaf | 499 | put_page(page); |
94ea4158 AV |
500 | } |
501 | p->v = NULL; | |
502 | return NULL; | |
503 | } | |
504 | ||
505 | EXPORT_SYMBOL(read_dev_sector); |