]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - fs/btrfs/volumes.c
Btrfs: Handle transid == 0 while opening devices
[mirror_ubuntu-zesty-kernel.git] / fs / btrfs / volumes.c
CommitLineData
0b86a832
CM
1/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18#include <linux/sched.h>
19#include <linux/bio.h>
8a4b83cc 20#include <linux/buffer_head.h>
f2d8d74d 21#include <linux/blkdev.h>
788f20eb 22#include <linux/random.h>
593060d7 23#include <asm/div64.h>
0b86a832
CM
24#include "ctree.h"
25#include "extent_map.h"
26#include "disk-io.h"
27#include "transaction.h"
28#include "print-tree.h"
29#include "volumes.h"
30
593060d7
CM
31struct map_lookup {
32 u64 type;
33 int io_align;
34 int io_width;
35 int stripe_len;
36 int sector_size;
37 int num_stripes;
321aecc6 38 int sub_stripes;
cea9e445 39 struct btrfs_bio_stripe stripes[];
593060d7
CM
40};
41
/* bytes needed for a struct map_lookup that carries n stripe entries */
#define map_lookup_size(n) \
	(sizeof(struct map_lookup) + (n) * sizeof(struct btrfs_bio_stripe))
593060d7 44
8a4b83cc
CM
/* taken around device scanning, opening, closing and removal in this file */
static DEFINE_MUTEX(uuid_mutex);

/* list of struct btrfs_fs_devices (linked through ->list), one per fsid */
static LIST_HEAD(fs_uuids);
47
a061fc8d
CM
48void btrfs_lock_volumes(void)
49{
50 mutex_lock(&uuid_mutex);
51}
52
53void btrfs_unlock_volumes(void)
54{
55 mutex_unlock(&uuid_mutex);
56}
57
8a4b83cc
CM
58int btrfs_cleanup_fs_uuids(void)
59{
60 struct btrfs_fs_devices *fs_devices;
61 struct list_head *uuid_cur;
62 struct list_head *devices_cur;
63 struct btrfs_device *dev;
64
65 list_for_each(uuid_cur, &fs_uuids) {
66 fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices,
67 list);
68 while(!list_empty(&fs_devices->devices)) {
69 devices_cur = fs_devices->devices.next;
70 dev = list_entry(devices_cur, struct btrfs_device,
71 dev_list);
8a4b83cc 72 if (dev->bdev) {
8a4b83cc 73 close_bdev_excl(dev->bdev);
a0af469b 74 fs_devices->open_devices--;
8a4b83cc
CM
75 }
76 list_del(&dev->dev_list);
dfe25020 77 kfree(dev->name);
8a4b83cc
CM
78 kfree(dev);
79 }
80 }
81 return 0;
82}
83
a443755f
CM
84static struct btrfs_device *__find_device(struct list_head *head, u64 devid,
85 u8 *uuid)
8a4b83cc
CM
86{
87 struct btrfs_device *dev;
88 struct list_head *cur;
89
90 list_for_each(cur, head) {
91 dev = list_entry(cur, struct btrfs_device, dev_list);
a443755f 92 if (dev->devid == devid &&
8f18cf13 93 (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
8a4b83cc 94 return dev;
a443755f 95 }
8a4b83cc
CM
96 }
97 return NULL;
98}
99
100static struct btrfs_fs_devices *find_fsid(u8 *fsid)
101{
102 struct list_head *cur;
103 struct btrfs_fs_devices *fs_devices;
104
105 list_for_each(cur, &fs_uuids) {
106 fs_devices = list_entry(cur, struct btrfs_fs_devices, list);
107 if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
108 return fs_devices;
109 }
110 return NULL;
111}
112
113static int device_list_add(const char *path,
114 struct btrfs_super_block *disk_super,
115 u64 devid, struct btrfs_fs_devices **fs_devices_ret)
116{
117 struct btrfs_device *device;
118 struct btrfs_fs_devices *fs_devices;
119 u64 found_transid = btrfs_super_generation(disk_super);
120
121 fs_devices = find_fsid(disk_super->fsid);
122 if (!fs_devices) {
123 fs_devices = kmalloc(sizeof(*fs_devices), GFP_NOFS);
124 if (!fs_devices)
125 return -ENOMEM;
126 INIT_LIST_HEAD(&fs_devices->devices);
b3075717 127 INIT_LIST_HEAD(&fs_devices->alloc_list);
8a4b83cc
CM
128 list_add(&fs_devices->list, &fs_uuids);
129 memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
130 fs_devices->latest_devid = devid;
131 fs_devices->latest_trans = found_transid;
8a4b83cc
CM
132 fs_devices->num_devices = 0;
133 device = NULL;
134 } else {
a443755f
CM
135 device = __find_device(&fs_devices->devices, devid,
136 disk_super->dev_item.uuid);
8a4b83cc
CM
137 }
138 if (!device) {
139 device = kzalloc(sizeof(*device), GFP_NOFS);
140 if (!device) {
141 /* we can safely leave the fs_devices entry around */
142 return -ENOMEM;
143 }
144 device->devid = devid;
a443755f
CM
145 memcpy(device->uuid, disk_super->dev_item.uuid,
146 BTRFS_UUID_SIZE);
f2984462 147 device->barriers = 1;
b248a415 148 spin_lock_init(&device->io_lock);
8a4b83cc
CM
149 device->name = kstrdup(path, GFP_NOFS);
150 if (!device->name) {
151 kfree(device);
152 return -ENOMEM;
153 }
154 list_add(&device->dev_list, &fs_devices->devices);
b3075717 155 list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
8a4b83cc
CM
156 fs_devices->num_devices++;
157 }
158
159 if (found_transid > fs_devices->latest_trans) {
160 fs_devices->latest_devid = devid;
161 fs_devices->latest_trans = found_transid;
162 }
8a4b83cc
CM
163 *fs_devices_ret = fs_devices;
164 return 0;
165}
166
dfe25020
CM
167int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
168{
169 struct list_head *head = &fs_devices->devices;
170 struct list_head *cur;
171 struct btrfs_device *device;
172
173 mutex_lock(&uuid_mutex);
174again:
175 list_for_each(cur, head) {
176 device = list_entry(cur, struct btrfs_device, dev_list);
177 if (!device->in_fs_metadata) {
a0af469b 178 if (device->bdev) {
dfe25020 179 close_bdev_excl(device->bdev);
a0af469b
CM
180 fs_devices->open_devices--;
181 }
dfe25020
CM
182 list_del(&device->dev_list);
183 list_del(&device->dev_alloc_list);
184 fs_devices->num_devices--;
185 kfree(device->name);
186 kfree(device);
187 goto again;
188 }
189 }
190 mutex_unlock(&uuid_mutex);
191 return 0;
192}
a0af469b 193
8a4b83cc
CM
194int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
195{
196 struct list_head *head = &fs_devices->devices;
197 struct list_head *cur;
198 struct btrfs_device *device;
199
200 mutex_lock(&uuid_mutex);
201 list_for_each(cur, head) {
202 device = list_entry(cur, struct btrfs_device, dev_list);
203 if (device->bdev) {
204 close_bdev_excl(device->bdev);
a0af469b 205 fs_devices->open_devices--;
8a4b83cc
CM
206 }
207 device->bdev = NULL;
dfe25020 208 device->in_fs_metadata = 0;
8a4b83cc 209 }
a0af469b 210 fs_devices->mounted = 0;
8a4b83cc
CM
211 mutex_unlock(&uuid_mutex);
212 return 0;
213}
214
215int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
216 int flags, void *holder)
217{
218 struct block_device *bdev;
219 struct list_head *head = &fs_devices->devices;
220 struct list_head *cur;
221 struct btrfs_device *device;
a0af469b
CM
222 struct block_device *latest_bdev = NULL;
223 struct buffer_head *bh;
224 struct btrfs_super_block *disk_super;
225 u64 latest_devid = 0;
226 u64 latest_transid = 0;
227 u64 transid;
228 u64 devid;
229 int ret = 0;
8a4b83cc
CM
230
231 mutex_lock(&uuid_mutex);
a0af469b
CM
232 if (fs_devices->mounted)
233 goto out;
234
8a4b83cc
CM
235 list_for_each(cur, head) {
236 device = list_entry(cur, struct btrfs_device, dev_list);
c1c4d91c
CM
237 if (device->bdev)
238 continue;
239
dfe25020
CM
240 if (!device->name)
241 continue;
242
8a4b83cc 243 bdev = open_bdev_excl(device->name, flags, holder);
e17cade2 244
8a4b83cc
CM
245 if (IS_ERR(bdev)) {
246 printk("open %s failed\n", device->name);
a0af469b 247 goto error;
8a4b83cc 248 }
a061fc8d 249 set_blocksize(bdev, 4096);
a0af469b
CM
250
251 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
252 if (!bh)
253 goto error_close;
254
255 disk_super = (struct btrfs_super_block *)bh->b_data;
256 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
257 sizeof(disk_super->magic)))
258 goto error_brelse;
259
260 devid = le64_to_cpu(disk_super->dev_item.devid);
261 if (devid != device->devid)
262 goto error_brelse;
263
264 transid = btrfs_super_generation(disk_super);
6af5ac3c 265 if (!latest_transid || transid > latest_transid) {
a0af469b
CM
266 latest_devid = devid;
267 latest_transid = transid;
268 latest_bdev = bdev;
269 }
270
8a4b83cc 271 device->bdev = bdev;
dfe25020 272 device->in_fs_metadata = 0;
a0af469b
CM
273 fs_devices->open_devices++;
274 continue;
a061fc8d 275
a0af469b
CM
276error_brelse:
277 brelse(bh);
278error_close:
279 close_bdev_excl(bdev);
280error:
281 continue;
8a4b83cc 282 }
a0af469b
CM
283 if (fs_devices->open_devices == 0) {
284 ret = -EIO;
285 goto out;
286 }
287 fs_devices->mounted = 1;
288 fs_devices->latest_bdev = latest_bdev;
289 fs_devices->latest_devid = latest_devid;
290 fs_devices->latest_trans = latest_transid;
291out:
8a4b83cc 292 mutex_unlock(&uuid_mutex);
8a4b83cc
CM
293 return ret;
294}
295
296int btrfs_scan_one_device(const char *path, int flags, void *holder,
297 struct btrfs_fs_devices **fs_devices_ret)
298{
299 struct btrfs_super_block *disk_super;
300 struct block_device *bdev;
301 struct buffer_head *bh;
302 int ret;
303 u64 devid;
f2984462 304 u64 transid;
8a4b83cc
CM
305
306 mutex_lock(&uuid_mutex);
307
8a4b83cc
CM
308 bdev = open_bdev_excl(path, flags, holder);
309
310 if (IS_ERR(bdev)) {
8a4b83cc
CM
311 ret = PTR_ERR(bdev);
312 goto error;
313 }
314
315 ret = set_blocksize(bdev, 4096);
316 if (ret)
317 goto error_close;
318 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
319 if (!bh) {
320 ret = -EIO;
321 goto error_close;
322 }
323 disk_super = (struct btrfs_super_block *)bh->b_data;
324 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
325 sizeof(disk_super->magic))) {
e58ca020 326 ret = -EINVAL;
8a4b83cc
CM
327 goto error_brelse;
328 }
329 devid = le64_to_cpu(disk_super->dev_item.devid);
f2984462 330 transid = btrfs_super_generation(disk_super);
7ae9c09d
CM
331 if (disk_super->label[0])
332 printk("device label %s ", disk_super->label);
333 else {
334 /* FIXME, make a readl uuid parser */
335 printk("device fsid %llx-%llx ",
336 *(unsigned long long *)disk_super->fsid,
337 *(unsigned long long *)(disk_super->fsid + 8));
338 }
339 printk("devid %Lu transid %Lu %s\n", devid, transid, path);
8a4b83cc
CM
340 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
341
342error_brelse:
343 brelse(bh);
344error_close:
345 close_bdev_excl(bdev);
8a4b83cc
CM
346error:
347 mutex_unlock(&uuid_mutex);
348 return ret;
349}
0b86a832
CM
350
351/*
352 * this uses a pretty simple search, the expectation is that it is
353 * called very infrequently and that a given device has a small number
354 * of extents
355 */
356static int find_free_dev_extent(struct btrfs_trans_handle *trans,
357 struct btrfs_device *device,
358 struct btrfs_path *path,
359 u64 num_bytes, u64 *start)
360{
361 struct btrfs_key key;
362 struct btrfs_root *root = device->dev_root;
363 struct btrfs_dev_extent *dev_extent = NULL;
364 u64 hole_size = 0;
365 u64 last_byte = 0;
366 u64 search_start = 0;
367 u64 search_end = device->total_bytes;
368 int ret;
369 int slot = 0;
370 int start_found;
371 struct extent_buffer *l;
372
373 start_found = 0;
374 path->reada = 2;
375
376 /* FIXME use last free of some kind */
377
8a4b83cc
CM
378 /* we don't want to overwrite the superblock on the drive,
379 * so we make sure to start at an offset of at least 1MB
380 */
381 search_start = max((u64)1024 * 1024, search_start);
8f18cf13
CM
382
383 if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
384 search_start = max(root->fs_info->alloc_start, search_start);
385
0b86a832
CM
386 key.objectid = device->devid;
387 key.offset = search_start;
388 key.type = BTRFS_DEV_EXTENT_KEY;
389 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
390 if (ret < 0)
391 goto error;
392 ret = btrfs_previous_item(root, path, 0, key.type);
393 if (ret < 0)
394 goto error;
395 l = path->nodes[0];
396 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
397 while (1) {
398 l = path->nodes[0];
399 slot = path->slots[0];
400 if (slot >= btrfs_header_nritems(l)) {
401 ret = btrfs_next_leaf(root, path);
402 if (ret == 0)
403 continue;
404 if (ret < 0)
405 goto error;
406no_more_items:
407 if (!start_found) {
408 if (search_start >= search_end) {
409 ret = -ENOSPC;
410 goto error;
411 }
412 *start = search_start;
413 start_found = 1;
414 goto check_pending;
415 }
416 *start = last_byte > search_start ?
417 last_byte : search_start;
418 if (search_end <= *start) {
419 ret = -ENOSPC;
420 goto error;
421 }
422 goto check_pending;
423 }
424 btrfs_item_key_to_cpu(l, &key, slot);
425
426 if (key.objectid < device->devid)
427 goto next;
428
429 if (key.objectid > device->devid)
430 goto no_more_items;
431
432 if (key.offset >= search_start && key.offset > last_byte &&
433 start_found) {
434 if (last_byte < search_start)
435 last_byte = search_start;
436 hole_size = key.offset - last_byte;
437 if (key.offset > last_byte &&
438 hole_size >= num_bytes) {
439 *start = last_byte;
440 goto check_pending;
441 }
442 }
443 if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
444 goto next;
445 }
446
447 start_found = 1;
448 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
449 last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
450next:
451 path->slots[0]++;
452 cond_resched();
453 }
454check_pending:
455 /* we have to make sure we didn't find an extent that has already
456 * been allocated by the map tree or the original allocation
457 */
458 btrfs_release_path(root, path);
459 BUG_ON(*start < search_start);
460
6324fbf3 461 if (*start + num_bytes > search_end) {
0b86a832
CM
462 ret = -ENOSPC;
463 goto error;
464 }
465 /* check for pending inserts here */
466 return 0;
467
468error:
469 btrfs_release_path(root, path);
470 return ret;
471}
472
8f18cf13
CM
473int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
474 struct btrfs_device *device,
475 u64 start)
476{
477 int ret;
478 struct btrfs_path *path;
479 struct btrfs_root *root = device->dev_root;
480 struct btrfs_key key;
a061fc8d
CM
481 struct btrfs_key found_key;
482 struct extent_buffer *leaf = NULL;
483 struct btrfs_dev_extent *extent = NULL;
8f18cf13
CM
484
485 path = btrfs_alloc_path();
486 if (!path)
487 return -ENOMEM;
488
489 key.objectid = device->devid;
490 key.offset = start;
491 key.type = BTRFS_DEV_EXTENT_KEY;
492
493 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
a061fc8d
CM
494 if (ret > 0) {
495 ret = btrfs_previous_item(root, path, key.objectid,
496 BTRFS_DEV_EXTENT_KEY);
497 BUG_ON(ret);
498 leaf = path->nodes[0];
499 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
500 extent = btrfs_item_ptr(leaf, path->slots[0],
501 struct btrfs_dev_extent);
502 BUG_ON(found_key.offset > start || found_key.offset +
503 btrfs_dev_extent_length(leaf, extent) < start);
504 ret = 0;
505 } else if (ret == 0) {
506 leaf = path->nodes[0];
507 extent = btrfs_item_ptr(leaf, path->slots[0],
508 struct btrfs_dev_extent);
509 }
8f18cf13
CM
510 BUG_ON(ret);
511
dfe25020
CM
512 if (device->bytes_used > 0)
513 device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
8f18cf13
CM
514 ret = btrfs_del_item(trans, root, path);
515 BUG_ON(ret);
516
517 btrfs_free_path(path);
518 return ret;
519}
520
0b86a832
CM
521int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
522 struct btrfs_device *device,
e17cade2
CM
523 u64 chunk_tree, u64 chunk_objectid,
524 u64 chunk_offset,
525 u64 num_bytes, u64 *start)
0b86a832
CM
526{
527 int ret;
528 struct btrfs_path *path;
529 struct btrfs_root *root = device->dev_root;
530 struct btrfs_dev_extent *extent;
531 struct extent_buffer *leaf;
532 struct btrfs_key key;
533
dfe25020 534 WARN_ON(!device->in_fs_metadata);
0b86a832
CM
535 path = btrfs_alloc_path();
536 if (!path)
537 return -ENOMEM;
538
539 ret = find_free_dev_extent(trans, device, path, num_bytes, start);
6324fbf3 540 if (ret) {
0b86a832 541 goto err;
6324fbf3 542 }
0b86a832
CM
543
544 key.objectid = device->devid;
545 key.offset = *start;
546 key.type = BTRFS_DEV_EXTENT_KEY;
547 ret = btrfs_insert_empty_item(trans, root, path, &key,
548 sizeof(*extent));
549 BUG_ON(ret);
550
551 leaf = path->nodes[0];
552 extent = btrfs_item_ptr(leaf, path->slots[0],
553 struct btrfs_dev_extent);
e17cade2
CM
554 btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
555 btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
556 btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
557
558 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
559 (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
560 BTRFS_UUID_SIZE);
561
0b86a832
CM
562 btrfs_set_dev_extent_length(leaf, extent, num_bytes);
563 btrfs_mark_buffer_dirty(leaf);
564err:
565 btrfs_free_path(path);
566 return ret;
567}
568
e17cade2 569static int find_next_chunk(struct btrfs_root *root, u64 objectid, u64 *offset)
0b86a832
CM
570{
571 struct btrfs_path *path;
572 int ret;
573 struct btrfs_key key;
e17cade2 574 struct btrfs_chunk *chunk;
0b86a832
CM
575 struct btrfs_key found_key;
576
577 path = btrfs_alloc_path();
578 BUG_ON(!path);
579
e17cade2 580 key.objectid = objectid;
0b86a832
CM
581 key.offset = (u64)-1;
582 key.type = BTRFS_CHUNK_ITEM_KEY;
583
584 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
585 if (ret < 0)
586 goto error;
587
588 BUG_ON(ret == 0);
589
590 ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
591 if (ret) {
e17cade2 592 *offset = 0;
0b86a832
CM
593 } else {
594 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
595 path->slots[0]);
e17cade2
CM
596 if (found_key.objectid != objectid)
597 *offset = 0;
598 else {
599 chunk = btrfs_item_ptr(path->nodes[0], path->slots[0],
600 struct btrfs_chunk);
601 *offset = found_key.offset +
602 btrfs_chunk_length(path->nodes[0], chunk);
603 }
0b86a832
CM
604 }
605 ret = 0;
606error:
607 btrfs_free_path(path);
608 return ret;
609}
610
0b86a832
CM
611static int find_next_devid(struct btrfs_root *root, struct btrfs_path *path,
612 u64 *objectid)
613{
614 int ret;
615 struct btrfs_key key;
616 struct btrfs_key found_key;
617
618 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
619 key.type = BTRFS_DEV_ITEM_KEY;
620 key.offset = (u64)-1;
621
622 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
623 if (ret < 0)
624 goto error;
625
626 BUG_ON(ret == 0);
627
628 ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
629 BTRFS_DEV_ITEM_KEY);
630 if (ret) {
631 *objectid = 1;
632 } else {
633 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
634 path->slots[0]);
635 *objectid = found_key.offset + 1;
636 }
637 ret = 0;
638error:
639 btrfs_release_path(root, path);
640 return ret;
641}
642
643/*
644 * the device information is stored in the chunk root
645 * the btrfs_device struct should be fully filled in
646 */
647int btrfs_add_device(struct btrfs_trans_handle *trans,
648 struct btrfs_root *root,
649 struct btrfs_device *device)
650{
651 int ret;
652 struct btrfs_path *path;
653 struct btrfs_dev_item *dev_item;
654 struct extent_buffer *leaf;
655 struct btrfs_key key;
656 unsigned long ptr;
006a58a2 657 u64 free_devid = 0;
0b86a832
CM
658
659 root = root->fs_info->chunk_root;
660
661 path = btrfs_alloc_path();
662 if (!path)
663 return -ENOMEM;
664
665 ret = find_next_devid(root, path, &free_devid);
666 if (ret)
667 goto out;
668
669 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
670 key.type = BTRFS_DEV_ITEM_KEY;
671 key.offset = free_devid;
672
673 ret = btrfs_insert_empty_item(trans, root, path, &key,
0d81ba5d 674 sizeof(*dev_item));
0b86a832
CM
675 if (ret)
676 goto out;
677
678 leaf = path->nodes[0];
679 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
680
8a4b83cc 681 device->devid = free_devid;
0b86a832
CM
682 btrfs_set_device_id(leaf, dev_item, device->devid);
683 btrfs_set_device_type(leaf, dev_item, device->type);
684 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
685 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
686 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
0b86a832
CM
687 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
688 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
e17cade2
CM
689 btrfs_set_device_group(leaf, dev_item, 0);
690 btrfs_set_device_seek_speed(leaf, dev_item, 0);
691 btrfs_set_device_bandwidth(leaf, dev_item, 0);
0b86a832 692
0b86a832 693 ptr = (unsigned long)btrfs_device_uuid(dev_item);
e17cade2 694 write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
0b86a832
CM
695 btrfs_mark_buffer_dirty(leaf);
696 ret = 0;
697
698out:
699 btrfs_free_path(path);
700 return ret;
701}
8f18cf13 702
a061fc8d
CM
703static int btrfs_rm_dev_item(struct btrfs_root *root,
704 struct btrfs_device *device)
705{
706 int ret;
707 struct btrfs_path *path;
708 struct block_device *bdev = device->bdev;
709 struct btrfs_device *next_dev;
710 struct btrfs_key key;
711 u64 total_bytes;
712 struct btrfs_fs_devices *fs_devices;
713 struct btrfs_trans_handle *trans;
714
715 root = root->fs_info->chunk_root;
716
717 path = btrfs_alloc_path();
718 if (!path)
719 return -ENOMEM;
720
721 trans = btrfs_start_transaction(root, 1);
722 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
723 key.type = BTRFS_DEV_ITEM_KEY;
724 key.offset = device->devid;
725
726 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
727 if (ret < 0)
728 goto out;
729
730 if (ret > 0) {
731 ret = -ENOENT;
732 goto out;
733 }
734
735 ret = btrfs_del_item(trans, root, path);
736 if (ret)
737 goto out;
738
739 /*
740 * at this point, the device is zero sized. We want to
741 * remove it from the devices list and zero out the old super
742 */
743 list_del_init(&device->dev_list);
744 list_del_init(&device->dev_alloc_list);
745 fs_devices = root->fs_info->fs_devices;
746
747 next_dev = list_entry(fs_devices->devices.next, struct btrfs_device,
748 dev_list);
a061fc8d
CM
749 if (bdev == root->fs_info->sb->s_bdev)
750 root->fs_info->sb->s_bdev = next_dev->bdev;
751 if (bdev == fs_devices->latest_bdev)
752 fs_devices->latest_bdev = next_dev->bdev;
753
754 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
755 btrfs_set_super_total_bytes(&root->fs_info->super_copy,
756 total_bytes - device->total_bytes);
757
758 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
759 btrfs_set_super_num_devices(&root->fs_info->super_copy,
760 total_bytes - 1);
761out:
762 btrfs_free_path(path);
763 btrfs_commit_transaction(trans, root);
764 return ret;
765}
766
767int btrfs_rm_device(struct btrfs_root *root, char *device_path)
768{
769 struct btrfs_device *device;
770 struct block_device *bdev;
dfe25020 771 struct buffer_head *bh = NULL;
a061fc8d
CM
772 struct btrfs_super_block *disk_super;
773 u64 all_avail;
774 u64 devid;
775 int ret = 0;
776
777 mutex_lock(&root->fs_info->fs_mutex);
778 mutex_lock(&uuid_mutex);
779
780 all_avail = root->fs_info->avail_data_alloc_bits |
781 root->fs_info->avail_system_alloc_bits |
782 root->fs_info->avail_metadata_alloc_bits;
783
784 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
dfe25020 785 btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) {
a061fc8d
CM
786 printk("btrfs: unable to go below four devices on raid10\n");
787 ret = -EINVAL;
788 goto out;
789 }
790
791 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
dfe25020 792 btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) {
a061fc8d
CM
793 printk("btrfs: unable to go below two devices on raid1\n");
794 ret = -EINVAL;
795 goto out;
796 }
797
dfe25020
CM
798 if (strcmp(device_path, "missing") == 0) {
799 struct list_head *cur;
800 struct list_head *devices;
801 struct btrfs_device *tmp;
a061fc8d 802
dfe25020
CM
803 device = NULL;
804 devices = &root->fs_info->fs_devices->devices;
805 list_for_each(cur, devices) {
806 tmp = list_entry(cur, struct btrfs_device, dev_list);
807 if (tmp->in_fs_metadata && !tmp->bdev) {
808 device = tmp;
809 break;
810 }
811 }
812 bdev = NULL;
813 bh = NULL;
814 disk_super = NULL;
815 if (!device) {
816 printk("btrfs: no missing devices found to remove\n");
817 goto out;
818 }
819
820 } else {
821 bdev = open_bdev_excl(device_path, 0,
822 root->fs_info->bdev_holder);
823 if (IS_ERR(bdev)) {
824 ret = PTR_ERR(bdev);
825 goto out;
826 }
a061fc8d 827
dfe25020
CM
828 bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
829 if (!bh) {
830 ret = -EIO;
831 goto error_close;
832 }
833 disk_super = (struct btrfs_super_block *)bh->b_data;
834 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
835 sizeof(disk_super->magic))) {
836 ret = -ENOENT;
837 goto error_brelse;
838 }
839 if (memcmp(disk_super->fsid, root->fs_info->fsid,
840 BTRFS_FSID_SIZE)) {
841 ret = -ENOENT;
842 goto error_brelse;
843 }
844 devid = le64_to_cpu(disk_super->dev_item.devid);
845 device = btrfs_find_device(root, devid, NULL);
846 if (!device) {
847 ret = -ENOENT;
848 goto error_brelse;
849 }
850
851 }
a061fc8d
CM
852 root->fs_info->fs_devices->num_devices--;
853
854 ret = btrfs_shrink_device(device, 0);
855 if (ret)
856 goto error_brelse;
857
858
859 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
860 if (ret)
861 goto error_brelse;
862
dfe25020
CM
863 if (bh) {
864 /* make sure this device isn't detected as part of
865 * the FS anymore
866 */
867 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
868 set_buffer_dirty(bh);
869 sync_dirty_buffer(bh);
a061fc8d 870
dfe25020
CM
871 brelse(bh);
872 }
a061fc8d 873
dfe25020
CM
874 if (device->bdev) {
875 /* one close for the device struct or super_block */
876 close_bdev_excl(device->bdev);
a0af469b 877 root->fs_info->fs_devices->open_devices--;
dfe25020
CM
878 }
879 if (bdev) {
880 /* one close for us */
881 close_bdev_excl(bdev);
882 }
a061fc8d
CM
883 kfree(device->name);
884 kfree(device);
885 ret = 0;
886 goto out;
887
888error_brelse:
889 brelse(bh);
890error_close:
dfe25020
CM
891 if (bdev)
892 close_bdev_excl(bdev);
a061fc8d
CM
893out:
894 mutex_unlock(&uuid_mutex);
895 mutex_unlock(&root->fs_info->fs_mutex);
896 return ret;
897}
898
788f20eb
CM
899int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
900{
901 struct btrfs_trans_handle *trans;
902 struct btrfs_device *device;
903 struct block_device *bdev;
904 struct list_head *cur;
905 struct list_head *devices;
906 u64 total_bytes;
907 int ret = 0;
908
909
910 bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder);
911 if (!bdev) {
912 return -EIO;
913 }
914 mutex_lock(&root->fs_info->fs_mutex);
915 trans = btrfs_start_transaction(root, 1);
916 devices = &root->fs_info->fs_devices->devices;
917 list_for_each(cur, devices) {
918 device = list_entry(cur, struct btrfs_device, dev_list);
919 if (device->bdev == bdev) {
920 ret = -EEXIST;
921 goto out;
922 }
923 }
924
925 device = kzalloc(sizeof(*device), GFP_NOFS);
926 if (!device) {
927 /* we can safely leave the fs_devices entry around */
928 ret = -ENOMEM;
929 goto out_close_bdev;
930 }
931
932 device->barriers = 1;
933 generate_random_uuid(device->uuid);
934 spin_lock_init(&device->io_lock);
935 device->name = kstrdup(device_path, GFP_NOFS);
936 if (!device->name) {
937 kfree(device);
938 goto out_close_bdev;
939 }
940 device->io_width = root->sectorsize;
941 device->io_align = root->sectorsize;
942 device->sector_size = root->sectorsize;
943 device->total_bytes = i_size_read(bdev->bd_inode);
944 device->dev_root = root->fs_info->dev_root;
945 device->bdev = bdev;
dfe25020 946 device->in_fs_metadata = 1;
788f20eb
CM
947
948 ret = btrfs_add_device(trans, root, device);
949 if (ret)
950 goto out_close_bdev;
951
952 total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
953 btrfs_set_super_total_bytes(&root->fs_info->super_copy,
954 total_bytes + device->total_bytes);
955
956 total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
957 btrfs_set_super_num_devices(&root->fs_info->super_copy,
958 total_bytes + 1);
959
960 list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
961 list_add(&device->dev_alloc_list,
962 &root->fs_info->fs_devices->alloc_list);
963 root->fs_info->fs_devices->num_devices++;
a0af469b 964 root->fs_info->fs_devices->open_devices++;
788f20eb
CM
965out:
966 btrfs_end_transaction(trans, root);
967 mutex_unlock(&root->fs_info->fs_mutex);
968 return ret;
969
970out_close_bdev:
971 close_bdev_excl(bdev);
972 goto out;
973}
974
0b86a832
CM
975int btrfs_update_device(struct btrfs_trans_handle *trans,
976 struct btrfs_device *device)
977{
978 int ret;
979 struct btrfs_path *path;
980 struct btrfs_root *root;
981 struct btrfs_dev_item *dev_item;
982 struct extent_buffer *leaf;
983 struct btrfs_key key;
984
985 root = device->dev_root->fs_info->chunk_root;
986
987 path = btrfs_alloc_path();
988 if (!path)
989 return -ENOMEM;
990
991 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
992 key.type = BTRFS_DEV_ITEM_KEY;
993 key.offset = device->devid;
994
995 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
996 if (ret < 0)
997 goto out;
998
999 if (ret > 0) {
1000 ret = -ENOENT;
1001 goto out;
1002 }
1003
1004 leaf = path->nodes[0];
1005 dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
1006
1007 btrfs_set_device_id(leaf, dev_item, device->devid);
1008 btrfs_set_device_type(leaf, dev_item, device->type);
1009 btrfs_set_device_io_align(leaf, dev_item, device->io_align);
1010 btrfs_set_device_io_width(leaf, dev_item, device->io_width);
1011 btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
0b86a832
CM
1012 btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
1013 btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
1014 btrfs_mark_buffer_dirty(leaf);
1015
1016out:
1017 btrfs_free_path(path);
1018 return ret;
1019}
1020
8f18cf13
CM
1021int btrfs_grow_device(struct btrfs_trans_handle *trans,
1022 struct btrfs_device *device, u64 new_size)
1023{
1024 struct btrfs_super_block *super_copy =
1025 &device->dev_root->fs_info->super_copy;
1026 u64 old_total = btrfs_super_total_bytes(super_copy);
1027 u64 diff = new_size - device->total_bytes;
1028
1029 btrfs_set_super_total_bytes(super_copy, old_total + diff);
1030 return btrfs_update_device(trans, device);
1031}
1032
1033static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
1034 struct btrfs_root *root,
1035 u64 chunk_tree, u64 chunk_objectid,
1036 u64 chunk_offset)
1037{
1038 int ret;
1039 struct btrfs_path *path;
1040 struct btrfs_key key;
1041
1042 root = root->fs_info->chunk_root;
1043 path = btrfs_alloc_path();
1044 if (!path)
1045 return -ENOMEM;
1046
1047 key.objectid = chunk_objectid;
1048 key.offset = chunk_offset;
1049 key.type = BTRFS_CHUNK_ITEM_KEY;
1050
1051 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1052 BUG_ON(ret);
1053
1054 ret = btrfs_del_item(trans, root, path);
1055 BUG_ON(ret);
1056
1057 btrfs_free_path(path);
1058 return 0;
1059}
1060
1061int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64
1062 chunk_offset)
1063{
1064 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1065 struct btrfs_disk_key *disk_key;
1066 struct btrfs_chunk *chunk;
1067 u8 *ptr;
1068 int ret = 0;
1069 u32 num_stripes;
1070 u32 array_size;
1071 u32 len = 0;
1072 u32 cur;
1073 struct btrfs_key key;
1074
1075 array_size = btrfs_super_sys_array_size(super_copy);
1076
1077 ptr = super_copy->sys_chunk_array;
1078 cur = 0;
1079
1080 while (cur < array_size) {
1081 disk_key = (struct btrfs_disk_key *)ptr;
1082 btrfs_disk_key_to_cpu(&key, disk_key);
1083
1084 len = sizeof(*disk_key);
1085
1086 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
1087 chunk = (struct btrfs_chunk *)(ptr + len);
1088 num_stripes = btrfs_stack_chunk_num_stripes(chunk);
1089 len += btrfs_chunk_item_size(num_stripes);
1090 } else {
1091 ret = -EIO;
1092 break;
1093 }
1094 if (key.objectid == chunk_objectid &&
1095 key.offset == chunk_offset) {
1096 memmove(ptr, ptr + len, array_size - (cur + len));
1097 array_size -= len;
1098 btrfs_set_super_sys_array_size(super_copy, array_size);
1099 } else {
1100 ptr += len;
1101 cur += len;
1102 }
1103 }
1104 return ret;
1105}
1106
1107
1108int btrfs_relocate_chunk(struct btrfs_root *root,
1109 u64 chunk_tree, u64 chunk_objectid,
1110 u64 chunk_offset)
1111{
1112 struct extent_map_tree *em_tree;
1113 struct btrfs_root *extent_root;
1114 struct btrfs_trans_handle *trans;
1115 struct extent_map *em;
1116 struct map_lookup *map;
1117 int ret;
1118 int i;
1119
323da79c
CM
1120 printk("btrfs relocating chunk %llu\n",
1121 (unsigned long long)chunk_offset);
8f18cf13
CM
1122 root = root->fs_info->chunk_root;
1123 extent_root = root->fs_info->extent_root;
1124 em_tree = &root->fs_info->mapping_tree.map_tree;
1125
1126 /* step one, relocate all the extents inside this chunk */
1127 ret = btrfs_shrink_extent_tree(extent_root, chunk_offset);
1128 BUG_ON(ret);
1129
1130 trans = btrfs_start_transaction(root, 1);
1131 BUG_ON(!trans);
1132
1133 /*
1134 * step two, delete the device extents and the
1135 * chunk tree entries
1136 */
1137 spin_lock(&em_tree->lock);
1138 em = lookup_extent_mapping(em_tree, chunk_offset, 1);
1139 spin_unlock(&em_tree->lock);
1140
a061fc8d
CM
1141 BUG_ON(em->start > chunk_offset ||
1142 em->start + em->len < chunk_offset);
8f18cf13
CM
1143 map = (struct map_lookup *)em->bdev;
1144
1145 for (i = 0; i < map->num_stripes; i++) {
1146 ret = btrfs_free_dev_extent(trans, map->stripes[i].dev,
1147 map->stripes[i].physical);
1148 BUG_ON(ret);
a061fc8d 1149
dfe25020
CM
1150 if (map->stripes[i].dev) {
1151 ret = btrfs_update_device(trans, map->stripes[i].dev);
1152 BUG_ON(ret);
1153 }
8f18cf13
CM
1154 }
1155 ret = btrfs_free_chunk(trans, root, chunk_tree, chunk_objectid,
1156 chunk_offset);
1157
1158 BUG_ON(ret);
1159
1160 if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
1161 ret = btrfs_del_sys_chunk(root, chunk_objectid, chunk_offset);
1162 BUG_ON(ret);
8f18cf13
CM
1163 }
1164
8f18cf13
CM
1165 spin_lock(&em_tree->lock);
1166 remove_extent_mapping(em_tree, em);
1167 kfree(map);
1168 em->bdev = NULL;
1169
1170 /* once for the tree */
1171 free_extent_map(em);
1172 spin_unlock(&em_tree->lock);
1173
8f18cf13
CM
1174 /* once for us */
1175 free_extent_map(em);
1176
1177 btrfs_end_transaction(trans, root);
1178 return 0;
1179}
1180
ec44a35c
CM
1181static u64 div_factor(u64 num, int factor)
1182{
1183 if (factor == 10)
1184 return num;
1185 num *= factor;
1186 do_div(num, 10);
1187 return num;
1188}
1189
1190
1191int btrfs_balance(struct btrfs_root *dev_root)
1192{
1193 int ret;
1194 struct list_head *cur;
1195 struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
1196 struct btrfs_device *device;
1197 u64 old_size;
1198 u64 size_to_free;
1199 struct btrfs_path *path;
1200 struct btrfs_key key;
1201 struct btrfs_chunk *chunk;
1202 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
1203 struct btrfs_trans_handle *trans;
1204 struct btrfs_key found_key;
1205
1206
1207 dev_root = dev_root->fs_info->dev_root;
1208
1209 mutex_lock(&dev_root->fs_info->fs_mutex);
1210 /* step one make some room on all the devices */
1211 list_for_each(cur, devices) {
1212 device = list_entry(cur, struct btrfs_device, dev_list);
1213 old_size = device->total_bytes;
1214 size_to_free = div_factor(old_size, 1);
1215 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
1216 if (device->total_bytes - device->bytes_used > size_to_free)
1217 continue;
1218
1219 ret = btrfs_shrink_device(device, old_size - size_to_free);
1220 BUG_ON(ret);
1221
1222 trans = btrfs_start_transaction(dev_root, 1);
1223 BUG_ON(!trans);
1224
1225 ret = btrfs_grow_device(trans, device, old_size);
1226 BUG_ON(ret);
1227
1228 btrfs_end_transaction(trans, dev_root);
1229 }
1230
1231 /* step two, relocate all the chunks */
1232 path = btrfs_alloc_path();
1233 BUG_ON(!path);
1234
1235 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1236 key.offset = (u64)-1;
1237 key.type = BTRFS_CHUNK_ITEM_KEY;
1238
1239 while(1) {
1240 ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
1241 if (ret < 0)
1242 goto error;
1243
1244 /*
1245 * this shouldn't happen, it means the last relocate
1246 * failed
1247 */
1248 if (ret == 0)
1249 break;
1250
1251 ret = btrfs_previous_item(chunk_root, path, 0,
1252 BTRFS_CHUNK_ITEM_KEY);
1253 if (ret) {
1254 break;
1255 }
1256 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
1257 path->slots[0]);
1258 if (found_key.objectid != key.objectid)
1259 break;
1260 chunk = btrfs_item_ptr(path->nodes[0],
1261 path->slots[0],
1262 struct btrfs_chunk);
1263 key.offset = found_key.offset;
1264 /* chunk zero is special */
1265 if (key.offset == 0)
1266 break;
1267
1268 ret = btrfs_relocate_chunk(chunk_root,
1269 chunk_root->root_key.objectid,
1270 found_key.objectid,
1271 found_key.offset);
1272 BUG_ON(ret);
1273 btrfs_release_path(chunk_root, path);
1274 }
1275 ret = 0;
1276error:
1277 btrfs_free_path(path);
1278 mutex_unlock(&dev_root->fs_info->fs_mutex);
1279 return ret;
1280}
1281
8f18cf13
CM
1282/*
1283 * shrinking a device means finding all of the device extents past
1284 * the new size, and then following the back refs to the chunks.
1285 * The chunk relocation code actually frees the device extent
1286 */
1287int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
1288{
1289 struct btrfs_trans_handle *trans;
1290 struct btrfs_root *root = device->dev_root;
1291 struct btrfs_dev_extent *dev_extent = NULL;
1292 struct btrfs_path *path;
1293 u64 length;
1294 u64 chunk_tree;
1295 u64 chunk_objectid;
1296 u64 chunk_offset;
1297 int ret;
1298 int slot;
1299 struct extent_buffer *l;
1300 struct btrfs_key key;
1301 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1302 u64 old_total = btrfs_super_total_bytes(super_copy);
1303 u64 diff = device->total_bytes - new_size;
1304
1305
1306 path = btrfs_alloc_path();
1307 if (!path)
1308 return -ENOMEM;
1309
1310 trans = btrfs_start_transaction(root, 1);
1311 if (!trans) {
1312 ret = -ENOMEM;
1313 goto done;
1314 }
1315
1316 path->reada = 2;
1317
1318 device->total_bytes = new_size;
1319 ret = btrfs_update_device(trans, device);
1320 if (ret) {
1321 btrfs_end_transaction(trans, root);
1322 goto done;
1323 }
1324 WARN_ON(diff > old_total);
1325 btrfs_set_super_total_bytes(super_copy, old_total - diff);
1326 btrfs_end_transaction(trans, root);
1327
1328 key.objectid = device->devid;
1329 key.offset = (u64)-1;
1330 key.type = BTRFS_DEV_EXTENT_KEY;
1331
1332 while (1) {
1333 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1334 if (ret < 0)
1335 goto done;
1336
1337 ret = btrfs_previous_item(root, path, 0, key.type);
1338 if (ret < 0)
1339 goto done;
1340 if (ret) {
1341 ret = 0;
1342 goto done;
1343 }
1344
1345 l = path->nodes[0];
1346 slot = path->slots[0];
1347 btrfs_item_key_to_cpu(l, &key, path->slots[0]);
1348
1349 if (key.objectid != device->devid)
1350 goto done;
1351
1352 dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
1353 length = btrfs_dev_extent_length(l, dev_extent);
1354
1355 if (key.offset + length <= new_size)
1356 goto done;
1357
1358 chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent);
1359 chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent);
1360 chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
1361 btrfs_release_path(root, path);
1362
1363 ret = btrfs_relocate_chunk(root, chunk_tree, chunk_objectid,
1364 chunk_offset);
1365 if (ret)
1366 goto done;
1367 }
1368
1369done:
1370 btrfs_free_path(path);
1371 return ret;
1372}
1373
0b86a832
CM
1374int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
1375 struct btrfs_root *root,
1376 struct btrfs_key *key,
1377 struct btrfs_chunk *chunk, int item_size)
1378{
1379 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
1380 struct btrfs_disk_key disk_key;
1381 u32 array_size;
1382 u8 *ptr;
1383
1384 array_size = btrfs_super_sys_array_size(super_copy);
1385 if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE)
1386 return -EFBIG;
1387
1388 ptr = super_copy->sys_chunk_array + array_size;
1389 btrfs_cpu_key_to_disk(&disk_key, key);
1390 memcpy(ptr, &disk_key, sizeof(disk_key));
1391 ptr += sizeof(disk_key);
1392 memcpy(ptr, chunk, item_size);
1393 item_size += sizeof(disk_key);
1394 btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
1395 return 0;
1396}
1397
9b3f68b9
CM
1398static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
1399 int sub_stripes)
1400{
1401 if (type & (BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_DUP))
1402 return calc_size;
1403 else if (type & BTRFS_BLOCK_GROUP_RAID10)
1404 return calc_size * (num_stripes / sub_stripes);
1405 else
1406 return calc_size * num_stripes;
1407}
1408
1409
0b86a832
CM
1410int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
1411 struct btrfs_root *extent_root, u64 *start,
6324fbf3 1412 u64 *num_bytes, u64 type)
0b86a832
CM
1413{
1414 u64 dev_offset;
593060d7 1415 struct btrfs_fs_info *info = extent_root->fs_info;
0b86a832 1416 struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root;
8f18cf13 1417 struct btrfs_path *path;
0b86a832
CM
1418 struct btrfs_stripe *stripes;
1419 struct btrfs_device *device = NULL;
1420 struct btrfs_chunk *chunk;
6324fbf3 1421 struct list_head private_devs;
b3075717 1422 struct list_head *dev_list;
6324fbf3 1423 struct list_head *cur;
0b86a832
CM
1424 struct extent_map_tree *em_tree;
1425 struct map_lookup *map;
1426 struct extent_map *em;
a40a90a0 1427 int min_stripe_size = 1 * 1024 * 1024;
0b86a832
CM
1428 u64 physical;
1429 u64 calc_size = 1024 * 1024 * 1024;
9b3f68b9
CM
1430 u64 max_chunk_size = calc_size;
1431 u64 min_free;
6324fbf3
CM
1432 u64 avail;
1433 u64 max_avail = 0;
9b3f68b9 1434 u64 percent_max;
6324fbf3 1435 int num_stripes = 1;
a40a90a0 1436 int min_stripes = 1;
321aecc6 1437 int sub_stripes = 0;
6324fbf3 1438 int looped = 0;
0b86a832 1439 int ret;
6324fbf3 1440 int index;
593060d7 1441 int stripe_len = 64 * 1024;
0b86a832
CM
1442 struct btrfs_key key;
1443
ec44a35c
CM
1444 if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
1445 (type & BTRFS_BLOCK_GROUP_DUP)) {
1446 WARN_ON(1);
1447 type &= ~BTRFS_BLOCK_GROUP_DUP;
1448 }
b3075717 1449 dev_list = &extent_root->fs_info->fs_devices->alloc_list;
6324fbf3
CM
1450 if (list_empty(dev_list))
1451 return -ENOSPC;
593060d7 1452
a40a90a0 1453 if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
593060d7 1454 num_stripes = btrfs_super_num_devices(&info->super_copy);
a40a90a0
CM
1455 min_stripes = 2;
1456 }
1457 if (type & (BTRFS_BLOCK_GROUP_DUP)) {
611f0e00 1458 num_stripes = 2;
a40a90a0
CM
1459 min_stripes = 2;
1460 }
8790d502
CM
1461 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
1462 num_stripes = min_t(u64, 2,
1463 btrfs_super_num_devices(&info->super_copy));
9b3f68b9
CM
1464 if (num_stripes < 2)
1465 return -ENOSPC;
a40a90a0 1466 min_stripes = 2;
8790d502 1467 }
321aecc6
CM
1468 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
1469 num_stripes = btrfs_super_num_devices(&info->super_copy);
1470 if (num_stripes < 4)
1471 return -ENOSPC;
1472 num_stripes &= ~(u32)1;
1473 sub_stripes = 2;
a40a90a0 1474 min_stripes = 4;
321aecc6 1475 }
9b3f68b9
CM
1476
1477 if (type & BTRFS_BLOCK_GROUP_DATA) {
1478 max_chunk_size = 10 * calc_size;
a40a90a0 1479 min_stripe_size = 64 * 1024 * 1024;
9b3f68b9
CM
1480 } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
1481 max_chunk_size = 4 * calc_size;
a40a90a0
CM
1482 min_stripe_size = 32 * 1024 * 1024;
1483 } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
1484 calc_size = 8 * 1024 * 1024;
1485 max_chunk_size = calc_size * 2;
1486 min_stripe_size = 1 * 1024 * 1024;
9b3f68b9
CM
1487 }
1488
8f18cf13
CM
1489 path = btrfs_alloc_path();
1490 if (!path)
1491 return -ENOMEM;
1492
9b3f68b9
CM
1493 /* we don't want a chunk larger than 10% of the FS */
1494 percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1);
1495 max_chunk_size = min(percent_max, max_chunk_size);
1496
a40a90a0 1497again:
9b3f68b9
CM
1498 if (calc_size * num_stripes > max_chunk_size) {
1499 calc_size = max_chunk_size;
1500 do_div(calc_size, num_stripes);
1501 do_div(calc_size, stripe_len);
1502 calc_size *= stripe_len;
1503 }
1504 /* we don't want tiny stripes */
a40a90a0 1505 calc_size = max_t(u64, min_stripe_size, calc_size);
9b3f68b9 1506
9b3f68b9
CM
1507 do_div(calc_size, stripe_len);
1508 calc_size *= stripe_len;
1509
6324fbf3
CM
1510 INIT_LIST_HEAD(&private_devs);
1511 cur = dev_list->next;
1512 index = 0;
611f0e00
CM
1513
1514 if (type & BTRFS_BLOCK_GROUP_DUP)
1515 min_free = calc_size * 2;
9b3f68b9
CM
1516 else
1517 min_free = calc_size;
611f0e00 1518
ad5bd91e
CM
1519 /* we add 1MB because we never use the first 1MB of the device */
1520 min_free += 1024 * 1024;
1521
6324fbf3
CM
1522 /* build a private list of devices we will allocate from */
1523 while(index < num_stripes) {
b3075717 1524 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
611f0e00 1525
dfe25020
CM
1526 if (device->total_bytes > device->bytes_used)
1527 avail = device->total_bytes - device->bytes_used;
1528 else
1529 avail = 0;
6324fbf3 1530 cur = cur->next;
8f18cf13 1531
dfe25020 1532 if (device->in_fs_metadata && avail >= min_free) {
8f18cf13
CM
1533 u64 ignored_start = 0;
1534 ret = find_free_dev_extent(trans, device, path,
1535 min_free,
1536 &ignored_start);
1537 if (ret == 0) {
1538 list_move_tail(&device->dev_alloc_list,
1539 &private_devs);
611f0e00 1540 index++;
8f18cf13
CM
1541 if (type & BTRFS_BLOCK_GROUP_DUP)
1542 index++;
1543 }
dfe25020 1544 } else if (device->in_fs_metadata && avail > max_avail)
a40a90a0 1545 max_avail = avail;
6324fbf3
CM
1546 if (cur == dev_list)
1547 break;
1548 }
1549 if (index < num_stripes) {
1550 list_splice(&private_devs, dev_list);
a40a90a0
CM
1551 if (index >= min_stripes) {
1552 num_stripes = index;
1553 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
1554 num_stripes /= sub_stripes;
1555 num_stripes *= sub_stripes;
1556 }
1557 looped = 1;
1558 goto again;
1559 }
6324fbf3
CM
1560 if (!looped && max_avail > 0) {
1561 looped = 1;
1562 calc_size = max_avail;
1563 goto again;
1564 }
8f18cf13 1565 btrfs_free_path(path);
6324fbf3
CM
1566 return -ENOSPC;
1567 }
e17cade2
CM
1568 key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
1569 key.type = BTRFS_CHUNK_ITEM_KEY;
1570 ret = find_next_chunk(chunk_root, BTRFS_FIRST_CHUNK_TREE_OBJECTID,
1571 &key.offset);
8f18cf13
CM
1572 if (ret) {
1573 btrfs_free_path(path);
0b86a832 1574 return ret;
8f18cf13 1575 }
0b86a832 1576
0b86a832 1577 chunk = kmalloc(btrfs_chunk_item_size(num_stripes), GFP_NOFS);
8f18cf13
CM
1578 if (!chunk) {
1579 btrfs_free_path(path);
0b86a832 1580 return -ENOMEM;
8f18cf13 1581 }
0b86a832 1582
593060d7
CM
1583 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
1584 if (!map) {
1585 kfree(chunk);
8f18cf13 1586 btrfs_free_path(path);
593060d7
CM
1587 return -ENOMEM;
1588 }
8f18cf13
CM
1589 btrfs_free_path(path);
1590 path = NULL;
593060d7 1591
0b86a832 1592 stripes = &chunk->stripe;
9b3f68b9
CM
1593 *num_bytes = chunk_bytes_by_type(type, calc_size,
1594 num_stripes, sub_stripes);
0b86a832 1595
6324fbf3 1596 index = 0;
0b86a832 1597 while(index < num_stripes) {
e17cade2 1598 struct btrfs_stripe *stripe;
6324fbf3
CM
1599 BUG_ON(list_empty(&private_devs));
1600 cur = private_devs.next;
b3075717 1601 device = list_entry(cur, struct btrfs_device, dev_alloc_list);
611f0e00
CM
1602
1603 /* loop over this device again if we're doing a dup group */
1604 if (!(type & BTRFS_BLOCK_GROUP_DUP) ||
1605 (index == num_stripes - 1))
b3075717 1606 list_move_tail(&device->dev_alloc_list, dev_list);
0b86a832
CM
1607
1608 ret = btrfs_alloc_dev_extent(trans, device,
e17cade2
CM
1609 info->chunk_root->root_key.objectid,
1610 BTRFS_FIRST_CHUNK_TREE_OBJECTID, key.offset,
1611 calc_size, &dev_offset);
0b86a832 1612 BUG_ON(ret);
0b86a832
CM
1613 device->bytes_used += calc_size;
1614 ret = btrfs_update_device(trans, device);
1615 BUG_ON(ret);
1616
593060d7
CM
1617 map->stripes[index].dev = device;
1618 map->stripes[index].physical = dev_offset;
e17cade2
CM
1619 stripe = stripes + index;
1620 btrfs_set_stack_stripe_devid(stripe, device->devid);
1621 btrfs_set_stack_stripe_offset(stripe, dev_offset);
1622 memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
0b86a832
CM
1623 physical = dev_offset;
1624 index++;
1625 }
6324fbf3 1626 BUG_ON(!list_empty(&private_devs));
0b86a832 1627
e17cade2
CM
1628 /* key was set above */
1629 btrfs_set_stack_chunk_length(chunk, *num_bytes);
0b86a832 1630 btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
593060d7 1631 btrfs_set_stack_chunk_stripe_len(chunk, stripe_len);
0b86a832
CM
1632 btrfs_set_stack_chunk_type(chunk, type);
1633 btrfs_set_stack_chunk_num_stripes(chunk, num_stripes);
593060d7
CM
1634 btrfs_set_stack_chunk_io_align(chunk, stripe_len);
1635 btrfs_set_stack_chunk_io_width(chunk, stripe_len);
0b86a832 1636 btrfs_set_stack_chunk_sector_size(chunk, extent_root->sectorsize);
321aecc6 1637 btrfs_set_stack_chunk_sub_stripes(chunk, sub_stripes);
593060d7
CM
1638 map->sector_size = extent_root->sectorsize;
1639 map->stripe_len = stripe_len;
1640 map->io_align = stripe_len;
1641 map->io_width = stripe_len;
1642 map->type = type;
1643 map->num_stripes = num_stripes;
321aecc6 1644 map->sub_stripes = sub_stripes;
0b86a832
CM
1645
1646 ret = btrfs_insert_item(trans, chunk_root, &key, chunk,
1647 btrfs_chunk_item_size(num_stripes));
1648 BUG_ON(ret);
e17cade2 1649 *start = key.offset;;
0b86a832
CM
1650
1651 em = alloc_extent_map(GFP_NOFS);
1652 if (!em)
1653 return -ENOMEM;
0b86a832 1654 em->bdev = (struct block_device *)map;
e17cade2
CM
1655 em->start = key.offset;
1656 em->len = *num_bytes;
0b86a832
CM
1657 em->block_start = 0;
1658
8f18cf13
CM
1659 if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
1660 ret = btrfs_add_system_chunk(trans, chunk_root, &key,
1661 chunk, btrfs_chunk_item_size(num_stripes));
1662 BUG_ON(ret);
1663 }
0b86a832
CM
1664 kfree(chunk);
1665
1666 em_tree = &extent_root->fs_info->mapping_tree.map_tree;
1667 spin_lock(&em_tree->lock);
1668 ret = add_extent_mapping(em_tree, em);
0b86a832 1669 spin_unlock(&em_tree->lock);
b248a415 1670 BUG_ON(ret);
0b86a832
CM
1671 free_extent_map(em);
1672 return ret;
1673}
1674
1675void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
1676{
1677 extent_map_tree_init(&tree->map_tree, GFP_NOFS);
1678}
1679
1680void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
1681{
1682 struct extent_map *em;
1683
1684 while(1) {
1685 spin_lock(&tree->map_tree.lock);
1686 em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
1687 if (em)
1688 remove_extent_mapping(&tree->map_tree, em);
1689 spin_unlock(&tree->map_tree.lock);
1690 if (!em)
1691 break;
1692 kfree(em->bdev);
1693 /* once for us */
1694 free_extent_map(em);
1695 /* once for the tree */
1696 free_extent_map(em);
1697 }
1698}
1699
f188591e
CM
1700int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
1701{
1702 struct extent_map *em;
1703 struct map_lookup *map;
1704 struct extent_map_tree *em_tree = &map_tree->map_tree;
1705 int ret;
1706
1707 spin_lock(&em_tree->lock);
1708 em = lookup_extent_mapping(em_tree, logical, len);
b248a415 1709 spin_unlock(&em_tree->lock);
f188591e
CM
1710 BUG_ON(!em);
1711
1712 BUG_ON(em->start > logical || em->start + em->len < logical);
1713 map = (struct map_lookup *)em->bdev;
1714 if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
1715 ret = map->num_stripes;
321aecc6
CM
1716 else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
1717 ret = map->sub_stripes;
f188591e
CM
1718 else
1719 ret = 1;
1720 free_extent_map(em);
f188591e
CM
1721 return ret;
1722}
1723
dfe25020
CM
1724static int find_live_mirror(struct map_lookup *map, int first, int num,
1725 int optimal)
1726{
1727 int i;
1728 if (map->stripes[optimal].dev->bdev)
1729 return optimal;
1730 for (i = first; i < first + num; i++) {
1731 if (map->stripes[i].dev->bdev)
1732 return i;
1733 }
1734 /* we couldn't find one that doesn't fail. Just return something
1735 * and the io error handling code will clean up eventually
1736 */
1737 return optimal;
1738}
1739
f2d8d74d
CM
1740static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
1741 u64 logical, u64 *length,
1742 struct btrfs_multi_bio **multi_ret,
1743 int mirror_num, struct page *unplug_page)
0b86a832
CM
1744{
1745 struct extent_map *em;
1746 struct map_lookup *map;
1747 struct extent_map_tree *em_tree = &map_tree->map_tree;
1748 u64 offset;
593060d7
CM
1749 u64 stripe_offset;
1750 u64 stripe_nr;
cea9e445 1751 int stripes_allocated = 8;
321aecc6 1752 int stripes_required = 1;
593060d7 1753 int stripe_index;
cea9e445 1754 int i;
f2d8d74d 1755 int num_stripes;
a236aed1 1756 int max_errors = 0;
cea9e445 1757 struct btrfs_multi_bio *multi = NULL;
0b86a832 1758
cea9e445
CM
1759 if (multi_ret && !(rw & (1 << BIO_RW))) {
1760 stripes_allocated = 1;
1761 }
1762again:
1763 if (multi_ret) {
1764 multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
1765 GFP_NOFS);
1766 if (!multi)
1767 return -ENOMEM;
a236aed1
CM
1768
1769 atomic_set(&multi->error, 0);
cea9e445 1770 }
0b86a832
CM
1771
1772 spin_lock(&em_tree->lock);
1773 em = lookup_extent_mapping(em_tree, logical, *length);
b248a415 1774 spin_unlock(&em_tree->lock);
f2d8d74d
CM
1775
1776 if (!em && unplug_page)
1777 return 0;
1778
3b951516 1779 if (!em) {
a061fc8d 1780 printk("unable to find logical %Lu len %Lu\n", logical, *length);
f2d8d74d 1781 BUG();
3b951516 1782 }
0b86a832
CM
1783
1784 BUG_ON(em->start > logical || em->start + em->len < logical);
1785 map = (struct map_lookup *)em->bdev;
1786 offset = logical - em->start;
593060d7 1787
f188591e
CM
1788 if (mirror_num > map->num_stripes)
1789 mirror_num = 0;
1790
cea9e445 1791 /* if our multi bio struct is too small, back off and try again */
321aecc6
CM
1792 if (rw & (1 << BIO_RW)) {
1793 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
1794 BTRFS_BLOCK_GROUP_DUP)) {
1795 stripes_required = map->num_stripes;
a236aed1 1796 max_errors = 1;
321aecc6
CM
1797 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
1798 stripes_required = map->sub_stripes;
a236aed1 1799 max_errors = 1;
321aecc6
CM
1800 }
1801 }
1802 if (multi_ret && rw == WRITE &&
1803 stripes_allocated < stripes_required) {
cea9e445 1804 stripes_allocated = map->num_stripes;
cea9e445
CM
1805 free_extent_map(em);
1806 kfree(multi);
1807 goto again;
1808 }
593060d7
CM
1809 stripe_nr = offset;
1810 /*
1811 * stripe_nr counts the total number of stripes we have to stride
1812 * to get to this block
1813 */
1814 do_div(stripe_nr, map->stripe_len);
1815
1816 stripe_offset = stripe_nr * map->stripe_len;
1817 BUG_ON(offset < stripe_offset);
1818
1819 /* stripe_offset is the offset of this block in its stripe*/
1820 stripe_offset = offset - stripe_offset;
1821
cea9e445 1822 if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
321aecc6 1823 BTRFS_BLOCK_GROUP_RAID10 |
cea9e445
CM
1824 BTRFS_BLOCK_GROUP_DUP)) {
1825 /* we limit the length of each bio to what fits in a stripe */
1826 *length = min_t(u64, em->len - offset,
1827 map->stripe_len - stripe_offset);
1828 } else {
1829 *length = em->len - offset;
1830 }
f2d8d74d
CM
1831
1832 if (!multi_ret && !unplug_page)
cea9e445
CM
1833 goto out;
1834
f2d8d74d 1835 num_stripes = 1;
cea9e445 1836 stripe_index = 0;
8790d502 1837 if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
f2d8d74d
CM
1838 if (unplug_page || (rw & (1 << BIO_RW)))
1839 num_stripes = map->num_stripes;
2fff734f 1840 else if (mirror_num)
f188591e 1841 stripe_index = mirror_num - 1;
dfe25020
CM
1842 else {
1843 stripe_index = find_live_mirror(map, 0,
1844 map->num_stripes,
1845 current->pid % map->num_stripes);
1846 }
2fff734f 1847
611f0e00 1848 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
cea9e445 1849 if (rw & (1 << BIO_RW))
f2d8d74d 1850 num_stripes = map->num_stripes;
f188591e
CM
1851 else if (mirror_num)
1852 stripe_index = mirror_num - 1;
2fff734f 1853
321aecc6
CM
1854 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
1855 int factor = map->num_stripes / map->sub_stripes;
321aecc6
CM
1856
1857 stripe_index = do_div(stripe_nr, factor);
1858 stripe_index *= map->sub_stripes;
1859
f2d8d74d
CM
1860 if (unplug_page || (rw & (1 << BIO_RW)))
1861 num_stripes = map->sub_stripes;
321aecc6
CM
1862 else if (mirror_num)
1863 stripe_index += mirror_num - 1;
dfe25020
CM
1864 else {
1865 stripe_index = find_live_mirror(map, stripe_index,
1866 map->sub_stripes, stripe_index +
1867 current->pid % map->sub_stripes);
1868 }
8790d502
CM
1869 } else {
1870 /*
1871 * after this do_div call, stripe_nr is the number of stripes
1872 * on this device we have to walk to find the data, and
1873 * stripe_index is the number of our device in the stripe array
1874 */
1875 stripe_index = do_div(stripe_nr, map->num_stripes);
1876 }
593060d7 1877 BUG_ON(stripe_index >= map->num_stripes);
cea9e445 1878
f2d8d74d
CM
1879 for (i = 0; i < num_stripes; i++) {
1880 if (unplug_page) {
1881 struct btrfs_device *device;
1882 struct backing_dev_info *bdi;
1883
1884 device = map->stripes[stripe_index].dev;
dfe25020
CM
1885 if (device->bdev) {
1886 bdi = blk_get_backing_dev_info(device->bdev);
1887 if (bdi->unplug_io_fn) {
1888 bdi->unplug_io_fn(bdi, unplug_page);
1889 }
f2d8d74d
CM
1890 }
1891 } else {
1892 multi->stripes[i].physical =
1893 map->stripes[stripe_index].physical +
1894 stripe_offset + stripe_nr * map->stripe_len;
1895 multi->stripes[i].dev = map->stripes[stripe_index].dev;
1896 }
cea9e445 1897 stripe_index++;
593060d7 1898 }
f2d8d74d
CM
1899 if (multi_ret) {
1900 *multi_ret = multi;
1901 multi->num_stripes = num_stripes;
a236aed1 1902 multi->max_errors = max_errors;
f2d8d74d 1903 }
cea9e445 1904out:
0b86a832 1905 free_extent_map(em);
0b86a832
CM
1906 return 0;
1907}
1908
f2d8d74d
CM
1909int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
1910 u64 logical, u64 *length,
1911 struct btrfs_multi_bio **multi_ret, int mirror_num)
1912{
1913 return __btrfs_map_block(map_tree, rw, logical, length, multi_ret,
1914 mirror_num, NULL);
1915}
1916
1917int btrfs_unplug_page(struct btrfs_mapping_tree *map_tree,
1918 u64 logical, struct page *page)
1919{
1920 u64 length = PAGE_CACHE_SIZE;
1921 return __btrfs_map_block(map_tree, READ, logical, &length,
1922 NULL, 0, page);
1923}
1924
1925
8790d502
CM
1926#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
1927static void end_bio_multi_stripe(struct bio *bio, int err)
1928#else
1929static int end_bio_multi_stripe(struct bio *bio,
1930 unsigned int bytes_done, int err)
1931#endif
1932{
cea9e445 1933 struct btrfs_multi_bio *multi = bio->bi_private;
8790d502
CM
1934
1935#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1936 if (bio->bi_size)
1937 return 1;
1938#endif
1939 if (err)
a236aed1 1940 atomic_inc(&multi->error);
8790d502 1941
cea9e445 1942 if (atomic_dec_and_test(&multi->stripes_pending)) {
8790d502
CM
1943 bio->bi_private = multi->private;
1944 bio->bi_end_io = multi->end_io;
a236aed1
CM
1945 /* only send an error to the higher layers if it is
1946 * beyond the tolerance of the multi-bio
1947 */
1259ab75 1948 if (atomic_read(&multi->error) > multi->max_errors) {
a236aed1 1949 err = -EIO;
1259ab75
CM
1950 } else if (err) {
1951 /*
1952 * this bio is actually up to date, we didn't
1953 * go over the max number of errors
1954 */
1955 set_bit(BIO_UPTODATE, &bio->bi_flags);
a236aed1 1956 err = 0;
1259ab75 1957 }
8790d502
CM
1958 kfree(multi);
1959
73f61b2a
M
1960#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1961 bio_endio(bio, bio->bi_size, err);
1962#else
8790d502 1963 bio_endio(bio, err);
73f61b2a 1964#endif
8790d502
CM
1965 } else {
1966 bio_put(bio);
1967 }
1968#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
1969 return 0;
1970#endif
1971}
1972
f188591e
CM
1973int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
1974 int mirror_num)
0b86a832
CM
1975{
1976 struct btrfs_mapping_tree *map_tree;
1977 struct btrfs_device *dev;
8790d502 1978 struct bio *first_bio = bio;
0b86a832 1979 u64 logical = bio->bi_sector << 9;
0b86a832
CM
1980 u64 length = 0;
1981 u64 map_length;
cea9e445 1982 struct btrfs_multi_bio *multi = NULL;
0b86a832 1983 int ret;
8790d502
CM
1984 int dev_nr = 0;
1985 int total_devs = 1;
0b86a832 1986
f2d8d74d 1987 length = bio->bi_size;
0b86a832
CM
1988 map_tree = &root->fs_info->mapping_tree;
1989 map_length = length;
cea9e445 1990
f188591e
CM
1991 ret = btrfs_map_block(map_tree, rw, logical, &map_length, &multi,
1992 mirror_num);
cea9e445
CM
1993 BUG_ON(ret);
1994
1995 total_devs = multi->num_stripes;
1996 if (map_length < length) {
1997 printk("mapping failed logical %Lu bio len %Lu "
1998 "len %Lu\n", logical, length, map_length);
1999 BUG();
2000 }
2001 multi->end_io = first_bio->bi_end_io;
2002 multi->private = first_bio->bi_private;
2003 atomic_set(&multi->stripes_pending, multi->num_stripes);
2004
8790d502 2005 while(dev_nr < total_devs) {
8790d502 2006 if (total_devs > 1) {
8790d502
CM
2007 if (dev_nr < total_devs - 1) {
2008 bio = bio_clone(first_bio, GFP_NOFS);
2009 BUG_ON(!bio);
2010 } else {
2011 bio = first_bio;
2012 }
2013 bio->bi_private = multi;
2014 bio->bi_end_io = end_bio_multi_stripe;
2015 }
cea9e445
CM
2016 bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
2017 dev = multi->stripes[dev_nr].dev;
dfe25020
CM
2018 if (dev && dev->bdev) {
2019 bio->bi_bdev = dev->bdev;
2020 spin_lock(&dev->io_lock);
2021 dev->total_ios++;
2022 spin_unlock(&dev->io_lock);
2023 submit_bio(rw, bio);
2024 } else {
2025 bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
2026 bio->bi_sector = logical >> 9;
2027#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23)
2028 bio_endio(bio, bio->bi_size, -EIO);
2029#else
2030 bio_endio(bio, -EIO);
2031#endif
2032 }
8790d502
CM
2033 dev_nr++;
2034 }
cea9e445
CM
2035 if (total_devs == 1)
2036 kfree(multi);
0b86a832
CM
2037 return 0;
2038}
2039
a443755f
CM
2040struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
2041 u8 *uuid)
0b86a832 2042{
8a4b83cc 2043 struct list_head *head = &root->fs_info->fs_devices->devices;
0b86a832 2044
a443755f 2045 return __find_device(head, devid, uuid);
0b86a832
CM
2046}
2047
dfe25020
CM
2048static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
2049 u64 devid, u8 *dev_uuid)
2050{
2051 struct btrfs_device *device;
2052 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
2053
2054 device = kzalloc(sizeof(*device), GFP_NOFS);
2055 list_add(&device->dev_list,
2056 &fs_devices->devices);
2057 list_add(&device->dev_alloc_list,
2058 &fs_devices->alloc_list);
2059 device->barriers = 1;
2060 device->dev_root = root->fs_info->dev_root;
2061 device->devid = devid;
2062 fs_devices->num_devices++;
2063 spin_lock_init(&device->io_lock);
2064 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
2065 return device;
2066}
2067
2068
0b86a832
CM
2069static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
2070 struct extent_buffer *leaf,
2071 struct btrfs_chunk *chunk)
2072{
2073 struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
2074 struct map_lookup *map;
2075 struct extent_map *em;
2076 u64 logical;
2077 u64 length;
2078 u64 devid;
a443755f 2079 u8 uuid[BTRFS_UUID_SIZE];
593060d7 2080 int num_stripes;
0b86a832 2081 int ret;
593060d7 2082 int i;
0b86a832 2083
e17cade2
CM
2084 logical = key->offset;
2085 length = btrfs_chunk_length(leaf, chunk);
a061fc8d 2086
0b86a832
CM
2087 spin_lock(&map_tree->map_tree.lock);
2088 em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
b248a415 2089 spin_unlock(&map_tree->map_tree.lock);
0b86a832
CM
2090
2091 /* already mapped? */
2092 if (em && em->start <= logical && em->start + em->len > logical) {
2093 free_extent_map(em);
0b86a832
CM
2094 return 0;
2095 } else if (em) {
2096 free_extent_map(em);
2097 }
0b86a832
CM
2098
2099 map = kzalloc(sizeof(*map), GFP_NOFS);
2100 if (!map)
2101 return -ENOMEM;
2102
2103 em = alloc_extent_map(GFP_NOFS);
2104 if (!em)
2105 return -ENOMEM;
593060d7
CM
2106 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
2107 map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
0b86a832
CM
2108 if (!map) {
2109 free_extent_map(em);
2110 return -ENOMEM;
2111 }
2112
2113 em->bdev = (struct block_device *)map;
2114 em->start = logical;
2115 em->len = length;
2116 em->block_start = 0;
2117
593060d7
CM
2118 map->num_stripes = num_stripes;
2119 map->io_width = btrfs_chunk_io_width(leaf, chunk);
2120 map->io_align = btrfs_chunk_io_align(leaf, chunk);
2121 map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
2122 map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
2123 map->type = btrfs_chunk_type(leaf, chunk);
321aecc6 2124 map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
593060d7
CM
2125 for (i = 0; i < num_stripes; i++) {
2126 map->stripes[i].physical =
2127 btrfs_stripe_offset_nr(leaf, chunk, i);
2128 devid = btrfs_stripe_devid_nr(leaf, chunk, i);
a443755f
CM
2129 read_extent_buffer(leaf, uuid, (unsigned long)
2130 btrfs_stripe_dev_uuid_nr(chunk, i),
2131 BTRFS_UUID_SIZE);
2132 map->stripes[i].dev = btrfs_find_device(root, devid, uuid);
dfe25020
CM
2133
2134 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
593060d7
CM
2135 kfree(map);
2136 free_extent_map(em);
2137 return -EIO;
2138 }
dfe25020
CM
2139 if (!map->stripes[i].dev) {
2140 map->stripes[i].dev =
2141 add_missing_dev(root, devid, uuid);
2142 if (!map->stripes[i].dev) {
2143 kfree(map);
2144 free_extent_map(em);
2145 return -EIO;
2146 }
2147 }
2148 map->stripes[i].dev->in_fs_metadata = 1;
0b86a832
CM
2149 }
2150
2151 spin_lock(&map_tree->map_tree.lock);
2152 ret = add_extent_mapping(&map_tree->map_tree, em);
0b86a832 2153 spin_unlock(&map_tree->map_tree.lock);
b248a415 2154 BUG_ON(ret);
0b86a832
CM
2155 free_extent_map(em);
2156
2157 return 0;
2158}
2159
2160static int fill_device_from_item(struct extent_buffer *leaf,
2161 struct btrfs_dev_item *dev_item,
2162 struct btrfs_device *device)
2163{
2164 unsigned long ptr;
0b86a832
CM
2165
2166 device->devid = btrfs_device_id(leaf, dev_item);
2167 device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
2168 device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
2169 device->type = btrfs_device_type(leaf, dev_item);
2170 device->io_align = btrfs_device_io_align(leaf, dev_item);
2171 device->io_width = btrfs_device_io_width(leaf, dev_item);
2172 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
0b86a832
CM
2173
2174 ptr = (unsigned long)btrfs_device_uuid(dev_item);
e17cade2 2175 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
0b86a832 2176
0b86a832
CM
2177 return 0;
2178}
2179
0d81ba5d 2180static int read_one_dev(struct btrfs_root *root,
0b86a832
CM
2181 struct extent_buffer *leaf,
2182 struct btrfs_dev_item *dev_item)
2183{
2184 struct btrfs_device *device;
2185 u64 devid;
2186 int ret;
a443755f
CM
2187 u8 dev_uuid[BTRFS_UUID_SIZE];
2188
0b86a832 2189 devid = btrfs_device_id(leaf, dev_item);
a443755f
CM
2190 read_extent_buffer(leaf, dev_uuid,
2191 (unsigned long)btrfs_device_uuid(dev_item),
2192 BTRFS_UUID_SIZE);
2193 device = btrfs_find_device(root, devid, dev_uuid);
6324fbf3 2194 if (!device) {
dfe25020
CM
2195 printk("warning devid %Lu missing\n", devid);
2196 device = add_missing_dev(root, devid, dev_uuid);
6324fbf3
CM
2197 if (!device)
2198 return -ENOMEM;
6324fbf3 2199 }
0b86a832
CM
2200
2201 fill_device_from_item(leaf, dev_item, device);
2202 device->dev_root = root->fs_info->dev_root;
dfe25020 2203 device->in_fs_metadata = 1;
0b86a832
CM
2204 ret = 0;
2205#if 0
2206 ret = btrfs_open_device(device);
2207 if (ret) {
2208 kfree(device);
2209 }
2210#endif
2211 return ret;
2212}
2213
0d81ba5d
CM
2214int btrfs_read_super_device(struct btrfs_root *root, struct extent_buffer *buf)
2215{
2216 struct btrfs_dev_item *dev_item;
2217
2218 dev_item = (struct btrfs_dev_item *)offsetof(struct btrfs_super_block,
2219 dev_item);
2220 return read_one_dev(root, buf, dev_item);
2221}
2222
0b86a832
CM
2223int btrfs_read_sys_array(struct btrfs_root *root)
2224{
2225 struct btrfs_super_block *super_copy = &root->fs_info->super_copy;
a061fc8d 2226 struct extent_buffer *sb;
0b86a832 2227 struct btrfs_disk_key *disk_key;
0b86a832 2228 struct btrfs_chunk *chunk;
84eed90f
CM
2229 u8 *ptr;
2230 unsigned long sb_ptr;
2231 int ret = 0;
0b86a832
CM
2232 u32 num_stripes;
2233 u32 array_size;
2234 u32 len = 0;
0b86a832 2235 u32 cur;
84eed90f 2236 struct btrfs_key key;
0b86a832 2237
a061fc8d
CM
2238 sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET,
2239 BTRFS_SUPER_INFO_SIZE);
2240 if (!sb)
2241 return -ENOMEM;
2242 btrfs_set_buffer_uptodate(sb);
2243 write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
0b86a832
CM
2244 array_size = btrfs_super_sys_array_size(super_copy);
2245
0b86a832
CM
2246 ptr = super_copy->sys_chunk_array;
2247 sb_ptr = offsetof(struct btrfs_super_block, sys_chunk_array);
2248 cur = 0;
2249
2250 while (cur < array_size) {
2251 disk_key = (struct btrfs_disk_key *)ptr;
2252 btrfs_disk_key_to_cpu(&key, disk_key);
2253
a061fc8d 2254 len = sizeof(*disk_key); ptr += len;
0b86a832
CM
2255 sb_ptr += len;
2256 cur += len;
2257
0d81ba5d 2258 if (key.type == BTRFS_CHUNK_ITEM_KEY) {
0b86a832 2259 chunk = (struct btrfs_chunk *)sb_ptr;
0d81ba5d 2260 ret = read_one_chunk(root, &key, sb, chunk);
84eed90f
CM
2261 if (ret)
2262 break;
0b86a832
CM
2263 num_stripes = btrfs_chunk_num_stripes(sb, chunk);
2264 len = btrfs_chunk_item_size(num_stripes);
2265 } else {
84eed90f
CM
2266 ret = -EIO;
2267 break;
0b86a832
CM
2268 }
2269 ptr += len;
2270 sb_ptr += len;
2271 cur += len;
2272 }
a061fc8d 2273 free_extent_buffer(sb);
84eed90f 2274 return ret;
0b86a832
CM
2275}
2276
2277int btrfs_read_chunk_tree(struct btrfs_root *root)
2278{
2279 struct btrfs_path *path;
2280 struct extent_buffer *leaf;
2281 struct btrfs_key key;
2282 struct btrfs_key found_key;
2283 int ret;
2284 int slot;
2285
2286 root = root->fs_info->chunk_root;
2287
2288 path = btrfs_alloc_path();
2289 if (!path)
2290 return -ENOMEM;
2291
2292 /* first we search for all of the device items, and then we
2293 * read in all of the chunk items. This way we can create chunk
2294 * mappings that reference all of the devices that are afound
2295 */
2296 key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
2297 key.offset = 0;
2298 key.type = 0;
2299again:
2300 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2301 while(1) {
2302 leaf = path->nodes[0];
2303 slot = path->slots[0];
2304 if (slot >= btrfs_header_nritems(leaf)) {
2305 ret = btrfs_next_leaf(root, path);
2306 if (ret == 0)
2307 continue;
2308 if (ret < 0)
2309 goto error;
2310 break;
2311 }
2312 btrfs_item_key_to_cpu(leaf, &found_key, slot);
2313 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
2314 if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
2315 break;
2316 if (found_key.type == BTRFS_DEV_ITEM_KEY) {
2317 struct btrfs_dev_item *dev_item;
2318 dev_item = btrfs_item_ptr(leaf, slot,
2319 struct btrfs_dev_item);
0d81ba5d 2320 ret = read_one_dev(root, leaf, dev_item);
0b86a832
CM
2321 BUG_ON(ret);
2322 }
2323 } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
2324 struct btrfs_chunk *chunk;
2325 chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
2326 ret = read_one_chunk(root, &found_key, leaf, chunk);
2327 }
2328 path->slots[0]++;
2329 }
2330 if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
2331 key.objectid = 0;
2332 btrfs_release_path(root, path);
2333 goto again;
2334 }
2335
2336 btrfs_free_path(path);
2337 ret = 0;
2338error:
2339 return ret;
2340}
2341