1#include <linux/bitops.h>
2#include <linux/slab.h>
3#include <linux/bio.h>
4#include <linux/mm.h>
5#include <linux/gfp.h>
6#include <linux/pagemap.h>
7#include <linux/page-flags.h>
8#include <linux/module.h>
9#include <linux/spinlock.h>
10#include <linux/blkdev.h>
11#include "extent_map.h"
12
13/* temporary define until extent_map moves out of btrfs */
14struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
15 unsigned long extra_flags,
16 void (*ctor)(void *, struct kmem_cache *,
17 unsigned long));
18
19static struct kmem_cache *extent_map_cache;
20static struct kmem_cache *extent_state_cache;
21
22struct tree_entry {
23 u64 start;
24 u64 end;
25 int in_tree;
26 struct rb_node rb_node;
27};
28
29void __init extent_map_init(void)
30{
31 extent_map_cache = btrfs_cache_create("extent_map",
32 sizeof(struct extent_map),
33 SLAB_DESTROY_BY_RCU,
34 NULL);
35 extent_state_cache = btrfs_cache_create("extent_state",
36 sizeof(struct extent_state),
37 SLAB_DESTROY_BY_RCU,
38 NULL);
39}
40
41void __exit extent_map_exit(void)
42{
43 if (extent_map_cache)
44 kmem_cache_destroy(extent_map_cache);
45 if (extent_state_cache)
46 kmem_cache_destroy(extent_state_cache);
47}
48
49void extent_map_tree_init(struct extent_map_tree *tree,
50 struct address_space *mapping, gfp_t mask)
51{
52 tree->map.rb_node = NULL;
53 tree->state.rb_node = NULL;
54 tree->ops = NULL;
55 rwlock_init(&tree->lock);
56 tree->mapping = mapping;
57}
58EXPORT_SYMBOL(extent_map_tree_init);
59
60struct extent_map *alloc_extent_map(gfp_t mask)
61{
62 struct extent_map *em;
63 em = kmem_cache_alloc(extent_map_cache, mask);
64 if (!em || IS_ERR(em))
65 return em;
66 em->in_tree = 0;
67 atomic_set(&em->refs, 1);
68 return em;
69}
70EXPORT_SYMBOL(alloc_extent_map);
71
72void free_extent_map(struct extent_map *em)
73{
74 if (!em)
75 return;
76 if (atomic_dec_and_test(&em->refs)) {
77 WARN_ON(em->in_tree);
78 kmem_cache_free(extent_map_cache, em);
79 }
80}
81EXPORT_SYMBOL(free_extent_map);
82
83
84struct extent_state *alloc_extent_state(gfp_t mask)
85{
86 struct extent_state *state;
87 state = kmem_cache_alloc(extent_state_cache, mask);
88 if (!state || IS_ERR(state))
89 return state;
90 state->state = 0;
91 state->in_tree = 0;
92 state->private = 0;
93 atomic_set(&state->refs, 1);
94 init_waitqueue_head(&state->wq);
95 return state;
96}
97EXPORT_SYMBOL(alloc_extent_state);
98
99void free_extent_state(struct extent_state *state)
100{
101 if (!state)
102 return;
103 if (atomic_dec_and_test(&state->refs)) {
104 WARN_ON(state->in_tree);
105 kmem_cache_free(extent_state_cache, state);
106 }
107}
108EXPORT_SYMBOL(free_extent_state);
109
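/*
 * Illustrative sketch, not part of the original file: how a caller might
 * tie an extent_map_tree to an inode's page cache and round-trip one
 * extent_map allocation.  The helper name is made up for this example.
 */
static void example_init_tree(struct extent_map_tree *tree,
			      struct inode *inode)
{
	/* all page-level helpers below work against this mapping */
	extent_map_tree_init(tree, inode->i_mapping, GFP_NOFS);

	/* alloc_extent_map() hands back a refcount of 1; drop it again */
	free_extent_map(alloc_extent_map(GFP_NOFS));
}
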
110static struct rb_node *tree_insert(struct rb_root *root, u64 offset,
111 struct rb_node *node)
112{
113 struct rb_node ** p = &root->rb_node;
114 struct rb_node * parent = NULL;
115 struct tree_entry *entry;
116
117 while(*p) {
118 parent = *p;
119 entry = rb_entry(parent, struct tree_entry, rb_node);
120
121 if (offset < entry->start)
122 p = &(*p)->rb_left;
123 else if (offset > entry->end)
124 p = &(*p)->rb_right;
125 else
126 return parent;
127 }
128
129 entry = rb_entry(node, struct tree_entry, rb_node);
130 entry->in_tree = 1;
131 rb_link_node(node, parent, p);
132 rb_insert_color(node, root);
133 return NULL;
134}
135
136static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
137 struct rb_node **prev_ret)
138{
139 struct rb_node * n = root->rb_node;
140 struct rb_node *prev = NULL;
141 struct tree_entry *entry;
142 struct tree_entry *prev_entry = NULL;
143
144 while(n) {
145 entry = rb_entry(n, struct tree_entry, rb_node);
146 prev = n;
147 prev_entry = entry;
148
149 if (offset < entry->start)
150 n = n->rb_left;
151 else if (offset > entry->end)
152 n = n->rb_right;
153 else
154 return n;
155 }
156 if (!prev_ret)
157 return NULL;
158 while(prev && offset > prev_entry->end) {
159 prev = rb_next(prev);
160 prev_entry = rb_entry(prev, struct tree_entry, rb_node);
161 }
162 *prev_ret = prev;
163 return NULL;
164}
165
166static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
167{
168 struct rb_node *prev;
169 struct rb_node *ret;
170 ret = __tree_search(root, offset, &prev);
171 if (!ret)
172 return prev;
173 return ret;
174}
175
176static int tree_delete(struct rb_root *root, u64 offset)
177{
178 struct rb_node *node;
179 struct tree_entry *entry;
180
181 node = __tree_search(root, offset, NULL);
182 if (!node)
183 return -ENOENT;
184 entry = rb_entry(node, struct tree_entry, rb_node);
185 entry->in_tree = 0;
186 rb_erase(node, root);
187 return 0;
188}
189
190/*
191 * add_extent_mapping tries a simple backward merge with existing
192 * mappings. The extent_map struct passed in will be inserted into
193 * the tree directly (no copies made, just a reference taken).
194 */
195int add_extent_mapping(struct extent_map_tree *tree,
196 struct extent_map *em)
197{
198 int ret = 0;
199 struct extent_map *prev = NULL;
200 struct rb_node *rb;
201
202 write_lock_irq(&tree->lock);
203 rb = tree_insert(&tree->map, em->end, &em->rb_node);
204 if (rb) {
205 prev = rb_entry(rb, struct extent_map, rb_node);
206 printk("found extent map %Lu %Lu on insert of %Lu %Lu\n", prev->start, prev->end, em->start, em->end);
207 ret = -EEXIST;
208 goto out;
209 }
210 atomic_inc(&em->refs);
211 if (em->start != 0) {
212 rb = rb_prev(&em->rb_node);
213 if (rb)
214 prev = rb_entry(rb, struct extent_map, rb_node);
215 if (prev && prev->end + 1 == em->start &&
216 ((em->block_start == EXTENT_MAP_HOLE &&
217 prev->block_start == EXTENT_MAP_HOLE) ||
218 (em->block_start == prev->block_end + 1))) {
219 em->start = prev->start;
220 em->block_start = prev->block_start;
221 rb_erase(&prev->rb_node, &tree->map);
222 prev->in_tree = 0;
223 free_extent_map(prev);
224 }
225 }
226out:
227 write_unlock_irq(&tree->lock);
228 return ret;
229}
230EXPORT_SYMBOL(add_extent_mapping);
231
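/*
 * Illustrative sketch, not part of the original file: building and caching
 * a mapping.  add_extent_mapping() takes its own reference on success, so
 * the caller still drops the reference it got from alloc_extent_map().
 * The helper name is made up for this example.
 */
static int example_cache_mapping(struct extent_map_tree *tree,
				 u64 start, u64 end, u64 block_start,
				 struct block_device *bdev)
{
	struct extent_map *em;
	int ret;

	em = alloc_extent_map(GFP_NOFS);
	if (!em)
		return -ENOMEM;

	em->start = start;
	em->end = end;
	em->block_start = block_start;
	em->block_end = block_start + (end - start);
	em->bdev = bdev;

	ret = add_extent_mapping(tree, em);	/* -EEXIST on overlap */
	free_extent_map(em);			/* drop the allocation ref */
	return ret;
}
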
232/*
233 * lookup_extent_mapping returns the first extent_map struct in the
234 * tree that intersects the [start, end] (inclusive) range. There may
235 * be additional objects in the tree that intersect, so check the object
236 * returned carefully to make sure you don't need additional lookups.
237 */
238struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
239 u64 start, u64 end)
240{
241 struct extent_map *em;
242 struct rb_node *rb_node;
243
244 read_lock_irq(&tree->lock);
245 rb_node = tree_search(&tree->map, start);
246 if (!rb_node) {
247 em = NULL;
248 goto out;
249 }
250 if (IS_ERR(rb_node)) {
251 em = ERR_PTR(PTR_ERR(rb_node));
252 goto out;
253 }
254 em = rb_entry(rb_node, struct extent_map, rb_node);
255 if (em->end < start || em->start > end) {
256 em = NULL;
257 goto out;
258 }
259 atomic_inc(&em->refs);
260out:
261 read_unlock_irq(&tree->lock);
262 return em;
263}
264EXPORT_SYMBOL(lookup_extent_mapping);
265
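/*
 * Illustrative sketch, not part of the original file: walking every mapping
 * that intersects [start, end].  Each hit comes back with a reference held,
 * which is dropped once the caller is done with it.
 */
static void example_walk_mappings(struct extent_map_tree *tree,
				  u64 start, u64 end)
{
	struct extent_map *em;

	while (start <= end) {
		em = lookup_extent_mapping(tree, start, end);
		if (!em || IS_ERR(em))
			break;
		printk("mapping %Lu-%Lu block %Lu\n",
		       em->start, em->end, em->block_start);
		start = em->end + 1;
		free_extent_map(em);
	}
}
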
266/*
267 * removes an extent_map struct from the tree. No reference counts are
268 * dropped, and no checks are done to see if the range is in use
269 */
270int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
271{
272 int ret;
273
274 write_lock_irq(&tree->lock);
275 ret = tree_delete(&tree->map, em->end);
276 write_unlock_irq(&tree->lock);
277 return ret;
278}
279EXPORT_SYMBOL(remove_extent_mapping);
280
281/*
282 * utility function to look for merge candidates inside a given range.
283 * Any extents with matching state are merged together into a single
284 * extent in the tree. Extents with EXTENT_IO in their state field
285 * are not merged because the end_io handlers need to be able to do
286 * operations on them without sleeping (or doing allocations/splits).
287 *
288 * This should be called with the tree lock held.
289 */
290static int merge_state(struct extent_map_tree *tree,
291 struct extent_state *state)
292{
293 struct extent_state *other;
294 struct rb_node *other_node;
295
296 if (state->state & EXTENT_IOBITS)
297 return 0;
298
299 other_node = rb_prev(&state->rb_node);
300 if (other_node) {
301 other = rb_entry(other_node, struct extent_state, rb_node);
302 if (other->end == state->start - 1 &&
303 other->state == state->state) {
304 state->start = other->start;
305 other->in_tree = 0;
306 rb_erase(&other->rb_node, &tree->state);
307 free_extent_state(other);
308 }
309 }
310 other_node = rb_next(&state->rb_node);
311 if (other_node) {
312 other = rb_entry(other_node, struct extent_state, rb_node);
313 if (other->start == state->end + 1 &&
314 other->state == state->state) {
315 other->start = state->start;
316 state->in_tree = 0;
317 rb_erase(&state->rb_node, &tree->state);
318 free_extent_state(state);
319 }
320 }
321 return 0;
322}
323
324/*
325 * insert an extent_state struct into the tree. 'bits' are set on the
326 * struct before it is inserted.
327 *
328 * This may return -EEXIST if the extent is already there, in which case the
329 * state struct is freed.
330 *
331 * The tree lock is not taken internally. This is a utility function and
332 * probably isn't what you want to call (see set/clear_extent_bit).
333 */
334static int insert_state(struct extent_map_tree *tree,
335 struct extent_state *state, u64 start, u64 end,
336 int bits)
337{
338 struct rb_node *node;
339
340 if (end < start) {
341 printk("end < start %Lu %Lu\n", end, start);
342 WARN_ON(1);
343 }
344 state->state |= bits;
345 state->start = start;
346 state->end = end;
347 if ((end & 4095) == 0) {
348 printk("insert state %Lu %Lu strange end\n", start, end);
349 WARN_ON(1);
350 }
351 node = tree_insert(&tree->state, end, &state->rb_node);
352 if (node) {
353 struct extent_state *found;
354 found = rb_entry(node, struct extent_state, rb_node);
355 printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, start, end);
356 free_extent_state(state);
357 return -EEXIST;
358 }
359 merge_state(tree, state);
360 return 0;
361}
362
363/*
364 * split a given extent state struct in two, inserting the preallocated
365 * struct 'prealloc' as the newly created second half. 'split' indicates an
366 * offset inside 'orig' where it should be split.
367 *
368 * Before calling,
369 * the tree has 'orig' at [orig->start, orig->end]. After calling, there
370 * are two extent state structs in the tree:
371 * prealloc: [orig->start, split - 1]
372 * orig: [ split, orig->end ]
373 *
374 * The tree locks are not taken by this function. They need to be held
375 * by the caller.
376 */
377static int split_state(struct extent_map_tree *tree, struct extent_state *orig,
378 struct extent_state *prealloc, u64 split)
379{
380 struct rb_node *node;
381 prealloc->start = orig->start;
382 prealloc->end = split - 1;
383 prealloc->state = orig->state;
384 orig->start = split;
385 if ((prealloc->end & 4095) == 0) {
386 printk("insert state %Lu %Lu strange end\n", prealloc->start,
387 prealloc->end);
388 WARN_ON(1);
389 }
390 node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node);
391 if (node) {
392 struct extent_state *found;
393 found = rb_entry(node, struct extent_state, rb_node);
394 printk("found node %Lu %Lu on insert of %Lu %Lu\n", found->start, found->end, prealloc->start, prealloc->end);
395 free_extent_state(prealloc);
396 return -EEXIST;
397 }
398 return 0;
399}
400
401/*
402 * utility function to clear some bits in an extent state struct.
403 * it will optionally wake up any one waiting on this state (wake == 1), or
404 * forcibly remove the state from the tree (delete == 1).
405 *
406 * If no bits are set on the state struct after clearing things, the
407 * struct is freed and removed from the tree
408 */
409static int clear_state_bit(struct extent_map_tree *tree,
410 struct extent_state *state, int bits, int wake,
411 int delete)
412{
413 int ret = state->state & bits;
414 state->state &= ~bits;
415 if (wake)
416 wake_up(&state->wq);
417 if (delete || state->state == 0) {
418 if (state->in_tree) {
419 rb_erase(&state->rb_node, &tree->state);
420 state->in_tree = 0;
421 free_extent_state(state);
422 } else {
423 WARN_ON(1);
424 }
425 } else {
426 merge_state(tree, state);
427 }
428 return ret;
429}
430
431/*
432 * clear some bits on a range in the tree. This may require splitting
433 * or inserting elements in the tree, so the gfp mask is used to
434 * indicate which allocations or sleeping are allowed.
435 *
436 * pass 'wake' == 1 to kick any sleepers, and 'delete' == 1 to remove
437 * the given range from the tree regardless of state (ie for truncate).
438 *
439 * the range [start, end] is inclusive.
440 *
441 * This takes the tree lock, and returns < 0 on error, > 0 if any of the
442 * bits were already set, or zero if none of the bits were already set.
443 */
444int clear_extent_bit(struct extent_map_tree *tree, u64 start, u64 end,
445 int bits, int wake, int delete, gfp_t mask)
446{
447 struct extent_state *state;
448 struct extent_state *prealloc = NULL;
449 struct rb_node *node;
450 unsigned long flags;
451 int err;
452 int set = 0;
453
454again:
455 if (!prealloc && (mask & __GFP_WAIT)) {
456 prealloc = alloc_extent_state(mask);
457 if (!prealloc)
458 return -ENOMEM;
459 }
460
461 write_lock_irqsave(&tree->lock, flags);
462 /*
463 * this search will find the extents that end after
464 * our range starts
465 */
466 node = tree_search(&tree->state, start);
467 if (!node)
468 goto out;
469 state = rb_entry(node, struct extent_state, rb_node);
470 if (state->start > end)
471 goto out;
472 WARN_ON(state->end < start);
473
474 /*
475 * | ---- desired range ---- |
476 * | state | or
477 * | ------------- state -------------- |
478 *
479 * We need to split the extent we found, and may flip
480 * bits on second half.
481 *
482 * If the extent we found extends past our range, we
483 * just split and search again. It'll get split again
484 * the next time though.
485 *
486 * If the extent we found is inside our range, we clear
487 * the desired bit on it.
488 */
489
490 if (state->start < start) {
491 err = split_state(tree, state, prealloc, start);
492 BUG_ON(err == -EEXIST);
493 prealloc = NULL;
494 if (err)
495 goto out;
496 if (state->end <= end) {
497 start = state->end + 1;
498 set |= clear_state_bit(tree, state, bits,
499 wake, delete);
500 } else {
501 start = state->start;
502 }
503 goto search_again;
504 }
505 /*
506 * | ---- desired range ---- |
507 * | state |
508 * We need to split the extent, and clear the bit
509 * on the first half
510 */
511 if (state->start <= end && state->end > end) {
512 err = split_state(tree, state, prealloc, end + 1);
513 BUG_ON(err == -EEXIST);
514
515 if (wake)
516 wake_up(&state->wq);
517 set |= clear_state_bit(tree, prealloc, bits,
518 wake, delete);
519 prealloc = NULL;
520 goto out;
521 }
522
523 start = state->end + 1;
524 set |= clear_state_bit(tree, state, bits, wake, delete);
525 goto search_again;
526
527out:
528 write_unlock_irqrestore(&tree->lock, flags);
529 if (prealloc)
530 free_extent_state(prealloc);
531
532 return set;
533
534search_again:
535 if (start >= end)
536 goto out;
537 write_unlock_irqrestore(&tree->lock, flags);
538 if (mask & __GFP_WAIT)
539 cond_resched();
540 goto again;
541}
542EXPORT_SYMBOL(clear_extent_bit);
543
544static int wait_on_state(struct extent_map_tree *tree,
545 struct extent_state *state)
546{
547 DEFINE_WAIT(wait);
548 prepare_to_wait(&state->wq, &wait, TASK_UNINTERRUPTIBLE);
549 read_unlock_irq(&tree->lock);
550 schedule();
551 read_lock_irq(&tree->lock);
552 finish_wait(&state->wq, &wait);
553 return 0;
554}
555
556/*
557 * waits for one or more bits to clear on a range in the state tree.
558 * The range [start, end] is inclusive.
559 * The tree lock is taken by this function
560 */
561int wait_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits)
562{
563 struct extent_state *state;
564 struct rb_node *node;
565
566 read_lock_irq(&tree->lock);
567again:
568 while (1) {
569 /*
570 * this search will find all the extents that end after
571 * our range starts
572 */
573 node = tree_search(&tree->state, start);
574 if (!node)
575 break;
576
577 state = rb_entry(node, struct extent_state, rb_node);
578
579 if (state->start > end)
580 goto out;
581
582 if (state->state & bits) {
583 start = state->start;
584 atomic_inc(&state->refs);
585 wait_on_state(tree, state);
586 free_extent_state(state);
587 goto again;
588 }
589 start = state->end + 1;
590
591 if (start > end)
592 break;
593
594 if (need_resched()) {
595 read_unlock_irq(&tree->lock);
596 cond_resched();
597 read_lock_irq(&tree->lock);
598 }
599 }
600out:
601 read_unlock_irq(&tree->lock);
602 return 0;
603}
604EXPORT_SYMBOL(wait_extent_bit);
605
606/*
607 * set some bits on a range in the tree. This may require allocations
608 * or sleeping, so the gfp mask is used to indicate what is allowed.
609 *
610 * If 'exclusive' == 1, this will fail with -EEXIST if some part of the
611 * range already has the desired bits set. The start of the existing
612 * range is returned in failed_start in this case.
613 *
614 * [start, end] is inclusive
615 * This takes the tree lock.
616 */
617int set_extent_bit(struct extent_map_tree *tree, u64 start, u64 end, int bits,
618 int exclusive, u64 *failed_start, gfp_t mask)
619{
620 struct extent_state *state;
621 struct extent_state *prealloc = NULL;
622 struct rb_node *node;
623 unsigned long flags;
624 int err = 0;
625 int set;
626 u64 last_start;
627 u64 last_end;
628again:
629 if (!prealloc && (mask & __GFP_WAIT)) {
630 prealloc = alloc_extent_state(mask);
631 if (!prealloc)
632 return -ENOMEM;
633 }
634
635 write_lock_irqsave(&tree->lock, flags);
636 /*
637 * this search will find all the extents that end after
638 * our range starts.
639 */
640 node = tree_search(&tree->state, start);
641 if (!node) {
642 err = insert_state(tree, prealloc, start, end, bits);
643 prealloc = NULL;
644 BUG_ON(err == -EEXIST);
645 goto out;
646 }
647
648 state = rb_entry(node, struct extent_state, rb_node);
649 last_start = state->start;
650 last_end = state->end;
651
652 /*
653 * | ---- desired range ---- |
654 * | state |
655 *
656 * Just lock what we found and keep going
657 */
658 if (state->start == start && state->end <= end) {
659 set = state->state & bits;
660 if (set && exclusive) {
661 *failed_start = state->start;
662 err = -EEXIST;
663 goto out;
664 }
665 state->state |= bits;
666 start = state->end + 1;
667 merge_state(tree, state);
668 goto search_again;
669 }
670
671 /*
672 * | ---- desired range ---- |
673 * | state |
674 * or
675 * | ------------- state -------------- |
676 *
677 * We need to split the extent we found, and may flip bits on
678 * second half.
679 *
680 * If the extent we found extends past our
681 * range, we just split and search again. It'll get split
682 * again the next time though.
683 *
684 * If the extent we found is inside our range, we set the
685 * desired bit on it.
686 */
687 if (state->start < start) {
688 set = state->state & bits;
689 if (exclusive && set) {
690 *failed_start = start;
691 err = -EEXIST;
692 goto out;
693 }
694 err = split_state(tree, state, prealloc, start);
695 BUG_ON(err == -EEXIST);
696 prealloc = NULL;
697 if (err)
698 goto out;
699 if (state->end <= end) {
700 state->state |= bits;
701 start = state->end + 1;
702 merge_state(tree, state);
703 } else {
704 start = state->start;
705 }
706 goto search_again;
707 }
708 /*
709 * | ---- desired range ---- |
710 * | state | or | state |
711 *
712 * There's a hole, we need to insert something in it and
713 * ignore the extent we found.
714 */
715 if (state->start > start) {
716 u64 this_end;
717 if (end < last_start)
718 this_end = end;
719 else
720 this_end = last_start -1;
721 err = insert_state(tree, prealloc, start, this_end,
722 bits);
723 prealloc = NULL;
724 BUG_ON(err == -EEXIST);
725 if (err)
726 goto out;
727 start = this_end + 1;
728 goto search_again;
729 }
730 /*
731 * | ---- desired range ---- |
732 * | state |
733 * We need to split the extent, and set the bit
734 * on the first half
735 */
736 if (state->start <= end && state->end > end) {
737 set = state->state & bits;
738 if (exclusive && set) {
739 *failed_start = start;
740 err = -EEXIST;
741 goto out;
742 }
743 err = split_state(tree, state, prealloc, end + 1);
744 BUG_ON(err == -EEXIST);
745
746 prealloc->state |= bits;
747 merge_state(tree, prealloc);
748 prealloc = NULL;
749 goto out;
750 }
751
752 goto search_again;
753
754out:
755 write_unlock_irqrestore(&tree->lock, flags);
756 if (prealloc)
757 free_extent_state(prealloc);
758
759 return err;
760
761search_again:
762 if (start > end)
763 goto out;
764 write_unlock_irqrestore(&tree->lock, flags);
765 if (mask & __GFP_WAIT)
766 cond_resched();
767 goto again;
768}
769EXPORT_SYMBOL(set_extent_bit);
770
771/* wrappers around set/clear extent bit */
772int set_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
773 gfp_t mask)
774{
775 return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL,
776 mask);
777}
778EXPORT_SYMBOL(set_extent_dirty);
779
780int set_extent_delalloc(struct extent_map_tree *tree, u64 start, u64 end,
781 gfp_t mask)
782{
783 return set_extent_bit(tree, start, end,
784 EXTENT_DELALLOC | EXTENT_DIRTY, 0, NULL,
785 mask);
786}
787EXPORT_SYMBOL(set_extent_delalloc);
788
789int clear_extent_dirty(struct extent_map_tree *tree, u64 start, u64 end,
790 gfp_t mask)
791{
792 return clear_extent_bit(tree, start, end,
793 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, mask);
794}
795EXPORT_SYMBOL(clear_extent_dirty);
796
797int set_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
798 gfp_t mask)
799{
800 return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL,
801 mask);
802}
803EXPORT_SYMBOL(set_extent_new);
804
805int clear_extent_new(struct extent_map_tree *tree, u64 start, u64 end,
806 gfp_t mask)
807{
808 return clear_extent_bit(tree, start, end, EXTENT_NEW, 0, 0, mask);
809}
810EXPORT_SYMBOL(clear_extent_new);
811
812int set_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
813 gfp_t mask)
814{
815 return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
816 mask);
817}
818EXPORT_SYMBOL(set_extent_uptodate);
819
820int clear_extent_uptodate(struct extent_map_tree *tree, u64 start, u64 end,
821 gfp_t mask)
822{
823 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, mask);
824}
825EXPORT_SYMBOL(clear_extent_uptodate);
826
827int set_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
828 gfp_t mask)
829{
830 return set_extent_bit(tree, start, end, EXTENT_WRITEBACK,
831 0, NULL, mask);
832}
833EXPORT_SYMBOL(set_extent_writeback);
834
835int clear_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end,
836 gfp_t mask)
837{
838 return clear_extent_bit(tree, start, end, EXTENT_WRITEBACK, 1, 0, mask);
839}
840EXPORT_SYMBOL(clear_extent_writeback);
841
842int wait_on_extent_writeback(struct extent_map_tree *tree, u64 start, u64 end)
843{
844 return wait_extent_bit(tree, start, end, EXTENT_WRITEBACK);
845}
846EXPORT_SYMBOL(wait_on_extent_writeback);
847
848/*
849 * locks a range in ascending order, waiting for any locked regions
850 * it hits on the way. [start,end] are inclusive, and this will sleep.
851 */
852int lock_extent(struct extent_map_tree *tree, u64 start, u64 end, gfp_t mask)
853{
854 int err;
855 u64 failed_start;
856 while (1) {
857 err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
858 &failed_start, mask);
859 if (err == -EEXIST && (mask & __GFP_WAIT)) {
860 wait_extent_bit(tree, failed_start, end, EXTENT_LOCKED);
861 start = failed_start;
862 } else {
863 break;
864 }
865 WARN_ON(start > end);
866 }
867 return err;
868}
869EXPORT_SYMBOL(lock_extent);
870
871int unlock_extent(struct extent_map_tree *tree, u64 start, u64 end,
872 gfp_t mask)
873{
874 return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, mask);
875}
876EXPORT_SYMBOL(unlock_extent);
877
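/*
 * Illustrative sketch, not part of the original file: the usual pattern for
 * serializing work on a byte range.  lock_extent() sleeps until the whole
 * range is locked; unlock_extent() clears the bit and wakes any waiters.
 */
static void example_locked_update(struct extent_map_tree *tree,
				  u64 start, u64 end)
{
	lock_extent(tree, start, end, GFP_NOFS);

	/* the range is exclusively ours here, e.g. flag it dirty */
	set_extent_dirty(tree, start, end, GFP_NOFS);

	unlock_extent(tree, start, end, GFP_NOFS);
}
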
878/*
879 * helper function to set pages and extents in the tree dirty
880 */
881int set_range_dirty(struct extent_map_tree *tree, u64 start, u64 end)
882{
883 unsigned long index = start >> PAGE_CACHE_SHIFT;
884 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
885 struct page *page;
886
887 while (index <= end_index) {
888 page = find_get_page(tree->mapping, index);
889 BUG_ON(!page);
890 __set_page_dirty_nobuffers(page);
891 page_cache_release(page);
892 index++;
893 }
894 set_extent_dirty(tree, start, end, GFP_NOFS);
895 return 0;
896}
897EXPORT_SYMBOL(set_range_dirty);
898
899/*
900 * helper function to set both pages and extents in the tree writeback
901 */
902int set_range_writeback(struct extent_map_tree *tree, u64 start, u64 end)
903{
904 unsigned long index = start >> PAGE_CACHE_SHIFT;
905 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
906 struct page *page;
907
908 while (index <= end_index) {
909 page = find_get_page(tree->mapping, index);
910 BUG_ON(!page);
911 set_page_writeback(page);
912 page_cache_release(page);
913 index++;
914 }
915 set_extent_writeback(tree, start, end, GFP_NOFS);
916 return 0;
917}
918EXPORT_SYMBOL(set_range_writeback);
919
920int find_first_extent_bit(struct extent_map_tree *tree, u64 start,
921 u64 *start_ret, u64 *end_ret, int bits)
922{
923 struct rb_node *node;
924 struct extent_state *state;
925 int ret = 1;
926
927 write_lock_irq(&tree->lock);
928 /*
929 * this search will find all the extents that end after
930 * our range starts.
931 */
932 node = tree_search(&tree->state, start);
933 if (!node || IS_ERR(node)) {
934 goto out;
935 }
936
937 while(1) {
938 state = rb_entry(node, struct extent_state, rb_node);
939 if (state->state & bits) {
940 *start_ret = state->start;
941 *end_ret = state->end;
942 ret = 0;
943 }
944 node = rb_next(node);
945 if (!node)
946 break;
947 }
948out:
949 write_unlock_irq(&tree->lock);
950 return ret;
951}
952EXPORT_SYMBOL(find_first_extent_bit);
953
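/*
 * Illustrative sketch, not part of the original file: probing the state tree
 * for a dirty range at or after 'start'.  A return of 0 from
 * find_first_extent_bit() means the out parameters were filled in.
 */
static int example_find_dirty(struct extent_map_tree *tree, u64 start)
{
	u64 found_start;
	u64 found_end;

	if (find_first_extent_bit(tree, start, &found_start, &found_end,
				  EXTENT_DIRTY))
		return 0;	/* nothing dirty from 'start' onward */

	printk("dirty range %Lu-%Lu\n", found_start, found_end);
	return 1;
}
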
954u64 find_lock_delalloc_range(struct extent_map_tree *tree,
955 u64 start, u64 lock_start, u64 *end, u64 max_bytes)
956{
957 struct rb_node *node;
958 struct extent_state *state;
959 u64 cur_start = start;
960 u64 found = 0;
961 u64 total_bytes = 0;
962
963 write_lock_irq(&tree->lock);
964 /*
965 * this search will find all the extents that end after
966 * our range starts.
967 */
968search_again:
969 node = tree_search(&tree->state, cur_start);
970 if (!node || IS_ERR(node)) {
971 goto out;
972 }
973
974 while(1) {
975 state = rb_entry(node, struct extent_state, rb_node);
976 if (state->start != cur_start) {
977 goto out;
978 }
979 if (!(state->state & EXTENT_DELALLOC)) {
980 goto out;
981 }
982 if (state->start >= lock_start) {
983 if (state->state & EXTENT_LOCKED) {
984 DEFINE_WAIT(wait);
985 atomic_inc(&state->refs);
986 write_unlock_irq(&tree->lock);
987 schedule();
988 write_lock_irq(&tree->lock);
989 finish_wait(&state->wq, &wait);
990 free_extent_state(state);
991 goto search_again;
992 }
993 state->state |= EXTENT_LOCKED;
994 }
995 found++;
996 *end = state->end;
997 cur_start = state->end + 1;
998 node = rb_next(node);
999 if (!node)
1000 break;
1001 total_bytes = state->end - state->start + 1;
1002 if (total_bytes >= max_bytes)
1003 break;
1004 }
1005out:
1006 write_unlock_irq(&tree->lock);
1007 return found;
1008}
1009
1010/*
1011 * helper function to lock both pages and extents in the tree.
1012 * pages must be locked first.
1013 */
1014int lock_range(struct extent_map_tree *tree, u64 start, u64 end)
1015{
1016 unsigned long index = start >> PAGE_CACHE_SHIFT;
1017 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1018 struct page *page;
1019 int err;
1020
1021 while (index <= end_index) {
1022 page = grab_cache_page(tree->mapping, index);
1023 if (!page) {
1024 err = -ENOMEM;
1025 goto failed;
1026 }
1027 if (IS_ERR(page)) {
1028 err = PTR_ERR(page);
1029 goto failed;
1030 }
1031 index++;
1032 }
1033 lock_extent(tree, start, end, GFP_NOFS);
1034 return 0;
1035
1036failed:
1037 /*
1038 * we failed above in getting the page at 'index', so we undo here
1039 * up to but not including the page at 'index'
1040 */
1041 end_index = index;
1042 index = start >> PAGE_CACHE_SHIFT;
1043 while (index < end_index) {
1044 page = find_get_page(tree->mapping, index);
1045 unlock_page(page);
1046 page_cache_release(page);
1047 index++;
1048 }
1049 return err;
1050}
1051EXPORT_SYMBOL(lock_range);
1052
1053/*
1054 * helper function to unlock both pages and extents in the tree.
1055 */
1056int unlock_range(struct extent_map_tree *tree, u64 start, u64 end)
1057{
1058 unsigned long index = start >> PAGE_CACHE_SHIFT;
1059 unsigned long end_index = end >> PAGE_CACHE_SHIFT;
1060 struct page *page;
1061
1062 while (index <= end_index) {
1063 page = find_get_page(tree->mapping, index);
1064 unlock_page(page);
1065 page_cache_release(page);
1066 index++;
1067 }
1068 unlock_extent(tree, start, end, GFP_NOFS);
1069 return 0;
1070}
1071EXPORT_SYMBOL(unlock_range);
1072
1073int set_state_private(struct extent_map_tree *tree, u64 start, u64 private)
1074{
1075 struct rb_node *node;
1076 struct extent_state *state;
1077 int ret = 0;
1078
1079 write_lock_irq(&tree->lock);
1080 /*
1081 * this search will find all the extents that end after
1082 * our range starts.
1083 */
1084 node = tree_search(&tree->state, start);
1085 if (!node || IS_ERR(node)) {
1086 ret = -ENOENT;
1087 goto out;
1088 }
1089 state = rb_entry(node, struct extent_state, rb_node);
1090 if (state->start != start) {
1091 ret = -ENOENT;
1092 goto out;
1093 }
1094 state->private = private;
1095out:
1096 write_unlock_irq(&tree->lock);
1097 return ret;
1098
1099}
1100
1101int get_state_private(struct extent_map_tree *tree, u64 start, u64 *private)
1102{
1103 struct rb_node *node;
1104 struct extent_state *state;
1105 int ret = 0;
1106
1107 read_lock_irq(&tree->lock);
1108 /*
1109 * this search will find all the extents that end after
1110 * our range starts.
1111 */
1112 node = tree_search(&tree->state, start);
1113 if (!node || IS_ERR(node)) {
1114 ret = -ENOENT;
1115 goto out;
1116 }
1117 state = rb_entry(node, struct extent_state, rb_node);
1118 if (state->start != start) {
1119 ret = -ENOENT;
1120 goto out;
1121 }
1122 *private = state->private;
1123out:
1124 read_unlock_irq(&tree->lock);
1125 return ret;
1126}
1127
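/*
 * Illustrative sketch, not part of the original file: stashing a per-extent
 * value (the btrfs read path keeps checksums this way).  The key has to be
 * the exact start offset of the extent_state record, otherwise -ENOENT is
 * returned.  The helper name is made up for this example.
 */
static int example_stash_private(struct extent_map_tree *tree,
				 u64 extent_start, u64 value)
{
	u64 readback = 0;
	int ret;

	ret = set_state_private(tree, extent_start, value);
	if (ret)
		return ret;

	ret = get_state_private(tree, extent_start, &readback);
	WARN_ON(!ret && readback != value);
	return ret;
}
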
1128/*
1129 * searches a range in the state tree for a given mask.
1130 * If 'filled' == 1, this returns 1 only if every extent in the range
1131 * has the bits set. Otherwise, 1 is returned if any bit in the
1132 * range is found set.
1133 */
1134static int test_range_bit(struct extent_map_tree *tree, u64 start, u64 end,
1135 int bits, int filled)
1136{
1137 struct extent_state *state = NULL;
1138 struct rb_node *node;
1139 int bitset = 0;
1140
1141 read_lock_irq(&tree->lock);
1142 node = tree_search(&tree->state, start);
1143 while (node && start <= end) {
1144 state = rb_entry(node, struct extent_state, rb_node);
1145 if (state->start > end)
1146 break;
1147
1148 if (filled && state->start > start) {
1149 bitset = 0;
1150 break;
1151 }
1152 if (state->state & bits) {
1153 bitset = 1;
1154 if (!filled)
1155 break;
1156 } else if (filled) {
1157 bitset = 0;
1158 break;
1159 }
1160 start = state->end + 1;
1161 if (start > end)
1162 break;
1163 node = rb_next(node);
1164 }
1165 read_unlock_irq(&tree->lock);
1166 return bitset;
1167}
1168
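/*
 * Illustrative sketch, not part of the original file: what the 'filled'
 * argument changes.  With filled == 1 every byte of the range must carry the
 * bit; with filled == 0 a single matching extent anywhere in the range is
 * enough.
 */
static void example_test_bits(struct extent_map_tree *tree, u64 start, u64 end)
{
	int all_uptodate = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1);
	int any_locked = test_range_bit(tree, start, end, EXTENT_LOCKED, 0);

	printk("range %Lu-%Lu fully uptodate %d, partially locked %d\n",
	       start, end, all_uptodate, any_locked);
}
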
1169/*
1170 * helper function to set a given page up to date if all the
1171 * extents in the tree for that page are up to date
1172 */
1173static int check_page_uptodate(struct extent_map_tree *tree,
1174 struct page *page)
1175{
1176 u64 start = page->index << PAGE_CACHE_SHIFT;
1177 u64 end = start + PAGE_CACHE_SIZE - 1;
1178 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1))
1179 SetPageUptodate(page);
1180 return 0;
1181}
1182
1183/*
1184 * helper function to unlock a page if all the extents in the tree
1185 * for that page are unlocked
1186 */
1187static int check_page_locked(struct extent_map_tree *tree,
1188 struct page *page)
1189{
1190 u64 start = page->index << PAGE_CACHE_SHIFT;
1191 u64 end = start + PAGE_CACHE_SIZE - 1;
1192 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0))
1193 unlock_page(page);
1194 return 0;
1195}
1196
1197/*
1198 * helper function to end page writeback if all the extents
1199 * in the tree for that page are done with writeback
1200 */
1201static int check_page_writeback(struct extent_map_tree *tree,
1202 struct page *page)
1203{
1204 u64 start = page->index << PAGE_CACHE_SHIFT;
1205 u64 end = start + PAGE_CACHE_SIZE - 1;
1206 if (!test_range_bit(tree, start, end, EXTENT_WRITEBACK, 0))
1207 end_page_writeback(page);
1208 return 0;
1209}
1210
1211/* lots and lots of room for performance fixes in the end_bio funcs */
1212
1213/*
1214 * after a writepage IO is done, we need to:
1215 * clear the uptodate bits on error
1216 * clear the writeback bits in the extent tree for this IO
1217 * end_page_writeback if the page has no more pending IO
1218 *
1219 * Scheduling is not allowed, so the extent state tree is expected
1220 * to have one and only one object corresponding to this IO.
1221 */
1222static int end_bio_extent_writepage(struct bio *bio,
1223 unsigned int bytes_done, int err)
1224{
1225 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1226 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1227 struct extent_map_tree *tree = bio->bi_private;
1228 u64 start;
1229 u64 end;
1230 int whole_page;
1231
1232 if (bio->bi_size)
1233 return 1;
1234
1235 do {
1236 struct page *page = bvec->bv_page;
1237 start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1238 end = start + bvec->bv_len - 1;
1239
1240 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1241 whole_page = 1;
1242 else
1243 whole_page = 0;
1244
1245 if (--bvec >= bio->bi_io_vec)
1246 prefetchw(&bvec->bv_page->flags);
1247
1248 if (!uptodate) {
1249 clear_extent_uptodate(tree, start, end, GFP_ATOMIC);
1250 ClearPageUptodate(page);
1251 SetPageError(page);
1252 }
1253 clear_extent_writeback(tree, start, end, GFP_ATOMIC);
1254
1255 if (whole_page)
1256 end_page_writeback(page);
1257 else
1258 check_page_writeback(tree, page);
1259 if (tree->ops && tree->ops->writepage_end_io_hook)
1260 tree->ops->writepage_end_io_hook(page, start, end);
1261 } while (bvec >= bio->bi_io_vec);
1262
1263 bio_put(bio);
1264 return 0;
1265}
1266
1267/*
1268 * after a readpage IO is done, we need to:
1269 * clear the uptodate bits on error
1270 * set the uptodate bits if things worked
1271 * set the page up to date if all extents in the tree are uptodate
1272 * clear the lock bit in the extent tree
1273 * unlock the page if there are no other extents locked for it
1274 *
1275 * Scheduling is not allowed, so the extent state tree is expected
1276 * to have one and only one object corresponding to this IO.
1277 */
1278static int end_bio_extent_readpage(struct bio *bio,
1279 unsigned int bytes_done, int err)
1280{
1281 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1282 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1283 struct extent_map_tree *tree = bio->bi_private;
1284 u64 start;
1285 u64 end;
1286 int whole_page;
1287 int ret;
1288
1289 if (bio->bi_size)
1290 return 1;
1291
1292 do {
1293 struct page *page = bvec->bv_page;
1294 start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1295 end = start + bvec->bv_len - 1;
1296
1297 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
1298 whole_page = 1;
1299 else
1300 whole_page = 0;
1301
1302 if (--bvec >= bio->bi_io_vec)
1303 prefetchw(&bvec->bv_page->flags);
1304
1305 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
1306 ret = tree->ops->readpage_end_io_hook(page, start, end);
1307 if (ret)
1308 uptodate = 0;
1309 }
1310 if (uptodate) {
1311 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1312 if (whole_page)
1313 SetPageUptodate(page);
1314 else
1315 check_page_uptodate(tree, page);
1316 } else {
1317 ClearPageUptodate(page);
1318 SetPageError(page);
1319 }
1320
1321 unlock_extent(tree, start, end, GFP_ATOMIC);
1322
1323 if (whole_page)
1324 unlock_page(page);
1325 else
1326 check_page_locked(tree, page);
1327 } while (bvec >= bio->bi_io_vec);
1328
1329 bio_put(bio);
1330 return 0;
1331}
1332
1333/*
1334 * IO done from prepare_write is pretty simple, we just unlock
1335 * the structs in the extent tree when done, and set the uptodate bits
1336 * as appropriate.
1337 */
1338static int end_bio_extent_preparewrite(struct bio *bio,
1339 unsigned int bytes_done, int err)
1340{
1341 const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1342 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
1343 struct extent_map_tree *tree = bio->bi_private;
1344 u64 start;
1345 u64 end;
1346
1347 if (bio->bi_size)
1348 return 1;
1349
1350 do {
1351 struct page *page = bvec->bv_page;
1352 start = (page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1353 end = start + bvec->bv_len - 1;
1354
1355 if (--bvec >= bio->bi_io_vec)
1356 prefetchw(&bvec->bv_page->flags);
1357
1358 if (uptodate) {
1359 set_extent_uptodate(tree, start, end, GFP_ATOMIC);
1360 } else {
1361 ClearPageUptodate(page);
1362 SetPageError(page);
1363 }
1364
1365 unlock_extent(tree, start, end, GFP_ATOMIC);
1366
1367 } while (bvec >= bio->bi_io_vec);
1368
1369 bio_put(bio);
1370 return 0;
1371}
1372
1373static int submit_extent_page(int rw, struct extent_map_tree *tree,
1374 struct page *page, sector_t sector,
1375 size_t size, unsigned long offset,
1376 struct block_device *bdev,
1377 bio_end_io_t end_io_func)
1378{
1379 struct bio *bio;
1380 int ret = 0;
1381
1382 bio = bio_alloc(GFP_NOIO, 1);
1383
1384 bio->bi_sector = sector;
1385 bio->bi_bdev = bdev;
1386 bio->bi_io_vec[0].bv_page = page;
1387 bio->bi_io_vec[0].bv_len = size;
1388 bio->bi_io_vec[0].bv_offset = offset;
1389
1390 bio->bi_vcnt = 1;
1391 bio->bi_idx = 0;
1392 bio->bi_size = size;
1393
1394 bio->bi_end_io = end_io_func;
1395 bio->bi_private = tree;
1396
1397 bio_get(bio);
1398 submit_bio(rw, bio);
1399
1400 if (bio_flagged(bio, BIO_EOPNOTSUPP))
1401 ret = -EOPNOTSUPP;
1402
1403 bio_put(bio);
1404 return ret;
1405}
1406
1407void set_page_extent_mapped(struct page *page)
1408{
1409 if (!PagePrivate(page)) {
1410 SetPagePrivate(page);
1411 WARN_ON(!page->mapping->a_ops->invalidatepage);
1412 set_page_private(page, 1);
1413 page_cache_get(page);
1414 }
1415}
1416
1417/*
1418 * basic readpage implementation. Locked extent state structs are inserted
1419 * into the tree that are removed when the IO is done (by the end_io
1420 * handlers)
1421 */
1422int extent_read_full_page(struct extent_map_tree *tree, struct page *page,
1423 get_extent_t *get_extent)
1424{
1425 struct inode *inode = page->mapping->host;
1426 u64 start = page->index << PAGE_CACHE_SHIFT;
1427 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1428 u64 end;
1429 u64 cur = start;
1430 u64 extent_offset;
1431 u64 last_byte = i_size_read(inode);
1432 u64 block_start;
1433 u64 cur_end;
1434 sector_t sector;
1435 struct extent_map *em;
1436 struct block_device *bdev;
1437 int ret;
1438 int nr = 0;
1439 size_t page_offset = 0;
1440 size_t iosize;
1441 size_t blocksize = inode->i_sb->s_blocksize;
1442
1443 set_page_extent_mapped(page);
1444
1445 end = page_end;
1446 lock_extent(tree, start, end, GFP_NOFS);
1447
1448 while (cur <= end) {
1449 if (cur >= last_byte) {
1450 iosize = PAGE_CACHE_SIZE - page_offset;
1451 zero_user_page(page, page_offset, iosize, KM_USER0);
1452 set_extent_uptodate(tree, cur, cur + iosize - 1,
1453 GFP_NOFS);
1454 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1455 break;
1456 }
1457 em = get_extent(inode, page, page_offset, cur, end, 0);
1458 if (IS_ERR(em) || !em) {
1459 SetPageError(page);
1460 unlock_extent(tree, cur, end, GFP_NOFS);
1461 break;
1462 }
1463
1464 extent_offset = cur - em->start;
1465 BUG_ON(em->end < cur);
1466 BUG_ON(end < cur);
1467
1468 iosize = min(em->end - cur, end - cur) + 1;
1469 cur_end = min(em->end, end);
1470 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1471 sector = (em->block_start + extent_offset) >> 9;
1472 bdev = em->bdev;
1473 block_start = em->block_start;
1474 free_extent_map(em);
1475 em = NULL;
1476
1477 /* we've found a hole, just zero and go on */
1478 if (block_start == EXTENT_MAP_HOLE) {
1479 zero_user_page(page, page_offset, iosize, KM_USER0);
1480 set_extent_uptodate(tree, cur, cur + iosize - 1,
1481 GFP_NOFS);
1482 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1483 cur = cur + iosize;
1484 page_offset += iosize;
1485 continue;
1486 }
1487 /* the get_extent function already copied into the page */
1488 if (test_range_bit(tree, cur, cur_end, EXTENT_UPTODATE, 1)) {
1489 unlock_extent(tree, cur, cur + iosize - 1, GFP_NOFS);
1490 cur = cur + iosize;
1491 page_offset += iosize;
1492 continue;
1493 }
1494
1495 ret = 0;
1496 if (tree->ops && tree->ops->readpage_io_hook) {
1497 ret = tree->ops->readpage_io_hook(page, cur,
1498 cur + iosize - 1);
1499 }
1500 if (!ret) {
1501 ret = submit_extent_page(READ, tree, page,
1502 sector, iosize, page_offset,
1503 bdev, end_bio_extent_readpage);
1504 }
1505 if (ret)
1506 SetPageError(page);
1507 cur = cur + iosize;
1508 page_offset += iosize;
1509 nr++;
1510 }
1511 if (!nr) {
1512 if (!PageError(page))
1513 SetPageUptodate(page);
1514 unlock_page(page);
1515 }
1516 return 0;
1517}
1518EXPORT_SYMBOL(extent_read_full_page);
1519
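/*
 * Illustrative sketch, not part of the original file: a filesystem's
 * ->readpage typically just forwards to extent_read_full_page() with its own
 * get_extent callback; the page is unlocked by the end_io handler once every
 * extent covering it has been read.  Both example_* names are hypothetical.
 */
extern struct extent_map_tree *example_tree(struct inode *inode);
extern struct extent_map *example_get_extent(struct inode *inode,
					     struct page *page,
					     size_t page_offset, u64 start,
					     u64 end, int create);

static int example_readpage(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;

	return extent_read_full_page(example_tree(inode), page,
				     example_get_extent);
}
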
1520/*
1521 * the writepage semantics are similar to regular writepage. extent
1522 * records are inserted to lock ranges in the tree, and as dirty areas
1523 * are found, they are marked writeback. Then the lock bits are removed
1524 * and the end_io handler clears the writeback ranges
1525 */
1526int extent_write_full_page(struct extent_map_tree *tree, struct page *page,
1527 get_extent_t *get_extent,
1528 struct writeback_control *wbc)
1529{
1530 struct inode *inode = page->mapping->host;
1531 u64 start = page->index << PAGE_CACHE_SHIFT;
1532 u64 page_end = start + PAGE_CACHE_SIZE - 1;
1533 u64 end;
1534 u64 cur = start;
1535 u64 extent_offset;
1536 u64 last_byte = i_size_read(inode);
1537 u64 block_start;
1538 sector_t sector;
1539 struct extent_map *em;
1540 struct block_device *bdev;
1541 int ret;
1542 int nr = 0;
1543 size_t page_offset = 0;
1544 size_t iosize;
1545 size_t blocksize;
1546 loff_t i_size = i_size_read(inode);
1547 unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
1548 u64 nr_delalloc;
1549 u64 delalloc_end;
1550
1551 WARN_ON(!PageLocked(page));
1552 if (page->index > end_index) {
1553 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1554 unlock_page(page);
1555 return 0;
1556 }
1557
1558 if (page->index == end_index) {
1559 size_t offset = i_size & (PAGE_CACHE_SIZE - 1);
1560 zero_user_page(page, offset,
1561 PAGE_CACHE_SIZE - offset, KM_USER0);
1562 }
1563
1564 set_page_extent_mapped(page);
1565
1566 lock_extent(tree, start, page_end, GFP_NOFS);
1567 nr_delalloc = find_lock_delalloc_range(tree, start, page_end + 1,
1568 &delalloc_end,
1569 128 * 1024 * 1024);
1570 if (nr_delalloc) {
1571 tree->ops->fill_delalloc(inode, start, delalloc_end);
1572 if (delalloc_end >= page_end + 1) {
1573 clear_extent_bit(tree, page_end + 1, delalloc_end,
1574 EXTENT_LOCKED | EXTENT_DELALLOC,
1575 1, 0, GFP_NOFS);
1576 }
1577 clear_extent_bit(tree, start, page_end, EXTENT_DELALLOC,
1578 0, 0, GFP_NOFS);
1579 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1580 printk("found delalloc bits after clear extent_bit\n");
1581 }
1582 } else if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1583 printk("found delalloc bits after find_delalloc_range returns 0\n");
1584 }
1585
1586 end = page_end;
1587 if (test_range_bit(tree, start, page_end, EXTENT_DELALLOC, 0)) {
1588 printk("found delalloc bits after lock_extent\n");
1589 }
1590
1591 if (last_byte <= start) {
1592 clear_extent_dirty(tree, start, page_end, GFP_NOFS);
1593 goto done;
1594 }
1595
1596 set_extent_uptodate(tree, start, page_end, GFP_NOFS);
1597 blocksize = inode->i_sb->s_blocksize;
1598
1599 while (cur <= end) {
1600 if (cur >= last_byte) {
1601 clear_extent_dirty(tree, cur, page_end, GFP_NOFS);
1602 break;
1603 }
1604 em = get_extent(inode, page, page_offset, cur, end, 0);
1605 if (IS_ERR(em) || !em) {
1606 SetPageError(page);
1607 break;
1608 }
1609
1610 extent_offset = cur - em->start;
1611 BUG_ON(em->end < cur);
1612 BUG_ON(end < cur);
1613 iosize = min(em->end - cur, end - cur) + 1;
1614 iosize = (iosize + blocksize - 1) & ~((u64)blocksize - 1);
1615 sector = (em->block_start + extent_offset) >> 9;
1616 bdev = em->bdev;
1617 block_start = em->block_start;
1618 free_extent_map(em);
1619 em = NULL;
1620
1621 if (block_start == EXTENT_MAP_HOLE ||
1622 block_start == EXTENT_MAP_INLINE) {
1623 clear_extent_dirty(tree, cur,
1624 cur + iosize - 1, GFP_NOFS);
1625 cur = cur + iosize;
1626 page_offset += iosize;
1627 continue;
1628 }
1629
1630 /* leave this out until we have a page_mkwrite call */
1631 if (0 && !test_range_bit(tree, cur, cur + iosize - 1,
1632 EXTENT_DIRTY, 0)) {
1633 cur = cur + iosize;
1634 page_offset += iosize;
1635 continue;
1636 }
1637 clear_extent_dirty(tree, cur, cur + iosize - 1, GFP_NOFS);
1638 if (tree->ops && tree->ops->writepage_io_hook) {
1639 ret = tree->ops->writepage_io_hook(page, cur,
1640 cur + iosize - 1);
1641 } else {
1642 ret = 0;
1643 }
1644 if (ret)
1645 SetPageError(page);
1646 else {
1647 set_range_writeback(tree, cur, cur + iosize - 1);
1648 ret = submit_extent_page(WRITE, tree, page, sector,
1649 iosize, page_offset, bdev,
1650 end_bio_extent_writepage);
1651 if (ret)
1652 SetPageError(page);
1653 }
1654 cur = cur + iosize;
1655 page_offset += iosize;
1656 nr++;
1657 }
1658done:
1659 unlock_extent(tree, start, page_end, GFP_NOFS);
1660 unlock_page(page);
1661 return 0;
1662}
1663EXPORT_SYMBOL(extent_write_full_page);
1664
1665/*
1666 * basic invalidatepage code, this waits on any locked or writeback
1667 * ranges corresponding to the page, and then deletes any extent state
1668 * records from the tree
1669 */
1670int extent_invalidatepage(struct extent_map_tree *tree,
1671 struct page *page, unsigned long offset)
1672{
1673 u64 start = (page->index << PAGE_CACHE_SHIFT);
1674 u64 end = start + PAGE_CACHE_SIZE - 1;
1675 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
1676
1677 start += (offset + blocksize -1) & ~(blocksize - 1);
1678 if (start > end)
1679 return 0;
1680
1681 lock_extent(tree, start, end, GFP_NOFS);
1682 wait_on_extent_writeback(tree, start, end);
1683 clear_extent_bit(tree, start, end,
1684 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC,
1685 1, 1, GFP_NOFS);
1686 return 0;
1687}
1688EXPORT_SYMBOL(extent_invalidatepage);
1689
1690/*
1691 * simple commit_write call, set_range_dirty is used to mark both
1692 * the pages and the extent records as dirty
1693 */
1694int extent_commit_write(struct extent_map_tree *tree,
1695 struct inode *inode, struct page *page,
1696 unsigned from, unsigned to)
1697{
1698 loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1699
1700 set_page_extent_mapped(page);
1701 set_page_dirty(page);
1702
1703 if (pos > inode->i_size) {
1704 i_size_write(inode, pos);
1705 mark_inode_dirty(inode);
1706 }
1707 return 0;
1708}
1709EXPORT_SYMBOL(extent_commit_write);
1710
1711int extent_prepare_write(struct extent_map_tree *tree,
1712 struct inode *inode, struct page *page,
1713 unsigned from, unsigned to, get_extent_t *get_extent)
1714{
1715 u64 page_start = page->index << PAGE_CACHE_SHIFT;
1716 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1717 u64 block_start;
1718 u64 orig_block_start;
1719 u64 block_end;
1720 u64 cur_end;
1721 struct extent_map *em;
1722 unsigned blocksize = 1 << inode->i_blkbits;
1723 size_t page_offset = 0;
1724 size_t block_off_start;
1725 size_t block_off_end;
1726 int err = 0;
1727 int iocount = 0;
1728 int ret = 0;
1729 int isnew;
1730
1731 set_page_extent_mapped(page);
1732
1733 block_start = (page_start + from) & ~((u64)blocksize - 1);
1734 block_end = (page_start + to - 1) | (blocksize - 1);
1735 orig_block_start = block_start;
1736
1737 lock_extent(tree, page_start, page_end, GFP_NOFS);
1738 while(block_start <= block_end) {
1739 em = get_extent(inode, page, page_offset, block_start,
1740 block_end, 1);
1741 if (IS_ERR(em) || !em) {
1742 goto err;
1743 }
1744 cur_end = min(block_end, em->end);
1745 block_off_start = block_start & (PAGE_CACHE_SIZE - 1);
1746 block_off_end = block_off_start + blocksize;
1747 isnew = clear_extent_new(tree, block_start, cur_end, GFP_NOFS);
1748
1749 if (!PageUptodate(page) && isnew &&
1750 (block_off_end > to || block_off_start < from)) {
1751 void *kaddr;
1752
1753 kaddr = kmap_atomic(page, KM_USER0);
1754 if (block_off_end > to)
1755 memset(kaddr + to, 0, block_off_end - to);
1756 if (block_off_start < from)
1757 memset(kaddr + block_off_start, 0,
1758 from - block_off_start);
1759 flush_dcache_page(page);
1760 kunmap_atomic(kaddr, KM_USER0);
1761 }
1762 if (!isnew && !PageUptodate(page) &&
1763 (block_off_end > to || block_off_start < from) &&
1764 !test_range_bit(tree, block_start, cur_end,
1765 EXTENT_UPTODATE, 1)) {
1766 u64 sector;
1767 u64 extent_offset = block_start - em->start;
1768 size_t iosize;
1769 sector = (em->block_start + extent_offset) >> 9;
1770 iosize = (cur_end - block_start + blocksize - 1) &
1771 ~((u64)blocksize - 1);
1772 /*
1773 * we've already got the extent locked, but we
1774 * need to split the state such that our end_bio
1775 * handler can clear the lock.
1776 */
1777 set_extent_bit(tree, block_start,
1778 block_start + iosize - 1,
1779 EXTENT_LOCKED, 0, NULL, GFP_NOFS);
1780 ret = submit_extent_page(READ, tree, page,
1781 sector, iosize, page_offset, em->bdev,
1782 end_bio_extent_preparewrite);
1783 iocount++;
1784 block_start = block_start + iosize;
1785 } else {
1786 set_extent_uptodate(tree, block_start, cur_end,
1787 GFP_NOFS);
1788 unlock_extent(tree, block_start, cur_end, GFP_NOFS);
1789 block_start = cur_end + 1;
1790 }
1791 page_offset = block_start & (PAGE_CACHE_SIZE - 1);
1792 free_extent_map(em);
1793 }
1794 if (iocount) {
1795 wait_extent_bit(tree, orig_block_start,
1796 block_end, EXTENT_LOCKED);
1797 }
1798 check_page_uptodate(tree, page);
1799err:
1800 /* FIXME, zero out newly allocated blocks on error */
1801 return err;
1802}
1803EXPORT_SYMBOL(extent_prepare_write);
1804
1805/*
1806 * a helper for releasepage. As long as there are no locked extents
1807 * in the range corresponding to the page, both state records and extent
1808 * map records are removed
1809 */
1810int try_release_extent_mapping(struct extent_map_tree *tree, struct page *page)
1811{
1812 struct extent_map *em;
1813 u64 start = page->index << PAGE_CACHE_SHIFT;
1814 u64 end = start + PAGE_CACHE_SIZE - 1;
1815 u64 orig_start = start;
1816 int ret = 1;
1817
1818 while (start <= end) {
1819 em = lookup_extent_mapping(tree, start, end);
1820 if (!em || IS_ERR(em))
1821 break;
1822 if (!test_range_bit(tree, em->start, em->end,
1823 EXTENT_LOCKED, 0)) {
1824 remove_extent_mapping(tree, em);
1825 /* once for the rb tree */
1826 free_extent_map(em);
1827 }
1828 start = em->end + 1;
1829 /* once for us */
1830 free_extent_map(em);
1831 }
1832 if (test_range_bit(tree, orig_start, end, EXTENT_LOCKED, 0))
1833 ret = 0;
1834 else
1835 clear_extent_bit(tree, orig_start, end, EXTENT_UPTODATE,
1836 1, 1, GFP_NOFS);
1837 return ret;
1838}
1839EXPORT_SYMBOL(try_release_extent_mapping);
1840
1841sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
1842 get_extent_t *get_extent)
1843{
1844 struct inode *inode = mapping->host;
1845 u64 start = iblock << inode->i_blkbits;
1846 u64 end = start + (1 << inode->i_blkbits) - 1;
1847 struct extent_map *em;
1848
1849 em = get_extent(inode, NULL, 0, start, end, 0);
1850 if (!em || IS_ERR(em))
1851 return 0;
1852
1853 // XXX(hch): block 0 is valid in some cases, e.g. XFS RT device
1854 if (em->block_start == EXTENT_MAP_INLINE ||
1855 em->block_start == EXTENT_MAP_HOLE)
1856 return 0;
1857
1858 return (em->block_start + start - em->start) >> inode->i_blkbits;
1859}
1860
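/*
 * Illustrative sketch, not part of the original file: extent_bmap() is shaped
 * for the address_space ->bmap operation, so a wrapper only has to supply the
 * filesystem's get_extent callback (reusing the hypothetical
 * example_get_extent declared in the earlier example).
 */
static sector_t example_bmap(struct address_space *mapping, sector_t iblock)
{
	return extent_bmap(mapping, iblock, example_get_extent);
}
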
1861struct extent_buffer *alloc_extent_buffer(struct extent_map_tree *tree,
1862 u64 start, unsigned long len,
1863 gfp_t mask)
1864{
1865 unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) -
1866 (start >> PAGE_CACHE_SHIFT) + 1;
1867 unsigned long i;
1868 unsigned long index = start >> PAGE_CACHE_SHIFT;
1869 struct extent_buffer *eb;
1870 struct page *p;
1871 struct address_space *mapping = tree->mapping;
1872 int uptodate = 0;
1873
1874 eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask);
1875 if (!eb || IS_ERR(eb))
1876 return NULL;
1877
1878 eb->start = start;
1879 eb->len = len;
1880 atomic_set(&eb->refs, 1);
1881
1882 for (i = 0; i < num_pages; i++, index++) {
1883 p = find_or_create_page(mapping, index, mask | __GFP_HIGHMEM);
1884 if (!p)
1885 goto fail;
1886 eb->pages[i] = p;
1887 if (!PageUptodate(p))
1888 uptodate = 0;
1889 unlock_page(p);
1890 }
1891 if (uptodate)
1892 eb->flags |= EXTENT_UPTODATE;
1893 return eb;
1894fail:
1895 free_extent_buffer(eb);
1896 return NULL;
1897}
1898EXPORT_SYMBOL(alloc_extent_buffer);
1899
1900struct extent_buffer *find_extent_buffer(struct extent_map_tree *tree,
1901 u64 start, unsigned long len,
1902 gfp_t mask)
1903{
1904 unsigned long num_pages = ((start + len - 1) >> PAGE_CACHE_SHIFT) -
1905 (start >> PAGE_CACHE_SHIFT) + 1;
1906 unsigned long i;
1907 unsigned long index = start >> PAGE_CACHE_SHIFT;
1908 struct extent_buffer *eb;
1909 struct page *p;
1910 struct address_space *mapping = tree->mapping;
1911
1912 eb = kzalloc(EXTENT_BUFFER_SIZE(num_pages), mask);
1913 if (!eb || IS_ERR(eb))
1914 return NULL;
1915
1916 eb->start = start;
1917 eb->len = len;
1918 atomic_set(&eb->refs, 1);
1919
1920 for (i = 0; i < num_pages; i++, index++) {
1921 p = find_get_page(mapping, index);
1922 if (!p)
1923 goto fail;
1924 eb->pages[i] = p;
1925 }
1926 return eb;
1927fail:
1928 free_extent_buffer(eb);
1929 return NULL;
1930}
1931EXPORT_SYMBOL(find_extent_buffer);
1932
1933void free_extent_buffer(struct extent_buffer *eb)
1934{
1935 unsigned long i;
1936 unsigned long num_pages;
1937
1938 if (!eb)
1939 return;
1940
1941 if (!atomic_dec_and_test(&eb->refs))
1942 return;
1943
1944 num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
1945 (eb->start >> PAGE_CACHE_SHIFT) + 1;
1946
1947 for (i = 0; i < num_pages; i++) {
1948 if (eb->pages[i])
1949 page_cache_release(eb->pages[i]);
1950 }
1951 kfree(eb);
1952}
1953EXPORT_SYMBOL(free_extent_buffer);
1954
1955int clear_extent_buffer_dirty(struct extent_map_tree *tree,
1956 struct extent_buffer *eb)
1957{
1958 int set;
1959 unsigned long i;
1960 unsigned long num_pages;
1961 struct page *page;
1962
1963 u64 start = eb->start;
1964 u64 end = start + eb->len - 1;
1965
1966 set = clear_extent_dirty(tree, start, end, GFP_NOFS);
1967 num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
1968 (eb->start >> PAGE_CACHE_SHIFT) + 1;
1969
1970 for (i = 0; i < num_pages; i++) {
1971 page = eb->pages[i];
1972 lock_page(page);
1973 /*
1974 * if we're on the last page or the first page and the
1975 * block isn't aligned on a page boundary, do extra checks
1976 * to make sure we don't clean page that is partially dirty
1977 */
1978 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
1979 ((i == num_pages - 1) &&
1980 ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
1981 start = page->index << PAGE_CACHE_SHIFT;
1982 end = start + PAGE_CACHE_SIZE - 1;
1983 if (test_range_bit(tree, start, end,
1984 EXTENT_DIRTY, 0)) {
1985 unlock_page(page);
1986 continue;
1987 }
1988 }
1989 clear_page_dirty_for_io(page);
1990 unlock_page(page);
1991 }
1992 return 0;
1993}
1994EXPORT_SYMBOL(clear_extent_buffer_dirty);
1995
1996int wait_on_extent_buffer_writeback(struct extent_map_tree *tree,
1997 struct extent_buffer *eb)
1998{
1999 return wait_on_extent_writeback(tree, eb->start,
2000 eb->start + eb->len - 1);
2001}
2002EXPORT_SYMBOL(wait_on_extent_buffer_writeback);
2003
2004int set_extent_buffer_dirty(struct extent_map_tree *tree,
2005 struct extent_buffer *eb)
2006{
2007 return set_range_dirty(tree, eb->start, eb->start + eb->len - 1);
2008}
2009EXPORT_SYMBOL(set_extent_buffer_dirty);
2010
2011int set_extent_buffer_uptodate(struct extent_map_tree *tree,
2012 struct extent_buffer *eb)
2013{
2014 unsigned long i;
2015 struct page *page;
2016 unsigned long num_pages;
2017
2018 num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
2019 (eb->start >> PAGE_CACHE_SHIFT) + 1;
2020
2021 set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
2022 GFP_NOFS);
2023 for (i = 0; i < num_pages; i++) {
2024 page = eb->pages[i];
2025 if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) ||
2026 ((i == num_pages - 1) &&
2027 ((eb->start + eb->len - 1) & (PAGE_CACHE_SIZE - 1)))) {
2028 check_page_uptodate(tree, page);
2029 continue;
2030 }
2031 SetPageUptodate(page);
2032 }
2033 return 0;
2034}
2035EXPORT_SYMBOL(set_extent_buffer_uptodate);
2036
2037int extent_buffer_uptodate(struct extent_map_tree *tree,
2038 struct extent_buffer *eb)
2039{
2040 if (eb->flags & EXTENT_UPTODATE)
2041 return 1;
2042 return test_range_bit(tree, eb->start, eb->start + eb->len - 1,
2043 EXTENT_UPTODATE, 1);
2044}
2045EXPORT_SYMBOL(extent_buffer_uptodate);
2046
2047int read_extent_buffer_pages(struct extent_map_tree *tree,
2048 struct extent_buffer *eb, int wait)
2049{
2050 unsigned long i;
2051 struct page *page;
2052 int err;
2053 int ret = 0;
2054 unsigned long num_pages;
2055
2056 if (eb->flags & EXTENT_UPTODATE)
2057 return 0;
2058
2059 if (test_range_bit(tree, eb->start, eb->start + eb->len - 1,
2060 EXTENT_UPTODATE, 1)) {
2061 return 0;
2062 }
2063
2064 num_pages = ((eb->start + eb->len - 1) >> PAGE_CACHE_SHIFT) -
2065 (eb->start >> PAGE_CACHE_SHIFT) + 1;
2066 for (i = 0; i < num_pages; i++) {
2067 page = eb->pages[i];
2068 if (PageUptodate(page)) {
2069 continue;
2070 }
2071 if (!wait) {
2072 if (TestSetPageLocked(page)) {
2073 continue;
2074 }
2075 } else {
2076 lock_page(page);
2077 }
2078 if (!PageUptodate(page)) {
2079 err = page->mapping->a_ops->readpage(NULL, page);
2080 if (err) {
2081 ret = err;
2082 }
2083 } else {
2084 unlock_page(page);
2085 }
2086 }
2087
2088 if (ret || !wait) {
2089 return ret;
2090 }
2091
2092 for (i = 0; i < num_pages; i++) {
2093 page = eb->pages[i];
2094 wait_on_page_locked(page);
2095 if (!PageUptodate(page)) {
2096 ret = -EIO;
2097 }
2098 }
2099	if (!ret) eb->flags |= EXTENT_UPTODATE;
2100 return ret;
2101}
2102EXPORT_SYMBOL(read_extent_buffer_pages);
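/*
 * A minimal end-to-end read sketch, assuming 'tree' was set up with
 * extent_map_tree_init() and that a block of 'blocksize' bytes really
 * lives at 'start'; the helper name example_read_block and the 32-byte
 * header buffer are illustrative only.
 */
#if 0
static int example_read_block(struct extent_map_tree *tree, u64 start,
			      unsigned long blocksize)
{
	char header[32];
	struct extent_buffer *eb;
	int ret = 0;

	eb = alloc_extent_buffer(tree, start, blocksize, GFP_NOFS);
	if (!eb)
		return -ENOMEM;

	/* wait == 1: issue readpage where needed and block until complete */
	if (!extent_buffer_uptodate(tree, eb))
		ret = read_extent_buffer_pages(tree, eb, 1);
	if (!ret)
		/* copy the first bytes of the block out of the page cache */
		read_extent_buffer(eb, header, 0, sizeof(header));

	free_extent_buffer(eb);
	return ret;
}
#endif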
2103
2104void read_extent_buffer(struct extent_buffer *eb, void *dstv,
2105 unsigned long start,
2106 unsigned long len)
2107{
2108 size_t cur;
2109 size_t offset;
2110 struct page *page;
2111 char *kaddr;
2112 char *dst = (char *)dstv;
2113 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2114 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2115
2116 WARN_ON(start > eb->len);
2117 WARN_ON(start + len > eb->start + eb->len);
2118
2119 page = eb->pages[i];
2120 offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
2121 if (i == 0)
2122 offset += start_offset;
2123
2124 while(len > 0) {
2125 WARN_ON(!PageUptodate(page));
2126
2127 cur = min(len, (PAGE_CACHE_SIZE - offset));
2128 // kaddr = kmap_atomic(page, KM_USER0);
2129 kaddr = page_address(page);
2130 memcpy(dst, kaddr + offset, cur);
2131 // kunmap_atomic(kaddr, KM_USER0);
2132
2133 dst += cur;
2134 len -= cur;
2135 offset = 0;
2136 i++;
2137 page = eb->pages[i];
2138 }
2139}
2140EXPORT_SYMBOL(read_extent_buffer);
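/*
 * Worked example of the offset math above (assuming 4K pages): for an eb
 * with start = 5120 (start_offset = 1024) and len = 8192, reading
 * len = 4096 bytes at start = 2048 begins in page
 * i = (1024 + 2048) >> 12 = 0 with offset = 2048 + 1024 = 3072, so the
 * first memcpy handles cur = 4096 - 3072 = 1024 bytes; the remaining
 * 3072 bytes come from the beginning of page 1 with offset = 0.
 */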
2141
2142int map_extent_buffer(struct extent_buffer *eb, unsigned long start,
479965d6 2143 unsigned long min_len,
5f39d397
CM
2144 char **token, char **map,
2145 unsigned long *map_start,
2146 unsigned long *map_len, int km)
2147{
479965d6 2148 size_t offset = start & (PAGE_CACHE_SIZE - 1);
5f39d397
CM
2149 char *kaddr;
2150 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2151 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
479965d6
CM
2152 unsigned long end_i = (start_offset + start + min_len) >>
2153 PAGE_CACHE_SHIFT;
2154
2155 if (i != end_i)
2156 return -EINVAL;
5f39d397
CM
2157
2158 WARN_ON(start > eb->len);
2159
2160 if (i == 0) {
2161 offset = start_offset;
2162 *map_start = 0;
2163 } else {
479965d6 2164 offset = 0; *map_start = (i << PAGE_CACHE_SHIFT) - start_offset;
5f39d397
CM
2165 }
2166
2167 // kaddr = kmap_atomic(eb->pages[i], km);
2168 kaddr = page_address(eb->pages[i]);
2169 *token = kaddr;
2170 *map = kaddr + offset;
2171 *map_len = PAGE_CACHE_SIZE - offset;
2172 return 0;
2173}
2174EXPORT_SYMBOL(map_extent_buffer);
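/*
 * A minimal sketch of the map/unmap bracketing, assuming a small field
 * that may or may not share a page with other data; the helper name
 * example_peek_u64 and the KM_USER0 slot are illustrative only.  The
 * returned map covers the page starting at eb offset *map_start, so the
 * caller indexes relative to map_start; when the requested range would
 * straddle a page boundary, map_extent_buffer() returns -EINVAL and the
 * copying helpers have to be used instead.
 */
#if 0
static int example_peek_u64(struct extent_buffer *eb, unsigned long offset,
			    u64 *result)
{
	char *token;
	char *map;
	unsigned long map_start;
	unsigned long map_len;
	int err;

	err = map_extent_buffer(eb, offset, sizeof(u64), &token, &map,
				&map_start, &map_len, KM_USER0);
	if (err) {
		/* the field straddles a page: fall back to a copy */
		read_extent_buffer(eb, result, offset, sizeof(u64));
		return 0;
	}
	memcpy(result, map + (offset - map_start), sizeof(u64));
	unmap_extent_buffer(eb, token, KM_USER0);
	return 0;
}
#endif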
2175
2176void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km)
2177{
2178 // kunmap_atomic(token, km);
2179}
2180EXPORT_SYMBOL(unmap_extent_buffer);
2181
2182int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
2183 unsigned long start,
2184 unsigned long len)
2185{
2186 size_t cur;
2187 size_t offset;
2188 struct page *page;
2189 char *kaddr;
2190 char *ptr = (char *)ptrv;
2191 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2192 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2193 int ret = 0;
2194
2195 WARN_ON(start > eb->len);
2196 WARN_ON(start + len > eb->start + eb->len);
2197
2198 page = eb->pages[i];
2199 offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
2200 if (i == 0)
2201 offset += start_offset;
2202
2203 while(len > 0) {
2204 WARN_ON(!PageUptodate(page));
2205
2206 cur = min(len, (PAGE_CACHE_SIZE - offset));
2207
2208 // kaddr = kmap_atomic(page, KM_USER0);
2209 kaddr = page_address(page);
2210 ret = memcmp(ptr, kaddr + offset, cur);
2211 // kunmap_atomic(kaddr, KM_USER0);
2212 if (ret)
2213 break;
2214
2215 ptr += cur;
2216 len -= cur;
2217 offset = 0;
2218 i++;
2219 page = eb->pages[i];
2220 }
2221 return ret;
2222}
2223EXPORT_SYMBOL(memcmp_extent_buffer);
2224
2225void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
2226 unsigned long start, unsigned long len)
2227{
2228 size_t cur;
2229 size_t offset;
2230 struct page *page;
2231 char *kaddr;
2232 char *src = (char *)srcv;
2233 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2234 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2235
2236 WARN_ON(start > eb->len);
2237 WARN_ON(start + len > eb->start + eb->len);
2238
2239 page = eb->pages[i];
2240 offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
2241 if (i == 0)
2242 offset += start_offset;
2243
2244 while(len > 0) {
2245 WARN_ON(!PageUptodate(page));
2246
2247 cur = min(len, PAGE_CACHE_SIZE - offset);
2248 // kaddr = kmap_atomic(page, KM_USER0);
2249 kaddr = page_address(page);
2250 memcpy(kaddr + offset, src, cur);
2251 // kunmap_atomic(kaddr, KM_USER0);
2252
2253 src += cur;
2254 len -= cur;
2255 offset = 0;
2256 i++;
2257 page = eb->pages[i];
2258 }
2259}
2260EXPORT_SYMBOL(write_extent_buffer);
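/*
 * A minimal sketch of the write-side pairing, assuming a freshly allocated
 * buffer; the helper name example_fill_block is illustrative only.
 * write_extent_buffer() warns if the pages are not uptodate, so the buffer
 * is marked uptodate before it is filled and dirtied.
 */
#if 0
static void example_fill_block(struct extent_map_tree *tree,
			       struct extent_buffer *eb,
			       const void *data, unsigned long len)
{
	set_extent_buffer_uptodate(tree, eb);
	write_extent_buffer(eb, data, 0, len);
	/* record in the tree that this range needs to be written back */
	set_extent_buffer_dirty(tree, eb);
}
#endif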
2261
2262void memset_extent_buffer(struct extent_buffer *eb, char c,
2263 unsigned long start, unsigned long len)
2264{
2265 size_t cur;
2266 size_t offset;
2267 struct page *page;
2268 char *kaddr;
2269 size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
2270 unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
2271
2272 WARN_ON(start > eb->len);
2273 WARN_ON(start + len > eb->start + eb->len);
2274
2275 page = eb->pages[i];
2276 offset = start & ((unsigned long)PAGE_CACHE_SIZE - 1);
2277 if (i == 0)
2278 offset += start_offset;
2279
2280 while(len > 0) {
2281 WARN_ON(!PageUptodate(page));
2282
2283 cur = min(len, PAGE_CACHE_SIZE - offset);
2284 // kaddr = kmap_atomic(page, KM_USER0);
2285 kaddr = page_address(page);
2286 memset(kaddr + offset, c, cur);
2287 // kunmap_atomic(kaddr, KM_USER0);
2288
2289 len -= cur;
2290 offset = 0;
2291 i++;
2292 page = eb->pages[i];
2293 }
2294}
2295EXPORT_SYMBOL(memset_extent_buffer);
2296
2297void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
2298 unsigned long dst_offset, unsigned long src_offset,
2299 unsigned long len)
2300{
2301 u64 dst_len = dst->len;
2302 size_t cur;
2303 size_t offset;
2304 struct page *page;
2305 char *kaddr;
2306 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2307 unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
2308
2309 WARN_ON(src->len != dst_len);
2310
2311 offset = dst_offset & ((unsigned long)PAGE_CACHE_SIZE - 1);
2312 if (i == 0)
2313 offset += start_offset;
2314
2315 while(len > 0) {
2316 page = dst->pages[i];
2317 WARN_ON(!PageUptodate(page));
2318
2319 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
2320
2321 // kaddr = kmap_atomic(page, KM_USER1);
2322 kaddr = page_address(page);
2323 read_extent_buffer(src, kaddr + offset, src_offset, cur);
2324 // kunmap_atomic(kaddr, KM_USER1);
2325
2326 src_offset += cur;
2327 len -= cur;
2328 offset = 0;
2329 i++;
2330 }
2331}
2332EXPORT_SYMBOL(copy_extent_buffer);
2333
2334static void move_pages(struct page *dst_page, struct page *src_page,
2335 unsigned long dst_off, unsigned long src_off,
2336 unsigned long len)
2337{
2338 // char *dst_kaddr = kmap_atomic(dst_page, KM_USER0);
2339 char *dst_kaddr = page_address(dst_page);
2340 if (dst_page == src_page) {
2341 memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len);
2342 } else {
2343 // char *src_kaddr = kmap_atomic(src_page, KM_USER1);
2344 char *src_kaddr = page_address(src_page);
2345 char *p = dst_kaddr + dst_off + len;
2346 char *s = src_kaddr + src_off + len;
2347
2348 while (len--)
2349 *--p = *--s;
2350
2351 // kunmap_atomic(src_kaddr, KM_USER1);
2352 }
2353 // kunmap_atomic(dst_kaddr, KM_USER0);
2354}
2355
2356static void copy_pages(struct page *dst_page, struct page *src_page,
2357 unsigned long dst_off, unsigned long src_off,
2358 unsigned long len)
2359{
2360 //kmap_atomic(dst_page, KM_USER0);
2361 char *dst_kaddr = page_address(dst_page);
2362 char *src_kaddr;
2363
2364 if (dst_page != src_page)
2365 src_kaddr = page_address(src_page); // kmap_atomic(src_page, KM_USER1);
2366 else
2367 src_kaddr = dst_kaddr;
2368
2369 memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
2370 /*
2371 kunmap_atomic(dst_kaddr, KM_USER0);
2372 if (dst_page != src_page)
2373 kunmap_atomic(src_kaddr, KM_USER1);
2374 */
2375}
2376
2377void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
2378 unsigned long src_offset, unsigned long len)
2379{
2380 size_t cur;
2381 size_t dst_off_in_page;
2382 size_t src_off_in_page;
2383 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2384 unsigned long dst_i;
2385 unsigned long src_i;
2386
2387 if (src_offset + len > dst->len) {
2388 printk("memmove bogus src_offset %lu move len %lu len %lu\n",
2389 src_offset, len, dst->len);
2390 BUG_ON(1);
2391 }
2392 if (dst_offset + len > dst->len) {
2393 printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
2394 dst_offset, len, dst->len);
2395 BUG_ON(1);
2396 }
2397
2398 while(len > 0) {
2399 dst_off_in_page = dst_offset &
2400 ((unsigned long)PAGE_CACHE_SIZE - 1);
2401 src_off_in_page = src_offset &
2402 ((unsigned long)PAGE_CACHE_SIZE - 1);
2403
2404 dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
2405 src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
2406
2407 if (src_i == 0)
2408 src_off_in_page += start_offset;
2409 if (dst_i == 0)
2410 dst_off_in_page += start_offset;
2411
2412 cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
2413 src_off_in_page));
2414 cur = min(cur, (unsigned long)(PAGE_CACHE_SIZE -
2415 dst_off_in_page));
2416
2417 copy_pages(dst->pages[dst_i], dst->pages[src_i],
2418 dst_off_in_page, src_off_in_page, cur);
2419
2420 src_offset += cur;
2421 dst_offset += cur;
2422 len -= cur;
2423 }
2424}
2425EXPORT_SYMBOL(memcpy_extent_buffer);
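/*
 * Worked example of the chunking above (assuming 4K pages and a
 * page-aligned eb): copying len = 3000 bytes from src_offset = 3500 to
 * dst_offset = 100 clamps the first chunk to
 * cur = min(3000, 4096 - 3500, 4096 - 100) = 596 bytes (limited by the
 * source page); the remaining 2404 bytes are copied in a second pass
 * starting at src_offset = 4096, dst_offset = 696.
 */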
2426
2427void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
2428 unsigned long src_offset, unsigned long len)
2429{
2430 size_t cur;
2431 size_t dst_off_in_page;
2432 size_t src_off_in_page;
2433 unsigned long dst_end = dst_offset + len - 1;
2434 unsigned long src_end = src_offset + len - 1;
2435 size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
2436 unsigned long dst_i;
2437 unsigned long src_i;
2438
2439 if (src_offset + len > dst->len) {
2440 printk("memmove bogus src_offset %lu move len %lu len %lu\n",
2441 src_offset, len, dst->len);
2442 BUG_ON(1);
2443 }
2444 if (dst_offset + len > dst->len) {
2445 printk("memmove bogus dst_offset %lu move len %lu len %lu\n",
2446 dst_offset, len, dst->len);
2447 BUG_ON(1);
2448 }
2449 if (dst_offset < src_offset) {
2450 memcpy_extent_buffer(dst, dst_offset, src_offset, len);
2451 return;
2452 }
2453 while(len > 0) {
2454 dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
2455 src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
2456
2457 dst_off_in_page = dst_end &
2458 ((unsigned long)PAGE_CACHE_SIZE - 1);
2459 src_off_in_page = src_end &
2460 ((unsigned long)PAGE_CACHE_SIZE - 1);
2461
2462 if (src_i == 0)
2463 src_off_in_page += start_offset;
2464 if (dst_i == 0)
2465 dst_off_in_page += start_offset;
2466
2467 cur = min(len, src_off_in_page + 1);
2468 cur = min(cur, dst_off_in_page + 1);
2469// printk("move pages orig dst %lu src %lu len %lu, this %lu %lu %lu\n", dst_offset, src_offset, len, dst_off_in_page - cur + 1, src_off_in_page - cur + 1, cur);
2470 move_pages(dst->pages[dst_i], dst->pages[src_i],
2471 dst_off_in_page - cur + 1,
2472 src_off_in_page - cur + 1, cur);
2473
2474	dst_end -= cur;
2475	src_end -= cur;
2476 len -= cur;
2477 }
2478}
2479EXPORT_SYMBOL(memmove_extent_buffer);
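/*
 * Worked example of the overlap handling above (assuming 4K pages and a
 * page-aligned eb): moving len = 4096 bytes from src_offset = 2048 to
 * dst_offset = 3072 overlaps forward, so the copy walks backwards from
 * dst_end = 7167 / src_end = 6143.  The chunks come out as 2048 bytes
 * within page 1, then 1024 bytes from page 0 into page 1 (limited by the
 * destination page boundary), then the final 1024 bytes within page 0.
 * Same-page chunks are handled by memmove() inside move_pages(), while the
 * cross-page chunk is copied back to front by the explicit byte loop.
 */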