/*
 * Copyright (C) 2011 STRATO.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/btrfs.h>

#include "transaction.h"
#include "extent_io.h"
/* TODO XXX FIXME
 *  - subvol delete -> delete when ref goes to 0? delete limits also?
 *  - copy also limits on subvol creation
 *  - caches for ulists
 *  - performance benchmarks
 *  - check all ioctl parameters
 */
/*
 * one struct for each qgroup, organized in fs_info->qgroup_tree.
 */
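/*
 * Convention reminder: a qgroupid encodes a level in its high 16 bits and
 * an id in its low 48 bits; level-0 qgroups track subvolumes directly,
 * higher-level qgroups are user-created groups of groups.
 */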
	u64 rfer;	/* referenced */
	u64 rfer_cmpr;	/* referenced compressed */
	u64 excl;	/* exclusive */
	u64 excl_cmpr;	/* exclusive compressed */

	u64 lim_flags;	/* which limits are set */

	/*
	 * reservation tracking
	 */

	struct list_head groups;  /* groups this group is member of */
	struct list_head members; /* groups that are members of this group */
	struct list_head dirty;   /* dirty groups */
	struct rb_node node;	  /* tree of qgroups */

	/*
	 * temp variables for accounting operations
	 */
/*
 * glue structure to represent the relations between qgroups.
 */
struct btrfs_qgroup_list {
	struct list_head next_group;
	struct list_head next_member;
	struct btrfs_qgroup *group;
	struct btrfs_qgroup *member;
};
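/*
 * Illustrative sketch (mirrors the walks used further down in this file):
 * every parent/child relation is one btrfs_qgroup_list hanging off both
 * sides, so walking all parents of a qgroup looks like
 *
 *	struct btrfs_qgroup_list *glist;
 *
 *	list_for_each_entry(glist, &qg->groups, next_group)
 *		visit(glist->group);	// visit() is a placeholder
 *
 * and walking all members of a parent iterates &parent->members via
 * next_member, looking at glist->member instead.
 */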
static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags);
static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
/* must be called with qgroup_ioctl_lock held */
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
					   u64 qgroupid)
{
	struct rb_node *n = fs_info->qgroup_tree.rb_node;
	struct btrfs_qgroup *qgroup;

	while (n) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		if (qgroup->qgroupid < qgroupid)
			n = n->rb_left;
		else if (qgroup->qgroupid > qgroupid)
			n = n->rb_right;
		else
			return qgroup;
	}
	return NULL;
}
/* must be called with qgroup_lock held */
static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
					  u64 qgroupid)
{
	struct rb_node **p = &fs_info->qgroup_tree.rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_qgroup *qgroup;

	while (*p) {
		parent = *p;
		qgroup = rb_entry(parent, struct btrfs_qgroup, node);

		if (qgroup->qgroupid < qgroupid)
			p = &(*p)->rb_left;
		else if (qgroup->qgroupid > qgroupid)
			p = &(*p)->rb_right;
		else
			return qgroup;
	}

	qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
	if (!qgroup)
		return ERR_PTR(-ENOMEM);

	qgroup->qgroupid = qgroupid;
	INIT_LIST_HEAD(&qgroup->groups);
	INIT_LIST_HEAD(&qgroup->members);
	INIT_LIST_HEAD(&qgroup->dirty);

	rb_link_node(&qgroup->node, parent, p);
	rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);

	return qgroup;
}
static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
{
	struct btrfs_qgroup_list *list;

	list_del(&qgroup->dirty);
	while (!list_empty(&qgroup->groups)) {
		list = list_first_entry(&qgroup->groups,
					struct btrfs_qgroup_list, next_group);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}

	while (!list_empty(&qgroup->members)) {
		list = list_first_entry(&qgroup->members,
					struct btrfs_qgroup_list, next_member);
		list_del(&list->next_group);
		list_del(&list->next_member);
		kfree(list);
	}
	kfree(qgroup);
}
/* must be called with qgroup_lock held */
static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);

	if (!qgroup)
		return -ENOENT;

	rb_erase(&qgroup->node, &fs_info->qgroup_tree);
	__del_qgroup_rb(qgroup);
	return 0;
}
/* must be called with qgroup_lock held */
static int add_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *member;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup_list *list;

	member = find_qgroup_rb(fs_info, memberid);
	parent = find_qgroup_rb(fs_info, parentid);
	if (!member || !parent)
		return -ENOENT;

	list = kzalloc(sizeof(*list), GFP_ATOMIC);
	if (!list)
		return -ENOMEM;

	list->group = parent;
	list->member = member;
	list_add_tail(&list->next_group, &member->groups);
	list_add_tail(&list->next_member, &parent->members);

	return 0;
}
/* must be called with qgroup_lock held */
static int del_relation_rb(struct btrfs_fs_info *fs_info,
			   u64 memberid, u64 parentid)
{
	struct btrfs_qgroup *member;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup_list *list;

	member = find_qgroup_rb(fs_info, memberid);
	parent = find_qgroup_rb(fs_info, parentid);
	if (!member || !parent)
		return -ENOENT;

	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			list_del(&list->next_group);
			list_del(&list->next_member);
			kfree(list);
			return 0;
		}
	}
	return -ENOENT;
}
/*
 * The full config is read in one go, only called from open_ctree().
 * It doesn't use any locking, as at this point we're still single-threaded.
 */
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_path *path = NULL;
	struct extent_buffer *l;
	int slot;
	int ret = 0;
	u64 flags = 0;
	u64 rescan_progress = 0;

	if (!fs_info->quota_enabled)
		return 0;

	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	path = btrfs_alloc_path();

	/* default this to quota off, in case no status key is found */
	fs_info->qgroup_flags = 0;

	/*
	 * pass 1: read status, all qgroup infos and limits
	 */
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
	if (ret)
		goto out;

	while (1) {
		struct btrfs_qgroup *qgroup;

		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
			struct btrfs_qgroup_status_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_status_item);

			if (btrfs_qgroup_status_version(l, ptr) !=
			    BTRFS_QGROUP_STATUS_VERSION) {
				btrfs_err(fs_info,
					"old qgroup version, quota disabled");
				goto out;
			}
			if (btrfs_qgroup_status_generation(l, ptr) !=
			    fs_info->generation) {
				flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
				btrfs_err(fs_info,
					"qgroup generation mismatch, "
					"marked as inconsistent");
			}
			fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
									  ptr);
			rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
			goto next1;
		}

		if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
		    found_key.type != BTRFS_QGROUP_LIMIT_KEY)
			goto next1;

		qgroup = find_qgroup_rb(fs_info, found_key.offset);
		if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
		    (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
			btrfs_err(fs_info, "inconsistent qgroup config");
			flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		}
		if (!qgroup) {
			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out;
			}
		}
		switch (found_key.type) {
		case BTRFS_QGROUP_INFO_KEY: {
			struct btrfs_qgroup_info_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_info_item);
			qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
			qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
			qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
			qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
			/* generation currently unused */
			break;
		}
		case BTRFS_QGROUP_LIMIT_KEY: {
			struct btrfs_qgroup_limit_item *ptr;

			ptr = btrfs_item_ptr(l, slot,
					     struct btrfs_qgroup_limit_item);
			qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
			qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
			qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
			qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
			qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
			break;
		}
		}
next1:
		ret = btrfs_next_item(quota_root, path);
		if (ret)
			break;
	}
	btrfs_release_path(path);

	/*
	 * pass 2: read all qgroup relations
	 */
	key.type = BTRFS_QGROUP_RELATION_KEY;
	ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
	if (ret)
		goto out;
	while (1) {
		slot = path->slots[0];
		l = path->nodes[0];
		btrfs_item_key_to_cpu(l, &found_key, slot);

		if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
			goto next2;

		if (found_key.objectid > found_key.offset) {
			/* parent <- member, not needed to build config */
			/* FIXME should we omit the key completely? */
			goto next2;
		}

		ret = add_relation_rb(fs_info, found_key.objectid,
				      found_key.offset);
		if (ret == -ENOENT) {
			btrfs_warn(fs_info,
				"orphan qgroup relation 0x%llx->0x%llx",
				found_key.objectid, found_key.offset);
			ret = 0;	/* ignore the error */
		}
next2:
		ret = btrfs_next_item(quota_root, path);
		if (ret)
			break;
	}
out:
	fs_info->qgroup_flags |= flags;
	if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
		fs_info->quota_enabled = 0;
		fs_info->pending_quota_state = 0;
	} else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
		   ret >= 0) {
		ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
	}
	btrfs_free_path(path);

	if (ret < 0) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	return ret < 0 ? ret : 0;
}
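/*
 * For orientation, the quota tree layout the function above walks: one
 * BTRFS_QGROUP_STATUS_KEY item for the whole tree, one BTRFS_QGROUP_INFO_KEY
 * and one BTRFS_QGROUP_LIMIT_KEY item per qgroup with the qgroupid stored in
 * the key offset (pass 1), and a pair of BTRFS_QGROUP_RELATION_KEY items per
 * parent/child relation with the two qgroupids in key objectid and offset
 * (pass 2).
 */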
/*
 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
 * the first two are in single-threaded paths. And for the third one, we have
 * set quota_root to be null with qgroup_lock held before, so it is safe to
 * clean up the in-memory structures without qgroup_lock held.
 */
void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	while ((n = rb_first(&fs_info->qgroup_tree))) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		rb_erase(n, &fs_info->qgroup_tree);
		__del_qgroup_rb(qgroup);
	}
	/*
	 * we call btrfs_free_qgroup_config() when umounting
	 * filesystem and disabling quota, so we set qgroup_ulist
	 * to be null here to avoid double free.
	 */
	ulist_free(fs_info->qgroup_ulist);
	fs_info->qgroup_ulist = NULL;
}
static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *quota_root,
				    u64 src, u64 dst)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = src;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = dst;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);

	btrfs_mark_buffer_dirty(path->nodes[0]);

	btrfs_free_path(path);
	return ret;
}
static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *quota_root,
				    u64 src, u64 dst)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = src;
	key.type = BTRFS_QGROUP_RELATION_KEY;
	key.offset = dst;

	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret)
		goto out;

	ret = btrfs_del_item(trans, quota_root, path);
out:
	btrfs_free_path(path);
	return ret;
}
static int add_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_qgroup_info_item *qgroup_info;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_info));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);

	btrfs_mark_buffer_dirty(leaf);

	btrfs_release_path(path);

	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_limit));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);

	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
static int del_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret)
		goto out;

	ret = btrfs_del_item(trans, quota_root, path);
	if (ret)
		goto out;

	btrfs_release_path(path);

	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret)
		goto out;

	ret = btrfs_del_item(trans, quota_root, path);
out:
	btrfs_free_path(path);
	return ret;
}
static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root, u64 qgroupid,
				    u64 flags, u64 max_rfer, u64 max_excl,
				    u64 rsv_rfer, u64 rsv_excl)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	int ret;
	int slot;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	key.offset = qgroupid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	qgroup_limit = btrfs_item_ptr(l, slot, struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
	btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
	btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
	btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
	btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}
static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_qgroup *qgroup)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_info_item *qgroup_info;
	int ret;
	int slot;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroup->qgroupid;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	qgroup_info = btrfs_item_ptr(l, slot, struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
	btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
	btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
	btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}
static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info,
				     struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_status_item *ptr;
	int ret;
	int slot;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
	btrfs_set_qgroup_status_rescan(l, ptr,
				fs_info->qgroup_rescan_progress.objectid);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}
/*
 * called with qgroup_lock held
 */
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf = NULL;
	int ret;
	int nr = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		leaf = path->nodes[0];
		nr = btrfs_header_nritems(leaf);
		if (!nr)
			break;
		/*
		 * delete the leaf one by one
		 * since the whole tree is going
		 * to be deleted.
		 */
		ret = btrfs_del_items(trans, root, path, 0, nr);

		btrfs_release_path(path);
	}

	root->fs_info->pending_quota_state = 0;
	btrfs_free_path(path);
	return ret;
}
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path = NULL;
	struct btrfs_qgroup_status_item *ptr;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_qgroup *qgroup = NULL;
	int ret = 0;
	int slot;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (fs_info->quota_root) {
		fs_info->pending_quota_state = 1;
		goto out;
	}

	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * initially create the quota tree
	 */
	quota_root = btrfs_create_tree(trans, fs_info,
				       BTRFS_QUOTA_TREE_OBJECTID);
	if (IS_ERR(quota_root)) {
		ret = PTR_ERR(quota_root);
		goto out;
	}

	path = btrfs_alloc_path();

	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*ptr));
	if (ret)
		goto out_free_path;

	leaf = path->nodes[0];
	ptr = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

	btrfs_mark_buffer_dirty(leaf);

	key.objectid = 0;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = 0;

	btrfs_release_path(path);
	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
	if (ret < 0)
		goto out_free_path;

	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.type == BTRFS_ROOT_REF_KEY) {
			ret = add_qgroup_item(trans, quota_root,
					      found_key.offset);
			if (ret)
				goto out_free_path;

			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out_free_path;
			}
		}
		ret = btrfs_next_item(tree_root, path);
		if (ret)
			break;
	}

	btrfs_release_path(path);
	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
	if (ret)
		goto out_free_path;

	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
	if (IS_ERR(qgroup)) {
		ret = PTR_ERR(qgroup);
		goto out_free_path;
	}
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_root = quota_root;
	fs_info->pending_quota_state = 1;
	spin_unlock(&fs_info->qgroup_lock);
out_free_path:
	btrfs_free_path(path);
	if (ret) {
		free_extent_buffer(quota_root->node);
		free_extent_buffer(quota_root->commit_root);

		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
	}
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *quota_root;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root)
		goto out;
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_enabled = 0;
	fs_info->pending_quota_state = 0;
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	spin_unlock(&fs_info->qgroup_lock);

	btrfs_free_qgroup_config(fs_info);

	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	btrfs_tree_lock(quota_root->node);
	clean_tree_block(trans, tree_root, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
static void qgroup_dirty(struct btrfs_fs_info *fs_info,
			 struct btrfs_qgroup *qgroup)
{
	if (list_empty(&qgroup->dirty))
		list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
}
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;

	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* check if such a qgroup relation exists first */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent) {
			ret = -EEXIST;
			goto out;
		}
	}

	ret = add_qgroup_relation_item(trans, quota_root, src, dst);
	if (ret)
		goto out;

	ret = add_qgroup_relation_item(trans, quota_root, dst, src);
	if (ret) {
		del_qgroup_relation_item(trans, quota_root, src, dst);
		goto out;
	}

	spin_lock(&fs_info->qgroup_lock);
	ret = add_relation_rb(quota_root->fs_info, src, dst);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
			      struct btrfs_fs_info *fs_info, u64 src, u64 dst)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *parent;
	struct btrfs_qgroup *member;
	struct btrfs_qgroup_list *list;
	int ret = 0;
	int err;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;

	member = find_qgroup_rb(fs_info, src);
	parent = find_qgroup_rb(fs_info, dst);
	if (!member || !parent) {
		ret = -EINVAL;
		goto out;
	}

	/* check if such a qgroup relation exists first */
	list_for_each_entry(list, &member->groups, next_group) {
		if (list->group == parent)
			goto exist;
	}
	ret = -ENOENT;
	goto out;
exist:
	ret = del_qgroup_relation_item(trans, quota_root, src, dst);
	err = del_qgroup_relation_item(trans, quota_root, dst, src);
	if (err && !ret)
		ret = err;

	spin_lock(&fs_info->qgroup_lock);
	del_relation_rb(fs_info, src, dst);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (qgroup) {
		ret = -EEXIST;
		goto out;
	}

	ret = add_qgroup_item(trans, quota_root, qgroupid);
	if (ret)
		goto out;

	spin_lock(&fs_info->qgroup_lock);
	qgroup = add_qgroup_rb(fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);

	if (IS_ERR(qgroup))
		ret = PTR_ERR(qgroup);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info, u64 qgroupid)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;

	qgroup = find_qgroup_rb(fs_info, qgroupid);

	/* check if there are no relations to this qgroup */
	if (!list_empty(&qgroup->groups) ||
	    !list_empty(&qgroup->members)) {
		ret = -EBUSY;
		goto out;
	}

	ret = del_qgroup_item(trans, quota_root, qgroupid);

	spin_lock(&fs_info->qgroup_lock);
	del_qgroup_rb(quota_root->fs_info, qgroupid);
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;

	qgroup = find_qgroup_rb(fs_info, qgroupid);

	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
				       limit->flags, limit->max_rfer,
				       limit->max_excl, limit->rsv_rfer,
				       limit->rsv_excl);
	if (ret) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		btrfs_info(fs_info, "unable to update quota limit for %llu",
			   qgroupid);
	}

	spin_lock(&fs_info->qgroup_lock);
	qgroup->lim_flags = limit->flags;
	qgroup->max_rfer = limit->max_rfer;
	qgroup->max_excl = limit->max_excl;
	qgroup->rsv_rfer = limit->rsv_rfer;
	qgroup->rsv_excl = limit->rsv_excl;
	spin_unlock(&fs_info->qgroup_lock);

	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
/*
 * btrfs_qgroup_record_ref is called when the ref is added or deleted. It puts
 * the modification into a list that's later used by btrfs_end_transaction to
 * pass the recorded modifications on to btrfs_qgroup_account_ref.
 */
int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
			    struct btrfs_delayed_ref_node *node,
			    struct btrfs_delayed_extent_op *extent_op)
{
	struct qgroup_update *u;

	BUG_ON(!trans->delayed_ref_elem.seq);
	u = kmalloc(sizeof(*u), GFP_NOFS);
	if (!u)
		return -ENOMEM;

	u->node = node;
	u->extent_op = extent_op;
	list_add_tail(&u->list, &trans->qgroup_ref_list);

	return 0;
}
static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		/* XXX id not needed */
		ret = ulist_add(tmp, qg->qgroupid,
				(u64)(uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			if (qg->refcnt < seq)
				qg->refcnt = seq + 1;
			else
				++qg->refcnt;

			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(u64)(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes,
				    struct btrfs_qgroup *qgroup)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct btrfs_qgroup_list *glist;
	int ret;

	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		return ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
		if (qg->refcnt < seq) {
			/* not visited by step 1 */
			qg->rfer += sgn * num_bytes;
			qg->rfer_cmpr += sgn * num_bytes;
			if (roots->nnodes == 0) {
				qg->excl += sgn * num_bytes;
				qg->excl_cmpr += sgn * num_bytes;
			}
			qgroup_dirty(fs_info, qg);
		}
		WARN_ON(qg->tag >= seq);
		qg->tag = seq;

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
	}

	return 0;
}
static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;

		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			if (qg->tag == seq)
				continue;

			if (qg->refcnt - seq == roots->nnodes) {
				qg->excl -= sgn * num_bytes;
				qg->excl_cmpr -= sgn * num_bytes;
				qgroup_dirty(fs_info, qg);
			}

			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
/*
 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
 * from the fs. First, all roots referencing the extent are searched, and
 * then the space is accounted accordingly to the different roots. The
 * accounting algorithm works in 3 steps documented inline.
 */
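/*
 * Illustrative walk-through (numbers chosen only for the example): assume
 * seq = 100 and the extent was referenced by 2 roots before the change
 * (roots->nnodes == 2). Step 1 visits every qgroup reachable from those old
 * roots and sets refcnt to seq + <number of old roots that reach it>, so a
 * qgroup reached by both roots ends up with refcnt == 102. Step 2 walks up
 * from the qgroup of the changed root and adjusts rfer (and excl when there
 * were no other roots) for qgroups that step 1 never visited (refcnt < seq).
 * Step 3 walks the old roots again: a qgroup whose refcnt - seq equals
 * roots->nnodes was reachable from all old roots, so its exclusive count
 * changes by sgn * num_bytes.
 */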
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info,
			     struct btrfs_delayed_ref_node *node,
			     struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_root *quota_root;
	u64 ref_root;
	struct btrfs_qgroup *qgroup;
	struct ulist *roots = NULL;
	u64 seq;
	int ret = 0;
	int sgn;

	if (!fs_info->quota_enabled)
		return 0;

	BUG_ON(!fs_info->quota_root);

	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		struct btrfs_delayed_tree_ref *ref;
		ref = btrfs_delayed_node_to_tree_ref(node);
		ref_root = ref->root;
	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_delayed_data_ref *ref;
		ref = btrfs_delayed_node_to_data_ref(node);
		ref_root = ref->root;
	}

	if (!is_fstree(ref_root)) {
		/*
		 * non-fs-trees are not being accounted
		 */
		return 0;
	}

	switch (node->action) {
	case BTRFS_ADD_DELAYED_REF:
	case BTRFS_ADD_DELAYED_EXTENT:
		sgn = 1;
		seq = btrfs_tree_mod_seq_prev(node->seq);
		break;
	case BTRFS_DROP_DELAYED_REF:
		sgn = -1;
		seq = node->seq;
		break;
	case BTRFS_UPDATE_DELAYED_HEAD:
		return 0;
	}

	mutex_lock(&fs_info->qgroup_rescan_lock);
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			return 0;
		}
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	/*
	 * the delayed ref sequence number we pass depends on the direction of
	 * the operation. for add operations, we pass
	 * tree_mod_log_prev_seq(node->seq) to skip
	 * the delayed ref's current sequence number, because we need the state
	 * of the tree before the add operation. for delete operations, we pass
	 * (node->seq) to include the delayed ref's current sequence number,
	 * because we need the state of the tree after the delete operation.
	 */
	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
	if (ret < 0)
		return ret;

	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto unlock;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto unlock;

	/*
	 * step 1: for each old ref, visit all nodes once and inc refcnt
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	seq = fs_info->qgroup_seq;
	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

	ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist,
				       seq);
	if (ret)
		goto unlock;

	/*
	 * step 2: walk from the new root
	 */
	ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist,
				       seq, sgn, node->num_bytes, qgroup);
	if (ret)
		goto unlock;

	/*
	 * step 3: walk again from old refs
	 */
	ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist,
				       seq, sgn, node->num_bytes);

unlock:
	spin_unlock(&fs_info->qgroup_lock);
	ulist_free(roots);

	return ret;
}
/*
 * called from commit_transaction. Writes all changed qgroups to disk.
 */
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root = fs_info->quota_root;
	int ret = 0;
	int start_rescan_worker = 0;

	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
		start_rescan_worker = 1;

	fs_info->quota_enabled = fs_info->pending_quota_state;

	spin_lock(&fs_info->qgroup_lock);
	while (!list_empty(&fs_info->dirty_qgroups)) {
		struct btrfs_qgroup *qgroup;
		qgroup = list_first_entry(&fs_info->dirty_qgroups,
					  struct btrfs_qgroup, dirty);
		list_del_init(&qgroup->dirty);
		spin_unlock(&fs_info->qgroup_lock);
		ret = update_qgroup_info_item(trans, quota_root, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		spin_lock(&fs_info->qgroup_lock);
	}
	if (fs_info->quota_enabled)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
	else
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_status_item(trans, fs_info, quota_root);
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;

	if (!ret && start_rescan_worker) {
		ret = qgroup_rescan_init(fs_info, 0, 1);
		if (!ret) {
			qgroup_rescan_zero_tracking(fs_info);
			btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
					   &fs_info->qgroup_rescan_work);
		}
	}

	return ret;
}
/*
 * copy the accounting information between qgroups. This is necessary when a
 * snapshot or a subvolume is created.
 */
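/*
 * Worked example (illustrative numbers): when snapshotting a subvolume whose
 * qgroup shows rfer == 1 GiB, the snapshot initially shares every extent
 * except its own root node, so the new qgroup starts at
 * dstgroup->rfer = srcgroup->rfer - level_size, while the source's exclusive
 * count collapses to exactly that one tree block, srcgroup->excl = level_size
 * (level_size being the byte size of a tree block at the source root level).
 */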
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
			 struct btrfs_qgroup_inherit *inherit)
{
	int ret = 0;
	int i;
	u64 *i_qgroups;
	u64 nums;
	u64 level_size = 0;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_qgroup *srcgroup;
	struct btrfs_qgroup *dstgroup;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_enabled)
		goto out;

	i_qgroups = (u64 *)(inherit + 1);
	nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
	       2 * inherit->num_excl_copies;
	for (i = 0; i < nums; ++i) {
		srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
		if (!srcgroup) {
			ret = -EINVAL;
			goto out;
		}
		++i_qgroups;
	}

	/*
	 * create a tracking group for the subvol itself
	 */
	ret = add_qgroup_item(trans, quota_root, objectid);
	if (ret)
		goto out;

	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
		ret = update_qgroup_limit_item(trans, quota_root, objectid,
					       inherit->lim.flags,
					       inherit->lim.max_rfer,
					       inherit->lim.max_excl,
					       inherit->lim.rsv_rfer,
					       inherit->lim.rsv_excl);
	}

	if (srcid) {
		struct btrfs_root *srcroot;
		struct btrfs_key srckey;
		int srcroot_level;

		srckey.objectid = srcid;
		srckey.type = BTRFS_ROOT_ITEM_KEY;
		srckey.offset = (u64)-1;
		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
		if (IS_ERR(srcroot)) {
			ret = PTR_ERR(srcroot);
			goto out;
		}

		srcroot_level = btrfs_header_level(srcroot->node);
		level_size = btrfs_level_size(srcroot, srcroot_level);
	}

	/*
	 * add qgroup to all inherited groups
	 */
	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
		ret = add_qgroup_relation_item(trans, quota_root,
					       objectid, *i_qgroups);
		ret = add_qgroup_relation_item(trans, quota_root,
					       *i_qgroups, objectid);
		++i_qgroups;
	}

	spin_lock(&fs_info->qgroup_lock);

	dstgroup = add_qgroup_rb(fs_info, objectid);
	if (IS_ERR(dstgroup)) {
		ret = PTR_ERR(dstgroup);
		goto unlock;
	}

	if (srcid) {
		srcgroup = find_qgroup_rb(fs_info, srcid);

		dstgroup->rfer = srcgroup->rfer - level_size;
		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
		srcgroup->excl = level_size;
		srcgroup->excl_cmpr = level_size;
		qgroup_dirty(fs_info, dstgroup);
		qgroup_dirty(fs_info, srcgroup);
	}

	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
		ret = add_relation_rb(quota_root->fs_info, objectid,
				      *i_qgroups);
		++i_qgroups;
	}

	for (i = 0; i < inherit->num_ref_copies; ++i) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		dst->rfer = src->rfer - level_size;
		dst->rfer_cmpr = src->rfer_cmpr - level_size;
		i_qgroups += 2;
	}
	for (i = 0; i < inherit->num_excl_copies; ++i) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		dst->excl = src->excl + level_size;
		dst->excl_cmpr = src->excl_cmpr + level_size;
		i_qgroups += 2;
	}

unlock:
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
/*
 * reserve some space for a qgroup and all its parents. The reservation takes
 * place with start_transaction or dealloc_reserve, similar to ENOSPC
 * accounting. If not enough space is available, EDQUOT is returned.
 * We assume that the requested space is new for all qgroups.
 */
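/*
 * Illustrative check (mirrors the loop below): with a 1 MiB request against a
 * qgroup that has max_rfer == 10 MiB, rfer == 9 MiB and reserved == 512 KiB,
 * reserved + rfer + num_bytes = 10.5 MiB > max_rfer, so the whole reservation
 * fails with -EDQUOT; the same test is applied to every parent qgroup before
 * any counter is touched.
 */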
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;
	struct ulist_node *unode;
	struct ulist_iterator uiter;

	if (!is_fstree(ref_root))
		return 0;

	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/*
	 * in a first step, we check all affected qgroups if any limits would
	 * be exceeded
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
		    qg->reserved + (s64)qg->rfer + num_bytes >
		    qg->max_rfer) {
			ret = -EDQUOT;
			goto out;
		}

		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
		    qg->reserved + (s64)qg->excl + num_bytes >
		    qg->max_excl) {
			ret = -EDQUOT;
			goto out;
		}

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	ret = 0;
	/*
	 * no limits exceeded, now record the reservation into all qgroups
	 */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		qg->reserved += num_bytes;
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
	return ret;
}
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;

	if (!is_fstree(ref_root))
		return;

	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		qg->reserved -= num_bytes;

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
}
void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
{
	if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
		return;
	btrfs_err(trans->root->fs_info,
		"qgroups not uptodate in trans handle %p: list is%s empty, "
		"seq is %#x.%x",
		trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
		(u32)(trans->delayed_ref_elem.seq >> 32),
		(u32)trans->delayed_ref_elem.seq);
}
/*
 * returns < 0 on error, 0 when more leaves are to be scanned.
 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
 */
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
		   struct btrfs_trans_handle *trans, struct ulist *tmp,
		   struct extent_buffer *scratch_leaf)
{
	struct btrfs_key found;
	struct ulist *roots = NULL;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct seq_list tree_mod_seq_elem = {};
	u64 seq;
	int slot;
	int ret;

	path->leave_spinning = 1;
	mutex_lock(&fs_info->qgroup_rescan_lock);
	ret = btrfs_search_slot_for_read(fs_info->extent_root,
					 &fs_info->qgroup_rescan_progress,
					 path, 1, 0);

	pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
		 fs_info->qgroup_rescan_progress.objectid,
		 fs_info->qgroup_rescan_progress.type,
		 fs_info->qgroup_rescan_progress.offset, ret);

	if (ret) {
		/*
		 * The rescan is about to end, we will not be scanning any
		 * further blocks. We cannot unset the RESCAN flag here, because
		 * we want to commit the transaction if everything went well.
		 * To make the live accounting work in this phase, we set our
		 * scan progress pointer such that every real extent objectid
		 * will be smaller.
		 */
		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
		btrfs_release_path(path);
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		return ret;
	}

	btrfs_item_key_to_cpu(path->nodes[0], &found,
			      btrfs_header_nritems(path->nodes[0]) - 1);
	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;

	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
	slot = path->slots[0];
	btrfs_release_path(path);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
		if (found.type != BTRFS_EXTENT_ITEM_KEY)
			continue;
		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
					   tree_mod_seq_elem.seq, &roots);
		if (ret < 0)
			goto out;
		spin_lock(&fs_info->qgroup_lock);
		seq = fs_info->qgroup_seq;
		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
		if (ret) {
			spin_unlock(&fs_info->qgroup_lock);
			ulist_free(roots);
			goto out;
		}

		/*
		 * step2 of btrfs_qgroup_account_ref works from a single root,
		 * we're doing all at once here.
		 */
		ulist_reinit(tmp);
		ULIST_ITER_INIT(&uiter);
		while ((unode = ulist_next(roots, &uiter))) {
			struct btrfs_qgroup *qg;

			qg = find_qgroup_rb(fs_info, unode->val);
			if (!qg)
				continue;

			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
					GFP_ATOMIC);
			if (ret < 0) {
				spin_unlock(&fs_info->qgroup_lock);
				ulist_free(roots);
				goto out;
			}
		}

		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
		ULIST_ITER_INIT(&uiter);
		while ((unode = ulist_next(tmp, &uiter))) {
			struct btrfs_qgroup *qg;
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
			qg->rfer += found.offset;
			qg->rfer_cmpr += found.offset;
			WARN_ON(qg->tag >= seq);
			if (qg->refcnt - seq == roots->nnodes) {
				qg->excl += found.offset;
				qg->excl_cmpr += found.offset;
			}
			qgroup_dirty(fs_info, qg);

			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0) {
					spin_unlock(&fs_info->qgroup_lock);
					ulist_free(roots);
					goto out;
				}
			}
		}

		spin_unlock(&fs_info->qgroup_lock);
		ulist_free(roots);
	}
out:
	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);

	return ret;
}
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
						     qgroup_rescan_work);
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans = NULL;
	struct ulist *tmp = NULL;
	struct extent_buffer *scratch_leaf = NULL;
	int err = -ENOMEM;

	path = btrfs_alloc_path();
	if (!path)
		goto out;
	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		goto out;
	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
	if (!scratch_leaf)
		goto out;

	err = 0;
	while (!err) {
		trans = btrfs_start_transaction(fs_info->fs_root, 0);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			break;
		}
		if (!fs_info->quota_enabled) {
			err = -EINTR;
		} else {
			err = qgroup_rescan_leaf(fs_info, path, trans,
						 tmp, scratch_leaf);
		}
		if (err > 0)
			btrfs_commit_transaction(trans, fs_info->fs_root);
		else
			btrfs_end_transaction(trans, fs_info->fs_root);
	}

out:
	kfree(scratch_leaf);
	ulist_free(tmp);
	btrfs_free_path(path);

	mutex_lock(&fs_info->qgroup_rescan_lock);
	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;

	if (err > 0 &&
	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	} else if (err < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (err >= 0) {
		btrfs_info(fs_info, "qgroup scan completed%s",
			err == 2 ? " (inconsistency flag cleared)" : "");
	} else {
		btrfs_err(fs_info, "qgroup scan failed with %d", err);
	}

	complete_all(&fs_info->qgroup_rescan_completion);
}
/*
 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
 * memory required for the rescan context.
 */
static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags)
{
	int ret = 0;

	if (!init_flags &&
	    (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
	     !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
		ret = -EINVAL;
		goto err;
	}

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);

	if (init_flags) {
		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
			ret = -EINPROGRESS;
		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
			ret = -EINVAL;

		if (ret) {
			spin_unlock(&fs_info->qgroup_lock);
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			goto err;
		}

		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	memset(&fs_info->qgroup_rescan_progress, 0,
		sizeof(fs_info->qgroup_rescan_progress));
	fs_info->qgroup_rescan_progress.objectid = progress_objectid;

	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	init_completion(&fs_info->qgroup_rescan_completion);

	memset(&fs_info->qgroup_rescan_work, 0,
	       sizeof(fs_info->qgroup_rescan_work));
	fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker;

	if (ret) {
err:
		btrfs_info(fs_info, "qgroup_rescan_init failed with %d", ret);
		return ret;
	}

	return 0;
}
static void
qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
{
	struct rb_node *n;
	struct btrfs_qgroup *qgroup;

	spin_lock(&fs_info->qgroup_lock);
	/* clear all current qgroup tracking information */
	for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
		qgroup = rb_entry(n, struct btrfs_qgroup, node);
		qgroup->rfer = 0;
		qgroup->rfer_cmpr = 0;
		qgroup->excl = 0;
		qgroup->excl_cmpr = 0;
	}
	spin_unlock(&fs_info->qgroup_lock);
}
int
btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
{
	int ret = 0;
	struct btrfs_trans_handle *trans;

	ret = qgroup_rescan_init(fs_info, 0, 1);
	if (ret)
		return ret;

	/*
	 * We have set the rescan_progress to 0, which means no more
	 * delayed refs will be accounted by btrfs_qgroup_account_ref.
	 * However, btrfs_qgroup_account_ref may be right after its call
	 * to btrfs_find_all_roots, in which case it would still do the
	 * accounting.
	 * To solve this, we're committing the transaction, which will
	 * ensure we run all delayed refs and only after that, we are
	 * going to clear all tracking information for a clean start.
	 */

	trans = btrfs_join_transaction(fs_info->fs_root);
	if (IS_ERR(trans)) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return PTR_ERR(trans);
	}
	ret = btrfs_commit_transaction(trans, fs_info->fs_root);
	if (ret) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return ret;
	}

	qgroup_rescan_zero_tracking(fs_info);

	btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
			   &fs_info->qgroup_rescan_work);

	return 0;
}
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
{
	int running;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);
	running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (running)
		ret = wait_for_completion_interruptible(
					&fs_info->qgroup_rescan_completion);

	return ret;
}
/*
 * this is only called from open_ctree where we're still single threaded, thus
 * locking is omitted here.
 */
void
btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
{
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
		btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
				   &fs_info->qgroup_rescan_work);
}