]> git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blame - fs/btrfs/qgroup.c
Btrfs: fix memory leak of orphan block rsv
[mirror_ubuntu-zesty-kernel.git] / fs / btrfs / qgroup.c
CommitLineData
bed92eae
AJ
1/*
2 * Copyright (C) 2011 STRATO. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 021110-1307, USA.
17 */
18
19#include <linux/sched.h>
20#include <linux/pagemap.h>
21#include <linux/writeback.h>
22#include <linux/blkdev.h>
23#include <linux/rbtree.h>
24#include <linux/slab.h>
25#include <linux/workqueue.h>
55e301fd 26#include <linux/btrfs.h>
bed92eae
AJ
27
28#include "ctree.h"
29#include "transaction.h"
30#include "disk-io.h"
31#include "locking.h"
32#include "ulist.h"
bed92eae 33#include "backref.h"
2f232036 34#include "extent_io.h"
bed92eae
AJ
35
/* TODO XXX FIXME
 * - subvol delete -> delete when ref goes to 0? delete limits also?
 * - reorganize keys
 * - compressed
 * - sync
 * - copy also limits on subvol creation
 * - limit
 * - caches for ulists
 * - performance benchmarks
 * - check all ioctl parameters
 */
47
/*
 * One struct for each qgroup, organized in fs_info->qgroup_tree and keyed
 * by qgroupid (see find_qgroup_rb()/add_qgroup_rb()).
 */
struct btrfs_qgroup {
	u64 qgroupid;

	/*
	 * state: current usage as tracked by qgroup accounting
	 */
	u64 rfer;	/* referenced */
	u64 rfer_cmpr;	/* referenced compressed */
	u64 excl;	/* exclusive */
	u64 excl_cmpr;	/* exclusive compressed */

	/*
	 * limits: only the fields flagged in lim_flags are enforced
	 */
	u64 lim_flags;	/* which limits are set */
	u64 max_rfer;
	u64 max_excl;
	u64 rsv_rfer;
	u64 rsv_excl;

	/*
	 * reservation tracking
	 */
	u64 reserved;

	/*
	 * lists
	 */
	struct list_head groups;  /* groups this group is member of */
	struct list_head members; /* groups that are members of this group */
	struct list_head dirty;   /* dirty groups */
	struct rb_node node;	  /* tree of qgroups */

	/*
	 * temp variables for accounting operations
	 */
	u64 tag;
	u64 refcnt;
};
90
/*
 * Glue structure to represent the relations between qgroups.  Each instance
 * links one member qgroup to one parent group; it is threaded on both the
 * member's ->groups list and the parent's ->members list.
 */
struct btrfs_qgroup_list {
	struct list_head next_group;	/* linked into member->groups */
	struct list_head next_member;	/* linked into group->members */
	struct btrfs_qgroup *group;	/* the parent group */
	struct btrfs_qgroup *member;	/* the member (child) group */
};
100
b382a324
JS
101static int
102qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
103 int init_flags);
104static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
2f232036 105
58400fce 106/* must be called with qgroup_ioctl_lock held */
bed92eae
AJ
107static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
108 u64 qgroupid)
109{
110 struct rb_node *n = fs_info->qgroup_tree.rb_node;
111 struct btrfs_qgroup *qgroup;
112
113 while (n) {
114 qgroup = rb_entry(n, struct btrfs_qgroup, node);
115 if (qgroup->qgroupid < qgroupid)
116 n = n->rb_left;
117 else if (qgroup->qgroupid > qgroupid)
118 n = n->rb_right;
119 else
120 return qgroup;
121 }
122 return NULL;
123}
124
125/* must be called with qgroup_lock held */
126static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
127 u64 qgroupid)
128{
129 struct rb_node **p = &fs_info->qgroup_tree.rb_node;
130 struct rb_node *parent = NULL;
131 struct btrfs_qgroup *qgroup;
132
133 while (*p) {
134 parent = *p;
135 qgroup = rb_entry(parent, struct btrfs_qgroup, node);
136
137 if (qgroup->qgroupid < qgroupid)
138 p = &(*p)->rb_left;
139 else if (qgroup->qgroupid > qgroupid)
140 p = &(*p)->rb_right;
141 else
142 return qgroup;
143 }
144
145 qgroup = kzalloc(sizeof(*qgroup), GFP_ATOMIC);
146 if (!qgroup)
147 return ERR_PTR(-ENOMEM);
148
149 qgroup->qgroupid = qgroupid;
150 INIT_LIST_HEAD(&qgroup->groups);
151 INIT_LIST_HEAD(&qgroup->members);
152 INIT_LIST_HEAD(&qgroup->dirty);
153
154 rb_link_node(&qgroup->node, parent, p);
155 rb_insert_color(&qgroup->node, &fs_info->qgroup_tree);
156
157 return qgroup;
158}
159
4082bd3d 160static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
bed92eae 161{
bed92eae
AJ
162 struct btrfs_qgroup_list *list;
163
bed92eae 164 list_del(&qgroup->dirty);
bed92eae
AJ
165 while (!list_empty(&qgroup->groups)) {
166 list = list_first_entry(&qgroup->groups,
167 struct btrfs_qgroup_list, next_group);
168 list_del(&list->next_group);
169 list_del(&list->next_member);
170 kfree(list);
171 }
172
173 while (!list_empty(&qgroup->members)) {
174 list = list_first_entry(&qgroup->members,
175 struct btrfs_qgroup_list, next_member);
176 list_del(&list->next_group);
177 list_del(&list->next_member);
178 kfree(list);
179 }
180 kfree(qgroup);
4082bd3d 181}
bed92eae 182
4082bd3d
WS
183/* must be called with qgroup_lock held */
184static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
185{
186 struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
187
188 if (!qgroup)
189 return -ENOENT;
190
191 rb_erase(&qgroup->node, &fs_info->qgroup_tree);
192 __del_qgroup_rb(qgroup);
bed92eae
AJ
193 return 0;
194}
195
196/* must be called with qgroup_lock held */
197static int add_relation_rb(struct btrfs_fs_info *fs_info,
198 u64 memberid, u64 parentid)
199{
200 struct btrfs_qgroup *member;
201 struct btrfs_qgroup *parent;
202 struct btrfs_qgroup_list *list;
203
204 member = find_qgroup_rb(fs_info, memberid);
205 parent = find_qgroup_rb(fs_info, parentid);
206 if (!member || !parent)
207 return -ENOENT;
208
209 list = kzalloc(sizeof(*list), GFP_ATOMIC);
210 if (!list)
211 return -ENOMEM;
212
213 list->group = parent;
214 list->member = member;
215 list_add_tail(&list->next_group, &member->groups);
216 list_add_tail(&list->next_member, &parent->members);
217
218 return 0;
219}
220
221/* must be called with qgroup_lock held */
222static int del_relation_rb(struct btrfs_fs_info *fs_info,
223 u64 memberid, u64 parentid)
224{
225 struct btrfs_qgroup *member;
226 struct btrfs_qgroup *parent;
227 struct btrfs_qgroup_list *list;
228
229 member = find_qgroup_rb(fs_info, memberid);
230 parent = find_qgroup_rb(fs_info, parentid);
231 if (!member || !parent)
232 return -ENOENT;
233
234 list_for_each_entry(list, &member->groups, next_group) {
235 if (list->group == parent) {
236 list_del(&list->next_group);
237 list_del(&list->next_member);
238 kfree(list);
239 return 0;
240 }
241 }
242 return -ENOENT;
243}
244
245/*
246 * The full config is read in one go, only called from open_ctree()
247 * It doesn't use any locking, as at this point we're still single-threaded
248 */
249int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
250{
251 struct btrfs_key key;
252 struct btrfs_key found_key;
253 struct btrfs_root *quota_root = fs_info->quota_root;
254 struct btrfs_path *path = NULL;
255 struct extent_buffer *l;
256 int slot;
257 int ret = 0;
258 u64 flags = 0;
b382a324 259 u64 rescan_progress = 0;
bed92eae
AJ
260
261 if (!fs_info->quota_enabled)
262 return 0;
263
1e8f9158
WS
264 fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
265 if (!fs_info->qgroup_ulist) {
266 ret = -ENOMEM;
267 goto out;
268 }
269
bed92eae
AJ
270 path = btrfs_alloc_path();
271 if (!path) {
272 ret = -ENOMEM;
273 goto out;
274 }
275
276 /* default this to quota off, in case no status key is found */
277 fs_info->qgroup_flags = 0;
278
279 /*
280 * pass 1: read status, all qgroup infos and limits
281 */
282 key.objectid = 0;
283 key.type = 0;
284 key.offset = 0;
285 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 1);
286 if (ret)
287 goto out;
288
289 while (1) {
290 struct btrfs_qgroup *qgroup;
291
292 slot = path->slots[0];
293 l = path->nodes[0];
294 btrfs_item_key_to_cpu(l, &found_key, slot);
295
296 if (found_key.type == BTRFS_QGROUP_STATUS_KEY) {
297 struct btrfs_qgroup_status_item *ptr;
298
299 ptr = btrfs_item_ptr(l, slot,
300 struct btrfs_qgroup_status_item);
301
302 if (btrfs_qgroup_status_version(l, ptr) !=
303 BTRFS_QGROUP_STATUS_VERSION) {
304 printk(KERN_ERR
305 "btrfs: old qgroup version, quota disabled\n");
306 goto out;
307 }
308 if (btrfs_qgroup_status_generation(l, ptr) !=
309 fs_info->generation) {
310 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
311 printk(KERN_ERR
312 "btrfs: qgroup generation mismatch, "
313 "marked as inconsistent\n");
314 }
315 fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
316 ptr);
b382a324 317 rescan_progress = btrfs_qgroup_status_rescan(l, ptr);
bed92eae
AJ
318 goto next1;
319 }
320
321 if (found_key.type != BTRFS_QGROUP_INFO_KEY &&
322 found_key.type != BTRFS_QGROUP_LIMIT_KEY)
323 goto next1;
324
325 qgroup = find_qgroup_rb(fs_info, found_key.offset);
326 if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
327 (!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
328 printk(KERN_ERR "btrfs: inconsitent qgroup config\n");
329 flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
330 }
331 if (!qgroup) {
332 qgroup = add_qgroup_rb(fs_info, found_key.offset);
333 if (IS_ERR(qgroup)) {
334 ret = PTR_ERR(qgroup);
335 goto out;
336 }
337 }
338 switch (found_key.type) {
339 case BTRFS_QGROUP_INFO_KEY: {
340 struct btrfs_qgroup_info_item *ptr;
341
342 ptr = btrfs_item_ptr(l, slot,
343 struct btrfs_qgroup_info_item);
344 qgroup->rfer = btrfs_qgroup_info_rfer(l, ptr);
345 qgroup->rfer_cmpr = btrfs_qgroup_info_rfer_cmpr(l, ptr);
346 qgroup->excl = btrfs_qgroup_info_excl(l, ptr);
347 qgroup->excl_cmpr = btrfs_qgroup_info_excl_cmpr(l, ptr);
348 /* generation currently unused */
349 break;
350 }
351 case BTRFS_QGROUP_LIMIT_KEY: {
352 struct btrfs_qgroup_limit_item *ptr;
353
354 ptr = btrfs_item_ptr(l, slot,
355 struct btrfs_qgroup_limit_item);
356 qgroup->lim_flags = btrfs_qgroup_limit_flags(l, ptr);
357 qgroup->max_rfer = btrfs_qgroup_limit_max_rfer(l, ptr);
358 qgroup->max_excl = btrfs_qgroup_limit_max_excl(l, ptr);
359 qgroup->rsv_rfer = btrfs_qgroup_limit_rsv_rfer(l, ptr);
360 qgroup->rsv_excl = btrfs_qgroup_limit_rsv_excl(l, ptr);
361 break;
362 }
363 }
364next1:
365 ret = btrfs_next_item(quota_root, path);
366 if (ret < 0)
367 goto out;
368 if (ret)
369 break;
370 }
371 btrfs_release_path(path);
372
373 /*
374 * pass 2: read all qgroup relations
375 */
376 key.objectid = 0;
377 key.type = BTRFS_QGROUP_RELATION_KEY;
378 key.offset = 0;
379 ret = btrfs_search_slot_for_read(quota_root, &key, path, 1, 0);
380 if (ret)
381 goto out;
382 while (1) {
383 slot = path->slots[0];
384 l = path->nodes[0];
385 btrfs_item_key_to_cpu(l, &found_key, slot);
386
387 if (found_key.type != BTRFS_QGROUP_RELATION_KEY)
388 goto next2;
389
390 if (found_key.objectid > found_key.offset) {
391 /* parent <- member, not needed to build config */
392 /* FIXME should we omit the key completely? */
393 goto next2;
394 }
395
396 ret = add_relation_rb(fs_info, found_key.objectid,
397 found_key.offset);
ff24858c
AJ
398 if (ret == -ENOENT) {
399 printk(KERN_WARNING
400 "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
401 (unsigned long long)found_key.objectid,
402 (unsigned long long)found_key.offset);
403 ret = 0; /* ignore the error */
404 }
bed92eae
AJ
405 if (ret)
406 goto out;
407next2:
408 ret = btrfs_next_item(quota_root, path);
409 if (ret < 0)
410 goto out;
411 if (ret)
412 break;
413 }
414out:
415 fs_info->qgroup_flags |= flags;
416 if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) {
417 fs_info->quota_enabled = 0;
418 fs_info->pending_quota_state = 0;
b382a324
JS
419 } else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
420 ret >= 0) {
421 ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
bed92eae
AJ
422 }
423 btrfs_free_path(path);
424
eb1716af 425 if (ret < 0) {
1e8f9158 426 ulist_free(fs_info->qgroup_ulist);
eb1716af 427 fs_info->qgroup_ulist = NULL;
b382a324 428 fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
eb1716af 429 }
1e8f9158 430
bed92eae
AJ
431 return ret < 0 ? ret : 0;
432}
433
434/*
e685da14
WS
435 * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
436 * first two are in single-threaded paths.And for the third one, we have set
437 * quota_root to be null with qgroup_lock held before, so it is safe to clean
438 * up the in-memory structures without qgroup_lock held.
bed92eae
AJ
439 */
440void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
441{
442 struct rb_node *n;
443 struct btrfs_qgroup *qgroup;
bed92eae
AJ
444
445 while ((n = rb_first(&fs_info->qgroup_tree))) {
446 qgroup = rb_entry(n, struct btrfs_qgroup, node);
447 rb_erase(n, &fs_info->qgroup_tree);
4082bd3d 448 __del_qgroup_rb(qgroup);
bed92eae 449 }
1e7bac1e
WS
450 /*
451 * we call btrfs_free_qgroup_config() when umounting
452 * filesystem and disabling quota, so we set qgroup_ulit
453 * to be null here to avoid double free.
454 */
1e8f9158 455 ulist_free(fs_info->qgroup_ulist);
1e7bac1e 456 fs_info->qgroup_ulist = NULL;
bed92eae
AJ
457}
458
459static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
460 struct btrfs_root *quota_root,
461 u64 src, u64 dst)
462{
463 int ret;
464 struct btrfs_path *path;
465 struct btrfs_key key;
466
467 path = btrfs_alloc_path();
468 if (!path)
469 return -ENOMEM;
470
471 key.objectid = src;
472 key.type = BTRFS_QGROUP_RELATION_KEY;
473 key.offset = dst;
474
475 ret = btrfs_insert_empty_item(trans, quota_root, path, &key, 0);
476
477 btrfs_mark_buffer_dirty(path->nodes[0]);
478
479 btrfs_free_path(path);
480 return ret;
481}
482
483static int del_qgroup_relation_item(struct btrfs_trans_handle *trans,
484 struct btrfs_root *quota_root,
485 u64 src, u64 dst)
486{
487 int ret;
488 struct btrfs_path *path;
489 struct btrfs_key key;
490
491 path = btrfs_alloc_path();
492 if (!path)
493 return -ENOMEM;
494
495 key.objectid = src;
496 key.type = BTRFS_QGROUP_RELATION_KEY;
497 key.offset = dst;
498
499 ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
500 if (ret < 0)
501 goto out;
502
503 if (ret > 0) {
504 ret = -ENOENT;
505 goto out;
506 }
507
508 ret = btrfs_del_item(trans, quota_root, path);
509out:
510 btrfs_free_path(path);
511 return ret;
512}
513
/*
 * Create the on-disk items describing a new qgroup: a zeroed
 * BTRFS_QGROUP_INFO_KEY item followed by a zeroed BTRFS_QGROUP_LIMIT_KEY
 * item, both keyed (0, type, qgroupid) in the quota tree.
 * Returns 0 on success or a negative errno.
 */
static int add_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_qgroup_info_item *qgroup_info;
	struct btrfs_qgroup_limit_item *qgroup_limit;
	struct extent_buffer *leaf;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;

	/* item 1: the info item, all counters start at zero */
	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_info));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	qgroup_info = btrfs_item_ptr(leaf, path->slots[0],
				 struct btrfs_qgroup_info_item);
	btrfs_set_qgroup_info_generation(leaf, qgroup_info, trans->transid);
	btrfs_set_qgroup_info_rfer(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_rfer_cmpr(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl(leaf, qgroup_info, 0);
	btrfs_set_qgroup_info_excl_cmpr(leaf, qgroup_info, 0);

	btrfs_mark_buffer_dirty(leaf);

	/* release the path so the second insert starts a fresh search */
	btrfs_release_path(path);

	/* item 2: the limit item, no limits set initially */
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*qgroup_limit));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	qgroup_limit = btrfs_item_ptr(leaf, path->slots[0],
				  struct btrfs_qgroup_limit_item);
	btrfs_set_qgroup_limit_flags(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_max_excl(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_rfer(leaf, qgroup_limit, 0);
	btrfs_set_qgroup_limit_rsv_excl(leaf, qgroup_limit, 0);

	btrfs_mark_buffer_dirty(leaf);

	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
572
/*
 * Delete both on-disk items of a qgroup (the info item and the limit item)
 * from the quota tree.  Returns 0 on success, -ENOENT if either item is
 * missing, or another negative errno.
 */
static int del_qgroup_item(struct btrfs_trans_handle *trans,
			   struct btrfs_root *quota_root, u64 qgroupid)
{
	int ret;
	struct btrfs_path *path;
	struct btrfs_key key;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* first: the info item */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_INFO_KEY;
	key.offset = qgroupid;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);
	if (ret)
		goto out;

	/* release the path before searching for the limit item */
	btrfs_release_path(path);

	/* second: the limit item, same key except for the type */
	key.type = BTRFS_QGROUP_LIMIT_KEY;
	ret = btrfs_search_slot(trans, quota_root, &key, path, -1, 1);
	if (ret < 0)
		goto out;

	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	ret = btrfs_del_item(trans, quota_root, path);

out:
	btrfs_free_path(path);
	return ret;
}
618
619static int update_qgroup_limit_item(struct btrfs_trans_handle *trans,
620 struct btrfs_root *root, u64 qgroupid,
621 u64 flags, u64 max_rfer, u64 max_excl,
622 u64 rsv_rfer, u64 rsv_excl)
623{
624 struct btrfs_path *path;
625 struct btrfs_key key;
626 struct extent_buffer *l;
627 struct btrfs_qgroup_limit_item *qgroup_limit;
628 int ret;
629 int slot;
630
631 key.objectid = 0;
632 key.type = BTRFS_QGROUP_LIMIT_KEY;
633 key.offset = qgroupid;
634
635 path = btrfs_alloc_path();
84cbe2f7
WS
636 if (!path)
637 return -ENOMEM;
638
bed92eae
AJ
639 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
640 if (ret > 0)
641 ret = -ENOENT;
642
643 if (ret)
644 goto out;
645
646 l = path->nodes[0];
647 slot = path->slots[0];
648 qgroup_limit = btrfs_item_ptr(l, path->slots[0],
649 struct btrfs_qgroup_limit_item);
650 btrfs_set_qgroup_limit_flags(l, qgroup_limit, flags);
651 btrfs_set_qgroup_limit_max_rfer(l, qgroup_limit, max_rfer);
652 btrfs_set_qgroup_limit_max_excl(l, qgroup_limit, max_excl);
653 btrfs_set_qgroup_limit_rsv_rfer(l, qgroup_limit, rsv_rfer);
654 btrfs_set_qgroup_limit_rsv_excl(l, qgroup_limit, rsv_excl);
655
656 btrfs_mark_buffer_dirty(l);
657
658out:
659 btrfs_free_path(path);
660 return ret;
661}
662
663static int update_qgroup_info_item(struct btrfs_trans_handle *trans,
664 struct btrfs_root *root,
665 struct btrfs_qgroup *qgroup)
666{
667 struct btrfs_path *path;
668 struct btrfs_key key;
669 struct extent_buffer *l;
670 struct btrfs_qgroup_info_item *qgroup_info;
671 int ret;
672 int slot;
673
674 key.objectid = 0;
675 key.type = BTRFS_QGROUP_INFO_KEY;
676 key.offset = qgroup->qgroupid;
677
678 path = btrfs_alloc_path();
84cbe2f7
WS
679 if (!path)
680 return -ENOMEM;
681
bed92eae
AJ
682 ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
683 if (ret > 0)
684 ret = -ENOENT;
685
686 if (ret)
687 goto out;
688
689 l = path->nodes[0];
690 slot = path->slots[0];
691 qgroup_info = btrfs_item_ptr(l, path->slots[0],
692 struct btrfs_qgroup_info_item);
693 btrfs_set_qgroup_info_generation(l, qgroup_info, trans->transid);
694 btrfs_set_qgroup_info_rfer(l, qgroup_info, qgroup->rfer);
695 btrfs_set_qgroup_info_rfer_cmpr(l, qgroup_info, qgroup->rfer_cmpr);
696 btrfs_set_qgroup_info_excl(l, qgroup_info, qgroup->excl);
697 btrfs_set_qgroup_info_excl_cmpr(l, qgroup_info, qgroup->excl_cmpr);
698
699 btrfs_mark_buffer_dirty(l);
700
701out:
702 btrfs_free_path(path);
703 return ret;
704}
705
/*
 * Write the current global qgroup state (flags, generation, rescan
 * progress) into the on-disk BTRFS_QGROUP_STATUS_KEY item.  Returns 0 on
 * success, -ENOENT when the status item is missing, or another negative
 * errno.
 */
static int update_qgroup_status_item(struct btrfs_trans_handle *trans,
				     struct btrfs_fs_info *fs_info,
				     struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *l;
	struct btrfs_qgroup_status_item *ptr;
	int ret;
	int slot;

	/* the status item always lives at key (0, STATUS, 0) */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
	if (ret > 0)
		ret = -ENOENT;

	if (ret)
		goto out;

	l = path->nodes[0];
	slot = path->slots[0];
	ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
	btrfs_set_qgroup_status_rescan(l, ptr,
				fs_info->qgroup_rescan_progress.objectid);

	btrfs_mark_buffer_dirty(l);

out:
	btrfs_free_path(path);
	return ret;
}
746
/*
 * called with qgroup_lock held
 *
 * Empty the whole quota tree by repeatedly deleting every item of the first
 * leaf until no items remain.  Returns 0 on success or a negative errno.
 */
static int btrfs_clean_quota_tree(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf = NULL;
	int ret;
	int nr = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;

	/* the smallest possible key: restart the search here on every pass */
	key.objectid = 0;
	key.offset = 0;
	key.type = 0;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			goto out;
		leaf = path->nodes[0];
		nr = btrfs_header_nritems(leaf);
		if (!nr)
			break;
		/*
		 * delete the leaf one by one
		 * since the whole tree is going
		 * to be deleted.
		 */
		path->slots[0] = 0;
		ret = btrfs_del_items(trans, root, path, 0, nr);
		if (ret)
			goto out;

		btrfs_release_path(path);
	}
	ret = 0;
out:
	/* quota is going away; clear the pending state even on failure */
	root->fs_info->pending_quota_state = 0;
	btrfs_free_path(path);
	return ret;
}
795
/*
 * Enable quota accounting: create the quota tree with its status item,
 * create a qgroup item for every subvolume found via BTRFS_ROOT_REF_KEY
 * items in the tree root (plus the top-level FS tree), and publish the new
 * quota_root under qgroup_lock.  Serialized by qgroup_ioctl_lock.
 * Returns 0 on success or a negative errno; on failure all partially
 * created in-memory state (ulist, tree root buffers) is torn down again.
 */
int btrfs_quota_enable(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_path *path = NULL;
	struct btrfs_qgroup_status_item *ptr;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct btrfs_qgroup *qgroup = NULL;
	int ret = 0;
	int slot;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	/* already enabled: just make sure it is turned on at commit time */
	if (fs_info->quota_root) {
		fs_info->pending_quota_state = 1;
		goto out;
	}

	fs_info->qgroup_ulist = ulist_alloc(GFP_NOFS);
	if (!fs_info->qgroup_ulist) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * initially create the quota tree
	 */
	quota_root = btrfs_create_tree(trans, fs_info,
				       BTRFS_QUOTA_TREE_OBJECTID);
	if (IS_ERR(quota_root)) {
		ret = PTR_ERR(quota_root);
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out_free_root;
	}

	/* the status item lives at key (0, STATUS, 0) */
	key.objectid = 0;
	key.type = BTRFS_QGROUP_STATUS_KEY;
	key.offset = 0;

	ret = btrfs_insert_empty_item(trans, quota_root, path, &key,
				      sizeof(*ptr));
	if (ret)
		goto out_free_path;

	leaf = path->nodes[0];
	ptr = btrfs_item_ptr(leaf, path->slots[0],
				 struct btrfs_qgroup_status_item);
	btrfs_set_qgroup_status_generation(leaf, ptr, trans->transid);
	btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
	/* start out inconsistent; a rescan is needed for exact numbers */
	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
				BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
	btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

	btrfs_mark_buffer_dirty(leaf);

	/* scan the tree root for subvolume references */
	key.objectid = 0;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = 0;

	btrfs_release_path(path);
	ret = btrfs_search_slot_for_read(tree_root, &key, path, 1, 0);
	if (ret > 0)
		goto out_add_root;
	if (ret < 0)
		goto out_free_path;


	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.type == BTRFS_ROOT_REF_KEY) {
			/* one qgroup per referenced subvolume id */
			ret = add_qgroup_item(trans, quota_root,
					      found_key.offset);
			if (ret)
				goto out_free_path;

			qgroup = add_qgroup_rb(fs_info, found_key.offset);
			if (IS_ERR(qgroup)) {
				ret = PTR_ERR(qgroup);
				goto out_free_path;
			}
		}
		ret = btrfs_next_item(tree_root, path);
		if (ret < 0)
			goto out_free_path;
		if (ret)
			break;
	}

out_add_root:
	/* the top-level FS tree has no ROOT_REF item; add it explicitly */
	btrfs_release_path(path);
	ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
	if (ret)
		goto out_free_path;

	qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
	if (IS_ERR(qgroup)) {
		ret = PTR_ERR(qgroup);
		goto out_free_path;
	}
	/* publish the quota root; readers take qgroup_lock */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_root = quota_root;
	fs_info->pending_quota_state = 1;
	spin_unlock(&fs_info->qgroup_lock);
out_free_path:
	btrfs_free_path(path);
out_free_root:
	if (ret) {
		free_extent_buffer(quota_root->node);
		free_extent_buffer(quota_root->commit_root);
		kfree(quota_root);
	}
out:
	if (ret) {
		ulist_free(fs_info->qgroup_ulist);
		fs_info->qgroup_ulist = NULL;
	}
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
926
/*
 * Disable quota accounting: detach quota_root under qgroup_lock (so that
 * btrfs_free_qgroup_config() can then run without the spinlock, see the
 * comment above that function), free the in-memory config, empty the quota
 * tree on disk and finally delete and free the tree itself.  Serialized by
 * qgroup_ioctl_lock.  Returns 0 on success or a negative errno.
 */
int btrfs_quota_disable(struct btrfs_trans_handle *trans,
			struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *quota_root;
	int ret = 0;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_root)
		goto out;
	/* detach the quota root before tearing down any state */
	spin_lock(&fs_info->qgroup_lock);
	fs_info->quota_enabled = 0;
	fs_info->pending_quota_state = 0;
	quota_root = fs_info->quota_root;
	fs_info->quota_root = NULL;
	spin_unlock(&fs_info->qgroup_lock);

	btrfs_free_qgroup_config(fs_info);

	ret = btrfs_clean_quota_tree(trans, quota_root);
	if (ret)
		goto out;

	ret = btrfs_del_root(trans, tree_root, &quota_root->root_key);
	if (ret)
		goto out;

	list_del(&quota_root->dirty_list);

	/* free the now-empty tree's root block */
	btrfs_tree_lock(quota_root->node);
	clean_tree_block(trans, tree_root, quota_root->node);
	btrfs_tree_unlock(quota_root->node);
	btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);

	free_extent_buffer(quota_root->node);
	free_extent_buffer(quota_root->commit_root);
	kfree(quota_root);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
968
2f232036
JS
969static void qgroup_dirty(struct btrfs_fs_info *fs_info,
970 struct btrfs_qgroup *qgroup)
bed92eae 971{
2f232036
JS
972 if (list_empty(&qgroup->dirty))
973 list_add(&qgroup->dirty, &fs_info->dirty_qgroups);
bed92eae
AJ
974}
975
976int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
977 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
978{
979 struct btrfs_root *quota_root;
b7fef4f5
WS
980 struct btrfs_qgroup *parent;
981 struct btrfs_qgroup *member;
534e6623 982 struct btrfs_qgroup_list *list;
bed92eae
AJ
983 int ret = 0;
984
f2f6ed3d 985 mutex_lock(&fs_info->qgroup_ioctl_lock);
bed92eae 986 quota_root = fs_info->quota_root;
f2f6ed3d
WS
987 if (!quota_root) {
988 ret = -EINVAL;
989 goto out;
990 }
b7fef4f5
WS
991 member = find_qgroup_rb(fs_info, src);
992 parent = find_qgroup_rb(fs_info, dst);
993 if (!member || !parent) {
994 ret = -EINVAL;
995 goto out;
996 }
bed92eae 997
534e6623
WS
998 /* check if such qgroup relation exist firstly */
999 list_for_each_entry(list, &member->groups, next_group) {
1000 if (list->group == parent) {
1001 ret = -EEXIST;
1002 goto out;
1003 }
1004 }
1005
bed92eae
AJ
1006 ret = add_qgroup_relation_item(trans, quota_root, src, dst);
1007 if (ret)
f2f6ed3d 1008 goto out;
bed92eae
AJ
1009
1010 ret = add_qgroup_relation_item(trans, quota_root, dst, src);
1011 if (ret) {
1012 del_qgroup_relation_item(trans, quota_root, src, dst);
f2f6ed3d 1013 goto out;
bed92eae
AJ
1014 }
1015
1016 spin_lock(&fs_info->qgroup_lock);
1017 ret = add_relation_rb(quota_root->fs_info, src, dst);
1018 spin_unlock(&fs_info->qgroup_lock);
f2f6ed3d
WS
1019out:
1020 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1021 return ret;
1022}
1023
1024int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
1025 struct btrfs_fs_info *fs_info, u64 src, u64 dst)
1026{
1027 struct btrfs_root *quota_root;
534e6623
WS
1028 struct btrfs_qgroup *parent;
1029 struct btrfs_qgroup *member;
1030 struct btrfs_qgroup_list *list;
bed92eae
AJ
1031 int ret = 0;
1032 int err;
1033
f2f6ed3d 1034 mutex_lock(&fs_info->qgroup_ioctl_lock);
bed92eae 1035 quota_root = fs_info->quota_root;
f2f6ed3d
WS
1036 if (!quota_root) {
1037 ret = -EINVAL;
1038 goto out;
1039 }
bed92eae 1040
534e6623
WS
1041 member = find_qgroup_rb(fs_info, src);
1042 parent = find_qgroup_rb(fs_info, dst);
1043 if (!member || !parent) {
1044 ret = -EINVAL;
1045 goto out;
1046 }
1047
1048 /* check if such qgroup relation exist firstly */
1049 list_for_each_entry(list, &member->groups, next_group) {
1050 if (list->group == parent)
1051 goto exist;
1052 }
1053 ret = -ENOENT;
1054 goto out;
1055exist:
bed92eae
AJ
1056 ret = del_qgroup_relation_item(trans, quota_root, src, dst);
1057 err = del_qgroup_relation_item(trans, quota_root, dst, src);
1058 if (err && !ret)
1059 ret = err;
1060
1061 spin_lock(&fs_info->qgroup_lock);
1062 del_relation_rb(fs_info, src, dst);
bed92eae 1063 spin_unlock(&fs_info->qgroup_lock);
f2f6ed3d
WS
1064out:
1065 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1066 return ret;
1067}
1068
1069int btrfs_create_qgroup(struct btrfs_trans_handle *trans,
1070 struct btrfs_fs_info *fs_info, u64 qgroupid, char *name)
1071{
1072 struct btrfs_root *quota_root;
1073 struct btrfs_qgroup *qgroup;
1074 int ret = 0;
1075
f2f6ed3d 1076 mutex_lock(&fs_info->qgroup_ioctl_lock);
bed92eae 1077 quota_root = fs_info->quota_root;
f2f6ed3d
WS
1078 if (!quota_root) {
1079 ret = -EINVAL;
1080 goto out;
1081 }
534e6623
WS
1082 qgroup = find_qgroup_rb(fs_info, qgroupid);
1083 if (qgroup) {
1084 ret = -EEXIST;
1085 goto out;
1086 }
bed92eae
AJ
1087
1088 ret = add_qgroup_item(trans, quota_root, qgroupid);
534e6623
WS
1089 if (ret)
1090 goto out;
bed92eae
AJ
1091
1092 spin_lock(&fs_info->qgroup_lock);
1093 qgroup = add_qgroup_rb(fs_info, qgroupid);
1094 spin_unlock(&fs_info->qgroup_lock);
1095
1096 if (IS_ERR(qgroup))
1097 ret = PTR_ERR(qgroup);
f2f6ed3d
WS
1098out:
1099 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1100 return ret;
1101}
1102
1103int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
1104 struct btrfs_fs_info *fs_info, u64 qgroupid)
1105{
1106 struct btrfs_root *quota_root;
2cf68703 1107 struct btrfs_qgroup *qgroup;
bed92eae
AJ
1108 int ret = 0;
1109
f2f6ed3d 1110 mutex_lock(&fs_info->qgroup_ioctl_lock);
bed92eae 1111 quota_root = fs_info->quota_root;
f2f6ed3d
WS
1112 if (!quota_root) {
1113 ret = -EINVAL;
1114 goto out;
1115 }
bed92eae 1116
2cf68703 1117 qgroup = find_qgroup_rb(fs_info, qgroupid);
534e6623
WS
1118 if (!qgroup) {
1119 ret = -ENOENT;
1120 goto out;
1121 } else {
1122 /* check if there are no relations to this qgroup */
1123 if (!list_empty(&qgroup->groups) ||
1124 !list_empty(&qgroup->members)) {
f2f6ed3d
WS
1125 ret = -EBUSY;
1126 goto out;
2cf68703
AJ
1127 }
1128 }
bed92eae
AJ
1129 ret = del_qgroup_item(trans, quota_root, qgroupid);
1130
1131 spin_lock(&fs_info->qgroup_lock);
1132 del_qgroup_rb(quota_root->fs_info, qgroupid);
bed92eae 1133 spin_unlock(&fs_info->qgroup_lock);
f2f6ed3d
WS
1134out:
1135 mutex_unlock(&fs_info->qgroup_ioctl_lock);
bed92eae
AJ
1136 return ret;
1137}
1138
/*
 * Set the limit configuration of a single qgroup, both on disk and in the
 * in-memory rbtree copy.  Returns 0 on success, -EINVAL if quota is not
 * enabled, -ENOENT if the qgroup does not exist.
 */
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
		       struct btrfs_fs_info *fs_info, u64 qgroupid,
		       struct btrfs_qgroup_limit *limit)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	int ret = 0;

	/* qgroup_ioctl_lock serializes all quota configuration changes */
	mutex_lock(&fs_info->qgroup_ioctl_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root) {
		/* quota is not enabled on this filesystem */
		ret = -EINVAL;
		goto out;
	}

	qgroup = find_qgroup_rb(fs_info, qgroupid);
	if (!qgroup) {
		ret = -ENOENT;
		goto out;
	}
	/* persist the new limits in the quota tree */
	ret = update_qgroup_limit_item(trans, quota_root, qgroupid,
				       limit->flags, limit->max_rfer,
				       limit->max_excl, limit->rsv_rfer,
				       limit->rsv_excl);
	if (ret) {
		/*
		 * The on-disk update failed: mark the quota state
		 * inconsistent, but still apply the limits in memory below.
		 */
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		printk(KERN_INFO "unable to update quota limit for %llu\n",
		       (unsigned long long)qgroupid);
	}

	/* update the in-memory copy under qgroup_lock */
	spin_lock(&fs_info->qgroup_lock);
	qgroup->lim_flags = limit->flags;
	qgroup->max_rfer = limit->max_rfer;
	qgroup->max_excl = limit->max_excl;
	qgroup->rsv_rfer = limit->rsv_rfer;
	qgroup->rsv_excl = limit->rsv_excl;
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1180
bed92eae
AJ
1181/*
1182 * btrfs_qgroup_record_ref is called when the ref is added or deleted. it puts
1183 * the modification into a list that's later used by btrfs_end_transaction to
1184 * pass the recorded modifications on to btrfs_qgroup_account_ref.
1185 */
1186int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1187 struct btrfs_delayed_ref_node *node,
1188 struct btrfs_delayed_extent_op *extent_op)
1189{
1190 struct qgroup_update *u;
1191
1192 BUG_ON(!trans->delayed_ref_elem.seq);
1193 u = kmalloc(sizeof(*u), GFP_NOFS);
1194 if (!u)
1195 return -ENOMEM;
1196
1197 u->node = node;
1198 u->extent_op = extent_op;
1199 list_add_tail(&u->list, &trans->qgroup_ref_list);
1200
1201 return 0;
1202}
1203
46b665ce
JS
/*
 * Accounting step 1: for every root that referenced the extent before the
 * current operation ('roots'), walk that root's qgroup and all of its
 * parent qgroups (using 'tmp' as a worklist) and increment each visited
 * qgroup's refcnt once per root.  refcnt is reset lazily by comparing it
 * against the per-operation sequence number 'seq' instead of clearing all
 * qgroups up front.  Returns 0 or a negative error from ulist_add.
 */
static int qgroup_account_ref_step1(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	struct btrfs_qgroup *qg;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		/* XXX id not needed */
		ret = ulist_add(tmp, qg->qgroupid,
				(u64)(uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;
		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			/*
			 * refcnt < seq means this qgroup was not yet touched
			 * in this accounting run: restart counting at seq + 1.
			 */
			if (qg->refcnt < seq)
				qg->refcnt = seq + 1;
			else
				++qg->refcnt;

			/* queue all parent groups for the same treatment */
			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(u64)(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
1249
/*
 * Accounting step 2: walk upwards from 'qgroup' (the qgroup of the root
 * the ref is added to / removed from) through all parent groups.  Qgroups
 * whose refcnt was not raised to at least 'seq' in step 1 did not
 * reference the extent before, so their referenced counters change by
 * sgn * num_bytes; if no other root referenced the extent at all
 * (roots->nnodes == 0), the exclusive counters change as well.  Every
 * visited qgroup is tagged with 'seq' so step 3 can skip it.
 * Returns 0 or a negative error from ulist_add.
 */
static int qgroup_account_ref_step2(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes,
				    struct btrfs_qgroup *qgroup)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct btrfs_qgroup_list *glist;
	int ret;

	ulist_reinit(tmp);
	ret = ulist_add(tmp, qgroup->qgroupid, (uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		return ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(tmp, &uiter))) {
		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;
		if (qg->refcnt < seq) {
			/* not visited by step 1 */
			qg->rfer += sgn * num_bytes;
			qg->rfer_cmpr += sgn * num_bytes;
			if (roots->nnodes == 0) {
				qg->excl += sgn * num_bytes;
				qg->excl_cmpr += sgn * num_bytes;
			}
			qgroup_dirty(fs_info, qg);
		}
		/* mark as handled so step 3 skips this qgroup */
		WARN_ON(qg->tag >= seq);
		qg->tag = seq;

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(tmp, glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				return ret;
		}
	}

	return 0;
}
1292
/*
 * Accounting step 3: walk once more from all old roots up through their
 * parent qgroups.  Qgroups already tagged in step 2 are skipped.  A qgroup
 * whose refcnt was incremented once per old root in step 1
 * (refcnt - seq == roots->nnodes) is reachable from every old root, so
 * the extent's exclusivity for it flips: its exclusive counters change by
 * -sgn * num_bytes.  Returns 0 or a negative error from ulist_add.
 */
static int qgroup_account_ref_step3(struct btrfs_fs_info *fs_info,
				    struct ulist *roots, struct ulist *tmp,
				    u64 seq, int sgn, u64 num_bytes)
{
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct btrfs_qgroup *qg;
	struct ulist_node *tmp_unode;
	struct ulist_iterator tmp_uiter;
	int ret;

	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(roots, &uiter))) {
		qg = find_qgroup_rb(fs_info, unode->val);
		if (!qg)
			continue;

		ulist_reinit(tmp);
		ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg, GFP_ATOMIC);
		if (ret < 0)
			return ret;

		ULIST_ITER_INIT(&tmp_uiter);
		while ((tmp_unode = ulist_next(tmp, &tmp_uiter))) {
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t)tmp_unode->aux;
			/* already handled by step 2 */
			if (qg->tag == seq)
				continue;

			if (qg->refcnt - seq == roots->nnodes) {
				qg->excl -= sgn * num_bytes;
				qg->excl_cmpr -= sgn * num_bytes;
				qgroup_dirty(fs_info, qg);
			}

			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0)
					return ret;
			}
		}
	}

	return 0;
}
1341
bed92eae
AJ
/*
 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
 * from the fs. First, all roots referencing the extent are searched, and
 * then the space is accounted accordingly to the different roots. The
 * accounting algorithm works in 3 steps documented inline.
 *
 * Returns 0 on success (including the cases where nothing needs accounting)
 * or a negative error from btrfs_find_all_roots / the accounting steps.
 */
int btrfs_qgroup_account_ref(struct btrfs_trans_handle *trans,
			     struct btrfs_fs_info *fs_info,
			     struct btrfs_delayed_ref_node *node,
			     struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key ins;
	struct btrfs_root *quota_root;
	u64 ref_root;
	struct btrfs_qgroup *qgroup;
	struct ulist *roots = NULL;
	u64 seq;
	int ret = 0;
	int sgn;

	if (!fs_info->quota_enabled)
		return 0;

	BUG_ON(!fs_info->quota_root);

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	/* determine the root this delayed ref belongs to */
	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY) {
		struct btrfs_delayed_tree_ref *ref;
		ref = btrfs_delayed_node_to_tree_ref(node);
		ref_root = ref->root;
	} else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		   node->type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_delayed_data_ref *ref;
		ref = btrfs_delayed_node_to_data_ref(node);
		ref_root = ref->root;
	} else {
		BUG();
	}

	if (!is_fstree(ref_root)) {
		/*
		 * non-fs-trees are not being accounted
		 */
		return 0;
	}

	/*
	 * sgn is the direction of the accounting; seq selects the tree state
	 * to look at (see the comment above btrfs_find_all_roots below).
	 */
	switch (node->action) {
	case BTRFS_ADD_DELAYED_REF:
	case BTRFS_ADD_DELAYED_EXTENT:
		sgn = 1;
		seq = btrfs_tree_mod_seq_prev(node->seq);
		break;
	case BTRFS_DROP_DELAYED_REF:
		sgn = -1;
		seq = node->seq;
		break;
	case BTRFS_UPDATE_DELAYED_HEAD:
		return 0;
	default:
		BUG();
	}

	/*
	 * While a rescan is running, extents at or beyond the rescan
	 * progress pointer are skipped here; presumably those will be
	 * picked up by the rescan worker itself.
	 */
	mutex_lock(&fs_info->qgroup_rescan_lock);
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
		if (fs_info->qgroup_rescan_progress.objectid <= node->bytenr) {
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			return 0;
		}
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	/*
	 * the delayed ref sequence number we pass depends on the direction of
	 * the operation. for add operations, we pass
	 * tree_mod_log_prev_seq(node->seq) to skip
	 * the delayed ref's current sequence number, because we need the state
	 * of the tree before the add operation. for delete operations, we pass
	 * (node->seq) to include the delayed ref's current sequence number,
	 * because we need the state of the tree after the delete operation.
	 */
	ret = btrfs_find_all_roots(trans, fs_info, node->bytenr, seq, &roots);
	if (ret < 0)
		return ret;

	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto unlock;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto unlock;

	/*
	 * step 1: for each old ref, visit all nodes once and inc refcnt
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	seq = fs_info->qgroup_seq;
	fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

	ret = qgroup_account_ref_step1(fs_info, roots, fs_info->qgroup_ulist,
				       seq);
	if (ret)
		goto unlock;

	/*
	 * step 2: walk from the new root
	 */
	ret = qgroup_account_ref_step2(fs_info, roots, fs_info->qgroup_ulist,
				       seq, sgn, node->num_bytes, qgroup);
	if (ret)
		goto unlock;

	/*
	 * step 3: walk again from old refs
	 */
	ret = qgroup_account_ref_step3(fs_info, roots, fs_info->qgroup_ulist,
				       seq, sgn, node->num_bytes);
	if (ret)
		goto unlock;

unlock:
	spin_unlock(&fs_info->qgroup_lock);
	ulist_free(roots);

	return ret;
}
1474
/*
 * called from commit_transaction. Writes all changed qgroups to disk.
 * Also flips quota_enabled to the pending state and, when quota was just
 * enabled, kicks off the initial rescan worker.
 */
int btrfs_run_qgroups(struct btrfs_trans_handle *trans,
		      struct btrfs_fs_info *fs_info)
{
	struct btrfs_root *quota_root = fs_info->quota_root;
	int ret = 0;
	int start_rescan_worker = 0;

	if (!quota_root)
		goto out;

	/* quota is being switched on in this commit: schedule a rescan */
	if (!fs_info->quota_enabled && fs_info->pending_quota_state)
		start_rescan_worker = 1;

	fs_info->quota_enabled = fs_info->pending_quota_state;

	spin_lock(&fs_info->qgroup_lock);
	while (!list_empty(&fs_info->dirty_qgroups)) {
		struct btrfs_qgroup *qgroup;
		qgroup = list_first_entry(&fs_info->dirty_qgroups,
					  struct btrfs_qgroup, dirty);
		list_del_init(&qgroup->dirty);
		/* drop the spinlock around the tree operation */
		spin_unlock(&fs_info->qgroup_lock);
		ret = update_qgroup_info_item(trans, quota_root, qgroup);
		if (ret)
			fs_info->qgroup_flags |=
					BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
		spin_lock(&fs_info->qgroup_lock);
	}
	if (fs_info->quota_enabled)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_ON;
	else
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
	spin_unlock(&fs_info->qgroup_lock);

	ret = update_qgroup_status_item(trans, fs_info, quota_root);
	if (ret)
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;

	if (!ret && start_rescan_worker) {
		ret = qgroup_rescan_init(fs_info, 0, 1);
		if (!ret) {
			qgroup_rescan_zero_tracking(fs_info);
			btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
					   &fs_info->qgroup_rescan_work);
		}
		/* a failure to start the rescan is not fatal for the commit */
		ret = 0;
	}

out:

	return ret;
}
1530
/*
 * copy the accounting information between qgroups. This is necessary
 * when a snapshot or a subvolume is created. Throwing an error will
 * cause a transaction abort so we take extra care here to only error
 * when a readonly fs is a reasonable outcome.
 *
 * srcid is the id of the snapshotted subvolume (0 for a plain new
 * subvolume); objectid is the id of the new subvolume; inherit optionally
 * carries qgroup ids to relate the new group to plus rfer/excl copy pairs
 * (laid out as a u64 array directly after the struct).
 */
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
			 struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
			 struct btrfs_qgroup_inherit *inherit)
{
	int ret = 0;
	int i;
	u64 *i_qgroups;
	struct btrfs_root *quota_root = fs_info->quota_root;
	struct btrfs_qgroup *srcgroup;
	struct btrfs_qgroup *dstgroup;
	u32 level_size = 0;
	u64 nums;

	mutex_lock(&fs_info->qgroup_ioctl_lock);
	if (!fs_info->quota_enabled)
		goto out;

	if (!quota_root) {
		ret = -EINVAL;
		goto out;
	}

	/* validate every qgroup id passed in 'inherit' before touching disk */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
		       2 * inherit->num_excl_copies;
		for (i = 0; i < nums; ++i) {
			srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
			if (!srcgroup) {
				ret = -EINVAL;
				goto out;
			}
			++i_qgroups;
		}
	}

	/*
	 * create a tracking group for the subvol itself
	 */
	ret = add_qgroup_item(trans, quota_root, objectid);
	if (ret)
		goto out;

	if (inherit && inherit->flags & BTRFS_QGROUP_INHERIT_SET_LIMITS) {
		ret = update_qgroup_limit_item(trans, quota_root, objectid,
					       inherit->lim.flags,
					       inherit->lim.max_rfer,
					       inherit->lim.max_excl,
					       inherit->lim.rsv_rfer,
					       inherit->lim.rsv_excl);
		if (ret)
			goto out;
	}

	if (srcid) {
		struct btrfs_root *srcroot;
		struct btrfs_key srckey;
		int srcroot_level;

		/*
		 * a snapshot shares everything but the root node with its
		 * source; level_size accounts for that new root block.
		 */
		srckey.objectid = srcid;
		srckey.type = BTRFS_ROOT_ITEM_KEY;
		srckey.offset = (u64)-1;
		srcroot = btrfs_read_fs_root_no_name(fs_info, &srckey);
		if (IS_ERR(srcroot)) {
			ret = PTR_ERR(srcroot);
			goto out;
		}

		rcu_read_lock();
		srcroot_level = btrfs_header_level(srcroot->node);
		level_size = btrfs_level_size(srcroot, srcroot_level);
		rcu_read_unlock();
	}

	/*
	 * add qgroup to all inherited groups
	 */
	if (inherit) {
		i_qgroups = (u64 *)(inherit + 1);
		for (i = 0; i < inherit->num_qgroups; ++i) {
			ret = add_qgroup_relation_item(trans, quota_root,
						       objectid, *i_qgroups);
			if (ret)
				goto out;
			ret = add_qgroup_relation_item(trans, quota_root,
						       *i_qgroups, objectid);
			if (ret)
				goto out;
			++i_qgroups;
		}
	}


	/* on-disk items are written; now mirror everything in memory */
	spin_lock(&fs_info->qgroup_lock);

	dstgroup = add_qgroup_rb(fs_info, objectid);
	if (IS_ERR(dstgroup)) {
		ret = PTR_ERR(dstgroup);
		goto unlock;
	}

	if (srcid) {
		srcgroup = find_qgroup_rb(fs_info, srcid);
		if (!srcgroup)
			goto unlock;
		/*
		 * the snapshot inherits the source's referenced counters
		 * minus the new root block; the source's only exclusive
		 * block is now its own root.
		 */
		dstgroup->rfer = srcgroup->rfer - level_size;
		dstgroup->rfer_cmpr = srcgroup->rfer_cmpr - level_size;
		srcgroup->excl = level_size;
		srcgroup->excl_cmpr = level_size;
		qgroup_dirty(fs_info, dstgroup);
		qgroup_dirty(fs_info, srcgroup);
	}

	if (!inherit)
		goto unlock;

	i_qgroups = (u64 *)(inherit + 1);
	for (i = 0; i < inherit->num_qgroups; ++i) {
		ret = add_relation_rb(quota_root->fs_info, objectid,
				      *i_qgroups);
		if (ret)
			goto unlock;
		++i_qgroups;
	}

	/* copy referenced counters for the requested (src, dst) pairs */
	for (i = 0; i < inherit->num_ref_copies; ++i) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->rfer = src->rfer - level_size;
		dst->rfer_cmpr = src->rfer_cmpr - level_size;
		i_qgroups += 2;
	}
	/* copy exclusive counters for the requested (src, dst) pairs */
	for (i = 0; i < inherit->num_excl_copies; ++i) {
		struct btrfs_qgroup *src;
		struct btrfs_qgroup *dst;

		src = find_qgroup_rb(fs_info, i_qgroups[0]);
		dst = find_qgroup_rb(fs_info, i_qgroups[1]);

		if (!src || !dst) {
			ret = -EINVAL;
			goto unlock;
		}

		dst->excl = src->excl + level_size;
		dst->excl_cmpr = src->excl_cmpr + level_size;
		i_qgroups += 2;
	}

unlock:
	spin_unlock(&fs_info->qgroup_lock);
out:
	mutex_unlock(&fs_info->qgroup_ioctl_lock);
	return ret;
}
1699
/*
 * reserve some space for a qgroup and all its parents. The reservation takes
 * place with start_transaction or dealloc_reserve, similar to ENOSPC
 * accounting. If not enough space is available, EDQUOT is returned.
 * We assume that the requested space is new for all qgroups.
 */
int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;
	struct ulist_node *unode;
	struct ulist_iterator uiter;

	/* only fs trees are accounted */
	if (!is_fstree(ref_root))
		return 0;

	if (num_bytes == 0)
		return 0;

	spin_lock(&fs_info->qgroup_lock);
	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/*
	 * in a first step, we check all affected qgroups if any limits would
	 * be exceeded
	 */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		/* signed addition so a negative rfer cannot wrap around */
		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
		    qg->reserved + (s64)qg->rfer + num_bytes >
		    qg->max_rfer) {
			ret = -EDQUOT;
			goto out;
		}

		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
		    qg->reserved + (s64)qg->excl + num_bytes >
		    qg->max_excl) {
			ret = -EDQUOT;
			goto out;
		}

		/* extend the walk to all parent groups */
		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}
	/* clear any positive return value left over from ulist_add */
	ret = 0;
	/*
	 * no limits exceeded, now record the reservation into all qgroups
	 */
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		qg->reserved += num_bytes;
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
	return ret;
}
1786
/*
 * Release a previously taken qgroup reservation: subtract num_bytes from
 * the 'reserved' counter of the root's qgroup and all of its parent
 * groups.  Counterpart of btrfs_qgroup_reserve; errors (ulist allocation
 * failures) are silently dropped since there is nothing to report back.
 */
void btrfs_qgroup_free(struct btrfs_root *root, u64 num_bytes)
{
	struct btrfs_root *quota_root;
	struct btrfs_qgroup *qgroup;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	u64 ref_root = root->root_key.objectid;
	int ret = 0;

	/* only fs trees are accounted */
	if (!is_fstree(ref_root))
		return;

	if (num_bytes == 0)
		return;

	spin_lock(&fs_info->qgroup_lock);

	quota_root = fs_info->quota_root;
	if (!quota_root)
		goto out;

	qgroup = find_qgroup_rb(fs_info, ref_root);
	if (!qgroup)
		goto out;

	/* walk the qgroup and all its parents, dropping the reservation */
	ulist_reinit(fs_info->qgroup_ulist);
	ret = ulist_add(fs_info->qgroup_ulist, qgroup->qgroupid,
			(uintptr_t)qgroup, GFP_ATOMIC);
	if (ret < 0)
		goto out;
	ULIST_ITER_INIT(&uiter);
	while ((unode = ulist_next(fs_info->qgroup_ulist, &uiter))) {
		struct btrfs_qgroup *qg;
		struct btrfs_qgroup_list *glist;

		qg = (struct btrfs_qgroup *)(uintptr_t)unode->aux;

		qg->reserved -= num_bytes;

		list_for_each_entry(glist, &qg->groups, next_group) {
			ret = ulist_add(fs_info->qgroup_ulist,
					glist->group->qgroupid,
					(uintptr_t)glist->group, GFP_ATOMIC);
			if (ret < 0)
				goto out;
		}
	}

out:
	spin_unlock(&fs_info->qgroup_lock);
}
1839
1840void assert_qgroups_uptodate(struct btrfs_trans_handle *trans)
1841{
1842 if (list_empty(&trans->qgroup_ref_list) && !trans->delayed_ref_elem.seq)
1843 return;
fc36ed7e 1844 pr_err("btrfs: qgroups not uptodate in trans handle %p: list is%s empty, seq is %#x.%x\n",
bed92eae 1845 trans, list_empty(&trans->qgroup_ref_list) ? "" : " not",
fc36ed7e
JS
1846 (u32)(trans->delayed_ref_elem.seq >> 32),
1847 (u32)trans->delayed_ref_elem.seq);
bed92eae
AJ
1848 BUG();
1849}
2f232036
JS
1850
/*
 * returns < 0 on error, 0 when more leafs are to be scanned.
 * returns 1 when done, 2 when done and FLAG_INCONSISTENT was cleared.
 *
 * Processes one extent tree leaf per call: advances
 * fs_info->qgroup_rescan_progress past the leaf, copies the leaf into
 * scratch_leaf so the search path can be dropped, then accounts every
 * EXTENT_ITEM in it against the qgroups of all roots referencing it.
 */
static int
qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
		   struct btrfs_trans_handle *trans, struct ulist *tmp,
		   struct extent_buffer *scratch_leaf)
{
	struct btrfs_key found;
	struct ulist *roots = NULL;
	struct ulist_node *unode;
	struct ulist_iterator uiter;
	struct seq_list tree_mod_seq_elem = {};
	u64 seq;
	int slot;
	int ret;

	path->leave_spinning = 1;
	mutex_lock(&fs_info->qgroup_rescan_lock);
	ret = btrfs_search_slot_for_read(fs_info->extent_root,
					 &fs_info->qgroup_rescan_progress,
					 path, 1, 0);

	pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
		 (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
		 fs_info->qgroup_rescan_progress.type,
		 (unsigned long long)fs_info->qgroup_rescan_progress.offset,
		 ret);

	if (ret) {
		/*
		 * The rescan is about to end, we will not be scanning any
		 * further blocks. We cannot unset the RESCAN flag here, because
		 * we want to commit the transaction if everything went well.
		 * To make the live accounting work in this phase, we set our
		 * scan progress pointer such that every real extent objectid
		 * will be smaller.
		 */
		fs_info->qgroup_rescan_progress.objectid = (u64)-1;
		btrfs_release_path(path);
		mutex_unlock(&fs_info->qgroup_rescan_lock);
		return ret;
	}

	/* advance the progress pointer past this whole leaf */
	btrfs_item_key_to_cpu(path->nodes[0], &found,
			      btrfs_header_nritems(path->nodes[0]) - 1);
	fs_info->qgroup_rescan_progress.objectid = found.objectid + 1;

	btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
	/* work on a private copy of the leaf so the path can be released */
	memcpy(scratch_leaf, path->nodes[0], sizeof(*scratch_leaf));
	slot = path->slots[0];
	btrfs_release_path(path);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) {
		btrfs_item_key_to_cpu(scratch_leaf, &found, slot);
		if (found.type != BTRFS_EXTENT_ITEM_KEY)
			continue;
		ret = btrfs_find_all_roots(trans, fs_info, found.objectid,
					   tree_mod_seq_elem.seq, &roots);
		if (ret < 0)
			goto out;
		spin_lock(&fs_info->qgroup_lock);
		seq = fs_info->qgroup_seq;
		fs_info->qgroup_seq += roots->nnodes + 1; /* max refcnt */

		ret = qgroup_account_ref_step1(fs_info, roots, tmp, seq);
		if (ret) {
			spin_unlock(&fs_info->qgroup_lock);
			ulist_free(roots);
			goto out;
		}

		/*
		 * step2 of btrfs_qgroup_account_ref works from a single root,
		 * we're doing all at once here.
		 */
		ulist_reinit(tmp);
		ULIST_ITER_INIT(&uiter);
		while ((unode = ulist_next(roots, &uiter))) {
			struct btrfs_qgroup *qg;

			qg = find_qgroup_rb(fs_info, unode->val);
			if (!qg)
				continue;

			ret = ulist_add(tmp, qg->qgroupid, (uintptr_t)qg,
					GFP_ATOMIC);
			if (ret < 0) {
				spin_unlock(&fs_info->qgroup_lock);
				ulist_free(roots);
				goto out;
			}
		}

		/* this loop is similar to step 2 of btrfs_qgroup_account_ref */
		ULIST_ITER_INIT(&uiter);
		while ((unode = ulist_next(tmp, &uiter))) {
			struct btrfs_qgroup *qg;
			struct btrfs_qgroup_list *glist;

			qg = (struct btrfs_qgroup *)(uintptr_t) unode->aux;
			qg->rfer += found.offset;
			qg->rfer_cmpr += found.offset;
			WARN_ON(qg->tag >= seq);
			/* visited once per referencing root -> exclusive */
			if (qg->refcnt - seq == roots->nnodes) {
				qg->excl += found.offset;
				qg->excl_cmpr += found.offset;
			}
			qgroup_dirty(fs_info, qg);

			list_for_each_entry(glist, &qg->groups, next_group) {
				ret = ulist_add(tmp, glist->group->qgroupid,
						(uintptr_t)glist->group,
						GFP_ATOMIC);
				if (ret < 0) {
					spin_unlock(&fs_info->qgroup_lock);
					ulist_free(roots);
					goto out;
				}
			}
		}

		spin_unlock(&fs_info->qgroup_lock);
		ulist_free(roots);
		ret = 0;
	}

out:
	btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);

	return ret;
}
1985
/*
 * Rescan worker: repeatedly scans one extent tree leaf per transaction
 * until qgroup_rescan_leaf reports completion (> 0) or an error (< 0),
 * then updates the qgroup status flags and wakes any waiters.
 */
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
{
	struct btrfs_fs_info *fs_info = container_of(work, struct btrfs_fs_info,
						     qgroup_rescan_work);
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans = NULL;
	struct ulist *tmp = NULL;
	struct extent_buffer *scratch_leaf = NULL;
	int err = -ENOMEM;

	path = btrfs_alloc_path();
	if (!path)
		goto out;
	tmp = ulist_alloc(GFP_NOFS);
	if (!tmp)
		goto out;
	scratch_leaf = kmalloc(sizeof(*scratch_leaf), GFP_NOFS);
	if (!scratch_leaf)
		goto out;

	err = 0;
	while (!err) {
		/* one transaction per scanned leaf */
		trans = btrfs_start_transaction(fs_info->fs_root, 0);
		if (IS_ERR(trans)) {
			err = PTR_ERR(trans);
			break;
		}
		if (!fs_info->quota_enabled) {
			/* quota was disabled while we were scanning */
			err = -EINTR;
		} else {
			err = qgroup_rescan_leaf(fs_info, path, trans,
						 tmp, scratch_leaf);
		}
		/* on completion (> 0), persist results via a commit */
		if (err > 0)
			btrfs_commit_transaction(trans, fs_info->fs_root);
		else
			btrfs_end_transaction(trans, fs_info->fs_root);
	}

out:
	kfree(scratch_leaf);
	ulist_free(tmp);
	btrfs_free_path(path);

	mutex_lock(&fs_info->qgroup_rescan_lock);
	fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;

	/* err == 2: finished and allowed to clear the inconsistent flag */
	if (err == 2 &&
	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	} else if (err < 0) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
	}
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	if (err >= 0) {
		pr_info("btrfs: qgroup scan completed%s\n",
			err == 2 ? " (inconsistency flag cleared)" : "");
	} else {
		pr_err("btrfs: qgroup scan failed with %d\n", err);
	}

	/* wake up btrfs_qgroup_wait_for_completion callers */
	complete_all(&fs_info->qgroup_rescan_completion);
}
2050
b382a324
JS
/*
 * Checks that (a) no rescan is running and (b) quota is enabled. Allocates all
 * memory required for the rescan context.
 *
 * With init_flags set (fresh rescan request), also sets the RESCAN status
 * flag; with init_flags == 0 (resume after mount) it only verifies the
 * flags are already in the expected state.  In both cases the progress
 * pointer, the completion and the work struct are (re)initialized.
 * Returns 0, -EINVAL or -EINPROGRESS.
 */
static int
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
		   int init_flags)
{
	int ret = 0;

	/* resume path: the flags must say a rescan was already in progress */
	if (!init_flags &&
	    (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) ||
	     !(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))) {
		ret = -EINVAL;
		goto err;
	}

	mutex_lock(&fs_info->qgroup_rescan_lock);
	spin_lock(&fs_info->qgroup_lock);

	if (init_flags) {
		if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
			ret = -EINPROGRESS;
		else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
			ret = -EINVAL;

		if (ret) {
			spin_unlock(&fs_info->qgroup_lock);
			mutex_unlock(&fs_info->qgroup_rescan_lock);
			goto err;
		}

		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_RESCAN;
	}

	memset(&fs_info->qgroup_rescan_progress, 0,
		sizeof(fs_info->qgroup_rescan_progress));
	fs_info->qgroup_rescan_progress.objectid = progress_objectid;

	spin_unlock(&fs_info->qgroup_lock);
	mutex_unlock(&fs_info->qgroup_rescan_lock);

	init_completion(&fs_info->qgroup_rescan_completion);

	memset(&fs_info->qgroup_rescan_work, 0,
	       sizeof(fs_info->qgroup_rescan_work));
	fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker;

	/* ret is always 0 here; the label is only reached via goto above */
	if (ret) {
err:
		pr_info("btrfs: qgroup_rescan_init failed with %d\n", ret);
		return ret;
	}

	return 0;
}
2107
2108static void
2109qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
2110{
2111 struct rb_node *n;
2112 struct btrfs_qgroup *qgroup;
2113
2114 spin_lock(&fs_info->qgroup_lock);
2f232036
JS
2115 /* clear all current qgroup tracking information */
2116 for (n = rb_first(&fs_info->qgroup_tree); n; n = rb_next(n)) {
2117 qgroup = rb_entry(n, struct btrfs_qgroup, node);
2118 qgroup->rfer = 0;
2119 qgroup->rfer_cmpr = 0;
2120 qgroup->excl = 0;
2121 qgroup->excl_cmpr = 0;
2122 }
2123 spin_unlock(&fs_info->qgroup_lock);
b382a324 2124}
2f232036 2125
b382a324
JS
/*
 * Start a fresh qgroup rescan: initialize the rescan state, quiesce
 * pending delayed refs with a transaction commit, zero all qgroup
 * counters and queue the rescan worker.  Returns 0 or a negative error.
 */
int
btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
{
	int ret = 0;
	struct btrfs_trans_handle *trans;

	ret = qgroup_rescan_init(fs_info, 0, 1);
	if (ret)
		return ret;

	/*
	 * We have set the rescan_progress to 0, which means no more
	 * delayed refs will be accounted by btrfs_qgroup_account_ref.
	 * However, btrfs_qgroup_account_ref may be right after its call
	 * to btrfs_find_all_roots, in which case it would still do the
	 * accounting.
	 * To solve this, we're committing the transaction, which will
	 * ensure we run all delayed refs and only after that, we are
	 * going to clear all tracking information for a clean start.
	 */

	trans = btrfs_join_transaction(fs_info->fs_root);
	if (IS_ERR(trans)) {
		/* undo the RESCAN flag set by qgroup_rescan_init */
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return PTR_ERR(trans);
	}
	ret = btrfs_commit_transaction(trans, fs_info->fs_root);
	if (ret) {
		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
		return ret;
	}

	qgroup_rescan_zero_tracking(fs_info);

	btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
			   &fs_info->qgroup_rescan_work);

	return 0;
}
57254b6e
JS
2165
2166int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
2167{
2168 int running;
2169 int ret = 0;
2170
2171 mutex_lock(&fs_info->qgroup_rescan_lock);
2172 spin_lock(&fs_info->qgroup_lock);
2173 running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
2174 spin_unlock(&fs_info->qgroup_lock);
2175 mutex_unlock(&fs_info->qgroup_rescan_lock);
2176
2177 if (running)
2178 ret = wait_for_completion_interruptible(
2179 &fs_info->qgroup_rescan_completion);
2180
2181 return ret;
2182}
b382a324
JS
2183
/*
 * this is only called from open_ctree where we're still single threaded, thus
 * locking is omitted here.
 *
 * Re-queues the rescan worker if the on-disk status said a rescan was
 * interrupted (the work struct was prepared by qgroup_rescan_init).
 */
void
btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
{
	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
		btrfs_queue_worker(&fs_info->qgroup_rescan_workers,
				   &fs_info->qgroup_rescan_work);
}