]>
Commit | Line | Data |
---|---|---|
280c2908 JB |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | ||
3 | #include "ctree.h" | |
4 | #include "space-info.h" | |
5 | #include "sysfs.h" | |
6 | #include "volumes.h" | |
5da6afeb | 7 | #include "free-space-cache.h" |
280c2908 JB |
8 | |
9 | u64 btrfs_space_info_used(struct btrfs_space_info *s_info, | |
10 | bool may_use_included) | |
11 | { | |
12 | ASSERT(s_info); | |
13 | return s_info->bytes_used + s_info->bytes_reserved + | |
14 | s_info->bytes_pinned + s_info->bytes_readonly + | |
15 | (may_use_included ? s_info->bytes_may_use : 0); | |
16 | } | |
17 | ||
18 | /* | |
19 | * after adding space to the filesystem, we need to clear the full flags | |
20 | * on all the space infos. | |
21 | */ | |
22 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info) | |
23 | { | |
24 | struct list_head *head = &info->space_info; | |
25 | struct btrfs_space_info *found; | |
26 | ||
27 | rcu_read_lock(); | |
28 | list_for_each_entry_rcu(found, head, list) | |
29 | found->full = 0; | |
30 | rcu_read_unlock(); | |
31 | } | |
32 | ||
33 | static const char *alloc_name(u64 flags) | |
34 | { | |
35 | switch (flags) { | |
36 | case BTRFS_BLOCK_GROUP_METADATA|BTRFS_BLOCK_GROUP_DATA: | |
37 | return "mixed"; | |
38 | case BTRFS_BLOCK_GROUP_METADATA: | |
39 | return "metadata"; | |
40 | case BTRFS_BLOCK_GROUP_DATA: | |
41 | return "data"; | |
42 | case BTRFS_BLOCK_GROUP_SYSTEM: | |
43 | return "system"; | |
44 | default: | |
45 | WARN_ON(1); | |
46 | return "invalid-combination"; | |
47 | }; | |
48 | } | |
49 | ||
/*
 * Allocate, initialize and publish a new space_info for the block group
 * type bits in @flags.
 *
 * On success the space_info is linked onto info->space_info (an RCU
 * list) and gets a sysfs kobject under info->space_info_kobj.  Returns
 * 0 on success or a negative errno on allocation/percpu/kobject
 * failure.
 */
static int create_space_info(struct btrfs_fs_info *info, u64 flags)
{

	struct btrfs_space_info *space_info;
	int i;
	int ret;

	space_info = kzalloc(sizeof(*space_info), GFP_NOFS);
	if (!space_info)
		return -ENOMEM;

	ret = percpu_counter_init(&space_info->total_bytes_pinned, 0,
				  GFP_KERNEL);
	if (ret) {
		kfree(space_info);
		return ret;
	}

	/* One block group list per raid profile. */
	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		INIT_LIST_HEAD(&space_info->block_groups[i]);
	init_rwsem(&space_info->groups_sem);
	spin_lock_init(&space_info->lock);
	space_info->flags = flags & BTRFS_BLOCK_GROUP_TYPE_MASK;
	space_info->force_alloc = CHUNK_ALLOC_NO_FORCE;
	init_waitqueue_head(&space_info->wait);
	INIT_LIST_HEAD(&space_info->ro_bgs);
	INIT_LIST_HEAD(&space_info->tickets);
	INIT_LIST_HEAD(&space_info->priority_tickets);

	ret = kobject_init_and_add(&space_info->kobj, &space_info_ktype,
				   info->space_info_kobj, "%s",
				   alloc_name(space_info->flags));
	if (ret) {
		/*
		 * After kobject_init_and_add() the kobject owns the
		 * refcount; drop it rather than kfree()ing directly.
		 * NOTE(review): the ktype release is presumably what
		 * frees space_info — confirm against space_info_ktype.
		 */
		kobject_put(&space_info->kobj);
		return ret;
	}

	list_add_rcu(&space_info->list, &info->space_info);
	/* Cache the DATA space_info for fast lookup. */
	if (flags & BTRFS_BLOCK_GROUP_DATA)
		info->data_sinfo = space_info;

	return ret;
}
93 | ||
94 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info) | |
95 | { | |
96 | struct btrfs_super_block *disk_super; | |
97 | u64 features; | |
98 | u64 flags; | |
99 | int mixed = 0; | |
100 | int ret; | |
101 | ||
102 | disk_super = fs_info->super_copy; | |
103 | if (!btrfs_super_root(disk_super)) | |
104 | return -EINVAL; | |
105 | ||
106 | features = btrfs_super_incompat_flags(disk_super); | |
107 | if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) | |
108 | mixed = 1; | |
109 | ||
110 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | |
111 | ret = create_space_info(fs_info, flags); | |
112 | if (ret) | |
113 | goto out; | |
114 | ||
115 | if (mixed) { | |
116 | flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; | |
117 | ret = create_space_info(fs_info, flags); | |
118 | } else { | |
119 | flags = BTRFS_BLOCK_GROUP_METADATA; | |
120 | ret = create_space_info(fs_info, flags); | |
121 | if (ret) | |
122 | goto out; | |
123 | ||
124 | flags = BTRFS_BLOCK_GROUP_DATA; | |
125 | ret = create_space_info(fs_info, flags); | |
126 | } | |
127 | out: | |
128 | return ret; | |
129 | } | |
130 | ||
/*
 * Account a newly added block group's space in the matching space_info.
 *
 * @total_bytes/@bytes_used/@bytes_readonly are added to the counters
 * under the space_info lock; @factor scales the on-disk totals
 * (presumably per the raid profile's duplication — see
 * btrfs_bg_type_to_factor()).  The unused remainder is offered to
 * waiting reservation tickets, and *@space_info is set to the info
 * that was updated.
 */
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
			     u64 total_bytes, u64 bytes_used,
			     u64 bytes_readonly,
			     struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *found;
	int factor;

	factor = btrfs_bg_type_to_factor(flags);

	/* The space_info for this type must already exist. */
	found = btrfs_find_space_info(info, flags);
	ASSERT(found);
	spin_lock(&found->lock);
	found->total_bytes += total_bytes;
	found->disk_total += total_bytes * factor;
	found->bytes_used += bytes_used;
	found->disk_used += bytes_used * factor;
	found->bytes_readonly += bytes_readonly;
	/* New capacity means we may no longer be full. */
	if (total_bytes > 0)
		found->full = 0;
	/* Hand the newly usable space to pending tickets. */
	btrfs_space_info_add_new_bytes(info, found,
				       total_bytes - bytes_used -
				       bytes_readonly);
	spin_unlock(&found->lock);
	*space_info = found;
}
157 | ||
158 | struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info, | |
159 | u64 flags) | |
160 | { | |
161 | struct list_head *head = &info->space_info; | |
162 | struct btrfs_space_info *found; | |
163 | ||
164 | flags &= BTRFS_BLOCK_GROUP_TYPE_MASK; | |
165 | ||
166 | rcu_read_lock(); | |
167 | list_for_each_entry_rcu(found, head, list) { | |
168 | if (found->flags & flags) { | |
169 | rcu_read_unlock(); | |
170 | return found; | |
171 | } | |
172 | } | |
173 | rcu_read_unlock(); | |
174 | return NULL; | |
175 | } | |
41783ef2 JB |
176 | |
177 | static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global) | |
178 | { | |
179 | return (global->size << 1); | |
180 | } | |
181 | ||
/*
 * Decide whether a reservation of @bytes in @space_info may overcommit
 * the currently allocated chunks, on the assumption that more chunks
 * can still be carved out of free device space.
 *
 * Returns 1 if overcommitting is acceptable, 0 otherwise.  How much of
 * the unallocated device space we are willing to lean on depends on
 * @flush.  @system_chunk selects the system (vs metadata) allocation
 * profile.
 */
int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
			 struct btrfs_space_info *space_info, u64 bytes,
			 enum btrfs_reserve_flush_enum flush,
			 bool system_chunk)
{
	struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
	u64 profile;
	u64 space_size;
	u64 avail;
	u64 used;
	int factor;

	/* Don't overcommit when in mixed mode. */
	if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
		return 0;

	if (system_chunk)
		profile = btrfs_system_alloc_profile(fs_info);
	else
		profile = btrfs_metadata_alloc_profile(fs_info);

	/* Excludes bytes_may_use; that is added back below. */
	used = btrfs_space_info_used(space_info, false);

	/*
	 * We only want to allow over committing if we have lots of actual space
	 * free, but if we don't have enough space to handle the global reserve
	 * space then we could end up having a real enospc problem when trying
	 * to allocate a chunk or some other such important allocation.
	 */
	spin_lock(&global_rsv->lock);
	space_size = calc_global_rsv_need_space(global_rsv);
	spin_unlock(&global_rsv->lock);
	if (used + space_size >= space_info->total_bytes)
		return 0;

	used += space_info->bytes_may_use;

	avail = atomic64_read(&fs_info->free_chunk_space);

	/*
	 * If we have dup, raid1 or raid10 then only half of the free
	 * space is actually usable. For raid56, the space info used
	 * doesn't include the parity drive, so we don't have to
	 * change the math
	 */
	factor = btrfs_bg_type_to_factor(profile);
	avail = div_u64(avail, factor);

	/*
	 * If we aren't flushing all things, let us overcommit up to
	 * 1/2th of the space. If we can flush, don't let us overcommit
	 * too much, let it overcommit up to 1/8 of the space.
	 */
	if (flush == BTRFS_RESERVE_FLUSH_ALL)
		avail >>= 3;
	else
		avail >>= 1;

	if (used + bytes < space_info->total_bytes + avail)
		return 1;
	return 0;
}
b338b013 JB |
244 | |
/*
 * This is for space we already have accounted in space_info->bytes_may_use, so
 * basically when we're returning space from block_rsv's.
 *
 * The freed @num_bytes is first used to satisfy waiting reservation
 * tickets (priority tickets, then the normal list); whatever remains is
 * dropped from bytes_may_use.
 */
void btrfs_space_info_add_old_bytes(struct btrfs_fs_info *fs_info,
				    struct btrfs_space_info *space_info,
				    u64 num_bytes)
{
	struct reserve_ticket *ticket;
	struct list_head *head;
	u64 used;
	enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_NO_FLUSH;
	bool check_overcommit = false;

	spin_lock(&space_info->lock);
	head = &space_info->priority_tickets;

	/*
	 * If we are over our limit then we need to check and see if we can
	 * overcommit, and if we can't then we just need to free up our space
	 * and not satisfy any requests.
	 */
	used = btrfs_space_info_used(space_info, true);
	if (used - num_bytes >= space_info->total_bytes)
		check_overcommit = true;
again:
	while (!list_empty(head) && num_bytes) {
		ticket = list_first_entry(head, struct reserve_ticket,
					  list);
		/*
		 * We use 0 bytes because this space is already reserved, so
		 * adding the ticket space would be a double count.
		 */
		if (check_overcommit &&
		    !btrfs_can_overcommit(fs_info, space_info, 0, flush,
					  false))
			break;
		if (num_bytes >= ticket->bytes) {
			/* Ticket fully satisfied: wake its waiter. */
			list_del_init(&ticket->list);
			num_bytes -= ticket->bytes;
			ticket->bytes = 0;
			space_info->tickets_id++;
			wake_up(&ticket->wait);
		} else {
			/* Partial fill; the ticket keeps waiting. */
			ticket->bytes -= num_bytes;
			num_bytes = 0;
		}
	}

	/* Priority tickets drained; retry with the normal list. */
	if (num_bytes && head == &space_info->priority_tickets) {
		head = &space_info->tickets;
		flush = BTRFS_RESERVE_FLUSH_ALL;
		goto again;
	}
	/* Whatever no ticket consumed is no longer may_use space. */
	btrfs_space_info_update_bytes_may_use(fs_info, space_info, -num_bytes);
	trace_btrfs_space_reservation(fs_info, "space_info",
				      space_info->flags, num_bytes, 0);
	spin_unlock(&space_info->lock);
}
304 | ||
/*
 * This is for newly allocated space that isn't accounted in
 * space_info->bytes_may_use yet. So if we allocate a chunk or unpin an extent
 * we use this helper.
 *
 * NOTE(review): callers appear to invoke this with space_info->lock
 * held (see btrfs_update_space_info) — confirm before adding callers.
 */
void btrfs_space_info_add_new_bytes(struct btrfs_fs_info *fs_info,
				    struct btrfs_space_info *space_info,
				    u64 num_bytes)
{
	struct reserve_ticket *ticket;
	struct list_head *head = &space_info->priority_tickets;

again:
	while (!list_empty(head) && num_bytes) {
		ticket = list_first_entry(head, struct reserve_ticket,
					  list);
		if (num_bytes >= ticket->bytes) {
			/*
			 * Ticket fully satisfied: the ticket's bytes now
			 * become may_use space and the waiter is woken.
			 */
			trace_btrfs_space_reservation(fs_info, "space_info",
						      space_info->flags,
						      ticket->bytes, 1);
			list_del_init(&ticket->list);
			num_bytes -= ticket->bytes;
			btrfs_space_info_update_bytes_may_use(fs_info,
							      space_info,
							      ticket->bytes);
			ticket->bytes = 0;
			space_info->tickets_id++;
			wake_up(&ticket->wait);
		} else {
			/* Partial fill; the ticket keeps waiting. */
			trace_btrfs_space_reservation(fs_info, "space_info",
						      space_info->flags,
						      num_bytes, 1);
			btrfs_space_info_update_bytes_may_use(fs_info,
							      space_info,
							      num_bytes);
			ticket->bytes -= num_bytes;
			num_bytes = 0;
		}
	}

	/* Priority tickets drained; retry with the normal list. */
	if (num_bytes && head == &space_info->priority_tickets) {
		head = &space_info->tickets;
		goto again;
	}
}
5da6afeb JB |
350 | |
/*
 * Log size/reserved of the named block_rsv member of @fs_info,
 * snapshotting both values under the rsv's own lock.
 */
#define DUMP_BLOCK_RSV(fs_info, rsv_name)				\
do {									\
	struct btrfs_block_rsv *__rsv = &(fs_info)->rsv_name;		\
	spin_lock(&__rsv->lock);					\
	btrfs_info(fs_info, #rsv_name ": size %llu reserved %llu",	\
		   __rsv->size, __rsv->reserved);			\
	spin_unlock(&__rsv->lock);					\
} while (0)
359 | ||
/*
 * Dump the accounting state of @info and the global block reserves to
 * the kernel log; with @dump_block_groups set, also walk every block
 * group in every raid index and dump its usage and free space.  Used
 * for debugging ENOSPC situations.  @bytes is passed through to
 * btrfs_dump_free_space().
 */
void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
			   struct btrfs_space_info *info, u64 bytes,
			   int dump_block_groups)
{
	struct btrfs_block_group_cache *cache;
	int index = 0;

	spin_lock(&info->lock);
	btrfs_info(fs_info, "space_info %llu has %llu free, is %sfull",
		   info->flags,
		   info->total_bytes - btrfs_space_info_used(info, true),
		   info->full ? "" : "not ");
	btrfs_info(fs_info,
		   "space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu",
		   info->total_bytes, info->bytes_used, info->bytes_pinned,
		   info->bytes_reserved, info->bytes_may_use,
		   info->bytes_readonly);
	spin_unlock(&info->lock);

	DUMP_BLOCK_RSV(fs_info, global_block_rsv);
	DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
	DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
	DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);

	if (!dump_block_groups)
		return;

	down_read(&info->groups_sem);
again:
	/* Walk each raid index's block group list in turn. */
	list_for_each_entry(cache, &info->block_groups[index], list) {
		spin_lock(&cache->lock);
		btrfs_info(fs_info,
			"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s",
			cache->key.objectid, cache->key.offset,
			btrfs_block_group_used(&cache->item), cache->pinned,
			cache->reserved, cache->ro ? "[readonly]" : "");
		btrfs_dump_free_space(cache, bytes);
		spin_unlock(&cache->lock);
	}
	if (++index < BTRFS_NR_RAID_TYPES)
		goto again;
	up_read(&info->groups_sem);
}