]>
Commit | Line | Data |
---|---|---|
ccd979bd MF |
1 | /* -*- mode: c; c-basic-offset: 8; -*- |
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | |
3 | * | |
4 | * suballoc.c | |
5 | * | |
6 | * metadata alloc and free | |
7 | * Inspired by ext3 block groups. | |
8 | * | |
9 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | |
10 | * | |
11 | * This program is free software; you can redistribute it and/or | |
12 | * modify it under the terms of the GNU General Public | |
13 | * License as published by the Free Software Foundation; either | |
14 | * version 2 of the License, or (at your option) any later version. | |
15 | * | |
16 | * This program is distributed in the hope that it will be useful, | |
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
19 | * General Public License for more details. | |
20 | * | |
21 | * You should have received a copy of the GNU General Public | |
22 | * License along with this program; if not, write to the | |
23 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
24 | * Boston, MA 02111-1307, USA. | |
25 | */ | |
26 | ||
27 | #include <linux/fs.h> | |
28 | #include <linux/types.h> | |
29 | #include <linux/slab.h> | |
30 | #include <linux/highmem.h> | |
31 | ||
32 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC | |
33 | #include <cluster/masklog.h> | |
34 | ||
35 | #include "ocfs2.h" | |
36 | ||
37 | #include "alloc.h" | |
38 | #include "dlmglue.h" | |
39 | #include "inode.h" | |
40 | #include "journal.h" | |
41 | #include "localalloc.h" | |
42 | #include "suballoc.h" | |
43 | #include "super.h" | |
44 | #include "sysfile.h" | |
45 | #include "uptodate.h" | |
46 | ||
47 | #include "buffer_head_io.h" | |
48 | ||
49 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); | |
50 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); | |
51 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); | |
52 | static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, | |
53 | struct inode *alloc_inode, | |
54 | struct buffer_head *bg_bh, | |
55 | u64 group_blkno, | |
56 | u16 my_chain, | |
57 | struct ocfs2_chain_list *cl); | |
58 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |
59 | struct inode *alloc_inode, | |
60 | struct buffer_head *bh); | |
61 | ||
62 | static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |
63 | struct ocfs2_alloc_context *ac); | |
64 | ||
65 | static int ocfs2_cluster_group_search(struct inode *inode, | |
66 | struct buffer_head *group_bh, | |
67 | u32 bits_wanted, u32 min_bits, | |
68 | u16 *bit_off, u16 *bits_found); | |
69 | static int ocfs2_block_group_search(struct inode *inode, | |
70 | struct buffer_head *group_bh, | |
71 | u32 bits_wanted, u32 min_bits, | |
72 | u16 *bit_off, u16 *bits_found); | |
ccd979bd MF |
73 | static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, |
74 | struct ocfs2_alloc_context *ac, | |
75 | u32 bits_wanted, | |
76 | u32 min_bits, | |
77 | u16 *bit_off, | |
78 | unsigned int *num_bits, | |
79 | u64 *bg_blkno); | |
80 | static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, | |
81 | int nr); | |
ccd979bd MF |
82 | static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, |
83 | struct inode *alloc_inode, | |
84 | struct ocfs2_group_desc *bg, | |
85 | struct buffer_head *group_bh, | |
86 | unsigned int bit_off, | |
87 | unsigned int num_bits); | |
88 | static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle, | |
89 | struct inode *alloc_inode, | |
90 | struct ocfs2_group_desc *bg, | |
91 | struct buffer_head *group_bh, | |
92 | unsigned int bit_off, | |
93 | unsigned int num_bits); | |
94 | ||
95 | static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, | |
96 | struct inode *alloc_inode, | |
97 | struct buffer_head *fe_bh, | |
98 | struct buffer_head *bg_bh, | |
99 | struct buffer_head *prev_bg_bh, | |
100 | u16 chain); | |
101 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, | |
102 | u32 wanted); | |
103 | static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, | |
104 | struct inode *alloc_inode, | |
105 | struct buffer_head *alloc_bh, | |
106 | unsigned int start_bit, | |
107 | u64 bg_blkno, | |
108 | unsigned int count); | |
109 | static inline u64 ocfs2_which_suballoc_group(u64 block, | |
110 | unsigned int bit); | |
111 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, | |
112 | u64 bg_blkno, | |
113 | u16 bg_bit_off); | |
114 | static inline u64 ocfs2_which_cluster_group(struct inode *inode, | |
115 | u32 cluster); | |
116 | static inline void ocfs2_block_to_cluster_group(struct inode *inode, | |
117 | u64 data_blkno, | |
118 | u64 *bg_blkno, | |
119 | u16 *bg_bit_off); | |
120 | ||
121 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | |
122 | { | |
123 | if (ac->ac_inode) | |
124 | iput(ac->ac_inode); | |
125 | if (ac->ac_bh) | |
126 | brelse(ac->ac_bh); | |
127 | kfree(ac); | |
128 | } | |
129 | ||
130 | static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl) | |
131 | { | |
132 | return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc); | |
133 | } | |
134 | ||
7bf72ede MF |
135 | /* somewhat more expensive than our other checks, so use sparingly. */ |
136 | static int ocfs2_check_group_descriptor(struct super_block *sb, | |
137 | struct ocfs2_dinode *di, | |
138 | struct ocfs2_group_desc *gd) | |
139 | { | |
140 | unsigned int max_bits; | |
141 | ||
142 | if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { | |
143 | OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd); | |
144 | return -EIO; | |
145 | } | |
146 | ||
147 | if (di->i_blkno != gd->bg_parent_dinode) { | |
148 | ocfs2_error(sb, "Group descriptor # %llu has bad parent " | |
149 | "pointer (%llu, expected %llu)", | |
150 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | |
151 | (unsigned long long)le64_to_cpu(gd->bg_parent_dinode), | |
152 | (unsigned long long)le64_to_cpu(di->i_blkno)); | |
153 | return -EIO; | |
154 | } | |
155 | ||
156 | max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc); | |
157 | if (le16_to_cpu(gd->bg_bits) > max_bits) { | |
158 | ocfs2_error(sb, "Group descriptor # %llu has bit count of %u", | |
159 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | |
160 | le16_to_cpu(gd->bg_bits)); | |
161 | return -EIO; | |
162 | } | |
163 | ||
164 | if (le16_to_cpu(gd->bg_chain) >= | |
165 | le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) { | |
166 | ocfs2_error(sb, "Group descriptor # %llu has bad chain %u", | |
167 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | |
168 | le16_to_cpu(gd->bg_chain)); | |
169 | return -EIO; | |
170 | } | |
171 | ||
172 | if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) { | |
173 | ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " | |
174 | "claims that %u are free", | |
175 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | |
176 | le16_to_cpu(gd->bg_bits), | |
177 | le16_to_cpu(gd->bg_free_bits_count)); | |
178 | return -EIO; | |
179 | } | |
180 | ||
181 | if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) { | |
182 | ocfs2_error(sb, "Group descriptor # %llu has bit count %u but " | |
183 | "max bitmap bits of %u", | |
184 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | |
185 | le16_to_cpu(gd->bg_bits), | |
186 | 8 * le16_to_cpu(gd->bg_size)); | |
187 | return -EIO; | |
188 | } | |
189 | ||
190 | return 0; | |
191 | } | |
192 | ||
ccd979bd MF |
193 | static int ocfs2_block_group_fill(struct ocfs2_journal_handle *handle, |
194 | struct inode *alloc_inode, | |
195 | struct buffer_head *bg_bh, | |
196 | u64 group_blkno, | |
197 | u16 my_chain, | |
198 | struct ocfs2_chain_list *cl) | |
199 | { | |
200 | int status = 0; | |
201 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; | |
202 | struct super_block * sb = alloc_inode->i_sb; | |
203 | ||
204 | mlog_entry_void(); | |
205 | ||
206 | if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) { | |
b0697053 MF |
207 | ocfs2_error(alloc_inode->i_sb, "group block (%llu) != " |
208 | "b_blocknr (%llu)", | |
209 | (unsigned long long)group_blkno, | |
ccd979bd MF |
210 | (unsigned long long) bg_bh->b_blocknr); |
211 | status = -EIO; | |
212 | goto bail; | |
213 | } | |
214 | ||
215 | status = ocfs2_journal_access(handle, | |
216 | alloc_inode, | |
217 | bg_bh, | |
218 | OCFS2_JOURNAL_ACCESS_CREATE); | |
219 | if (status < 0) { | |
220 | mlog_errno(status); | |
221 | goto bail; | |
222 | } | |
223 | ||
224 | memset(bg, 0, sb->s_blocksize); | |
225 | strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); | |
226 | bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); | |
227 | bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb)); | |
228 | bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl)); | |
229 | bg->bg_chain = cpu_to_le16(my_chain); | |
230 | bg->bg_next_group = cl->cl_recs[my_chain].c_blkno; | |
231 | bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno); | |
232 | bg->bg_blkno = cpu_to_le64(group_blkno); | |
233 | /* set the 1st bit in the bitmap to account for the descriptor block */ | |
234 | ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap); | |
235 | bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1); | |
236 | ||
237 | status = ocfs2_journal_dirty(handle, bg_bh); | |
238 | if (status < 0) | |
239 | mlog_errno(status); | |
240 | ||
241 | /* There is no need to zero out or otherwise initialize the | |
242 | * other blocks in a group - All valid FS metadata in a block | |
243 | * group stores the superblock fs_generation value at | |
244 | * allocation time. */ | |
245 | ||
246 | bail: | |
247 | mlog_exit(status); | |
248 | return status; | |
249 | } | |
250 | ||
251 | static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl) | |
252 | { | |
253 | u16 curr, best; | |
254 | ||
255 | best = curr = 0; | |
256 | while (curr < le16_to_cpu(cl->cl_count)) { | |
257 | if (le32_to_cpu(cl->cl_recs[best].c_total) > | |
258 | le32_to_cpu(cl->cl_recs[curr].c_total)) | |
259 | best = curr; | |
260 | curr++; | |
261 | } | |
262 | return best; | |
263 | } | |
264 | ||
265 | /* | |
266 | * We expect the block group allocator to already be locked. | |
267 | */ | |
268 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |
269 | struct inode *alloc_inode, | |
270 | struct buffer_head *bh) | |
271 | { | |
272 | int status, credits; | |
273 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; | |
274 | struct ocfs2_chain_list *cl; | |
275 | struct ocfs2_alloc_context *ac = NULL; | |
276 | struct ocfs2_journal_handle *handle = NULL; | |
277 | u32 bit_off, num_bits; | |
278 | u16 alloc_rec; | |
279 | u64 bg_blkno; | |
280 | struct buffer_head *bg_bh = NULL; | |
281 | struct ocfs2_group_desc *bg; | |
282 | ||
283 | BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode)); | |
284 | ||
285 | mlog_entry_void(); | |
286 | ||
287 | handle = ocfs2_alloc_handle(osb); | |
288 | if (!handle) { | |
289 | status = -ENOMEM; | |
290 | mlog_errno(status); | |
291 | goto bail; | |
292 | } | |
293 | ||
294 | cl = &fe->id2.i_chain; | |
295 | status = ocfs2_reserve_clusters(osb, | |
296 | handle, | |
297 | le16_to_cpu(cl->cl_cpg), | |
298 | &ac); | |
299 | if (status < 0) { | |
300 | if (status != -ENOSPC) | |
301 | mlog_errno(status); | |
302 | goto bail; | |
303 | } | |
304 | ||
305 | credits = ocfs2_calc_group_alloc_credits(osb->sb, | |
306 | le16_to_cpu(cl->cl_cpg)); | |
307 | handle = ocfs2_start_trans(osb, handle, credits); | |
308 | if (IS_ERR(handle)) { | |
309 | status = PTR_ERR(handle); | |
310 | handle = NULL; | |
311 | mlog_errno(status); | |
312 | goto bail; | |
313 | } | |
314 | ||
315 | status = ocfs2_claim_clusters(osb, | |
316 | handle, | |
317 | ac, | |
318 | le16_to_cpu(cl->cl_cpg), | |
319 | &bit_off, | |
320 | &num_bits); | |
321 | if (status < 0) { | |
322 | if (status != -ENOSPC) | |
323 | mlog_errno(status); | |
324 | goto bail; | |
325 | } | |
326 | ||
327 | alloc_rec = ocfs2_find_smallest_chain(cl); | |
328 | ||
329 | /* setup the group */ | |
330 | bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off); | |
b0697053 MF |
331 | mlog(0, "new descriptor, record %u, at block %llu\n", |
332 | alloc_rec, (unsigned long long)bg_blkno); | |
ccd979bd MF |
333 | |
334 | bg_bh = sb_getblk(osb->sb, bg_blkno); | |
335 | if (!bg_bh) { | |
336 | status = -EIO; | |
337 | mlog_errno(status); | |
338 | goto bail; | |
339 | } | |
340 | ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh); | |
341 | ||
342 | status = ocfs2_block_group_fill(handle, | |
343 | alloc_inode, | |
344 | bg_bh, | |
345 | bg_blkno, | |
346 | alloc_rec, | |
347 | cl); | |
348 | if (status < 0) { | |
349 | mlog_errno(status); | |
350 | goto bail; | |
351 | } | |
352 | ||
353 | bg = (struct ocfs2_group_desc *) bg_bh->b_data; | |
354 | ||
355 | status = ocfs2_journal_access(handle, alloc_inode, | |
356 | bh, OCFS2_JOURNAL_ACCESS_WRITE); | |
357 | if (status < 0) { | |
358 | mlog_errno(status); | |
359 | goto bail; | |
360 | } | |
361 | ||
362 | le32_add_cpu(&cl->cl_recs[alloc_rec].c_free, | |
363 | le16_to_cpu(bg->bg_free_bits_count)); | |
364 | le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); | |
365 | cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno); | |
366 | if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) | |
367 | le16_add_cpu(&cl->cl_next_free_rec, 1); | |
368 | ||
369 | le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) - | |
370 | le16_to_cpu(bg->bg_free_bits_count)); | |
371 | le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits)); | |
372 | le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg)); | |
373 | ||
374 | status = ocfs2_journal_dirty(handle, bh); | |
375 | if (status < 0) { | |
376 | mlog_errno(status); | |
377 | goto bail; | |
378 | } | |
379 | ||
380 | spin_lock(&OCFS2_I(alloc_inode)->ip_lock); | |
381 | OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); | |
382 | fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb, | |
383 | le32_to_cpu(fe->i_clusters))); | |
384 | spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); | |
385 | i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); | |
386 | alloc_inode->i_blocks = | |
387 | ocfs2_align_bytes_to_sectors(i_size_read(alloc_inode)); | |
388 | ||
389 | status = 0; | |
390 | bail: | |
391 | if (handle) | |
392 | ocfs2_commit_trans(handle); | |
393 | ||
394 | if (ac) | |
395 | ocfs2_free_alloc_context(ac); | |
396 | ||
397 | if (bg_bh) | |
398 | brelse(bg_bh); | |
399 | ||
400 | mlog_exit(status); | |
401 | return status; | |
402 | } | |
403 | ||
404 | static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |
405 | struct ocfs2_alloc_context *ac) | |
406 | { | |
407 | int status; | |
408 | u32 bits_wanted = ac->ac_bits_wanted; | |
409 | struct inode *alloc_inode = ac->ac_inode; | |
410 | struct buffer_head *bh = NULL; | |
411 | struct ocfs2_journal_handle *handle = ac->ac_handle; | |
412 | struct ocfs2_dinode *fe; | |
413 | u32 free_bits; | |
414 | ||
415 | mlog_entry_void(); | |
416 | ||
417 | BUG_ON(handle->flags & OCFS2_HANDLE_STARTED); | |
418 | ||
419 | ocfs2_handle_add_inode(handle, alloc_inode); | |
420 | status = ocfs2_meta_lock(alloc_inode, handle, &bh, 1); | |
421 | if (status < 0) { | |
422 | mlog_errno(status); | |
423 | goto bail; | |
424 | } | |
425 | ||
426 | fe = (struct ocfs2_dinode *) bh->b_data; | |
427 | if (!OCFS2_IS_VALID_DINODE(fe)) { | |
428 | OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); | |
429 | status = -EIO; | |
430 | goto bail; | |
431 | } | |
432 | if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) { | |
b0697053 MF |
433 | ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu", |
434 | (unsigned long long)le64_to_cpu(fe->i_blkno)); | |
ccd979bd MF |
435 | status = -EIO; |
436 | goto bail; | |
437 | } | |
438 | ||
439 | free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) - | |
440 | le32_to_cpu(fe->id1.bitmap1.i_used); | |
441 | ||
442 | if (bits_wanted > free_bits) { | |
443 | /* cluster bitmap never grows */ | |
444 | if (ocfs2_is_cluster_bitmap(alloc_inode)) { | |
445 | mlog(0, "Disk Full: wanted=%u, free_bits=%u\n", | |
446 | bits_wanted, free_bits); | |
447 | status = -ENOSPC; | |
448 | goto bail; | |
449 | } | |
450 | ||
451 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh); | |
452 | if (status < 0) { | |
453 | if (status != -ENOSPC) | |
454 | mlog_errno(status); | |
455 | goto bail; | |
456 | } | |
457 | atomic_inc(&osb->alloc_stats.bg_extends); | |
458 | ||
459 | /* You should never ask for this much metadata */ | |
460 | BUG_ON(bits_wanted > | |
461 | (le32_to_cpu(fe->id1.bitmap1.i_total) | |
462 | - le32_to_cpu(fe->id1.bitmap1.i_used))); | |
463 | } | |
464 | ||
465 | get_bh(bh); | |
466 | ac->ac_bh = bh; | |
467 | bail: | |
468 | if (bh) | |
469 | brelse(bh); | |
470 | ||
471 | mlog_exit(status); | |
472 | return status; | |
473 | } | |
474 | ||
475 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | |
476 | struct ocfs2_journal_handle *handle, | |
477 | struct ocfs2_dinode *fe, | |
478 | struct ocfs2_alloc_context **ac) | |
479 | { | |
480 | int status; | |
481 | struct inode *alloc_inode = NULL; | |
482 | ||
483 | *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); | |
484 | if (!(*ac)) { | |
485 | status = -ENOMEM; | |
486 | mlog_errno(status); | |
487 | goto bail; | |
488 | } | |
489 | ||
490 | (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); | |
491 | (*ac)->ac_handle = handle; | |
492 | (*ac)->ac_which = OCFS2_AC_USE_META; | |
493 | ||
494 | #ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS | |
495 | alloc_inode = ocfs2_get_system_file_inode(osb, | |
496 | EXTENT_ALLOC_SYSTEM_INODE, | |
497 | 0); | |
498 | #else | |
499 | alloc_inode = ocfs2_get_system_file_inode(osb, | |
500 | EXTENT_ALLOC_SYSTEM_INODE, | |
501 | osb->slot_num); | |
502 | #endif | |
503 | if (!alloc_inode) { | |
504 | status = -ENOMEM; | |
505 | mlog_errno(status); | |
506 | goto bail; | |
507 | } | |
508 | ||
509 | (*ac)->ac_inode = igrab(alloc_inode); | |
510 | (*ac)->ac_group_search = ocfs2_block_group_search; | |
511 | ||
512 | status = ocfs2_reserve_suballoc_bits(osb, (*ac)); | |
513 | if (status < 0) { | |
514 | if (status != -ENOSPC) | |
515 | mlog_errno(status); | |
516 | goto bail; | |
517 | } | |
518 | ||
519 | status = 0; | |
520 | bail: | |
521 | if ((status < 0) && *ac) { | |
522 | ocfs2_free_alloc_context(*ac); | |
523 | *ac = NULL; | |
524 | } | |
525 | ||
526 | if (alloc_inode) | |
527 | iput(alloc_inode); | |
528 | ||
529 | mlog_exit(status); | |
530 | return status; | |
531 | } | |
532 | ||
533 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |
534 | struct ocfs2_journal_handle *handle, | |
535 | struct ocfs2_alloc_context **ac) | |
536 | { | |
537 | int status; | |
538 | struct inode *alloc_inode = NULL; | |
539 | ||
540 | *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); | |
541 | if (!(*ac)) { | |
542 | status = -ENOMEM; | |
543 | mlog_errno(status); | |
544 | goto bail; | |
545 | } | |
546 | ||
547 | (*ac)->ac_bits_wanted = 1; | |
548 | (*ac)->ac_handle = handle; | |
549 | (*ac)->ac_which = OCFS2_AC_USE_INODE; | |
550 | ||
551 | alloc_inode = ocfs2_get_system_file_inode(osb, | |
552 | INODE_ALLOC_SYSTEM_INODE, | |
553 | osb->slot_num); | |
554 | if (!alloc_inode) { | |
555 | status = -ENOMEM; | |
556 | mlog_errno(status); | |
557 | goto bail; | |
558 | } | |
559 | ||
560 | (*ac)->ac_inode = igrab(alloc_inode); | |
561 | (*ac)->ac_group_search = ocfs2_block_group_search; | |
562 | ||
563 | status = ocfs2_reserve_suballoc_bits(osb, *ac); | |
564 | if (status < 0) { | |
565 | if (status != -ENOSPC) | |
566 | mlog_errno(status); | |
567 | goto bail; | |
568 | } | |
569 | ||
570 | status = 0; | |
571 | bail: | |
572 | if ((status < 0) && *ac) { | |
573 | ocfs2_free_alloc_context(*ac); | |
574 | *ac = NULL; | |
575 | } | |
576 | ||
577 | if (alloc_inode) | |
578 | iput(alloc_inode); | |
579 | ||
580 | mlog_exit(status); | |
581 | return status; | |
582 | } | |
583 | ||
584 | /* local alloc code has to do the same thing, so rather than do this | |
585 | * twice.. */ | |
586 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | |
587 | struct ocfs2_alloc_context *ac) | |
588 | { | |
589 | int status; | |
590 | ||
591 | ac->ac_inode = ocfs2_get_system_file_inode(osb, | |
592 | GLOBAL_BITMAP_SYSTEM_INODE, | |
593 | OCFS2_INVALID_SLOT); | |
594 | if (!ac->ac_inode) { | |
595 | status = -EINVAL; | |
596 | mlog(ML_ERROR, "Could not get bitmap inode!\n"); | |
597 | goto bail; | |
598 | } | |
599 | ac->ac_which = OCFS2_AC_USE_MAIN; | |
600 | ac->ac_group_search = ocfs2_cluster_group_search; | |
601 | ||
602 | status = ocfs2_reserve_suballoc_bits(osb, ac); | |
603 | if (status < 0 && status != -ENOSPC) | |
604 | mlog_errno(status); | |
605 | bail: | |
606 | return status; | |
607 | } | |
608 | ||
609 | /* Callers don't need to care which bitmap (local alloc or main) to | |
610 | * use so we figure it out for them, but unfortunately this clutters | |
611 | * things a bit. */ | |
612 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, | |
613 | struct ocfs2_journal_handle *handle, | |
614 | u32 bits_wanted, | |
615 | struct ocfs2_alloc_context **ac) | |
616 | { | |
617 | int status; | |
618 | ||
619 | mlog_entry_void(); | |
620 | ||
621 | BUG_ON(!handle); | |
622 | ||
623 | *ac = kcalloc(1, sizeof(struct ocfs2_alloc_context), GFP_KERNEL); | |
624 | if (!(*ac)) { | |
625 | status = -ENOMEM; | |
626 | mlog_errno(status); | |
627 | goto bail; | |
628 | } | |
629 | ||
630 | (*ac)->ac_bits_wanted = bits_wanted; | |
631 | (*ac)->ac_handle = handle; | |
632 | ||
633 | status = -ENOSPC; | |
634 | if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { | |
635 | status = ocfs2_reserve_local_alloc_bits(osb, | |
636 | handle, | |
637 | bits_wanted, | |
638 | *ac); | |
639 | if ((status < 0) && (status != -ENOSPC)) { | |
640 | mlog_errno(status); | |
641 | goto bail; | |
642 | } else if (status == -ENOSPC) { | |
643 | /* reserve_local_bits will return enospc with | |
644 | * the local alloc inode still locked, so we | |
645 | * can change this safely here. */ | |
646 | mlog(0, "Disabling local alloc\n"); | |
647 | /* We set to OCFS2_LA_DISABLED so that umount | |
648 | * can clean up what's left of the local | |
649 | * allocation */ | |
650 | osb->local_alloc_state = OCFS2_LA_DISABLED; | |
651 | } | |
652 | } | |
653 | ||
654 | if (status == -ENOSPC) { | |
655 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); | |
656 | if (status < 0) { | |
657 | if (status != -ENOSPC) | |
658 | mlog_errno(status); | |
659 | goto bail; | |
660 | } | |
661 | } | |
662 | ||
663 | status = 0; | |
664 | bail: | |
665 | if ((status < 0) && *ac) { | |
666 | ocfs2_free_alloc_context(*ac); | |
667 | *ac = NULL; | |
668 | } | |
669 | ||
670 | mlog_exit(status); | |
671 | return status; | |
672 | } | |
673 | ||
674 | /* | |
675 | * More or less lifted from ext3. I'll leave their description below: | |
676 | * | |
677 | * "For ext3 allocations, we must not reuse any blocks which are | |
678 | * allocated in the bitmap buffer's "last committed data" copy. This | |
679 | * prevents deletes from freeing up the page for reuse until we have | |
680 | * committed the delete transaction. | |
681 | * | |
682 | * If we didn't do this, then deleting something and reallocating it as | |
683 | * data would allow the old block to be overwritten before the | |
684 | * transaction committed (because we force data to disk before commit). | |
685 | * This would lead to corruption if we crashed between overwriting the | |
686 | * data and committing the delete. | |
687 | * | |
688 | * @@@ We may want to make this allocation behaviour conditional on | |
689 | * data-writes at some point, and disable it for metadata allocations or | |
690 | * sync-data inodes." | |
691 | * | |
692 | * Note: OCFS2 already does this differently for metadata vs data | |
693 | * allocations, as those bitmaps are separate and undo access is never | |
694 | * called on a metadata group descriptor. | |
695 | */ | |
696 | static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, | |
697 | int nr) | |
698 | { | |
699 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; | |
700 | ||
701 | if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) | |
702 | return 0; | |
703 | if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data) | |
704 | return 1; | |
705 | ||
706 | bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; | |
707 | return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); | |
708 | } | |
709 | ||
710 | static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, | |
711 | struct buffer_head *bg_bh, | |
712 | unsigned int bits_wanted, | |
7bf72ede | 713 | unsigned int total_bits, |
ccd979bd MF |
714 | u16 *bit_off, |
715 | u16 *bits_found) | |
716 | { | |
717 | void *bitmap; | |
718 | u16 best_offset, best_size; | |
719 | int offset, start, found, status = 0; | |
720 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; | |
721 | ||
722 | if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { | |
723 | OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg); | |
724 | return -EIO; | |
725 | } | |
726 | ||
727 | found = start = best_offset = best_size = 0; | |
728 | bitmap = bg->bg_bitmap; | |
729 | ||
7bf72ede MF |
730 | while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) { |
731 | if (offset == total_bits) | |
ccd979bd MF |
732 | break; |
733 | ||
734 | if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { | |
735 | /* We found a zero, but we can't use it as it | |
736 | * hasn't been put to disk yet! */ | |
737 | found = 0; | |
738 | start = offset + 1; | |
739 | } else if (offset == start) { | |
740 | /* we found a zero */ | |
741 | found++; | |
742 | /* move start to the next bit to test */ | |
743 | start++; | |
744 | } else { | |
745 | /* got a zero after some ones */ | |
746 | found = 1; | |
747 | start = offset + 1; | |
748 | } | |
749 | if (found > best_size) { | |
750 | best_size = found; | |
751 | best_offset = start - found; | |
752 | } | |
753 | /* we got everything we needed */ | |
754 | if (found == bits_wanted) { | |
755 | /* mlog(0, "Found it all!\n"); */ | |
756 | break; | |
757 | } | |
758 | } | |
759 | ||
760 | /* XXX: I think the first clause is equivalent to the second | |
761 | * - jlbec */ | |
762 | if (found == bits_wanted) { | |
763 | *bit_off = start - found; | |
764 | *bits_found = found; | |
765 | } else if (best_size) { | |
766 | *bit_off = best_offset; | |
767 | *bits_found = best_size; | |
768 | } else { | |
769 | status = -ENOSPC; | |
770 | /* No error log here -- see the comment above | |
771 | * ocfs2_test_bg_bit_allocatable */ | |
772 | } | |
773 | ||
774 | return status; | |
775 | } | |
776 | ||
777 | static inline int ocfs2_block_group_set_bits(struct ocfs2_journal_handle *handle, | |
778 | struct inode *alloc_inode, | |
779 | struct ocfs2_group_desc *bg, | |
780 | struct buffer_head *group_bh, | |
781 | unsigned int bit_off, | |
782 | unsigned int num_bits) | |
783 | { | |
784 | int status; | |
785 | void *bitmap = bg->bg_bitmap; | |
786 | int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; | |
787 | ||
788 | mlog_entry_void(); | |
789 | ||
790 | if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { | |
791 | OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); | |
792 | status = -EIO; | |
793 | goto bail; | |
794 | } | |
795 | BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits); | |
796 | ||
797 | mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off, | |
798 | num_bits); | |
799 | ||
800 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | |
801 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; | |
802 | ||
803 | status = ocfs2_journal_access(handle, | |
804 | alloc_inode, | |
805 | group_bh, | |
806 | journal_type); | |
807 | if (status < 0) { | |
808 | mlog_errno(status); | |
809 | goto bail; | |
810 | } | |
811 | ||
812 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); | |
813 | ||
814 | while(num_bits--) | |
815 | ocfs2_set_bit(bit_off++, bitmap); | |
816 | ||
817 | status = ocfs2_journal_dirty(handle, | |
818 | group_bh); | |
819 | if (status < 0) { | |
820 | mlog_errno(status); | |
821 | goto bail; | |
822 | } | |
823 | ||
824 | bail: | |
825 | mlog_exit(status); | |
826 | return status; | |
827 | } | |
828 | ||
829 | /* find the one with the most empty bits */ | |
830 | static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl) | |
831 | { | |
832 | u16 curr, best; | |
833 | ||
834 | BUG_ON(!cl->cl_next_free_rec); | |
835 | ||
836 | best = curr = 0; | |
837 | while (curr < le16_to_cpu(cl->cl_next_free_rec)) { | |
838 | if (le32_to_cpu(cl->cl_recs[curr].c_free) > | |
839 | le32_to_cpu(cl->cl_recs[best].c_free)) | |
840 | best = curr; | |
841 | curr++; | |
842 | } | |
843 | ||
844 | BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec)); | |
845 | return best; | |
846 | } | |
847 | ||
848 | static int ocfs2_relink_block_group(struct ocfs2_journal_handle *handle, | |
849 | struct inode *alloc_inode, | |
850 | struct buffer_head *fe_bh, | |
851 | struct buffer_head *bg_bh, | |
852 | struct buffer_head *prev_bg_bh, | |
853 | u16 chain) | |
854 | { | |
855 | int status; | |
856 | /* there is a really tiny chance the journal calls could fail, | |
857 | * but we wouldn't want inconsistent blocks in *any* case. */ | |
858 | u64 fe_ptr, bg_ptr, prev_bg_ptr; | |
859 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; | |
860 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; | |
861 | struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data; | |
862 | ||
863 | if (!OCFS2_IS_VALID_DINODE(fe)) { | |
864 | OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); | |
865 | status = -EIO; | |
866 | goto out; | |
867 | } | |
868 | if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { | |
869 | OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); | |
870 | status = -EIO; | |
871 | goto out; | |
872 | } | |
873 | if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) { | |
874 | OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg); | |
875 | status = -EIO; | |
876 | goto out; | |
877 | } | |
878 | ||
b0697053 MF |
879 | mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n", |
880 | (unsigned long long)fe->i_blkno, chain, | |
881 | (unsigned long long)bg->bg_blkno, | |
882 | (unsigned long long)prev_bg->bg_blkno); | |
ccd979bd MF |
883 | |
884 | fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno); | |
885 | bg_ptr = le64_to_cpu(bg->bg_next_group); | |
886 | prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); | |
887 | ||
888 | status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh, | |
889 | OCFS2_JOURNAL_ACCESS_WRITE); | |
890 | if (status < 0) { | |
891 | mlog_errno(status); | |
892 | goto out_rollback; | |
893 | } | |
894 | ||
895 | prev_bg->bg_next_group = bg->bg_next_group; | |
896 | ||
897 | status = ocfs2_journal_dirty(handle, prev_bg_bh); | |
898 | if (status < 0) { | |
899 | mlog_errno(status); | |
900 | goto out_rollback; | |
901 | } | |
902 | ||
903 | status = ocfs2_journal_access(handle, alloc_inode, bg_bh, | |
904 | OCFS2_JOURNAL_ACCESS_WRITE); | |
905 | if (status < 0) { | |
906 | mlog_errno(status); | |
907 | goto out_rollback; | |
908 | } | |
909 | ||
910 | bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; | |
911 | ||
912 | status = ocfs2_journal_dirty(handle, bg_bh); | |
913 | if (status < 0) { | |
914 | mlog_errno(status); | |
915 | goto out_rollback; | |
916 | } | |
917 | ||
918 | status = ocfs2_journal_access(handle, alloc_inode, fe_bh, | |
919 | OCFS2_JOURNAL_ACCESS_WRITE); | |
920 | if (status < 0) { | |
921 | mlog_errno(status); | |
922 | goto out_rollback; | |
923 | } | |
924 | ||
925 | fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; | |
926 | ||
927 | status = ocfs2_journal_dirty(handle, fe_bh); | |
928 | if (status < 0) { | |
929 | mlog_errno(status); | |
930 | goto out_rollback; | |
931 | } | |
932 | ||
933 | status = 0; | |
934 | out_rollback: | |
935 | if (status < 0) { | |
936 | fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); | |
937 | bg->bg_next_group = cpu_to_le64(bg_ptr); | |
938 | prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr); | |
939 | } | |
940 | out: | |
941 | mlog_exit(status); | |
942 | return status; | |
943 | } | |
944 | ||
945 | static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg, | |
946 | u32 wanted) | |
947 | { | |
948 | return le16_to_cpu(bg->bg_free_bits_count) > wanted; | |
949 | } | |
950 | ||
951 | /* return 0 on success, -ENOSPC to keep searching and any other < 0 | |
952 | * value on error. */ | |
953 | static int ocfs2_cluster_group_search(struct inode *inode, | |
954 | struct buffer_head *group_bh, | |
955 | u32 bits_wanted, u32 min_bits, | |
956 | u16 *bit_off, u16 *bits_found) | |
957 | { | |
958 | int search = -ENOSPC; | |
959 | int ret; | |
7bf72ede | 960 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; |
ccd979bd | 961 | u16 tmp_off, tmp_found; |
7bf72ede | 962 | unsigned int max_bits, gd_cluster_off; |
ccd979bd MF |
963 | |
964 | BUG_ON(!ocfs2_is_cluster_bitmap(inode)); | |
965 | ||
7bf72ede MF |
966 | if (gd->bg_free_bits_count) { |
967 | max_bits = le16_to_cpu(gd->bg_bits); | |
968 | ||
969 | /* Tail groups in cluster bitmaps which aren't cpg | |
970 | * aligned are prone to partial extention by a failed | |
971 | * fs resize. If the file system resize never got to | |
972 | * update the dinode cluster count, then we don't want | |
973 | * to trust any clusters past it, regardless of what | |
974 | * the group descriptor says. */ | |
975 | gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb, | |
976 | le64_to_cpu(gd->bg_blkno)); | |
977 | if ((gd_cluster_off + max_bits) > | |
978 | OCFS2_I(inode)->ip_clusters) { | |
979 | max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off; | |
980 | mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n", | |
981 | (unsigned long long)le64_to_cpu(gd->bg_blkno), | |
982 | le16_to_cpu(gd->bg_bits), | |
983 | OCFS2_I(inode)->ip_clusters, max_bits); | |
984 | } | |
985 | ||
ccd979bd MF |
986 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), |
987 | group_bh, bits_wanted, | |
7bf72ede | 988 | max_bits, |
ccd979bd MF |
989 | &tmp_off, &tmp_found); |
990 | if (ret) | |
991 | return ret; | |
992 | ||
993 | /* ocfs2_block_group_find_clear_bits() might | |
994 | * return success, but we still want to return | |
995 | * -ENOSPC unless it found the minimum number | |
996 | * of bits. */ | |
997 | if (min_bits <= tmp_found) { | |
998 | *bit_off = tmp_off; | |
999 | *bits_found = tmp_found; | |
1000 | search = 0; /* success */ | |
1001 | } | |
1002 | } | |
1003 | ||
1004 | return search; | |
1005 | } | |
1006 | ||
1007 | static int ocfs2_block_group_search(struct inode *inode, | |
1008 | struct buffer_head *group_bh, | |
1009 | u32 bits_wanted, u32 min_bits, | |
1010 | u16 *bit_off, u16 *bits_found) | |
1011 | { | |
1012 | int ret = -ENOSPC; | |
1013 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; | |
1014 | ||
1015 | BUG_ON(min_bits != 1); | |
1016 | BUG_ON(ocfs2_is_cluster_bitmap(inode)); | |
1017 | ||
1018 | if (bg->bg_free_bits_count) | |
1019 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), | |
1020 | group_bh, bits_wanted, | |
7bf72ede | 1021 | le16_to_cpu(bg->bg_bits), |
ccd979bd MF |
1022 | bit_off, bits_found); |
1023 | ||
1024 | return ret; | |
1025 | } | |
1026 | ||
883d4cae MF |
1027 | static int ocfs2_alloc_dinode_update_counts(struct inode *inode, |
1028 | struct ocfs2_journal_handle *handle, | |
1029 | struct buffer_head *di_bh, | |
1030 | u32 num_bits, | |
1031 | u16 chain) | |
1032 | { | |
1033 | int ret; | |
1034 | u32 tmp_used; | |
1035 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | |
1036 | struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; | |
1037 | ||
1038 | ret = ocfs2_journal_access(handle, inode, di_bh, | |
1039 | OCFS2_JOURNAL_ACCESS_WRITE); | |
1040 | if (ret < 0) { | |
1041 | mlog_errno(ret); | |
1042 | goto out; | |
1043 | } | |
1044 | ||
1045 | tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); | |
1046 | di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); | |
1047 | le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); | |
1048 | ||
1049 | ret = ocfs2_journal_dirty(handle, di_bh); | |
1050 | if (ret < 0) | |
1051 | mlog_errno(ret); | |
1052 | ||
1053 | out: | |
1054 | return ret; | |
1055 | } | |
1056 | ||
1057 | static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |
1058 | u32 bits_wanted, | |
1059 | u32 min_bits, | |
1060 | u16 *bit_off, | |
1061 | unsigned int *num_bits, | |
1062 | u64 gd_blkno, | |
1063 | u16 *bits_left) | |
1064 | { | |
1065 | int ret; | |
1066 | u16 found; | |
1067 | struct buffer_head *group_bh = NULL; | |
1068 | struct ocfs2_group_desc *gd; | |
1069 | struct inode *alloc_inode = ac->ac_inode; | |
1070 | struct ocfs2_journal_handle *handle = ac->ac_handle; | |
1071 | ||
1072 | ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, | |
1073 | &group_bh, OCFS2_BH_CACHED, alloc_inode); | |
1074 | if (ret < 0) { | |
1075 | mlog_errno(ret); | |
1076 | return ret; | |
1077 | } | |
1078 | ||
1079 | gd = (struct ocfs2_group_desc *) group_bh->b_data; | |
1080 | if (!OCFS2_IS_VALID_GROUP_DESC(gd)) { | |
1081 | OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd); | |
1082 | ret = -EIO; | |
1083 | goto out; | |
1084 | } | |
1085 | ||
1086 | ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, | |
1087 | bit_off, &found); | |
1088 | if (ret < 0) { | |
1089 | if (ret != -ENOSPC) | |
1090 | mlog_errno(ret); | |
1091 | goto out; | |
1092 | } | |
1093 | ||
1094 | *num_bits = found; | |
1095 | ||
1096 | ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, | |
1097 | *num_bits, | |
1098 | le16_to_cpu(gd->bg_chain)); | |
1099 | if (ret < 0) { | |
1100 | mlog_errno(ret); | |
1101 | goto out; | |
1102 | } | |
1103 | ||
1104 | ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, | |
1105 | *bit_off, *num_bits); | |
1106 | if (ret < 0) | |
1107 | mlog_errno(ret); | |
1108 | ||
1109 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); | |
1110 | ||
1111 | out: | |
1112 | brelse(group_bh); | |
1113 | ||
1114 | return ret; | |
1115 | } | |
1116 | ||
ccd979bd MF |
1117 | static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, |
1118 | u32 bits_wanted, | |
1119 | u32 min_bits, | |
1120 | u16 *bit_off, | |
1121 | unsigned int *num_bits, | |
883d4cae MF |
1122 | u64 *bg_blkno, |
1123 | u16 *bits_left) | |
ccd979bd MF |
1124 | { |
1125 | int status; | |
1126 | u16 chain, tmp_bits; | |
1127 | u32 tmp_used; | |
1128 | u64 next_group; | |
1129 | struct ocfs2_journal_handle *handle = ac->ac_handle; | |
1130 | struct inode *alloc_inode = ac->ac_inode; | |
1131 | struct buffer_head *group_bh = NULL; | |
1132 | struct buffer_head *prev_group_bh = NULL; | |
1133 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; | |
1134 | struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; | |
1135 | struct ocfs2_group_desc *bg; | |
1136 | ||
1137 | chain = ac->ac_chain; | |
b0697053 MF |
1138 | mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n", |
1139 | bits_wanted, chain, | |
1140 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); | |
ccd979bd MF |
1141 | |
1142 | status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), | |
1143 | le64_to_cpu(cl->cl_recs[chain].c_blkno), | |
1144 | &group_bh, OCFS2_BH_CACHED, alloc_inode); | |
1145 | if (status < 0) { | |
1146 | mlog_errno(status); | |
1147 | goto bail; | |
1148 | } | |
1149 | bg = (struct ocfs2_group_desc *) group_bh->b_data; | |
7bf72ede MF |
1150 | status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); |
1151 | if (status) { | |
1152 | mlog_errno(status); | |
ccd979bd MF |
1153 | goto bail; |
1154 | } | |
1155 | ||
1156 | status = -ENOSPC; | |
1157 | /* for now, the chain search is a bit simplistic. We just use | |
1158 | * the 1st group with any empty bits. */ | |
1159 | while ((status = ac->ac_group_search(alloc_inode, group_bh, | |
1160 | bits_wanted, min_bits, bit_off, | |
1161 | &tmp_bits)) == -ENOSPC) { | |
1162 | if (!bg->bg_next_group) | |
1163 | break; | |
1164 | ||
1165 | if (prev_group_bh) { | |
1166 | brelse(prev_group_bh); | |
1167 | prev_group_bh = NULL; | |
1168 | } | |
1169 | next_group = le64_to_cpu(bg->bg_next_group); | |
1170 | prev_group_bh = group_bh; | |
1171 | group_bh = NULL; | |
1172 | status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), | |
1173 | next_group, &group_bh, | |
1174 | OCFS2_BH_CACHED, alloc_inode); | |
1175 | if (status < 0) { | |
1176 | mlog_errno(status); | |
1177 | goto bail; | |
1178 | } | |
1179 | bg = (struct ocfs2_group_desc *) group_bh->b_data; | |
7bf72ede MF |
1180 | status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg); |
1181 | if (status) { | |
1182 | mlog_errno(status); | |
ccd979bd MF |
1183 | goto bail; |
1184 | } | |
1185 | } | |
1186 | if (status < 0) { | |
1187 | if (status != -ENOSPC) | |
1188 | mlog_errno(status); | |
1189 | goto bail; | |
1190 | } | |
1191 | ||
b0697053 MF |
1192 | mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", |
1193 | tmp_bits, (unsigned long long)bg->bg_blkno); | |
ccd979bd MF |
1194 | |
1195 | *num_bits = tmp_bits; | |
1196 | ||
1197 | BUG_ON(*num_bits == 0); | |
1198 | ||
1199 | /* | |
1200 | * Keep track of previous block descriptor read. When | |
1201 | * we find a target, if we have read more than X | |
1202 | * number of descriptors, and the target is reasonably | |
1203 | * empty, relink him to top of his chain. | |
1204 | * | |
1205 | * We've read 0 extra blocks and only send one more to | |
1206 | * the transaction, yet the next guy to search has a | |
1207 | * much easier time. | |
1208 | * | |
1209 | * Do this *after* figuring out how many bits we're taking out | |
1210 | * of our target group. | |
1211 | */ | |
1212 | if (ac->ac_allow_chain_relink && | |
1213 | (prev_group_bh) && | |
1214 | (ocfs2_block_group_reasonably_empty(bg, *num_bits))) { | |
1215 | status = ocfs2_relink_block_group(handle, alloc_inode, | |
1216 | ac->ac_bh, group_bh, | |
1217 | prev_group_bh, chain); | |
1218 | if (status < 0) { | |
1219 | mlog_errno(status); | |
1220 | goto bail; | |
1221 | } | |
1222 | } | |
1223 | ||
1224 | /* Ok, claim our bits now: set the info on dinode, chainlist | |
1225 | * and then the group */ | |
1226 | status = ocfs2_journal_access(handle, | |
1227 | alloc_inode, | |
1228 | ac->ac_bh, | |
1229 | OCFS2_JOURNAL_ACCESS_WRITE); | |
1230 | if (status < 0) { | |
1231 | mlog_errno(status); | |
1232 | goto bail; | |
1233 | } | |
1234 | ||
1235 | tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); | |
1236 | fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used); | |
1237 | le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits)); | |
1238 | ||
1239 | status = ocfs2_journal_dirty(handle, | |
1240 | ac->ac_bh); | |
1241 | if (status < 0) { | |
1242 | mlog_errno(status); | |
1243 | goto bail; | |
1244 | } | |
1245 | ||
1246 | status = ocfs2_block_group_set_bits(handle, | |
1247 | alloc_inode, | |
1248 | bg, | |
1249 | group_bh, | |
1250 | *bit_off, | |
1251 | *num_bits); | |
1252 | if (status < 0) { | |
1253 | mlog_errno(status); | |
1254 | goto bail; | |
1255 | } | |
1256 | ||
b0697053 MF |
1257 | mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits, |
1258 | (unsigned long long)fe->i_blkno); | |
ccd979bd MF |
1259 | |
1260 | *bg_blkno = le64_to_cpu(bg->bg_blkno); | |
883d4cae | 1261 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); |
ccd979bd MF |
1262 | bail: |
1263 | if (group_bh) | |
1264 | brelse(group_bh); | |
1265 | if (prev_group_bh) | |
1266 | brelse(prev_group_bh); | |
1267 | ||
1268 | mlog_exit(status); | |
1269 | return status; | |
1270 | } | |
1271 | ||
1272 | /* will give out up to bits_wanted contiguous bits. */ | |
1273 | static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, | |
1274 | struct ocfs2_alloc_context *ac, | |
1275 | u32 bits_wanted, | |
1276 | u32 min_bits, | |
1277 | u16 *bit_off, | |
1278 | unsigned int *num_bits, | |
1279 | u64 *bg_blkno) | |
1280 | { | |
1281 | int status; | |
1282 | u16 victim, i; | |
883d4cae MF |
1283 | u16 bits_left = 0; |
1284 | u64 hint_blkno = ac->ac_last_group; | |
ccd979bd MF |
1285 | struct ocfs2_chain_list *cl; |
1286 | struct ocfs2_dinode *fe; | |
1287 | ||
1288 | mlog_entry_void(); | |
1289 | ||
1290 | BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); | |
1291 | BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given)); | |
1292 | BUG_ON(!ac->ac_bh); | |
1293 | ||
1294 | fe = (struct ocfs2_dinode *) ac->ac_bh->b_data; | |
1295 | if (!OCFS2_IS_VALID_DINODE(fe)) { | |
1296 | OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe); | |
1297 | status = -EIO; | |
1298 | goto bail; | |
1299 | } | |
1300 | if (le32_to_cpu(fe->id1.bitmap1.i_used) >= | |
1301 | le32_to_cpu(fe->id1.bitmap1.i_total)) { | |
b0697053 MF |
1302 | ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " |
1303 | "bits but only %u total.", | |
1304 | (unsigned long long)le64_to_cpu(fe->i_blkno), | |
ccd979bd MF |
1305 | le32_to_cpu(fe->id1.bitmap1.i_used), |
1306 | le32_to_cpu(fe->id1.bitmap1.i_total)); | |
1307 | status = -EIO; | |
1308 | goto bail; | |
1309 | } | |
1310 | ||
883d4cae MF |
1311 | if (hint_blkno) { |
1312 | /* Attempt to short-circuit the usual search mechanism | |
1313 | * by jumping straight to the most recently used | |
1314 | * allocation group. This helps us mantain some | |
1315 | * contiguousness across allocations. */ | |
1316 | status = ocfs2_search_one_group(ac, bits_wanted, min_bits, | |
1317 | bit_off, num_bits, | |
1318 | hint_blkno, &bits_left); | |
1319 | if (!status) { | |
1320 | /* Be careful to update *bg_blkno here as the | |
1321 | * caller is expecting it to be filled in, and | |
1322 | * ocfs2_search_one_group() won't do that for | |
1323 | * us. */ | |
1324 | *bg_blkno = hint_blkno; | |
1325 | goto set_hint; | |
1326 | } | |
1327 | if (status < 0 && status != -ENOSPC) { | |
1328 | mlog_errno(status); | |
1329 | goto bail; | |
1330 | } | |
1331 | } | |
1332 | ||
ccd979bd MF |
1333 | cl = (struct ocfs2_chain_list *) &fe->id2.i_chain; |
1334 | ||
1335 | victim = ocfs2_find_victim_chain(cl); | |
1336 | ac->ac_chain = victim; | |
1337 | ac->ac_allow_chain_relink = 1; | |
1338 | ||
1339 | status = ocfs2_search_chain(ac, bits_wanted, min_bits, bit_off, | |
883d4cae | 1340 | num_bits, bg_blkno, &bits_left); |
ccd979bd | 1341 | if (!status) |
883d4cae | 1342 | goto set_hint; |
ccd979bd MF |
1343 | if (status < 0 && status != -ENOSPC) { |
1344 | mlog_errno(status); | |
1345 | goto bail; | |
1346 | } | |
1347 | ||
1348 | mlog(0, "Search of victim chain %u came up with nothing, " | |
1349 | "trying all chains now.\n", victim); | |
1350 | ||
1351 | /* If we didn't pick a good victim, then just default to | |
1352 | * searching each chain in order. Don't allow chain relinking | |
1353 | * because we only calculate enough journal credits for one | |
1354 | * relink per alloc. */ | |
1355 | ac->ac_allow_chain_relink = 0; | |
1356 | for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { | |
1357 | if (i == victim) | |
1358 | continue; | |
1359 | if (!cl->cl_recs[i].c_free) | |
1360 | continue; | |
1361 | ||
1362 | ac->ac_chain = i; | |
1363 | status = ocfs2_search_chain(ac, bits_wanted, min_bits, | |
883d4cae MF |
1364 | bit_off, num_bits, bg_blkno, |
1365 | &bits_left); | |
ccd979bd MF |
1366 | if (!status) |
1367 | break; | |
1368 | if (status < 0 && status != -ENOSPC) { | |
1369 | mlog_errno(status); | |
1370 | goto bail; | |
1371 | } | |
1372 | } | |
ccd979bd | 1373 | |
883d4cae MF |
1374 | set_hint: |
1375 | if (status != -ENOSPC) { | |
1376 | /* If the next search of this group is not likely to | |
1377 | * yield a suitable extent, then we reset the last | |
1378 | * group hint so as to not waste a disk read */ | |
1379 | if (bits_left < min_bits) | |
1380 | ac->ac_last_group = 0; | |
1381 | else | |
1382 | ac->ac_last_group = *bg_blkno; | |
1383 | } | |
1384 | ||
1385 | bail: | |
ccd979bd MF |
1386 | mlog_exit(status); |
1387 | return status; | |
1388 | } | |
1389 | ||
1390 | int ocfs2_claim_metadata(struct ocfs2_super *osb, | |
1391 | struct ocfs2_journal_handle *handle, | |
1392 | struct ocfs2_alloc_context *ac, | |
1393 | u32 bits_wanted, | |
1394 | u16 *suballoc_bit_start, | |
1395 | unsigned int *num_bits, | |
1396 | u64 *blkno_start) | |
1397 | { | |
1398 | int status; | |
1399 | u64 bg_blkno; | |
1400 | ||
1401 | BUG_ON(!ac); | |
1402 | BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted)); | |
1403 | BUG_ON(ac->ac_which != OCFS2_AC_USE_META); | |
1404 | BUG_ON(ac->ac_handle != handle); | |
1405 | ||
1406 | status = ocfs2_claim_suballoc_bits(osb, | |
1407 | ac, | |
1408 | bits_wanted, | |
1409 | 1, | |
1410 | suballoc_bit_start, | |
1411 | num_bits, | |
1412 | &bg_blkno); | |
1413 | if (status < 0) { | |
1414 | mlog_errno(status); | |
1415 | goto bail; | |
1416 | } | |
1417 | atomic_inc(&osb->alloc_stats.bg_allocs); | |
1418 | ||
1419 | *blkno_start = bg_blkno + (u64) *suballoc_bit_start; | |
1420 | ac->ac_bits_given += (*num_bits); | |
1421 | status = 0; | |
1422 | bail: | |
1423 | mlog_exit(status); | |
1424 | return status; | |
1425 | } | |
1426 | ||
1427 | int ocfs2_claim_new_inode(struct ocfs2_super *osb, | |
1428 | struct ocfs2_journal_handle *handle, | |
1429 | struct ocfs2_alloc_context *ac, | |
1430 | u16 *suballoc_bit, | |
1431 | u64 *fe_blkno) | |
1432 | { | |
1433 | int status; | |
1434 | unsigned int num_bits; | |
1435 | u64 bg_blkno; | |
1436 | ||
1437 | mlog_entry_void(); | |
1438 | ||
1439 | BUG_ON(!ac); | |
1440 | BUG_ON(ac->ac_bits_given != 0); | |
1441 | BUG_ON(ac->ac_bits_wanted != 1); | |
1442 | BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE); | |
1443 | BUG_ON(ac->ac_handle != handle); | |
1444 | ||
1445 | status = ocfs2_claim_suballoc_bits(osb, | |
1446 | ac, | |
1447 | 1, | |
1448 | 1, | |
1449 | suballoc_bit, | |
1450 | &num_bits, | |
1451 | &bg_blkno); | |
1452 | if (status < 0) { | |
1453 | mlog_errno(status); | |
1454 | goto bail; | |
1455 | } | |
1456 | atomic_inc(&osb->alloc_stats.bg_allocs); | |
1457 | ||
1458 | BUG_ON(num_bits != 1); | |
1459 | ||
1460 | *fe_blkno = bg_blkno + (u64) (*suballoc_bit); | |
1461 | ac->ac_bits_given++; | |
1462 | status = 0; | |
1463 | bail: | |
1464 | mlog_exit(status); | |
1465 | return status; | |
1466 | } | |
1467 | ||
1468 | /* translate a group desc. blkno and it's bitmap offset into | |
1469 | * disk cluster offset. */ | |
1470 | static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode, | |
1471 | u64 bg_blkno, | |
1472 | u16 bg_bit_off) | |
1473 | { | |
1474 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | |
1475 | u32 cluster = 0; | |
1476 | ||
1477 | BUG_ON(!ocfs2_is_cluster_bitmap(inode)); | |
1478 | ||
1479 | if (bg_blkno != osb->first_cluster_group_blkno) | |
1480 | cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno); | |
1481 | cluster += (u32) bg_bit_off; | |
1482 | return cluster; | |
1483 | } | |
1484 | ||
1485 | /* given a cluster offset, calculate which block group it belongs to | |
1486 | * and return that block offset. */ | |
1487 | static inline u64 ocfs2_which_cluster_group(struct inode *inode, | |
1488 | u32 cluster) | |
1489 | { | |
1490 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | |
1491 | u32 group_no; | |
1492 | ||
1493 | BUG_ON(!ocfs2_is_cluster_bitmap(inode)); | |
1494 | ||
1495 | group_no = cluster / osb->bitmap_cpg; | |
1496 | if (!group_no) | |
1497 | return osb->first_cluster_group_blkno; | |
1498 | return ocfs2_clusters_to_blocks(inode->i_sb, | |
1499 | group_no * osb->bitmap_cpg); | |
1500 | } | |
1501 | ||
1502 | /* given the block number of a cluster start, calculate which cluster | |
1503 | * group and descriptor bitmap offset that corresponds to. */ | |
1504 | static inline void ocfs2_block_to_cluster_group(struct inode *inode, | |
1505 | u64 data_blkno, | |
1506 | u64 *bg_blkno, | |
1507 | u16 *bg_bit_off) | |
1508 | { | |
1509 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | |
1510 | u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno); | |
1511 | ||
1512 | BUG_ON(!ocfs2_is_cluster_bitmap(inode)); | |
1513 | ||
1514 | *bg_blkno = ocfs2_which_cluster_group(inode, | |
1515 | data_cluster); | |
1516 | ||
1517 | if (*bg_blkno == osb->first_cluster_group_blkno) | |
1518 | *bg_bit_off = (u16) data_cluster; | |
1519 | else | |
1520 | *bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb, | |
1521 | data_blkno - *bg_blkno); | |
1522 | } | |
1523 | ||
1524 | /* | |
1525 | * min_bits - minimum contiguous chunk from this total allocation we | |
1526 | * can handle. set to what we asked for originally for a full | |
1527 | * contig. allocation, set to '1' to indicate we can deal with extents | |
1528 | * of any size. | |
1529 | */ | |
1530 | int ocfs2_claim_clusters(struct ocfs2_super *osb, | |
1531 | struct ocfs2_journal_handle *handle, | |
1532 | struct ocfs2_alloc_context *ac, | |
1533 | u32 min_clusters, | |
1534 | u32 *cluster_start, | |
1535 | u32 *num_clusters) | |
1536 | { | |
1537 | int status; | |
1538 | unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; | |
883d4cae | 1539 | u64 bg_blkno = 0; |
ccd979bd MF |
1540 | u16 bg_bit_off; |
1541 | ||
1542 | mlog_entry_void(); | |
1543 | ||
1544 | BUG_ON(!ac); | |
1545 | BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); | |
1546 | ||
1547 | BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL | |
1548 | && ac->ac_which != OCFS2_AC_USE_MAIN); | |
1549 | BUG_ON(ac->ac_handle != handle); | |
1550 | ||
1551 | if (ac->ac_which == OCFS2_AC_USE_LOCAL) { | |
1552 | status = ocfs2_claim_local_alloc_bits(osb, | |
1553 | handle, | |
1554 | ac, | |
1555 | bits_wanted, | |
1556 | cluster_start, | |
1557 | num_clusters); | |
1558 | if (!status) | |
1559 | atomic_inc(&osb->alloc_stats.local_data); | |
1560 | } else { | |
1561 | if (min_clusters > (osb->bitmap_cpg - 1)) { | |
1562 | /* The only paths asking for contiguousness | |
1563 | * should know about this already. */ | |
1564 | mlog(ML_ERROR, "minimum allocation requested exceeds " | |
1565 | "group bitmap size!"); | |
1566 | status = -ENOSPC; | |
1567 | goto bail; | |
1568 | } | |
1569 | /* clamp the current request down to a realistic size. */ | |
1570 | if (bits_wanted > (osb->bitmap_cpg - 1)) | |
1571 | bits_wanted = osb->bitmap_cpg - 1; | |
1572 | ||
1573 | status = ocfs2_claim_suballoc_bits(osb, | |
1574 | ac, | |
1575 | bits_wanted, | |
1576 | min_clusters, | |
1577 | &bg_bit_off, | |
1578 | num_clusters, | |
1579 | &bg_blkno); | |
1580 | if (!status) { | |
1581 | *cluster_start = | |
1582 | ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode, | |
1583 | bg_blkno, | |
1584 | bg_bit_off); | |
1585 | atomic_inc(&osb->alloc_stats.bitmap_data); | |
1586 | } | |
1587 | } | |
1588 | if (status < 0) { | |
1589 | if (status != -ENOSPC) | |
1590 | mlog_errno(status); | |
1591 | goto bail; | |
1592 | } | |
1593 | ||
1594 | ac->ac_bits_given += *num_clusters; | |
1595 | ||
1596 | bail: | |
1597 | mlog_exit(status); | |
1598 | return status; | |
1599 | } | |
1600 | ||
1601 | static inline int ocfs2_block_group_clear_bits(struct ocfs2_journal_handle *handle, | |
1602 | struct inode *alloc_inode, | |
1603 | struct ocfs2_group_desc *bg, | |
1604 | struct buffer_head *group_bh, | |
1605 | unsigned int bit_off, | |
1606 | unsigned int num_bits) | |
1607 | { | |
1608 | int status; | |
1609 | unsigned int tmp; | |
1610 | int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; | |
1611 | struct ocfs2_group_desc *undo_bg = NULL; | |
1612 | ||
1613 | mlog_entry_void(); | |
1614 | ||
1615 | if (!OCFS2_IS_VALID_GROUP_DESC(bg)) { | |
1616 | OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg); | |
1617 | status = -EIO; | |
1618 | goto bail; | |
1619 | } | |
1620 | ||
1621 | mlog(0, "off = %u, num = %u\n", bit_off, num_bits); | |
1622 | ||
1623 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | |
1624 | journal_type = OCFS2_JOURNAL_ACCESS_UNDO; | |
1625 | ||
1626 | status = ocfs2_journal_access(handle, alloc_inode, group_bh, | |
1627 | journal_type); | |
1628 | if (status < 0) { | |
1629 | mlog_errno(status); | |
1630 | goto bail; | |
1631 | } | |
1632 | ||
1633 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | |
1634 | undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data; | |
1635 | ||
1636 | tmp = num_bits; | |
1637 | while(tmp--) { | |
1638 | ocfs2_clear_bit((bit_off + tmp), | |
1639 | (unsigned long *) bg->bg_bitmap); | |
1640 | if (ocfs2_is_cluster_bitmap(alloc_inode)) | |
1641 | ocfs2_set_bit(bit_off + tmp, | |
1642 | (unsigned long *) undo_bg->bg_bitmap); | |
1643 | } | |
1644 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); | |
1645 | ||
1646 | status = ocfs2_journal_dirty(handle, group_bh); | |
1647 | if (status < 0) | |
1648 | mlog_errno(status); | |
1649 | bail: | |
1650 | return status; | |
1651 | } | |
1652 | ||
1653 | /* | |
1654 | * expects the suballoc inode to already be locked. | |
1655 | */ | |
1656 | static int ocfs2_free_suballoc_bits(struct ocfs2_journal_handle *handle, | |
1657 | struct inode *alloc_inode, | |
1658 | struct buffer_head *alloc_bh, | |
1659 | unsigned int start_bit, | |
1660 | u64 bg_blkno, | |
1661 | unsigned int count) | |
1662 | { | |
1663 | int status = 0; | |
1664 | u32 tmp_used; | |
1665 | struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); | |
1666 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; | |
1667 | struct ocfs2_chain_list *cl = &fe->id2.i_chain; | |
1668 | struct buffer_head *group_bh = NULL; | |
1669 | struct ocfs2_group_desc *group; | |
1670 | ||
1671 | mlog_entry_void(); | |
1672 | ||
1673 | if (!OCFS2_IS_VALID_DINODE(fe)) { | |
1674 | OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe); | |
1675 | status = -EIO; | |
1676 | goto bail; | |
1677 | } | |
1678 | BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl)); | |
1679 | ||
b0697053 MF |
1680 | mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n", |
1681 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, | |
1682 | (unsigned long long)bg_blkno, start_bit); | |
ccd979bd MF |
1683 | |
1684 | status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, | |
1685 | alloc_inode); | |
1686 | if (status < 0) { | |
1687 | mlog_errno(status); | |
1688 | goto bail; | |
1689 | } | |
1690 | ||
1691 | group = (struct ocfs2_group_desc *) group_bh->b_data; | |
7bf72ede MF |
1692 | status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group); |
1693 | if (status) { | |
1694 | mlog_errno(status); | |
ccd979bd MF |
1695 | goto bail; |
1696 | } | |
1697 | BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); | |
1698 | ||
1699 | status = ocfs2_block_group_clear_bits(handle, alloc_inode, | |
1700 | group, group_bh, | |
1701 | start_bit, count); | |
1702 | if (status < 0) { | |
1703 | mlog_errno(status); | |
1704 | goto bail; | |
1705 | } | |
1706 | ||
1707 | status = ocfs2_journal_access(handle, alloc_inode, alloc_bh, | |
1708 | OCFS2_JOURNAL_ACCESS_WRITE); | |
1709 | if (status < 0) { | |
1710 | mlog_errno(status); | |
1711 | goto bail; | |
1712 | } | |
1713 | ||
1714 | le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free, | |
1715 | count); | |
1716 | tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); | |
1717 | fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); | |
1718 | ||
1719 | status = ocfs2_journal_dirty(handle, alloc_bh); | |
1720 | if (status < 0) { | |
1721 | mlog_errno(status); | |
1722 | goto bail; | |
1723 | } | |
1724 | ||
1725 | bail: | |
1726 | if (group_bh) | |
1727 | brelse(group_bh); | |
1728 | ||
1729 | mlog_exit(status); | |
1730 | return status; | |
1731 | } | |
1732 | ||
1733 | static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit) | |
1734 | { | |
1735 | u64 group = block - (u64) bit; | |
1736 | ||
1737 | return group; | |
1738 | } | |
1739 | ||
1740 | int ocfs2_free_dinode(struct ocfs2_journal_handle *handle, | |
1741 | struct inode *inode_alloc_inode, | |
1742 | struct buffer_head *inode_alloc_bh, | |
1743 | struct ocfs2_dinode *di) | |
1744 | { | |
1745 | u64 blk = le64_to_cpu(di->i_blkno); | |
1746 | u16 bit = le16_to_cpu(di->i_suballoc_bit); | |
1747 | u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); | |
1748 | ||
1749 | return ocfs2_free_suballoc_bits(handle, inode_alloc_inode, | |
1750 | inode_alloc_bh, bit, bg_blkno, 1); | |
1751 | } | |
1752 | ||
1753 | int ocfs2_free_extent_block(struct ocfs2_journal_handle *handle, | |
1754 | struct inode *eb_alloc_inode, | |
1755 | struct buffer_head *eb_alloc_bh, | |
1756 | struct ocfs2_extent_block *eb) | |
1757 | { | |
1758 | u64 blk = le64_to_cpu(eb->h_blkno); | |
1759 | u16 bit = le16_to_cpu(eb->h_suballoc_bit); | |
1760 | u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); | |
1761 | ||
1762 | return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh, | |
1763 | bit, bg_blkno, 1); | |
1764 | } | |
1765 | ||
1766 | int ocfs2_free_clusters(struct ocfs2_journal_handle *handle, | |
1767 | struct inode *bitmap_inode, | |
1768 | struct buffer_head *bitmap_bh, | |
1769 | u64 start_blk, | |
1770 | unsigned int num_clusters) | |
1771 | { | |
1772 | int status; | |
1773 | u16 bg_start_bit; | |
1774 | u64 bg_blkno; | |
1775 | struct ocfs2_dinode *fe; | |
1776 | ||
1777 | /* You can't ever have a contiguous set of clusters | |
1778 | * bigger than a block group bitmap so we never have to worry | |
1779 | * about looping on them. */ | |
1780 | ||
1781 | mlog_entry_void(); | |
1782 | ||
1783 | /* This is expensive. We can safely remove once this stuff has | |
1784 | * gotten tested really well. */ | |
1785 | BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk))); | |
1786 | ||
1787 | fe = (struct ocfs2_dinode *) bitmap_bh->b_data; | |
1788 | ||
1789 | ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno, | |
1790 | &bg_start_bit); | |
1791 | ||
b0697053 MF |
1792 | mlog(0, "want to free %u clusters starting at block %llu\n", |
1793 | num_clusters, (unsigned long long)start_blk); | |
1794 | mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n", | |
1795 | (unsigned long long)bg_blkno, bg_start_bit); | |
ccd979bd MF |
1796 | |
1797 | status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, | |
1798 | bg_start_bit, bg_blkno, | |
1799 | num_clusters); | |
1800 | if (status < 0) | |
1801 | mlog_errno(status); | |
1802 | ||
1803 | mlog_exit(status); | |
1804 | return status; | |
1805 | } | |
1806 | ||
1807 | static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg) | |
1808 | { | |
1809 | printk("Block Group:\n"); | |
1810 | printk("bg_signature: %s\n", bg->bg_signature); | |
1811 | printk("bg_size: %u\n", bg->bg_size); | |
1812 | printk("bg_bits: %u\n", bg->bg_bits); | |
1813 | printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count); | |
1814 | printk("bg_chain: %u\n", bg->bg_chain); | |
1815 | printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation)); | |
b0697053 MF |
1816 | printk("bg_next_group: %llu\n", |
1817 | (unsigned long long)bg->bg_next_group); | |
1818 | printk("bg_parent_dinode: %llu\n", | |
1819 | (unsigned long long)bg->bg_parent_dinode); | |
1820 | printk("bg_blkno: %llu\n", | |
1821 | (unsigned long long)bg->bg_blkno); | |
ccd979bd MF |
1822 | } |
1823 | ||
1824 | static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) | |
1825 | { | |
1826 | int i; | |
1827 | ||
b0697053 | 1828 | printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno); |
ccd979bd | 1829 | printk("i_signature: %s\n", fe->i_signature); |
b0697053 MF |
1830 | printk("i_size: %llu\n", |
1831 | (unsigned long long)fe->i_size); | |
ccd979bd MF |
1832 | printk("i_clusters: %u\n", fe->i_clusters); |
1833 | printk("i_generation: %u\n", | |
1834 | le32_to_cpu(fe->i_generation)); | |
1835 | printk("id1.bitmap1.i_used: %u\n", | |
1836 | le32_to_cpu(fe->id1.bitmap1.i_used)); | |
1837 | printk("id1.bitmap1.i_total: %u\n", | |
1838 | le32_to_cpu(fe->id1.bitmap1.i_total)); | |
1839 | printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg); | |
1840 | printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc); | |
1841 | printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count); | |
1842 | printk("id2.i_chain.cl_next_free_rec: %u\n", | |
1843 | fe->id2.i_chain.cl_next_free_rec); | |
1844 | for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) { | |
1845 | printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i, | |
1846 | fe->id2.i_chain.cl_recs[i].c_free); | |
1847 | printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i, | |
1848 | fe->id2.i_chain.cl_recs[i].c_total); | |
b0697053 MF |
1849 | printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i, |
1850 | (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); | |
ccd979bd MF |
1851 | } |
1852 | } |