/*
 * fs/logfs/segment.c	- Handling the Object Store
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 *
 * Object store or ostore makes up the complete device with the exception
 * of the superblock and journal areas.  Apart from its own metadata, it
 * stores three kinds of objects: inodes, dentries and blocks, both data
 * and indirect.
 */
#include "logfs.h"
#include <linux/slab.h>
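
/*
 * Quick orientation, as a descriptive sketch only (the real geometry is
 * given by the superblock, not by this comment): the ostore is divided
 * into segments of s_segsize bytes.  Each segment starts with a
 * struct logfs_segment_header (written by ostore_erase_segment() below)
 * and is then filled front to back with objects, each prefixed by a
 * struct logfs_object_header (written by __logfs_segment_write()).
 * Everything below addresses data as a (segment number, offset within
 * segment) pair and folds it into a linear device offset via dev_ofs().
 */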

static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
{
	struct logfs_super *super = logfs_super(sb);
	struct btree_head32 *head = &super->s_reserved_segments;
	int err;

	err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
	if (err)
		return err;
	logfs_super(sb)->s_bad_segments++;
	/* FIXME: write to journal */
	return 0;
}

int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase)
{
	struct logfs_super *super = logfs_super(sb);

	super->s_gec++;

	return super->s_devops->erase(sb, (u64)segno << super->s_segshift,
			super->s_segsize, ensure_erase);
}
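
/*
 * Example of the address arithmetic above, with made-up numbers: if
 * s_segshift were 17 (128KiB segments), segment 5 would start at byte
 * offset 5 << 17 == 0xa0000, and logfs_erase_segment(sb, 5, 0) would ask
 * the device driver to erase the s_segsize bytes starting there.
 * dev_ofs(), used throughout this file, performs the same kind of
 * segment-to-byte translation.
 */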

static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes)
{
	s32 ofs;

	logfs_open_area(area, bytes);

	ofs = area->a_used_bytes;
	area->a_used_bytes += bytes;
	BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize);

	return dev_ofs(area->a_sb, area->a_segno, ofs);
}

static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
		int use_filler)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	filler_t *filler = super->s_devops->readpage;
	struct page *page;

	BUG_ON(mapping_gfp_constraint(mapping, __GFP_FS));
	if (use_filler)
		page = read_cache_page(mapping, index, filler, sb);
	else {
		page = find_or_create_page(mapping, index, GFP_NOFS);
		if (page)
			unlock_page(page);
	}
	return page;
}

int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
		int use_filler)
{
	pgoff_t index = ofs >> PAGE_SHIFT;
	struct page *page;
	long offset = ofs & (PAGE_SIZE-1);
	long copylen;

	/* Only logfs_wbuf_recover may use len==0 */
	BUG_ON(!len && !use_filler);
	do {
		copylen = min((ulong)len, PAGE_SIZE - offset);

		page = get_mapping_page(area->a_sb, index, use_filler);
		if (IS_ERR(page))
			return PTR_ERR(page);
		BUG_ON(!page); /* FIXME: reserve a pool */
		SetPageUptodate(page);
		memcpy(page_address(page) + offset, buf, copylen);

		if (!PagePrivate(page)) {
			SetPagePrivate(page);
			page_cache_get(page);
		}
		page_cache_release(page);

		buf += copylen;
		len -= copylen;
		offset = 0;
		index++;
	} while (len);
	return 0;
}
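
/*
 * Illustration of the copy loop above (example values only): writing
 * len == 0x30 bytes at device offset ofs == 0xff0 with 4KiB pages gives
 * index == 0 and offset == 0xff0.  The first iteration copies
 * copylen == 0x10 bytes into the tail of page 0, the second copies the
 * remaining 0x20 bytes to offset 0 of page 1.  Each touched page is
 * marked PagePrivate and pinned with an extra reference, which
 * freeseg() drops again once the segment has been written out.
 */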

static void pad_partial_page(struct logfs_area *area)
{
	struct super_block *sb = area->a_sb;
	struct page *page;
	u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
	pgoff_t index = ofs >> PAGE_SHIFT;
	long offset = ofs & (PAGE_SIZE-1);
	u32 len = PAGE_SIZE - offset;

	if (len % PAGE_SIZE) {
		page = get_mapping_page(sb, index, 0);
		BUG_ON(!page); /* FIXME: reserve a pool */
		memset(page_address(page) + offset, 0xff, len);
		if (!PagePrivate(page)) {
			SetPagePrivate(page);
			page_cache_get(page);
		}
		page_cache_release(page);
	}
}

static void pad_full_pages(struct logfs_area *area)
{
	struct super_block *sb = area->a_sb;
	struct logfs_super *super = logfs_super(sb);
	u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
	u32 len = super->s_segsize - area->a_used_bytes;
	pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
	pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
	struct page *page;

	while (no_indizes) {
		page = get_mapping_page(sb, index, 0);
		BUG_ON(!page); /* FIXME: reserve a pool */
		SetPageUptodate(page);
		memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
		if (!PagePrivate(page)) {
			SetPagePrivate(page);
			page_cache_get(page);
		}
		page_cache_release(page);
		index++;
		no_indizes--;
	}
}

/*
 * bdev_writeseg will write full pages. Memset the tail to prevent data leaks.
 * Also make sure we allocate (and memset) all pages for final writeout.
 */
static void pad_wbuf(struct logfs_area *area, int final)
{
	pad_partial_page(area);
	if (final)
		pad_full_pages(area);
}
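
/*
 * Worked example for pad_wbuf(), with invented numbers and 4KiB pages:
 * if a_used_bytes == 0x1234 (and the segment starts page-aligned),
 * pad_partial_page() fills bytes 0x234..0xfff of the current page with
 * 0xff.  If this is the final flush of the segment, pad_full_pages()
 * additionally allocates all remaining pages up to s_segsize and fills
 * them with 0xff, so bdev_writeseg never pushes uninitialized page
 * contents to the device.
 */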

/*
 * We have to be careful with the alias tree. Since lookup is done by bix,
 * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
 * indirect blocks. So always use it through accessor functions.
 */
static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix,
		level_t level)
{
	struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
	pgoff_t index = logfs_pack_index(bix, level);

	return btree_lookup128(head, ino, index);
}

static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix,
		level_t level, void *val)
{
	struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
	pgoff_t index = logfs_pack_index(bix, level);

	return btree_insert128(head, ino, index, val, GFP_NOFS);
}
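
/*
 * Sketch of the normalization mentioned above: the tree is keyed by
 * (ino, logfs_pack_index(bix, level)).  For indirect blocks the packed
 * index is derived from bix in a level-dependent way, so all data block
 * indices covered by the same indirect block (e.g. 14, 15 and 16 at
 * level 1) end up with the same key.  The exact packing is defined by
 * logfs_pack_index() outside this file; the accessors above only ensure
 * that nobody bypasses it.
 */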

static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
		write_alias_t *write_one_alias)
{
	struct object_alias_item *item;
	int err;

	list_for_each_entry(item, &block->item_list, list) {
		err = write_alias_journal(sb, block->ino, block->bix,
				block->level, item->child_no, item->val);
		if (err)
			return err;
	}
	return 0;
}

static struct logfs_block_ops btree_block_ops = {
	.write_block	= btree_write_block,
	.free_block	= __free_block,
	.write_alias	= btree_write_alias,
};

int logfs_load_object_aliases(struct super_block *sb,
		struct logfs_obj_alias *oa, int count)
{
	struct logfs_super *super = logfs_super(sb);
	struct logfs_block *block;
	struct object_alias_item *item;
	u64 ino, bix;
	level_t level;
	int i, err;

	super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
	count /= sizeof(*oa);
	for (i = 0; i < count; i++) {
		item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
		if (!item)
			return -ENOMEM;
		memset(item, 0, sizeof(*item));

		super->s_no_object_aliases++;
		item->val = oa[i].val;
		item->child_no = be16_to_cpu(oa[i].child_no);

		ino = be64_to_cpu(oa[i].ino);
		bix = be64_to_cpu(oa[i].bix);
		level = LEVEL(oa[i].level);

		log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n",
				ino, bix, level, item->child_no,
				be64_to_cpu(item->val));
		block = alias_tree_lookup(sb, ino, bix, level);
		if (!block) {
			block = __alloc_block(sb, ino, bix, level);
			block->ops = &btree_block_ops;
			err = alias_tree_insert(sb, ino, bix, level, block);
			BUG_ON(err); /* mempool empty */
		}
		if (test_and_set_bit(item->child_no, block->alias_map)) {
			printk(KERN_ERR"LogFS: Alias collision detected\n");
			return -EIO;
		}
		list_move_tail(&block->alias_list, &super->s_object_alias);
		list_add(&item->list, &block->item_list);
	}
	return 0;
}

static void kill_alias(void *_block, unsigned long ignore0,
		u64 ignore1, u64 ignore2, size_t ignore3)
{
	struct logfs_block *block = _block;
	struct super_block *sb = block->sb;
	struct logfs_super *super = logfs_super(sb);
	struct object_alias_item *item;

	while (!list_empty(&block->item_list)) {
		item = list_entry(block->item_list.next, typeof(*item), list);
		list_del(&item->list);
		mempool_free(item, super->s_alias_pool);
	}
	block->ops->free_block(sb, block);
}

static int obj_type(struct inode *inode, level_t level)
{
	if (level == 0) {
		if (S_ISDIR(inode->i_mode))
			return OBJ_DENTRY;
		if (inode->i_ino == LOGFS_INO_MASTER)
			return OBJ_INODE;
	}
	return OBJ_BLOCK;
}

static int obj_len(struct super_block *sb, int obj_type)
{
	switch (obj_type) {
	case OBJ_DENTRY:
		return sizeof(struct logfs_disk_dentry);
	case OBJ_INODE:
		return sizeof(struct logfs_disk_inode);
	case OBJ_BLOCK:
		return sb->s_blocksize;
	default:
		BUG();
	}
}

static int __logfs_segment_write(struct inode *inode, void *buf,
		struct logfs_shadow *shadow, int type, int len, int compr)
{
	struct logfs_area *area;
	struct super_block *sb = inode->i_sb;
	s64 ofs;
	struct logfs_object_header h;
	int acc_len;

	if (shadow->gc_level == 0)
		acc_len = len;
	else
		acc_len = obj_len(sb, type);

	area = get_area(sb, shadow->gc_level);
	ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE);
	LOGFS_BUG_ON(ofs <= 0, sb);
	/*
	 * Order is important. logfs_get_free_bytes(), by modifying the
	 * segment file, may modify the content of the very page we're about
	 * to write now. Which is fine, as long as the calculated crc and
	 * written data still match. So do the modifications _before_
	 * calculating the crc.
	 */

	h.len	= cpu_to_be16(len);
	h.type	= type;
	h.compr	= compr;
	h.ino	= cpu_to_be64(inode->i_ino);
	h.bix	= cpu_to_be64(shadow->bix);
	h.crc	= logfs_crc32(&h, sizeof(h) - 4, 4);
	h.data_crc = logfs_crc32(buf, len, 0);

	logfs_buf_write(area, ofs, &h, sizeof(h));
	logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len);

	shadow->new_ofs = ofs;
	shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE;

	return 0;
}
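
/*
 * What the two logfs_buf_write() calls above leave on the device,
 * starting at shadow->new_ofs (a reading aid, not a format definition):
 *
 *	[ struct logfs_object_header | len bytes of (possibly compressed) data ]
 *	  <--- LOGFS_OBJECT_HEADERSIZE --->
 *
 * h.crc is a crc over the header, h.data_crc over the payload; both are
 * verified again at read time by read_obj_header() and
 * __logfs_segment_read().  shadow->new_ofs and shadow->new_len tell the
 * caller where the object went and how much space to account for it.
 */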

static s64 logfs_segment_write_compress(struct inode *inode, void *buf,
		struct logfs_shadow *shadow, int type, int len)
{
	struct super_block *sb = inode->i_sb;
	void *compressor_buf = logfs_super(sb)->s_compressed_je;
	ssize_t compr_len;
	int ret;

	mutex_lock(&logfs_super(sb)->s_journal_mutex);
	compr_len = logfs_compress(buf, compressor_buf, len, len);

	if (compr_len >= 0) {
		ret = __logfs_segment_write(inode, compressor_buf, shadow,
				type, compr_len, COMPR_ZLIB);
	} else {
		ret = __logfs_segment_write(inode, buf, shadow, type, len,
				COMPR_NONE);
	}
	mutex_unlock(&logfs_super(sb)->s_journal_mutex);
	return ret;
}
/**
 * logfs_segment_write - write data block to object store
 * @inode:	inode containing data
 * @page:	page holding the data to be written
 * @shadow:	shadow entry that receives the new on-device position
 *
 * Returns an errno or zero.
 */
int logfs_segment_write(struct inode *inode, struct page *page,
		struct logfs_shadow *shadow)
{
	struct super_block *sb = inode->i_sb;
	struct logfs_super *super = logfs_super(sb);
	int do_compress, type, len;
	int ret;
	void *buf;

	super->s_flags |= LOGFS_SB_FLAG_DIRTY;
	BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
	do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED;
	if (shadow->gc_level != 0) {
		/* temporarily disable compression for indirect blocks */
		do_compress = 0;
	}

	type = obj_type(inode, shrink_level(shadow->gc_level));
	len = obj_len(sb, type);
	buf = kmap(page);
	if (do_compress)
		ret = logfs_segment_write_compress(inode, buf, shadow, type,
				len);
	else
		ret = __logfs_segment_write(inode, buf, shadow, type, len,
				COMPR_NONE);
	kunmap(page);

	log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n",
			shadow->ino, shadow->bix, shadow->gc_level,
			shadow->old_ofs, shadow->new_ofs,
			shadow->old_len, shadow->new_len);
	/* this BUG_ON did catch a locking bug. useful */
	BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1)));
	return ret;
}

int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf)
{
	pgoff_t index = ofs >> PAGE_SHIFT;
	struct page *page;
	long offset = ofs & (PAGE_SIZE-1);
	long copylen;

	while (len) {
		copylen = min((ulong)len, PAGE_SIZE - offset);

		page = get_mapping_page(sb, index, 1);
		if (IS_ERR(page))
			return PTR_ERR(page);
		memcpy(buf, page_address(page) + offset, copylen);
		page_cache_release(page);

		buf += copylen;
		len -= copylen;
		offset = 0;
		index++;
	}
	return 0;
}

/*
 * The "position" of indirect blocks is ambiguous. It can be the position
 * of any data block somewhere behind this indirect block. So we need to
 * normalize the positions through logfs_block_mask() before comparing.
 */
static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level)
{
	return (pos1 & logfs_block_mask(sb, level)) !=
		(pos2 & logfs_block_mask(sb, level));
}
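
/*
 * Illustration with invented numbers: assume logfs_block_mask(sb, 1)
 * clears exactly the bits that select a data block within one level-1
 * indirect block.  Positions 14, 15 and 16 then normalize to the same
 * value as long as they fall under the same indirect block, so
 * check_pos() returns 0 ("matches") for any pair of them, while
 * positions under different indirect blocks still compare as different.
 * In other words, check_pos() answers "could these refer to the same
 * block at this level?", not "are the numbers identical?".
 */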

#if 0
static int read_seg_header(struct super_block *sb, u64 ofs,
		struct logfs_segment_header *sh)
{
	__be32 crc;
	int err;

	err = wbuf_read(sb, ofs, sizeof(*sh), sh);
	if (err)
		return err;
	crc = logfs_crc32(sh, sizeof(*sh), 4);
	if (crc != sh->crc) {
		printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
				"got %x\n", ofs, be32_to_cpu(sh->crc),
				be32_to_cpu(crc));
		return -EIO;
	}
	return 0;
}
#endif

static int read_obj_header(struct super_block *sb, u64 ofs,
		struct logfs_object_header *oh)
{
	__be32 crc;
	int err;

	err = wbuf_read(sb, ofs, sizeof(*oh), oh);
	if (err)
		return err;
	crc = logfs_crc32(oh, sizeof(*oh) - 4, 4);
	if (crc != oh->crc) {
		printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
				"got %x\n", ofs, be32_to_cpu(oh->crc),
				be32_to_cpu(crc));
		return -EIO;
	}
	return 0;
}

static void move_btree_to_page(struct inode *inode, struct page *page,
		__be64 *data)
{
	struct super_block *sb = inode->i_sb;
	struct logfs_super *super = logfs_super(sb);
	struct btree_head128 *head = &super->s_object_alias_tree;
	struct logfs_block *block;
	struct object_alias_item *item, *next;

	if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS))
		return;

	block = btree_remove128(head, inode->i_ino, page->index);
	if (!block)
		return;

	log_blockmove("move_btree_to_page(%llx, %llx, %x)\n",
			block->ino, block->bix, block->level);
	list_for_each_entry_safe(item, next, &block->item_list, list) {
		data[item->child_no] = item->val;
		list_del(&item->list);
		mempool_free(item, super->s_alias_pool);
	}
	block->page = page;

	if (!PagePrivate(page)) {
		SetPagePrivate(page);
		page_cache_get(page);
		set_page_private(page, (unsigned long) block);
	}
	block->ops = &indirect_block_ops;
	initialize_block_counters(page, block, data, 0);
}

/*
 * This silences a false, yet annoying gcc warning. I hate it when my editor
 * jumps into bitops.h each time I recompile this file.
 * TODO: Complain to gcc folks about this and upgrade compiler.
 */
static unsigned long fnb(const unsigned long *addr,
		unsigned long size, unsigned long offset)
{
	return find_next_bit(addr, size, offset);
}

void move_page_to_btree(struct page *page)
{
	struct logfs_block *block = logfs_block(page);
	struct super_block *sb = block->sb;
	struct logfs_super *super = logfs_super(sb);
	struct object_alias_item *item;
	unsigned long pos;
	__be64 *child;
	int err;

	if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) {
		block->ops->free_block(sb, block);
		return;
	}
	log_blockmove("move_page_to_btree(%llx, %llx, %x)\n",
			block->ino, block->bix, block->level);
	super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;

	for (pos = 0; ; pos++) {
		pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
		if (pos >= LOGFS_BLOCK_FACTOR)
			break;

		item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
		BUG_ON(!item); /* mempool empty */
		memset(item, 0, sizeof(*item));

		child = kmap_atomic(page);
		item->val = child[pos];
		kunmap_atomic(child);
		item->child_no = pos;
		list_add(&item->list, &block->item_list);
	}
	block->page = NULL;

	if (PagePrivate(page)) {
		ClearPagePrivate(page);
		page_cache_release(page);
		set_page_private(page, 0);
	}
	block->ops = &btree_block_ops;
	err = alias_tree_insert(block->sb, block->ino, block->bix, block->level,
			block);
	BUG_ON(err); /* mempool empty */
	ClearPageUptodate(page);
}
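
/*
 * The two helpers above are mirror images.  Rough life cycle of one
 * pointer update in an indirect block, using an invented slot number:
 * while the block lives in a page, a dirty pointer in slot 7 is flagged
 * in block->alias_map; if the page has to go away, move_page_to_btree()
 * preserves slot 7 and its value as an object_alias_item and keys the
 * block into s_object_alias_tree; when the block is read back,
 * move_btree_to_page() writes the saved value into data[7] again and
 * frees the item; after a crash, journal replay recreates the same items
 * through logfs_load_object_aliases().  This describes the code above,
 * not any on-disk format.
 */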

static int __logfs_segment_read(struct inode *inode, void *buf,
		u64 ofs, u64 bix, level_t level)
{
	struct super_block *sb = inode->i_sb;
	void *compressor_buf = logfs_super(sb)->s_compressed_je;
	struct logfs_object_header oh;
	__be32 crc;
	u16 len;
	int err, block_len;

	block_len = obj_len(sb, obj_type(inode, level));
	err = read_obj_header(sb, ofs, &oh);
	if (err)
		goto out_err;

	err = -EIO;
	if (be64_to_cpu(oh.ino) != inode->i_ino
			|| check_pos(sb, be64_to_cpu(oh.bix), bix, level)) {
		printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: "
				"expected (%lx, %llx), got (%llx, %llx)\n",
				ofs, inode->i_ino, bix,
				be64_to_cpu(oh.ino), be64_to_cpu(oh.bix));
		goto out_err;
	}

	len = be16_to_cpu(oh.len);

	switch (oh.compr) {
	case COMPR_NONE:
		err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf);
		if (err)
			goto out_err;
		crc = logfs_crc32(buf, len, 0);
		if (crc != oh.data_crc) {
			printk(KERN_ERR"LOGFS: uncompressed data crc error at "
					"%llx: expected %x, got %x\n", ofs,
					be32_to_cpu(oh.data_crc),
					be32_to_cpu(crc));
			goto out_err;
		}
		break;
	case COMPR_ZLIB:
		mutex_lock(&logfs_super(sb)->s_journal_mutex);
		err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len,
				compressor_buf);
		if (err) {
			mutex_unlock(&logfs_super(sb)->s_journal_mutex);
			goto out_err;
		}
		crc = logfs_crc32(compressor_buf, len, 0);
		if (crc != oh.data_crc) {
			printk(KERN_ERR"LOGFS: compressed data crc error at "
					"%llx: expected %x, got %x\n", ofs,
					be32_to_cpu(oh.data_crc),
					be32_to_cpu(crc));
			mutex_unlock(&logfs_super(sb)->s_journal_mutex);
			goto out_err;
		}
		err = logfs_uncompress(compressor_buf, buf, len, block_len);
		mutex_unlock(&logfs_super(sb)->s_journal_mutex);
		if (err) {
			printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs);
			goto out_err;
		}
		break;
	default:
		LOGFS_BUG(sb);
		err = -EIO;
		goto out_err;
	}
	return 0;

out_err:
	logfs_set_ro(sb);
	printk(KERN_ERR"LOGFS: device is read-only now\n");
	LOGFS_BUG(sb);
	return err;
}

/**
 * logfs_segment_read - read data block from object store
 * @inode:	inode containing data
 * @page:	destination page for the data
 * @ofs:	physical data offset
 * @bix:	block index
 * @level:	block level
 *
 * Returns 0 on success or a negative errno.
 */
int logfs_segment_read(struct inode *inode, struct page *page,
		u64 ofs, u64 bix, level_t level)
{
	int err;
	void *buf;

	if (PageUptodate(page))
		return 0;

	ofs &= ~LOGFS_FULLY_POPULATED;

	buf = kmap(page);
	err = __logfs_segment_read(inode, buf, ofs, bix, level);
	if (!err) {
		move_btree_to_page(inode, page, buf);
		SetPageUptodate(page);
	}
	kunmap(page);
	log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n",
			inode->i_ino, bix, level, ofs, err);
	return err;
}

int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
{
	struct super_block *sb = inode->i_sb;
	struct logfs_super *super = logfs_super(sb);
	struct logfs_object_header h;
	u16 len;
	int err;

	super->s_flags |= LOGFS_SB_FLAG_DIRTY;
	BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
	BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED);
	if (!shadow->old_ofs)
		return 0;

	log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n",
			shadow->ino, shadow->bix, shadow->gc_level,
			shadow->old_ofs, shadow->new_ofs,
			shadow->old_len, shadow->new_len);
	err = read_obj_header(sb, shadow->old_ofs, &h);
	LOGFS_BUG_ON(err, sb);
	LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb);
	LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix),
				shrink_level(shadow->gc_level)), sb);

	if (shadow->gc_level == 0)
		len = be16_to_cpu(h.len);
	else
		len = obj_len(sb, h.type);
	shadow->old_len = len + sizeof(h);
	return 0;
}

void freeseg(struct super_block *sb, u32 segno)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	struct page *page;
	u64 ofs, start, end;

	start = dev_ofs(sb, segno, 0);
	end = dev_ofs(sb, segno + 1, 0);
	for (ofs = start; ofs < end; ofs += PAGE_SIZE) {
		page = find_get_page(mapping, ofs >> PAGE_SHIFT);
		if (!page)
			continue;
		if (PagePrivate(page)) {
			ClearPagePrivate(page);
			page_cache_release(page);
		}
		page_cache_release(page);
	}
}

int logfs_open_area(struct logfs_area *area, size_t bytes)
{
	struct super_block *sb = area->a_sb;
	struct logfs_super *super = logfs_super(sb);
	int err, closed = 0;

	if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize)
		return 0;

	if (area->a_is_open) {
		u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
		u32 len = super->s_segsize - area->a_written_bytes;

		log_gc("logfs_close_area(%x)\n", area->a_segno);
		pad_wbuf(area, 1);
		super->s_devops->writeseg(area->a_sb, ofs, len);
		freeseg(sb, area->a_segno);
		closed = 1;
	}

	area->a_used_bytes = 0;
	area->a_written_bytes = 0;
again:
	area->a_ops->get_free_segment(area);
	area->a_ops->get_erase_count(area);

	log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level);
	err = area->a_ops->erase_segment(area);
	if (err) {
		printk(KERN_WARNING "LogFS: Error erasing segment %x\n",
				area->a_segno);
		logfs_mark_segment_bad(sb, area->a_segno);
		goto again;
	}
	area->a_is_open = 1;
	return closed;
}

void logfs_sync_area(struct logfs_area *area)
{
	struct super_block *sb = area->a_sb;
	struct logfs_super *super = logfs_super(sb);
	u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
	u32 len = (area->a_used_bytes - area->a_written_bytes);

	if (super->s_writesize)
		len &= ~(super->s_writesize - 1);
	if (len == 0)
		return;
	pad_wbuf(area, 0);
	super->s_devops->writeseg(sb, ofs, len);
	area->a_written_bytes += len;
}
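
/*
 * Example of the s_writesize rounding above (invented numbers): on a
 * device with a 4KiB write unit, 5000 dirty-but-unwritten bytes get
 * trimmed to len == 4096, so exactly one full write unit is flushed and
 * the remaining 904 bytes stay buffered until more data arrives or the
 * area is closed, at which point logfs_open_area() pads and writes the
 * whole tail of the segment.
 */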

void logfs_sync_segments(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	int i;

	for_each_area(i)
		logfs_sync_area(super->s_area[i]);
}

/*
 * Pick a free segment to be used for this area. Effectively takes a
 * candidate from the free list (not really a candidate anymore).
 */
static void ostore_get_free_segment(struct logfs_area *area)
{
	struct super_block *sb = area->a_sb;
	struct logfs_super *super = logfs_super(sb);

	if (super->s_free_list.count == 0) {
		printk(KERN_ERR"LOGFS: ran out of free segments\n");
		LOGFS_BUG(sb);
	}

	area->a_segno = get_best_cand(sb, &super->s_free_list, NULL);
}

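/*
 * se.ec_level packs the erase count and the GC level into one 32-bit
 * value; judging by the shift below, the level appears to occupy the low
 * four bits and the erase count the remaining bits.  The +1 presumably
 * accounts for the erase that this area is about to perform.
 */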
static void ostore_get_erase_count(struct logfs_area *area)
{
	struct logfs_segment_entry se;
	u32 ec_level;

	logfs_get_segment_entry(area->a_sb, area->a_segno, &se);
	BUG_ON(se.ec_level == cpu_to_be32(BADSEG) ||
			se.valid == cpu_to_be32(RESERVED));

	ec_level = be32_to_cpu(se.ec_level);
	area->a_erase_count = (ec_level >> 4) + 1;
}

static int ostore_erase_segment(struct logfs_area *area)
{
	struct super_block *sb = area->a_sb;
	struct logfs_segment_header sh;
	u64 ofs;
	int err;

	err = logfs_erase_segment(sb, area->a_segno, 0);
	if (err)
		return err;

	sh.pad = 0;
	sh.type = SEG_OSTORE;
	sh.level = (__force u8)area->a_level;
	sh.segno = cpu_to_be32(area->a_segno);
	sh.ec = cpu_to_be32(area->a_erase_count);
	sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
	sh.crc = logfs_crc32(&sh, sizeof(sh), 4);

	logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count,
			area->a_level);

	ofs = dev_ofs(sb, area->a_segno, 0);
	area->a_used_bytes = sizeof(sh);
	logfs_buf_write(area, ofs, &sh, sizeof(sh));
	return 0;
}

static const struct logfs_area_ops ostore_area_ops = {
	.get_free_segment	= ostore_get_free_segment,
	.get_erase_count	= ostore_get_erase_count,
	.erase_segment		= ostore_erase_segment,
};

static void free_area(struct logfs_area *area)
{
	if (area)
		freeseg(area->a_sb, area->a_segno);
	kfree(area);
}

void free_areas(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	int i;

	for_each_area(i)
		free_area(super->s_area[i]);
	free_area(super->s_journal_area);
}

static struct logfs_area *alloc_area(struct super_block *sb)
{
	struct logfs_area *area;

	area = kzalloc(sizeof(*area), GFP_KERNEL);
	if (!area)
		return NULL;

	area->a_sb = sb;
	return area;
}

static void map_invalidatepage(struct page *page, unsigned int o,
		unsigned int l)
{
	return;
}

static int map_releasepage(struct page *page, gfp_t g)
{
	/* Don't release these pages */
	return 0;
}

static const struct address_space_operations mapping_aops = {
	.invalidatepage = map_invalidatepage,
	.releasepage	= map_releasepage,
	.set_page_dirty = __set_page_dirty_nobuffers,
};

int logfs_init_mapping(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping;
	struct inode *inode;

	inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING);
	if (IS_ERR(inode))
		return PTR_ERR(inode);
	super->s_mapping_inode = inode;
	mapping = inode->i_mapping;
	mapping->a_ops = &mapping_aops;
	/* Would it be possible to use __GFP_HIGHMEM as well? */
	mapping_set_gfp_mask(mapping, GFP_NOFS);
	return 0;
}

int logfs_init_areas(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);
	int i = -1;

	super->s_alias_pool = mempool_create_kmalloc_pool(600,
			sizeof(struct object_alias_item));
	if (!super->s_alias_pool)
		return -ENOMEM;

	super->s_journal_area = alloc_area(sb);
	if (!super->s_journal_area)
		goto err;

	for_each_area(i) {
		super->s_area[i] = alloc_area(sb);
		if (!super->s_area[i])
			goto err;
		super->s_area[i]->a_level = GC_LEVEL(i);
		super->s_area[i]->a_ops = &ostore_area_ops;
	}
	btree_init_mempool128(&super->s_object_alias_tree,
			super->s_btree_pool);
	return 0;

err:
	for (i--; i >= 0; i--)
		free_area(super->s_area[i]);
	free_area(super->s_journal_area);
	logfs_mempool_destroy(super->s_alias_pool);
	return -ENOMEM;
}

void logfs_cleanup_areas(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);

	btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias);
}