]>
Commit | Line | Data |
---|---|---|
0bd49f94 RK |
1 | /* |
2 | * page.c - buffer/page management specific to NILFS | |
3 | * | |
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | * | |
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net>, | |
21 | * Seiji Kihara <kihara@osrg.net>. | |
22 | */ | |
23 | ||
24 | #include <linux/pagemap.h> | |
25 | #include <linux/writeback.h> | |
26 | #include <linux/swap.h> | |
27 | #include <linux/bitops.h> | |
28 | #include <linux/page-flags.h> | |
29 | #include <linux/list.h> | |
30 | #include <linux/highmem.h> | |
31 | #include <linux/pagevec.h> | |
32 | #include "nilfs.h" | |
33 | #include "page.h" | |
34 | #include "mdt.h" | |
35 | ||
36 | ||
/*
 * Buffer state bits that are preserved when the state of one buffer head
 * is copied onto another (see nilfs_copy_buffer() and nilfs_copy_page()).
 */
#define NILFS_BUFFER_INHERENT_BITS		\
	((1UL << BH_Uptodate) |			\
	 (1UL << BH_Mapped) |			\
	 (1UL << BH_NILFS_Node) |		\
	 (1UL << BH_NILFS_Volatile) |		\
	 (1UL << BH_NILFS_Allocated))
40 | ||
41 | static struct buffer_head * | |
42 | __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, | |
43 | int blkbits, unsigned long b_state) | |
44 | ||
45 | { | |
46 | unsigned long first_block; | |
47 | struct buffer_head *bh; | |
48 | ||
49 | if (!page_has_buffers(page)) | |
50 | create_empty_buffers(page, 1 << blkbits, b_state); | |
51 | ||
52 | first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits); | |
53 | bh = nilfs_page_get_nth_block(page, block - first_block); | |
54 | ||
55 | touch_buffer(bh); | |
56 | wait_on_buffer(bh); | |
57 | return bh; | |
58 | } | |
59 | ||
60 | /* | |
61 | * Since the page cache of B-tree node pages or data page cache of pseudo | |
62 | * inodes does not have a valid mapping->host pointer, calling | |
63 | * mark_buffer_dirty() for their buffers causes a NULL pointer dereference; | |
64 | * it calls __mark_inode_dirty(NULL) through __set_page_dirty(). | |
65 | * To avoid this problem, the old style mark_buffer_dirty() is used instead. | |
66 | */ | |
67 | void nilfs_mark_buffer_dirty(struct buffer_head *bh) | |
68 | { | |
69 | if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) | |
70 | __set_page_dirty_nobuffers(bh->b_page); | |
71 | } | |
72 | ||
73 | struct buffer_head *nilfs_grab_buffer(struct inode *inode, | |
74 | struct address_space *mapping, | |
75 | unsigned long blkoff, | |
76 | unsigned long b_state) | |
77 | { | |
78 | int blkbits = inode->i_blkbits; | |
79 | pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); | |
80 | struct page *page, *opage; | |
81 | struct buffer_head *bh, *obh; | |
82 | ||
83 | page = grab_cache_page(mapping, index); | |
84 | if (unlikely(!page)) | |
85 | return NULL; | |
86 | ||
87 | bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state); | |
88 | if (unlikely(!bh)) { | |
89 | unlock_page(page); | |
90 | page_cache_release(page); | |
91 | return NULL; | |
92 | } | |
93 | if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) { | |
94 | /* | |
95 | * Shadow page cache uses assoc_mapping to point its original | |
96 | * page cache. The following code tries the original cache | |
97 | * if the given cache is a shadow and it didn't hit. | |
98 | */ | |
99 | opage = find_lock_page(mapping->assoc_mapping, index); | |
100 | if (!opage) | |
101 | return bh; | |
102 | ||
103 | obh = __nilfs_get_page_block(opage, blkoff, index, blkbits, | |
104 | b_state); | |
105 | if (buffer_uptodate(obh)) { | |
106 | nilfs_copy_buffer(bh, obh); | |
107 | if (buffer_dirty(obh)) { | |
108 | nilfs_mark_buffer_dirty(bh); | |
109 | if (!buffer_nilfs_node(bh) && NILFS_MDT(inode)) | |
110 | nilfs_mdt_mark_dirty(inode); | |
111 | } | |
112 | } | |
113 | brelse(obh); | |
114 | unlock_page(opage); | |
115 | page_cache_release(opage); | |
116 | } | |
117 | return bh; | |
118 | } | |
119 | ||
120 | /** | |
121 | * nilfs_forget_buffer - discard dirty state | |
122 | * @inode: owner inode of the buffer | |
123 | * @bh: buffer head of the buffer to be discarded | |
124 | */ | |
125 | void nilfs_forget_buffer(struct buffer_head *bh) | |
126 | { | |
127 | struct page *page = bh->b_page; | |
128 | ||
129 | lock_buffer(bh); | |
130 | clear_buffer_nilfs_volatile(bh); | |
131 | if (test_clear_buffer_dirty(bh) && nilfs_page_buffers_clean(page)) | |
132 | __nilfs_clear_page_dirty(page); | |
133 | ||
134 | clear_buffer_uptodate(bh); | |
135 | clear_buffer_mapped(bh); | |
136 | bh->b_blocknr = -1; | |
137 | ClearPageUptodate(page); | |
138 | ClearPageMappedToDisk(page); | |
139 | unlock_buffer(bh); | |
140 | brelse(bh); | |
141 | } | |
142 | ||
143 | /** | |
144 | * nilfs_copy_buffer -- copy buffer data and flags | |
145 | * @dbh: destination buffer | |
146 | * @sbh: source buffer | |
147 | */ | |
148 | void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) | |
149 | { | |
150 | void *kaddr0, *kaddr1; | |
151 | unsigned long bits; | |
152 | struct page *spage = sbh->b_page, *dpage = dbh->b_page; | |
153 | struct buffer_head *bh; | |
154 | ||
155 | kaddr0 = kmap_atomic(spage, KM_USER0); | |
156 | kaddr1 = kmap_atomic(dpage, KM_USER1); | |
157 | memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); | |
158 | kunmap_atomic(kaddr1, KM_USER1); | |
159 | kunmap_atomic(kaddr0, KM_USER0); | |
160 | ||
161 | dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; | |
162 | dbh->b_blocknr = sbh->b_blocknr; | |
163 | dbh->b_bdev = sbh->b_bdev; | |
164 | ||
165 | bh = dbh; | |
166 | bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped)); | |
167 | while ((bh = bh->b_this_page) != dbh) { | |
168 | lock_buffer(bh); | |
169 | bits &= bh->b_state; | |
170 | unlock_buffer(bh); | |
171 | } | |
172 | if (bits & (1UL << BH_Uptodate)) | |
173 | SetPageUptodate(dpage); | |
174 | else | |
175 | ClearPageUptodate(dpage); | |
176 | if (bits & (1UL << BH_Mapped)) | |
177 | SetPageMappedToDisk(dpage); | |
178 | else | |
179 | ClearPageMappedToDisk(dpage); | |
180 | } | |
181 | ||
182 | /** | |
183 | * nilfs_page_buffers_clean - check if a page has dirty buffers or not. | |
184 | * @page: page to be checked | |
185 | * | |
186 | * nilfs_page_buffers_clean() returns zero if the page has dirty buffers. | |
187 | * Otherwise, it returns non-zero value. | |
188 | */ | |
189 | int nilfs_page_buffers_clean(struct page *page) | |
190 | { | |
191 | struct buffer_head *bh, *head; | |
192 | ||
193 | bh = head = page_buffers(page); | |
194 | do { | |
195 | if (buffer_dirty(bh)) | |
196 | return 0; | |
197 | bh = bh->b_this_page; | |
198 | } while (bh != head); | |
199 | return 1; | |
200 | } | |
201 | ||
202 | void nilfs_page_bug(struct page *page) | |
203 | { | |
204 | struct address_space *m; | |
205 | unsigned long ino = 0; | |
206 | ||
207 | if (unlikely(!page)) { | |
208 | printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); | |
209 | return; | |
210 | } | |
211 | ||
212 | m = page->mapping; | |
213 | if (m) { | |
214 | struct inode *inode = NILFS_AS_I(m); | |
215 | if (inode != NULL) | |
216 | ino = inode->i_ino; | |
217 | } | |
218 | printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " | |
219 | "mapping=%p ino=%lu\n", | |
220 | page, atomic_read(&page->_count), | |
221 | (unsigned long long)page->index, page->flags, m, ino); | |
222 | ||
223 | if (page_has_buffers(page)) { | |
224 | struct buffer_head *bh, *head; | |
225 | int i = 0; | |
226 | ||
227 | bh = head = page_buffers(page); | |
228 | do { | |
229 | printk(KERN_CRIT | |
230 | " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n", | |
231 | i++, bh, atomic_read(&bh->b_count), | |
232 | (unsigned long long)bh->b_blocknr, bh->b_state); | |
233 | bh = bh->b_this_page; | |
234 | } while (bh != head); | |
235 | } | |
236 | } | |
237 | ||
238 | /** | |
239 | * nilfs_alloc_private_page - allocate a private page with buffer heads | |
240 | * | |
241 | * Return Value: On success, a pointer to the allocated page is returned. | |
242 | * On error, NULL is returned. | |
243 | */ | |
244 | struct page *nilfs_alloc_private_page(struct block_device *bdev, int size, | |
245 | unsigned long state) | |
246 | { | |
247 | struct buffer_head *bh, *head, *tail; | |
248 | struct page *page; | |
249 | ||
250 | page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */ | |
251 | if (unlikely(!page)) | |
252 | return NULL; | |
253 | ||
254 | lock_page(page); | |
255 | head = alloc_page_buffers(page, size, 0); | |
256 | if (unlikely(!head)) { | |
257 | unlock_page(page); | |
258 | __free_page(page); | |
259 | return NULL; | |
260 | } | |
261 | ||
262 | bh = head; | |
263 | do { | |
264 | bh->b_state = (1UL << BH_NILFS_Allocated) | state; | |
265 | tail = bh; | |
266 | bh->b_bdev = bdev; | |
267 | bh = bh->b_this_page; | |
268 | } while (bh); | |
269 | ||
270 | tail->b_this_page = head; | |
271 | attach_page_buffers(page, head); | |
272 | ||
273 | return page; | |
274 | } | |
275 | ||
276 | void nilfs_free_private_page(struct page *page) | |
277 | { | |
278 | BUG_ON(!PageLocked(page)); | |
279 | BUG_ON(page->mapping); | |
280 | ||
281 | if (page_has_buffers(page) && !try_to_free_buffers(page)) | |
282 | NILFS_PAGE_BUG(page, "failed to free page"); | |
283 | ||
284 | unlock_page(page); | |
285 | __free_page(page); | |
286 | } | |
287 | ||
288 | /** | |
289 | * nilfs_copy_page -- copy the page with buffers | |
290 | * @dst: destination page | |
291 | * @src: source page | |
292 | * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. | |
293 | * | |
294 | * This fuction is for both data pages and btnode pages. The dirty flag | |
295 | * should be treated by caller. The page must not be under i/o. | |
296 | * Both src and dst page must be locked | |
297 | */ | |
298 | static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) | |
299 | { | |
300 | struct buffer_head *dbh, *dbufs, *sbh, *sbufs; | |
301 | unsigned long mask = NILFS_BUFFER_INHERENT_BITS; | |
302 | ||
303 | BUG_ON(PageWriteback(dst)); | |
304 | ||
305 | sbh = sbufs = page_buffers(src); | |
306 | if (!page_has_buffers(dst)) | |
307 | create_empty_buffers(dst, sbh->b_size, 0); | |
308 | ||
309 | if (copy_dirty) | |
310 | mask |= (1UL << BH_Dirty); | |
311 | ||
312 | dbh = dbufs = page_buffers(dst); | |
313 | do { | |
314 | lock_buffer(sbh); | |
315 | lock_buffer(dbh); | |
316 | dbh->b_state = sbh->b_state & mask; | |
317 | dbh->b_blocknr = sbh->b_blocknr; | |
318 | dbh->b_bdev = sbh->b_bdev; | |
319 | sbh = sbh->b_this_page; | |
320 | dbh = dbh->b_this_page; | |
321 | } while (dbh != dbufs); | |
322 | ||
323 | copy_highpage(dst, src); | |
324 | ||
325 | if (PageUptodate(src) && !PageUptodate(dst)) | |
326 | SetPageUptodate(dst); | |
327 | else if (!PageUptodate(src) && PageUptodate(dst)) | |
328 | ClearPageUptodate(dst); | |
329 | if (PageMappedToDisk(src) && !PageMappedToDisk(dst)) | |
330 | SetPageMappedToDisk(dst); | |
331 | else if (!PageMappedToDisk(src) && PageMappedToDisk(dst)) | |
332 | ClearPageMappedToDisk(dst); | |
333 | ||
334 | do { | |
335 | unlock_buffer(sbh); | |
336 | unlock_buffer(dbh); | |
337 | sbh = sbh->b_this_page; | |
338 | dbh = dbh->b_this_page; | |
339 | } while (dbh != dbufs); | |
340 | } | |
341 | ||
342 | int nilfs_copy_dirty_pages(struct address_space *dmap, | |
343 | struct address_space *smap) | |
344 | { | |
345 | struct pagevec pvec; | |
346 | unsigned int i; | |
347 | pgoff_t index = 0; | |
348 | int err = 0; | |
349 | ||
350 | pagevec_init(&pvec, 0); | |
351 | repeat: | |
352 | if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY, | |
353 | PAGEVEC_SIZE)) | |
354 | return 0; | |
355 | ||
356 | for (i = 0; i < pagevec_count(&pvec); i++) { | |
357 | struct page *page = pvec.pages[i], *dpage; | |
358 | ||
359 | lock_page(page); | |
360 | if (unlikely(!PageDirty(page))) | |
361 | NILFS_PAGE_BUG(page, "inconsistent dirty state"); | |
362 | ||
363 | dpage = grab_cache_page(dmap, page->index); | |
364 | if (unlikely(!dpage)) { | |
365 | /* No empty page is added to the page cache */ | |
366 | err = -ENOMEM; | |
367 | unlock_page(page); | |
368 | break; | |
369 | } | |
370 | if (unlikely(!page_has_buffers(page))) | |
371 | NILFS_PAGE_BUG(page, | |
372 | "found empty page in dat page cache"); | |
373 | ||
374 | nilfs_copy_page(dpage, page, 1); | |
375 | __set_page_dirty_nobuffers(dpage); | |
376 | ||
377 | unlock_page(dpage); | |
378 | page_cache_release(dpage); | |
379 | unlock_page(page); | |
380 | } | |
381 | pagevec_release(&pvec); | |
382 | cond_resched(); | |
383 | ||
384 | if (likely(!err)) | |
385 | goto repeat; | |
386 | return err; | |
387 | } | |
388 | ||
389 | /** | |
390 | * nilfs_copy_back_pages -- copy back pages to orignal cache from shadow cache | |
391 | * @dmap: destination page cache | |
392 | * @smap: source page cache | |
393 | * | |
394 | * No pages must no be added to the cache during this process. | |
395 | * This must be ensured by the caller. | |
396 | */ | |
397 | void nilfs_copy_back_pages(struct address_space *dmap, | |
398 | struct address_space *smap) | |
399 | { | |
400 | struct pagevec pvec; | |
401 | unsigned int i, n; | |
402 | pgoff_t index = 0; | |
403 | int err; | |
404 | ||
405 | pagevec_init(&pvec, 0); | |
406 | repeat: | |
407 | n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE); | |
408 | if (!n) | |
409 | return; | |
410 | index = pvec.pages[n - 1]->index + 1; | |
411 | ||
412 | for (i = 0; i < pagevec_count(&pvec); i++) { | |
413 | struct page *page = pvec.pages[i], *dpage; | |
414 | pgoff_t offset = page->index; | |
415 | ||
416 | lock_page(page); | |
417 | dpage = find_lock_page(dmap, offset); | |
418 | if (dpage) { | |
419 | /* override existing page on the destination cache */ | |
420 | BUG_ON(PageDirty(dpage)); | |
421 | nilfs_copy_page(dpage, page, 0); | |
422 | unlock_page(dpage); | |
423 | page_cache_release(dpage); | |
424 | } else { | |
425 | struct page *page2; | |
426 | ||
427 | /* move the page to the destination cache */ | |
428 | spin_lock_irq(&smap->tree_lock); | |
429 | page2 = radix_tree_delete(&smap->page_tree, offset); | |
430 | if (unlikely(page2 != page)) | |
431 | NILFS_PAGE_BUG(page, "page removal failed " | |
432 | "(offset=%lu, page2=%p)", | |
433 | offset, page2); | |
434 | smap->nrpages--; | |
435 | spin_unlock_irq(&smap->tree_lock); | |
436 | ||
437 | spin_lock_irq(&dmap->tree_lock); | |
438 | err = radix_tree_insert(&dmap->page_tree, offset, page); | |
439 | if (unlikely(err < 0)) { | |
440 | BUG_ON(err == -EEXIST); | |
441 | page->mapping = NULL; | |
442 | page_cache_release(page); /* for cache */ | |
443 | } else { | |
444 | page->mapping = dmap; | |
445 | dmap->nrpages++; | |
446 | if (PageDirty(page)) | |
447 | radix_tree_tag_set(&dmap->page_tree, | |
448 | offset, | |
449 | PAGECACHE_TAG_DIRTY); | |
450 | } | |
451 | spin_unlock_irq(&dmap->tree_lock); | |
452 | } | |
453 | unlock_page(page); | |
454 | } | |
455 | pagevec_release(&pvec); | |
456 | cond_resched(); | |
457 | ||
458 | goto repeat; | |
459 | } | |
460 | ||
461 | void nilfs_clear_dirty_pages(struct address_space *mapping) | |
462 | { | |
463 | struct pagevec pvec; | |
464 | unsigned int i; | |
465 | pgoff_t index = 0; | |
466 | ||
467 | pagevec_init(&pvec, 0); | |
468 | ||
469 | while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, | |
470 | PAGEVEC_SIZE)) { | |
471 | for (i = 0; i < pagevec_count(&pvec); i++) { | |
472 | struct page *page = pvec.pages[i]; | |
473 | struct buffer_head *bh, *head; | |
474 | ||
475 | lock_page(page); | |
476 | ClearPageUptodate(page); | |
477 | ClearPageMappedToDisk(page); | |
478 | bh = head = page_buffers(page); | |
479 | do { | |
480 | lock_buffer(bh); | |
481 | clear_buffer_dirty(bh); | |
482 | clear_buffer_nilfs_volatile(bh); | |
483 | clear_buffer_uptodate(bh); | |
484 | clear_buffer_mapped(bh); | |
485 | unlock_buffer(bh); | |
486 | bh = bh->b_this_page; | |
487 | } while (bh != head); | |
488 | ||
489 | __nilfs_clear_page_dirty(page); | |
490 | unlock_page(page); | |
491 | } | |
492 | pagevec_release(&pvec); | |
493 | cond_resched(); | |
494 | } | |
495 | } | |
496 | ||
497 | unsigned nilfs_page_count_clean_buffers(struct page *page, | |
498 | unsigned from, unsigned to) | |
499 | { | |
500 | unsigned block_start, block_end; | |
501 | struct buffer_head *bh, *head; | |
502 | unsigned nc = 0; | |
503 | ||
504 | for (bh = head = page_buffers(page), block_start = 0; | |
505 | bh != head || !block_start; | |
506 | block_start = block_end, bh = bh->b_this_page) { | |
507 | block_end = block_start + bh->b_size; | |
508 | if (block_end > from && block_start < to && !buffer_dirty(bh)) | |
509 | nc++; | |
510 | } | |
511 | return nc; | |
512 | } | |
513 | ||
514 | /* | |
515 | * NILFS2 needs clear_page_dirty() in the following two cases: | |
516 | * | |
517 | * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears | |
518 | * page dirty flags when it copies back pages from the shadow cache | |
519 | * (gcdat->{i_mapping,i_btnode_cache}) to its original cache | |
520 | * (dat->{i_mapping,i_btnode_cache}). | |
521 | * | |
522 | * 2) Some B-tree operations like insertion or deletion may dispose buffers | |
523 | * in dirty state, and this needs to cancel the dirty state of their pages. | |
524 | */ | |
525 | int __nilfs_clear_page_dirty(struct page *page) | |
526 | { | |
527 | struct address_space *mapping = page->mapping; | |
528 | ||
529 | if (mapping) { | |
530 | spin_lock_irq(&mapping->tree_lock); | |
531 | if (test_bit(PG_dirty, &page->flags)) { | |
532 | radix_tree_tag_clear(&mapping->page_tree, | |
533 | page_index(page), | |
534 | PAGECACHE_TAG_DIRTY); | |
535 | spin_unlock_irq(&mapping->tree_lock); | |
536 | return clear_page_dirty_for_io(page); | |
537 | } | |
538 | spin_unlock_irq(&mapping->tree_lock); | |
539 | return 0; | |
540 | } | |
541 | return TestClearPageDirty(page); | |
542 | } |