]>
Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
30 | * Copyright (c) 2011, 2012, Intel Corporation. | |
31 | */ | |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | * | |
36 | * Implementation of cl_page for VVP layer. | |
37 | * | |
38 | * Author: Nikita Danilov <nikita.danilov@sun.com> | |
39 | * Author: Jinshan Xiong <jinshan.xiong@whamcloud.com> | |
40 | */ | |
41 | ||
42 | #define DEBUG_SUBSYSTEM S_LLITE | |
43 | ||
44 | ||
67a235f5 GKH |
45 | #include "../include/obd.h" |
46 | #include "../include/lustre_lite.h" | |
d7e09d03 PT |
47 | |
48 | #include "vvp_internal.h" | |
49 | ||
50 | /***************************************************************************** | |
51 | * | |
52 | * Page operations. | |
53 | * | |
54 | */ | |
55 | ||
56 | static void vvp_page_fini_common(struct ccc_page *cp) | |
57 | { | |
58 | struct page *vmpage = cp->cpg_page; | |
59 | ||
60 | LASSERT(vmpage != NULL); | |
61 | page_cache_release(vmpage); | |
62 | } | |
63 | ||
64 | static void vvp_page_fini(const struct lu_env *env, | |
65 | struct cl_page_slice *slice) | |
66 | { | |
67 | struct ccc_page *cp = cl2ccc_page(slice); | |
68 | struct page *vmpage = cp->cpg_page; | |
69 | ||
70 | /* | |
71 | * vmpage->private was already cleared when page was moved into | |
72 | * VPG_FREEING state. | |
73 | */ | |
74 | LASSERT((struct cl_page *)vmpage->private != slice->cpl_page); | |
75 | vvp_page_fini_common(cp); | |
76 | } | |
77 | ||
78 | static int vvp_page_own(const struct lu_env *env, | |
79 | const struct cl_page_slice *slice, struct cl_io *io, | |
80 | int nonblock) | |
81 | { | |
82 | struct ccc_page *vpg = cl2ccc_page(slice); | |
83 | struct page *vmpage = vpg->cpg_page; | |
84 | ||
85 | LASSERT(vmpage != NULL); | |
86 | if (nonblock) { | |
87 | if (!trylock_page(vmpage)) | |
88 | return -EAGAIN; | |
89 | ||
90 | if (unlikely(PageWriteback(vmpage))) { | |
91 | unlock_page(vmpage); | |
92 | return -EAGAIN; | |
93 | } | |
94 | ||
95 | return 0; | |
96 | } | |
97 | ||
98 | lock_page(vmpage); | |
99 | wait_on_page_writeback(vmpage); | |
100 | return 0; | |
101 | } | |
102 | ||
103 | static void vvp_page_assume(const struct lu_env *env, | |
104 | const struct cl_page_slice *slice, | |
105 | struct cl_io *unused) | |
106 | { | |
107 | struct page *vmpage = cl2vm_page(slice); | |
108 | ||
109 | LASSERT(vmpage != NULL); | |
110 | LASSERT(PageLocked(vmpage)); | |
111 | wait_on_page_writeback(vmpage); | |
112 | } | |
113 | ||
114 | static void vvp_page_unassume(const struct lu_env *env, | |
115 | const struct cl_page_slice *slice, | |
116 | struct cl_io *unused) | |
117 | { | |
118 | struct page *vmpage = cl2vm_page(slice); | |
119 | ||
120 | LASSERT(vmpage != NULL); | |
121 | LASSERT(PageLocked(vmpage)); | |
122 | } | |
123 | ||
124 | static void vvp_page_disown(const struct lu_env *env, | |
125 | const struct cl_page_slice *slice, struct cl_io *io) | |
126 | { | |
127 | struct page *vmpage = cl2vm_page(slice); | |
128 | ||
129 | LASSERT(vmpage != NULL); | |
130 | LASSERT(PageLocked(vmpage)); | |
131 | ||
132 | unlock_page(cl2vm_page(slice)); | |
133 | } | |
134 | ||
135 | static void vvp_page_discard(const struct lu_env *env, | |
136 | const struct cl_page_slice *slice, | |
137 | struct cl_io *unused) | |
138 | { | |
139 | struct page *vmpage = cl2vm_page(slice); | |
140 | struct address_space *mapping; | |
141 | struct ccc_page *cpg = cl2ccc_page(slice); | |
142 | ||
143 | LASSERT(vmpage != NULL); | |
144 | LASSERT(PageLocked(vmpage)); | |
145 | ||
146 | mapping = vmpage->mapping; | |
147 | ||
148 | if (cpg->cpg_defer_uptodate && !cpg->cpg_ra_used) | |
149 | ll_ra_stats_inc(mapping, RA_STAT_DISCARDED); | |
150 | ||
151 | /* | |
152 | * truncate_complete_page() calls | |
153 | * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete(). | |
154 | */ | |
155 | truncate_complete_page(mapping, vmpage); | |
156 | } | |
157 | ||
158 | static int vvp_page_unmap(const struct lu_env *env, | |
159 | const struct cl_page_slice *slice, | |
160 | struct cl_io *unused) | |
161 | { | |
162 | struct page *vmpage = cl2vm_page(slice); | |
163 | __u64 offset; | |
164 | ||
165 | LASSERT(vmpage != NULL); | |
166 | LASSERT(PageLocked(vmpage)); | |
167 | ||
168 | offset = vmpage->index << PAGE_CACHE_SHIFT; | |
169 | ||
170 | /* | |
171 | * XXX is it safe to call this with the page lock held? | |
172 | */ | |
173 | ll_teardown_mmaps(vmpage->mapping, offset, offset + PAGE_CACHE_SIZE); | |
174 | return 0; | |
175 | } | |
176 | ||
177 | static void vvp_page_delete(const struct lu_env *env, | |
178 | const struct cl_page_slice *slice) | |
179 | { | |
180 | struct page *vmpage = cl2vm_page(slice); | |
181 | struct inode *inode = vmpage->mapping->host; | |
182 | struct cl_object *obj = slice->cpl_obj; | |
183 | ||
184 | LASSERT(PageLocked(vmpage)); | |
185 | LASSERT((struct cl_page *)vmpage->private == slice->cpl_page); | |
186 | LASSERT(inode == ccc_object_inode(obj)); | |
187 | ||
188 | vvp_write_complete(cl2ccc(obj), cl2ccc_page(slice)); | |
189 | ClearPagePrivate(vmpage); | |
190 | vmpage->private = 0; | |
191 | /* | |
192 | * Reference from vmpage to cl_page is removed, but the reference back | |
193 | * is still here. It is removed later in vvp_page_fini(). | |
194 | */ | |
195 | } | |
196 | ||
197 | static void vvp_page_export(const struct lu_env *env, | |
198 | const struct cl_page_slice *slice, | |
199 | int uptodate) | |
200 | { | |
201 | struct page *vmpage = cl2vm_page(slice); | |
202 | ||
203 | LASSERT(vmpage != NULL); | |
204 | LASSERT(PageLocked(vmpage)); | |
205 | if (uptodate) | |
206 | SetPageUptodate(vmpage); | |
207 | else | |
208 | ClearPageUptodate(vmpage); | |
209 | } | |
210 | ||
211 | static int vvp_page_is_vmlocked(const struct lu_env *env, | |
212 | const struct cl_page_slice *slice) | |
213 | { | |
214 | return PageLocked(cl2vm_page(slice)) ? -EBUSY : -ENODATA; | |
215 | } | |
216 | ||
217 | static int vvp_page_prep_read(const struct lu_env *env, | |
218 | const struct cl_page_slice *slice, | |
219 | struct cl_io *unused) | |
220 | { | |
d7e09d03 | 221 | /* Skip the page already marked as PG_uptodate. */ |
0a3bdb00 | 222 | return PageUptodate(cl2vm_page(slice)) ? -EALREADY : 0; |
d7e09d03 PT |
223 | } |
224 | ||
225 | static int vvp_page_prep_write(const struct lu_env *env, | |
226 | const struct cl_page_slice *slice, | |
227 | struct cl_io *unused) | |
228 | { | |
229 | struct page *vmpage = cl2vm_page(slice); | |
230 | ||
231 | LASSERT(PageLocked(vmpage)); | |
232 | LASSERT(!PageDirty(vmpage)); | |
233 | ||
234 | set_page_writeback(vmpage); | |
235 | vvp_write_pending(cl2ccc(slice->cpl_obj), cl2ccc_page(slice)); | |
236 | ||
237 | return 0; | |
238 | } | |
239 | ||
240 | /** | |
241 | * Handles page transfer errors at VM level. | |
242 | * | |
243 | * This takes inode as a separate argument, because inode on which error is to | |
244 | * be set can be different from \a vmpage inode in case of direct-io. | |
245 | */ | |
246 | static void vvp_vmpage_error(struct inode *inode, struct page *vmpage, int ioret) | |
247 | { | |
248 | struct ccc_object *obj = cl_inode2ccc(inode); | |
249 | ||
250 | if (ioret == 0) { | |
251 | ClearPageError(vmpage); | |
252 | obj->cob_discard_page_warned = 0; | |
253 | } else { | |
254 | SetPageError(vmpage); | |
255 | if (ioret == -ENOSPC) | |
256 | set_bit(AS_ENOSPC, &inode->i_mapping->flags); | |
257 | else | |
258 | set_bit(AS_EIO, &inode->i_mapping->flags); | |
259 | ||
260 | if ((ioret == -ESHUTDOWN || ioret == -EINTR) && | |
261 | obj->cob_discard_page_warned == 0) { | |
262 | obj->cob_discard_page_warned = 1; | |
263 | ll_dirty_page_discard_warn(vmpage, ioret); | |
264 | } | |
265 | } | |
266 | } | |
267 | ||
268 | static void vvp_page_completion_read(const struct lu_env *env, | |
269 | const struct cl_page_slice *slice, | |
270 | int ioret) | |
271 | { | |
272 | struct ccc_page *cp = cl2ccc_page(slice); | |
273 | struct page *vmpage = cp->cpg_page; | |
274 | struct cl_page *page = cl_page_top(slice->cpl_page); | |
275 | struct inode *inode = ccc_object_inode(page->cp_obj); | |
d7e09d03 PT |
276 | |
277 | LASSERT(PageLocked(vmpage)); | |
278 | CL_PAGE_HEADER(D_PAGE, env, page, "completing READ with %d\n", ioret); | |
279 | ||
280 | if (cp->cpg_defer_uptodate) | |
281 | ll_ra_count_put(ll_i2sbi(inode), 1); | |
282 | ||
283 | if (ioret == 0) { | |
284 | if (!cp->cpg_defer_uptodate) | |
285 | cl_page_export(env, page, 1); | |
286 | } else | |
287 | cp->cpg_defer_uptodate = 0; | |
288 | ||
289 | if (page->cp_sync_io == NULL) | |
290 | unlock_page(vmpage); | |
d7e09d03 PT |
291 | } |
292 | ||
293 | static void vvp_page_completion_write(const struct lu_env *env, | |
294 | const struct cl_page_slice *slice, | |
295 | int ioret) | |
296 | { | |
297 | struct ccc_page *cp = cl2ccc_page(slice); | |
298 | struct cl_page *pg = slice->cpl_page; | |
299 | struct page *vmpage = cp->cpg_page; | |
d7e09d03 PT |
300 | |
301 | LASSERT(ergo(pg->cp_sync_io != NULL, PageLocked(vmpage))); | |
302 | LASSERT(PageWriteback(vmpage)); | |
303 | ||
304 | CL_PAGE_HEADER(D_PAGE, env, pg, "completing WRITE with %d\n", ioret); | |
305 | ||
306 | /* | |
307 | * TODO: Actually it makes sense to add the page into oap pending | |
308 | * list again and so that we don't need to take the page out from | |
309 | * SoM write pending list, if we just meet a recoverable error, | |
310 | * -ENOMEM, etc. | |
311 | * To implement this, we just need to return a non zero value in | |
312 | * ->cpo_completion method. The underlying transfer should be notified | |
313 | * and then re-add the page into pending transfer queue. -jay | |
314 | */ | |
315 | ||
316 | cp->cpg_write_queued = 0; | |
317 | vvp_write_complete(cl2ccc(slice->cpl_obj), cp); | |
318 | ||
319 | /* | |
320 | * Only mark the page error only when it's an async write because | |
321 | * applications won't wait for IO to finish. | |
322 | */ | |
323 | if (pg->cp_sync_io == NULL) | |
324 | vvp_vmpage_error(ccc_object_inode(pg->cp_obj), vmpage, ioret); | |
325 | ||
326 | end_page_writeback(vmpage); | |
d7e09d03 PT |
327 | } |
328 | ||
329 | /** | |
330 | * Implements cl_page_operations::cpo_make_ready() method. | |
331 | * | |
332 | * This is called to yank a page from the transfer cache and to send it out as | |
333 | * a part of transfer. This function try-locks the page. If try-lock failed, | |
334 | * page is owned by some concurrent IO, and should be skipped (this is bad, | |
335 | * but hopefully rare situation, as it usually results in transfer being | |
336 | * shorter than possible). | |
337 | * | |
338 | * \retval 0 success, page can be placed into transfer | |
339 | * | |
340 | * \retval -EAGAIN page is either used by concurrent IO has been | |
341 | * truncated. Skip it. | |
342 | */ | |
343 | static int vvp_page_make_ready(const struct lu_env *env, | |
344 | const struct cl_page_slice *slice) | |
345 | { | |
346 | struct page *vmpage = cl2vm_page(slice); | |
347 | struct cl_page *pg = slice->cpl_page; | |
348 | int result = 0; | |
349 | ||
350 | lock_page(vmpage); | |
351 | if (clear_page_dirty_for_io(vmpage)) { | |
352 | LASSERT(pg->cp_state == CPS_CACHED); | |
353 | /* This actually clears the dirty bit in the radix | |
354 | * tree. */ | |
355 | set_page_writeback(vmpage); | |
356 | vvp_write_pending(cl2ccc(slice->cpl_obj), | |
357 | cl2ccc_page(slice)); | |
358 | CL_PAGE_HEADER(D_PAGE, env, pg, "readied\n"); | |
359 | } else if (pg->cp_state == CPS_PAGEOUT) { | |
360 | /* is it possible for osc_flush_async_page() to already | |
361 | * make it ready? */ | |
362 | result = -EALREADY; | |
363 | } else { | |
364 | CL_PAGE_DEBUG(D_ERROR, env, pg, "Unexpecting page state %d.\n", | |
365 | pg->cp_state); | |
366 | LBUG(); | |
367 | } | |
368 | unlock_page(vmpage); | |
0a3bdb00 | 369 | return result; |
d7e09d03 PT |
370 | } |
371 | ||
372 | static int vvp_page_print(const struct lu_env *env, | |
373 | const struct cl_page_slice *slice, | |
374 | void *cookie, lu_printer_t printer) | |
375 | { | |
376 | struct ccc_page *vp = cl2ccc_page(slice); | |
377 | struct page *vmpage = vp->cpg_page; | |
378 | ||
2d00bd17 | 379 | (*printer)(env, cookie, LUSTRE_VVP_NAME "-page@%p(%d:%d:%d) vm@%p ", |
d7e09d03 PT |
380 | vp, vp->cpg_defer_uptodate, vp->cpg_ra_used, |
381 | vp->cpg_write_queued, vmpage); | |
382 | if (vmpage != NULL) { | |
383 | (*printer)(env, cookie, "%lx %d:%d %lx %lu %slru", | |
384 | (long)vmpage->flags, page_count(vmpage), | |
385 | page_mapcount(vmpage), vmpage->private, | |
386 | page_index(vmpage), | |
387 | list_empty(&vmpage->lru) ? "not-" : ""); | |
388 | } | |
389 | (*printer)(env, cookie, "\n"); | |
390 | return 0; | |
391 | } | |
392 | ||
393 | static const struct cl_page_operations vvp_page_ops = { | |
394 | .cpo_own = vvp_page_own, | |
395 | .cpo_assume = vvp_page_assume, | |
396 | .cpo_unassume = vvp_page_unassume, | |
397 | .cpo_disown = vvp_page_disown, | |
398 | .cpo_vmpage = ccc_page_vmpage, | |
399 | .cpo_discard = vvp_page_discard, | |
400 | .cpo_delete = vvp_page_delete, | |
401 | .cpo_unmap = vvp_page_unmap, | |
402 | .cpo_export = vvp_page_export, | |
403 | .cpo_is_vmlocked = vvp_page_is_vmlocked, | |
404 | .cpo_fini = vvp_page_fini, | |
405 | .cpo_print = vvp_page_print, | |
406 | .cpo_is_under_lock = ccc_page_is_under_lock, | |
407 | .io = { | |
408 | [CRT_READ] = { | |
409 | .cpo_prep = vvp_page_prep_read, | |
410 | .cpo_completion = vvp_page_completion_read, | |
411 | .cpo_make_ready = ccc_fail, | |
412 | }, | |
413 | [CRT_WRITE] = { | |
414 | .cpo_prep = vvp_page_prep_write, | |
415 | .cpo_completion = vvp_page_completion_write, | |
416 | .cpo_make_ready = vvp_page_make_ready, | |
417 | } | |
418 | } | |
419 | }; | |
420 | ||
421 | static void vvp_transient_page_verify(const struct cl_page *page) | |
422 | { | |
423 | struct inode *inode = ccc_object_inode(page->cp_obj); | |
424 | ||
425 | LASSERT(!mutex_trylock(&inode->i_mutex)); | |
426 | } | |
427 | ||
428 | static int vvp_transient_page_own(const struct lu_env *env, | |
429 | const struct cl_page_slice *slice, | |
430 | struct cl_io *unused, int nonblock) | |
431 | { | |
432 | vvp_transient_page_verify(slice->cpl_page); | |
433 | return 0; | |
434 | } | |
435 | ||
436 | static void vvp_transient_page_assume(const struct lu_env *env, | |
437 | const struct cl_page_slice *slice, | |
438 | struct cl_io *unused) | |
439 | { | |
440 | vvp_transient_page_verify(slice->cpl_page); | |
441 | } | |
442 | ||
443 | static void vvp_transient_page_unassume(const struct lu_env *env, | |
444 | const struct cl_page_slice *slice, | |
445 | struct cl_io *unused) | |
446 | { | |
447 | vvp_transient_page_verify(slice->cpl_page); | |
448 | } | |
449 | ||
450 | static void vvp_transient_page_disown(const struct lu_env *env, | |
451 | const struct cl_page_slice *slice, | |
452 | struct cl_io *unused) | |
453 | { | |
454 | vvp_transient_page_verify(slice->cpl_page); | |
455 | } | |
456 | ||
457 | static void vvp_transient_page_discard(const struct lu_env *env, | |
458 | const struct cl_page_slice *slice, | |
459 | struct cl_io *unused) | |
460 | { | |
461 | struct cl_page *page = slice->cpl_page; | |
462 | ||
463 | vvp_transient_page_verify(slice->cpl_page); | |
464 | ||
465 | /* | |
466 | * For transient pages, remove it from the radix tree. | |
467 | */ | |
468 | cl_page_delete(env, page); | |
469 | } | |
470 | ||
471 | static int vvp_transient_page_is_vmlocked(const struct lu_env *env, | |
472 | const struct cl_page_slice *slice) | |
473 | { | |
474 | struct inode *inode = ccc_object_inode(slice->cpl_obj); | |
475 | int locked; | |
476 | ||
477 | locked = !mutex_trylock(&inode->i_mutex); | |
478 | if (!locked) | |
479 | mutex_unlock(&inode->i_mutex); | |
480 | return locked ? -EBUSY : -ENODATA; | |
481 | } | |
482 | ||
483 | static void | |
484 | vvp_transient_page_completion(const struct lu_env *env, | |
485 | const struct cl_page_slice *slice, | |
486 | int ioret) | |
487 | { | |
488 | vvp_transient_page_verify(slice->cpl_page); | |
489 | } | |
490 | ||
491 | static void vvp_transient_page_fini(const struct lu_env *env, | |
492 | struct cl_page_slice *slice) | |
493 | { | |
494 | struct ccc_page *cp = cl2ccc_page(slice); | |
495 | struct cl_page *clp = slice->cpl_page; | |
496 | struct ccc_object *clobj = cl2ccc(clp->cp_obj); | |
497 | ||
498 | vvp_page_fini_common(cp); | |
499 | LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex)); | |
500 | clobj->cob_transient_pages--; | |
501 | } | |
502 | ||
503 | static const struct cl_page_operations vvp_transient_page_ops = { | |
504 | .cpo_own = vvp_transient_page_own, | |
505 | .cpo_assume = vvp_transient_page_assume, | |
506 | .cpo_unassume = vvp_transient_page_unassume, | |
507 | .cpo_disown = vvp_transient_page_disown, | |
508 | .cpo_discard = vvp_transient_page_discard, | |
509 | .cpo_vmpage = ccc_page_vmpage, | |
510 | .cpo_fini = vvp_transient_page_fini, | |
511 | .cpo_is_vmlocked = vvp_transient_page_is_vmlocked, | |
512 | .cpo_print = vvp_page_print, | |
513 | .cpo_is_under_lock = ccc_page_is_under_lock, | |
514 | .io = { | |
515 | [CRT_READ] = { | |
516 | .cpo_prep = ccc_transient_page_prep, | |
517 | .cpo_completion = vvp_transient_page_completion, | |
518 | }, | |
519 | [CRT_WRITE] = { | |
520 | .cpo_prep = ccc_transient_page_prep, | |
521 | .cpo_completion = vvp_transient_page_completion, | |
522 | } | |
523 | } | |
524 | }; | |
525 | ||
526 | int vvp_page_init(const struct lu_env *env, struct cl_object *obj, | |
527 | struct cl_page *page, struct page *vmpage) | |
528 | { | |
529 | struct ccc_page *cpg = cl_object_page_slice(obj, page); | |
530 | ||
531 | CLOBINVRNT(env, obj, ccc_object_invariant(obj)); | |
532 | ||
533 | cpg->cpg_page = vmpage; | |
534 | page_cache_get(vmpage); | |
535 | ||
536 | INIT_LIST_HEAD(&cpg->cpg_pending_linkage); | |
537 | if (page->cp_type == CPT_CACHEABLE) { | |
538 | SetPagePrivate(vmpage); | |
539 | vmpage->private = (unsigned long)page; | |
540 | cl_page_slice_add(page, &cpg->cpg_cl, obj, | |
541 | &vvp_page_ops); | |
542 | } else { | |
543 | struct ccc_object *clobj = cl2ccc(obj); | |
544 | ||
545 | LASSERT(!mutex_trylock(&clobj->cob_inode->i_mutex)); | |
546 | cl_page_slice_add(page, &cpg->cpg_cl, obj, | |
547 | &vvp_transient_page_ops); | |
548 | clobj->cob_transient_pages++; | |
549 | } | |
550 | return 0; | |
551 | } |