4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2015, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * Implementation of cl_page for OSC layer.
38 * Author: Nikita Danilov <nikita.danilov@sun.com>
41 #define DEBUG_SUBSYSTEM S_OSC
43 #include "osc_cl_internal.h"
/*
 * Forward declarations of the LRU helpers that are defined in the
 * "LRU page management" part of this file.  They are needed because the
 * page methods above that section call them: osc_lru_del() from
 * osc_page_transfer_add() and osc_page_delete(), osc_lru_add() from the
 * completion methods, and osc_lru_reserve() from osc_page_init().
 */
45 static void osc_lru_del(struct client_obd
*cli
, struct osc_page
*opg
, bool del
);
46 static void osc_lru_add(struct client_obd
*cli
, struct osc_page
*opg
);
47 static int osc_lru_reserve(const struct lu_env
*env
, struct osc_object
*obj
,
48 struct osc_page
*opg
);
55 * Comment out osc_page_protected because it may sleep inside the
56 * client_obd_list_lock.
57 * client_obd_list_lock -> osc_ap_completion -> osc_completion ->
58 * -> osc_page_protected -> osc_page_is_dlocked -> osc_match_base
59 * -> ldlm_lock_match -> sptlrpc_import_check_ctx -> sleep.
/*
 * Ask the DLM whether the page behind @opg is covered by an extent lock.
 *
 * The per-environment scratch area (osc_env_info()) supplies the resource
 * name, policy and lock handle buffers.  The resource name is built for the
 * page's OSC object, the policy is the one-page extent
 * [cp_index, cp_index], and the requested mode is
 * osc_cl_lock2ldlm(@mode) | LCK_PW.  The match flags start as
 * LDLM_FL_TEST_LOCK | LDLM_FL_BLOCK_GRANTED and LDLM_FL_CBPENDING is OR-ed
 * in -- presumably only when @pending is non-zero; the guarding line is not
 * visible in this listing (TODO confirm).
 *
 * @unref is handed to osc_match_base() unchanged, and the function returns
 * whatever osc_match_base() returns.
 *
 * NOTE(review): this listing has lost lines inside the body (braces and the
 * declarations of 'page', 'flags' and 'dlmmode' among them).
 */
62 static int osc_page_is_dlocked(const struct lu_env
*env
,
63 const struct osc_page
*opg
,
64 enum cl_lock_mode mode
, int pending
, int unref
)
67 struct osc_object
*obj
;
68 struct osc_thread_info
*info
;
69 struct ldlm_res_id
*resname
;
70 struct lustre_handle
*lockh
;
71 ldlm_policy_data_t
*policy
;
77 info
= osc_env_info(env
);
78 resname
= &info
->oti_resname
;
79 policy
= &info
->oti_policy
;
80 lockh
= &info
->oti_handle
;
81 page
= opg
->ops_cl
.cpl_page
;
82 obj
= cl2osc(opg
->ops_cl
.cpl_obj
);
84 flags
= LDLM_FL_TEST_LOCK
| LDLM_FL_BLOCK_GRANTED
;
86 flags
|= LDLM_FL_CBPENDING
;
88 dlmmode
= osc_cl_lock2ldlm(mode
) | LCK_PW
;
89 osc_lock_build_res(env
, obj
, resname
);
90 osc_index2policy(policy
, page
->cp_obj
, page
->cp_index
, page
->cp_index
);
91 return osc_match_base(osc_export(obj
), resname
, LDLM_EXTENT
, policy
,
92 dlmmode
, &flags
, NULL
, lockh
, unref
);
96 * Checks an invariant that a page in the cache is covered by a lock, as
/*
 * Check that the page behind @opg is protected by a lock of mode @mode.
 *
 * Steps visible in this listing:
 *  - LINVRNT() that the page is not a temporary one (ops_temp clear);
 *  - if the page has an owner io, compare the top-level io's ci_lockreq
 *    with CILR_NEVER (io done without locks);
 *  - query the DLM through osc_page_is_dlocked() with pending == 1;
 *  - walk the object header's coh_locks list under coh_lock_guard and, for
 *    a lock in CLS_HELD or CLS_CACHED state whose extent matches the
 *    one-page descriptor, take the result from osc_lock_is_lockless().
 *
 * NOTE(review): the declaration of 'result', the tests that chain these
 * steps together and the final return are not visible here; how each step
 * short-circuits the next must be confirmed against the complete source.
 */
99 static int osc_page_protected(const struct lu_env
*env
,
100 const struct osc_page
*opg
,
101 enum cl_lock_mode mode
, int unref
)
103 struct cl_object_header
*hdr
;
104 struct cl_lock
*scan
;
105 struct cl_page
*page
;
106 struct cl_lock_descr
*descr
;
109 LINVRNT(!opg
->ops_temp
);
111 page
= opg
->ops_cl
.cpl_page
;
112 if (page
->cp_owner
!= NULL
&&
113 cl_io_top(page
->cp_owner
)->ci_lockreq
== CILR_NEVER
)
115 * If IO is done without locks (liblustre, or lloop), lock is
120 /* otherwise check for a DLM lock */
121 result
= osc_page_is_dlocked(env
, opg
, mode
, 1, unref
);
123 /* maybe this page is a part of a lockless io? */
124 hdr
= cl_object_header(opg
->ops_cl
.cpl_obj
);
125 descr
= &osc_env_info(env
)->oti_descr
;
126 descr
->cld_mode
= mode
;
127 descr
->cld_start
= page
->cp_index
;
128 descr
->cld_end
= page
->cp_index
;
129 spin_lock(&hdr
->coh_lock_guard
);
130 list_for_each_entry(scan
, &hdr
->coh_locks
, cll_linkage
) {
132 * Lock-less sub-lock has to be either in HELD state
133 * (when io is actively going on), or in CACHED state,
134 * when top-lock is being unlocked:
135 * cl_io_unlock()->cl_unuse()->...->lov_lock_unuse().
137 if ((scan
->cll_state
== CLS_HELD
||
138 scan
->cll_state
== CLS_CACHED
) &&
139 cl_lock_ext_match(&scan
->cll_descr
, descr
)) {
140 struct osc_lock
*olck
;
142 olck
= osc_lock_at(scan
);
143 result
= osc_lock_is_lockless(olck
);
147 spin_unlock(&hdr
->coh_lock_guard
);
/*
 * Second definition of osc_page_protected() with an identical signature.
 *
 * NOTE(review): two definitions of the same static function can only
 * coexist under a preprocessor conditional, so this is presumably the
 * alternative (check disabled) branch -- compare the "Comment out
 * osc_page_protected" remark earlier in this file.  Neither the directives
 * nor this body are visible in the listing; confirm what it returns.
 */
152 static int osc_page_protected(const struct lu_env
*env
,
153 const struct osc_page
*opg
,
154 enum cl_lock_mode mode
, int unref
)
160 /*****************************************************************************
/*
 * cpo_fini method of osc_page_ops: finalise the OSC slice of a page.
 *
 * Traces the osc_page pointer at D_TRACE level and asserts that no lock is
 * still attached to the page (ops_lock must be NULL at this point).
 */
165 static void osc_page_fini(const struct lu_env
*env
,
166 struct cl_page_slice
*slice
)
168 struct osc_page
*opg
= cl2osc_page(slice
);
170 CDEBUG(D_TRACE
, "%p\n", opg
);
171 LASSERT(opg
->ops_lock
== NULL
);
/*
 * Pin @opg for the duration of a transfer.
 *
 * Asserts that the page is not pinned already, records @label in the
 * cp_reference lu_ref of the top-level cl_page and sets
 * ops_transfer_pinned.  Callers in this file pass labels of the form
 * "transfer\0<suffix>", while osc_page_transfer_put() removes the entry
 * under the plain label "transfer".
 *
 * NOTE(review): osc_page_transfer_put() releases a page reference with
 * cl_page_put(), but no matching acquisition is visible here; a line is
 * presumably missing from this listing -- confirm.
 */
174 static void osc_page_transfer_get(struct osc_page
*opg
, const char *label
)
176 struct cl_page
*page
= cl_page_top(opg
->ops_cl
.cpl_page
);
178 LASSERT(!opg
->ops_transfer_pinned
);
180 lu_ref_add_atomic(&page
->cp_reference
, label
, page
);
181 opg
->ops_transfer_pinned
= 1;
/*
 * Undo osc_page_transfer_get().
 *
 * When @opg is pinned, the "transfer" entry is removed from the top-level
 * page's cp_reference lu_ref, ops_transfer_pinned is cleared and the page
 * is released with cl_page_put().  Nothing is done for a page that is not
 * pinned, so the call is safe to repeat.
 */
184 static void osc_page_transfer_put(const struct lu_env
*env
,
185 struct osc_page
*opg
)
187 struct cl_page
*page
= cl_page_top(opg
->ops_cl
.cpl_page
);
189 if (opg
->ops_transfer_pinned
) {
190 lu_ref_del(&page
->cp_reference
, "transfer", page
);
191 opg
->ops_transfer_pinned
= 0;
192 cl_page_put(env
, page
);
197 * This is called once for every page when it is submitted for a transfer
198 * either opportunistic (osc_page_cache_add()), or immediate
199 * (osc_page_submit()).
/*
 * Put @opg on its object's in-flight list for request type @crt.
 *
 * The page is first taken off the LRU with osc_lru_del(..., false); then,
 * with oo_seatbelt held, it is linked onto oo_inflight[@crt] and
 * ops_submitter is set to the current task.  @env is not used by the
 * visible code.
 */
201 static void osc_page_transfer_add(const struct lu_env
*env
,
202 struct osc_page
*opg
, enum cl_req_type crt
)
204 struct osc_object
*obj
= cl2osc(opg
->ops_cl
.cpl_obj
);
206 /* ops_lru and ops_inflight share the same field, so take it from LRU
207 * first and then use it as inflight. */
208 osc_lru_del(osc_cli(obj
), opg
, false);
210 spin_lock(&obj
->oo_seatbelt
);
211 list_add(&opg
->ops_inflight
, &obj
->oo_inflight
[crt
]);
212 opg
->ops_submitter
= current
;
213 spin_unlock(&obj
->oo_seatbelt
);
/*
 * cpo_cache_add method paired with osc_page_completion_write in
 * osc_page_ops: queue a page for asynchronous write-out.
 *
 * The page is pinned for transfer, passed to osc_queue_async_io(), and
 * afterwards either unpinned again or added to the in-flight list as
 * CRT_WRITE.  For a sync write or mkwrite the io's active extent
 * (oio->oi_active), if any, is released immediately and the field cleared.
 *
 * NOTE(review): the third parameter (used as 'io' in the body), the
 * declaration of 'result', the test choosing between transfer_put and
 * transfer_add, and the return are not visible in this listing.
 * Presumably the pin is dropped on failure and the page is added to the
 * in-flight list on success -- confirm.
 */
216 static int osc_page_cache_add(const struct lu_env
*env
,
217 const struct cl_page_slice
*slice
,
220 struct osc_io
*oio
= osc_env_io(env
);
221 struct osc_page
*opg
= cl2osc_page(slice
);
224 LINVRNT(osc_page_protected(env
, opg
, CLM_WRITE
, 0));
226 osc_page_transfer_get(opg
, "transfer\0cache");
227 result
= osc_queue_async_io(env
, io
, opg
);
229 osc_page_transfer_put(env
, opg
);
231 osc_page_transfer_add(env
, opg
, CRT_WRITE
);
233 /* for sync write, kernel will wait for this page to be flushed before
234 * osc_io_end() is called, so release it earlier.
235 * for mkwrite(), it's known there is no further pages. */
236 if (cl_io_is_sync_write(io
) || cl_io_is_mkwrite(io
)) {
237 if (oio
->oi_active
!= NULL
) {
238 osc_extent_release(env
, oio
->oi_active
);
239 oio
->oi_active
= NULL
;
/*
 * Fill @policy with the extent that covers pages @start..@end of @obj.
 *
 * @policy is zeroed first.  l_extent.start becomes cl_offset(@obj, @start)
 * and l_extent.end becomes cl_offset(@obj, @end + 1) - 1, i.e. the end is
 * inclusive: one less than the offset of the page following @end.
 */
246 void osc_index2policy(ldlm_policy_data_t
*policy
, const struct cl_object
*obj
,
247 pgoff_t start
, pgoff_t end
)
249 memset(policy
, 0, sizeof(*policy
));
250 policy
->l_extent
.start
= cl_offset(obj
, start
);
251 policy
->l_extent
.end
= cl_offset(obj
, end
+ 1) - 1;
/*
 * Attach @lock to @opg and account the page in the lock's page counter.
 *
 * Asserts that no lock is attached yet, then increments
 * olock->ols_pageref.  If the value after the increment is <= 0 the
 * increment is rolled back with atomic_dec(); ops_lock is set to @lock.
 * The caller (osc_page_is_under_lock()) treats a return value of 0 as
 * success.
 *
 * NOTE(review): the return statements and the branch structure around the
 * ops_lock assignment are not visible in this listing; presumably the
 * rollback path returns an error without touching ops_lock -- confirm.
 */
254 static int osc_page_addref_lock(const struct lu_env
*env
,
255 struct osc_page
*opg
,
256 struct cl_lock
*lock
)
258 struct osc_lock
*olock
;
261 LASSERT(opg
->ops_lock
== NULL
);
263 olock
= osc_lock_at(lock
);
264 if (atomic_inc_return(&olock
->ols_pageref
) <= 0) {
265 atomic_dec(&olock
->ols_pageref
);
269 opg
->ops_lock
= lock
;
/*
 * Detach the lock recorded in opg->ops_lock.
 *
 * Asserts that a lock is attached, decrements its ols_pageref counter,
 * clears ops_lock and finally releases the lock with cl_lock_put().
 */
275 static void osc_page_putref_lock(const struct lu_env
*env
,
276 struct osc_page
*opg
)
278 struct cl_lock
*lock
= opg
->ops_lock
;
279 struct osc_lock
*olock
;
281 LASSERT(lock
!= NULL
);
282 olock
= osc_lock_at(lock
);
284 atomic_dec(&olock
->ols_pageref
);
285 opg
->ops_lock
= NULL
;
287 cl_lock_put(env
, lock
);
/*
 * cpo_is_under_lock method of osc_page_ops.
 *
 * Looks up a lock for the page with cl_lock_at_page() and tries to attach
 * it to the OSC page through osc_page_addref_lock().  'result' starts out
 * as -ENODATA.  The io argument is named 'unused'.
 *
 * NOTE(review): the trailing arguments of cl_lock_at_page(), the check of
 * its return value, the value assigned on success and the condition under
 * which cl_lock_put() runs are not visible in this listing; presumably the
 * lookup reference is dropped only when the attach fails -- confirm.
 */
290 static int osc_page_is_under_lock(const struct lu_env
*env
,
291 const struct cl_page_slice
*slice
,
292 struct cl_io
*unused
)
294 struct cl_lock
*lock
;
295 int result
= -ENODATA
;
297 lock
= cl_lock_at_page(env
, slice
->cpl_obj
, slice
->cpl_page
,
300 if (osc_page_addref_lock(env
, cl2osc_page(slice
), lock
) == 0)
302 cl_lock_put(env
, lock
);
/*
 * cpo_disown method of osc_page_ops: if a lock is still attached to the
 * page (marked unlikely), drop it with osc_page_putref_lock().
 *
 * NOTE(review): the last parameter of the signature is not visible in this
 * listing.
 */
307 static void osc_page_disown(const struct lu_env
*env
,
308 const struct cl_page_slice
*slice
,
311 struct osc_page
*opg
= cl2osc_page(slice
);
313 if (unlikely(opg
->ops_lock
))
314 osc_page_putref_lock(env
, opg
);
/*
 * cpo_completion method paired with osc_page_fail in osc_page_ops.
 *
 * Drops the lock reference attached to the page (expected to be present,
 * hence likely()) and then hands the page to the client's LRU with
 * osc_lru_add().
 *
 * NOTE(review): the last parameter of the signature is not visible in this
 * listing.
 */
317 static void osc_page_completion_read(const struct lu_env
*env
,
318 const struct cl_page_slice
*slice
,
321 struct osc_page
*opg
= cl2osc_page(slice
);
322 struct osc_object
*obj
= cl2osc(opg
->ops_cl
.cpl_obj
);
324 if (likely(opg
->ops_lock
))
325 osc_page_putref_lock(env
, opg
);
326 osc_lru_add(osc_cli(obj
), opg
);
/*
 * cpo_completion method paired with osc_page_cache_add in osc_page_ops:
 * hands the page to the client's LRU with osc_lru_add().
 *
 * NOTE(review): the last parameter of the signature is not visible in this
 * listing.
 */
329 static void osc_page_completion_write(const struct lu_env
*env
,
330 const struct cl_page_slice
*slice
,
333 struct osc_page
*opg
= cl2osc_page(slice
);
334 struct osc_object
*obj
= cl2osc(slice
->cpl_obj
);
336 osc_lru_add(osc_cli(obj
), opg
);
/*
 * Installed as cpo_cache_add next to osc_page_completion_read in
 * osc_page_ops.  The io argument is named 'unused'.
 *
 * NOTE(review): the body is not visible in this listing; judging by the
 * name it presumably rejects the operation -- confirm the return value
 * against the complete source.
 */
339 static int osc_page_fail(const struct lu_env
*env
,
340 const struct cl_page_slice
*slice
,
341 struct cl_io
*unused
)
/*
 * Printing helper used by osc_page_print(): returns "-" when @head is an
 * empty list and "+" otherwise.  The returned strings are literals.
 */
350 static const char *osc_list(struct list_head
*head
)
352 return list_empty(head
) ? "-" : "+";
/*
 * Time elapsed since @opg was submitted: cfs_time_current() minus
 * ops_submit_time, in the units of cfs_time_current().
 *
 * ops_submit_time == 0 is special-cased; the value returned in that case is
 * on a line missing from this listing (presumably 0 -- confirm).
 */
355 static inline unsigned long osc_submit_duration(struct osc_page
*opg
)
357 if (opg
->ops_submit_time
== 0)
360 return (cfs_time_current() - opg
->ops_submit_time
);
/*
 * cpo_print method of osc_page_ops: dump the OSC view of a page through
 * @printer, passing @cookie along, and return the printer's result.
 *
 * The output is organised in numbered groups:
 *   1< oap magic, cmd, interrupted flag, pending/rpc list state >
 *   2< object offset, page offset, count, async flags, brw flags |
 *      request, oap_cli, object >
 *   3< in-flight list state, submitter, transfer pin, submit duration,
 *      srvlock >
 *   4< client read/write RPCs in flight, max RPCs in flight, ... |
 *      client-wide list states >
 *   5< per-object list states | read count and list | write count and
 *      lists >
 * List states are rendered by osc_list() ("-" empty, "+" non-empty).
 *
 * NOTE(review): the format has more conversions than there are visible
 * arguments (the leading %p and the %lu in group 4 have none), so argument
 * lines are missing from this listing.
 */
363 static int osc_page_print(const struct lu_env
*env
,
364 const struct cl_page_slice
*slice
,
365 void *cookie
, lu_printer_t printer
)
367 struct osc_page
*opg
= cl2osc_page(slice
);
368 struct osc_async_page
*oap
= &opg
->ops_oap
;
369 struct osc_object
*obj
= cl2osc(slice
->cpl_obj
);
370 struct client_obd
*cli
= &osc_export(obj
)->exp_obd
->u
.cli
;
372 return (*printer
)(env
, cookie
, LUSTRE_OSC_NAME
"-page@%p: 1< %#x %d %u %s %s > 2< %llu %u %u %#x %#x | %p %p %p > 3< %s %p %d %lu %d > 4< %d %d %d %lu %s | %s %s %s %s > 5< %s %s %s %s | %d %s | %d %s %s>\n",
375 oap
->oap_magic
, oap
->oap_cmd
,
376 oap
->oap_interrupted
,
377 osc_list(&oap
->oap_pending_item
),
378 osc_list(&oap
->oap_rpc_item
),
380 oap
->oap_obj_off
, oap
->oap_page_off
, oap
->oap_count
,
381 oap
->oap_async_flags
, oap
->oap_brw_flags
,
382 oap
->oap_request
, oap
->oap_cli
, obj
,
384 osc_list(&opg
->ops_inflight
),
385 opg
->ops_submitter
, opg
->ops_transfer_pinned
,
386 osc_submit_duration(opg
), opg
->ops_srvlock
,
388 cli
->cl_r_in_flight
, cli
->cl_w_in_flight
,
389 cli
->cl_max_rpcs_in_flight
,
391 osc_list(&cli
->cl_cache_waiters
),
392 osc_list(&cli
->cl_loi_ready_list
),
393 osc_list(&cli
->cl_loi_hp_ready_list
),
394 osc_list(&cli
->cl_loi_write_list
),
395 osc_list(&cli
->cl_loi_read_list
),
397 osc_list(&obj
->oo_ready_item
),
398 osc_list(&obj
->oo_hp_ready_item
),
399 osc_list(&obj
->oo_write_item
),
400 osc_list(&obj
->oo_read_item
),
401 atomic_read(&obj
->oo_nr_reads
),
402 osc_list(&obj
->oo_reading_exts
),
403 atomic_read(&obj
->oo_nr_writes
),
404 osc_list(&obj
->oo_hp_exts
),
405 osc_list(&obj
->oo_urgent_exts
));
/*
 * cpo_delete method of osc_page_ops: detach the OSC slice from all the
 * bookkeeping it may still be part of.
 *
 * In order: drop the transfer pin, tear down the async page with
 * osc_teardown_async_page() (a failure is reported at D_ERROR level with
 * the return code), unlink the page from the in-flight list under
 * oo_seatbelt if it still has a submitter, and finally remove it from the
 * LRU with osc_lru_del(..., true).
 *
 * NOTE(review): the declaration of 'rc' and the test guarding the error
 * report are not visible in this listing.
 */
408 static void osc_page_delete(const struct lu_env
*env
,
409 const struct cl_page_slice
*slice
)
411 struct osc_page
*opg
= cl2osc_page(slice
);
412 struct osc_object
*obj
= cl2osc(opg
->ops_cl
.cpl_obj
);
415 LINVRNT(opg
->ops_temp
|| osc_page_protected(env
, opg
, CLM_READ
, 1));
417 CDEBUG(D_TRACE
, "%p\n", opg
);
418 osc_page_transfer_put(env
, opg
);
419 rc
= osc_teardown_async_page(env
, obj
, opg
);
421 CL_PAGE_DEBUG(D_ERROR
, env
, cl_page_top(slice
->cpl_page
),
422 "Trying to teardown failed: %d\n", rc
);
426 spin_lock(&obj
->oo_seatbelt
);
427 if (opg
->ops_submitter
!= NULL
) {
428 LASSERT(!list_empty(&opg
->ops_inflight
));
429 list_del_init(&opg
->ops_inflight
);
430 opg
->ops_submitter
= NULL
;
432 spin_unlock(&obj
->oo_seatbelt
);
434 osc_lru_del(osc_cli(obj
), opg
, true);
/*
 * cpo_clip method of osc_page_ops: record the clipped range of a page.
 *
 * Stores the start of the range in ops_from and then, under oap_lock, sets
 * ASYNC_COUNT_STABLE in the async page flags (osc_page_submit() asserts
 * that flag).
 *
 * NOTE(review): the remaining parameters (at least 'from') and any
 * assignment of the range end are on lines missing from this listing;
 * presumably ops_to is set here as well -- confirm.
 */
437 void osc_page_clip(const struct lu_env
*env
, const struct cl_page_slice
*slice
,
440 struct osc_page
*opg
= cl2osc_page(slice
);
441 struct osc_async_page
*oap
= &opg
->ops_oap
;
443 LINVRNT(osc_page_protected(env
, opg
, CLM_READ
, 0));
445 opg
->ops_from
= from
;
447 spin_lock(&oap
->oap_lock
);
448 oap
->oap_async_flags
|= ASYNC_COUNT_STABLE
;
449 spin_unlock(&oap
->oap_lock
);
/*
 * cpo_cancel method of osc_page_ops: cancel a queued transfer of the page.
 *
 * If the page is still pinned for a transfer, osc_cancel_async_page() is
 * called.  The function then asserts that rc == 0 implies the page is no
 * longer pinned.
 *
 * NOTE(review): the declaration/initial value of 'rc' and the return are
 * not visible in this listing.
 */
452 static int osc_page_cancel(const struct lu_env
*env
,
453 const struct cl_page_slice
*slice
)
455 struct osc_page
*opg
= cl2osc_page(slice
);
458 LINVRNT(osc_page_protected(env
, opg
, CLM_READ
, 0));
460 /* Check if the transferring against this page
461 * is completed, or not even queued. */
462 if (opg
->ops_transfer_pinned
)
463 /* FIXME: may not be interrupted.. */
464 rc
= osc_cancel_async_page(env
, opg
);
465 LASSERT(ergo(rc
== 0, opg
->ops_transfer_pinned
== 0));
/*
 * cpo_flush method of osc_page_ops: forwards the page to
 * osc_flush_async_page() and keeps its return code in 'rc'.
 *
 * NOTE(review): the last parameter (used as 'io'), the declaration of 'rc'
 * and the return statement are not visible in this listing.
 */
469 static int osc_page_flush(const struct lu_env
*env
,
470 const struct cl_page_slice
*slice
,
473 struct osc_page
*opg
= cl2osc_page(slice
);
476 rc
= osc_flush_async_page(env
, io
, opg
);
/*
 * Method table for the OSC slice of a cl_page; osc_page_init() registers
 * it with cl_page_slice_add().
 *
 * cpo_cache_add/cpo_completion appear twice: once as
 * (osc_page_fail, osc_page_completion_read) and once as
 * (osc_page_cache_add, osc_page_completion_write).
 *
 * NOTE(review): the designators enclosing those two pairs are not visible
 * in this listing; presumably they are per-request-type sub-tables for
 * reads and writes respectively -- confirm.
 */
480 static const struct cl_page_operations osc_page_ops
= {
481 .cpo_fini
= osc_page_fini
,
482 .cpo_print
= osc_page_print
,
483 .cpo_delete
= osc_page_delete
,
484 .cpo_is_under_lock
= osc_page_is_under_lock
,
485 .cpo_disown
= osc_page_disown
,
488 .cpo_cache_add
= osc_page_fail
,
489 .cpo_completion
= osc_page_completion_read
492 .cpo_cache_add
= osc_page_cache_add
,
493 .cpo_completion
= osc_page_completion_write
496 .cpo_clip
= osc_page_clip
,
497 .cpo_cancel
= osc_page_cancel
,
498 .cpo_flush
= osc_page_flush
/*
 * Initialise the OSC slice of @page for object @obj.
 *
 * Sets ops_to to PAGE_CACHE_SIZE, prepares the async page with
 * osc_prep_async_page() at offset cl_offset(@obj, cp_index), records
 * whether the current io uses server-side locking (ops_srvlock) and adds
 * the slice to the page with cl_page_slice_add().  Both list heads
 * (ops_inflight/ops_lru) are initialised, and for a CPT_CACHEABLE page
 * whose preparation succeeded (result == 0) an LRU slot is reserved with
 * osc_lru_reserve().
 *
 * NOTE(review): the declaration of 'result', the test guarding the slice
 * registration, the last argument of cl_page_slice_add() and the return are
 * not visible in this listing; presumably 'result' is returned.
 */
501 int osc_page_init(const struct lu_env
*env
, struct cl_object
*obj
,
502 struct cl_page
*page
, struct page
*vmpage
)
504 struct osc_object
*osc
= cl2osc(obj
);
505 struct osc_page
*opg
= cl_object_page_slice(obj
, page
);
509 opg
->ops_to
= PAGE_CACHE_SIZE
;
511 result
= osc_prep_async_page(osc
, opg
, vmpage
,
512 cl_offset(obj
, page
->cp_index
));
514 struct osc_io
*oio
= osc_env_io(env
);
516 opg
->ops_srvlock
= osc_io_srvlock(oio
);
517 cl_page_slice_add(page
, &opg
->ops_cl
, obj
,
521 * Cannot assert osc_page_protected() here as read-ahead
522 * creates temporary pages outside of a lock.
524 /* ops_inflight and ops_lru are the same field, but it doesn't
525 * hurt to initialize it twice :-) */
526 INIT_LIST_HEAD(&opg
->ops_inflight
);
527 INIT_LIST_HEAD(&opg
->ops_lru
);
529 /* reserve an LRU space for this page */
530 if (page
->cp_type
== CPT_CACHEABLE
&& result
== 0)
531 result
= osc_lru_reserve(env
, osc
, opg
);
537 * Helper function called by osc_io_submit() for every page in an immediate
538 * transfer (i.e., transferred synchronously).
/*
 * Prepare @opg for an immediate transfer of type @crt.
 *
 * Preconditions asserted here: the async page carries OAP_MAGIC and has
 * both ASYNC_READY and ASYNC_COUNT_STABLE set.
 *
 * The async page is then filled in: oap_cmd is OBD_BRW_WRITE or
 * OBD_BRW_READ depending on @crt, the page offset and byte count come from
 * ops_from/ops_to, and oap_brw_flags is @brw_flags with OBD_BRW_SYNC added.
 * When the export is not a remote client and the caller has
 * CFS_CAP_SYS_RESOURCE, OBD_BRW_NOQUOTA is added to both the brw flags and
 * the command.  Finally the submit time is stamped, the page is pinned
 * ("transfer\0imm") and put on the in-flight list.
 */
540 void osc_page_submit(const struct lu_env
*env
, struct osc_page
*opg
,
541 enum cl_req_type crt
, int brw_flags
)
543 struct osc_async_page
*oap
= &opg
->ops_oap
;
544 struct osc_object
*obj
= oap
->oap_obj
;
546 LINVRNT(osc_page_protected(env
, opg
,
547 crt
== CRT_WRITE
? CLM_WRITE
: CLM_READ
, 1));
549 LASSERTF(oap
->oap_magic
== OAP_MAGIC
, "Bad oap magic: oap %p, magic 0x%x\n",
550 oap
, oap
->oap_magic
);
551 LASSERT(oap
->oap_async_flags
& ASYNC_READY
);
552 LASSERT(oap
->oap_async_flags
& ASYNC_COUNT_STABLE
);
554 oap
->oap_cmd
= crt
== CRT_WRITE
? OBD_BRW_WRITE
: OBD_BRW_READ
;
555 oap
->oap_page_off
= opg
->ops_from
;
556 oap
->oap_count
= opg
->ops_to
- opg
->ops_from
;
557 oap
->oap_brw_flags
= brw_flags
| OBD_BRW_SYNC
;
559 if (!client_is_remote(osc_export(obj
)) &&
560 capable(CFS_CAP_SYS_RESOURCE
)) {
561 oap
->oap_brw_flags
|= OBD_BRW_NOQUOTA
;
562 oap
->oap_cmd
|= OBD_BRW_NOQUOTA
;
565 opg
->ops_submit_time
= cfs_time_current();
566 osc_page_transfer_get(opg
, "transfer\0imm");
567 osc_page_transfer_add(env
, opg
, crt
);
570 /* --------------- LRU page management ------------------ */
572 /* OSC is a natural place to manage LRU pages as applications are specialized
573 * to write OSC by OSC. Ideally, if one OSC is used more frequently it should
574 * occupy more LRU slots. On the other hand, we should avoid using up all LRU
575 * slots (client_obd::cl_lru_left) otherwise process has to be put into sleep
576 * for free LRU slots - this will be very bad so the algorithm requires each
577 * OSC to free slots voluntarily to maintain a reasonable number of free slots
/*
 * File-scope LRU state shared by every OSC in this module:
 *  - osc_lru_waitq / osc_lru_waiters: wait queue and number of threads
 *    sleeping in osc_lru_reserve() for a free LRU slot;
 *  - lru_shrink_min / lru_shrink_max: lower and upper batch sizes, in
 *    pages, used when shrinking an LRU (2MB and 32MB worth of pages).
 */
581 static DECLARE_WAIT_QUEUE_HEAD(osc_lru_waitq
);
582 static atomic_t osc_lru_waiters
= ATOMIC_INIT(0);
583 /* LRU pages are freed in batch mode. OSC should at least free this
584 * number of pages to avoid running out of LRU budget, and.. */
585 static const int lru_shrink_min
= 2 << (20 - PAGE_CACHE_SHIFT
); /* 2M */
586 /* free this number at most otherwise it will take too long time to finish. */
587 static const int lru_shrink_max
= 32 << (20 - PAGE_CACHE_SHIFT
); /* 32M */
589 /* Check if we can free LRU slots from this OSC. If there exists LRU waiters,
590 * we should free slots aggressively. In this way, slots are freed in a steady
591 * step to maintain fairness among OSCs.
593 * Return how many LRU pages should be freed. */
/*
 * @cli: client whose LRU usage is evaluated.
 *
 * 'pages' is half of the pages currently on @cli's LRU list.
 *  - With threads waiting for slots and fewer than lru_shrink_max slots
 *    left, the answer is min(pages, lru_shrink_max).
 *  - Otherwise, when fewer than ccc_lru_max / 16 slots are left, the
 *    per-user share ccc_lru_max / ccc_users is computed into 'tmp' and the
 *    answer is either min(pages, lru_shrink_max) or lru_shrink_min (0 if
 *    'pages' does not exceed lru_shrink_min).
 *
 * NOTE(review): the declaration of 'tmp', the comparison that selects
 * between the last two results (presumably pages > tmp) and the final
 * fall-through return are not visible in this listing.
 */
594 static int osc_cache_too_much(struct client_obd
*cli
)
596 struct cl_client_cache
*cache
= cli
->cl_cache
;
597 int pages
= atomic_read(&cli
->cl_lru_in_list
) >> 1;
599 if (atomic_read(&osc_lru_waiters
) > 0 &&
600 atomic_read(cli
->cl_lru_left
) < lru_shrink_max
)
601 /* drop lru pages aggressively */
602 return min(pages
, lru_shrink_max
);
604 /* if it's going to run out LRU slots, we should free some, but not
605 * too much to maintain fairness among OSCs. */
606 if (atomic_read(cli
->cl_lru_left
) < cache
->ccc_lru_max
>> 4) {
609 tmp
= cache
->ccc_lru_max
/ atomic_read(&cache
->ccc_users
);
611 return min(pages
, lru_shrink_max
);
613 return pages
> lru_shrink_min
? lru_shrink_min
: 0;
619 /* Return how many pages are not discarded in @pvec. */
/*
 * Try to discard the first @max_index pages collected in @pvec, using @io
 * as the owning io.
 *
 * For every page: cl_page_own_try() is attempted; an owned page that is
 * not in use is unmapped and discarded, and an owned page is disowned
 * again.  The reference held on the page is dropped with cl_page_put().
 * The result is @max_index minus 'count'.
 *
 * NOTE(review): the declarations of 'count'/'i' and the statement that
 * increments 'count' are not visible in this listing; presumably it counts
 * the pages actually discarded -- confirm.
 */
620 static int discard_pagevec(const struct lu_env
*env
, struct cl_io
*io
,
621 struct cl_page
**pvec
, int max_index
)
626 for (count
= 0, i
= 0; i
< max_index
; i
++) {
627 struct cl_page
*page
= pvec
[i
];
629 if (cl_page_own_try(env
, io
, page
) == 0) {
630 /* free LRU page only if nobody is using it.
631 * This check is necessary to avoid freeing the pages
632 * having already been removed from LRU and pinned
634 if (!cl_page_in_use(page
)) {
635 cl_page_unmap(env
, io
, page
);
636 cl_page_discard(env
, io
, page
);
639 cl_page_disown(env
, io
, page
);
641 cl_page_put(env
, page
);
644 return max_index
- count
;
648 * Drop @target of pages from LRU at most.
/*
 * Scan @cli's LRU list and discard up to @target pages.
 *
 * Outline of the visible code:
 *  - nothing to do when the LRU list is empty or @target <= 0;
 *  - a nested cl environment provides the page vector (oti_pvec) and a
 *    scratch io (oti_io);
 *  - with cl_lru_list_lock held, cl_lru_shrinkers is raised (osc_lru_del()
 *    checks it to avoid recursing into this function) and the scan budget
 *    is min(2 * @target, pages on the list);
 *  - each iteration looks at the head of the list; a page that is in use is
 *    rotated to the tail.  When the page belongs to a different object than
 *    the previous one, the lock is dropped, the collected pages are
 *    discarded, the old object is released and a CIT_MISC io with
 *    ci_ignore_layout set is initialised on the new object;
 *  - a candidate page is rotated to the tail and stored in the page vector;
 *    the vector is flushed through discard_pagevec() whenever it reaches
 *    OTI_PVEC_SIZE entries, and the loop ends once 'count' reaches @target;
 *  - afterwards the remaining pages are discarded, the object and the
 *    environment are released and cl_lru_shrinkers is lowered.
 *
 * Returns 'count' when it is positive, otherwise 'rc'.  'count' is reduced
 * by the number of pages discard_pagevec() reports as not discarded.
 *
 * NOTE(review): several declarations, the maxscan test, the reference
 * acquisition on pages/objects and the error paths are on lines missing
 * from this listing.
 */
650 int osc_lru_shrink(struct client_obd
*cli
, int target
)
652 struct cl_env_nest nest
;
655 struct cl_object
*clobj
= NULL
;
656 struct cl_page
**pvec
;
657 struct osc_page
*opg
;
663 LASSERT(atomic_read(&cli
->cl_lru_in_list
) >= 0);
664 if (atomic_read(&cli
->cl_lru_in_list
) == 0 || target
<= 0)
667 env
= cl_env_nested_get(&nest
);
671 pvec
= osc_env_info(env
)->oti_pvec
;
672 io
= &osc_env_info(env
)->oti_io
;
674 client_obd_list_lock(&cli
->cl_lru_list_lock
);
675 atomic_inc(&cli
->cl_lru_shrinkers
);
676 maxscan
= min(target
<< 1, atomic_read(&cli
->cl_lru_in_list
));
677 while (!list_empty(&cli
->cl_lru_list
)) {
678 struct cl_page
*page
;
683 opg
= list_entry(cli
->cl_lru_list
.next
, struct osc_page
,
685 page
= cl_page_top(opg
->ops_cl
.cpl_page
);
686 if (cl_page_in_use_noref(page
)) {
687 list_move_tail(&opg
->ops_lru
, &cli
->cl_lru_list
);
691 LASSERT(page
->cp_obj
!= NULL
);
692 if (clobj
!= page
->cp_obj
) {
693 struct cl_object
*tmp
= page
->cp_obj
;
696 client_obd_list_unlock(&cli
->cl_lru_list_lock
);
699 count
-= discard_pagevec(env
, io
, pvec
, index
);
703 cl_object_put(env
, clobj
);
709 io
->ci_ignore_layout
= 1;
710 rc
= cl_io_init(env
, io
, CIT_MISC
, clobj
);
712 client_obd_list_lock(&cli
->cl_lru_list_lock
);
721 /* move this page to the end of list as it will be discarded
722 * soon. The page will be finally removed from LRU list in
723 * osc_page_delete(). */
724 list_move_tail(&opg
->ops_lru
, &cli
->cl_lru_list
);
726 /* it's okay to grab a refcount here w/o holding lock because
727 * it has to grab cl_lru_list_lock to delete the page. */
729 pvec
[index
++] = page
;
730 if (++count
>= target
)
733 if (unlikely(index
== OTI_PVEC_SIZE
)) {
734 client_obd_list_unlock(&cli
->cl_lru_list_lock
);
735 count
-= discard_pagevec(env
, io
, pvec
, index
);
738 client_obd_list_lock(&cli
->cl_lru_list_lock
);
741 client_obd_list_unlock(&cli
->cl_lru_list_lock
);
744 count
-= discard_pagevec(env
, io
, pvec
, index
);
747 cl_object_put(env
, clobj
);
749 cl_env_nested_put(&nest
, env
);
751 atomic_dec(&cli
->cl_lru_shrinkers
);
752 return count
> 0 ? count
: rc
;
/*
 * Return @opg to @cli's LRU list after a transfer has completed.
 *
 * Pages that do not take part in LRU accounting (ops_in_lru clear) are
 * skipped.  Otherwise the busy counter is lowered and, with
 * cl_lru_list_lock held, a page that is not yet on a list is linked to the
 * tail of cl_lru_list and cl_lru_in_list is raised; 'wakeup' records
 * whether any thread is waiting for LRU slots.  After the lock is dropped
 * the LRU is shrunk by osc_cache_too_much() pages and all waiters on
 * osc_lru_waitq are woken.
 *
 * NOTE(review): the declaration of 'wakeup', the early return and the test
 * guarding the shrink/wake-up (presumably 'if (wakeup)') are not visible in
 * this listing.
 */
755 static void osc_lru_add(struct client_obd
*cli
, struct osc_page
*opg
)
759 if (!opg
->ops_in_lru
)
762 atomic_dec(&cli
->cl_lru_busy
);
763 client_obd_list_lock(&cli
->cl_lru_list_lock
);
764 if (list_empty(&opg
->ops_lru
)) {
765 list_move_tail(&opg
->ops_lru
, &cli
->cl_lru_list
);
766 atomic_inc_return(&cli
->cl_lru_in_list
);
767 wakeup
= atomic_read(&osc_lru_waiters
) > 0;
769 client_obd_list_unlock(&cli
->cl_lru_list_lock
);
772 osc_lru_shrink(cli
, osc_cache_too_much(cli
));
773 wake_up_all(&osc_lru_waitq
);
777 /* Delete a page from the LRU list. A page can be removed from the LRU list
778 * for two reasons: it was redirtied, or it was deleted from the page cache. */
/*
 * @cli: client owning the LRU;  @opg: page to remove;  @del: true when the
 * page is being deleted (osc_page_delete()), false when it is only taken
 * off the LRU to become in-flight (osc_page_transfer_add()).
 *
 * For a page that takes part in LRU accounting, cl_lru_list_lock is taken;
 * a page that is on the list is unlinked and cl_lru_in_list lowered.  The
 * busy counter is adjusted, a slot is given back to cl_lru_left, and --
 * unless a shrinker is already running on this client or the thread is
 * under memory pressure -- the LRU is shrunk further by
 * osc_cache_too_much() pages before one waiter on osc_lru_waitq is woken.
 * The last visible statement asserts that the page is not on the LRU list.
 *
 * NOTE(review): the tests on @del that guard the busy-counter updates and
 * the slot release are on lines missing from this listing; presumably the
 * slot is released only when @del is true -- confirm.
 */
779 static void osc_lru_del(struct client_obd
*cli
, struct osc_page
*opg
, bool del
)
781 if (opg
->ops_in_lru
) {
782 client_obd_list_lock(&cli
->cl_lru_list_lock
);
783 if (!list_empty(&opg
->ops_lru
)) {
784 LASSERT(atomic_read(&cli
->cl_lru_in_list
) > 0);
785 list_del_init(&opg
->ops_lru
);
786 atomic_dec(&cli
->cl_lru_in_list
);
788 atomic_inc(&cli
->cl_lru_busy
);
790 LASSERT(atomic_read(&cli
->cl_lru_busy
) > 0);
791 atomic_dec(&cli
->cl_lru_busy
);
793 client_obd_list_unlock(&cli
->cl_lru_list_lock
);
795 atomic_inc(cli
->cl_lru_left
);
796 /* this is a great place to release more LRU pages if
797 * this osc occupies too many LRU pages and kernel is
798 * stealing one of them.
799 * cl_lru_shrinkers is to avoid recursive call in case
800 * we're already in the context of osc_lru_shrink(). */
801 if (atomic_read(&cli
->cl_lru_shrinkers
) == 0 &&
802 !memory_pressure_get())
803 osc_lru_shrink(cli
, osc_cache_too_much(cli
));
804 wake_up(&osc_lru_waitq
);
807 LASSERT(list_empty(&opg
->ops_lru
));
/*
 * Upper bound on the pages to reclaim from @cli in one go: half of the
 * pages on its LRU list, capped at lru_shrink_max.
 */
811 static inline int max_to_shrink(struct client_obd
*cli
)
813 return min(atomic_read(&cli
->cl_lru_in_list
) >> 1, lru_shrink_max
);
/*
 * Free LRU slots on behalf of @cli, first from its own LRU and then from
 * the other clients that share the same cl_client_cache.
 *
 * The client's own LRU is shrunk by lru_shrink_min pages.  After that, with
 * ccc_lru_lock held, ccc_lru_shrinkers is bumped, @cli is rotated to the
 * tail of the cache-wide client list and up to ccc_users - 1 clients are
 * visited from the head of that list.  Each visited client is rotated to
 * the tail and, if it has pages on its LRU, shrunk by max_to_shrink()
 * pages with ccc_lru_lock dropped around the call.  Note that 'cli' is
 * reassigned inside the loop, so the final debug message refers to the
 * last client visited.
 *
 * NOTE(review): the declarations of 'rc'/'max_scans', the tests on 'rc'
 * that end the function or the loop early, the member argument of
 * list_entry() and the return are not visible in this listing; presumably
 * the number of freed pages is returned -- confirm.
 */
816 static int osc_lru_reclaim(struct client_obd
*cli
)
818 struct cl_client_cache
*cache
= cli
->cl_cache
;
822 LASSERT(cache
!= NULL
);
824 rc
= osc_lru_shrink(cli
, lru_shrink_min
);
826 CDEBUG(D_CACHE
, "%s: Free %d pages from own LRU: %p.\n",
827 cli
->cl_import
->imp_obd
->obd_name
, rc
, cli
);
831 CDEBUG(D_CACHE
, "%s: cli %p no free slots, pages: %d, busy: %d.\n",
832 cli
->cl_import
->imp_obd
->obd_name
, cli
,
833 atomic_read(&cli
->cl_lru_in_list
),
834 atomic_read(&cli
->cl_lru_busy
));
836 /* Reclaim LRU slots from other client_obd as it can't free enough
837 * from its own. This should rarely happen. */
838 spin_lock(&cache
->ccc_lru_lock
);
839 LASSERT(!list_empty(&cache
->ccc_lru
));
841 cache
->ccc_lru_shrinkers
++;
842 list_move_tail(&cli
->cl_lru_osc
, &cache
->ccc_lru
);
844 max_scans
= atomic_read(&cache
->ccc_users
);
845 while (--max_scans
> 0 && !list_empty(&cache
->ccc_lru
)) {
846 cli
= list_entry(cache
->ccc_lru
.next
, struct client_obd
,
849 CDEBUG(D_CACHE
, "%s: cli %p LRU pages: %d, busy: %d.\n",
850 cli
->cl_import
->imp_obd
->obd_name
, cli
,
851 atomic_read(&cli
->cl_lru_in_list
),
852 atomic_read(&cli
->cl_lru_busy
));
854 list_move_tail(&cli
->cl_lru_osc
, &cache
->ccc_lru
);
855 if (atomic_read(&cli
->cl_lru_in_list
) > 0) {
856 spin_unlock(&cache
->ccc_lru_lock
);
858 rc
= osc_lru_shrink(cli
, max_to_shrink(cli
));
859 spin_lock(&cache
->ccc_lru_lock
);
864 spin_unlock(&cache
->ccc_lru_lock
);
866 CDEBUG(D_CACHE
, "%s: cli %p freed %d pages.\n",
867 cli
->cl_import
->imp_obd
->obd_name
, cli
, rc
);
871 static int osc_lru_reserve(const struct lu_env
*env
, struct osc_object
*obj
,
872 struct osc_page
*opg
)
874 struct l_wait_info lwi
= LWI_INTR(LWI_ON_SIGNAL_NOOP
, NULL
);
875 struct client_obd
*cli
= osc_cli(obj
);
878 if (cli
->cl_cache
== NULL
) /* shall not be in LRU */
881 LASSERT(atomic_read(cli
->cl_lru_left
) >= 0);
882 while (!atomic_add_unless(cli
->cl_lru_left
, -1, 0)) {
885 /* run out of LRU spaces, try to drop some by itself */
886 rc
= osc_lru_reclaim(cli
);
894 /* slowest case: all cached pages are busy, so notify
895 * other OSCs that we are short of LRU slots. */
896 atomic_inc(&osc_lru_waiters
);
898 gen
= atomic_read(&cli
->cl_lru_in_list
);
899 rc
= l_wait_event(osc_lru_waitq
,
900 atomic_read(cli
->cl_lru_left
) > 0 ||
901 (atomic_read(&cli
->cl_lru_in_list
) > 0 &&
902 gen
!= atomic_read(&cli
->cl_lru_in_list
)),
905 atomic_dec(&osc_lru_waiters
);
911 atomic_inc(&cli
->cl_lru_busy
);