/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2012, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * lustre/ptlrpc/ptlrpcd.c
 */
/** \defgroup ptlrpcd PortalRPC daemon
 *
 * ptlrpcd is a special thread with its own set where other users might add
 * requests when they don't want to wait for their completion.
 * PtlRPCD will take care of sending such requests and then processing their
 * replies and calling completion callbacks as necessary.
 * The callbacks are called directly from ptlrpcd context.
 * It is important to never significantly block (esp. on RPCs!) within such
 * completion handlers, or a deadlock might occur where ptlrpcd enters some
 * callback that attempts to send another RPC and waits for it to return,
 * during which time ptlrpcd is completely blocked; so e.g. if an import
 * fails, recovery cannot progress because connection requests are also
 * sent by ptlrpcd.
 *
 * @{
 */
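/*
 * Usage sketch (illustrative only, not built): the completion callback
 * pattern the warning above is about. A callback installed through
 * rq_interpret_reply runs directly in ptlrpcd context, so it may record
 * status and wake up waiters, but must never send another RPC and wait for
 * its reply. The example_* names below are hypothetical, not part of this
 * file.
 */
#if 0
struct example_async_args {
        struct completion *eaa_done;
        int *eaa_rc;
};

static int example_interpret(const struct lu_env *env,
                             struct ptlrpc_request *req, void *args, int rc)
{
        struct example_async_args *eaa = args;

        /* Non-blocking work only: publish the result, wake the waiter. */
        *eaa->eaa_rc = rc;
        complete(eaa->eaa_done);
        return 0;
}
#endif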
#define DEBUG_SUBSYSTEM S_RPC

#include "../../include/linux/libcfs/libcfs.h"

#include "../include/lustre_net.h"
#include "../include/lustre_lib.h"
#include "../include/lustre_ha.h"
#include "../include/obd_class.h"	/* for obd_zombie */
#include "../include/obd_support.h"	/* for OBD_FAIL_CHECK */
#include "../include/cl_object.h"	/* cl_env_{get,put}() */
#include "../include/lprocfs_status.h"

#include "ptlrpc_internal.h"
struct ptlrpcd {
        int pd_size;
        int pd_index;
        int pd_nthreads;
        struct ptlrpcd_ctl pd_thread_rcv;
        struct ptlrpcd_ctl pd_threads[0];
};
static int max_ptlrpcds;
module_param(max_ptlrpcds, int, 0644);
MODULE_PARM_DESC(max_ptlrpcds, "Max ptlrpcd thread count to be started.");

static int ptlrpcd_bind_policy = PDB_POLICY_PAIR;
module_param(ptlrpcd_bind_policy, int, 0644);
MODULE_PARM_DESC(ptlrpcd_bind_policy, "Ptlrpcd threads binding mode.");
static struct ptlrpcd *ptlrpcds;

struct mutex ptlrpcd_mutex;
static int ptlrpcd_users;
void ptlrpcd_wake(struct ptlrpc_request *req)
{
        struct ptlrpc_request_set *rq_set = req->rq_set;

        LASSERT(rq_set != NULL);

        wake_up(&rq_set->set_waitq);
}
EXPORT_SYMBOL(ptlrpcd_wake);
static struct ptlrpcd_ctl *
ptlrpcd_select_pc(struct ptlrpc_request *req, pdl_policy_t policy, int index)
{
        int idx = 0;

        if (req != NULL && req->rq_send_state != LUSTRE_IMP_FULL)
                return &ptlrpcds->pd_thread_rcv;

        switch (policy) {
        case PDL_POLICY_SAME:
                idx = smp_processor_id() % ptlrpcds->pd_nthreads;
                break;
        case PDL_POLICY_LOCAL:
                /* Before the CPU partition patches are available, process it
                 * the same way as "PDL_POLICY_ROUND". */
# ifdef CFS_CPU_MODE_NUMA
# warning "fix this code to use new CPU partition APIs"
# endif
                /* Fall through to PDL_POLICY_ROUND until the CPU
                 * partition patches are available. */
                index = -1;
        case PDL_POLICY_PREFERRED:
                if (index >= 0 && index < num_online_cpus()) {
                        idx = index % ptlrpcds->pd_nthreads;
                        break;
                }
                /* Fall through to PDL_POLICY_ROUND for bad index. */
        default:
                /* Fall through to PDL_POLICY_ROUND for unknown policy. */
        case PDL_POLICY_ROUND:
                /* We do not care whether it is strict load balance. */
                idx = ptlrpcds->pd_index + 1;
                if (idx == smp_processor_id())
                        idx++;
                idx %= ptlrpcds->pd_nthreads;
                ptlrpcds->pd_index = idx;
                break;
        }

        return &ptlrpcds->pd_threads[idx];
}
/**
 * Move all requests from an existing request set to the ptlrpcd queue.
 * All requests from the set must be in phase RQ_PHASE_NEW.
 */
void ptlrpcd_add_rqset(struct ptlrpc_request_set *set)
{
        struct list_head *tmp, *pos;
        struct ptlrpcd_ctl *pc;
        struct ptlrpc_request_set *new;
        int count, i;

        pc = ptlrpcd_select_pc(NULL, PDL_POLICY_LOCAL, -1);
        new = pc->pc_set;

        list_for_each_safe(pos, tmp, &set->set_requests) {
                struct ptlrpc_request *req =
                        list_entry(pos, struct ptlrpc_request,
                                   rq_set_chain);

                LASSERT(req->rq_phase == RQ_PHASE_NEW);
                req->rq_set = new;
                req->rq_queued_time = cfs_time_current();
        }

        spin_lock(&new->set_new_req_lock);
        list_splice_init(&set->set_requests, &new->set_new_requests);
        i = atomic_read(&set->set_remaining);
        count = atomic_add_return(i, &new->set_new_count);
        atomic_set(&set->set_remaining, 0);
        spin_unlock(&new->set_new_req_lock);
        if (count == i) {
                wake_up(&new->set_waitq);

                /* XXX: It may be unnecessary to wake up all the partners.
                 *      But to guarantee the async RPC can be processed ASAP,
                 *      we have no better choice. It may be fixed in the
                 *      future. */
                for (i = 0; i < pc->pc_npartners; i++)
                        wake_up(&pc->pc_partners[i]->pc_set->set_waitq);
        }
}
EXPORT_SYMBOL(ptlrpcd_add_rqset);
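/*
 * Usage sketch (illustrative only, not built): batch several freshly
 * prepared requests on a private set, then hand them all to ptlrpcd at
 * once. example_submit_batch is a hypothetical caller, not part of this
 * file.
 */
#if 0
static int example_submit_batch(struct ptlrpc_request **reqs, int nr)
{
        struct ptlrpc_request_set *set;
        int i;

        set = ptlrpc_prep_set();
        if (set == NULL)
                return -ENOMEM;

        /* Every request added here must still be in RQ_PHASE_NEW. */
        for (i = 0; i < nr; i++)
                ptlrpc_set_add_req(set, reqs[i]);

        /* Splices all requests onto a ptlrpcd set and wakes it up;
         * "set" itself is left empty and can be destroyed. */
        ptlrpcd_add_rqset(set);
        ptlrpc_set_destroy(set);
        return 0;
}
#endif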
/**
 * Return transferred RPCs count.
 */
static int ptlrpcd_steal_rqset(struct ptlrpc_request_set *des,
                               struct ptlrpc_request_set *src)
{
        struct list_head *tmp, *pos;
        struct ptlrpc_request *req;
        int rc = 0;

        spin_lock(&src->set_new_req_lock);
        if (likely(!list_empty(&src->set_new_requests))) {
                list_for_each_safe(pos, tmp, &src->set_new_requests) {
                        req = list_entry(pos, struct ptlrpc_request,
                                         rq_set_chain);
                        req->rq_set = des;
                }
                list_splice_init(&src->set_new_requests,
                                 &des->set_requests);
                rc = atomic_read(&src->set_new_count);
                atomic_add(rc, &des->set_remaining);
                atomic_set(&src->set_new_count, 0);
        }
        spin_unlock(&src->set_new_req_lock);
        return rc;
}
/**
 * Requests that are added to the ptlrpcd queue are sent via
 * ptlrpcd_check->ptlrpc_check_set().
 */
void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx)
{
        struct ptlrpcd_ctl *pc;

        if (req->rq_reqmsg)
                lustre_msg_set_jobid(req->rq_reqmsg, NULL);

        spin_lock(&req->rq_lock);
        if (req->rq_invalid_rqset) {
                struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5),
                                                     back_to_sleep, NULL);

                req->rq_invalid_rqset = 0;
                spin_unlock(&req->rq_lock);
                l_wait_event(req->rq_set_waitq, (req->rq_set == NULL), &lwi);
        } else if (req->rq_set) {
                /* If we have a valid "rq_set", just reuse it to avoid double
                 * linked. */
                LASSERT(req->rq_phase == RQ_PHASE_NEW);
                LASSERT(req->rq_send_state == LUSTRE_IMP_REPLAY);

                /* ptlrpc_check_set will decrease the count */
                atomic_inc(&req->rq_set->set_remaining);
                spin_unlock(&req->rq_lock);
                wake_up(&req->rq_set->set_waitq);
                return;
        } else {
                spin_unlock(&req->rq_lock);
        }

        pc = ptlrpcd_select_pc(req, policy, idx);

        DEBUG_REQ(D_INFO, req, "add req [%p] to pc [%s:%d]",
                  req, pc->pc_name, pc->pc_index);

        ptlrpc_set_add_new_req(pc, req);
}
EXPORT_SYMBOL(ptlrpcd_add_req);
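/*
 * Usage sketch (illustrative only, not built): fire-and-forget submission
 * of a prepared request. The (policy, index) pair feeds ptlrpcd_select_pc()
 * above; PDL_POLICY_ROUND with index -1 simply round-robins across the
 * pool. example_send_async and example_interpret are hypothetical names.
 */
#if 0
static void example_send_async(struct ptlrpc_request *req)
{
        /* Optional completion callback; it runs in ptlrpcd context and
         * must not block (see the comment at the top of this file). */
        req->rq_interpret_reply = example_interpret;

        /* Ownership passes to ptlrpcd; do not wait on req here. */
        ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
}
#endif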
static inline void ptlrpc_reqset_get(struct ptlrpc_request_set *set)
{
        atomic_inc(&set->set_refcount);
}
/**
 * Check if there is more work to do on ptlrpcd set.
 * Returns 1 if yes.
 */
static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc)
{
        struct list_head *tmp, *pos;
        struct ptlrpc_request *req;
        struct ptlrpc_request_set *set = pc->pc_set;
        int rc = 0;
        int rc2;

        if (atomic_read(&set->set_new_count)) {
                spin_lock(&set->set_new_req_lock);
                if (likely(!list_empty(&set->set_new_requests))) {
                        list_splice_init(&set->set_new_requests,
                                         &set->set_requests);
                        atomic_add(atomic_read(&set->set_new_count),
                                   &set->set_remaining);
                        atomic_set(&set->set_new_count, 0);
                        /*
                         * Need to calculate its timeout.
                         */
                        rc = 1;
                }
                spin_unlock(&set->set_new_req_lock);
        }

        /* We should call lu_env_refill() before handling new requests to make
         * sure that the env keys the requests depend on really exist.
         */
        rc2 = lu_env_refill(env);
        if (rc2 != 0) {
                /*
                 * XXX This is a very awkward situation, because
                 * execution can neither continue (request
                 * interpreters assume that env is set up), nor repeat
                 * the loop (as this potentially results in a tight
                 * loop of -ENOMEM's).
                 *
                 * Fortunately, refill only ever does something when
                 * new modules are loaded, i.e., early during boot up.
                 */
                CERROR("Failure to refill session: %d\n", rc2);
                return rc;
        }

        if (atomic_read(&set->set_remaining))
                rc |= ptlrpc_check_set(env, set);

        /* NB: ptlrpc_check_set has already moved completed requests to the
         * head of set::set_requests */
        list_for_each_safe(pos, tmp, &set->set_requests) {
                req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
                if (req->rq_phase != RQ_PHASE_COMPLETE)
                        break;

                list_del_init(&req->rq_set_chain);
                req->rq_set = NULL;
                ptlrpc_req_finished(req);
        }

        if (rc == 0) {
                /*
                 * If new requests have been added, make sure to wake up.
                 */
                rc = atomic_read(&set->set_new_count);

                /* If we have nothing to do, check whether we can take some
                 * work from our partner threads. */
                if (rc == 0 && pc->pc_npartners > 0) {
                        struct ptlrpcd_ctl *partner;
                        struct ptlrpc_request_set *ps;
                        int first = pc->pc_cursor;

                        do {
                                partner = pc->pc_partners[pc->pc_cursor++];
                                if (pc->pc_cursor >= pc->pc_npartners)
                                        pc->pc_cursor = 0;
                                if (partner == NULL)
                                        continue;

                                spin_lock(&partner->pc_lock);
                                ps = partner->pc_set;
                                if (ps == NULL) {
                                        spin_unlock(&partner->pc_lock);
                                        continue;
                                }

                                ptlrpc_reqset_get(ps);
                                spin_unlock(&partner->pc_lock);

                                if (atomic_read(&ps->set_new_count)) {
                                        rc = ptlrpcd_steal_rqset(set, ps);
                                        if (rc > 0)
                                                CDEBUG(D_RPCTRACE, "transfer %d async RPCs [%d->%d]\n",
                                                       rc, partner->pc_index,
                                                       pc->pc_index);
                                }
                                ptlrpc_reqset_put(ps);
                        } while (rc == 0 && pc->pc_cursor != first);
                }
        }

        return rc;
}
/**
 * Main ptlrpcd thread.
 * ptlrpc's code paths like to execute in process context, so we have this
 * thread which spins on a set which contains the rpcs and sends them.
 */
static int ptlrpcd(void *arg)
{
        struct ptlrpcd_ctl *pc = arg;
        struct ptlrpc_request_set *set = pc->pc_set;
        struct lu_env env = { .le_ses = NULL };
        int rc, exit = 0;

#if defined(CONFIG_SMP)
        if (test_bit(LIOD_BIND, &pc->pc_flags)) {
                int index = pc->pc_index;

                if (index >= 0 && index < num_possible_cpus()) {
                        while (!cpu_online(index)) {
                                if (++index >= num_possible_cpus())
                                        index = 0;
                        }
                        set_cpus_allowed_ptr(current,
                                     cpumask_of_node(cpu_to_node(index)));
                }
        }
#endif
        /*
         * XXX So far only "client" ptlrpcd uses an environment. In
         * the future, ptlrpcd thread (or a thread-set) has to be given
         * an argument, describing its "scope".
         */
        rc = lu_context_init(&env.le_ctx,
                             LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF);
        complete(&pc->pc_starting);

        if (rc != 0)
                return rc;

        /*
         * This mainloop strongly resembles ptlrpc_set_wait() except that our
         * set never completes. ptlrpcd_check() calls ptlrpc_check_set() when
         * there are requests in the set. New requests come in on the set's
         * new_req_list and ptlrpcd_check() moves them into the set.
         */
        do {
                struct l_wait_info lwi;
                int timeout;

                timeout = ptlrpc_set_next_timeout(set);
                lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1),
                                  ptlrpc_expired_set, set);

                lu_context_enter(&env.le_ctx);
                l_wait_event(set->set_waitq,
                             ptlrpcd_check(&env, pc), &lwi);
                lu_context_exit(&env.le_ctx);

                /*
                 * Abort inflight rpcs for forced stop case.
                 */
                if (test_bit(LIOD_STOP, &pc->pc_flags)) {
                        if (test_bit(LIOD_FORCE, &pc->pc_flags))
                                ptlrpc_abort_set(set);
                        exit++;
                }

                /*
                 * Let's make one more loop to make sure that ptlrpcd_check()
                 * copied all raced new rpcs into the set so we can kill them.
                 */
        } while (exit < 2);

        /*
         * Wait for inflight requests to drain.
         */
        if (!list_empty(&set->set_requests))
                ptlrpc_set_wait(set);
        lu_context_fini(&env.le_ctx);

        complete(&pc->pc_finishing);

        return 0;
}
/* XXX: We want multiple CPU cores to share the async RPC load. So we start
 *      many ptlrpcd threads. We also want to reduce the ptlrpcd overhead
 *      caused by data transfer across CPU cores. So we bind each ptlrpcd
 *      thread to a specific CPU core. But binding all ptlrpcd threads may
 *      cause response delays when some CPU core(s) are busy with other
 *      loads.
 *
 *      For example: during "ls -l", some async RPCs for statahead are
 *      assigned to ptlrpcd_0, and ptlrpcd_0 is bound to CPU_0, but CPU_0
 *      may be quite busy with other, non-ptlrpcd work, such as "ls -l"
 *      itself (we want the "ls -l" thread, the statahead thread, and the
 *      ptlrpcd thread to run in parallel). In such a case the statahead
 *      async RPCs cannot be processed in time, which is unexpected. If
 *      ptlrpcd_0 could be re-scheduled on another CPU core, things might
 *      be better. But that breaks the data transfer policy above.
 *
 *      So we shouldn't blindly avoid cross-CPU data transfer. Instead we
 *      compromise: divide the ptlrpcd thread pool into two parts. One part
 *      is bound mode; each ptlrpcd thread in this part is bound to some CPU
 *      core. The other part is free mode; the ptlrpcd threads in that part
 *      can be scheduled on any CPU core. We specify a partnership between
 *      bound mode ptlrpcd thread(s) and free mode ptlrpcd thread(s), and
 *      the async RPC load within a set of partners is shared.
 *
 *      This partly avoids cross-CPU data transfer (if the bound mode
 *      ptlrpcd thread can be scheduled in time), and tries to guarantee
 *      that async RPCs are processed ASAP (as long as the free mode ptlrpcd
 *      thread can be scheduled in time).
 *
 *      As for how to specify the partnership between bound mode ptlrpcd
 *      thread(s) and free mode ptlrpcd thread(s), the simplest way is to
 *      use <free, bound> pairs. In the future we can specify more complex
 *      partnerships based on the CPU partition patches. But until such
 *      patches are available, we prefer the simplest scheme.
 */
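/*
 * Illustration (not from the original source): with nthreads == 4 and the
 * default PDB_POLICY_PAIR, ptlrpcd_bind() below produces two <free, bound>
 * pairs that share their queued RPCs through the pc_partners arrays:
 *
 *	ptlrpcd_0 (free)  <-->  ptlrpcd_1 (bound, LIOD_BIND set)
 *	ptlrpcd_2 (free)  <-->  ptlrpcd_3 (bound, LIOD_BIND set)
 *
 * The bound thread keeps cache/NUMA locality; if its CPU is busy, the free
 * partner can run elsewhere and pull new requests over via
 * ptlrpcd_steal_rqset().
 */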
# ifdef CFS_CPU_MODE_NUMA
# warning "fix ptlrpcd_bind() to use new CPU partition APIs"
# endif
static int ptlrpcd_bind(int index, int max)
{
        struct ptlrpcd_ctl *pc;
        int rc = 0;
#if defined(CONFIG_NUMA)
        cpumask_t mask;
#endif

        LASSERT(index <= max - 1);
        pc = &ptlrpcds->pd_threads[index];
        switch (ptlrpcd_bind_policy) {
        case PDB_POLICY_NONE:
                pc->pc_npartners = -1;
                break;
        case PDB_POLICY_FULL:
                pc->pc_npartners = 0;
                set_bit(LIOD_BIND, &pc->pc_flags);
                break;
        case PDB_POLICY_PAIR:
                LASSERT(max % 2 == 0);
                pc->pc_npartners = 1;
                break;
        case PDB_POLICY_NEIGHBOR:
#if defined(CONFIG_NUMA)
        {
                int i;

                cpumask_copy(&mask, cpumask_of_node(cpu_to_node(index)));
                for (i = max; i < num_online_cpus(); i++)
                        cpumask_clear_cpu(i, &mask);
                pc->pc_npartners = cpumask_weight(&mask) - 1;
                set_bit(LIOD_BIND, &pc->pc_flags);
        }
#else
                LASSERT(max >= 3);
                pc->pc_npartners = 2;
#endif
                break;
        default:
                CERROR("unknown ptlrpcd bind policy %d\n",
                       ptlrpcd_bind_policy);
                rc = -EINVAL;
        }

        if (rc == 0 && pc->pc_npartners > 0) {
                pc->pc_partners = kcalloc(pc->pc_npartners,
                                          sizeof(struct ptlrpcd_ctl *),
                                          GFP_NOFS);
                if (pc->pc_partners == NULL) {
                        pc->pc_npartners = 0;
                        rc = -ENOMEM;
                } else {
                        switch (ptlrpcd_bind_policy) {
                        case PDB_POLICY_PAIR:
                                if (index & 0x1) {
                                        set_bit(LIOD_BIND, &pc->pc_flags);
                                        pc->pc_partners[0] = &ptlrpcds->
                                                pd_threads[index - 1];
                                        ptlrpcds->pd_threads[index - 1].
                                                pc_partners[0] = pc;
                                }
                                break;
                        case PDB_POLICY_NEIGHBOR:
#if defined(CONFIG_NUMA)
                        {
                                struct ptlrpcd_ctl *ppc;
                                int i, pidx;

                                /* partners are cores in the same NUMA node.
                                 * setup partnership only with ptlrpcd threads
                                 * that are already initialized
                                 */
                                for (pidx = 0, i = 0; i < index; i++) {
                                        if (cpumask_test_cpu(i, &mask)) {
                                                ppc = &ptlrpcds->pd_threads[i];
                                                pc->pc_partners[pidx++] = ppc;
                                                ppc->pc_partners[ppc->
                                                        pc_npartners++] = pc;
                                        }
                                }
                                /* adjust number of partners to the number
                                 * of partnerships really set up */
                                pc->pc_npartners = pidx;
                        }
#else
                                if (index & 0x1)
                                        set_bit(LIOD_BIND, &pc->pc_flags);
                                if (index > 0) {
                                        pc->pc_partners[0] = &ptlrpcds->
                                                pd_threads[index - 1];
                                        ptlrpcds->pd_threads[index - 1].
                                                pc_partners[1] = pc;
                                        if (index == max - 1) {
                                                pc->pc_partners[1] =
                                                        &ptlrpcds->pd_threads[0];
                                                ptlrpcds->pd_threads[0].
                                                        pc_partners[0] = pc;
                                        }
                                }
#endif
                                break;
                        }
                }
        }

        return rc;
}
int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc)
{
        int rc;

        /*
         * Do not allow starting a second thread for one pc.
         */
        if (test_and_set_bit(LIOD_START, &pc->pc_flags)) {
                CWARN("Starting second thread (%s) for same pc %p\n",
                      name, pc);
                return 0;
        }

        pc->pc_index = index;
        init_completion(&pc->pc_starting);
        init_completion(&pc->pc_finishing);
        spin_lock_init(&pc->pc_lock);
        strlcpy(pc->pc_name, name, sizeof(pc->pc_name));
        pc->pc_set = ptlrpc_prep_set();
        if (pc->pc_set == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        /*
         * So far only "client" ptlrpcd uses an environment. In the future,
         * ptlrpcd thread (or a thread-set) has to be given an argument,
         * describing its "scope".
         */
        rc = lu_context_init(&pc->pc_env.le_ctx, LCT_CL_THREAD|LCT_REMEMBER);
        if (rc != 0)
                goto out_set;

        {
                struct task_struct *task;

                if (index >= 0) {
                        rc = ptlrpcd_bind(index, max);
                        if (rc < 0)
                                goto out_env;
                }

                task = kthread_run(ptlrpcd, pc, "%s", pc->pc_name);
                if (IS_ERR(task)) {
                        rc = PTR_ERR(task);
                        goto out_env;
                }

                wait_for_completion(&pc->pc_starting);
        }
        return 0;

out_env:
        lu_context_fini(&pc->pc_env.le_ctx);

out_set:
        if (pc->pc_set != NULL) {
                struct ptlrpc_request_set *set = pc->pc_set;

                spin_lock(&pc->pc_lock);
                pc->pc_set = NULL;
                spin_unlock(&pc->pc_lock);
                ptlrpc_set_destroy(set);
        }
        clear_bit(LIOD_BIND, &pc->pc_flags);

out:
        clear_bit(LIOD_START, &pc->pc_flags);
        return rc;
}
void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force)
{
        if (!test_bit(LIOD_START, &pc->pc_flags)) {
                CWARN("Thread for pc %p was not started\n", pc);
                return;
        }

        set_bit(LIOD_STOP, &pc->pc_flags);
        if (force)
                set_bit(LIOD_FORCE, &pc->pc_flags);
        wake_up(&pc->pc_set->set_waitq);
}
void ptlrpcd_free(struct ptlrpcd_ctl *pc)
{
        struct ptlrpc_request_set *set = pc->pc_set;

        if (!test_bit(LIOD_START, &pc->pc_flags)) {
                CWARN("Thread for pc %p was not started\n", pc);
                goto out;
        }

        wait_for_completion(&pc->pc_finishing);
        lu_context_fini(&pc->pc_env.le_ctx);

        spin_lock(&pc->pc_lock);
        pc->pc_set = NULL;
        spin_unlock(&pc->pc_lock);
        ptlrpc_set_destroy(set);

        clear_bit(LIOD_START, &pc->pc_flags);
        clear_bit(LIOD_STOP, &pc->pc_flags);
        clear_bit(LIOD_FORCE, &pc->pc_flags);
        clear_bit(LIOD_BIND, &pc->pc_flags);

out:
        if (pc->pc_npartners > 0) {
                LASSERT(pc->pc_partners != NULL);

                kfree(pc->pc_partners);
                pc->pc_partners = NULL;
        }
        pc->pc_npartners = 0;
}
static void ptlrpcd_fini(void)
{
        int i;

        if (ptlrpcds != NULL) {
                for (i = 0; i < ptlrpcds->pd_nthreads; i++)
                        ptlrpcd_stop(&ptlrpcds->pd_threads[i], 0);
                for (i = 0; i < ptlrpcds->pd_nthreads; i++)
                        ptlrpcd_free(&ptlrpcds->pd_threads[i]);
                ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0);
                ptlrpcd_free(&ptlrpcds->pd_thread_rcv);
                kfree(ptlrpcds);
                ptlrpcds = NULL;
        }
}
static int ptlrpcd_init(void)
{
        int nthreads = num_online_cpus();
        char name[16];
        int size, i = -1, j, rc = 0;

        if (max_ptlrpcds > 0 && max_ptlrpcds < nthreads)
                nthreads = max_ptlrpcds;
        if (nthreads < 2)
                nthreads = 2;
        if (nthreads < 3 && ptlrpcd_bind_policy == PDB_POLICY_NEIGHBOR)
                ptlrpcd_bind_policy = PDB_POLICY_PAIR;
        else if (nthreads % 2 != 0 && ptlrpcd_bind_policy == PDB_POLICY_PAIR)
                nthreads &= ~1; /* make sure it is even */

        size = offsetof(struct ptlrpcd, pd_threads[nthreads]);
        ptlrpcds = kzalloc(size, GFP_NOFS);
        if (ptlrpcds == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        snprintf(name, sizeof(name), "ptlrpcd_rcv");
        set_bit(LIOD_RECOVERY, &ptlrpcds->pd_thread_rcv.pc_flags);
        rc = ptlrpcd_start(-1, nthreads, name, &ptlrpcds->pd_thread_rcv);
        if (rc < 0)
                goto out;

        /* XXX: We start nthreads ptlrpc daemons. Each of them can process any
         *      non-recovery async RPC to improve overall async RPC
         *      efficiency.
         *
         *      But there are some issues with async I/O RPCs and async
         *      non-I/O RPCs processed in the same set under some cases. The
         *      ptlrpcd may be blocked by some async I/O RPC(s), which then
         *      prevents other async non-I/O RPC(s) from being processed in
         *      time.
         *
         *      Maybe we should distinguish blocking async RPCs from
         *      non-blocking ones, and process them in different ptlrpcd sets
         *      to avoid unnecessary dependency. But how to distribute the
         *      async RPC load among all the ptlrpc daemons then becomes
         *      another problem. */
        for (i = 0; i < nthreads; i++) {
                snprintf(name, sizeof(name), "ptlrpcd_%d", i);
                rc = ptlrpcd_start(i, nthreads, name,
                                   &ptlrpcds->pd_threads[i]);
                if (rc < 0)
                        goto out;
        }

        ptlrpcds->pd_size = size;
        ptlrpcds->pd_index = 0;
        ptlrpcds->pd_nthreads = nthreads;

out:
        if (rc != 0 && ptlrpcds != NULL) {
                for (j = 0; j <= i; j++)
                        ptlrpcd_stop(&ptlrpcds->pd_threads[j], 0);
                for (j = 0; j <= i; j++)
                        ptlrpcd_free(&ptlrpcds->pd_threads[j]);
                ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0);
                ptlrpcd_free(&ptlrpcds->pd_thread_rcv);
                kfree(ptlrpcds);
                ptlrpcds = NULL;
        }

        return rc;
}
int ptlrpcd_addref(void)
{
        int rc = 0;

        mutex_lock(&ptlrpcd_mutex);
        if (++ptlrpcd_users == 1)
                rc = ptlrpcd_init();
        mutex_unlock(&ptlrpcd_mutex);
        return rc;
}
EXPORT_SYMBOL(ptlrpcd_addref);
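/*
 * Usage sketch (illustrative only, not built): a client component holds a
 * reference on the shared ptlrpcd pool for its whole lifetime. The
 * example_client_* hooks are hypothetical, not functions from this file.
 */
#if 0
static int example_client_setup(void)
{
        int rc;

        /* The first user triggers ptlrpcd_init() and starts the threads. */
        rc = ptlrpcd_addref();
        if (rc != 0)
                return rc;

        /* ... issue async RPCs via ptlrpcd_add_req() ... */
        return 0;
}

static void example_client_cleanup(void)
{
        /* The last user triggers ptlrpcd_fini() and stops the threads. */
        ptlrpcd_decref();
}
#endif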
void ptlrpcd_decref(void)
{
        mutex_lock(&ptlrpcd_mutex);
        if (--ptlrpcd_users == 0)
                ptlrpcd_fini();
        mutex_unlock(&ptlrpcd_mutex);
}
EXPORT_SYMBOL(ptlrpcd_decref);

/** @} ptlrpcd */