]>
Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
30 | * Copyright (c) 2012, Intel Corporation. | |
31 | */ | |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | */ | |
36 | ||
37 | #define DEBUG_SUBSYSTEM S_RPC | |
38 | ||
9fdaf8c0 | 39 | #include "../../include/linux/libcfs/libcfs.h" |
d7e09d03 PT |
40 | # ifdef __mips64__ |
41 | # include <linux/kernel.h> | |
42 | # endif | |
43 | ||
e27db149 GKH |
44 | #include "../include/obd_class.h" |
45 | #include "../include/lustre_net.h" | |
46 | #include "../include/lustre_sec.h" | |
d7e09d03 PT |
47 | #include "ptlrpc_internal.h" |
48 | ||
49 | lnet_handle_eq_t ptlrpc_eq_h; | |
50 | ||
51 | /* | |
52 | * Client's outgoing request callback | |
53 | */ | |
54 | void request_out_callback(lnet_event_t *ev) | |
55 | { | |
d0bfef31 | 56 | struct ptlrpc_cb_id *cbid = ev->md.user_ptr; |
d7e09d03 | 57 | struct ptlrpc_request *req = cbid->cbid_arg; |
d7e09d03 | 58 | |
3949015e KM |
59 | LASSERT(ev->type == LNET_EVENT_SEND || |
60 | ev->type == LNET_EVENT_UNLINK); | |
61 | LASSERT(ev->unlinked); | |
d7e09d03 PT |
62 | |
63 | DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status); | |
64 | ||
65 | sptlrpc_request_out_callback(req); | |
cf378ff7 | 66 | spin_lock(&req->rq_lock); |
7264b8a5 | 67 | req->rq_real_sent = get_seconds(); |
cf378ff7 AL |
68 | if (ev->unlinked) |
69 | req->rq_req_unlink = 0; | |
d7e09d03 PT |
70 | |
71 | if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) { | |
72 | ||
73 | /* Failed send: make it seem like the reply timed out, just | |
74 | * like failing sends in client.c does currently... */ | |
75 | ||
d7e09d03 | 76 | req->rq_net_err = 1; |
d7e09d03 PT |
77 | ptlrpc_client_wake_req(req); |
78 | } | |
cf378ff7 | 79 | spin_unlock(&req->rq_lock); |
d7e09d03 PT |
80 | |
81 | ptlrpc_req_finished(req); | |
d7e09d03 PT |
82 | } |
83 | ||
84 | /* | |
85 | * Client's incoming reply callback | |
86 | */ | |
87 | void reply_in_callback(lnet_event_t *ev) | |
88 | { | |
d0bfef31 | 89 | struct ptlrpc_cb_id *cbid = ev->md.user_ptr; |
d7e09d03 | 90 | struct ptlrpc_request *req = cbid->cbid_arg; |
d7e09d03 PT |
91 | |
92 | DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status); | |
93 | ||
3949015e KM |
94 | LASSERT(ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK); |
95 | LASSERT(ev->md.start == req->rq_repbuf); | |
96 | LASSERT(ev->offset + ev->mlength <= req->rq_repbuf_len); | |
d7e09d03 PT |
97 | /* We've set LNET_MD_MANAGE_REMOTE for all outgoing requests |
98 | for adaptive timeouts' early reply. */ | |
99 | LASSERT((ev->md.options & LNET_MD_MANAGE_REMOTE) != 0); | |
100 | ||
101 | spin_lock(&req->rq_lock); | |
102 | ||
103 | req->rq_receiving_reply = 0; | |
104 | req->rq_early = 0; | |
105 | if (ev->unlinked) | |
cf378ff7 | 106 | req->rq_reply_unlink = 0; |
d7e09d03 PT |
107 | |
108 | if (ev->status) | |
109 | goto out_wake; | |
110 | ||
111 | if (ev->type == LNET_EVENT_UNLINK) { | |
112 | LASSERT(ev->unlinked); | |
113 | DEBUG_REQ(D_NET, req, "unlink"); | |
114 | goto out_wake; | |
115 | } | |
116 | ||
3949015e | 117 | if (ev->mlength < ev->rlength) { |
d7e09d03 PT |
118 | CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req, |
119 | req->rq_replen, ev->rlength, ev->offset); | |
120 | req->rq_reply_truncate = 1; | |
121 | req->rq_replied = 1; | |
122 | req->rq_status = -EOVERFLOW; | |
123 | req->rq_nob_received = ev->rlength + ev->offset; | |
124 | goto out_wake; | |
125 | } | |
126 | ||
127 | if ((ev->offset == 0) && | |
128 | ((lustre_msghdr_get_flags(req->rq_reqmsg) & MSGHDR_AT_SUPPORT))) { | |
129 | /* Early reply */ | |
130 | DEBUG_REQ(D_ADAPTTO, req, | |
2d00bd17 JP |
131 | "Early reply received: mlen=%u offset=%d replen=%d replied=%d unlinked=%d", |
132 | ev->mlength, ev->offset, | |
d7e09d03 PT |
133 | req->rq_replen, req->rq_replied, ev->unlinked); |
134 | ||
135 | req->rq_early_count++; /* number received, client side */ | |
136 | ||
137 | if (req->rq_replied) /* already got the real reply */ | |
138 | goto out_wake; | |
139 | ||
140 | req->rq_early = 1; | |
141 | req->rq_reply_off = ev->offset; | |
142 | req->rq_nob_received = ev->mlength; | |
143 | /* And we're still receiving */ | |
144 | req->rq_receiving_reply = 1; | |
145 | } else { | |
146 | /* Real reply */ | |
147 | req->rq_rep_swab_mask = 0; | |
148 | req->rq_replied = 1; | |
5c689e68 AB |
149 | /* Got reply, no resend required */ |
150 | req->rq_resend = 0; | |
d7e09d03 PT |
151 | req->rq_reply_off = ev->offset; |
152 | req->rq_nob_received = ev->mlength; | |
153 | /* LNetMDUnlink can't be called under the LNET_LOCK, | |
154 | so we must unlink in ptlrpc_unregister_reply */ | |
155 | DEBUG_REQ(D_INFO, req, | |
156 | "reply in flags=%x mlen=%u offset=%d replen=%d", | |
157 | lustre_msg_get_flags(req->rq_reqmsg), | |
158 | ev->mlength, ev->offset, req->rq_replen); | |
159 | } | |
160 | ||
7264b8a5 | 161 | req->rq_import->imp_last_reply_time = get_seconds(); |
d7e09d03 PT |
162 | |
163 | out_wake: | |
164 | /* NB don't unlock till after wakeup; req can disappear under us | |
165 | * since we don't have our own ref */ | |
166 | ptlrpc_client_wake_req(req); | |
167 | spin_unlock(&req->rq_lock); | |
d7e09d03 PT |
168 | } |
169 | ||
170 | /* | |
171 | * Client's bulk has been written/read | |
172 | */ | |
3949015e | 173 | void client_bulk_callback(lnet_event_t *ev) |
d7e09d03 | 174 | { |
d0bfef31 | 175 | struct ptlrpc_cb_id *cbid = ev->md.user_ptr; |
d7e09d03 | 176 | struct ptlrpc_bulk_desc *desc = cbid->cbid_arg; |
d0bfef31 | 177 | struct ptlrpc_request *req; |
d7e09d03 | 178 | |
3949015e KM |
179 | LASSERT((desc->bd_type == BULK_PUT_SINK && |
180 | ev->type == LNET_EVENT_PUT) || | |
181 | (desc->bd_type == BULK_GET_SOURCE && | |
182 | ev->type == LNET_EVENT_GET) || | |
183 | ev->type == LNET_EVENT_UNLINK); | |
184 | LASSERT(ev->unlinked); | |
d7e09d03 PT |
185 | |
186 | if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB, CFS_FAIL_ONCE)) | |
187 | ev->status = -EIO; | |
188 | ||
1d8cb70c GD |
189 | if (CFS_FAIL_CHECK_ORSET(OBD_FAIL_PTLRPC_CLIENT_BULK_CB2, |
190 | CFS_FAIL_ONCE)) | |
d7e09d03 PT |
191 | ev->status = -EIO; |
192 | ||
193 | CDEBUG((ev->status == 0) ? D_NET : D_ERROR, | |
194 | "event type %d, status %d, desc %p\n", | |
195 | ev->type, ev->status, desc); | |
196 | ||
197 | spin_lock(&desc->bd_lock); | |
198 | req = desc->bd_req; | |
199 | LASSERT(desc->bd_md_count > 0); | |
200 | desc->bd_md_count--; | |
201 | ||
202 | if (ev->type != LNET_EVENT_UNLINK && ev->status == 0) { | |
203 | desc->bd_nob_transferred += ev->mlength; | |
204 | desc->bd_sender = ev->sender; | |
205 | } else { | |
206 | /* start reconnect and resend if network error hit */ | |
207 | spin_lock(&req->rq_lock); | |
208 | req->rq_net_err = 1; | |
209 | spin_unlock(&req->rq_lock); | |
210 | } | |
211 | ||
212 | if (ev->status != 0) | |
213 | desc->bd_failure = 1; | |
214 | ||
215 | /* NB don't unlock till after wakeup; desc can disappear under us | |
216 | * otherwise */ | |
217 | if (desc->bd_md_count == 0) | |
218 | ptlrpc_client_wake_req(desc->bd_req); | |
219 | ||
220 | spin_unlock(&desc->bd_lock); | |
d7e09d03 PT |
221 | } |
222 | ||
223 | /* | |
224 | * We will have per-CPT request history lists for ptlrpc services in upcoming | |
225 | * patches because we don't want to be serialized by the current per-service | |
226 | * history operations. So we require that the history ID can (somehow) show | |
227 | * arrival order w/o grabbing a global lock, so users can sort in userspace. | |
228 | * | |
229 | * This is how we generate history ID for ptlrpc_request: | |
230 | * ---------------------------------------------------- | |
231 | * | 32 bits | 16 bits | (16 - X)bits | X bits | | |
232 | * ---------------------------------------------------- | |
233 | * | seconds | usec / 16 | sequence | CPT id | | |
234 | * ---------------------------------------------------- | |
235 | * | |
236 | * it might not be precise but should be good enough. | |
237 | */ | |
238 | ||
239 | #define REQS_CPT_BITS(svcpt) ((svcpt)->scp_service->srv_cpt_bits) | |
240 | ||
241 | #define REQS_SEC_SHIFT 32 | |
242 | #define REQS_USEC_SHIFT 16 | |
243 | #define REQS_SEQ_SHIFT(svcpt) REQS_CPT_BITS(svcpt) | |
244 | ||
245 | static void ptlrpc_req_add_history(struct ptlrpc_service_part *svcpt, | |
246 | struct ptlrpc_request *req) | |
247 | { | |
d0bfef31 CH |
248 | __u64 sec = req->rq_arrival_time.tv_sec; |
249 | __u32 usec = req->rq_arrival_time.tv_usec >> 4; /* usec / 16 */ | |
250 | __u64 new_seq; | |
d7e09d03 PT |
251 | |
252 | /* set sequence ID for request and add it to history list, | |
253 | * it must be called with hold svcpt::scp_lock */ | |
254 | ||
255 | new_seq = (sec << REQS_SEC_SHIFT) | | |
256 | (usec << REQS_USEC_SHIFT) | | |
257 | (svcpt->scp_cpt < 0 ? 0 : svcpt->scp_cpt); | |
258 | ||
259 | if (new_seq > svcpt->scp_hist_seq) { | |
260 | /* This handles the initial case of scp_hist_seq == 0 or | |
261 | * we just jumped into a new time window */ | |
262 | svcpt->scp_hist_seq = new_seq; | |
263 | } else { | |
264 | LASSERT(REQS_SEQ_SHIFT(svcpt) < REQS_USEC_SHIFT); | |
265 | /* NB: increase sequence number in current usec bucket, | |
266 | * however, it's possible that we used up all bits for | |
267 | * sequence and jumped into the next usec bucket (future time), | |
268 | * then we hope there will be less RPCs per bucket at some | |
269 | * point, and sequence will catch up again */ | |
270 | svcpt->scp_hist_seq += (1U << REQS_SEQ_SHIFT(svcpt)); | |
271 | new_seq = svcpt->scp_hist_seq; | |
272 | } | |
273 | ||
274 | req->rq_history_seq = new_seq; | |
275 | ||
276 | list_add_tail(&req->rq_history_list, &svcpt->scp_hist_reqs); | |
277 | } | |
278 | ||
279 | /* | |
280 | * Server's incoming request callback | |
281 | */ | |
282 | void request_in_callback(lnet_event_t *ev) | |
283 | { | |
d0bfef31 | 284 | struct ptlrpc_cb_id *cbid = ev->md.user_ptr; |
d7e09d03 | 285 | struct ptlrpc_request_buffer_desc *rqbd = cbid->cbid_arg; |
d0bfef31 CH |
286 | struct ptlrpc_service_part *svcpt = rqbd->rqbd_svcpt; |
287 | struct ptlrpc_service *service = svcpt->scp_service; | |
288 | struct ptlrpc_request *req; | |
d7e09d03 | 289 | |
3949015e KM |
290 | LASSERT(ev->type == LNET_EVENT_PUT || |
291 | ev->type == LNET_EVENT_UNLINK); | |
292 | LASSERT((char *)ev->md.start >= rqbd->rqbd_buffer); | |
293 | LASSERT((char *)ev->md.start + ev->offset + ev->mlength <= | |
294 | rqbd->rqbd_buffer + service->srv_buf_size); | |
d7e09d03 PT |
295 | |
296 | CDEBUG((ev->status == 0) ? D_NET : D_ERROR, | |
297 | "event type %d, status %d, service %s\n", | |
298 | ev->type, ev->status, service->srv_name); | |
299 | ||
300 | if (ev->unlinked) { | |
301 | /* If this is the last request message to fit in the | |
302 | * request buffer we can use the request object embedded in | |
303 | * rqbd. Note that if we failed to allocate a request, | |
304 | * we'd have to re-post the rqbd, which we can't do in this | |
305 | * context. */ | |
306 | req = &rqbd->rqbd_req; | |
3949015e | 307 | memset(req, 0, sizeof(*req)); |
d7e09d03 | 308 | } else { |
3949015e | 309 | LASSERT(ev->type == LNET_EVENT_PUT); |
d7e09d03 PT |
310 | if (ev->status != 0) { |
311 | /* We moaned above already... */ | |
312 | return; | |
313 | } | |
15191cac | 314 | req = ptlrpc_request_cache_alloc(GFP_ATOMIC); |
d7e09d03 | 315 | if (req == NULL) { |
2d00bd17 | 316 | CERROR("Can't allocate incoming request descriptor: Dropping %s RPC from %s\n", |
d7e09d03 PT |
317 | service->srv_name, |
318 | libcfs_id2str(ev->initiator)); | |
319 | return; | |
320 | } | |
321 | } | |
322 | ||
323 | /* NB we ABSOLUTELY RELY on req being zeroed, so pointers are NULL, | |
324 | * flags are reset and scalars are zero. We only set the message | |
325 | * size to non-zero if this was a successful receive. */ | |
326 | req->rq_xid = ev->match_bits; | |
327 | req->rq_reqbuf = ev->md.start + ev->offset; | |
328 | if (ev->type == LNET_EVENT_PUT && ev->status == 0) | |
329 | req->rq_reqdata_len = ev->mlength; | |
330 | do_gettimeofday(&req->rq_arrival_time); | |
331 | req->rq_peer = ev->initiator; | |
332 | req->rq_self = ev->target.nid; | |
333 | req->rq_rqbd = rqbd; | |
334 | req->rq_phase = RQ_PHASE_NEW; | |
335 | spin_lock_init(&req->rq_lock); | |
336 | INIT_LIST_HEAD(&req->rq_timed_list); | |
337 | INIT_LIST_HEAD(&req->rq_exp_list); | |
338 | atomic_set(&req->rq_refcount, 1); | |
339 | if (ev->type == LNET_EVENT_PUT) | |
b0f5aad5 | 340 | CDEBUG(D_INFO, "incoming req@%p x%llu msgsize %u\n", |
d7e09d03 PT |
341 | req, req->rq_xid, ev->mlength); |
342 | ||
343 | CDEBUG(D_RPCTRACE, "peer: %s\n", libcfs_id2str(req->rq_peer)); | |
344 | ||
345 | spin_lock(&svcpt->scp_lock); | |
346 | ||
347 | ptlrpc_req_add_history(svcpt, req); | |
348 | ||
349 | if (ev->unlinked) { | |
350 | svcpt->scp_nrqbds_posted--; | |
351 | CDEBUG(D_INFO, "Buffer complete: %d buffers still posted\n", | |
352 | svcpt->scp_nrqbds_posted); | |
353 | ||
354 | /* Normally, don't complain about 0 buffers posted; LNET won't | |
355 | * drop incoming reqs since we set the portal lazy */ | |
356 | if (test_req_buffer_pressure && | |
357 | ev->type != LNET_EVENT_UNLINK && | |
358 | svcpt->scp_nrqbds_posted == 0) | |
359 | CWARN("All %s request buffers busy\n", | |
360 | service->srv_name); | |
361 | ||
362 | /* req takes over the network's ref on rqbd */ | |
363 | } else { | |
364 | /* req takes a ref on rqbd */ | |
365 | rqbd->rqbd_refcount++; | |
366 | } | |
367 | ||
368 | list_add_tail(&req->rq_list, &svcpt->scp_req_incoming); | |
369 | svcpt->scp_nreqs_incoming++; | |
370 | ||
371 | /* NB everything can disappear under us once the request | |
372 | * has been queued and we unlock, so do the wake now... */ | |
373 | wake_up(&svcpt->scp_waitq); | |
374 | ||
375 | spin_unlock(&svcpt->scp_lock); | |
d7e09d03 PT |
376 | } |
377 | ||
378 | /* | |
379 | * Server's outgoing reply callback | |
380 | */ | |
381 | void reply_out_callback(lnet_event_t *ev) | |
382 | { | |
d0bfef31 | 383 | struct ptlrpc_cb_id *cbid = ev->md.user_ptr; |
d7e09d03 PT |
384 | struct ptlrpc_reply_state *rs = cbid->cbid_arg; |
385 | struct ptlrpc_service_part *svcpt = rs->rs_svcpt; | |
d7e09d03 | 386 | |
3949015e KM |
387 | LASSERT(ev->type == LNET_EVENT_SEND || |
388 | ev->type == LNET_EVENT_ACK || | |
389 | ev->type == LNET_EVENT_UNLINK); | |
d7e09d03 PT |
390 | |
391 | if (!rs->rs_difficult) { | |
392 | /* 'Easy' replies have no further processing so I drop the | |
393 | * net's ref on 'rs' */ | |
3949015e | 394 | LASSERT(ev->unlinked); |
d7e09d03 | 395 | ptlrpc_rs_decref(rs); |
d7e09d03 PT |
396 | return; |
397 | } | |
398 | ||
3949015e | 399 | LASSERT(rs->rs_on_net); |
d7e09d03 PT |
400 | |
401 | if (ev->unlinked) { | |
402 | /* Last network callback. The net's ref on 'rs' stays put | |
403 | * until ptlrpc_handle_rs() is done with it */ | |
404 | spin_lock(&svcpt->scp_rep_lock); | |
405 | spin_lock(&rs->rs_lock); | |
406 | ||
407 | rs->rs_on_net = 0; | |
408 | if (!rs->rs_no_ack || | |
409 | rs->rs_transno <= | |
410 | rs->rs_export->exp_obd->obd_last_committed) | |
411 | ptlrpc_schedule_difficult_reply(rs); | |
412 | ||
413 | spin_unlock(&rs->rs_lock); | |
414 | spin_unlock(&svcpt->scp_rep_lock); | |
415 | } | |
d7e09d03 PT |
416 | } |
417 | ||
418 | ||
419 | static void ptlrpc_master_callback(lnet_event_t *ev) | |
420 | { | |
421 | struct ptlrpc_cb_id *cbid = ev->md.user_ptr; | |
422 | void (*callback)(lnet_event_t *ev) = cbid->cbid_fn; | |
423 | ||
424 | /* Honestly, it's best to find out early. */ | |
3949015e KM |
425 | LASSERT(cbid->cbid_arg != LP_POISON); |
426 | LASSERT(callback == request_out_callback || | |
427 | callback == reply_in_callback || | |
428 | callback == client_bulk_callback || | |
429 | callback == request_in_callback || | |
430 | callback == reply_out_callback); | |
431 | ||
432 | callback(ev); | |
d7e09d03 PT |
433 | } |
434 | ||
3949015e | 435 | int ptlrpc_uuid_to_peer(struct obd_uuid *uuid, |
d0bfef31 | 436 | lnet_process_id_t *peer, lnet_nid_t *self) |
d7e09d03 | 437 | { |
d0bfef31 CH |
438 | int best_dist = 0; |
439 | __u32 best_order = 0; | |
440 | int count = 0; | |
441 | int rc = -ENOENT; | |
442 | int portals_compatibility; | |
443 | int dist; | |
444 | __u32 order; | |
445 | lnet_nid_t dst_nid; | |
446 | lnet_nid_t src_nid; | |
d7e09d03 PT |
447 | |
448 | portals_compatibility = LNetCtl(IOC_LIBCFS_PORTALS_COMPATIBILITY, NULL); | |
449 | ||
450 | peer->pid = LUSTRE_SRV_LNET_PID; | |
451 | ||
452 | /* Choose the matching UUID that's closest */ | |
453 | while (lustre_uuid_to_peer(uuid->uuid, &dst_nid, count++) == 0) { | |
454 | dist = LNetDist(dst_nid, &src_nid, &order); | |
455 | if (dist < 0) | |
456 | continue; | |
457 | ||
458 | if (dist == 0) { /* local! use loopback LND */ | |
459 | peer->nid = *self = LNET_MKNID(LNET_MKNET(LOLND, 0), 0); | |
460 | rc = 0; | |
461 | break; | |
462 | } | |
463 | ||
464 | if (rc < 0 || | |
465 | dist < best_dist || | |
466 | (dist == best_dist && order < best_order)) { | |
467 | best_dist = dist; | |
468 | best_order = order; | |
469 | ||
470 | if (portals_compatibility > 1) { | |
471 | /* Strong portals compatibility: Zero the nid's | |
472 | * NET, so if I'm reading new config logs, or | |
473 | * getting configured by (new) lconf I can | |
474 | * still talk to old servers. */ | |
475 | dst_nid = LNET_MKNID(0, LNET_NIDADDR(dst_nid)); | |
476 | src_nid = LNET_MKNID(0, LNET_NIDADDR(src_nid)); | |
477 | } | |
478 | peer->nid = dst_nid; | |
479 | *self = src_nid; | |
480 | rc = 0; | |
481 | } | |
482 | } | |
483 | ||
1d8cb70c | 484 | CDEBUG(D_NET, "%s->%s\n", uuid->uuid, libcfs_id2str(*peer)); |
d7e09d03 PT |
485 | return rc; |
486 | } | |
487 | ||
9a2477c2 | 488 | static void ptlrpc_ni_fini(void) |
d7e09d03 | 489 | { |
d0bfef31 CH |
490 | wait_queue_head_t waitq; |
491 | struct l_wait_info lwi; | |
492 | int rc; | |
493 | int retries; | |
d7e09d03 PT |
494 | |
495 | /* Wait for the event queue to become idle since there may still be | |
496 | * messages in flight with pending events (i.e. the fire-and-forget | |
497 | * messages == client requests and "non-difficult" server | |
498 | * replies */ | |
499 | ||
500 | for (retries = 0;; retries++) { | |
501 | rc = LNetEQFree(ptlrpc_eq_h); | |
502 | switch (rc) { | |
503 | default: | |
504 | LBUG(); | |
505 | ||
506 | case 0: | |
507 | LNetNIFini(); | |
508 | return; | |
509 | ||
510 | case -EBUSY: | |
511 | if (retries != 0) | |
512 | CWARN("Event queue still busy\n"); | |
513 | ||
514 | /* Wait for a bit */ | |
515 | init_waitqueue_head(&waitq); | |
516 | lwi = LWI_TIMEOUT(cfs_time_seconds(2), NULL, NULL); | |
517 | l_wait_event(waitq, 0, &lwi); | |
518 | break; | |
519 | } | |
520 | } | |
521 | /* notreached */ | |
522 | } | |
523 | ||
524 | lnet_pid_t ptl_get_pid(void) | |
525 | { | |
d0bfef31 | 526 | lnet_pid_t pid; |
d7e09d03 PT |
527 | |
528 | pid = LUSTRE_SRV_LNET_PID; | |
529 | return pid; | |
530 | } | |
531 | ||
9a2477c2 | 532 | static int ptlrpc_ni_init(void) |
d7e09d03 | 533 | { |
d0bfef31 CH |
534 | int rc; |
535 | lnet_pid_t pid; | |
d7e09d03 PT |
536 | |
537 | pid = ptl_get_pid(); | |
538 | CDEBUG(D_NET, "My pid is: %x\n", pid); | |
539 | ||
540 | /* We're not passing any limits yet... */ | |
541 | rc = LNetNIInit(pid); | |
542 | if (rc < 0) { | |
3949015e | 543 | CDEBUG(D_NET, "Can't init network interface: %d\n", rc); |
fbe7c6c7 | 544 | return -ENOENT; |
d7e09d03 PT |
545 | } |
546 | ||
547 | /* CAVEAT EMPTOR: how we process portals events is _radically_ | |
548 | * different depending on... */ | |
549 | /* kernel LNet calls our master callback when there are new event, | |
550 | * because we are guaranteed to get every event via callback, | |
b6da17f3 | 551 | * so we just set EQ size to 0 to avoid overhead of serializing |
d7e09d03 PT |
552 | * enqueue/dequeue operations in LNet. */ |
553 | rc = LNetEQAlloc(0, ptlrpc_master_callback, &ptlrpc_eq_h); | |
554 | if (rc == 0) | |
555 | return 0; | |
556 | ||
3949015e | 557 | CERROR("Failed to allocate event queue: %d\n", rc); |
d7e09d03 PT |
558 | LNetNIFini(); |
559 | ||
fbe7c6c7 | 560 | return -ENOMEM; |
d7e09d03 PT |
561 | } |
562 | ||
563 | ||
564 | int ptlrpc_init_portals(void) | |
565 | { | |
d0bfef31 | 566 | int rc = ptlrpc_ni_init(); |
d7e09d03 PT |
567 | |
568 | if (rc != 0) { | |
569 | CERROR("network initialisation failed\n"); | |
570 | return -EIO; | |
571 | } | |
572 | rc = ptlrpcd_addref(); | |
573 | if (rc == 0) | |
574 | return 0; | |
575 | ||
576 | CERROR("rpcd initialisation failed\n"); | |
577 | ptlrpc_ni_fini(); | |
578 | return rc; | |
579 | } | |
580 | ||
/*
 * Undo ptlrpc_init_portals(): drop the ptlrpcd reference first, then free
 * the event queue and shut down the network interface.
 */
void ptlrpc_exit_portals(void)
{
	ptlrpcd_decref();
	ptlrpc_ni_fini();
}