]>
Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. | |
3 | * | |
4 | * Copyright (c) 2011, 2012, Intel Corporation. | |
5 | * | |
6 | * Author: Zach Brown <zab@zabbo.net> | |
7 | * Author: Peter J. Braam <braam@clusterfs.com> | |
8 | * Author: Phil Schwan <phil@clusterfs.com> | |
9 | * Author: Eric Barton <eric@bartonsoftware.com> | |
10 | * | |
11 | * This file is part of Lustre, http://www.lustre.org | |
12 | * | |
13 | * Portals is free software; you can redistribute it and/or | |
14 | * modify it under the terms of version 2 of the GNU General Public | |
15 | * License as published by the Free Software Foundation. | |
16 | * | |
17 | * Portals is distributed in the hope that it will be useful, | |
18 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 | * GNU General Public License for more details. | |
21 | * | |
22 | * You should have received a copy of the GNU General Public License | |
23 | * along with Portals; if not, write to the Free Software | |
24 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
25 | * | |
26 | */ | |
27 | ||
12c41f00 JH |
28 | #ifndef _SOCKLND_SOCKLND_H_ |
29 | #define _SOCKLND_SOCKLND_H_ | |
30 | ||
d7e09d03 PT |
31 | #define DEBUG_PORTAL_ALLOC |
32 | #define DEBUG_SUBSYSTEM S_LND | |
33 | ||
12c41f00 JH |
34 | #include <linux/crc32.h> |
35 | #include <linux/errno.h> | |
36 | #include <linux/if.h> | |
37 | #include <linux/init.h> | |
38 | #include <linux/kernel.h> | |
39 | #include <linux/kmod.h> | |
40 | #include <linux/list.h> | |
41 | #include <linux/mm.h> | |
42 | #include <linux/module.h> | |
43 | #include <linux/stat.h> | |
44 | #include <linux/string.h> | |
45 | #include <linux/syscalls.h> | |
46 | #include <linux/sysctl.h> | |
47 | #include <linux/uio.h> | |
48 | #include <linux/unistd.h> | |
49 | #include <net/sock.h> | |
50 | #include <net/tcp.h> | |
d7e09d03 | 51 | |
9fdaf8c0 GKH |
52 | #include "../../../include/linux/libcfs/libcfs.h" |
53 | #include "../../../include/linux/lnet/lnet.h" | |
54 | #include "../../../include/linux/lnet/lib-lnet.h" | |
55 | #include "../../../include/linux/lnet/socklnd.h" | |
d7e09d03 | 56 | |
12c41f00 JH |
/*
 * Scheduler pool sizing: the baseline assumes one thread for each
 * connection type; the "high" value is the baseline doubled.
 */
#define SOCKNAL_NSCHEDS		3
#define SOCKNAL_NSCHEDS_HIGH	(SOCKNAL_NSCHEDS << 1)

#define SOCKNAL_PEER_HASH_SIZE	101		/* number of peer lists */
#define SOCKNAL_RESCHED		100		/* scheduler loops before reschedule */
#define SOCKNAL_INSANITY_RECONN	5000		/* connd is retrying reconnection endlessly */
#define SOCKNAL_ENOMEM_RETRY	CFS_TICK	/* jiffies between retries */

#define SOCKNAL_SINGLE_FRAG_TX	0		/* disable multi-fragment sends */
#define SOCKNAL_SINGLE_FRAG_RX	0		/* disable multi-fragment receives */

#define SOCKNAL_VERSION_DEBUG	0		/* enable protocol version debugging */

/*
 * Whether to risk a kmap deadlock on multi-fragment I/O; when disabled the
 * code backs off to single-fragment I/O.  There is no risk unless we are
 * running on a CONFIG_HIGHMEM platform.
 */
#ifdef CONFIG_HIGHMEM
# define SOCKNAL_RISK_KMAP_DEADLOCK	0
#else
# define SOCKNAL_RISK_KMAP_DEADLOCK	1
#endif
78 | ||
79 | struct ksock_sched_info; | |
80 | ||
81 | typedef struct /* per scheduler state */ | |
82 | { | |
97d10d0a MS |
83 | spinlock_t kss_lock; /* serialise */ |
84 | struct list_head kss_rx_conns; /* conn waiting to be read */ | |
85 | struct list_head kss_tx_conns; /* conn waiting to be written */ | |
86 | struct list_head kss_zombie_noop_txs; /* zombie noop tx list */ | |
87 | wait_queue_head_t kss_waitq; /* where scheduler sleeps */ | |
88 | int kss_nconns; /* # connections assigned to | |
89 | * this scheduler */ | |
90 | struct ksock_sched_info *kss_info; /* owner of it */ | |
91 | struct page *kss_rx_scratch_pgs[LNET_MAX_IOV]; | |
92 | struct kvec kss_scratch_iov[LNET_MAX_IOV]; | |
d7e09d03 PT |
93 | } ksock_sched_t; |
94 | ||
95 | struct ksock_sched_info { | |
97d10d0a MS |
96 | int ksi_nthreads_max; /* max allowed threads */ |
97 | int ksi_nthreads; /* number of threads */ | |
98 | int ksi_cpt; /* CPT id */ | |
99 | ksock_sched_t *ksi_scheds; /* array of schedulers */ | |
d7e09d03 PT |
100 | }; |
101 | ||
97d10d0a MS |
/*
 * A thread id packs the CPT number above KSOCK_CPT_SHIFT bits and the
 * scheduler id (sid) in the low KSOCK_CPT_SHIFT bits.
 */
#define KSOCK_CPT_SHIFT			16
#define KSOCK_THREAD_ID(cpt, sid)	(((cpt) << KSOCK_CPT_SHIFT) | (sid))
#define KSOCK_THREAD_CPT(id)		((id) >> KSOCK_CPT_SHIFT)
#define KSOCK_THREAD_SID(id)		((id) & ((1UL << KSOCK_CPT_SHIFT) - 1))
d7e09d03 | 106 | |
97d10d0a | 107 | typedef struct /* in-use interface */ |
d7e09d03 PT |
108 | { |
109 | __u32 ksni_ipaddr; /* interface's IP address */ | |
110 | __u32 ksni_netmask; /* interface's network mask */ | |
111 | int ksni_nroutes; /* # routes using (active) */ | |
112 | int ksni_npeers; /* # peers using (passive) */ | |
113 | char ksni_name[IFNAMSIZ]; /* interface name */ | |
114 | } ksock_interface_t; | |
115 | ||
/* Tunables; every member is a pointer to the tunable's value. */
typedef struct {
	int		*ksnd_timeout;		/* "stuck" socket timeout, in seconds */
	int		*ksnd_nscheds;		/* scheduler threads in each pool
						 * while starting */
	int		*ksnd_nconnds;		/* number of connection daemons */
	int		*ksnd_nconnds_max;	/* max number of connection daemons */
	int		*ksnd_min_reconnectms;	/* first connection retry after
						 * this many ms... */
	int		*ksnd_max_reconnectms;	/* ...increasing exponentially up
						 * to this */
	int		*ksnd_eager_ack;	/* make TCP ack eagerly? */
	int		*ksnd_typed_conns;	/* drive sockets by type? */
	int		*ksnd_min_bulk;		/* smallest "large" message */
	int		*ksnd_tx_buffer_size;	/* socket tx buffer size */
	int		*ksnd_rx_buffer_size;	/* socket rx buffer size */
	int		*ksnd_nagle;		/* enable NAGLE? */
	int		*ksnd_round_robin;	/* round robin across multiple
						 * interfaces */
	int		*ksnd_keepalive;	/* seconds before sending a
						 * keepalive NOOP */
	int		*ksnd_keepalive_idle;	/* idle seconds before 1st probe */
	int		*ksnd_keepalive_count;	/* number of probes */
	int		*ksnd_keepalive_intvl;	/* time between probes */
	int		*ksnd_credits;		/* number of concurrent sends */
	int		*ksnd_peertxcredits;	/* concurrent sends to 1 peer */
	int		*ksnd_peerrtrcredits;	/* per-peer router buffer credits */
	int		*ksnd_peertimeout;	/* seconds to consider peer dead */
	int		*ksnd_enable_csum;	/* enable checksum */
	int		*ksnd_inject_csum_error; /* non-zero injects a checksum
						  * error */
	int		*ksnd_nonblk_zcack;	/* always send zc-ack on a
						 * non-blocking connection */
	unsigned int	*ksnd_zc_min_payload;	/* minimum zero copy payload size */
	int		*ksnd_zc_recv;		/* enable ZC receive (for Chelsio
						 * TOE) */
	int		*ksnd_zc_recv_min_nfrags; /* minimum number of fragments
						   * to enable ZC receive */
} ksock_tunables_t;
160 | ||
75c49d40 | 161 | typedef struct { |
d7e09d03 PT |
162 | __u64 ksnn_incarnation; /* my epoch */ |
163 | spinlock_t ksnn_lock; /* serialise */ | |
164 | struct list_head ksnn_list; /* chain on global list */ | |
165 | int ksnn_npeers; /* # peers */ | |
166 | int ksnn_shutdown; /* shutting down? */ | |
167 | int ksnn_ninterfaces; /* IP interfaces */ | |
168 | ksock_interface_t ksnn_interfaces[LNET_MAX_INTERFACES]; | |
169 | } ksock_net_t; | |
170 | ||
/** timeout applied to connd */
#define SOCKNAL_CONND_TIMEOUT	120
/** thread held in reserve for accepting & creating new connd */
#define SOCKNAL_CONND_RESV	1
175 | ||
75c49d40 | 176 | typedef struct { |
97d10d0a MS |
177 | int ksnd_init; /* initialisation state |
178 | */ | |
179 | int ksnd_nnets; /* # networks set up */ | |
180 | struct list_head ksnd_nets; /* list of nets */ | |
181 | rwlock_t ksnd_global_lock; /* stabilize peer/conn | |
182 | * ops */ | |
183 | struct list_head *ksnd_peers; /* hash table of all my | |
184 | * known peers */ | |
185 | int ksnd_peer_hash_size; /* size of ksnd_peers */ | |
186 | ||
187 | int ksnd_nthreads; /* # live threads */ | |
188 | int ksnd_shuttingdown; /* tell threads to exit | |
189 | */ | |
190 | struct ksock_sched_info **ksnd_sched_info; /* schedulers info */ | |
191 | ||
192 | atomic_t ksnd_nactive_txs; /* #active txs */ | |
193 | ||
194 | struct list_head ksnd_deathrow_conns; /* conns to close: | |
195 | * reaper_lock*/ | |
196 | struct list_head ksnd_zombie_conns; /* conns to free: | |
197 | * reaper_lock */ | |
198 | struct list_head ksnd_enomem_conns; /* conns to retry: | |
199 | * reaper_lock*/ | |
200 | wait_queue_head_t ksnd_reaper_waitq; /* reaper sleeps here */ | |
201 | unsigned long ksnd_reaper_waketime; /* when reaper will wake | |
202 | */ | |
203 | spinlock_t ksnd_reaper_lock; /* serialise */ | |
204 | ||
205 | int ksnd_enomem_tx; /* test ENOMEM sender */ | |
206 | int ksnd_stall_tx; /* test sluggish sender | |
207 | */ | |
208 | int ksnd_stall_rx; /* test sluggish | |
209 | * receiver */ | |
210 | ||
211 | struct list_head ksnd_connd_connreqs; /* incoming connection | |
212 | * requests */ | |
213 | struct list_head ksnd_connd_routes; /* routes waiting to be | |
214 | * connected */ | |
215 | wait_queue_head_t ksnd_connd_waitq; /* connds sleep here */ | |
216 | int ksnd_connd_connecting; /* # connds connecting | |
217 | */ | |
218 | long ksnd_connd_failed_stamp;/* time stamp of the | |
219 | * last failed | |
220 | * connecting attempt */ | |
221 | unsigned ksnd_connd_starting; /* # starting connd */ | |
222 | long ksnd_connd_starting_stamp;/* time stamp of the | |
223 | * last starting connd | |
224 | */ | |
225 | unsigned ksnd_connd_running; /* # running connd */ | |
226 | spinlock_t ksnd_connd_lock; /* serialise */ | |
227 | ||
228 | struct list_head ksnd_idle_noop_txs; /* list head for freed | |
229 | * noop tx */ | |
230 | spinlock_t ksnd_tx_lock; /* serialise, g_lock | |
231 | * unsafe */ | |
d7e09d03 PT |
232 | |
233 | } ksock_nal_data_t; | |
234 | ||
97d10d0a MS |
/* Values for the initialisation state (see ksnd_init). */
#define SOCKNAL_INIT_NOTHING	0
#define SOCKNAL_INIT_DATA	1
#define SOCKNAL_INIT_ALL	2
d7e09d03 PT |
238 | |
239 | /* A packet just assembled for transmission is represented by 1 or more | |
240 | * struct kvec fragments (the first frag contains the portals header), | |
241 | * followed by 0 or more lnet_kiov_t fragments. | |
242 | * | |
243 | * On the receive side, initially 1 struct kvec fragment is posted for | |
244 | * receive (the header). Once the header has been received, the payload is | |
245 | * received into either struct kvec or lnet_kiov_t fragments, depending on | |
246 | * what the header matched or whether the message needs forwarding. */ | |
247 | ||
97d10d0a MS |
248 | struct ksock_conn; /* forward ref */ |
249 | struct ksock_peer; /* forward ref */ | |
250 | struct ksock_route; /* forward ref */ | |
251 | struct ksock_proto; /* forward ref */ | |
d7e09d03 | 252 | |
97d10d0a | 253 | typedef struct /* transmit packet */ |
d7e09d03 | 254 | { |
97d10d0a MS |
255 | struct list_head tx_list; /* queue on conn for transmission etc |
256 | */ | |
257 | struct list_head tx_zc_list; /* queue on peer for ZC request */ | |
258 | atomic_t tx_refcount; /* tx reference count */ | |
259 | int tx_nob; /* # packet bytes */ | |
260 | int tx_resid; /* residual bytes */ | |
261 | int tx_niov; /* # packet iovec frags */ | |
262 | struct kvec *tx_iov; /* packet iovec frags */ | |
263 | int tx_nkiov; /* # packet page frags */ | |
264 | unsigned short tx_zc_aborted; /* aborted ZC request */ | |
265 | unsigned short tx_zc_capable:1; /* payload is large enough for ZC */ | |
266 | unsigned short tx_zc_checked:1; /* Have I checked if I should ZC? */ | |
267 | unsigned short tx_nonblk:1; /* it's a non-blocking ACK */ | |
268 | lnet_kiov_t *tx_kiov; /* packet page frags */ | |
269 | struct ksock_conn *tx_conn; /* owning conn */ | |
270 | lnet_msg_t *tx_lnetmsg; /* lnet message for lnet_finalize() | |
271 | */ | |
272 | unsigned long tx_deadline; /* when (in jiffies) tx times out */ | |
273 | ksock_msg_t tx_msg; /* socklnd message buffer */ | |
274 | int tx_desc_size; /* size of this descriptor */ | |
d7e09d03 PT |
275 | union { |
276 | struct { | |
97d10d0a MS |
277 | struct kvec iov; /* virt hdr */ |
278 | lnet_kiov_t kiov[0]; /* paged payload */ | |
279 | } paged; | |
d7e09d03 | 280 | struct { |
97d10d0a MS |
281 | struct kvec iov[1]; /* virt hdr + payload */ |
282 | } virt; | |
283 | } tx_frags; | |
d7e09d03 PT |
284 | } ksock_tx_t; |
285 | ||
97d10d0a | 286 | #define KSOCK_NOOP_TX_SIZE ((int)offsetof(ksock_tx_t, tx_frags.paged.kiov[0])) |
d7e09d03 PT |
287 | |
288 | /* network zero copy callback descriptor embedded in ksock_tx_t */ | |
289 | ||
290 | /* space for the rx frag descriptors; we either read a single contiguous | |
291 | * header, or up to LNET_MAX_IOV frags of payload of either type. */ | |
292 | typedef union { | |
f351bad2 | 293 | struct kvec iov[LNET_MAX_IOV]; |
d7e09d03 PT |
294 | lnet_kiov_t kiov[LNET_MAX_IOV]; |
295 | } ksock_rxiovspace_t; | |
296 | ||
97d10d0a MS |
/* Receive states (see ksnc_rx_state: "what is being read"). */
#define SOCKNAL_RX_KSM_HEADER	1	/* reading the ksock message header */
#define SOCKNAL_RX_LNET_HEADER	2	/* reading the lnet message header */
#define SOCKNAL_RX_PARSE	3	/* calling lnet_parse() */
#define SOCKNAL_RX_PARSE_WAIT	4	/* waiting to be told to read the body */
#define SOCKNAL_RX_LNET_PAYLOAD	5	/* reading lnet payload (to deliver here) */
#define SOCKNAL_RX_SLOP		6	/* skipping the body */
d7e09d03 | 303 | |
75c49d40 | 304 | typedef struct ksock_conn { |
97d10d0a MS |
305 | struct ksock_peer *ksnc_peer; /* owning peer */ |
306 | struct ksock_route *ksnc_route; /* owning route */ | |
307 | struct list_head ksnc_list; /* stash on peer's conn list */ | |
308 | struct socket *ksnc_sock; /* actual socket */ | |
309 | void *ksnc_saved_data_ready; /* socket's original | |
310 | * data_ready() callback */ | |
311 | void *ksnc_saved_write_space; /* socket's original | |
312 | * write_space() callback */ | |
313 | atomic_t ksnc_conn_refcount;/* conn refcount */ | |
314 | atomic_t ksnc_sock_refcount;/* sock refcount */ | |
315 | ksock_sched_t *ksnc_scheduler; /* who schedules this connection | |
316 | */ | |
317 | __u32 ksnc_myipaddr; /* my IP */ | |
318 | __u32 ksnc_ipaddr; /* peer's IP */ | |
319 | int ksnc_port; /* peer's port */ | |
320 | signed int ksnc_type:3; /* type of connection, should be | |
321 | * signed value */ | |
322 | unsigned int ksnc_closing:1; /* being shut down */ | |
323 | unsigned int ksnc_flip:1; /* flip or not, only for V2.x */ | |
324 | unsigned int ksnc_zc_capable:1; /* enable to ZC */ | |
325 | struct ksock_proto *ksnc_proto; /* protocol for the connection */ | |
d7e09d03 PT |
326 | |
327 | /* reader */ | |
97d10d0a MS |
328 | struct list_head ksnc_rx_list; /* where I enq waiting input or a |
329 | * forwarding descriptor */ | |
330 | unsigned long ksnc_rx_deadline; /* when (in jiffies) receive times | |
331 | * out */ | |
332 | __u8 ksnc_rx_started; /* started receiving a message */ | |
333 | __u8 ksnc_rx_ready; /* data ready to read */ | |
334 | __u8 ksnc_rx_scheduled; /* being progressed */ | |
335 | __u8 ksnc_rx_state; /* what is being read */ | |
336 | int ksnc_rx_nob_left; /* # bytes to next hdr/body */ | |
337 | int ksnc_rx_nob_wanted;/* bytes actually wanted */ | |
338 | int ksnc_rx_niov; /* # iovec frags */ | |
339 | struct kvec *ksnc_rx_iov; /* the iovec frags */ | |
340 | int ksnc_rx_nkiov; /* # page frags */ | |
341 | lnet_kiov_t *ksnc_rx_kiov; /* the page frags */ | |
342 | ksock_rxiovspace_t ksnc_rx_iov_space; /* space for frag descriptors */ | |
343 | __u32 ksnc_rx_csum; /* partial checksum for incoming | |
344 | * data */ | |
345 | void *ksnc_cookie; /* rx lnet_finalize passthru arg | |
346 | */ | |
347 | ksock_msg_t ksnc_msg; /* incoming message buffer: | |
348 | * V2.x message takes the | |
349 | * whole struct | |
350 | * V1.x message is a bare | |
351 | * lnet_hdr_t, it's stored in | |
352 | * ksnc_msg.ksm_u.lnetmsg */ | |
d7e09d03 PT |
353 | |
354 | /* WRITER */ | |
97d10d0a MS |
355 | struct list_head ksnc_tx_list; /* where I enq waiting for output |
356 | * space */ | |
357 | struct list_head ksnc_tx_queue; /* packets waiting to be sent */ | |
358 | ksock_tx_t *ksnc_tx_carrier; /* next TX that can carry a LNet | |
359 | * message or ZC-ACK */ | |
360 | unsigned long ksnc_tx_deadline; /* when (in jiffies) tx times out | |
361 | */ | |
362 | int ksnc_tx_bufnob; /* send buffer marker */ | |
363 | atomic_t ksnc_tx_nob; /* # bytes queued */ | |
364 | int ksnc_tx_ready; /* write space */ | |
365 | int ksnc_tx_scheduled; /* being progressed */ | |
366 | unsigned long ksnc_tx_last_post; /* time stamp of the last posted | |
367 | * TX */ | |
d7e09d03 PT |
368 | } ksock_conn_t; |
369 | ||
75c49d40 | 370 | typedef struct ksock_route { |
97d10d0a MS |
371 | struct list_head ksnr_list; /* chain on peer route list */ |
372 | struct list_head ksnr_connd_list; /* chain on ksnr_connd_routes */ | |
373 | struct ksock_peer *ksnr_peer; /* owning peer */ | |
374 | atomic_t ksnr_refcount; /* # users */ | |
375 | unsigned long ksnr_timeout; /* when (in jiffies) reconnection | |
376 | * can happen next */ | |
377 | long ksnr_retry_interval; /* how long between retries */ | |
378 | __u32 ksnr_myipaddr; /* my IP */ | |
379 | __u32 ksnr_ipaddr; /* IP address to connect to */ | |
380 | int ksnr_port; /* port to connect to */ | |
381 | unsigned int ksnr_scheduled:1; /* scheduled for attention */ | |
382 | unsigned int ksnr_connecting:1; /* connection establishment in | |
383 | * progress */ | |
384 | unsigned int ksnr_connected:4; /* connections established by | |
385 | * type */ | |
386 | unsigned int ksnr_deleted:1; /* been removed from peer? */ | |
387 | unsigned int ksnr_share_count; /* created explicitly? */ | |
388 | int ksnr_conn_count; /* # conns established by this | |
389 | * route */ | |
d7e09d03 PT |
390 | } ksock_route_t; |
391 | ||
97d10d0a | 392 | #define SOCKNAL_KEEPALIVE_PING 1 /* cookie for keepalive ping */ |
d7e09d03 | 393 | |
75c49d40 | 394 | typedef struct ksock_peer { |
97d10d0a MS |
395 | struct list_head ksnp_list; /* stash on global peer list */ |
396 | unsigned long ksnp_last_alive; /* when (in jiffies) I was last | |
397 | * alive */ | |
398 | lnet_process_id_t ksnp_id; /* who's on the other end(s) */ | |
399 | atomic_t ksnp_refcount; /* # users */ | |
400 | int ksnp_sharecount; /* lconf usage counter */ | |
401 | int ksnp_closing; /* being closed */ | |
402 | int ksnp_accepting; /* # passive connections pending | |
403 | */ | |
404 | int ksnp_error; /* errno on closing last conn */ | |
405 | __u64 ksnp_zc_next_cookie; /* ZC completion cookie */ | |
406 | __u64 ksnp_incarnation; /* latest known peer incarnation | |
407 | */ | |
408 | struct ksock_proto *ksnp_proto; /* latest known peer protocol */ | |
409 | struct list_head ksnp_conns; /* all active connections */ | |
410 | struct list_head ksnp_routes; /* routes */ | |
411 | struct list_head ksnp_tx_queue; /* waiting packets */ | |
412 | spinlock_t ksnp_lock; /* serialize, g_lock unsafe */ | |
413 | struct list_head ksnp_zc_req_list; /* zero copy requests wait for | |
414 | * ACK */ | |
415 | unsigned long ksnp_send_keepalive; /* time to send keepalive */ | |
416 | lnet_ni_t *ksnp_ni; /* which network */ | |
417 | int ksnp_n_passive_ips; /* # of... */ | |
418 | ||
419 | /* preferred local interfaces */ | |
420 | __u32 ksnp_passive_ips[LNET_MAX_INTERFACES]; | |
d7e09d03 PT |
421 | } ksock_peer_t; |
422 | ||
75c49d40 | 423 | typedef struct ksock_connreq { |
97d10d0a MS |
424 | struct list_head ksncr_list; /* stash on ksnd_connd_connreqs */ |
425 | lnet_ni_t *ksncr_ni; /* chosen NI */ | |
426 | struct socket *ksncr_sock; /* accepted socket */ | |
d7e09d03 PT |
427 | } ksock_connreq_t; |
428 | ||
429 | extern ksock_nal_data_t ksocknal_data; | |
430 | extern ksock_tunables_t ksocknal_tunables; | |
431 | ||
97d10d0a MS |
/* Outcome of matching a TX against a connection's type. */
#define SOCKNAL_MATCH_NO	0	/* TX cannot match the connection type */
#define SOCKNAL_MATCH_YES	1	/* TX matches the connection type */
#define SOCKNAL_MATCH_MAY	2	/* TX may be sent on the connection, but
					 * it is not the preferred one */
d7e09d03 | 436 | |
75c49d40 | 437 | typedef struct ksock_proto { |
97d10d0a MS |
438 | /* version number of protocol */ |
439 | int pro_version; | |
440 | ||
441 | /* handshake function */ | |
442 | int (*pro_send_hello)(ksock_conn_t *, ksock_hello_msg_t *); | |
443 | ||
444 | /* handshake function */ | |
445 | int (*pro_recv_hello)(ksock_conn_t *, ksock_hello_msg_t *, int); | |
446 | ||
447 | /* message pack */ | |
448 | void (*pro_pack)(ksock_tx_t *); | |
449 | ||
450 | /* message unpack */ | |
451 | void (*pro_unpack)(ksock_msg_t *); | |
452 | ||
453 | /* queue tx on the connection */ | |
454 | ksock_tx_t *(*pro_queue_tx_msg)(ksock_conn_t *, ksock_tx_t *); | |
455 | ||
456 | /* queue ZC ack on the connection */ | |
457 | int (*pro_queue_tx_zcack)(ksock_conn_t *, ksock_tx_t *, __u64); | |
458 | ||
459 | /* handle ZC request */ | |
460 | int (*pro_handle_zcreq)(ksock_conn_t *, __u64, int); | |
461 | ||
462 | /* handle ZC ACK */ | |
463 | int (*pro_handle_zcack)(ksock_conn_t *, __u64, __u64); | |
464 | ||
465 | /* msg type matches the connection type: | |
466 | * return value: | |
467 | * return MATCH_NO : no | |
468 | * return MATCH_YES : matching type | |
469 | * return MATCH_MAY : can be backup */ | |
470 | int (*pro_match_tx)(ksock_conn_t *, ksock_tx_t *, int); | |
d7e09d03 PT |
471 | } ksock_proto_t; |
472 | ||
473 | extern ksock_proto_t ksocknal_protocol_v1x; | |
474 | extern ksock_proto_t ksocknal_protocol_v2x; | |
475 | extern ksock_proto_t ksocknal_protocol_v3x; | |
476 | ||
97d10d0a MS |
477 | #define KSOCK_PROTO_V1_MAJOR LNET_PROTO_TCP_VERSION_MAJOR |
478 | #define KSOCK_PROTO_V1_MINOR LNET_PROTO_TCP_VERSION_MINOR | |
479 | #define KSOCK_PROTO_V1 KSOCK_PROTO_V1_MAJOR | |
d7e09d03 PT |
480 | |
481 | #ifndef CPU_MASK_NONE | |
482 | #define CPU_MASK_NONE 0UL | |
483 | #endif | |
484 | ||
12c41f00 JH |
485 | static inline __u32 ksocknal_csum(__u32 crc, unsigned char const *p, size_t len) |
486 | { | |
487 | #if 1 | |
488 | return crc32_le(crc, p, len); | |
489 | #else | |
490 | while (len-- > 0) | |
491 | crc = ((crc + 0x100) & ~0xff) | ((crc + *p++) & 0xff) ; | |
492 | return crc; | |
493 | #endif | |
494 | } | |
495 | ||
d7e09d03 PT |
496 | static inline int |
497 | ksocknal_route_mask(void) | |
498 | { | |
499 | if (!*ksocknal_tunables.ksnd_typed_conns) | |
500 | return (1 << SOCKLND_CONN_ANY); | |
501 | ||
502 | return ((1 << SOCKLND_CONN_CONTROL) | | |
503 | (1 << SOCKLND_CONN_BULK_IN) | | |
504 | (1 << SOCKLND_CONN_BULK_OUT)); | |
505 | } | |
506 | ||
507 | static inline struct list_head * | |
d9dfb48f | 508 | ksocknal_nid2peerlist(lnet_nid_t nid) |
d7e09d03 PT |
509 | { |
510 | unsigned int hash = ((unsigned int)nid) % ksocknal_data.ksnd_peer_hash_size; | |
511 | ||
71397095 | 512 | return &ksocknal_data.ksnd_peers[hash]; |
d7e09d03 PT |
513 | } |
514 | ||
515 | static inline void | |
d9dfb48f | 516 | ksocknal_conn_addref(ksock_conn_t *conn) |
d7e09d03 | 517 | { |
d9dfb48f | 518 | LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0); |
d7e09d03 PT |
519 | atomic_inc(&conn->ksnc_conn_refcount); |
520 | } | |
521 | ||
d9dfb48f | 522 | extern void ksocknal_queue_zombie_conn(ksock_conn_t *conn); |
d7e09d03 PT |
523 | extern void ksocknal_finalize_zcreq(ksock_conn_t *conn); |
524 | ||
525 | static inline void | |
d9dfb48f | 526 | ksocknal_conn_decref(ksock_conn_t *conn) |
d7e09d03 | 527 | { |
d9dfb48f | 528 | LASSERT(atomic_read(&conn->ksnc_conn_refcount) > 0); |
d7e09d03 PT |
529 | if (atomic_dec_and_test(&conn->ksnc_conn_refcount)) |
530 | ksocknal_queue_zombie_conn(conn); | |
531 | } | |
532 | ||
533 | static inline int | |
d9dfb48f | 534 | ksocknal_connsock_addref(ksock_conn_t *conn) |
d7e09d03 | 535 | { |
97d10d0a | 536 | int rc = -ESHUTDOWN; |
d7e09d03 PT |
537 | |
538 | read_lock(&ksocknal_data.ksnd_global_lock); | |
539 | if (!conn->ksnc_closing) { | |
540 | LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0); | |
541 | atomic_inc(&conn->ksnc_sock_refcount); | |
542 | rc = 0; | |
543 | } | |
544 | read_unlock(&ksocknal_data.ksnd_global_lock); | |
545 | ||
71397095 | 546 | return rc; |
d7e09d03 PT |
547 | } |
548 | ||
549 | static inline void | |
d9dfb48f | 550 | ksocknal_connsock_decref(ksock_conn_t *conn) |
d7e09d03 | 551 | { |
d9dfb48f | 552 | LASSERT(atomic_read(&conn->ksnc_sock_refcount) > 0); |
d7e09d03 | 553 | if (atomic_dec_and_test(&conn->ksnc_sock_refcount)) { |
d9dfb48f | 554 | LASSERT(conn->ksnc_closing); |
e52fc91d | 555 | sock_release(conn->ksnc_sock); |
d7e09d03 PT |
556 | conn->ksnc_sock = NULL; |
557 | ksocknal_finalize_zcreq(conn); | |
558 | } | |
559 | } | |
560 | ||
561 | static inline void | |
d9dfb48f | 562 | ksocknal_tx_addref(ksock_tx_t *tx) |
d7e09d03 | 563 | { |
d9dfb48f | 564 | LASSERT(atomic_read(&tx->tx_refcount) > 0); |
d7e09d03 PT |
565 | atomic_inc(&tx->tx_refcount); |
566 | } | |
567 | ||
d9dfb48f AD |
568 | extern void ksocknal_tx_prep(ksock_conn_t *, ksock_tx_t *tx); |
569 | extern void ksocknal_tx_done(lnet_ni_t *ni, ksock_tx_t *tx); | |
d7e09d03 PT |
570 | |
571 | static inline void | |
d9dfb48f | 572 | ksocknal_tx_decref(ksock_tx_t *tx) |
d7e09d03 | 573 | { |
d9dfb48f | 574 | LASSERT(atomic_read(&tx->tx_refcount) > 0); |
d7e09d03 PT |
575 | if (atomic_dec_and_test(&tx->tx_refcount)) |
576 | ksocknal_tx_done(NULL, tx); | |
577 | } | |
578 | ||
579 | static inline void | |
d9dfb48f | 580 | ksocknal_route_addref(ksock_route_t *route) |
d7e09d03 | 581 | { |
d9dfb48f | 582 | LASSERT(atomic_read(&route->ksnr_refcount) > 0); |
d7e09d03 PT |
583 | atomic_inc(&route->ksnr_refcount); |
584 | } | |
585 | ||
d9dfb48f | 586 | extern void ksocknal_destroy_route(ksock_route_t *route); |
d7e09d03 PT |
587 | |
588 | static inline void | |
d9dfb48f | 589 | ksocknal_route_decref(ksock_route_t *route) |
d7e09d03 | 590 | { |
d9dfb48f | 591 | LASSERT(atomic_read(&route->ksnr_refcount) > 0); |
d7e09d03 | 592 | if (atomic_dec_and_test(&route->ksnr_refcount)) |
d9dfb48f | 593 | ksocknal_destroy_route(route); |
d7e09d03 PT |
594 | } |
595 | ||
596 | static inline void | |
d9dfb48f | 597 | ksocknal_peer_addref(ksock_peer_t *peer) |
d7e09d03 | 598 | { |
d9dfb48f | 599 | LASSERT(atomic_read(&peer->ksnp_refcount) > 0); |
d7e09d03 PT |
600 | atomic_inc(&peer->ksnp_refcount); |
601 | } | |
602 | ||
d9dfb48f | 603 | extern void ksocknal_destroy_peer(ksock_peer_t *peer); |
d7e09d03 PT |
604 | |
605 | static inline void | |
d9dfb48f | 606 | ksocknal_peer_decref(ksock_peer_t *peer) |
d7e09d03 | 607 | { |
d9dfb48f | 608 | LASSERT(atomic_read(&peer->ksnp_refcount) > 0); |
d7e09d03 | 609 | if (atomic_dec_and_test(&peer->ksnp_refcount)) |
d9dfb48f | 610 | ksocknal_destroy_peer(peer); |
d7e09d03 PT |
611 | } |
612 | ||
d9dfb48f AD |
613 | int ksocknal_startup(lnet_ni_t *ni); |
614 | void ksocknal_shutdown(lnet_ni_t *ni); | |
d7e09d03 | 615 | int ksocknal_ctl(lnet_ni_t *ni, unsigned int cmd, void *arg); |
d9dfb48f | 616 | int ksocknal_send(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg); |
d7e09d03 PT |
617 | int ksocknal_recv(lnet_ni_t *ni, void *private, lnet_msg_t *lntmsg, |
618 | int delayed, unsigned int niov, | |
f351bad2 | 619 | struct kvec *iov, lnet_kiov_t *kiov, |
d7e09d03 | 620 | unsigned int offset, unsigned int mlen, unsigned int rlen); |
e327dc88 | 621 | int ksocknal_accept(lnet_ni_t *ni, struct socket *sock); |
d7e09d03 PT |
622 | |
623 | extern int ksocknal_add_peer(lnet_ni_t *ni, lnet_process_id_t id, __u32 ip, int port); | |
d9dfb48f AD |
624 | extern ksock_peer_t *ksocknal_find_peer_locked(lnet_ni_t *ni, lnet_process_id_t id); |
625 | extern ksock_peer_t *ksocknal_find_peer(lnet_ni_t *ni, lnet_process_id_t id); | |
626 | extern void ksocknal_peer_failed(ksock_peer_t *peer); | |
627 | extern int ksocknal_create_conn(lnet_ni_t *ni, ksock_route_t *route, | |
e327dc88 | 628 | struct socket *sock, int type); |
d9dfb48f AD |
629 | extern void ksocknal_close_conn_locked(ksock_conn_t *conn, int why); |
630 | extern void ksocknal_terminate_conn(ksock_conn_t *conn); | |
631 | extern void ksocknal_destroy_conn(ksock_conn_t *conn); | |
632 | extern int ksocknal_close_peer_conns_locked(ksock_peer_t *peer, | |
d7e09d03 | 633 | __u32 ipaddr, int why); |
d9dfb48f AD |
634 | extern int ksocknal_close_conn_and_siblings(ksock_conn_t *conn, int why); |
635 | extern int ksocknal_close_matching_conns(lnet_process_id_t id, __u32 ipaddr); | |
d7e09d03 PT |
636 | extern ksock_conn_t *ksocknal_find_conn_locked(ksock_peer_t *peer, |
637 | ksock_tx_t *tx, int nonblk); | |
638 | ||
639 | extern int ksocknal_launch_packet(lnet_ni_t *ni, ksock_tx_t *tx, | |
640 | lnet_process_id_t id); | |
641 | extern ksock_tx_t *ksocknal_alloc_tx(int type, int size); | |
d9dfb48f | 642 | extern void ksocknal_free_tx(ksock_tx_t *tx); |
d7e09d03 PT |
643 | extern ksock_tx_t *ksocknal_alloc_tx_noop(__u64 cookie, int nonblk); |
644 | extern void ksocknal_next_tx_carrier(ksock_conn_t *conn); | |
d9dfb48f AD |
645 | extern void ksocknal_queue_tx_locked(ksock_tx_t *tx, ksock_conn_t *conn); |
646 | extern void ksocknal_txlist_done(lnet_ni_t *ni, struct list_head *txlist, | |
d7e09d03 | 647 | int error); |
d9dfb48f AD |
648 | extern void ksocknal_notify(lnet_ni_t *ni, lnet_nid_t gw_nid, int alive); |
649 | extern void ksocknal_query(struct lnet_ni *ni, lnet_nid_t nid, unsigned long *when); | |
d7e09d03 | 650 | extern int ksocknal_thread_start(int (*fn)(void *arg), void *arg, char *name); |
d9dfb48f AD |
651 | extern void ksocknal_thread_fini(void); |
652 | extern void ksocknal_launch_all_connections_locked(ksock_peer_t *peer); | |
653 | extern ksock_route_t *ksocknal_find_connectable_route_locked(ksock_peer_t *peer); | |
654 | extern ksock_route_t *ksocknal_find_connecting_route_locked(ksock_peer_t *peer); | |
655 | extern int ksocknal_new_packet(ksock_conn_t *conn, int skip); | |
656 | extern int ksocknal_scheduler(void *arg); | |
657 | extern int ksocknal_connd(void *arg); | |
658 | extern int ksocknal_reaper(void *arg); | |
659 | extern int ksocknal_send_hello(lnet_ni_t *ni, ksock_conn_t *conn, | |
d7e09d03 | 660 | lnet_nid_t peer_nid, ksock_hello_msg_t *hello); |
d9dfb48f | 661 | extern int ksocknal_recv_hello(lnet_ni_t *ni, ksock_conn_t *conn, |
d7e09d03 PT |
662 | ksock_hello_msg_t *hello, lnet_process_id_t *id, |
663 | __u64 *incarnation); | |
664 | extern void ksocknal_read_callback(ksock_conn_t *conn); | |
665 | extern void ksocknal_write_callback(ksock_conn_t *conn); | |
666 | ||
667 | extern int ksocknal_lib_zc_capable(ksock_conn_t *conn); | |
e327dc88 GKH |
668 | extern void ksocknal_lib_save_callback(struct socket *sock, ksock_conn_t *conn); |
669 | extern void ksocknal_lib_set_callback(struct socket *sock, ksock_conn_t *conn); | |
670 | extern void ksocknal_lib_reset_callback(struct socket *sock, ksock_conn_t *conn); | |
d9dfb48f AD |
671 | extern void ksocknal_lib_push_conn(ksock_conn_t *conn); |
672 | extern int ksocknal_lib_get_conn_addrs(ksock_conn_t *conn); | |
673 | extern int ksocknal_lib_setup_sock(struct socket *so); | |
674 | extern int ksocknal_lib_send_iov(ksock_conn_t *conn, ksock_tx_t *tx); | |
675 | extern int ksocknal_lib_send_kiov(ksock_conn_t *conn, ksock_tx_t *tx); | |
676 | extern void ksocknal_lib_eager_ack(ksock_conn_t *conn); | |
677 | extern int ksocknal_lib_recv_iov(ksock_conn_t *conn); | |
678 | extern int ksocknal_lib_recv_kiov(ksock_conn_t *conn); | |
679 | extern int ksocknal_lib_get_conn_tunables(ksock_conn_t *conn, int *txmem, | |
d7e09d03 PT |
680 | int *rxmem, int *nagle); |
681 | ||
682 | extern int ksocknal_tunables_init(void); | |
d7e09d03 PT |
683 | |
684 | extern void ksocknal_lib_csum_tx(ksock_tx_t *tx); | |
685 | ||
686 | extern int ksocknal_lib_memory_pressure(ksock_conn_t *conn); | |
687 | extern int ksocknal_lib_bind_thread_to_cpu(int id); | |
12c41f00 JH |
688 | |
689 | #endif /* _SOCKLND_SOCKLND_H_ */ |