]>
Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
31b8006e SW |
2 | #ifndef __FS_CEPH_MESSENGER_H |
3 | #define __FS_CEPH_MESSENGER_H | |
4 | ||
9f082171 | 5 | #include <linux/bvec.h> |
c2e552e7 | 6 | #include <linux/kref.h> |
31b8006e SW |
7 | #include <linux/mutex.h> |
8 | #include <linux/net.h> | |
9 | #include <linux/radix-tree.h> | |
10 | #include <linux/uio.h> | |
31b8006e | 11 | #include <linux/workqueue.h> |
757856d2 | 12 | #include <net/net_namespace.h> |
31b8006e | 13 | |
a1ce3928 DH |
14 | #include <linux/ceph/types.h> |
15 | #include <linux/ceph/buffer.h> | |
31b8006e SW |
16 | |
17 | struct ceph_msg; | |
18 | struct ceph_connection; | |
19 | ||
31b8006e SW |
/*
 * Callback table through which the messenger reports connection events
 * to the code that owns a connection.
 */
struct ceph_connection_operations {
	struct ceph_connection *(*get)(struct ceph_connection *);
	void (*put)(struct ceph_connection *);

	/* Called to process a message that arrived on @con. */
	void (*dispatch)(struct ceph_connection *con, struct ceph_msg *m);

	/* Authorization hooks for an outgoing connection. */
	struct ceph_auth_handshake *(*get_authorizer)(
					struct ceph_connection *con,
					int *proto, int force_new);
	int (*add_authorizer_challenge)(struct ceph_connection *con,
					void *challenge_buf,
					int challenge_buf_len);
	int (*verify_authorizer_reply)(struct ceph_connection *con);
	int (*invalidate_authorizer)(struct ceph_connection *con);

	/* The socket hit some error (a disconnect or anything else). */
	void (*fault)(struct ceph_connection *con);

	/*
	 * The remote host has terminated the message exchange session;
	 * messages we sent (or that it tried to send us) may be lost.
	 */
	void (*peer_reset)(struct ceph_connection *con);

	struct ceph_msg *(*alloc_msg)(struct ceph_connection *con,
				      struct ceph_msg_header *hdr,
				      int *skip);

	void (*reencode_message)(struct ceph_msg *msg);

	int (*sign_message)(struct ceph_msg *msg);
	int (*check_message_signature)(struct ceph_msg *msg);
};
56 | ||
/*
 * Expands to TWO comma-separated arguments (type name string, number);
 * pair it with a "%s%d"-style format string.
 */
#define ENTITY_NAME(n) \
	ceph_entity_type_name((n).type), le64_to_cpu((n).num)
31b8006e SW |
59 | |
60 | struct ceph_messenger { | |
61 | struct ceph_entity_inst inst; /* my name+address */ | |
63f2d211 | 62 | struct ceph_entity_addr my_enc_addr; |
31b8006e | 63 | |
a2a32584 | 64 | atomic_t stopping; |
757856d2 | 65 | possible_net_t net; |
31b8006e SW |
66 | |
67 | /* | |
68 | * the global_seq counts connections i (attempt to) initiate | |
69 | * in order to disambiguate certain connect race conditions. | |
70 | */ | |
71 | u32 global_seq; | |
72 | spinlock_t global_seq_lock; | |
73 | }; | |
74 | ||
43794509 AE |
/*
 * Kind of payload a message data item carries.  Each value names what
 * the data source/destination is.
 */
enum ceph_msg_data_type {
	/* message contains no data payload */
	CEPH_MSG_DATA_NONE,
	/* page array */
	CEPH_MSG_DATA_PAGES,
	/* pagelist */
	CEPH_MSG_DATA_PAGELIST,
#ifdef CONFIG_BLOCK
	/* bio list */
	CEPH_MSG_DATA_BIO,
#endif /* CONFIG_BLOCK */
	/* bio_vec array */
	CEPH_MSG_DATA_BVECS,
};
84 | ||
5359a17d ID |
85 | #ifdef CONFIG_BLOCK |
86 | ||
87 | struct ceph_bio_iter { | |
88 | struct bio *bio; | |
89 | struct bvec_iter iter; | |
90 | }; | |
91 | ||
/*
 * Move @it forward by @n bytes, one chunk at a time.  A chunk never
 * crosses a bio boundary; its size is available to STEP as __cur_n.
 * STEP is evaluated once per chunk, before the iterator is advanced.
 * When the current bio is used up and another one follows, @it is
 * switched over to the next bio.
 *
 * @n is evaluated once; @it is evaluated several times.
 */
#define __ceph_bio_iter_advance_step(it, n, STEP) do {			      \
	unsigned int __n;						      \
	unsigned int __cur_n;						      \
									      \
	for (__n = (n); __n; __n -= __cur_n) {				      \
		BUG_ON(!(it)->iter.bi_size);				      \
		__cur_n = min((it)->iter.bi_size, __n);			      \
		(void)(STEP);						      \
		bio_advance_iter((it)->bio, &(it)->iter, __cur_n);	      \
		if (!(it)->iter.bi_size) {				      \
			if ((it)->bio->bi_next) {			      \
				dout("__ceph_bio_iter_advance_step next bio\n"); \
				(it)->bio = (it)->bio->bi_next;		      \
				(it)->iter = (it)->bio->bi_iter;	      \
			}						      \
		}							      \
	}								      \
} while (0)

/*
 * Advance @it by @n bytes without doing any per-chunk work.
 */
#define ceph_bio_iter_advance(it, n)					      \
	__ceph_bio_iter_advance_step(it, n, 0)

/*
 * Advance @it by @n bytes, evaluating BVEC_STEP for every bio_vec that
 * is walked.  BVEC_STEP can refer to the current bio_vec as "bv".
 */
#define ceph_bio_iter_advance_step(it, n, BVEC_STEP)			      \
	__ceph_bio_iter_advance_step(it, n, ({				      \
		struct bio_vec bv;					      \
		struct bvec_iter __cur_iter;				      \
									      \
		__cur_iter = (it)->iter;				      \
		__cur_iter.bi_size = __cur_n;				      \
		__bio_for_each_segment(bv, (it)->bio, __cur_iter, __cur_iter) \
			(void)(BVEC_STEP);				      \
	}))
128 | ||
129 | #endif /* CONFIG_BLOCK */ | |
130 | ||
b9e281c2 ID |
131 | struct ceph_bvec_iter { |
132 | struct bio_vec *bvecs; | |
133 | struct bvec_iter iter; | |
134 | }; | |
135 | ||
/*
 * Common helper: BUG if @n exceeds what is left in @it, evaluate STEP,
 * then advance @it by @n bytes.
 *
 * @n is evaluated exactly once and latched in __adv_n, so arguments
 * with side effects are safe; STEP may refer to __adv_n.  @it is still
 * evaluated more than once and must be free of side effects.
 */
#define __ceph_bvec_iter_advance_step(it, n, STEP) do {			      \
	__typeof__(n) __adv_n = (n);					      \
									      \
	BUG_ON(__adv_n > (it)->iter.bi_size);				      \
	(void)(STEP);							      \
	bvec_iter_advance((it)->bvecs, &(it)->iter, __adv_n);		      \
} while (0)

/*
 * Advance @it by @n bytes.
 */
#define ceph_bvec_iter_advance(it, n)					      \
	__ceph_bvec_iter_advance_step(it, n, 0)

/*
 * Advance @it by @n bytes, executing BVEC_STEP for each bio_vec.
 * BVEC_STEP can refer to the current bio_vec as "bv".  The walk is done
 * on a private copy of the iterator limited to @n bytes.
 */
#define ceph_bvec_iter_advance_step(it, n, BVEC_STEP)			      \
	__ceph_bvec_iter_advance_step(it, n, ({				      \
		struct bio_vec bv;					      \
		struct bvec_iter __cur_iter;				      \
									      \
		__cur_iter = (it)->iter;				      \
		__cur_iter.bi_size = __adv_n;				      \
		for_each_bvec(bv, (it)->bvecs, __cur_iter, __cur_iter)	      \
			(void)(BVEC_STEP);				      \
	}))

/*
 * Shrink the number of bytes left in @it to @n.  BUGs if @n is larger
 * than what is currently left.  @n is evaluated exactly once.
 */
#define ceph_bvec_iter_shorten(it, n) do {				      \
	__typeof__(n) __new_size = (n);					      \
									      \
	BUG_ON(__new_size > (it)->iter.bi_size);			      \
	(it)->iter.bi_size = __new_size;				      \
} while (0)
166 | ||
36153ec9 AE |
167 | struct ceph_msg_data { |
168 | enum ceph_msg_data_type type; | |
169 | union { | |
170 | #ifdef CONFIG_BLOCK | |
171 | struct { | |
5359a17d ID |
172 | struct ceph_bio_iter bio_pos; |
173 | u32 bio_length; | |
36153ec9 AE |
174 | }; |
175 | #endif /* CONFIG_BLOCK */ | |
b9e281c2 | 176 | struct ceph_bvec_iter bvec_pos; |
36153ec9 AE |
177 | struct { |
178 | struct page **pages; /* NOT OWNER. */ | |
179 | size_t length; /* total # bytes */ | |
180 | unsigned int alignment; /* first page */ | |
181 | }; | |
182 | struct ceph_pagelist *pagelist; | |
183 | }; | |
36153ec9 AE |
184 | }; |
185 | ||
fe38a2b6 | 186 | struct ceph_msg_data_cursor { |
ca8b3a69 | 187 | size_t total_resid; /* across all data items */ |
ca8b3a69 AE |
188 | |
189 | struct ceph_msg_data *data; /* current data item */ | |
8ae4f4f5 AE |
190 | size_t resid; /* bytes not yet consumed */ |
191 | bool last_piece; /* current is last piece */ | |
192 | bool need_crc; /* crc update needed */ | |
dd236fcb | 193 | union { |
6aaa4511 | 194 | #ifdef CONFIG_BLOCK |
5359a17d | 195 | struct ceph_bio_iter bio_iter; |
6aaa4511 | 196 | #endif /* CONFIG_BLOCK */ |
b9e281c2 | 197 | struct bvec_iter bvec_iter; |
e766d7b5 | 198 | struct { /* pages */ |
e766d7b5 AE |
199 | unsigned int page_offset; /* offset in page */ |
200 | unsigned short page_index; /* index in array */ | |
201 | unsigned short page_count; /* pages in array */ | |
202 | }; | |
dd236fcb AE |
203 | struct { /* pagelist */ |
204 | struct page *page; /* page from list */ | |
205 | size_t offset; /* bytes from list */ | |
206 | }; | |
207 | }; | |
fe38a2b6 AE |
208 | }; |
209 | ||
31b8006e SW |
210 | /* |
211 | * a single message. it contains a header (src, dest, message type, etc.), | |
212 | * footer (crc values, mainly), a "front" message body, and possibly a | |
213 | * data payload (stored in some number of pages). | |
214 | */ | |
215 | struct ceph_msg { | |
216 | struct ceph_msg_header hdr; /* header */ | |
33d07337 YZ |
217 | union { |
218 | struct ceph_msg_footer footer; /* footer */ | |
219 | struct ceph_msg_footer_old old_footer; /* old format footer */ | |
220 | }; | |
31b8006e SW |
221 | struct kvec front; /* unaligned blobs of message */ |
222 | struct ceph_buffer *middle; | |
38941f80 | 223 | |
36153ec9 | 224 | size_t data_length; |
0d9c1ab3 ID |
225 | struct ceph_msg_data *data; |
226 | int num_data_items; | |
227 | int max_data_items; | |
36153ec9 | 228 | struct ceph_msg_data_cursor cursor; |
02afca6c AE |
229 | |
230 | struct ceph_connection *con; | |
231 | struct list_head list_head; /* links for connection lists */ | |
232 | ||
233 | struct kref kref; | |
31b8006e | 234 | bool more_to_follow; |
e84346b7 | 235 | bool needs_out_seq; |
3cea4c30 | 236 | int front_alloc_len; |
4cf9d544 | 237 | unsigned long ack_stamp; /* tx: when we were acked */ |
31b8006e SW |
238 | |
239 | struct ceph_msgpool *pool; | |
240 | }; | |
241 | ||
31b8006e SW |
/*
 * Default bounds, in units of HZ, for the exponential backoff applied
 * after a connection fault.
 */
#define BASE_DELAY_INTERVAL	(HZ / 2)
#define MAX_DELAY_INTERVAL	(5 * 60 * HZ)
245 | ||
31b8006e SW |
246 | /* |
247 | * A single connection with another host. | |
248 | * | |
249 | * We maintain a queue of outgoing messages, and some session state to | |
250 | * ensure that we can preserve the lossless, ordered delivery of | |
251 | * messages in the case of a TCP disconnect. | |
252 | */ | |
253 | struct ceph_connection { | |
254 | void *private; | |
31b8006e SW |
255 | |
256 | const struct ceph_connection_operations *ops; | |
257 | ||
258 | struct ceph_messenger *msgr; | |
ce2c8903 AE |
259 | |
260 | atomic_t sock_state; | |
31b8006e | 261 | struct socket *sock; |
ce2c8903 AE |
262 | struct ceph_entity_addr peer_addr; /* peer address */ |
263 | struct ceph_entity_addr peer_addr_for_me; | |
264 | ||
928443cd AE |
265 | unsigned long flags; |
266 | unsigned long state; | |
31b8006e SW |
267 | const char *error_msg; /* error message, if any */ |
268 | ||
31b8006e | 269 | struct ceph_entity_name peer_name; /* peer name */ |
ce2c8903 | 270 | |
12b4629a | 271 | u64 peer_features; |
31b8006e SW |
272 | u32 connect_seq; /* identify the most recent connection |
273 | attempt for this connection, client */ | |
274 | u32 peer_global_seq; /* peer's global seq for this connection */ | |
275 | ||
262614c4 | 276 | struct ceph_auth_handshake *auth; |
4e7a5dcd | 277 | int auth_retry; /* true if we need a newer authorizer */ |
4e7a5dcd | 278 | |
ec302645 SW |
279 | struct mutex mutex; |
280 | ||
31b8006e | 281 | /* out queue */ |
31b8006e SW |
282 | struct list_head out_queue; |
283 | struct list_head out_sent; /* sending or sent but unacked */ | |
284 | u64 out_seq; /* last message queued for send */ | |
31b8006e SW |
285 | |
286 | u64 in_seq, in_seq_acked; /* last message received, acked */ | |
287 | ||
288 | /* connection negotiation temps */ | |
289 | char in_banner[CEPH_BANNER_MAX_LEN]; | |
a16cb1f7 SW |
290 | struct ceph_msg_connect out_connect; |
291 | struct ceph_msg_connect_reply in_reply; | |
31b8006e SW |
292 | struct ceph_entity_addr actual_peer_addr; |
293 | ||
294 | /* message out temps */ | |
67645d76 | 295 | struct ceph_msg_header out_hdr; |
31b8006e SW |
296 | struct ceph_msg *out_msg; /* sending message (== tail of |
297 | out_sent) */ | |
c86a2930 | 298 | bool out_msg_done; |
31b8006e SW |
299 | |
300 | struct kvec out_kvec[8], /* sending header/footer data */ | |
301 | *out_kvec_cur; | |
302 | int out_kvec_left; /* kvec's left in out_kvec */ | |
303 | int out_skip; /* skip this many bytes */ | |
304 | int out_kvec_bytes; /* total bytes left */ | |
31b8006e SW |
305 | int out_more; /* there is more data after the kvecs */ |
306 | __le64 out_temp_ack; /* for writing an ack */ | |
7f61f545 ID |
307 | struct ceph_timespec out_temp_keepalive2; /* for writing keepalive2 |
308 | stamp */ | |
31b8006e SW |
309 | |
310 | /* message in temps */ | |
311 | struct ceph_msg_header in_hdr; | |
312 | struct ceph_msg *in_msg; | |
31b8006e SW |
313 | u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ |
314 | ||
315 | char in_tag; /* protocol control byte */ | |
316 | int in_base_pos; /* bytes read */ | |
317 | __le64 in_temp_ack; /* for reading an ack */ | |
318 | ||
473bd2d7 | 319 | struct timespec64 last_keepalive_ack; /* keepalive2 ack stamp */ |
8b9558aa | 320 | |
31b8006e SW |
321 | struct delayed_work work; /* send|recv work */ |
322 | unsigned long delay; /* current delay interval */ | |
323 | }; | |
324 | ||
325 | ||
b726ec97 JL |
326 | extern const char *ceph_pr_addr(const struct ceph_entity_addr *addr); |
327 | ||
31b8006e SW |
328 | extern int ceph_parse_ips(const char *c, const char *end, |
329 | struct ceph_entity_addr *addr, | |
330 | int max_count, int *count); | |
331 | ||
332 | ||
333 | extern int ceph_msgr_init(void); | |
334 | extern void ceph_msgr_exit(void); | |
a922d38f | 335 | extern void ceph_msgr_flush(void); |
31b8006e | 336 | |
15d9882c | 337 | extern void ceph_messenger_init(struct ceph_messenger *msgr, |
859bff51 | 338 | struct ceph_entity_addr *myaddr); |
757856d2 | 339 | extern void ceph_messenger_fini(struct ceph_messenger *msgr); |
31b8006e | 340 | |
1bfd89f4 AE |
341 | extern void ceph_con_init(struct ceph_connection *con, void *private, |
342 | const struct ceph_connection_operations *ops, | |
b7a9e5dd | 343 | struct ceph_messenger *msgr); |
31b8006e | 344 | extern void ceph_con_open(struct ceph_connection *con, |
b7a9e5dd | 345 | __u8 entity_type, __u64 entity_num, |
31b8006e | 346 | struct ceph_entity_addr *addr); |
87b315a5 | 347 | extern bool ceph_con_opened(struct ceph_connection *con); |
31b8006e SW |
348 | extern void ceph_con_close(struct ceph_connection *con); |
349 | extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); | |
6740a845 AE |
350 | |
351 | extern void ceph_msg_revoke(struct ceph_msg *msg); | |
8921d114 AE |
352 | extern void ceph_msg_revoke_incoming(struct ceph_msg *msg); |
353 | ||
31b8006e | 354 | extern void ceph_con_keepalive(struct ceph_connection *con); |
8b9558aa YZ |
355 | extern bool ceph_con_keepalive_expired(struct ceph_connection *con, |
356 | unsigned long interval); | |
31b8006e | 357 | |
90af3602 | 358 | extern void ceph_msg_data_add_pages(struct ceph_msg *msg, struct page **pages, |
f1baeb2b | 359 | size_t length, size_t alignment); |
90af3602 | 360 | extern void ceph_msg_data_add_pagelist(struct ceph_msg *msg, |
27fa8385 | 361 | struct ceph_pagelist *pagelist); |
ea96571f | 362 | #ifdef CONFIG_BLOCK |
5359a17d ID |
363 | void ceph_msg_data_add_bio(struct ceph_msg *msg, struct ceph_bio_iter *bio_pos, |
364 | u32 length); | |
ea96571f | 365 | #endif /* CONFIG_BLOCK */ |
b9e281c2 ID |
366 | void ceph_msg_data_add_bvecs(struct ceph_msg *msg, |
367 | struct ceph_bvec_iter *bvec_pos); | |
02afca6c | 368 | |
0d9c1ab3 ID |
369 | struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items, |
370 | gfp_t flags, bool can_fail); | |
b61c2763 SW |
371 | extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, |
372 | bool can_fail); | |
31b8006e | 373 | |
0215e44b ID |
374 | extern struct ceph_msg *ceph_msg_get(struct ceph_msg *msg); |
375 | extern void ceph_msg_put(struct ceph_msg *msg); | |
31b8006e | 376 | |
9ec7cab1 SW |
377 | extern void ceph_msg_dump(struct ceph_msg *msg); |
378 | ||
31b8006e | 379 | #endif |