]>
Commit | Line | Data |
---|---|---|
36bedb3f AG |
1 | /* |
2 | * Copyright (c) 2018 Chelsio Communications, Inc. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or modify | |
5 | * it under the terms of the GNU General Public License version 2 as | |
6 | * published by the Free Software Foundation. | |
7 | * | |
8 | * Written by: Atul Gupta (atul.gupta@chelsio.com) | |
9 | */ | |
10 | ||
11 | #include <linux/module.h> | |
12 | #include <linux/list.h> | |
13 | #include <linux/workqueue.h> | |
14 | #include <linux/skbuff.h> | |
15 | #include <linux/timer.h> | |
16 | #include <linux/notifier.h> | |
17 | #include <linux/inetdevice.h> | |
18 | #include <linux/ip.h> | |
19 | #include <linux/tcp.h> | |
20 | #include <linux/sched/signal.h> | |
21 | #include <net/tcp.h> | |
22 | #include <net/busy_poll.h> | |
23 | #include <crypto/aes.h> | |
24 | ||
25 | #include "chtls.h" | |
26 | #include "chtls_cm.h" | |
27 | ||
28 | static bool is_tls_tx(struct chtls_sock *csk) | |
29 | { | |
30 | return csk->tlshws.txkey >= 0; | |
31 | } | |
32 | ||
b647993f AG |
33 | static bool is_tls_rx(struct chtls_sock *csk) |
34 | { | |
35 | return csk->tlshws.rxkey >= 0; | |
36 | } | |
37 | ||
36bedb3f AG |
38 | static int data_sgl_len(const struct sk_buff *skb) |
39 | { | |
40 | unsigned int cnt; | |
41 | ||
42 | cnt = skb_shinfo(skb)->nr_frags; | |
43 | return sgl_len(cnt) * 8; | |
44 | } | |
45 | ||
46 | static int nos_ivs(struct sock *sk, unsigned int size) | |
47 | { | |
48 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
49 | ||
50 | return DIV_ROUND_UP(size, csk->tlshws.mfs); | |
51 | } | |
52 | ||
53 | static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb) | |
54 | { | |
55 | int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE; | |
56 | int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb); | |
57 | ||
58 | if ((hlen + KEY_ON_MEM_SZ + ivs_size) < | |
59 | MAX_IMM_OFLD_TX_DATA_WR_LEN) { | |
60 | ULP_SKB_CB(skb)->ulp.tls.iv = 1; | |
61 | return 1; | |
62 | } | |
63 | ULP_SKB_CB(skb)->ulp.tls.iv = 0; | |
64 | return 0; | |
65 | } | |
66 | ||
67 | static int max_ivs_size(struct sock *sk, int size) | |
68 | { | |
69 | return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE; | |
70 | } | |
71 | ||
72 | static int ivs_size(struct sock *sk, const struct sk_buff *skb) | |
73 | { | |
74 | return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) * | |
75 | CIPHER_BLOCK_SIZE) : 0; | |
76 | } | |
77 | ||
78 | static int flowc_wr_credits(int nparams, int *flowclenp) | |
79 | { | |
80 | int flowclen16, flowclen; | |
81 | ||
82 | flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]); | |
83 | flowclen16 = DIV_ROUND_UP(flowclen, 16); | |
84 | flowclen = flowclen16 * 16; | |
85 | ||
86 | if (flowclenp) | |
87 | *flowclenp = flowclen; | |
88 | ||
89 | return flowclen16; | |
90 | } | |
91 | ||
92 | static struct sk_buff *create_flowc_wr_skb(struct sock *sk, | |
93 | struct fw_flowc_wr *flowc, | |
94 | int flowclen) | |
95 | { | |
96 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
97 | struct sk_buff *skb; | |
98 | ||
99 | skb = alloc_skb(flowclen, GFP_ATOMIC); | |
100 | if (!skb) | |
101 | return NULL; | |
102 | ||
103 | memcpy(__skb_put(skb, flowclen), flowc, flowclen); | |
104 | skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA); | |
105 | ||
106 | return skb; | |
107 | } | |
108 | ||
109 | static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc, | |
110 | int flowclen) | |
111 | { | |
112 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
113 | struct tcp_sock *tp = tcp_sk(sk); | |
36bedb3f | 114 | struct sk_buff *skb; |
b647993f | 115 | int flowclen16; |
36bedb3f AG |
116 | int ret; |
117 | ||
b647993f AG |
118 | flowclen16 = flowclen / 16; |
119 | ||
36bedb3f AG |
120 | if (csk_flag(sk, CSK_TX_DATA_SENT)) { |
121 | skb = create_flowc_wr_skb(sk, flowc, flowclen); | |
122 | if (!skb) | |
123 | return -ENOMEM; | |
124 | ||
125 | skb_entail(sk, skb, | |
126 | ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND); | |
127 | return 0; | |
128 | } | |
129 | ||
130 | ret = cxgb4_immdata_send(csk->egress_dev, | |
131 | csk->txq_idx, | |
132 | flowc, flowclen); | |
133 | if (!ret) | |
134 | return flowclen16; | |
135 | skb = create_flowc_wr_skb(sk, flowc, flowclen); | |
136 | if (!skb) | |
137 | return -ENOMEM; | |
138 | send_or_defer(sk, tp, skb, 0); | |
139 | return flowclen16; | |
140 | } | |
141 | ||
142 | static u8 tcp_state_to_flowc_state(u8 state) | |
143 | { | |
144 | switch (state) { | |
145 | case TCP_ESTABLISHED: | |
146 | return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; | |
147 | case TCP_CLOSE_WAIT: | |
148 | return FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT; | |
149 | case TCP_FIN_WAIT1: | |
150 | return FW_FLOWC_MNEM_TCPSTATE_FINWAIT1; | |
151 | case TCP_CLOSING: | |
152 | return FW_FLOWC_MNEM_TCPSTATE_CLOSING; | |
153 | case TCP_LAST_ACK: | |
154 | return FW_FLOWC_MNEM_TCPSTATE_LASTACK; | |
155 | case TCP_FIN_WAIT2: | |
156 | return FW_FLOWC_MNEM_TCPSTATE_FINWAIT2; | |
157 | } | |
158 | ||
159 | return FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED; | |
160 | } | |
161 | ||
162 | int send_tx_flowc_wr(struct sock *sk, int compl, | |
163 | u32 snd_nxt, u32 rcv_nxt) | |
164 | { | |
165 | struct flowc_packed { | |
166 | struct fw_flowc_wr fc; | |
167 | struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX]; | |
168 | } __packed sflowc; | |
169 | int nparams, paramidx, flowclen16, flowclen; | |
170 | struct fw_flowc_wr *flowc; | |
171 | struct chtls_sock *csk; | |
172 | struct tcp_sock *tp; | |
173 | ||
174 | csk = rcu_dereference_sk_user_data(sk); | |
175 | tp = tcp_sk(sk); | |
176 | memset(&sflowc, 0, sizeof(sflowc)); | |
177 | flowc = &sflowc.fc; | |
178 | ||
179 | #define FLOWC_PARAM(__m, __v) \ | |
180 | do { \ | |
181 | flowc->mnemval[paramidx].mnemonic = FW_FLOWC_MNEM_##__m; \ | |
182 | flowc->mnemval[paramidx].val = cpu_to_be32(__v); \ | |
183 | paramidx++; \ | |
184 | } while (0) | |
185 | ||
186 | paramidx = 0; | |
187 | ||
188 | FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf)); | |
189 | FLOWC_PARAM(CH, csk->tx_chan); | |
190 | FLOWC_PARAM(PORT, csk->tx_chan); | |
191 | FLOWC_PARAM(IQID, csk->rss_qid); | |
192 | FLOWC_PARAM(SNDNXT, tp->snd_nxt); | |
193 | FLOWC_PARAM(RCVNXT, tp->rcv_nxt); | |
194 | FLOWC_PARAM(SNDBUF, csk->sndbuf); | |
195 | FLOWC_PARAM(MSS, tp->mss_cache); | |
196 | FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state)); | |
197 | ||
198 | if (SND_WSCALE(tp)) | |
199 | FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp)); | |
200 | ||
201 | if (csk->ulp_mode == ULP_MODE_TLS) | |
202 | FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS); | |
203 | ||
204 | if (csk->tlshws.fcplenmax) | |
205 | FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax); | |
206 | ||
207 | nparams = paramidx; | |
208 | #undef FLOWC_PARAM | |
209 | ||
210 | flowclen16 = flowc_wr_credits(nparams, &flowclen); | |
211 | flowc->op_to_nparams = | |
212 | cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) | | |
213 | FW_WR_COMPL_V(compl) | | |
214 | FW_FLOWC_WR_NPARAMS_V(nparams)); | |
215 | flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) | | |
216 | FW_WR_FLOWID_V(csk->tid)); | |
217 | ||
218 | return send_flowc_wr(sk, flowc, flowclen); | |
219 | } | |
220 | ||
221 | /* Copy IVs to WR */ | |
222 | static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb) | |
223 | ||
224 | { | |
225 | struct chtls_sock *csk; | |
226 | unsigned char *iv_loc; | |
227 | struct chtls_hws *hws; | |
228 | unsigned char *ivs; | |
229 | u16 number_of_ivs; | |
230 | struct page *page; | |
231 | int err = 0; | |
232 | ||
233 | csk = rcu_dereference_sk_user_data(sk); | |
234 | hws = &csk->tlshws; | |
235 | number_of_ivs = nos_ivs(sk, skb->len); | |
236 | ||
237 | if (number_of_ivs > MAX_IVS_PAGE) { | |
238 | pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs); | |
239 | return -ENOMEM; | |
240 | } | |
241 | ||
242 | /* generate the IVs */ | |
6da2ec56 | 243 | ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC); |
36bedb3f AG |
244 | if (!ivs) |
245 | return -ENOMEM; | |
246 | get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE); | |
247 | ||
248 | if (skb_ulp_tls_iv_imm(skb)) { | |
249 | /* send the IVs as immediate data in the WR */ | |
250 | iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs * | |
251 | CIPHER_BLOCK_SIZE); | |
252 | if (iv_loc) | |
253 | memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE); | |
254 | ||
255 | hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE; | |
256 | } else { | |
257 | /* Send the IVs as sgls */ | |
258 | /* Already accounted IV DSGL for credits */ | |
259 | skb_shinfo(skb)->nr_frags--; | |
260 | page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0); | |
261 | if (!page) { | |
262 | pr_info("%s : Page allocation for IVs failed\n", | |
263 | __func__); | |
264 | err = -ENOMEM; | |
265 | goto out; | |
266 | } | |
267 | memcpy(page_address(page), ivs, number_of_ivs * | |
268 | CIPHER_BLOCK_SIZE); | |
269 | skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0, | |
270 | number_of_ivs * CIPHER_BLOCK_SIZE); | |
271 | hws->ivsize = 0; | |
272 | } | |
273 | out: | |
274 | kfree(ivs); | |
275 | return err; | |
276 | } | |
277 | ||
278 | /* Copy Key to WR */ | |
279 | static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb) | |
280 | { | |
281 | struct ulptx_sc_memrd *sc_memrd; | |
282 | struct chtls_sock *csk; | |
283 | struct chtls_dev *cdev; | |
284 | struct ulptx_idata *sc; | |
285 | struct chtls_hws *hws; | |
286 | u32 immdlen; | |
287 | int kaddr; | |
288 | ||
289 | csk = rcu_dereference_sk_user_data(sk); | |
290 | hws = &csk->tlshws; | |
291 | cdev = csk->cdev; | |
292 | ||
293 | immdlen = sizeof(*sc) + sizeof(*sc_memrd); | |
294 | kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey); | |
295 | sc = (struct ulptx_idata *)__skb_push(skb, immdlen); | |
296 | if (sc) { | |
297 | sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP)); | |
298 | sc->len = htonl(0); | |
299 | sc_memrd = (struct ulptx_sc_memrd *)(sc + 1); | |
300 | sc_memrd->cmd_to_len = | |
301 | htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) | | |
302 | ULP_TX_SC_MORE_V(1) | | |
303 | ULPTX_LEN16_V(hws->keylen >> 4)); | |
304 | sc_memrd->addr = htonl(kaddr); | |
305 | } | |
306 | } | |
307 | ||
308 | static u64 tlstx_incr_seqnum(struct chtls_hws *hws) | |
309 | { | |
310 | return hws->tx_seq_no++; | |
311 | } | |
312 | ||
313 | static bool is_sg_request(const struct sk_buff *skb) | |
314 | { | |
315 | return skb->peeked || | |
316 | (skb->len > MAX_IMM_ULPTX_WR_LEN); | |
317 | } | |
318 | ||
319 | /* | |
320 | * Returns true if an sk_buff carries urgent data. | |
321 | */ | |
322 | static bool skb_urgent(struct sk_buff *skb) | |
323 | { | |
324 | return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG; | |
325 | } | |
326 | ||
327 | /* TLS content type for CPL SFO */ | |
328 | static unsigned char tls_content_type(unsigned char content_type) | |
329 | { | |
330 | switch (content_type) { | |
331 | case TLS_HDR_TYPE_CCS: | |
332 | return CPL_TX_TLS_SFO_TYPE_CCS; | |
333 | case TLS_HDR_TYPE_ALERT: | |
334 | return CPL_TX_TLS_SFO_TYPE_ALERT; | |
335 | case TLS_HDR_TYPE_HANDSHAKE: | |
336 | return CPL_TX_TLS_SFO_TYPE_HANDSHAKE; | |
337 | case TLS_HDR_TYPE_HEARTBEAT: | |
338 | return CPL_TX_TLS_SFO_TYPE_HEARTBEAT; | |
339 | } | |
340 | return CPL_TX_TLS_SFO_TYPE_DATA; | |
341 | } | |
342 | ||
343 | static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb, | |
344 | int dlen, int tls_immd, u32 credits, | |
345 | int expn, int pdus) | |
346 | { | |
347 | struct fw_tlstx_data_wr *req_wr; | |
348 | struct cpl_tx_tls_sfo *req_cpl; | |
349 | unsigned int wr_ulp_mode_force; | |
350 | struct tls_scmd *updated_scmd; | |
351 | unsigned char data_type; | |
352 | struct chtls_sock *csk; | |
353 | struct net_device *dev; | |
354 | struct chtls_hws *hws; | |
355 | struct tls_scmd *scmd; | |
356 | struct adapter *adap; | |
357 | unsigned char *req; | |
358 | int immd_len; | |
359 | int iv_imm; | |
360 | int len; | |
361 | ||
362 | csk = rcu_dereference_sk_user_data(sk); | |
363 | iv_imm = skb_ulp_tls_iv_imm(skb); | |
364 | dev = csk->egress_dev; | |
365 | adap = netdev2adap(dev); | |
366 | hws = &csk->tlshws; | |
367 | scmd = &hws->scmd; | |
368 | len = dlen + expn; | |
369 | ||
370 | dlen = (dlen < hws->mfs) ? dlen : hws->mfs; | |
371 | atomic_inc(&adap->chcr_stats.tls_pdu_tx); | |
372 | ||
373 | updated_scmd = scmd; | |
374 | updated_scmd->seqno_numivs &= 0xffffff80; | |
375 | updated_scmd->seqno_numivs |= SCMD_NUM_IVS_V(pdus); | |
376 | hws->scmd = *updated_scmd; | |
377 | ||
378 | req = (unsigned char *)__skb_push(skb, sizeof(struct cpl_tx_tls_sfo)); | |
379 | req_cpl = (struct cpl_tx_tls_sfo *)req; | |
380 | req = (unsigned char *)__skb_push(skb, (sizeof(struct | |
381 | fw_tlstx_data_wr))); | |
382 | ||
383 | req_wr = (struct fw_tlstx_data_wr *)req; | |
384 | immd_len = (tls_immd ? dlen : 0); | |
385 | req_wr->op_to_immdlen = | |
386 | htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) | | |
387 | FW_TLSTX_DATA_WR_COMPL_V(1) | | |
388 | FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len)); | |
389 | req_wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) | | |
390 | FW_TLSTX_DATA_WR_LEN16_V(credits)); | |
391 | wr_ulp_mode_force = TX_ULP_MODE_V(ULP_MODE_TLS); | |
392 | ||
393 | if (is_sg_request(skb)) | |
394 | wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | | |
395 | ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 : | |
396 | FW_OFLD_TX_DATA_WR_SHOVE_F); | |
397 | ||
398 | req_wr->lsodisable_to_flags = | |
399 | htonl(TX_ULP_MODE_V(ULP_MODE_TLS) | | |
400 | FW_OFLD_TX_DATA_WR_URGENT_V(skb_urgent(skb)) | | |
401 | T6_TX_FORCE_F | wr_ulp_mode_force | | |
402 | TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) && | |
403 | skb_queue_empty(&csk->txq))); | |
404 | ||
405 | req_wr->ctxloc_to_exp = | |
406 | htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) | | |
407 | FW_TLSTX_DATA_WR_EXP_V(expn) | | |
408 | FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) | | |
409 | FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) | | |
410 | FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4)); | |
411 | ||
412 | /* Fill in the length */ | |
413 | req_wr->plen = htonl(len); | |
414 | req_wr->mfs = htons(hws->mfs); | |
415 | req_wr->adjustedplen_pkd = | |
416 | htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen)); | |
417 | req_wr->expinplenmax_pkd = | |
418 | htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion)); | |
419 | req_wr->pdusinplenmax_pkd = | |
420 | FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus); | |
421 | req_wr->r10 = 0; | |
422 | ||
423 | data_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type); | |
424 | req_cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) | | |
425 | CPL_TX_TLS_SFO_DATA_TYPE_V(data_type) | | |
426 | CPL_TX_TLS_SFO_CPL_LEN_V(2) | | |
427 | CPL_TX_TLS_SFO_SEG_LEN_V(dlen)); | |
428 | req_cpl->pld_len = htonl(len - expn); | |
429 | ||
430 | req_cpl->type_protover = htonl(CPL_TX_TLS_SFO_TYPE_V | |
431 | ((data_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ? | |
432 | TLS_HDR_TYPE_HEARTBEAT : 0) | | |
433 | CPL_TX_TLS_SFO_PROTOVER_V(0)); | |
434 | ||
435 | /* create the s-command */ | |
436 | req_cpl->r1_lo = 0; | |
437 | req_cpl->seqno_numivs = cpu_to_be32(hws->scmd.seqno_numivs); | |
438 | req_cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen); | |
439 | req_cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws)); | |
440 | } | |
441 | ||
442 | /* | |
443 | * Calculate the TLS data expansion size | |
444 | */ | |
445 | static int chtls_expansion_size(struct sock *sk, int data_len, | |
446 | int fullpdu, | |
447 | unsigned short *pducnt) | |
448 | { | |
449 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
450 | struct chtls_hws *hws = &csk->tlshws; | |
451 | struct tls_scmd *scmd = &hws->scmd; | |
452 | int fragsize = hws->mfs; | |
453 | int expnsize = 0; | |
454 | int fragleft; | |
455 | int fragcnt; | |
456 | int expppdu; | |
457 | ||
458 | if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) == | |
459 | SCMD_CIPH_MODE_AES_GCM) { | |
460 | expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE + | |
461 | TLS_HEADER_LENGTH; | |
462 | ||
463 | if (fullpdu) { | |
464 | *pducnt = data_len / (expppdu + fragsize); | |
465 | if (*pducnt > 32) | |
466 | *pducnt = 32; | |
467 | else if (!*pducnt) | |
468 | *pducnt = 1; | |
469 | expnsize = (*pducnt) * expppdu; | |
470 | return expnsize; | |
471 | } | |
472 | fragcnt = (data_len / fragsize); | |
473 | expnsize = fragcnt * expppdu; | |
474 | fragleft = data_len % fragsize; | |
475 | if (fragleft > 0) | |
476 | expnsize += expppdu; | |
477 | } | |
478 | return expnsize; | |
479 | } | |
480 | ||
481 | /* WR with IV, KEY and CPL SFO added */ | |
482 | static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb, | |
483 | int tls_tx_imm, int tls_len, u32 credits) | |
484 | { | |
485 | unsigned short pdus_per_ulp = 0; | |
486 | struct chtls_sock *csk; | |
487 | struct chtls_hws *hws; | |
488 | int expn_sz; | |
489 | int pdus; | |
490 | ||
491 | csk = rcu_dereference_sk_user_data(sk); | |
492 | hws = &csk->tlshws; | |
493 | pdus = DIV_ROUND_UP(tls_len, hws->mfs); | |
494 | expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL); | |
495 | if (!hws->compute) { | |
496 | hws->expansion = chtls_expansion_size(sk, | |
497 | hws->fcplenmax, | |
498 | 1, &pdus_per_ulp); | |
499 | hws->pdus = pdus_per_ulp; | |
500 | hws->adjustlen = hws->pdus * | |
501 | ((hws->expansion / hws->pdus) + hws->mfs); | |
502 | hws->compute = 1; | |
503 | } | |
504 | if (tls_copy_ivs(sk, skb)) | |
505 | return; | |
506 | tls_copy_tx_key(sk, skb); | |
507 | tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus); | |
508 | hws->tx_seq_no += (pdus - 1); | |
509 | } | |
510 | ||
511 | static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb, | |
512 | unsigned int immdlen, int len, | |
513 | u32 credits, u32 compl) | |
514 | { | |
515 | struct fw_ofld_tx_data_wr *req; | |
516 | unsigned int wr_ulp_mode_force; | |
517 | struct chtls_sock *csk; | |
518 | unsigned int opcode; | |
519 | ||
520 | csk = rcu_dereference_sk_user_data(sk); | |
521 | opcode = FW_OFLD_TX_DATA_WR; | |
522 | ||
523 | req = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*req)); | |
524 | req->op_to_immdlen = htonl(WR_OP_V(opcode) | | |
525 | FW_WR_COMPL_V(compl) | | |
526 | FW_WR_IMMDLEN_V(immdlen)); | |
527 | req->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) | | |
528 | FW_WR_LEN16_V(credits)); | |
529 | ||
530 | wr_ulp_mode_force = TX_ULP_MODE_V(csk->ulp_mode); | |
531 | if (is_sg_request(skb)) | |
532 | wr_ulp_mode_force |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F | | |
533 | ((tcp_sk(sk)->nonagle & TCP_NAGLE_OFF) ? 0 : | |
534 | FW_OFLD_TX_DATA_WR_SHOVE_F); | |
535 | ||
536 | req->tunnel_to_proxy = htonl(wr_ulp_mode_force | | |
537 | FW_OFLD_TX_DATA_WR_URGENT_V(skb_urgent(skb)) | | |
538 | FW_OFLD_TX_DATA_WR_SHOVE_V((!csk_flag | |
539 | (sk, CSK_TX_MORE_DATA)) && | |
540 | skb_queue_empty(&csk->txq))); | |
541 | req->plen = htonl(len); | |
542 | } | |
543 | ||
544 | static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb, | |
545 | bool size) | |
546 | { | |
547 | int wr_size; | |
548 | ||
549 | wr_size = TLS_WR_CPL_LEN; | |
550 | wr_size += KEY_ON_MEM_SZ; | |
551 | wr_size += ivs_size(csk->sk, skb); | |
552 | ||
553 | if (size) | |
554 | return wr_size; | |
555 | ||
556 | /* frags counted for IV dsgl */ | |
557 | if (!skb_ulp_tls_iv_imm(skb)) | |
558 | skb_shinfo(skb)->nr_frags++; | |
559 | ||
560 | return wr_size; | |
561 | } | |
562 | ||
563 | static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb) | |
564 | { | |
565 | int length = skb->len; | |
566 | ||
567 | if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN) | |
568 | return false; | |
569 | ||
570 | if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { | |
571 | /* Check TLS header len for Immediate */ | |
572 | if (csk->ulp_mode == ULP_MODE_TLS && | |
573 | skb_ulp_tls_inline(skb)) | |
574 | length += chtls_wr_size(csk, skb, true); | |
575 | else | |
576 | length += sizeof(struct fw_ofld_tx_data_wr); | |
577 | ||
578 | return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN; | |
579 | } | |
580 | return true; | |
581 | } | |
582 | ||
583 | static unsigned int calc_tx_flits(const struct sk_buff *skb, | |
584 | unsigned int immdlen) | |
585 | { | |
586 | unsigned int flits, cnt; | |
587 | ||
588 | flits = immdlen / 8; /* headers */ | |
589 | cnt = skb_shinfo(skb)->nr_frags; | |
590 | if (skb_tail_pointer(skb) != skb_transport_header(skb)) | |
591 | cnt++; | |
592 | return flits + sgl_len(cnt); | |
593 | } | |
594 | ||
/*
 * ARP failure handler installed on outgoing skbs: simply frees @skb.
 * @handle is not used.
 */
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
	kfree_skb(skb);
}
599 | ||
/*
 * Drain the socket's TX queue (csk->txq) into the hardware while work
 * request credits remain.
 *
 * For each skb: work out how many 16-byte credits it needs, send the
 * FLOWC work request first if CSK_TX_DATA_SENT is not yet set, unlink the
 * skb, charge the credits, prepend the TLS or plain TX data WR header for
 * skbs flagged ULPCB_FLAG_NEED_HDR, and hand it to cxgb4_l2t_send().
 *
 * @comp: when non-zero, request a completion on a WR that is the only
 *        unacknowledged one.
 *
 * Returns the summed truesize of all skbs sent; the same amount is
 * subtracted from sk->sk_wmem_queued.
 */
600 | int chtls_push_frames(struct chtls_sock *csk, int comp) | |
601 | { | |
602 | struct chtls_hws *hws = &csk->tlshws; | |
603 | struct tcp_sock *tp; | |
604 | struct sk_buff *skb; | |
605 | int total_size = 0; | |
606 | struct sock *sk; | |
607 | int wr_size; | |
608 | ||
609 | wr_size = sizeof(struct fw_ofld_tx_data_wr); | |
610 | sk = csk->sk; | |
611 | tp = tcp_sk(sk); | |
612 | ||
613 | if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE))) | |
614 | return 0; | |
615 | ||
616 | if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN))) | |
617 | return 0; | |
618 | ||
/*
 * Keep going while credits remain and the queue is non-empty; a head skb
 * flagged ULPCB_FLAG_HOLD is only sent when other skbs are queued behind it.
 */
619 | while (csk->wr_credits && (skb = skb_peek(&csk->txq)) && | |
620 | (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) || | |
621 | skb_queue_len(&csk->txq) > 1)) { | |
622 | unsigned int credit_len = skb->len; | |
623 | unsigned int credits_needed; | |
624 | unsigned int completion = 0; | |
625 | int tls_len = skb->len;/* TLS data len before IV/key */ | |
626 | unsigned int immdlen; | |
627 | int len = skb->len; /* length [ulp bytes] inserted by hw */ | |
628 | int flowclen16 = 0; | |
629 | int tls_tx_imm = 0; | |
630 | ||
/*
 * Immediate-data skbs are charged for their full length; others are
 * charged for the flits of header plus scatter/gather list.
 * NOTE(review): wr_size is not reset per iteration, so a value set for a
 * TLS-inline skb carries over to later skbs -- confirm this is intended.
 */
631 | immdlen = skb->len; | |
632 | if (!is_ofld_imm(csk, skb)) { | |
633 | immdlen = skb_transport_offset(skb); | |
634 | if (skb_ulp_tls_inline(skb)) | |
635 | wr_size = chtls_wr_size(csk, skb, false); | |
636 | credit_len = 8 * calc_tx_flits(skb, immdlen); | |
637 | } else { | |
638 | if (skb_ulp_tls_inline(skb)) { | |
639 | wr_size = chtls_wr_size(csk, skb, false); | |
640 | tls_tx_imm = 1; | |
641 | } | |
642 | } | |
643 | if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) | |
644 | credit_len += wr_size; | |
645 | credits_needed = DIV_ROUND_UP(credit_len, 16); | |
/* The FLOWC WR must precede the first data WR; its credits are non-data. */
646 | if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) { | |
647 | flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt, | |
648 | tp->rcv_nxt); | |
649 | if (flowclen16 <= 0) | |
650 | break; | |
651 | csk->wr_credits -= flowclen16; | |
652 | csk->wr_unacked += flowclen16; | |
653 | csk->wr_nondata += flowclen16; | |
654 | csk_set_flag(csk, CSK_TX_DATA_SENT); | |
655 | } | |
656 | ||
/*
 * Not enough credits: leave the skb queued, undoing the nr_frags++ that
 * chtls_wr_size(..., false) applied for a non-immediate IV.
 */
657 | if (csk->wr_credits < credits_needed) { | |
658 | if (skb_ulp_tls_inline(skb) && | |
659 | !skb_ulp_tls_iv_imm(skb)) | |
660 | skb_shinfo(skb)->nr_frags--; | |
661 | break; | |
662 | } | |
663 | ||
664 | __skb_unlink(skb, &csk->txq); | |
665 | skb_set_queue_mapping(skb, (csk->txq_idx << 1) | | |
666 | CPL_PRIORITY_DATA); | |
667 | if (hws->ofld) | |
668 | hws->txqid = (skb->queue_mapping >> 1); | |
/* skb->csum is reused to carry the credits charged for this WR. */
669 | skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata); | |
670 | csk->wr_credits -= credits_needed; | |
671 | csk->wr_unacked += credits_needed; | |
672 | csk->wr_nondata = 0; | |
673 | enqueue_wr(csk, skb); | |
674 | ||
675 | if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) { | |
/*
 * Request a completion when asked for a lone WR, when the skb is flagged
 * for it, or when half of the maximum credits are unacknowledged.
 */
676 | if ((comp && csk->wr_unacked == credits_needed) || | |
677 | (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) || | |
678 | csk->wr_unacked >= csk->wr_max_credits / 2) { | |
679 | completion = 1; | |
680 | csk->wr_unacked = 0; | |
681 | } | |
682 | if (skb_ulp_tls_inline(skb)) | |
683 | make_tlstx_data_wr(sk, skb, tls_tx_imm, | |
684 | tls_len, credits_needed); | |
685 | else | |
686 | make_tx_data_wr(sk, skb, immdlen, len, | |
687 | credits_needed, completion); | |
688 | tp->snd_nxt += len; | |
689 | tp->lsndtime = tcp_time_stamp(tp); | |
690 | if (completion) | |
691 | ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR; | |
692 | } else { | |
/* Header-less skb: read the opcode from the CPL it already carries. */
693 | struct cpl_close_con_req *req = cplhdr(skb); | |
694 | unsigned int cmd = CPL_OPCODE_G(ntohl | |
695 | (OPCODE_TID(req))); | |
696 | ||
697 | if (cmd == CPL_CLOSE_CON_REQ) | |
698 | csk_set_flag(csk, | |
699 | CSK_CLOSE_CON_REQUESTED); | |
700 | ||
701 | if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) && | |
702 | (csk->wr_unacked >= csk->wr_max_credits / 2)) { | |
703 | req->wr.wr_hi |= htonl(FW_WR_COMPL_F); | |
704 | csk->wr_unacked = 0; | |
705 | } | |
706 | } | |
707 | total_size += skb->truesize; | |
708 | if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER) | |
709 | csk_set_flag(csk, CSK_TX_WAIT_IDLE); | |
710 | t4_set_arp_err_handler(skb, NULL, arp_failure_discard); | |
711 | cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry); | |
712 | } | |
713 | sk->sk_wmem_queued -= total_size; | |
714 | return total_size; | |
715 | } | |
716 | ||
717 | static void mark_urg(struct tcp_sock *tp, int flags, | |
718 | struct sk_buff *skb) | |
719 | { | |
720 | if (unlikely(flags & MSG_OOB)) { | |
721 | tp->snd_up = tp->write_seq; | |
722 | ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG | | |
723 | ULPCB_FLAG_BARRIER | | |
724 | ULPCB_FLAG_NO_APPEND | | |
725 | ULPCB_FLAG_NEED_HDR; | |
726 | } | |
727 | } | |
728 | ||
729 | /* | |
730 | * Returns true if a connection should send more data to TCP engine | |
731 | */ | |
732 | static bool should_push(struct sock *sk) | |
733 | { | |
734 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
735 | struct chtls_dev *cdev = csk->cdev; | |
736 | struct tcp_sock *tp = tcp_sk(sk); | |
737 | ||
738 | /* | |
739 | * If we've released our offload resources there's nothing to do ... | |
740 | */ | |
741 | if (!cdev) | |
742 | return false; | |
743 | ||
744 | /* | |
745 | * If there aren't any work requests in flight, or there isn't enough | |
746 | * data in flight, or Nagle is off then send the current TX_DATA | |
747 | * otherwise hold it and wait to accumulate more data. | |
748 | */ | |
749 | return csk->wr_credits == csk->wr_max_credits || | |
750 | (tp->nonagle & TCP_NAGLE_OFF); | |
751 | } | |
752 | ||
753 | /* | |
754 | * Returns true if a TCP socket is corked. | |
755 | */ | |
756 | static bool corked(const struct tcp_sock *tp, int flags) | |
757 | { | |
758 | return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK); | |
759 | } | |
760 | ||
761 | /* | |
762 | * Returns true if a send should try to push new data. | |
763 | */ | |
764 | static bool send_should_push(struct sock *sk, int flags) | |
765 | { | |
766 | return should_push(sk) && !corked(tcp_sk(sk), flags); | |
767 | } | |
768 | ||
769 | void chtls_tcp_push(struct sock *sk, int flags) | |
770 | { | |
771 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
772 | int qlen = skb_queue_len(&csk->txq); | |
773 | ||
774 | if (likely(qlen)) { | |
775 | struct sk_buff *skb = skb_peek_tail(&csk->txq); | |
776 | struct tcp_sock *tp = tcp_sk(sk); | |
777 | ||
778 | mark_urg(tp, flags, skb); | |
779 | ||
780 | if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) && | |
781 | corked(tp, flags)) { | |
782 | ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD; | |
783 | return; | |
784 | } | |
785 | ||
786 | ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD; | |
787 | if (qlen == 1 && | |
788 | ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || | |
789 | should_push(sk))) | |
790 | chtls_push_frames(csk, 1); | |
791 | } | |
792 | } | |
793 | ||
/*
 * Pick the linear-area size for a new send skb.
 *
 * - Payload larger than SKB_MAX_HEAD(@len): return 0 so that only the
 *   header lives in the main body and all payload goes into page
 *   fragments.
 * - Send that will not be pushed right away: return the maximum head
 *   size so more data can be accumulated.
 * - Otherwise: size it tightly to @io_len.
 *
 * Note: we don't bother compensating for MSS < PAGE_SIZE because it doesn't
 * arise in normal cases and when it does we are just wasting memory.
 */
static int select_size(struct sock *sk, int io_len, int flags, int len)
{
	const int max_head = SKB_MAX_HEAD(len);

	if (io_len > max_head)
		return 0;

	return send_should_push(sk, flags) ? io_len : max_head;
}
822 | ||
823 | void skb_entail(struct sock *sk, struct sk_buff *skb, int flags) | |
824 | { | |
825 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
826 | struct tcp_sock *tp = tcp_sk(sk); | |
827 | ||
828 | ULP_SKB_CB(skb)->seq = tp->write_seq; | |
829 | ULP_SKB_CB(skb)->flags = flags; | |
830 | __skb_queue_tail(&csk->txq, skb); | |
831 | sk->sk_wmem_queued += skb->truesize; | |
832 | ||
833 | if (TCP_PAGE(sk) && TCP_OFF(sk)) { | |
834 | put_page(TCP_PAGE(sk)); | |
835 | TCP_PAGE(sk) = NULL; | |
836 | TCP_OFF(sk) = 0; | |
837 | } | |
838 | } | |
839 | ||
840 | static struct sk_buff *get_tx_skb(struct sock *sk, int size) | |
841 | { | |
842 | struct sk_buff *skb; | |
843 | ||
844 | skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation); | |
845 | if (likely(skb)) { | |
846 | skb_reserve(skb, TX_HEADER_LEN); | |
847 | skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); | |
848 | skb_reset_transport_header(skb); | |
849 | } | |
850 | return skb; | |
851 | } | |
852 | ||
853 | static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy) | |
854 | { | |
855 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
856 | struct sk_buff *skb; | |
857 | ||
858 | skb = alloc_skb(((zcopy ? 0 : size) + TX_TLSHDR_LEN + | |
859 | KEY_ON_MEM_SZ + max_ivs_size(sk, size)), | |
860 | sk->sk_allocation); | |
861 | if (likely(skb)) { | |
862 | skb_reserve(skb, (TX_TLSHDR_LEN + | |
863 | KEY_ON_MEM_SZ + max_ivs_size(sk, size))); | |
864 | skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR); | |
865 | skb_reset_transport_header(skb); | |
866 | ULP_SKB_CB(skb)->ulp.tls.ofld = 1; | |
867 | ULP_SKB_CB(skb)->ulp.tls.type = csk->tlshws.type; | |
868 | } | |
869 | return skb; | |
870 | } | |
871 | ||
872 | static void tx_skb_finalize(struct sk_buff *skb) | |
873 | { | |
874 | struct ulp_skb_cb *cb = ULP_SKB_CB(skb); | |
875 | ||
876 | if (!(cb->flags & ULPCB_FLAG_NO_HDR)) | |
877 | cb->flags = ULPCB_FLAG_NEED_HDR; | |
878 | cb->flags |= ULPCB_FLAG_NO_APPEND; | |
879 | } | |
880 | ||
881 | static void push_frames_if_head(struct sock *sk) | |
882 | { | |
883 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
884 | ||
885 | if (skb_queue_len(&csk->txq) == 1) | |
886 | chtls_push_frames(csk, 1); | |
887 | } | |
888 | ||
889 | static int chtls_skb_copy_to_page_nocache(struct sock *sk, | |
890 | struct iov_iter *from, | |
891 | struct sk_buff *skb, | |
892 | struct page *page, | |
893 | int off, int copy) | |
894 | { | |
895 | int err; | |
896 | ||
897 | err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) + | |
898 | off, copy, skb->len); | |
899 | if (err) | |
900 | return err; | |
901 | ||
902 | skb->len += copy; | |
903 | skb->data_len += copy; | |
904 | skb->truesize += copy; | |
905 | sk->sk_wmem_queued += copy; | |
906 | return 0; | |
907 | } | |
908 | ||
909 | /* Read TLS header to find content type and data length */ | |
1dfe57aa | 910 | static int tls_header_read(struct tls_hdr *thdr, struct iov_iter *from) |
36bedb3f AG |
911 | { |
912 | if (copy_from_iter(thdr, sizeof(*thdr), from) != sizeof(*thdr)) | |
913 | return -EFAULT; | |
1dfe57aa | 914 | return (__force int)cpu_to_be16(thdr->length); |
36bedb3f AG |
915 | } |
916 | ||
3b8305f5 AG |
917 | static int csk_mem_free(struct chtls_dev *cdev, struct sock *sk) |
918 | { | |
919 | return (cdev->max_host_sndbuf - sk->sk_wmem_queued); | |
920 | } | |
921 | ||
922 | static int csk_wait_memory(struct chtls_dev *cdev, | |
923 | struct sock *sk, long *timeo_p) | |
924 | { | |
925 | DEFINE_WAIT_FUNC(wait, woken_wake_function); | |
926 | int sndbuf, err = 0; | |
927 | long current_timeo; | |
928 | long vm_wait = 0; | |
929 | bool noblock; | |
930 | ||
931 | current_timeo = *timeo_p; | |
932 | noblock = (*timeo_p ? false : true); | |
933 | sndbuf = cdev->max_host_sndbuf; | |
934 | if (csk_mem_free(cdev, sk)) { | |
935 | current_timeo = (prandom_u32() % (HZ / 5)) + 2; | |
936 | vm_wait = (prandom_u32() % (HZ / 5)) + 2; | |
937 | } | |
938 | ||
939 | add_wait_queue(sk_sleep(sk), &wait); | |
940 | while (1) { | |
941 | sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); | |
942 | ||
943 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | |
944 | goto do_error; | |
945 | if (!*timeo_p) { | |
946 | if (noblock) | |
947 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | |
948 | goto do_nonblock; | |
949 | } | |
950 | if (signal_pending(current)) | |
951 | goto do_interrupted; | |
952 | sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); | |
953 | if (csk_mem_free(cdev, sk) && !vm_wait) | |
954 | break; | |
955 | ||
956 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | |
957 | sk->sk_write_pending++; | |
958 | sk_wait_event(sk, ¤t_timeo, sk->sk_err || | |
959 | (sk->sk_shutdown & SEND_SHUTDOWN) || | |
960 | (csk_mem_free(cdev, sk) && !vm_wait), &wait); | |
961 | sk->sk_write_pending--; | |
962 | ||
963 | if (vm_wait) { | |
964 | vm_wait -= current_timeo; | |
965 | current_timeo = *timeo_p; | |
966 | if (current_timeo != MAX_SCHEDULE_TIMEOUT) { | |
967 | current_timeo -= vm_wait; | |
968 | if (current_timeo < 0) | |
969 | current_timeo = 0; | |
970 | } | |
971 | vm_wait = 0; | |
972 | } | |
973 | *timeo_p = current_timeo; | |
974 | } | |
975 | do_rm_wq: | |
976 | remove_wait_queue(sk_sleep(sk), &wait); | |
977 | return err; | |
978 | do_error: | |
979 | err = -EPIPE; | |
980 | goto do_rm_wq; | |
981 | do_nonblock: | |
982 | err = -EAGAIN; | |
983 | goto do_rm_wq; | |
984 | do_interrupted: | |
985 | err = sock_intr_errno(*timeo_p); | |
986 | goto do_rm_wq; | |
987 | } | |
988 | ||
36bedb3f AG |
989 | int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) |
990 | { | |
991 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
992 | struct chtls_dev *cdev = csk->cdev; | |
993 | struct tcp_sock *tp = tcp_sk(sk); | |
994 | struct sk_buff *skb; | |
995 | int mss, flags, err; | |
996 | int recordsz = 0; | |
997 | int copied = 0; | |
998 | int hdrlen = 0; | |
999 | long timeo; | |
1000 | ||
1001 | lock_sock(sk); | |
1002 | flags = msg->msg_flags; | |
1003 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); | |
1004 | ||
1005 | if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { | |
1006 | err = sk_stream_wait_connect(sk, &timeo); | |
1007 | if (err) | |
1008 | goto out_err; | |
1009 | } | |
1010 | ||
1011 | sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); | |
1012 | err = -EPIPE; | |
1013 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | |
1014 | goto out_err; | |
1015 | ||
1016 | mss = csk->mss; | |
1017 | csk_set_flag(csk, CSK_TX_MORE_DATA); | |
1018 | ||
1019 | while (msg_data_left(msg)) { | |
1020 | int copy = 0; | |
1021 | ||
1022 | skb = skb_peek_tail(&csk->txq); | |
1023 | if (skb) { | |
1024 | copy = mss - skb->len; | |
1025 | skb->ip_summed = CHECKSUM_UNNECESSARY; | |
1026 | } | |
3b8305f5 AG |
1027 | if (!csk_mem_free(cdev, sk)) |
1028 | goto wait_for_sndbuf; | |
36bedb3f AG |
1029 | |
1030 | if (is_tls_tx(csk) && !csk->tlshws.txleft) { | |
1031 | struct tls_hdr hdr; | |
1032 | ||
1033 | recordsz = tls_header_read(&hdr, &msg->msg_iter); | |
1034 | size -= TLS_HEADER_LENGTH; | |
1035 | hdrlen += TLS_HEADER_LENGTH; | |
1036 | csk->tlshws.txleft = recordsz; | |
1037 | csk->tlshws.type = hdr.type; | |
1038 | if (skb) | |
1039 | ULP_SKB_CB(skb)->ulp.tls.type = hdr.type; | |
1040 | } | |
1041 | ||
1042 | if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || | |
1043 | copy <= 0) { | |
1044 | new_buf: | |
1045 | if (skb) { | |
1046 | tx_skb_finalize(skb); | |
1047 | push_frames_if_head(sk); | |
1048 | } | |
1049 | ||
1050 | if (is_tls_tx(csk)) { | |
1051 | skb = get_record_skb(sk, | |
1052 | select_size(sk, | |
1053 | recordsz, | |
1054 | flags, | |
1055 | TX_TLSHDR_LEN), | |
1056 | false); | |
1057 | } else { | |
1058 | skb = get_tx_skb(sk, | |
1059 | select_size(sk, size, flags, | |
1060 | TX_HEADER_LEN)); | |
1061 | } | |
1062 | if (unlikely(!skb)) | |
1063 | goto wait_for_memory; | |
1064 | ||
1065 | skb->ip_summed = CHECKSUM_UNNECESSARY; | |
1066 | copy = mss; | |
1067 | } | |
1068 | if (copy > size) | |
1069 | copy = size; | |
1070 | ||
1071 | if (skb_tailroom(skb) > 0) { | |
1072 | copy = min(copy, skb_tailroom(skb)); | |
1073 | if (is_tls_tx(csk)) | |
1074 | copy = min_t(int, copy, csk->tlshws.txleft); | |
1075 | err = skb_add_data_nocache(sk, skb, | |
1076 | &msg->msg_iter, copy); | |
1077 | if (err) | |
1078 | goto do_fault; | |
1079 | } else { | |
1080 | int i = skb_shinfo(skb)->nr_frags; | |
1081 | struct page *page = TCP_PAGE(sk); | |
1082 | int pg_size = PAGE_SIZE; | |
1083 | int off = TCP_OFF(sk); | |
1084 | bool merge; | |
1085 | ||
1dfe57aa AG |
1086 | if (!page) |
1087 | goto wait_for_memory; | |
36bedb3f | 1088 | |
1dfe57aa | 1089 | pg_size <<= compound_order(page); |
36bedb3f AG |
1090 | if (off < pg_size && |
1091 | skb_can_coalesce(skb, i, page, off)) { | |
1092 | merge = 1; | |
1093 | goto copy; | |
1094 | } | |
1095 | merge = 0; | |
1096 | if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) : | |
1097 | MAX_SKB_FRAGS)) | |
1098 | goto new_buf; | |
1099 | ||
1100 | if (page && off == pg_size) { | |
1101 | put_page(page); | |
1102 | TCP_PAGE(sk) = page = NULL; | |
1103 | pg_size = PAGE_SIZE; | |
1104 | } | |
1105 | ||
1106 | if (!page) { | |
1107 | gfp_t gfp = sk->sk_allocation; | |
1108 | int order = cdev->send_page_order; | |
1109 | ||
1110 | if (order) { | |
1111 | page = alloc_pages(gfp | __GFP_COMP | | |
1112 | __GFP_NOWARN | | |
1113 | __GFP_NORETRY, | |
1114 | order); | |
1115 | if (page) | |
1116 | pg_size <<= | |
1117 | compound_order(page); | |
1118 | } | |
1119 | if (!page) { | |
1120 | page = alloc_page(gfp); | |
1121 | pg_size = PAGE_SIZE; | |
1122 | } | |
1123 | if (!page) | |
1124 | goto wait_for_memory; | |
1125 | off = 0; | |
1126 | } | |
1127 | copy: | |
1128 | if (copy > pg_size - off) | |
1129 | copy = pg_size - off; | |
1130 | if (is_tls_tx(csk)) | |
1131 | copy = min_t(int, copy, csk->tlshws.txleft); | |
1132 | ||
1133 | err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter, | |
1134 | skb, page, | |
1135 | off, copy); | |
1136 | if (unlikely(err)) { | |
1137 | if (!TCP_PAGE(sk)) { | |
1138 | TCP_PAGE(sk) = page; | |
1139 | TCP_OFF(sk) = 0; | |
1140 | } | |
1141 | goto do_fault; | |
1142 | } | |
1143 | /* Update the skb. */ | |
1144 | if (merge) { | |
1145 | skb_shinfo(skb)->frags[i - 1].size += copy; | |
1146 | } else { | |
1147 | skb_fill_page_desc(skb, i, page, off, copy); | |
1148 | if (off + copy < pg_size) { | |
1149 | /* space left keep page */ | |
1150 | get_page(page); | |
1151 | TCP_PAGE(sk) = page; | |
1152 | } else { | |
1153 | TCP_PAGE(sk) = NULL; | |
1154 | } | |
1155 | } | |
1156 | TCP_OFF(sk) = off + copy; | |
1157 | } | |
1158 | if (unlikely(skb->len == mss)) | |
1159 | tx_skb_finalize(skb); | |
1160 | tp->write_seq += copy; | |
1161 | copied += copy; | |
1162 | size -= copy; | |
1163 | ||
1164 | if (is_tls_tx(csk)) | |
1165 | csk->tlshws.txleft -= copy; | |
1166 | ||
1167 | if (corked(tp, flags) && | |
1168 | (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) | |
1169 | ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; | |
1170 | ||
1171 | if (size == 0) | |
1172 | goto out; | |
1173 | ||
1174 | if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) | |
1175 | push_frames_if_head(sk); | |
1176 | continue; | |
3b8305f5 AG |
1177 | wait_for_sndbuf: |
1178 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | |
36bedb3f | 1179 | wait_for_memory: |
3b8305f5 | 1180 | err = csk_wait_memory(cdev, sk, &timeo); |
36bedb3f AG |
1181 | if (err) |
1182 | goto do_error; | |
1183 | } | |
1184 | out: | |
1185 | csk_reset_flag(csk, CSK_TX_MORE_DATA); | |
1186 | if (copied) | |
1187 | chtls_tcp_push(sk, flags); | |
1188 | done: | |
1189 | release_sock(sk); | |
1190 | return copied + hdrlen; | |
1191 | do_fault: | |
1192 | if (!skb->len) { | |
1193 | __skb_unlink(skb, &csk->txq); | |
1194 | sk->sk_wmem_queued -= skb->truesize; | |
1195 | __kfree_skb(skb); | |
1196 | } | |
1197 | do_error: | |
1198 | if (copied) | |
1199 | goto out; | |
1200 | out_err: | |
1201 | if (csk_conn_inline(csk)) | |
1202 | csk_reset_flag(csk, CSK_TX_MORE_DATA); | |
1203 | copied = sk_stream_error(sk, flags, err); | |
1204 | goto done; | |
1205 | } | |
1206 | ||
1207 | int chtls_sendpage(struct sock *sk, struct page *page, | |
1208 | int offset, size_t size, int flags) | |
1209 | { | |
1210 | struct chtls_sock *csk; | |
3b8305f5 | 1211 | struct chtls_dev *cdev; |
36bedb3f AG |
1212 | int mss, err, copied; |
1213 | struct tcp_sock *tp; | |
1214 | long timeo; | |
1215 | ||
1216 | tp = tcp_sk(sk); | |
1217 | copied = 0; | |
1218 | csk = rcu_dereference_sk_user_data(sk); | |
3b8305f5 | 1219 | cdev = csk->cdev; |
36bedb3f AG |
1220 | timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); |
1221 | ||
1222 | err = sk_stream_wait_connect(sk, &timeo); | |
1223 | if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && | |
1224 | err != 0) | |
1225 | goto out_err; | |
1226 | ||
1227 | mss = csk->mss; | |
1228 | csk_set_flag(csk, CSK_TX_MORE_DATA); | |
1229 | ||
1230 | while (size > 0) { | |
1231 | struct sk_buff *skb = skb_peek_tail(&csk->txq); | |
1232 | int copy, i; | |
1233 | ||
36bedb3f | 1234 | if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) || |
ea5213b0 | 1235 | (copy = mss - skb->len) <= 0) { |
36bedb3f | 1236 | new_buf: |
3b8305f5 AG |
1237 | if (!csk_mem_free(cdev, sk)) |
1238 | goto wait_for_sndbuf; | |
36bedb3f AG |
1239 | |
1240 | if (is_tls_tx(csk)) { | |
1241 | skb = get_record_skb(sk, | |
1242 | select_size(sk, size, | |
1243 | flags, | |
1244 | TX_TLSHDR_LEN), | |
1245 | true); | |
1246 | } else { | |
1247 | skb = get_tx_skb(sk, 0); | |
1248 | } | |
1249 | if (!skb) | |
3b8305f5 | 1250 | goto wait_for_memory; |
36bedb3f AG |
1251 | copy = mss; |
1252 | } | |
1253 | if (copy > size) | |
1254 | copy = size; | |
1255 | ||
1256 | i = skb_shinfo(skb)->nr_frags; | |
1257 | if (skb_can_coalesce(skb, i, page, offset)) { | |
1258 | skb_shinfo(skb)->frags[i - 1].size += copy; | |
1259 | } else if (i < MAX_SKB_FRAGS) { | |
1260 | get_page(page); | |
1261 | skb_fill_page_desc(skb, i, page, offset, copy); | |
1262 | } else { | |
1263 | tx_skb_finalize(skb); | |
1264 | push_frames_if_head(sk); | |
1265 | goto new_buf; | |
1266 | } | |
1267 | ||
1268 | skb->len += copy; | |
1269 | if (skb->len == mss) | |
1270 | tx_skb_finalize(skb); | |
1271 | skb->data_len += copy; | |
1272 | skb->truesize += copy; | |
1273 | sk->sk_wmem_queued += copy; | |
1274 | tp->write_seq += copy; | |
1275 | copied += copy; | |
1276 | offset += copy; | |
1277 | size -= copy; | |
1278 | ||
1279 | if (corked(tp, flags) && | |
1280 | (sk_stream_wspace(sk) < sk_stream_min_wspace(sk))) | |
1281 | ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND; | |
1282 | ||
1283 | if (!size) | |
1284 | break; | |
1285 | ||
1286 | if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)) | |
1287 | push_frames_if_head(sk); | |
1288 | continue; | |
3b8305f5 | 1289 | wait_for_sndbuf: |
36bedb3f | 1290 | set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); |
3b8305f5 AG |
1291 | wait_for_memory: |
1292 | err = csk_wait_memory(cdev, sk, &timeo); | |
1293 | if (err) | |
1294 | goto do_error; | |
36bedb3f AG |
1295 | } |
1296 | out: | |
1297 | csk_reset_flag(csk, CSK_TX_MORE_DATA); | |
1298 | if (copied) | |
1299 | chtls_tcp_push(sk, flags); | |
1300 | done: | |
1301 | release_sock(sk); | |
1302 | return copied; | |
1303 | ||
1304 | do_error: | |
1305 | if (copied) | |
1306 | goto out; | |
1307 | ||
1308 | out_err: | |
1309 | if (csk_conn_inline(csk)) | |
1310 | csk_reset_flag(csk, CSK_TX_MORE_DATA); | |
1311 | copied = sk_stream_error(sk, flags, err); | |
1312 | goto done; | |
1313 | } | |
b647993f AG |
1314 | |
1315 | static void chtls_select_window(struct sock *sk) | |
1316 | { | |
1317 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
1318 | struct tcp_sock *tp = tcp_sk(sk); | |
1319 | unsigned int wnd = tp->rcv_wnd; | |
1320 | ||
1321 | wnd = max_t(unsigned int, wnd, tcp_full_space(sk)); | |
1322 | wnd = max_t(unsigned int, MIN_RCV_WND, wnd); | |
1323 | ||
1324 | if (wnd > MAX_RCV_WND) | |
1325 | wnd = MAX_RCV_WND; | |
1326 | ||
1327 | /* | |
1328 | * Check if we need to grow the receive window in response to an increase in | |
1329 | * the socket's receive buffer size. Some applications increase the buffer | |
1330 | * size dynamically and rely on the window to grow accordingly. | |
1331 | */ | |
1332 | ||
1333 | if (wnd > tp->rcv_wnd) { | |
1334 | tp->rcv_wup -= wnd - tp->rcv_wnd; | |
1335 | tp->rcv_wnd = wnd; | |
1336 | /* Mark the receive window as updated */ | |
1337 | csk_reset_flag(csk, CSK_UPDATE_RCV_WND); | |
1338 | } | |
1339 | } | |
1340 | ||
1341 | /* | |
1342 | * Send RX credits through an RX_DATA_ACK CPL message. We are permitted | |
1343 | * to return without sending the message in case we cannot allocate | |
1344 | * an sk_buff. Returns the number of credits sent. | |
1345 | */ | |
1346 | static u32 send_rx_credits(struct chtls_sock *csk, u32 credits) | |
1347 | { | |
1348 | struct cpl_rx_data_ack *req; | |
1349 | struct sk_buff *skb; | |
1350 | ||
1351 | skb = alloc_skb(sizeof(*req), GFP_ATOMIC); | |
1352 | if (!skb) | |
1353 | return 0; | |
1354 | __skb_put(skb, sizeof(*req)); | |
1355 | req = (struct cpl_rx_data_ack *)skb->head; | |
1356 | ||
1357 | set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id); | |
1358 | INIT_TP_WR(req, csk->tid); | |
1359 | OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, | |
1360 | csk->tid)); | |
1361 | req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) | | |
1362 | RX_FORCE_ACK_F); | |
1363 | cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb); | |
1364 | return credits; | |
1365 | } | |
1366 | ||
1367 | #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \ | |
1368 | TCPF_FIN_WAIT1 | \ | |
1369 | TCPF_FIN_WAIT2) | |
1370 | ||
1371 | /* | |
1372 | * Called after some received data has been read. It returns RX credits | |
1373 | * to the HW for the amount of data processed. | |
1374 | */ | |
1375 | static void chtls_cleanup_rbuf(struct sock *sk, int copied) | |
1376 | { | |
1377 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
1378 | struct tcp_sock *tp; | |
1379 | int must_send; | |
1380 | u32 credits; | |
1381 | u32 thres; | |
1382 | ||
1383 | thres = 15 * 1024; | |
1384 | ||
1385 | if (!sk_in_state(sk, CREDIT_RETURN_STATE)) | |
1386 | return; | |
1387 | ||
1388 | chtls_select_window(sk); | |
1389 | tp = tcp_sk(sk); | |
1390 | credits = tp->copied_seq - tp->rcv_wup; | |
1391 | if (unlikely(!credits)) | |
1392 | return; | |
1393 | ||
1394 | /* | |
1395 | * For coalescing to work effectively ensure the receive window has | |
1396 | * at least 16KB left. | |
1397 | */ | |
1398 | must_send = credits + 16384 >= tp->rcv_wnd; | |
1399 | ||
1400 | if (must_send || credits >= thres) | |
1401 | tp->rcv_wup += send_rx_credits(csk, credits); | |
1402 | } | |
1403 | ||
1404 | static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, | |
1405 | int nonblock, int flags, int *addr_len) | |
1406 | { | |
1407 | struct chtls_sock *csk = rcu_dereference_sk_user_data(sk); | |
1408 | struct net_device *dev = csk->egress_dev; | |
1409 | struct chtls_hws *hws = &csk->tlshws; | |
1410 | struct tcp_sock *tp = tcp_sk(sk); | |
1411 | struct adapter *adap; | |
1412 | unsigned long avail; | |
1413 | int buffers_freed; | |
1414 | int copied = 0; | |
1415 | int request; | |
1416 | int target; | |
1417 | long timeo; | |
1418 | ||
1419 | adap = netdev2adap(dev); | |
1420 | buffers_freed = 0; | |
1421 | ||
1422 | timeo = sock_rcvtimeo(sk, nonblock); | |
1423 | target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); | |
1424 | request = len; | |
1425 | ||
1426 | if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) | |
1427 | chtls_cleanup_rbuf(sk, copied); | |
1428 | ||
1429 | do { | |
1430 | struct sk_buff *skb; | |
1431 | u32 offset = 0; | |
1432 | ||
1433 | if (unlikely(tp->urg_data && | |
1434 | tp->urg_seq == tp->copied_seq)) { | |
1435 | if (copied) | |
1436 | break; | |
1437 | if (signal_pending(current)) { | |
1438 | copied = timeo ? sock_intr_errno(timeo) : | |
1439 | -EAGAIN; | |
1440 | break; | |
1441 | } | |
1442 | } | |
1443 | skb = skb_peek(&sk->sk_receive_queue); | |
1444 | if (skb) | |
1445 | goto found_ok_skb; | |
1446 | if (csk->wr_credits && | |
1447 | skb_queue_len(&csk->txq) && | |
1448 | chtls_push_frames(csk, csk->wr_credits == | |
1449 | csk->wr_max_credits)) | |
1450 | sk->sk_write_space(sk); | |
1451 | ||
1452 | if (copied >= target && !sk->sk_backlog.tail) | |
1453 | break; | |
1454 | ||
1455 | if (copied) { | |
1456 | if (sk->sk_err || sk->sk_state == TCP_CLOSE || | |
1457 | (sk->sk_shutdown & RCV_SHUTDOWN) || | |
1458 | signal_pending(current)) | |
1459 | break; | |
1460 | ||
1461 | if (!timeo) | |
1462 | break; | |
1463 | } else { | |
1464 | if (sock_flag(sk, SOCK_DONE)) | |
1465 | break; | |
1466 | if (sk->sk_err) { | |
1467 | copied = sock_error(sk); | |
1468 | break; | |
1469 | } | |
1470 | if (sk->sk_shutdown & RCV_SHUTDOWN) | |
1471 | break; | |
1472 | if (sk->sk_state == TCP_CLOSE) { | |
1473 | copied = -ENOTCONN; | |
1474 | break; | |
1475 | } | |
1476 | if (!timeo) { | |
1477 | copied = -EAGAIN; | |
1478 | break; | |
1479 | } | |
1480 | if (signal_pending(current)) { | |
1481 | copied = sock_intr_errno(timeo); | |
1482 | break; | |
1483 | } | |
1484 | } | |
1485 | if (sk->sk_backlog.tail) { | |
1486 | release_sock(sk); | |
1487 | lock_sock(sk); | |
1488 | chtls_cleanup_rbuf(sk, copied); | |
1489 | continue; | |
1490 | } | |
1491 | ||
1492 | if (copied >= target) | |
1493 | break; | |
1494 | chtls_cleanup_rbuf(sk, copied); | |
1495 | sk_wait_data(sk, &timeo, NULL); | |
1dfe57aa | 1496 | continue; |
b647993f AG |
1497 | found_ok_skb: |
1498 | if (!skb->len) { | |
1499 | skb_dst_set(skb, NULL); | |
1500 | __skb_unlink(skb, &sk->sk_receive_queue); | |
1501 | kfree_skb(skb); | |
1502 | ||
1503 | if (!copied && !timeo) { | |
1504 | copied = -EAGAIN; | |
1505 | break; | |
1506 | } | |
1507 | ||
1508 | if (copied < target) { | |
1509 | release_sock(sk); | |
1510 | lock_sock(sk); | |
1511 | continue; | |
1512 | } | |
1513 | break; | |
1514 | } | |
1515 | offset = hws->copied_seq; | |
1516 | avail = skb->len - offset; | |
1517 | if (len < avail) | |
1518 | avail = len; | |
1519 | ||
1520 | if (unlikely(tp->urg_data)) { | |
1521 | u32 urg_offset = tp->urg_seq - tp->copied_seq; | |
1522 | ||
1523 | if (urg_offset < avail) { | |
1524 | if (urg_offset) { | |
1525 | avail = urg_offset; | |
1526 | } else if (!sock_flag(sk, SOCK_URGINLINE)) { | |
1527 | /* First byte is urgent, skip */ | |
1528 | tp->copied_seq++; | |
1529 | offset++; | |
1530 | avail--; | |
1531 | if (!avail) | |
1532 | goto skip_copy; | |
1533 | } | |
1534 | } | |
1535 | } | |
17a7d24a AG |
1536 | if (skb_copy_datagram_msg(skb, offset, msg, avail)) { |
1537 | if (!copied) { | |
1538 | copied = -EFAULT; | |
1539 | break; | |
b647993f AG |
1540 | } |
1541 | } | |
17a7d24a | 1542 | |
b647993f AG |
1543 | copied += avail; |
1544 | len -= avail; | |
1545 | hws->copied_seq += avail; | |
1546 | skip_copy: | |
1547 | if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) | |
1548 | tp->urg_data = 0; | |
1549 | ||
17a7d24a | 1550 | if ((avail + offset) >= skb->len) { |
b647993f AG |
1551 | if (likely(skb)) |
1552 | chtls_free_skb(sk, skb); | |
1553 | buffers_freed++; | |
17a7d24a AG |
1554 | if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) { |
1555 | tp->copied_seq += skb->len; | |
1556 | hws->rcvpld = skb->hdr_len; | |
1557 | } else { | |
1558 | tp->copied_seq += hws->rcvpld; | |
1559 | } | |
b647993f AG |
1560 | hws->copied_seq = 0; |
1561 | if (copied >= target && | |
1562 | !skb_peek(&sk->sk_receive_queue)) | |
1563 | break; | |
b647993f AG |
1564 | } |
1565 | } while (len > 0); | |
1566 | ||
1567 | if (buffers_freed) | |
1568 | chtls_cleanup_rbuf(sk, copied); | |
1569 | release_sock(sk); | |
1570 | return copied; | |
1571 | } | |
1572 | ||
1573 | /* | |
1574 | * Peek at data in a socket's receive buffer. | |
1575 | */ | |
1576 | static int peekmsg(struct sock *sk, struct msghdr *msg, | |
1577 | size_t len, int nonblock, int flags) | |
1578 | { | |
1579 | struct tcp_sock *tp = tcp_sk(sk); | |
1580 | u32 peek_seq, offset; | |
1581 | struct sk_buff *skb; | |
1582 | int copied = 0; | |
1583 | size_t avail; /* amount of available data in current skb */ | |
1584 | long timeo; | |
1585 | ||
1586 | lock_sock(sk); | |
1587 | timeo = sock_rcvtimeo(sk, nonblock); | |
1588 | peek_seq = tp->copied_seq; | |
1589 | ||
1590 | do { | |
1591 | if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) { | |
1592 | if (copied) | |
1593 | break; | |
1594 | if (signal_pending(current)) { | |
1595 | copied = timeo ? sock_intr_errno(timeo) : | |
1596 | -EAGAIN; | |
1597 | break; | |
1598 | } | |
1599 | } | |
1600 | ||
1601 | skb_queue_walk(&sk->sk_receive_queue, skb) { | |
1602 | offset = peek_seq - ULP_SKB_CB(skb)->seq; | |
1603 | if (offset < skb->len) | |
1604 | goto found_ok_skb; | |
1605 | } | |
1606 | ||
1607 | /* empty receive queue */ | |
1608 | if (copied) | |
1609 | break; | |
1610 | if (sock_flag(sk, SOCK_DONE)) | |
1611 | break; | |
1612 | if (sk->sk_err) { | |
1613 | copied = sock_error(sk); | |
1614 | break; | |
1615 | } | |
1616 | if (sk->sk_shutdown & RCV_SHUTDOWN) | |
1617 | break; | |
1618 | if (sk->sk_state == TCP_CLOSE) { | |
1619 | copied = -ENOTCONN; | |
1620 | break; | |
1621 | } | |
1622 | if (!timeo) { | |
1623 | copied = -EAGAIN; | |
1624 | break; | |
1625 | } | |
1626 | if (signal_pending(current)) { | |
1627 | copied = sock_intr_errno(timeo); | |
1628 | break; | |
1629 | } | |
1630 | ||
1631 | if (sk->sk_backlog.tail) { | |
1632 | /* Do not sleep, just process backlog. */ | |
1633 | release_sock(sk); | |
1634 | lock_sock(sk); | |
1635 | } else { | |
1636 | sk_wait_data(sk, &timeo, NULL); | |
1637 | } | |
1638 | ||
1639 | if (unlikely(peek_seq != tp->copied_seq)) { | |
1640 | if (net_ratelimit()) | |
1641 | pr_info("TCP(%s:%d), race in MSG_PEEK.\n", | |
1642 | current->comm, current->pid); | |
1643 | peek_seq = tp->copied_seq; | |
1644 | } | |
1645 | continue; | |
1646 | ||
1647 | found_ok_skb: | |
1648 | avail = skb->len - offset; | |
1649 | if (len < avail) | |
1650 | avail = len; | |
1651 | /* | |
1652 | * Do we have urgent data here? We need to skip over the | |
1653 | * urgent byte. | |
1654 | */ | |
1655 | if (unlikely(tp->urg_data)) { | |
1656 | u32 urg_offset = tp->urg_seq - peek_seq; | |
1657 | ||
1658 | if (urg_offset < avail) { | |
1659 | /* | |
1660 | * The amount of data we are preparing to copy | |
1661 | * contains urgent data. | |
1662 | */ | |
1663 | if (!urg_offset) { /* First byte is urgent */ | |
1664 | if (!sock_flag(sk, SOCK_URGINLINE)) { | |
1665 | peek_seq++; | |
1666 | offset++; | |
1667 | avail--; | |
1668 | } | |
1669 | if (!avail) | |
1670 | continue; | |
1671 | } else { | |
1672 | /* stop short of the urgent data */ | |
1673 | avail = urg_offset; | |
1674 | } | |
1675 | } | |
1676 | } | |
1677 | ||
1678 | /* | |
1679 | * If MSG_TRUNC is specified the data is discarded. | |
1680 | */ | |
1681 | if (likely(!(flags & MSG_TRUNC))) | |
1682 | if (skb_copy_datagram_msg(skb, offset, msg, len)) { | |
1683 | if (!copied) { | |
1684 | copied = -EFAULT; | |
1685 | break; | |
1686 | } | |
1687 | } | |
1688 | peek_seq += avail; | |
1689 | copied += avail; | |
1690 | len -= avail; | |
1691 | } while (len > 0); | |
1692 | ||
1693 | release_sock(sk); | |
1694 | return copied; | |
1695 | } | |
1696 | ||
1697 | int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, | |
1698 | int nonblock, int flags, int *addr_len) | |
1699 | { | |
1700 | struct tcp_sock *tp = tcp_sk(sk); | |
1701 | struct chtls_sock *csk; | |
1702 | struct chtls_hws *hws; | |
1703 | unsigned long avail; /* amount of available data in current skb */ | |
1704 | int buffers_freed; | |
1705 | int copied = 0; | |
1706 | int request; | |
1707 | long timeo; | |
1708 | int target; /* Read at least this many bytes */ | |
1709 | ||
1710 | buffers_freed = 0; | |
1711 | ||
1712 | if (unlikely(flags & MSG_OOB)) | |
1713 | return tcp_prot.recvmsg(sk, msg, len, nonblock, flags, | |
1714 | addr_len); | |
1715 | ||
1716 | if (unlikely(flags & MSG_PEEK)) | |
1717 | return peekmsg(sk, msg, len, nonblock, flags); | |
1718 | ||
1719 | if (sk_can_busy_loop(sk) && | |
1720 | skb_queue_empty(&sk->sk_receive_queue) && | |
1721 | sk->sk_state == TCP_ESTABLISHED) | |
1722 | sk_busy_loop(sk, nonblock); | |
1723 | ||
1724 | lock_sock(sk); | |
1725 | csk = rcu_dereference_sk_user_data(sk); | |
1726 | hws = &csk->tlshws; | |
1727 | ||
1728 | if (is_tls_rx(csk)) | |
1729 | return chtls_pt_recvmsg(sk, msg, len, nonblock, | |
1730 | flags, addr_len); | |
1731 | ||
1732 | timeo = sock_rcvtimeo(sk, nonblock); | |
1733 | target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); | |
1734 | request = len; | |
1735 | ||
1736 | if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND))) | |
1737 | chtls_cleanup_rbuf(sk, copied); | |
1738 | ||
1739 | do { | |
1740 | struct sk_buff *skb; | |
1741 | u32 offset; | |
1742 | ||
1743 | if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) { | |
1744 | if (copied) | |
1745 | break; | |
1746 | if (signal_pending(current)) { | |
1747 | copied = timeo ? sock_intr_errno(timeo) : | |
1748 | -EAGAIN; | |
1749 | break; | |
1750 | } | |
1751 | } | |
1752 | ||
1753 | skb = skb_peek(&sk->sk_receive_queue); | |
1754 | if (skb) | |
1755 | goto found_ok_skb; | |
1756 | ||
1757 | if (csk->wr_credits && | |
1758 | skb_queue_len(&csk->txq) && | |
1759 | chtls_push_frames(csk, csk->wr_credits == | |
1760 | csk->wr_max_credits)) | |
1761 | sk->sk_write_space(sk); | |
1762 | ||
1763 | if (copied >= target && !sk->sk_backlog.tail) | |
1764 | break; | |
1765 | ||
1766 | if (copied) { | |
1767 | if (sk->sk_err || sk->sk_state == TCP_CLOSE || | |
1768 | (sk->sk_shutdown & RCV_SHUTDOWN) || | |
1769 | signal_pending(current)) | |
1770 | break; | |
1771 | } else { | |
1772 | if (sock_flag(sk, SOCK_DONE)) | |
1773 | break; | |
1774 | if (sk->sk_err) { | |
1775 | copied = sock_error(sk); | |
1776 | break; | |
1777 | } | |
1778 | if (sk->sk_shutdown & RCV_SHUTDOWN) | |
1779 | break; | |
1780 | if (sk->sk_state == TCP_CLOSE) { | |
1781 | copied = -ENOTCONN; | |
1782 | break; | |
1783 | } | |
1784 | if (!timeo) { | |
1785 | copied = -EAGAIN; | |
1786 | break; | |
1787 | } | |
1788 | if (signal_pending(current)) { | |
1789 | copied = sock_intr_errno(timeo); | |
1790 | break; | |
1791 | } | |
1792 | } | |
1793 | ||
1794 | if (sk->sk_backlog.tail) { | |
1795 | release_sock(sk); | |
1796 | lock_sock(sk); | |
1797 | chtls_cleanup_rbuf(sk, copied); | |
1798 | continue; | |
1799 | } | |
1800 | ||
1801 | if (copied >= target) | |
1802 | break; | |
1803 | chtls_cleanup_rbuf(sk, copied); | |
1804 | sk_wait_data(sk, &timeo, NULL); | |
1805 | continue; | |
1806 | ||
1807 | found_ok_skb: | |
1808 | if (!skb->len) { | |
1809 | chtls_kfree_skb(sk, skb); | |
1810 | if (!copied && !timeo) { | |
1811 | copied = -EAGAIN; | |
1812 | break; | |
1813 | } | |
1814 | ||
1815 | if (copied < target) | |
1816 | continue; | |
1817 | ||
1818 | break; | |
1819 | } | |
1820 | ||
1821 | offset = tp->copied_seq - ULP_SKB_CB(skb)->seq; | |
1822 | avail = skb->len - offset; | |
1823 | if (len < avail) | |
1824 | avail = len; | |
1825 | ||
1826 | if (unlikely(tp->urg_data)) { | |
1827 | u32 urg_offset = tp->urg_seq - tp->copied_seq; | |
1828 | ||
1829 | if (urg_offset < avail) { | |
1830 | if (urg_offset) { | |
1831 | avail = urg_offset; | |
1832 | } else if (!sock_flag(sk, SOCK_URGINLINE)) { | |
1833 | tp->copied_seq++; | |
1834 | offset++; | |
1835 | avail--; | |
1836 | if (!avail) | |
1837 | goto skip_copy; | |
1838 | } | |
1839 | } | |
1840 | } | |
1841 | ||
1842 | if (likely(!(flags & MSG_TRUNC))) { | |
1843 | if (skb_copy_datagram_msg(skb, offset, | |
1844 | msg, avail)) { | |
1845 | if (!copied) { | |
1846 | copied = -EFAULT; | |
1847 | break; | |
1848 | } | |
1849 | } | |
1850 | } | |
1851 | ||
1852 | tp->copied_seq += avail; | |
1853 | copied += avail; | |
1854 | len -= avail; | |
1855 | ||
1856 | skip_copy: | |
1857 | if (tp->urg_data && after(tp->copied_seq, tp->urg_seq)) | |
1858 | tp->urg_data = 0; | |
1859 | ||
1860 | if (avail + offset >= skb->len) { | |
1861 | if (likely(skb)) | |
1862 | chtls_free_skb(sk, skb); | |
1863 | buffers_freed++; | |
1864 | ||
1865 | if (copied >= target && | |
1866 | !skb_peek(&sk->sk_receive_queue)) | |
1867 | break; | |
1868 | } | |
1869 | } while (len > 0); | |
1870 | ||
1871 | if (buffers_freed) | |
1872 | chtls_cleanup_rbuf(sk, copied); | |
1873 | ||
1874 | release_sock(sk); | |
1875 | return copied; | |
1876 | } |