/* Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
 * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved.
 * Copyright (c) 2016-2017, Lance Chao <lancerchao@fb.com>. All rights reserved.
 * Copyright (c) 2016, Fridolin Pokorny <fridolin.pokorny@gmail.com>. All rights reserved.
 * Copyright (c) 2016, Nikos Mavrogiannopoulos <nmav@gnutls.org>. All rights reserved.
 * Copyright (c) 2018, Covalent IO, Inc. http://covalent.io
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/sched/signal.h>
#include <linux/module.h>
#include <crypto/aead.h>

#include <net/strparser.h>
#include <net/tls.h>

#define MAX_IV_SIZE	TLS_CIPHER_AES_GCM_128_IV_SIZE
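/* Note: MAX_IV_SIZE bounds the on-stack header[] buffer in tls_read_size()
 * and the IV sanity check in tls_set_sw_offload(). AES-GCM-128, the only
 * cipher handled in this file, carries an 8-byte explicit nonce per record.
 */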
static int __skb_nsg(struct sk_buff *skb, int offset, int len,
		     unsigned int recursion_level)
{
	int start = skb_headlen(skb);
	int i, chunk = start - offset;
	struct sk_buff *frag_iter;
	int elt = 0;

	if (unlikely(recursion_level >= 24))
		return -EMSGSIZE;

	if (chunk > 0) {
		if (chunk > len)
			chunk = len;
		elt++;
		len -= chunk;
		if (len == 0)
			return elt;
		offset += chunk;
	}

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		int end;

		WARN_ON(start > offset + len);

		end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]);
		chunk = end - offset;
		if (chunk > 0) {
			if (chunk > len)
				chunk = len;
			elt++;
			len -= chunk;
			if (len == 0)
				return elt;
			offset += chunk;
		}
		start = end;
	}

	if (unlikely(skb_has_frag_list(skb))) {
		skb_walk_frags(skb, frag_iter) {
			int end, ret;

			WARN_ON(start > offset + len);

			end = start + frag_iter->len;
			chunk = end - offset;
			if (chunk > 0) {
				if (chunk > len)
					chunk = len;
				ret = __skb_nsg(frag_iter, offset - start, chunk,
						recursion_level + 1);
				if (unlikely(ret < 0))
					return ret;
				elt += ret;
				len -= chunk;
				if (len == 0)
					return elt;
				offset += chunk;
			}
			start = end;
		}
	}

	return elt;
}
/* Return the number of scatterlist elements required to completely map the
 * skb, or -EMSGSIZE if the recursion depth is exceeded.
 */
static int skb_nsg(struct sk_buff *skb, int offset, int len)
{
	return __skb_nsg(skb, offset, len, 0);
}
static void tls_decrypt_done(struct crypto_async_request *req, int err)
{
	struct aead_request *aead_req = (struct aead_request *)req;
	struct scatterlist *sgout = aead_req->dst;
	struct scatterlist *sgin = aead_req->src;
	struct tls_sw_context_rx *ctx;
	struct tls_context *tls_ctx;
	struct scatterlist *sg;
	struct sk_buff *skb;
	unsigned int pages;
	int pending;

	skb = (struct sk_buff *)req->data;
	tls_ctx = tls_get_ctx(skb->sk);
	ctx = tls_sw_ctx_rx(tls_ctx);

	/* Propagate if there was an err */
	if (err) {
		ctx->async_wait.err = err;
		tls_err_abort(skb->sk, err);
	} else {
		struct strp_msg *rxm = strp_msg(skb);

		rxm->offset += tls_ctx->rx.prepend_size;
		rxm->full_len -= tls_ctx->rx.overhead_size;
	}

	/* After using skb->sk to propagate sk through crypto async callback
	 * we need to NULL it again.
	 */
	skb->sk = NULL;

	/* Free the destination pages if skb was not decrypted inplace */
	if (sgout != sgin) {
		/* Skip the first S/G entry as it points to AAD */
		for_each_sg(sg_next(sgout), sg, UINT_MAX, pages) {
			if (!sg)
				break;
			put_page(sg_page(sg));
		}
	}

	kfree(aead_req);

	pending = atomic_dec_return(&ctx->decrypt_pending);

	if (!pending && READ_ONCE(ctx->async_notify))
		complete(&ctx->async_wait.completion);
}
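/* Every async decrypt submitted by tls_do_decryption() below bumps
 * ctx->decrypt_pending; the callback above drops it again and completes
 * ctx->async_wait only once a waiter has announced itself through
 * ctx->async_notify (see the smp_store_mb() in tls_sw_recvmsg()).
 */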
static int tls_do_decryption(struct sock *sk,
			     struct sk_buff *skb,
			     struct scatterlist *sgin,
			     struct scatterlist *sgout,
			     char *iv_recv,
			     size_t data_len,
			     struct aead_request *aead_req,
			     bool async)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	int ret;

	aead_request_set_tfm(aead_req, ctx->aead_recv);
	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
	aead_request_set_crypt(aead_req, sgin, sgout,
			       data_len + tls_ctx->rx.tag_size,
			       (u8 *)iv_recv);

	if (async) {
		/* Using skb->sk to push sk through to crypto async callback
		 * handler. This allows propagating errors up to the socket
		 * if needed. It _must_ be cleared in the async handler
		 * before kfree_skb is called. We _know_ skb->sk is NULL
		 * because it is a clone from strparser.
		 */
		skb->sk = sk;
		aead_request_set_callback(aead_req,
					  CRYPTO_TFM_REQ_MAY_BACKLOG,
					  tls_decrypt_done, skb);
		atomic_inc(&ctx->decrypt_pending);
	} else {
		aead_request_set_callback(aead_req,
					  CRYPTO_TFM_REQ_MAY_BACKLOG,
					  crypto_req_done, &ctx->async_wait);
	}

	ret = crypto_aead_decrypt(aead_req);
	if (ret == -EINPROGRESS) {
		if (async)
			return ret;

		ret = crypto_wait_req(ret, &ctx->async_wait);
	}

	if (async)
		atomic_dec(&ctx->decrypt_pending);

	return ret;
}
static void tls_trim_both_msgs(struct sock *sk, int target_size)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct tls_rec *rec = ctx->open_rec;

	sk_msg_trim(sk, &rec->msg_plaintext, target_size);
	if (target_size > 0)
		target_size += tls_ctx->tx.overhead_size;
	sk_msg_trim(sk, &rec->msg_encrypted, target_size);
}
static int tls_alloc_encrypted_msg(struct sock *sk, int len)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct tls_rec *rec = ctx->open_rec;
	struct sk_msg *msg_en = &rec->msg_encrypted;

	return sk_msg_alloc(sk, msg_en, len, 0);
}
static int tls_clone_plaintext_msg(struct sock *sk, int required)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct tls_rec *rec = ctx->open_rec;
	struct sk_msg *msg_pl = &rec->msg_plaintext;
	struct sk_msg *msg_en = &rec->msg_encrypted;
	int skip, len;

	/* We add page references worth len bytes from encrypted sg
	 * at the end of plaintext sg. It is guaranteed that msg_en
	 * has enough required room (ensured by caller).
	 */
	len = required - msg_pl->sg.size;

	/* Skip initial bytes in msg_en's data to be able to use
	 * same offset of both plain and encrypted data.
	 */
	skip = tls_ctx->tx.prepend_size + msg_pl->sg.size;

	return sk_msg_clone(sk, msg_pl, msg_en, skip, len);
}
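/* tls_clone_plaintext_msg() does not copy data: it takes extra page
 * references on msg_encrypted's pages and appends them to msg_plaintext,
 * skipping prepend_size + msg_pl->sg.size bytes so that plaintext and
 * ciphertext end up at identical page offsets. That is what later lets
 * tls_push_record() chain sg_aead_in onto msg_encrypted when
 * rec->inplace_crypto is set.
 */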
static struct tls_rec *tls_get_rec(struct sock *sk)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct sk_msg *msg_pl, *msg_en;
	struct tls_rec *rec;
	int mem_size;

	mem_size = sizeof(struct tls_rec) + crypto_aead_reqsize(ctx->aead_send);

	rec = kzalloc(mem_size, sk->sk_allocation);
	if (!rec)
		return NULL;

	msg_pl = &rec->msg_plaintext;
	msg_en = &rec->msg_encrypted;

	sg_init_table(rec->sg_aead_in, 2);
	sg_set_buf(&rec->sg_aead_in[0], rec->aad_space,
		   sizeof(rec->aad_space));
	sg_unmark_end(&rec->sg_aead_in[1]);

	sg_init_table(rec->sg_aead_out, 2);
	sg_set_buf(&rec->sg_aead_out[0], rec->aad_space,
		   sizeof(rec->aad_space));
	sg_unmark_end(&rec->sg_aead_out[1]);

	return rec;
}
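/* Layout set up above: sg_aead_in[0] and sg_aead_out[0] both point at
 * rec->aad_space (the additional authenticated data), while entry [1] is
 * left unmarked so that tls_push_record() can sg_chain() it onto the
 * plaintext or ciphertext scatterlist of the record.
 */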
static void tls_free_rec(struct sock *sk, struct tls_rec *rec)
{
	sk_msg_free(sk, &rec->msg_encrypted);
	sk_msg_free(sk, &rec->msg_plaintext);
	kfree(rec);
}
static void tls_free_open_rec(struct sock *sk)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct tls_rec *rec = ctx->open_rec;

	if (rec) {
		tls_free_rec(sk, rec);
		ctx->open_rec = NULL;
	}
}
int tls_tx_records(struct sock *sk, int flags)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct tls_rec *rec, *tmp;
	struct sk_msg *msg_en;
	int tx_flags, rc = 0;

	if (tls_is_partially_sent_record(tls_ctx)) {
		rec = list_first_entry(&ctx->tx_list,
				       struct tls_rec, list);

		if (flags == -1)
			tx_flags = rec->tx_flags;
		else
			tx_flags = flags;

		rc = tls_push_partial_record(sk, tls_ctx, tx_flags);
		if (rc)
			goto tx_err;

		/* Full record has been transmitted.
		 * Remove the head of tx_list
		 */
		list_del(&rec->list);
		sk_msg_free(sk, &rec->msg_plaintext);
		kfree(rec);
	}

	/* Tx all ready records */
	list_for_each_entry_safe(rec, tmp, &ctx->tx_list, list) {
		if (READ_ONCE(rec->tx_ready)) {
			if (flags == -1)
				tx_flags = rec->tx_flags;
			else
				tx_flags = flags;

			msg_en = &rec->msg_encrypted;
			rc = tls_push_sg(sk, tls_ctx,
					 &msg_en->sg.data[msg_en->sg.curr],
					 0, tx_flags);
			if (rc)
				goto tx_err;

			list_del(&rec->list);
			sk_msg_free(sk, &rec->msg_plaintext);
			kfree(rec);
		} else {
			break;
		}
	}

tx_err:
	if (rc < 0 && rc != -EAGAIN)
		tls_err_abort(sk, EBADMSG);

	return rc;
}
static void tls_encrypt_done(struct crypto_async_request *req, int err)
{
	struct aead_request *aead_req = (struct aead_request *)req;
	struct sock *sk = req->data;
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct scatterlist *sge;
	struct sk_msg *msg_en;
	struct tls_rec *rec;
	bool ready = false;
	int pending;

	rec = container_of(aead_req, struct tls_rec, aead_req);
	msg_en = &rec->msg_encrypted;

	sge = sk_msg_elem(msg_en, msg_en->sg.curr);
	sge->offset -= tls_ctx->tx.prepend_size;
	sge->length += tls_ctx->tx.prepend_size;

	/* Check if error is previously set on socket */
	if (err || sk->sk_err) {
		rec = NULL;

		/* If err is already set on socket, return the same code */
		if (sk->sk_err) {
			ctx->async_wait.err = sk->sk_err;
		} else {
			ctx->async_wait.err = err;
			tls_err_abort(sk, err);
		}
	}

	if (rec) {
		struct tls_rec *first_rec;

		/* Mark the record as ready for transmission */
		smp_store_mb(rec->tx_ready, true);

		/* If received record is at head of tx_list, schedule tx */
		first_rec = list_first_entry(&ctx->tx_list,
					     struct tls_rec, list);
		if (rec == first_rec)
			ready = true;
	}

	pending = atomic_dec_return(&ctx->encrypt_pending);

	if (!pending && READ_ONCE(ctx->async_notify))
		complete(&ctx->async_wait.completion);

	if (!ready)
		return;

	/* Schedule the transmission */
	if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
		schedule_delayed_work(&ctx->tx_work.work, 1);
}
static int tls_do_encryption(struct sock *sk,
			     struct tls_context *tls_ctx,
			     struct tls_sw_context_tx *ctx,
			     struct aead_request *aead_req,
			     size_t data_len, u32 start)
{
	struct tls_rec *rec = ctx->open_rec;
	struct sk_msg *msg_en = &rec->msg_encrypted;
	struct scatterlist *sge = sk_msg_elem(msg_en, start);
	int rc;

	sge->offset += tls_ctx->tx.prepend_size;
	sge->length -= tls_ctx->tx.prepend_size;

	msg_en->sg.curr = start;

	aead_request_set_tfm(aead_req, ctx->aead_send);
	aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);
	aead_request_set_crypt(aead_req, rec->sg_aead_in,
			       rec->sg_aead_out,
			       data_len, tls_ctx->tx.iv);

	aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				  tls_encrypt_done, sk);

	/* Add the record in tx_list */
	list_add_tail((struct list_head *)&rec->list, &ctx->tx_list);
	atomic_inc(&ctx->encrypt_pending);

	rc = crypto_aead_encrypt(aead_req);
	if (!rc || rc != -EINPROGRESS) {
		atomic_dec(&ctx->encrypt_pending);
		sge->offset -= tls_ctx->tx.prepend_size;
		sge->length += tls_ctx->tx.prepend_size;
	}

	if (!rc) {
		WRITE_ONCE(rec->tx_ready, true);
	} else if (rc != -EINPROGRESS) {
		list_del(&rec->list);
		return rc;
	}

	/* Unhook the record from context if encryption did not fail */
	ctx->open_rec = NULL;
	tls_advance_record_sn(sk, &tls_ctx->tx);
	return rc;
}
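/* Before the AEAD call above, the first ciphertext entry is shrunk by
 * tls_ctx->tx.prepend_size so that the cipher writes its output after the
 * space reserved for the TLS record header; tls_encrypt_done() (or the
 * synchronous path above) widens it back once the record is ready to be
 * pushed out as header + ciphertext + tag.
 */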
static int tls_split_open_record(struct sock *sk, struct tls_rec *from,
				 struct tls_rec **to, struct sk_msg *msg_opl,
				 struct sk_msg *msg_oen, u32 split_point,
				 u32 tx_overhead_size, u32 *orig_end)
{
	u32 i, j, bytes = 0, apply = msg_opl->apply_bytes;
	struct scatterlist *sge, *osge, *nsge;
	u32 orig_size = msg_opl->sg.size;
	struct scatterlist tmp = { };
	struct sk_msg *msg_npl;
	struct tls_rec *new;
	int ret;

	new = tls_get_rec(sk);
	if (!new)
		return -ENOMEM;
	ret = sk_msg_alloc(sk, &new->msg_encrypted, msg_opl->sg.size +
			   tx_overhead_size, 0);
	if (ret < 0) {
		tls_free_rec(sk, new);
		return ret;
	}

	*orig_end = msg_opl->sg.end;
	i = msg_opl->sg.start;
	sge = sk_msg_elem(msg_opl, i);
	while (apply && sge->length) {
		if (sge->length > apply) {
			u32 len = sge->length - apply;

			get_page(sg_page(sge));
			sg_set_page(&tmp, sg_page(sge), len,
				    sge->offset + apply);
			sge->length = apply;
			bytes += apply;
			apply = 0;
		} else {
			apply -= sge->length;
			bytes += sge->length;
		}

		sk_msg_iter_var_next(i);
		if (i == msg_opl->sg.end)
			break;
		sge = sk_msg_elem(msg_opl, i);
	}

	msg_opl->sg.end = i;
	msg_opl->sg.curr = i;
	msg_opl->sg.copybreak = 0;
	msg_opl->apply_bytes = 0;
	msg_opl->sg.size = bytes;

	msg_npl = &new->msg_plaintext;
	msg_npl->apply_bytes = apply;
	msg_npl->sg.size = orig_size - bytes;

	j = msg_npl->sg.start;
	nsge = sk_msg_elem(msg_npl, j);
	if (tmp.length) {
		memcpy(nsge, &tmp, sizeof(*nsge));
		sk_msg_iter_var_next(j);
		nsge = sk_msg_elem(msg_npl, j);
	}

	osge = sk_msg_elem(msg_opl, i);
	while (osge->length) {
		memcpy(nsge, osge, sizeof(*nsge));
		sk_msg_iter_var_next(i);
		sk_msg_iter_var_next(j);
		if (i == *orig_end)
			break;
		osge = sk_msg_elem(msg_opl, i);
		nsge = sk_msg_elem(msg_npl, j);
	}

	msg_npl->sg.end = j;
	msg_npl->sg.curr = j;
	msg_npl->sg.copybreak = 0;

	*to = new;
	return 0;
}
static void tls_merge_open_record(struct sock *sk, struct tls_rec *to,
				  struct tls_rec *from, u32 orig_end)
{
	struct sk_msg *msg_npl = &from->msg_plaintext;
	struct sk_msg *msg_opl = &to->msg_plaintext;
	struct scatterlist *osge, *nsge;
	u32 i, j;

	i = msg_opl->sg.end;
	sk_msg_iter_var_prev(i);
	j = msg_npl->sg.start;

	osge = sk_msg_elem(msg_opl, i);
	nsge = sk_msg_elem(msg_npl, j);

	if (sg_page(osge) == sg_page(nsge) &&
	    osge->offset + osge->length == nsge->offset) {
		osge->length += nsge->length;
		put_page(sg_page(nsge));
	}

	msg_opl->sg.end = orig_end;
	msg_opl->sg.curr = orig_end;
	msg_opl->sg.copybreak = 0;
	msg_opl->apply_bytes = msg_opl->sg.size + msg_npl->sg.size;
	msg_opl->sg.size += msg_npl->sg.size;

	sk_msg_free(sk, &to->msg_encrypted);
	sk_msg_xfer_full(&to->msg_encrypted, &from->msg_encrypted);

	kfree(from);
}
static int tls_push_record(struct sock *sk, int flags,
			   unsigned char record_type)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct tls_rec *rec = ctx->open_rec, *tmp = NULL;
	u32 i, split_point, uninitialized_var(orig_end);
	struct sk_msg *msg_pl, *msg_en;
	struct aead_request *req;
	bool split;
	int rc;

	if (!rec)
		return 0;

	msg_pl = &rec->msg_plaintext;
	msg_en = &rec->msg_encrypted;

	split_point = msg_pl->apply_bytes;
	split = split_point && split_point < msg_pl->sg.size;
	if (split) {
		rc = tls_split_open_record(sk, rec, &tmp, msg_pl, msg_en,
					   split_point, tls_ctx->tx.overhead_size,
					   &orig_end);
		if (rc < 0)
			return rc;
		sk_msg_trim(sk, msg_en, msg_pl->sg.size +
			    tls_ctx->tx.overhead_size);
	}

	rec->tx_flags = flags;
	req = &rec->aead_req;

	i = msg_pl->sg.end;
	sk_msg_iter_var_prev(i);
	sg_mark_end(sk_msg_elem(msg_pl, i));

	i = msg_pl->sg.start;
	sg_chain(rec->sg_aead_in, 2, rec->inplace_crypto ?
		 &msg_en->sg.data[i] : &msg_pl->sg.data[i]);

	i = msg_en->sg.end;
	sk_msg_iter_var_prev(i);
	sg_mark_end(sk_msg_elem(msg_en, i));

	i = msg_en->sg.start;
	sg_chain(rec->sg_aead_out, 2, &msg_en->sg.data[i]);

	tls_make_aad(rec->aad_space, msg_pl->sg.size,
		     tls_ctx->tx.rec_seq, tls_ctx->tx.rec_seq_size,
		     record_type);

	tls_fill_prepend(tls_ctx,
			 page_address(sg_page(&msg_en->sg.data[i])) +
			 msg_en->sg.data[i].offset, msg_pl->sg.size,
			 record_type);

	tls_ctx->pending_open_record_frags = false;

	rc = tls_do_encryption(sk, tls_ctx, ctx, req, msg_pl->sg.size, i);
	if (rc < 0) {
		if (rc != -EINPROGRESS) {
			tls_err_abort(sk, EBADMSG);
			if (split) {
				tls_ctx->pending_open_record_frags = true;
				tls_merge_open_record(sk, rec, tmp, orig_end);
			}
		}
		return rc;
	} else if (split) {
		msg_pl = &tmp->msg_plaintext;
		msg_en = &tmp->msg_encrypted;
		sk_msg_trim(sk, msg_en, msg_pl->sg.size +
			    tls_ctx->tx.overhead_size);
		tls_ctx->pending_open_record_frags = true;
		ctx->open_rec = tmp;
	}

	return tls_tx_records(sk, flags);
}
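/* Resulting on-the-wire layout for a record produced above (AES-GCM-128,
 * the only cipher configured in tls_set_sw_offload()):
 *
 *   TLS_HEADER_SIZE (5) | explicit nonce (8) | ciphertext | tag (16)
 *
 * tls_fill_prepend() writes the first two parts into the head of the first
 * encrypted sg entry; prepend_size and overhead_size account for them when
 * records are sized and trimmed.
 */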
static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
			       bool full_record, u8 record_type,
			       size_t *copied, int flags)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct sk_msg msg_redir = { };
	struct sk_psock *psock;
	struct sock *sk_redir;
	struct tls_rec *rec;
	bool enospc, policy;
	int err = 0, send;
	u32 delta = 0;

	policy = !(flags & MSG_SENDPAGE_NOPOLICY);
	psock = sk_psock_get(sk);
	if (!psock || !policy)
		return tls_push_record(sk, flags, record_type);
more_data:
	enospc = sk_msg_full(msg);
	if (psock->eval == __SK_NONE) {
		delta = msg->sg.size;
		psock->eval = sk_psock_msg_verdict(sk, psock, msg);
		if (delta < msg->sg.size)
			delta -= msg->sg.size;
	}
	if (msg->cork_bytes && msg->cork_bytes > msg->sg.size &&
	    !enospc && !full_record) {
		err = -ENOSPC;
		goto out_err;
	}

	send = msg->sg.size;
	if (msg->apply_bytes && msg->apply_bytes < send)
		send = msg->apply_bytes;

	switch (psock->eval) {
	case __SK_PASS:
		err = tls_push_record(sk, flags, record_type);
		if (err < 0) {
			*copied -= sk_msg_free(sk, msg);
			tls_free_open_rec(sk);
			goto out_err;
		}
		break;
	case __SK_REDIRECT:
		sk_redir = psock->sk_redir;
		memcpy(&msg_redir, msg, sizeof(*msg));
		if (msg->apply_bytes < send)
			msg->apply_bytes = 0;
		else
			msg->apply_bytes -= send;
		sk_msg_return_zero(sk, msg, send);
		msg->sg.size -= send;

		err = tcp_bpf_sendmsg_redir(sk_redir, &msg_redir, send, flags);
		if (err < 0) {
			*copied -= sk_msg_free_nocharge(sk, &msg_redir);
			msg->sg.size = 0;
		}
		if (msg->sg.size == 0)
			tls_free_open_rec(sk);
		break;
	case __SK_DROP:
	default:
		sk_msg_free_partial(sk, msg, send);
		if (msg->apply_bytes < send)
			msg->apply_bytes = 0;
		else
			msg->apply_bytes -= send;
		if (msg->sg.size == 0)
			tls_free_open_rec(sk);
		*copied -= (send + delta);
		err = -EACCES;
	}

	if (likely(!err)) {
		bool reset_eval = !ctx->open_rec;

		rec = ctx->open_rec;
		if (rec) {
			msg = &rec->msg_plaintext;
			if (!msg->apply_bytes)
				reset_eval = true;
		}
		if (reset_eval) {
			psock->eval = __SK_NONE;
			if (psock->sk_redir) {
				sock_put(psock->sk_redir);
				psock->sk_redir = NULL;
			}
		}
		if (rec)
			goto more_data;
	}
out_err:
	sk_psock_put(sk, psock);
	return err;
}
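/* The switch above mirrors the usual sockmap verdict handling: a pass
 * verdict encrypts and queues the record via tls_push_record(), a redirect
 * verdict hands the plaintext sk_msg to the target socket with
 * tcp_bpf_sendmsg_redir(), and a drop (or unknown) verdict frees the
 * plaintext and rolls back the caller's copied count.
 */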
static int tls_sw_push_pending_record(struct sock *sk, int flags)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct tls_rec *rec = ctx->open_rec;
	struct sk_msg *msg_pl;
	size_t copied;

	if (!rec)
		return 0;

	msg_pl = &rec->msg_plaintext;
	copied = msg_pl->sg.size;
	if (!copied)
		return 0;

	return bpf_exec_tx_verdict(msg_pl, sk, true, TLS_RECORD_TYPE_DATA,
				   &copied, flags);
}
int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct crypto_tfm *tfm = crypto_aead_tfm(ctx->aead_send);
	bool async_capable = tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;
	unsigned char record_type = TLS_RECORD_TYPE_DATA;
	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
	bool eor = !(msg->msg_flags & MSG_MORE);
	size_t try_to_copy, copied = 0;
	struct sk_msg *msg_pl, *msg_en;
	struct tls_rec *rec;
	int required_size;
	int num_async = 0;
	bool full_record;
	int record_room;
	int num_zc = 0;
	int orig_size;
	int ret = 0;

	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
		return -ENOTSUPP;

	lock_sock(sk);

	/* Wait till there is any pending write on socket */
	if (unlikely(sk->sk_write_pending)) {
		ret = wait_on_pending_writer(sk, &timeo);
		if (unlikely(ret))
			goto send_end;
	}

	if (unlikely(msg->msg_controllen)) {
		ret = tls_proccess_cmsg(sk, msg, &record_type);
		if (ret) {
			if (ret == -EINPROGRESS)
				num_async++;
			else if (ret != -EAGAIN)
				goto send_end;
		}
	}

	while (msg_data_left(msg)) {
		if (sk->sk_err) {
			ret = -sk->sk_err;
			goto send_end;
		}

		if (ctx->open_rec)
			rec = ctx->open_rec;
		else
			rec = ctx->open_rec = tls_get_rec(sk);
		if (!rec) {
			ret = -ENOMEM;
			goto send_end;
		}

		msg_pl = &rec->msg_plaintext;
		msg_en = &rec->msg_encrypted;

		orig_size = msg_pl->sg.size;
		full_record = false;
		try_to_copy = msg_data_left(msg);
		record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
		if (try_to_copy >= record_room) {
			try_to_copy = record_room;
			full_record = true;
		}

		required_size = msg_pl->sg.size + try_to_copy +
				tls_ctx->tx.overhead_size;

		if (!sk_stream_memory_free(sk))
			goto wait_for_sndbuf;

alloc_encrypted:
		ret = tls_alloc_encrypted_msg(sk, required_size);
		if (ret) {
			if (ret != -ENOSPC)
				goto wait_for_memory;

			/* Adjust try_to_copy according to the amount that was
			 * actually allocated. The difference is due
			 * to max sg elements limit
			 */
			try_to_copy -= required_size - msg_en->sg.size;
			full_record = true;
		}

		if (!is_kvec && (full_record || eor) && !async_capable) {
			u32 first = msg_pl->sg.end;

			ret = sk_msg_zerocopy_from_iter(sk, &msg->msg_iter,
							msg_pl, try_to_copy);
			if (ret)
				goto fallback_to_reg_send;

			rec->inplace_crypto = 0;

			num_zc++;
			copied += try_to_copy;

			sk_msg_sg_copy_set(msg_pl, first);
			ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
						  record_type, &copied,
						  msg->msg_flags);
			if (ret) {
				if (ret == -EINPROGRESS)
					num_async++;
				else if (ret == -ENOMEM)
					goto wait_for_memory;
				else if (ret == -ENOSPC)
					goto rollback_iter;
				else if (ret != -EAGAIN)
					goto send_end;
			}
			continue;

rollback_iter:
			copied -= try_to_copy;
			sk_msg_sg_copy_clear(msg_pl, first);
			iov_iter_revert(&msg->msg_iter,
					msg_pl->sg.size - orig_size);
fallback_to_reg_send:
			sk_msg_trim(sk, msg_pl, orig_size);
		}

		required_size = msg_pl->sg.size + try_to_copy;

		ret = tls_clone_plaintext_msg(sk, required_size);
		if (ret) {
			if (ret != -ENOSPC)
				goto send_end;

			/* Adjust try_to_copy according to the amount that was
			 * actually allocated. The difference is due
			 * to max sg elements limit
			 */
			try_to_copy -= required_size - msg_pl->sg.size;
			full_record = true;
			sk_msg_trim(sk, msg_en, msg_pl->sg.size +
				    tls_ctx->tx.overhead_size);
		}

		ret = sk_msg_memcopy_from_iter(sk, &msg->msg_iter,
					       msg_pl, try_to_copy);
		if (ret < 0)
			goto trim_sgl;

		/* Open records defined only if successfully copied, otherwise
		 * we would trim the sg but not reset the open record frags.
		 */
		tls_ctx->pending_open_record_frags = true;
		copied += try_to_copy;
		if (full_record || eor) {
			ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
						  record_type, &copied,
						  msg->msg_flags);
			if (ret) {
				if (ret == -EINPROGRESS)
					num_async++;
				else if (ret == -ENOMEM)
					goto wait_for_memory;
				else if (ret != -EAGAIN) {
					if (ret == -ENOSPC)
						ret = 0;
					goto send_end;
				}
			}
		}

		continue;

wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		ret = sk_stream_wait_memory(sk, &timeo);
		if (ret) {
trim_sgl:
			tls_trim_both_msgs(sk, orig_size);
			goto send_end;
		}

		if (msg_en->sg.size < required_size)
			goto alloc_encrypted;
	}

	if (!num_async) {
		goto send_end;
	} else if (num_zc) {
		/* Wait for pending encryptions to get completed */
		smp_store_mb(ctx->async_notify, true);

		if (atomic_read(&ctx->encrypt_pending))
			crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
		else
			reinit_completion(&ctx->async_wait.completion);

		WRITE_ONCE(ctx->async_notify, false);

		if (ctx->async_wait.err) {
			ret = ctx->async_wait.err;
			copied = 0;
		}
	}

	/* Transmit if any encryptions have completed */
	if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
		cancel_delayed_work(&ctx->tx_work.work);
		tls_tx_records(sk, msg->msg_flags);
	}

send_end:
	ret = sk_stream_error(sk, msg->msg_flags, ret);

	release_sock(sk);
	return copied ? copied : ret;
}
static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
			      int offset, size_t size, int flags)
{
	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	unsigned char record_type = TLS_RECORD_TYPE_DATA;
	struct sk_msg *msg_pl;
	struct tls_rec *rec;
	int num_async = 0;
	size_t copied = 0;
	bool full_record;
	int record_room;
	int ret = 0;
	bool eor;

	eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	/* Wait till there is any pending write on socket */
	if (unlikely(sk->sk_write_pending)) {
		ret = wait_on_pending_writer(sk, &timeo);
		if (unlikely(ret))
			goto sendpage_end;
	}

	/* Call the sk_stream functions to manage the sndbuf mem. */
	while (size > 0) {
		size_t copy, required_size;

		if (sk->sk_err) {
			ret = -sk->sk_err;
			goto sendpage_end;
		}

		if (ctx->open_rec)
			rec = ctx->open_rec;
		else
			rec = ctx->open_rec = tls_get_rec(sk);
		if (!rec) {
			ret = -ENOMEM;
			goto sendpage_end;
		}

		msg_pl = &rec->msg_plaintext;

		full_record = false;
		record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
		copy = size;
		if (copy >= record_room) {
			copy = record_room;
			full_record = true;
		}

		required_size = msg_pl->sg.size + copy +
				tls_ctx->tx.overhead_size;

		if (!sk_stream_memory_free(sk))
			goto wait_for_sndbuf;
alloc_payload:
		ret = tls_alloc_encrypted_msg(sk, required_size);
		if (ret) {
			if (ret != -ENOSPC)
				goto wait_for_memory;

			/* Adjust copy according to the amount that was
			 * actually allocated. The difference is due
			 * to max sg elements limit
			 */
			copy -= required_size - msg_pl->sg.size;
			full_record = true;
		}

		sk_msg_page_add(msg_pl, page, copy, offset);
		sk_mem_charge(sk, copy);

		offset += copy;
		size -= copy;
		copied += copy;

		tls_ctx->pending_open_record_frags = true;
		if (full_record || eor || sk_msg_full(msg_pl)) {
			rec->inplace_crypto = 0;
			ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
						  record_type, &copied, flags);
			if (ret) {
				if (ret == -EINPROGRESS)
					num_async++;
				else if (ret == -ENOMEM)
					goto wait_for_memory;
				else if (ret != -EAGAIN) {
					if (ret == -ENOSPC)
						ret = 0;
					goto sendpage_end;
				}
			}
		}
		continue;
wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		ret = sk_stream_wait_memory(sk, &timeo);
		if (ret) {
			tls_trim_both_msgs(sk, msg_pl->sg.size);
			goto sendpage_end;
		}

		goto alloc_payload;
	}

	if (num_async) {
		/* Transmit if any encryptions have completed */
		if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
			cancel_delayed_work(&ctx->tx_work.work);
			tls_tx_records(sk, flags);
		}
	}
sendpage_end:
	ret = sk_stream_error(sk, flags, ret);
	return copied ? copied : ret;
}
int tls_sw_sendpage(struct sock *sk, struct page *page,
		    int offset, size_t size, int flags)
{
	int ret;

	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
		      MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
		return -ENOTSUPP;

	lock_sock(sk);
	ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
	release_sock(sk);
	return ret;
}
static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
				     int flags, long timeo, int *err)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct sk_buff *skb;
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	while (!(skb = ctx->recv_pkt) && sk_psock_queue_empty(psock)) {
		if (sk->sk_err) {
			*err = sock_error(sk);
			return NULL;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN)
			return NULL;

		if (sock_flag(sk, SOCK_DONE))
			return NULL;

		if ((flags & MSG_DONTWAIT) || !timeo) {
			*err = -EAGAIN;
			return NULL;
		}

		add_wait_queue(sk_sleep(sk), &wait);
		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		sk_wait_event(sk, &timeo,
			      ctx->recv_pkt != skb ||
			      !sk_psock_queue_empty(psock),
			      &wait);
		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
		remove_wait_queue(sk_sleep(sk), &wait);

		/* Handle signals */
		if (signal_pending(current)) {
			*err = sock_intr_errno(timeo);
			return NULL;
		}
	}

	return skb;
}
static int tls_setup_from_iter(struct sock *sk, struct iov_iter *from,
			       int length, int *pages_used,
			       unsigned int *size_used,
			       struct scatterlist *to,
			       int to_max_pages)
{
	int rc = 0, i = 0, num_elem = *pages_used, maxpages;
	struct page *pages[MAX_SKB_FRAGS];
	unsigned int size = *size_used;
	ssize_t copied, use;
	size_t offset;

	while (length > 0) {
		i = 0;
		maxpages = to_max_pages - num_elem;
		if (maxpages == 0) {
			rc = -EFAULT;
			goto out;
		}
		copied = iov_iter_get_pages(from, pages,
					    length,
					    maxpages, &offset);
		if (copied <= 0) {
			rc = -EFAULT;
			goto out;
		}

		iov_iter_advance(from, copied);

		length -= copied;
		size += copied;
		while (copied) {
			use = min_t(int, copied, PAGE_SIZE - offset);

			sg_set_page(&to[num_elem],
				    pages[i], use, offset);
			sg_unmark_end(&to[num_elem]);
			/* We do not uncharge memory from this API */

			offset = 0;
			copied -= use;

			i++;
			num_elem++;
		}
	}
	/* Mark the end in the last sg entry if newly added */
	if (num_elem > *pages_used)
		sg_mark_end(&to[num_elem - 1]);
out:
	if (rc)
		iov_iter_revert(from, size - *size_used);
	else
		*size_used = size;
	*pages_used = num_elem;

	return rc;
}
/* This function decrypts the input skb into either out_iov or out_sg or
 * into the skb buffers themselves. The input parameter 'zc' indicates if
 * zero-copy mode needs to be tried or not. With zero-copy mode, either
 * out_iov or out_sg must be non-NULL. In case both out_iov and out_sg are
 * NULL, then the decryption happens inside skb buffers itself, i.e.
 * zero-copy gets disabled and 'zc' is updated.
 */
static int decrypt_internal(struct sock *sk, struct sk_buff *skb,
			    struct iov_iter *out_iov,
			    struct scatterlist *out_sg,
			    int *chunk, bool *zc, bool async)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct strp_msg *rxm = strp_msg(skb);
	int n_sgin, n_sgout, nsg, mem_size, aead_size, err, pages = 0;
	struct aead_request *aead_req;
	struct sk_buff *unused;
	u8 *aad, *iv, *mem = NULL;
	struct scatterlist *sgin = NULL;
	struct scatterlist *sgout = NULL;
	const int data_len = rxm->full_len - tls_ctx->rx.overhead_size;

	if (*zc && (out_iov || out_sg)) {
		if (out_iov)
			n_sgout = iov_iter_npages(out_iov, INT_MAX) + 1;
		else
			n_sgout = sg_nents(out_sg);
		n_sgin = skb_nsg(skb, rxm->offset + tls_ctx->rx.prepend_size,
				 rxm->full_len - tls_ctx->rx.prepend_size);
	} else {
		n_sgout = 0;
		*zc = false;
		n_sgin = skb_cow_data(skb, 0, &unused);
	}

	if (n_sgin < 1)
		return -EBADMSG;

	/* Increment to accommodate AAD */
	n_sgin = n_sgin + 1;

	nsg = n_sgin + n_sgout;

	aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
	mem_size = aead_size + (nsg * sizeof(struct scatterlist));
	mem_size = mem_size + TLS_AAD_SPACE_SIZE;
	mem_size = mem_size + crypto_aead_ivsize(ctx->aead_recv);

	/* Allocate a single block of memory which contains
	 * aead_req || sgin[] || sgout[] || aad || iv.
	 * This order achieves correct alignment for aead_req, sgin, sgout.
	 */
	mem = kmalloc(mem_size, sk->sk_allocation);
	if (!mem)
		return -ENOMEM;

	/* Segment the allocated memory */
	aead_req = (struct aead_request *)mem;
	sgin = (struct scatterlist *)(mem + aead_size);
	sgout = sgin + n_sgin;
	aad = (u8 *)(sgout + n_sgout);
	iv = aad + TLS_AAD_SPACE_SIZE;

	/* Prepare IV */
	err = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE,
			    iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
			    tls_ctx->rx.iv_size);
	if (err < 0) {
		kfree(mem);
		return err;
	}
	memcpy(iv, tls_ctx->rx.iv, TLS_CIPHER_AES_GCM_128_SALT_SIZE);

	/* Prepare AAD */
	tls_make_aad(aad, rxm->full_len - tls_ctx->rx.overhead_size,
		     tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size,
		     ctx->control);

	/* Prepare sgin */
	sg_init_table(sgin, n_sgin);
	sg_set_buf(&sgin[0], aad, TLS_AAD_SPACE_SIZE);
	err = skb_to_sgvec(skb, &sgin[1],
			   rxm->offset + tls_ctx->rx.prepend_size,
			   rxm->full_len - tls_ctx->rx.prepend_size);
	if (err < 0) {
		kfree(mem);
		return err;
	}

	if (n_sgout) {
		if (out_iov) {
			sg_init_table(sgout, n_sgout);
			sg_set_buf(&sgout[0], aad, TLS_AAD_SPACE_SIZE);

			*chunk = 0;
			err = tls_setup_from_iter(sk, out_iov, data_len,
						  &pages, chunk, &sgout[1],
						  (n_sgout - 1));
			if (err < 0)
				goto fallback_to_reg_recv;
		} else if (out_sg) {
			memcpy(sgout, out_sg, n_sgout * sizeof(*sgout));
		} else {
			goto fallback_to_reg_recv;
		}
	} else {
fallback_to_reg_recv:
		sgout = sgin;
		pages = 0;
		*chunk = data_len;
		*zc = false;
	}

	/* Prepare and submit AEAD request */
	err = tls_do_decryption(sk, skb, sgin, sgout, iv,
				data_len, aead_req, async);
	if (err == -EINPROGRESS)
		return err;

	/* Release the pages in case iov was mapped to pages */
	for (; pages > 0; pages--)
		put_page(sg_page(&sgout[pages]));

	kfree(mem);
	return err;
}
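/* Memory picture for the single allocation above:
 *
 *   mem: [ aead_req | sgin[n_sgin] | sgout[n_sgout] | aad | iv ]
 *
 * sgin[0] (and sgout[0]) carry the AAD, sgin[1..] map the ciphertext still
 * sitting in the skb, and sgout[1..] map either the caller's pages
 * (zero-copy) or, after fallback_to_reg_recv, simply alias sgin so the
 * record is decrypted in place.
 */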
static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb,
			      struct iov_iter *dest, int *chunk, bool *zc,
			      bool async)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct strp_msg *rxm = strp_msg(skb);
	int err = 0;

#ifdef CONFIG_TLS_DEVICE
	err = tls_device_decrypted(sk, skb);
	if (err < 0)
		return err;
#endif
	if (!ctx->decrypted) {
		err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, async);
		if (err < 0) {
			if (err == -EINPROGRESS)
				tls_advance_record_sn(sk, &tls_ctx->rx);

			return err;
		}
	} else {
		*zc = false;
	}

	rxm->offset += tls_ctx->rx.prepend_size;
	rxm->full_len -= tls_ctx->rx.overhead_size;
	tls_advance_record_sn(sk, &tls_ctx->rx);
	ctx->decrypted = true;
	ctx->saved_data_ready(sk);

	return err;
}
int decrypt_skb(struct sock *sk, struct sk_buff *skb,
		struct scatterlist *sgout)
{
	bool zc = true;
	int chunk;

	return decrypt_internal(sk, skb, NULL, sgout, &chunk, &zc, false);
}
static bool tls_sw_advance_skb(struct sock *sk, struct sk_buff *skb,
			       unsigned int len)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);

	if (skb) {
		struct strp_msg *rxm = strp_msg(skb);

		if (len < rxm->full_len) {
			rxm->offset += len;
			rxm->full_len -= len;
			return false;
		}
		kfree_skb(skb);
	}

	/* Finished with message */
	ctx->recv_pkt = NULL;
	__strp_unpause(&ctx->strp);

	return true;
}
/* This function traverses the rx_list in the tls receive context to copy
 * decrypted records into the buffer provided by the caller when zero copy
 * is not true. Further, records are removed from rx_list if it is not a
 * peek case and the record has been consumed completely.
 */
static int process_rx_list(struct tls_sw_context_rx *ctx,
			   struct msghdr *msg,
			   size_t skip,
			   size_t len,
			   bool zc,
			   bool is_peek)
{
	struct sk_buff *skb = skb_peek(&ctx->rx_list);
	ssize_t copied = 0;

	while (skip && skb) {
		struct strp_msg *rxm = strp_msg(skb);

		if (skip < rxm->full_len)
			break;

		skip = skip - rxm->full_len;
		skb = skb_peek_next(skb, &ctx->rx_list);
	}

	while (len && skb) {
		struct sk_buff *next_skb;
		struct strp_msg *rxm = strp_msg(skb);
		int chunk = min_t(unsigned int, rxm->full_len - skip, len);

		if (!zc || (rxm->full_len - skip) > len) {
			int err = skb_copy_datagram_msg(skb, rxm->offset + skip,
							msg, chunk);
			if (err < 0)
				return err;
		}

		len = len - chunk;
		copied = copied + chunk;

		/* Consume the data from record if it is non-peek case */
		if (!is_peek) {
			rxm->offset = rxm->offset + chunk;
			rxm->full_len = rxm->full_len - chunk;

			/* Return if there is unconsumed data in the record */
			if (rxm->full_len - skip)
				break;
		}

		/* The remaining skip-bytes must lie in 1st record in rx_list.
		 * So from the 2nd record, 'skip' should be 0.
		 */
		skip = 0;

		if (msg)
			msg->msg_flags |= MSG_EOR;

		next_skb = skb_peek_next(skb, &ctx->rx_list);

		if (!is_peek) {
			skb_unlink(skb, &ctx->rx_list);
			kfree_skb(skb);
		}

		skb = next_skb;
	}

	return copied;
}
int tls_sw_recvmsg(struct sock *sk,
		   struct msghdr *msg,
		   size_t len,
		   int nonblock,
		   int flags,
		   int *addr_len)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct sk_psock *psock;
	unsigned char control = 0;
	ssize_t decrypted = 0;
	struct strp_msg *rxm;
	struct sk_buff *skb;
	ssize_t copied = 0;
	bool cmsg = false;
	int target, err = 0;
	long timeo;
	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
	bool is_peek = flags & MSG_PEEK;
	int num_async = 0;

	flags |= nonblock;

	if (unlikely(flags & MSG_ERRQUEUE))
		return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);

	psock = sk_psock_get(sk);
	lock_sock(sk);

	/* Process pending decrypted records. It must be non-zero-copy */
	err = process_rx_list(ctx, msg, 0, len, false, is_peek);
	if (err < 0) {
		tls_err_abort(sk, err);
		goto end;
	} else {
		copied = err;
	}

	len = len - copied;
	if (len) {
		target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
		timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
	} else {
		goto recv_end;
	}

	do {
		bool retain_skb = false;
		bool async = false;
		bool zc = false;
		int to_decrypt;
		int chunk = 0;

		skb = tls_wait_data(sk, psock, flags, timeo, &err);
		if (!skb) {
			if (psock) {
				int ret = __tcp_bpf_recvmsg(sk, psock,
							    msg, len, flags);

				if (ret > 0) {
					decrypted += ret;
					len -= ret;
					continue;
				}
			}
			goto recv_end;
		}

		rxm = strp_msg(skb);

		if (!cmsg) {
			int cerr;

			cerr = put_cmsg(msg, SOL_TLS, TLS_GET_RECORD_TYPE,
					sizeof(ctx->control), &ctx->control);
			cmsg = true;
			control = ctx->control;
			if (ctx->control != TLS_RECORD_TYPE_DATA) {
				if (cerr || msg->msg_flags & MSG_CTRUNC) {
					err = -EIO;
					goto recv_end;
				}
			}
		} else if (control != ctx->control) {
			goto recv_end;
		}

		to_decrypt = rxm->full_len - tls_ctx->rx.overhead_size;

		if (to_decrypt <= len && !is_kvec && !is_peek)
			zc = true;

		err = decrypt_skb_update(sk, skb, &msg->msg_iter,
					 &chunk, &zc, ctx->async_capable);
		if (err < 0 && err != -EINPROGRESS) {
			tls_err_abort(sk, EBADMSG);
			goto recv_end;
		}

		if (err == -EINPROGRESS) {
			async = true;
			num_async++;
			goto pick_next_record;
		}

		if (!zc) {
			if (rxm->full_len > len) {
				retain_skb = true;
				chunk = len;
			} else {
				chunk = rxm->full_len;
			}

			err = skb_copy_datagram_msg(skb, rxm->offset,
						    msg, chunk);
			if (err < 0)
				goto recv_end;

			if (!is_peek) {
				rxm->offset = rxm->offset + chunk;
				rxm->full_len = rxm->full_len - chunk;
			}
		}

pick_next_record:
		if (chunk > len)
			chunk = len;

		decrypted += chunk;
		len -= chunk;

		/* For async or peek case, queue the current skb */
		if (async || is_peek || retain_skb) {
			skb_queue_tail(&ctx->rx_list, skb);
			skb = NULL;
		}

		if (tls_sw_advance_skb(sk, skb, chunk)) {
			/* Return full control message to
			 * userspace before trying to parse
			 * another message type
			 */
			msg->msg_flags |= MSG_EOR;
			if (ctx->control != TLS_RECORD_TYPE_DATA)
				goto recv_end;
		} else {
			break;
		}

		/* If we have a new message from strparser, continue now. */
		if (decrypted >= target && !ctx->recv_pkt)
			break;
	} while (len);

recv_end:
	if (num_async) {
		/* Wait for all previously submitted records to be decrypted */
		smp_store_mb(ctx->async_notify, true);
		if (atomic_read(&ctx->decrypt_pending)) {
			err = crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
			if (err) {
				/* one of async decrypt failed */
				tls_err_abort(sk, err);
				copied = 0;
				decrypted = 0;
				goto end;
			}
		} else {
			reinit_completion(&ctx->async_wait.completion);
		}
		WRITE_ONCE(ctx->async_notify, false);

		/* Drain records from the rx_list & copy if required */
		if (is_peek || is_kvec)
			err = process_rx_list(ctx, msg, copied,
					      decrypted, false, is_peek);
		else
			err = process_rx_list(ctx, msg, 0,
					      decrypted, true, is_peek);
		if (err < 0) {
			tls_err_abort(sk, err);
			copied = 0;
			goto end;
		}

		WARN_ON(decrypted != err);
	}

	copied += decrypted;

end:
	release_sock(sk);
	if (psock)
		sk_psock_put(sk, psock);
	return copied ? : err;
}
ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
			   struct pipe_inode_info *pipe,
			   size_t len, unsigned int flags)
{
	struct tls_context *tls_ctx = tls_get_ctx(sock->sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct strp_msg *rxm = NULL;
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	ssize_t copied = 0;
	int err = 0;
	long timeo;
	int chunk;
	bool zc = false;

	lock_sock(sk);

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	skb = tls_wait_data(sk, NULL, flags, timeo, &err);
	if (!skb)
		goto splice_read_end;

	/* splice does not support reading control messages */
	if (ctx->control != TLS_RECORD_TYPE_DATA) {
		err = -ENOTSUPP;
		goto splice_read_end;
	}

	if (!ctx->decrypted) {
		err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false);
		if (err < 0) {
			tls_err_abort(sk, EBADMSG);
			goto splice_read_end;
		}
		ctx->decrypted = true;
	}
	rxm = strp_msg(skb);

	chunk = min_t(unsigned int, rxm->full_len, len);
	copied = skb_splice_bits(skb, sk, rxm->offset, pipe, chunk, flags);
	if (copied < 0)
		goto splice_read_end;

	if (likely(!(flags & MSG_PEEK)))
		tls_sw_advance_skb(sk, skb, copied);

splice_read_end:
	release_sock(sk);
	return copied ? : err;
}
bool tls_sw_stream_read(const struct sock *sk)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	bool ingress_empty = true;
	struct sk_psock *psock;

	rcu_read_lock();
	psock = sk_psock(sk);
	if (psock)
		ingress_empty = list_empty(&psock->ingress_msg);
	rcu_read_unlock();

	return !ingress_empty || ctx->recv_pkt;
}
static int tls_read_size(struct strparser *strp, struct sk_buff *skb)
{
	struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	char header[TLS_HEADER_SIZE + MAX_IV_SIZE];
	struct strp_msg *rxm = strp_msg(skb);
	size_t cipher_overhead;
	size_t data_len = 0;
	int ret;

	/* Verify that we have a full TLS header, or wait for more data */
	if (rxm->offset + tls_ctx->rx.prepend_size > skb->len)
		return 0;

	/* Sanity-check size of on-stack buffer. */
	if (WARN_ON(tls_ctx->rx.prepend_size > sizeof(header))) {
		ret = -EINVAL;
		goto read_failure;
	}

	/* Linearize header to local buffer */
	ret = skb_copy_bits(skb, rxm->offset, header, tls_ctx->rx.prepend_size);
	if (ret < 0)
		goto read_failure;

	ctx->control = header[0];

	data_len = ((header[4] & 0xFF) | (header[3] << 8));

	cipher_overhead = tls_ctx->rx.tag_size + tls_ctx->rx.iv_size;

	if (data_len > TLS_MAX_PAYLOAD_SIZE + cipher_overhead) {
		ret = -EMSGSIZE;
		goto read_failure;
	}
	if (data_len < cipher_overhead) {
		ret = -EBADMSG;
		goto read_failure;
	}

	if (header[1] != TLS_VERSION_MINOR(tls_ctx->crypto_recv.info.version) ||
	    header[2] != TLS_VERSION_MAJOR(tls_ctx->crypto_recv.info.version)) {
		ret = -EINVAL;
		goto read_failure;
	}

#ifdef CONFIG_TLS_DEVICE
	handle_device_resync(strp->sk, TCP_SKB_CB(skb)->seq + rxm->offset,
			     *(u64 *)tls_ctx->rx.rec_seq);
#endif
	return data_len + TLS_HEADER_SIZE;

read_failure:
	tls_err_abort(strp->sk, ret);

	return ret;
}
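/* The 5-byte TLS record header parsed above:
 *
 *   header[0]     record type, stashed in ctx->control
 *   header[1..2]  protocol version bytes, checked against the configured
 *                 version (for TLS 1.2 both bytes are 0x03)
 *   header[3..4]  payload length in network byte order
 *
 * strparser is then asked for data_len + TLS_HEADER_SIZE bytes so that
 * tls_queue() always sees one complete record.
 */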
static void tls_queue(struct strparser *strp, struct sk_buff *skb)
{
	struct tls_context *tls_ctx = tls_get_ctx(strp->sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);

	ctx->decrypted = false;

	ctx->recv_pkt = skb;
	strp_pause(strp);

	ctx->saved_data_ready(strp->sk);
}
static void tls_data_ready(struct sock *sk)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
	struct sk_psock *psock;

	strp_data_ready(&ctx->strp);

	psock = sk_psock_get(sk);
	if (psock && !list_empty(&psock->ingress_msg)) {
		ctx->saved_data_ready(sk);
		sk_psock_put(sk, psock);
	}
}
void tls_sw_free_resources_tx(struct sock *sk)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
	struct tls_rec *rec, *tmp;

	/* Wait for any pending async encryptions to complete */
	smp_store_mb(ctx->async_notify, true);
	if (atomic_read(&ctx->encrypt_pending))
		crypto_wait_req(-EINPROGRESS, &ctx->async_wait);

	cancel_delayed_work_sync(&ctx->tx_work.work);

	/* Tx whatever records we can transmit and abandon the rest */
	tls_tx_records(sk, -1);

	/* Free up un-sent records in tx_list. First, free
	 * the partially sent record if any at head of tx_list.
	 */
	if (tls_ctx->partially_sent_record) {
		struct scatterlist *sg = tls_ctx->partially_sent_record;

		while (1) {
			put_page(sg_page(sg));
			sk_mem_uncharge(sk, sg->length);

			if (sg_is_last(sg))
				break;
			sg++;
		}

		tls_ctx->partially_sent_record = NULL;

		rec = list_first_entry(&ctx->tx_list,
				       struct tls_rec, list);
		list_del(&rec->list);
		sk_msg_free(sk, &rec->msg_plaintext);
		kfree(rec);
	}

	list_for_each_entry_safe(rec, tmp, &ctx->tx_list, list) {
		list_del(&rec->list);
		sk_msg_free(sk, &rec->msg_encrypted);
		sk_msg_free(sk, &rec->msg_plaintext);
		kfree(rec);
	}

	crypto_free_aead(ctx->aead_send);
	tls_free_open_rec(sk);

	kfree(ctx);
}
void tls_sw_release_resources_rx(struct sock *sk)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);

	if (ctx->aead_recv) {
		kfree_skb(ctx->recv_pkt);
		ctx->recv_pkt = NULL;
		skb_queue_purge(&ctx->rx_list);
		crypto_free_aead(ctx->aead_recv);
		strp_stop(&ctx->strp);
		write_lock_bh(&sk->sk_callback_lock);
		sk->sk_data_ready = ctx->saved_data_ready;
		write_unlock_bh(&sk->sk_callback_lock);

		strp_done(&ctx->strp);
	}
}
void tls_sw_free_resources_rx(struct sock *sk)
{
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);

	tls_sw_release_resources_rx(sk);

	kfree(ctx);
}
/* The work handler to transmit the encrypted records in tx_list */
static void tx_work_handler(struct work_struct *work)
{
	struct delayed_work *delayed_work = to_delayed_work(work);
	struct tx_work *tx_work = container_of(delayed_work,
					       struct tx_work, work);
	struct sock *sk = tx_work->sk;
	struct tls_context *tls_ctx = tls_get_ctx(sk);
	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);

	if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
		return;

	lock_sock(sk);
	tls_tx_records(sk, -1);
	release_sock(sk);
}
int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx)
{
	struct tls_crypto_info *crypto_info;
	struct tls12_crypto_info_aes_gcm_128 *gcm_128_info;
	struct tls_sw_context_tx *sw_ctx_tx = NULL;
	struct tls_sw_context_rx *sw_ctx_rx = NULL;
	struct cipher_context *cctx;
	struct crypto_aead **aead;
	struct strp_callbacks cb;
	u16 nonce_size, tag_size, iv_size, rec_seq_size;
	struct crypto_tfm *tfm;
	char *iv, *rec_seq;
	int rc = 0;

	if (tx) {
		if (!ctx->priv_ctx_tx) {
			sw_ctx_tx = kzalloc(sizeof(*sw_ctx_tx), GFP_KERNEL);
			if (!sw_ctx_tx) {
				rc = -ENOMEM;
				goto out;
			}
			ctx->priv_ctx_tx = sw_ctx_tx;
		} else {
			sw_ctx_tx =
				(struct tls_sw_context_tx *)ctx->priv_ctx_tx;
		}
	} else {
		if (!ctx->priv_ctx_rx) {
			sw_ctx_rx = kzalloc(sizeof(*sw_ctx_rx), GFP_KERNEL);
			if (!sw_ctx_rx) {
				rc = -ENOMEM;
				goto out;
			}
			ctx->priv_ctx_rx = sw_ctx_rx;
		} else {
			sw_ctx_rx =
				(struct tls_sw_context_rx *)ctx->priv_ctx_rx;
		}
	}

	if (tx) {
		crypto_init_wait(&sw_ctx_tx->async_wait);
		crypto_info = &ctx->crypto_send.info;
		cctx = &ctx->tx;
		aead = &sw_ctx_tx->aead_send;
		INIT_LIST_HEAD(&sw_ctx_tx->tx_list);
		INIT_DELAYED_WORK(&sw_ctx_tx->tx_work.work, tx_work_handler);
		sw_ctx_tx->tx_work.sk = sk;
	} else {
		crypto_init_wait(&sw_ctx_rx->async_wait);
		crypto_info = &ctx->crypto_recv.info;
		cctx = &ctx->rx;
		skb_queue_head_init(&sw_ctx_rx->rx_list);
		aead = &sw_ctx_rx->aead_recv;
	}

	switch (crypto_info->cipher_type) {
	case TLS_CIPHER_AES_GCM_128: {
		nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
		tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE;
		iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE;
		iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv;
		rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE;
		rec_seq =
		 ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq;
		gcm_128_info =
			(struct tls12_crypto_info_aes_gcm_128 *)crypto_info;
		break;
	}
	default:
		rc = -EINVAL;
		goto free_priv;
	}

	/* Sanity-check the IV size for stack allocations. */
	if (iv_size > MAX_IV_SIZE || nonce_size > MAX_IV_SIZE) {
		rc = -EINVAL;
		goto free_priv;
	}

	cctx->prepend_size = TLS_HEADER_SIZE + nonce_size;
	cctx->tag_size = tag_size;
	cctx->overhead_size = cctx->prepend_size + cctx->tag_size;
	cctx->iv_size = iv_size;
	cctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE,
			   GFP_KERNEL);
	if (!cctx->iv) {
		rc = -ENOMEM;
		goto free_priv;
	}
	memcpy(cctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE);
	memcpy(cctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size);
	cctx->rec_seq_size = rec_seq_size;
	cctx->rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL);
	if (!cctx->rec_seq) {
		rc = -ENOMEM;
		goto free_iv;
	}

	*aead = crypto_alloc_aead("gcm(aes)", 0, 0);
	if (IS_ERR(*aead)) {
		rc = PTR_ERR(*aead);
		*aead = NULL;
		goto free_rec_seq;
	}

	ctx->push_pending_record = tls_sw_push_pending_record;

	rc = crypto_aead_setkey(*aead, gcm_128_info->key,
				TLS_CIPHER_AES_GCM_128_KEY_SIZE);
	if (rc)
		goto free_aead;

	rc = crypto_aead_setauthsize(*aead, cctx->tag_size);
	if (rc)
		goto free_aead;

	if (sw_ctx_rx) {
		tfm = crypto_aead_tfm(sw_ctx_rx->aead_recv);
		sw_ctx_rx->async_capable =
			tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC;

		/* Set up strparser */
		memset(&cb, 0, sizeof(cb));
		cb.rcv_msg = tls_queue;
		cb.parse_msg = tls_read_size;

		strp_init(&sw_ctx_rx->strp, sk, &cb);

		write_lock_bh(&sk->sk_callback_lock);
		sw_ctx_rx->saved_data_ready = sk->sk_data_ready;
		sk->sk_data_ready = tls_data_ready;
		write_unlock_bh(&sk->sk_callback_lock);

		strp_check_rcv(&sw_ctx_rx->strp);
	}

	goto out;

free_aead:
	crypto_free_aead(*aead);
	*aead = NULL;
free_rec_seq:
	kfree(cctx->rec_seq);
	cctx->rec_seq = NULL;
free_iv:
	kfree(cctx->iv);
	cctx->iv = NULL;
free_priv:
	if (tx) {
		kfree(ctx->priv_ctx_tx);
		ctx->priv_ctx_tx = NULL;
	} else {
		kfree(ctx->priv_ctx_rx);
		ctx->priv_ctx_rx = NULL;
	}
out:
	return rc;
}