2 * Copyright (c) 2016-2017, Mellanox Technologies. All rights reserved.
3 * Copyright (c) 2016-2017, Dave Watson <davejwatson@fb.com>. All rights reserved.
4 * Copyright (c) 2016-2017, Lance Chao <lancerchao@fb.com>. All rights reserved.
5 * Copyright (c) 2016, Fridolin Pokorny <fridolin.pokorny@gmail.com>. All rights reserved.
6 * Copyright (c) 2016, Nikos Mavrogiannopoulos <nmav@gnutls.org>. All rights reserved.
7 * Copyright (c) 2018, Covalent IO, Inc. http://covalent.io
9 * This software is available to you under a choice of one of two
10 * licenses. You may choose to be licensed under the terms of the GNU
11 * General Public License (GPL) Version 2, available from the file
12 * COPYING in the main directory of this source tree, or the
13 * OpenIB.org BSD license below:
15 * Redistribution and use in source and binary forms, with or
16 * without modification, are permitted provided that the following
19 * - Redistributions of source code must retain the above
20 * copyright notice, this list of conditions and the following
23 * - Redistributions in binary form must reproduce the above
24 * copyright notice, this list of conditions and the following
25 * disclaimer in the documentation and/or other materials
26 * provided with the distribution.
28 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38 #include <linux/sched/signal.h>
39 #include <linux/module.h>
40 #include <crypto/aead.h>
42 #include <net/strparser.h>
45 #define MAX_IV_SIZE TLS_CIPHER_AES_GCM_128_IV_SIZE
47 static int __skb_nsg(struct sk_buff
*skb
, int offset
, int len
,
48 unsigned int recursion_level
)
50 int start
= skb_headlen(skb
);
51 int i
, chunk
= start
- offset
;
52 struct sk_buff
*frag_iter
;
55 if (unlikely(recursion_level
>= 24))
68 for (i
= 0; i
< skb_shinfo(skb
)->nr_frags
; i
++) {
71 WARN_ON(start
> offset
+ len
);
73 end
= start
+ skb_frag_size(&skb_shinfo(skb
)->frags
[i
]);
87 if (unlikely(skb_has_frag_list(skb
))) {
88 skb_walk_frags(skb
, frag_iter
) {
91 WARN_ON(start
> offset
+ len
);
93 end
= start
+ frag_iter
->len
;
98 ret
= __skb_nsg(frag_iter
, offset
- start
, chunk
,
100 if (unlikely(ret
< 0))
/* Entry point for counting scatterlist elements.
 *
 * Forwards @skb, @offset and @len unchanged to __skb_nsg() with the
 * recursion level starting at 0, and returns that helper's result: the
 * number of scatterlist elements required to completely map the skb, or
 * -EMSGSIZE if the recursion depth is exceeded.
 */
static int skb_nsg(struct sk_buff *skb, int offset, int len)
{
	/* The walk always begins at the outermost recursion level. */
	const unsigned int top_level = 0;

	return __skb_nsg(skb, offset, len, top_level);
}
123 static void tls_decrypt_done(struct crypto_async_request
*req
, int err
)
125 struct aead_request
*aead_req
= (struct aead_request
*)req
;
126 struct scatterlist
*sgout
= aead_req
->dst
;
127 struct tls_sw_context_rx
*ctx
;
128 struct tls_context
*tls_ctx
;
129 struct scatterlist
*sg
;
134 skb
= (struct sk_buff
*)req
->data
;
135 tls_ctx
= tls_get_ctx(skb
->sk
);
136 ctx
= tls_sw_ctx_rx(tls_ctx
);
137 pending
= atomic_dec_return(&ctx
->decrypt_pending
);
139 /* Propagate if there was an err */
141 ctx
->async_wait
.err
= err
;
142 tls_err_abort(skb
->sk
, err
);
145 /* After using skb->sk to propagate sk through crypto async callback
146 * we need to NULL it again.
150 /* Release the skb, pages and memory allocated for crypto req */
153 /* Skip the first S/G entry as it points to AAD */
154 for_each_sg(sg_next(sgout
), sg
, UINT_MAX
, pages
) {
157 put_page(sg_page(sg
));
162 if (!pending
&& READ_ONCE(ctx
->async_notify
))
163 complete(&ctx
->async_wait
.completion
);
166 static int tls_do_decryption(struct sock
*sk
,
168 struct scatterlist
*sgin
,
169 struct scatterlist
*sgout
,
172 struct aead_request
*aead_req
,
175 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
176 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
179 aead_request_set_tfm(aead_req
, ctx
->aead_recv
);
180 aead_request_set_ad(aead_req
, TLS_AAD_SPACE_SIZE
);
181 aead_request_set_crypt(aead_req
, sgin
, sgout
,
182 data_len
+ tls_ctx
->rx
.tag_size
,
186 /* Using skb->sk to push sk through to crypto async callback
187 * handler. This allows propagating errors up to the socket
188 * if needed. It _must_ be cleared in the async handler
189 * before kfree_skb is called. We _know_ skb->sk is NULL
190 * because it is a clone from strparser.
193 aead_request_set_callback(aead_req
,
194 CRYPTO_TFM_REQ_MAY_BACKLOG
,
195 tls_decrypt_done
, skb
);
196 atomic_inc(&ctx
->decrypt_pending
);
198 aead_request_set_callback(aead_req
,
199 CRYPTO_TFM_REQ_MAY_BACKLOG
,
200 crypto_req_done
, &ctx
->async_wait
);
203 ret
= crypto_aead_decrypt(aead_req
);
204 if (ret
== -EINPROGRESS
) {
208 ret
= crypto_wait_req(ret
, &ctx
->async_wait
);
212 atomic_dec(&ctx
->decrypt_pending
);
217 static void tls_trim_both_msgs(struct sock
*sk
, int target_size
)
219 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
220 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
221 struct tls_rec
*rec
= ctx
->open_rec
;
223 sk_msg_trim(sk
, &rec
->msg_plaintext
, target_size
);
225 target_size
+= tls_ctx
->tx
.overhead_size
;
226 sk_msg_trim(sk
, &rec
->msg_encrypted
, target_size
);
229 static int tls_alloc_encrypted_msg(struct sock
*sk
, int len
)
231 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
232 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
233 struct tls_rec
*rec
= ctx
->open_rec
;
234 struct sk_msg
*msg_en
= &rec
->msg_encrypted
;
236 return sk_msg_alloc(sk
, msg_en
, len
, 0);
239 static int tls_clone_plaintext_msg(struct sock
*sk
, int required
)
241 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
242 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
243 struct tls_rec
*rec
= ctx
->open_rec
;
244 struct sk_msg
*msg_pl
= &rec
->msg_plaintext
;
245 struct sk_msg
*msg_en
= &rec
->msg_encrypted
;
248 /* We add page references worth len bytes from encrypted sg
249 * at the end of plaintext sg. It is guaranteed that msg_en
250 * has enough required room (ensured by caller).
252 len
= required
- msg_pl
->sg
.size
;
254 /* Skip initial bytes in msg_en's data to be able to use
255 * same offset of both plain and encrypted data.
257 skip
= tls_ctx
->tx
.prepend_size
+ msg_pl
->sg
.size
;
259 return sk_msg_clone(sk
, msg_pl
, msg_en
, skip
, len
);
262 static struct tls_rec
*tls_get_rec(struct sock
*sk
)
264 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
265 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
266 struct sk_msg
*msg_pl
, *msg_en
;
270 mem_size
= sizeof(struct tls_rec
) + crypto_aead_reqsize(ctx
->aead_send
);
272 rec
= kzalloc(mem_size
, sk
->sk_allocation
);
276 msg_pl
= &rec
->msg_plaintext
;
277 msg_en
= &rec
->msg_encrypted
;
282 sg_init_table(rec
->sg_aead_in
, 2);
283 sg_set_buf(&rec
->sg_aead_in
[0], rec
->aad_space
,
284 sizeof(rec
->aad_space
));
285 sg_unmark_end(&rec
->sg_aead_in
[1]);
287 sg_init_table(rec
->sg_aead_out
, 2);
288 sg_set_buf(&rec
->sg_aead_out
[0], rec
->aad_space
,
289 sizeof(rec
->aad_space
));
290 sg_unmark_end(&rec
->sg_aead_out
[1]);
295 static void tls_free_rec(struct sock
*sk
, struct tls_rec
*rec
)
297 sk_msg_free(sk
, &rec
->msg_encrypted
);
298 sk_msg_free(sk
, &rec
->msg_plaintext
);
302 static void tls_free_open_rec(struct sock
*sk
)
304 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
305 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
306 struct tls_rec
*rec
= ctx
->open_rec
;
309 tls_free_rec(sk
, rec
);
310 ctx
->open_rec
= NULL
;
314 int tls_tx_records(struct sock
*sk
, int flags
)
316 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
317 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
318 struct tls_rec
*rec
, *tmp
;
319 struct sk_msg
*msg_en
;
320 int tx_flags
, rc
= 0;
322 if (tls_is_partially_sent_record(tls_ctx
)) {
323 rec
= list_first_entry(&ctx
->tx_list
,
324 struct tls_rec
, list
);
327 tx_flags
= rec
->tx_flags
;
331 rc
= tls_push_partial_record(sk
, tls_ctx
, tx_flags
);
335 /* Full record has been transmitted.
336 * Remove the head of tx_list
338 list_del(&rec
->list
);
339 sk_msg_free(sk
, &rec
->msg_plaintext
);
343 /* Tx all ready records */
344 list_for_each_entry_safe(rec
, tmp
, &ctx
->tx_list
, list
) {
345 if (READ_ONCE(rec
->tx_ready
)) {
347 tx_flags
= rec
->tx_flags
;
351 msg_en
= &rec
->msg_encrypted
;
352 rc
= tls_push_sg(sk
, tls_ctx
,
353 &msg_en
->sg
.data
[msg_en
->sg
.curr
],
358 list_del(&rec
->list
);
359 sk_msg_free(sk
, &rec
->msg_plaintext
);
367 if (rc
< 0 && rc
!= -EAGAIN
)
368 tls_err_abort(sk
, EBADMSG
);
373 static void tls_encrypt_done(struct crypto_async_request
*req
, int err
)
375 struct aead_request
*aead_req
= (struct aead_request
*)req
;
376 struct sock
*sk
= req
->data
;
377 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
378 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
379 struct scatterlist
*sge
;
380 struct sk_msg
*msg_en
;
385 rec
= container_of(aead_req
, struct tls_rec
, aead_req
);
386 msg_en
= &rec
->msg_encrypted
;
388 sge
= sk_msg_elem(msg_en
, msg_en
->sg
.curr
);
389 sge
->offset
-= tls_ctx
->tx
.prepend_size
;
390 sge
->length
+= tls_ctx
->tx
.prepend_size
;
392 /* Check if error is previously set on socket */
393 if (err
|| sk
->sk_err
) {
396 /* If err is already set on socket, return the same code */
398 ctx
->async_wait
.err
= sk
->sk_err
;
400 ctx
->async_wait
.err
= err
;
401 tls_err_abort(sk
, err
);
406 struct tls_rec
*first_rec
;
408 /* Mark the record as ready for transmission */
409 smp_store_mb(rec
->tx_ready
, true);
411 /* If received record is at head of tx_list, schedule tx */
412 first_rec
= list_first_entry(&ctx
->tx_list
,
413 struct tls_rec
, list
);
414 if (rec
== first_rec
)
418 pending
= atomic_dec_return(&ctx
->encrypt_pending
);
420 if (!pending
&& READ_ONCE(ctx
->async_notify
))
421 complete(&ctx
->async_wait
.completion
);
426 /* Schedule the transmission */
427 if (!test_and_set_bit(BIT_TX_SCHEDULED
, &ctx
->tx_bitmask
))
428 schedule_delayed_work(&ctx
->tx_work
.work
, 1);
431 static int tls_do_encryption(struct sock
*sk
,
432 struct tls_context
*tls_ctx
,
433 struct tls_sw_context_tx
*ctx
,
434 struct aead_request
*aead_req
,
435 size_t data_len
, u32 start
)
437 struct tls_rec
*rec
= ctx
->open_rec
;
438 struct sk_msg
*msg_en
= &rec
->msg_encrypted
;
439 struct scatterlist
*sge
= sk_msg_elem(msg_en
, start
);
442 sge
->offset
+= tls_ctx
->tx
.prepend_size
;
443 sge
->length
-= tls_ctx
->tx
.prepend_size
;
445 msg_en
->sg
.curr
= start
;
447 aead_request_set_tfm(aead_req
, ctx
->aead_send
);
448 aead_request_set_ad(aead_req
, TLS_AAD_SPACE_SIZE
);
449 aead_request_set_crypt(aead_req
, rec
->sg_aead_in
,
451 data_len
, tls_ctx
->tx
.iv
);
453 aead_request_set_callback(aead_req
, CRYPTO_TFM_REQ_MAY_BACKLOG
,
454 tls_encrypt_done
, sk
);
456 /* Add the record in tx_list */
457 list_add_tail((struct list_head
*)&rec
->list
, &ctx
->tx_list
);
458 atomic_inc(&ctx
->encrypt_pending
);
460 rc
= crypto_aead_encrypt(aead_req
);
461 if (!rc
|| rc
!= -EINPROGRESS
) {
462 atomic_dec(&ctx
->encrypt_pending
);
463 sge
->offset
-= tls_ctx
->tx
.prepend_size
;
464 sge
->length
+= tls_ctx
->tx
.prepend_size
;
468 WRITE_ONCE(rec
->tx_ready
, true);
469 } else if (rc
!= -EINPROGRESS
) {
470 list_del(&rec
->list
);
474 /* Unhook the record from context if encryption is not failure */
475 ctx
->open_rec
= NULL
;
476 tls_advance_record_sn(sk
, &tls_ctx
->tx
);
480 static int tls_split_open_record(struct sock
*sk
, struct tls_rec
*from
,
481 struct tls_rec
**to
, struct sk_msg
*msg_opl
,
482 struct sk_msg
*msg_oen
, u32 split_point
,
483 u32 tx_overhead_size
, u32
*orig_end
)
485 u32 i
, j
, bytes
= 0, apply
= msg_opl
->apply_bytes
;
486 struct scatterlist
*sge
, *osge
, *nsge
;
487 u32 orig_size
= msg_opl
->sg
.size
;
488 struct scatterlist tmp
= { };
489 struct sk_msg
*msg_npl
;
493 new = tls_get_rec(sk
);
496 ret
= sk_msg_alloc(sk
, &new->msg_encrypted
, msg_opl
->sg
.size
+
497 tx_overhead_size
, 0);
499 tls_free_rec(sk
, new);
503 *orig_end
= msg_opl
->sg
.end
;
504 i
= msg_opl
->sg
.start
;
505 sge
= sk_msg_elem(msg_opl
, i
);
506 while (apply
&& sge
->length
) {
507 if (sge
->length
> apply
) {
508 u32 len
= sge
->length
- apply
;
510 get_page(sg_page(sge
));
511 sg_set_page(&tmp
, sg_page(sge
), len
,
512 sge
->offset
+ apply
);
517 apply
-= sge
->length
;
518 bytes
+= sge
->length
;
521 sk_msg_iter_var_next(i
);
522 if (i
== msg_opl
->sg
.end
)
524 sge
= sk_msg_elem(msg_opl
, i
);
528 msg_opl
->sg
.curr
= i
;
529 msg_opl
->sg
.copybreak
= 0;
530 msg_opl
->apply_bytes
= 0;
531 msg_opl
->sg
.size
= bytes
;
533 msg_npl
= &new->msg_plaintext
;
534 msg_npl
->apply_bytes
= apply
;
535 msg_npl
->sg
.size
= orig_size
- bytes
;
537 j
= msg_npl
->sg
.start
;
538 nsge
= sk_msg_elem(msg_npl
, j
);
540 memcpy(nsge
, &tmp
, sizeof(*nsge
));
541 sk_msg_iter_var_next(j
);
542 nsge
= sk_msg_elem(msg_npl
, j
);
545 osge
= sk_msg_elem(msg_opl
, i
);
546 while (osge
->length
) {
547 memcpy(nsge
, osge
, sizeof(*nsge
));
549 sk_msg_iter_var_next(i
);
550 sk_msg_iter_var_next(j
);
553 osge
= sk_msg_elem(msg_opl
, i
);
554 nsge
= sk_msg_elem(msg_npl
, j
);
558 msg_npl
->sg
.curr
= j
;
559 msg_npl
->sg
.copybreak
= 0;
565 static void tls_merge_open_record(struct sock
*sk
, struct tls_rec
*to
,
566 struct tls_rec
*from
, u32 orig_end
)
568 struct sk_msg
*msg_npl
= &from
->msg_plaintext
;
569 struct sk_msg
*msg_opl
= &to
->msg_plaintext
;
570 struct scatterlist
*osge
, *nsge
;
574 sk_msg_iter_var_prev(i
);
575 j
= msg_npl
->sg
.start
;
577 osge
= sk_msg_elem(msg_opl
, i
);
578 nsge
= sk_msg_elem(msg_npl
, j
);
580 if (sg_page(osge
) == sg_page(nsge
) &&
581 osge
->offset
+ osge
->length
== nsge
->offset
) {
582 osge
->length
+= nsge
->length
;
583 put_page(sg_page(nsge
));
586 msg_opl
->sg
.end
= orig_end
;
587 msg_opl
->sg
.curr
= orig_end
;
588 msg_opl
->sg
.copybreak
= 0;
589 msg_opl
->apply_bytes
= msg_opl
->sg
.size
+ msg_npl
->sg
.size
;
590 msg_opl
->sg
.size
+= msg_npl
->sg
.size
;
592 sk_msg_free(sk
, &to
->msg_encrypted
);
593 sk_msg_xfer_full(&to
->msg_encrypted
, &from
->msg_encrypted
);
598 static int tls_push_record(struct sock
*sk
, int flags
,
599 unsigned char record_type
)
601 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
602 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
603 struct tls_rec
*rec
= ctx
->open_rec
, *tmp
= NULL
;
604 u32 i
, split_point
, uninitialized_var(orig_end
);
605 struct sk_msg
*msg_pl
, *msg_en
;
606 struct aead_request
*req
;
613 msg_pl
= &rec
->msg_plaintext
;
614 msg_en
= &rec
->msg_encrypted
;
616 split_point
= msg_pl
->apply_bytes
;
617 split
= split_point
&& split_point
< msg_pl
->sg
.size
;
619 rc
= tls_split_open_record(sk
, rec
, &tmp
, msg_pl
, msg_en
,
620 split_point
, tls_ctx
->tx
.overhead_size
,
624 sk_msg_trim(sk
, msg_en
, msg_pl
->sg
.size
+
625 tls_ctx
->tx
.overhead_size
);
628 rec
->tx_flags
= flags
;
629 req
= &rec
->aead_req
;
632 sk_msg_iter_var_prev(i
);
633 sg_mark_end(sk_msg_elem(msg_pl
, i
));
635 i
= msg_pl
->sg
.start
;
636 sg_chain(rec
->sg_aead_in
, 2, rec
->inplace_crypto
?
637 &msg_en
->sg
.data
[i
] : &msg_pl
->sg
.data
[i
]);
640 sk_msg_iter_var_prev(i
);
641 sg_mark_end(sk_msg_elem(msg_en
, i
));
643 i
= msg_en
->sg
.start
;
644 sg_chain(rec
->sg_aead_out
, 2, &msg_en
->sg
.data
[i
]);
646 tls_make_aad(rec
->aad_space
, msg_pl
->sg
.size
,
647 tls_ctx
->tx
.rec_seq
, tls_ctx
->tx
.rec_seq_size
,
650 tls_fill_prepend(tls_ctx
,
651 page_address(sg_page(&msg_en
->sg
.data
[i
])) +
652 msg_en
->sg
.data
[i
].offset
, msg_pl
->sg
.size
,
655 tls_ctx
->pending_open_record_frags
= false;
657 rc
= tls_do_encryption(sk
, tls_ctx
, ctx
, req
, msg_pl
->sg
.size
, i
);
659 if (rc
!= -EINPROGRESS
) {
660 tls_err_abort(sk
, EBADMSG
);
662 tls_ctx
->pending_open_record_frags
= true;
663 tls_merge_open_record(sk
, rec
, tmp
, orig_end
);
668 msg_pl
= &tmp
->msg_plaintext
;
669 msg_en
= &tmp
->msg_encrypted
;
670 sk_msg_trim(sk
, msg_en
, msg_pl
->sg
.size
+
671 tls_ctx
->tx
.overhead_size
);
672 tls_ctx
->pending_open_record_frags
= true;
676 return tls_tx_records(sk
, flags
);
679 static int bpf_exec_tx_verdict(struct sk_msg
*msg
, struct sock
*sk
,
680 bool full_record
, u8 record_type
,
681 size_t *copied
, int flags
)
683 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
684 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
685 struct sk_msg msg_redir
= { };
686 struct sk_psock
*psock
;
687 struct sock
*sk_redir
;
693 policy
= !(flags
& MSG_SENDPAGE_NOPOLICY
);
694 psock
= sk_psock_get(sk
);
695 if (!psock
|| !policy
)
696 return tls_push_record(sk
, flags
, record_type
);
698 enospc
= sk_msg_full(msg
);
699 if (psock
->eval
== __SK_NONE
) {
700 delta
= msg
->sg
.size
;
701 psock
->eval
= sk_psock_msg_verdict(sk
, psock
, msg
);
702 if (delta
< msg
->sg
.size
)
703 delta
-= msg
->sg
.size
;
707 if (msg
->cork_bytes
&& msg
->cork_bytes
> msg
->sg
.size
&&
708 !enospc
&& !full_record
) {
714 if (msg
->apply_bytes
&& msg
->apply_bytes
< send
)
715 send
= msg
->apply_bytes
;
717 switch (psock
->eval
) {
719 err
= tls_push_record(sk
, flags
, record_type
);
721 *copied
-= sk_msg_free(sk
, msg
);
722 tls_free_open_rec(sk
);
727 sk_redir
= psock
->sk_redir
;
728 memcpy(&msg_redir
, msg
, sizeof(*msg
));
729 if (msg
->apply_bytes
< send
)
730 msg
->apply_bytes
= 0;
732 msg
->apply_bytes
-= send
;
733 sk_msg_return_zero(sk
, msg
, send
);
734 msg
->sg
.size
-= send
;
736 err
= tcp_bpf_sendmsg_redir(sk_redir
, &msg_redir
, send
, flags
);
739 *copied
-= sk_msg_free_nocharge(sk
, &msg_redir
);
742 if (msg
->sg
.size
== 0)
743 tls_free_open_rec(sk
);
747 sk_msg_free_partial(sk
, msg
, send
);
748 if (msg
->apply_bytes
< send
)
749 msg
->apply_bytes
= 0;
751 msg
->apply_bytes
-= send
;
752 if (msg
->sg
.size
== 0)
753 tls_free_open_rec(sk
);
754 *copied
-= (send
+ delta
);
759 bool reset_eval
= !ctx
->open_rec
;
763 msg
= &rec
->msg_plaintext
;
764 if (!msg
->apply_bytes
)
768 psock
->eval
= __SK_NONE
;
769 if (psock
->sk_redir
) {
770 sock_put(psock
->sk_redir
);
771 psock
->sk_redir
= NULL
;
778 sk_psock_put(sk
, psock
);
782 static int tls_sw_push_pending_record(struct sock
*sk
, int flags
)
784 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
785 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
786 struct tls_rec
*rec
= ctx
->open_rec
;
787 struct sk_msg
*msg_pl
;
793 msg_pl
= &rec
->msg_plaintext
;
794 copied
= msg_pl
->sg
.size
;
798 return bpf_exec_tx_verdict(msg_pl
, sk
, true, TLS_RECORD_TYPE_DATA
,
802 int tls_sw_sendmsg(struct sock
*sk
, struct msghdr
*msg
, size_t size
)
804 long timeo
= sock_sndtimeo(sk
, msg
->msg_flags
& MSG_DONTWAIT
);
805 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
806 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
807 struct crypto_tfm
*tfm
= crypto_aead_tfm(ctx
->aead_send
);
808 bool async_capable
= tfm
->__crt_alg
->cra_flags
& CRYPTO_ALG_ASYNC
;
809 unsigned char record_type
= TLS_RECORD_TYPE_DATA
;
810 bool is_kvec
= iov_iter_is_kvec(&msg
->msg_iter
);
811 bool eor
= !(msg
->msg_flags
& MSG_MORE
);
812 size_t try_to_copy
, copied
= 0;
813 struct sk_msg
*msg_pl
, *msg_en
;
823 if (msg
->msg_flags
& ~(MSG_MORE
| MSG_DONTWAIT
| MSG_NOSIGNAL
))
828 /* Wait till there is any pending write on socket */
829 if (unlikely(sk
->sk_write_pending
)) {
830 ret
= wait_on_pending_writer(sk
, &timeo
);
835 if (unlikely(msg
->msg_controllen
)) {
836 ret
= tls_proccess_cmsg(sk
, msg
, &record_type
);
838 if (ret
== -EINPROGRESS
)
840 else if (ret
!= -EAGAIN
)
845 while (msg_data_left(msg
)) {
854 rec
= ctx
->open_rec
= tls_get_rec(sk
);
860 msg_pl
= &rec
->msg_plaintext
;
861 msg_en
= &rec
->msg_encrypted
;
863 orig_size
= msg_pl
->sg
.size
;
865 try_to_copy
= msg_data_left(msg
);
866 record_room
= TLS_MAX_PAYLOAD_SIZE
- msg_pl
->sg
.size
;
867 if (try_to_copy
>= record_room
) {
868 try_to_copy
= record_room
;
872 required_size
= msg_pl
->sg
.size
+ try_to_copy
+
873 tls_ctx
->tx
.overhead_size
;
875 if (!sk_stream_memory_free(sk
))
876 goto wait_for_sndbuf
;
879 ret
= tls_alloc_encrypted_msg(sk
, required_size
);
882 goto wait_for_memory
;
884 /* Adjust try_to_copy according to the amount that was
885 * actually allocated. The difference is due
886 * to max sg elements limit
888 try_to_copy
-= required_size
- msg_en
->sg
.size
;
892 if (!is_kvec
&& (full_record
|| eor
) && !async_capable
) {
893 u32 first
= msg_pl
->sg
.end
;
895 ret
= sk_msg_zerocopy_from_iter(sk
, &msg
->msg_iter
,
896 msg_pl
, try_to_copy
);
898 goto fallback_to_reg_send
;
900 rec
->inplace_crypto
= 0;
903 copied
+= try_to_copy
;
905 sk_msg_sg_copy_set(msg_pl
, first
);
906 ret
= bpf_exec_tx_verdict(msg_pl
, sk
, full_record
,
907 record_type
, &copied
,
910 if (ret
== -EINPROGRESS
)
912 else if (ret
== -ENOMEM
)
913 goto wait_for_memory
;
914 else if (ret
== -ENOSPC
)
916 else if (ret
!= -EAGAIN
)
921 copied
-= try_to_copy
;
922 sk_msg_sg_copy_clear(msg_pl
, first
);
923 iov_iter_revert(&msg
->msg_iter
,
924 msg_pl
->sg
.size
- orig_size
);
925 fallback_to_reg_send
:
926 sk_msg_trim(sk
, msg_pl
, orig_size
);
929 required_size
= msg_pl
->sg
.size
+ try_to_copy
;
931 ret
= tls_clone_plaintext_msg(sk
, required_size
);
936 /* Adjust try_to_copy according to the amount that was
937 * actually allocated. The difference is due
938 * to max sg elements limit
940 try_to_copy
-= required_size
- msg_pl
->sg
.size
;
942 sk_msg_trim(sk
, msg_en
, msg_pl
->sg
.size
+
943 tls_ctx
->tx
.overhead_size
);
947 ret
= sk_msg_memcopy_from_iter(sk
, &msg
->msg_iter
,
948 msg_pl
, try_to_copy
);
953 /* Open records defined only if successfully copied, otherwise
954 * we would trim the sg but not reset the open record frags.
956 tls_ctx
->pending_open_record_frags
= true;
957 copied
+= try_to_copy
;
958 if (full_record
|| eor
) {
959 ret
= bpf_exec_tx_verdict(msg_pl
, sk
, full_record
,
960 record_type
, &copied
,
963 if (ret
== -EINPROGRESS
)
965 else if (ret
== -ENOMEM
)
966 goto wait_for_memory
;
967 else if (ret
!= -EAGAIN
) {
978 set_bit(SOCK_NOSPACE
, &sk
->sk_socket
->flags
);
980 ret
= sk_stream_wait_memory(sk
, &timeo
);
983 tls_trim_both_msgs(sk
, orig_size
);
987 if (msg_en
->sg
.size
< required_size
)
988 goto alloc_encrypted
;
994 /* Wait for pending encryptions to get completed */
995 smp_store_mb(ctx
->async_notify
, true);
997 if (atomic_read(&ctx
->encrypt_pending
))
998 crypto_wait_req(-EINPROGRESS
, &ctx
->async_wait
);
1000 reinit_completion(&ctx
->async_wait
.completion
);
1002 WRITE_ONCE(ctx
->async_notify
, false);
1004 if (ctx
->async_wait
.err
) {
1005 ret
= ctx
->async_wait
.err
;
1010 /* Transmit if any encryptions have completed */
1011 if (test_and_clear_bit(BIT_TX_SCHEDULED
, &ctx
->tx_bitmask
)) {
1012 cancel_delayed_work(&ctx
->tx_work
.work
);
1013 tls_tx_records(sk
, msg
->msg_flags
);
1017 ret
= sk_stream_error(sk
, msg
->msg_flags
, ret
);
1020 return copied
? copied
: ret
;
1023 int tls_sw_do_sendpage(struct sock
*sk
, struct page
*page
,
1024 int offset
, size_t size
, int flags
)
1026 long timeo
= sock_sndtimeo(sk
, flags
& MSG_DONTWAIT
);
1027 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1028 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
1029 unsigned char record_type
= TLS_RECORD_TYPE_DATA
;
1030 struct sk_msg
*msg_pl
;
1031 struct tls_rec
*rec
;
1039 eor
= !(flags
& (MSG_MORE
| MSG_SENDPAGE_NOTLAST
));
1040 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE
, sk
);
1042 /* Wait till there is any pending write on socket */
1043 if (unlikely(sk
->sk_write_pending
)) {
1044 ret
= wait_on_pending_writer(sk
, &timeo
);
1049 /* Call the sk_stream functions to manage the sndbuf mem. */
1051 size_t copy
, required_size
;
1059 rec
= ctx
->open_rec
;
1061 rec
= ctx
->open_rec
= tls_get_rec(sk
);
1067 msg_pl
= &rec
->msg_plaintext
;
1069 full_record
= false;
1070 record_room
= TLS_MAX_PAYLOAD_SIZE
- msg_pl
->sg
.size
;
1073 if (copy
>= record_room
) {
1078 required_size
= msg_pl
->sg
.size
+ copy
+
1079 tls_ctx
->tx
.overhead_size
;
1081 if (!sk_stream_memory_free(sk
))
1082 goto wait_for_sndbuf
;
1084 ret
= tls_alloc_encrypted_msg(sk
, required_size
);
1087 goto wait_for_memory
;
1089 /* Adjust copy according to the amount that was
1090 * actually allocated. The difference is due
1091 * to max sg elements limit
1093 copy
-= required_size
- msg_pl
->sg
.size
;
1097 sk_msg_page_add(msg_pl
, page
, copy
, offset
);
1098 sk_mem_charge(sk
, copy
);
1104 tls_ctx
->pending_open_record_frags
= true;
1105 if (full_record
|| eor
|| sk_msg_full(msg_pl
)) {
1106 rec
->inplace_crypto
= 0;
1107 ret
= bpf_exec_tx_verdict(msg_pl
, sk
, full_record
,
1108 record_type
, &copied
, flags
);
1110 if (ret
== -EINPROGRESS
)
1112 else if (ret
== -ENOMEM
)
1113 goto wait_for_memory
;
1114 else if (ret
!= -EAGAIN
) {
1123 set_bit(SOCK_NOSPACE
, &sk
->sk_socket
->flags
);
1125 ret
= sk_stream_wait_memory(sk
, &timeo
);
1127 tls_trim_both_msgs(sk
, msg_pl
->sg
.size
);
1135 /* Transmit if any encryptions have completed */
1136 if (test_and_clear_bit(BIT_TX_SCHEDULED
, &ctx
->tx_bitmask
)) {
1137 cancel_delayed_work(&ctx
->tx_work
.work
);
1138 tls_tx_records(sk
, flags
);
1142 ret
= sk_stream_error(sk
, flags
, ret
);
1143 return copied
? copied
: ret
;
1146 int tls_sw_sendpage_locked(struct sock
*sk
, struct page
*page
,
1147 int offset
, size_t size
, int flags
)
1149 if (flags
& ~(MSG_MORE
| MSG_DONTWAIT
| MSG_NOSIGNAL
|
1150 MSG_SENDPAGE_NOTLAST
| MSG_SENDPAGE_NOPOLICY
))
1153 return tls_sw_do_sendpage(sk
, page
, offset
, size
, flags
);
1156 int tls_sw_sendpage(struct sock
*sk
, struct page
*page
,
1157 int offset
, size_t size
, int flags
)
1161 if (flags
& ~(MSG_MORE
| MSG_DONTWAIT
| MSG_NOSIGNAL
|
1162 MSG_SENDPAGE_NOTLAST
| MSG_SENDPAGE_NOPOLICY
))
1166 ret
= tls_sw_do_sendpage(sk
, page
, offset
, size
, flags
);
1171 static struct sk_buff
*tls_wait_data(struct sock
*sk
, struct sk_psock
*psock
,
1172 int flags
, long timeo
, int *err
)
1174 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1175 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1176 struct sk_buff
*skb
;
1177 DEFINE_WAIT_FUNC(wait
, woken_wake_function
);
1179 while (!(skb
= ctx
->recv_pkt
) && sk_psock_queue_empty(psock
)) {
1181 *err
= sock_error(sk
);
1185 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
1188 if (sock_flag(sk
, SOCK_DONE
))
1191 if ((flags
& MSG_DONTWAIT
) || !timeo
) {
1196 add_wait_queue(sk_sleep(sk
), &wait
);
1197 sk_set_bit(SOCKWQ_ASYNC_WAITDATA
, sk
);
1198 sk_wait_event(sk
, &timeo
,
1199 ctx
->recv_pkt
!= skb
||
1200 !sk_psock_queue_empty(psock
),
1202 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA
, sk
);
1203 remove_wait_queue(sk_sleep(sk
), &wait
);
1205 /* Handle signals */
1206 if (signal_pending(current
)) {
1207 *err
= sock_intr_errno(timeo
);
1215 static int tls_setup_from_iter(struct sock
*sk
, struct iov_iter
*from
,
1216 int length
, int *pages_used
,
1217 unsigned int *size_used
,
1218 struct scatterlist
*to
,
1221 int rc
= 0, i
= 0, num_elem
= *pages_used
, maxpages
;
1222 struct page
*pages
[MAX_SKB_FRAGS
];
1223 unsigned int size
= *size_used
;
1224 ssize_t copied
, use
;
1227 while (length
> 0) {
1229 maxpages
= to_max_pages
- num_elem
;
1230 if (maxpages
== 0) {
1234 copied
= iov_iter_get_pages(from
, pages
,
1242 iov_iter_advance(from
, copied
);
1247 use
= min_t(int, copied
, PAGE_SIZE
- offset
);
1249 sg_set_page(&to
[num_elem
],
1250 pages
[i
], use
, offset
);
1251 sg_unmark_end(&to
[num_elem
]);
1252 /* We do not uncharge memory from this API */
1261 /* Mark the end in the last sg entry if newly added */
1262 if (num_elem
> *pages_used
)
1263 sg_mark_end(&to
[num_elem
- 1]);
1266 iov_iter_revert(from
, size
- *size_used
);
1268 *pages_used
= num_elem
;
1273 /* This function decrypts the input skb into either out_iov or in out_sg
1274 * or in skb buffers itself. The input parameter 'zc' indicates if
1275 * zero-copy mode needs to be tried or not. With zero-copy mode, either
1276 * out_iov or out_sg must be non-NULL. In case both out_iov and out_sg are
1277 * NULL, then the decryption happens inside skb buffers itself, i.e.
1278 * zero-copy gets disabled and 'zc' is updated.
1281 static int decrypt_internal(struct sock
*sk
, struct sk_buff
*skb
,
1282 struct iov_iter
*out_iov
,
1283 struct scatterlist
*out_sg
,
1284 int *chunk
, bool *zc
)
1286 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1287 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1288 struct strp_msg
*rxm
= strp_msg(skb
);
1289 int n_sgin
, n_sgout
, nsg
, mem_size
, aead_size
, err
, pages
= 0;
1290 struct aead_request
*aead_req
;
1291 struct sk_buff
*unused
;
1292 u8
*aad
, *iv
, *mem
= NULL
;
1293 struct scatterlist
*sgin
= NULL
;
1294 struct scatterlist
*sgout
= NULL
;
1295 const int data_len
= rxm
->full_len
- tls_ctx
->rx
.overhead_size
;
1297 if (*zc
&& (out_iov
|| out_sg
)) {
1299 n_sgout
= iov_iter_npages(out_iov
, INT_MAX
) + 1;
1301 n_sgout
= sg_nents(out_sg
);
1302 n_sgin
= skb_nsg(skb
, rxm
->offset
+ tls_ctx
->rx
.prepend_size
,
1303 rxm
->full_len
- tls_ctx
->rx
.prepend_size
);
1307 n_sgin
= skb_cow_data(skb
, 0, &unused
);
1313 /* Increment to accommodate AAD */
1314 n_sgin
= n_sgin
+ 1;
1316 nsg
= n_sgin
+ n_sgout
;
1318 aead_size
= sizeof(*aead_req
) + crypto_aead_reqsize(ctx
->aead_recv
);
1319 mem_size
= aead_size
+ (nsg
* sizeof(struct scatterlist
));
1320 mem_size
= mem_size
+ TLS_AAD_SPACE_SIZE
;
1321 mem_size
= mem_size
+ crypto_aead_ivsize(ctx
->aead_recv
);
1323 /* Allocate a single block of memory which contains
1324 * aead_req || sgin[] || sgout[] || aad || iv.
1325 * This order achieves correct alignment for aead_req, sgin, sgout.
1327 mem
= kmalloc(mem_size
, sk
->sk_allocation
);
1331 /* Segment the allocated memory */
1332 aead_req
= (struct aead_request
*)mem
;
1333 sgin
= (struct scatterlist
*)(mem
+ aead_size
);
1334 sgout
= sgin
+ n_sgin
;
1335 aad
= (u8
*)(sgout
+ n_sgout
);
1336 iv
= aad
+ TLS_AAD_SPACE_SIZE
;
1339 err
= skb_copy_bits(skb
, rxm
->offset
+ TLS_HEADER_SIZE
,
1340 iv
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE
,
1341 tls_ctx
->rx
.iv_size
);
1346 memcpy(iv
, tls_ctx
->rx
.iv
, TLS_CIPHER_AES_GCM_128_SALT_SIZE
);
1349 tls_make_aad(aad
, rxm
->full_len
- tls_ctx
->rx
.overhead_size
,
1350 tls_ctx
->rx
.rec_seq
, tls_ctx
->rx
.rec_seq_size
,
1354 sg_init_table(sgin
, n_sgin
);
1355 sg_set_buf(&sgin
[0], aad
, TLS_AAD_SPACE_SIZE
);
1356 err
= skb_to_sgvec(skb
, &sgin
[1],
1357 rxm
->offset
+ tls_ctx
->rx
.prepend_size
,
1358 rxm
->full_len
- tls_ctx
->rx
.prepend_size
);
1366 sg_init_table(sgout
, n_sgout
);
1367 sg_set_buf(&sgout
[0], aad
, TLS_AAD_SPACE_SIZE
);
1370 err
= tls_setup_from_iter(sk
, out_iov
, data_len
,
1371 &pages
, chunk
, &sgout
[1],
1374 goto fallback_to_reg_recv
;
1375 } else if (out_sg
) {
1376 memcpy(sgout
, out_sg
, n_sgout
* sizeof(*sgout
));
1378 goto fallback_to_reg_recv
;
1381 fallback_to_reg_recv
:
1388 /* Prepare and submit AEAD request */
1389 err
= tls_do_decryption(sk
, skb
, sgin
, sgout
, iv
,
1390 data_len
, aead_req
, *zc
);
1391 if (err
== -EINPROGRESS
)
1394 /* Release the pages in case iov was mapped to pages */
1395 for (; pages
> 0; pages
--)
1396 put_page(sg_page(&sgout
[pages
]));
1402 static int decrypt_skb_update(struct sock
*sk
, struct sk_buff
*skb
,
1403 struct iov_iter
*dest
, int *chunk
, bool *zc
)
1405 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1406 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1407 struct strp_msg
*rxm
= strp_msg(skb
);
1410 #ifdef CONFIG_TLS_DEVICE
1411 err
= tls_device_decrypted(sk
, skb
);
1415 if (!ctx
->decrypted
) {
1416 err
= decrypt_internal(sk
, skb
, dest
, NULL
, chunk
, zc
);
1418 if (err
== -EINPROGRESS
)
1419 tls_advance_record_sn(sk
, &tls_ctx
->rx
);
1427 rxm
->offset
+= tls_ctx
->rx
.prepend_size
;
1428 rxm
->full_len
-= tls_ctx
->rx
.overhead_size
;
1429 tls_advance_record_sn(sk
, &tls_ctx
->rx
);
1430 ctx
->decrypted
= true;
1431 ctx
->saved_data_ready(sk
);
1436 int decrypt_skb(struct sock
*sk
, struct sk_buff
*skb
,
1437 struct scatterlist
*sgout
)
1442 return decrypt_internal(sk
, skb
, NULL
, sgout
, &chunk
, &zc
);
1445 static bool tls_sw_advance_skb(struct sock
*sk
, struct sk_buff
*skb
,
1448 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1449 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1452 struct strp_msg
*rxm
= strp_msg(skb
);
1454 if (len
< rxm
->full_len
) {
1456 rxm
->full_len
-= len
;
1462 /* Finished with message */
1463 ctx
->recv_pkt
= NULL
;
1464 __strp_unpause(&ctx
->strp
);
1469 int tls_sw_recvmsg(struct sock
*sk
,
1476 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1477 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1478 struct sk_psock
*psock
;
1479 unsigned char control
;
1480 struct strp_msg
*rxm
;
1481 struct sk_buff
*skb
;
1484 int target
, err
= 0;
1486 bool is_kvec
= iov_iter_is_kvec(&msg
->msg_iter
);
1491 if (unlikely(flags
& MSG_ERRQUEUE
))
1492 return sock_recv_errqueue(sk
, msg
, len
, SOL_IP
, IP_RECVERR
);
1494 psock
= sk_psock_get(sk
);
1497 target
= sock_rcvlowat(sk
, flags
& MSG_WAITALL
, len
);
1498 timeo
= sock_rcvtimeo(sk
, flags
& MSG_DONTWAIT
);
1504 skb
= tls_wait_data(sk
, psock
, flags
, timeo
, &err
);
1507 int ret
= __tcp_bpf_recvmsg(sk
, psock
,
1519 rxm
= strp_msg(skb
);
1524 cerr
= put_cmsg(msg
, SOL_TLS
, TLS_GET_RECORD_TYPE
,
1525 sizeof(ctx
->control
), &ctx
->control
);
1527 control
= ctx
->control
;
1528 if (ctx
->control
!= TLS_RECORD_TYPE_DATA
) {
1529 if (cerr
|| msg
->msg_flags
& MSG_CTRUNC
) {
1534 } else if (control
!= ctx
->control
) {
1538 if (!ctx
->decrypted
) {
1539 int to_copy
= rxm
->full_len
- tls_ctx
->rx
.overhead_size
;
1541 if (!is_kvec
&& to_copy
<= len
&&
1542 likely(!(flags
& MSG_PEEK
)))
1545 err
= decrypt_skb_update(sk
, skb
, &msg
->msg_iter
,
1547 if (err
< 0 && err
!= -EINPROGRESS
) {
1548 tls_err_abort(sk
, EBADMSG
);
1552 if (err
== -EINPROGRESS
) {
1555 goto pick_next_record
;
1558 ctx
->decrypted
= true;
1562 chunk
= min_t(unsigned int, rxm
->full_len
, len
);
1564 err
= skb_copy_datagram_msg(skb
, rxm
->offset
, msg
,
1573 if (likely(!(flags
& MSG_PEEK
))) {
1574 u8 control
= ctx
->control
;
1576 /* For async, drop current skb reference */
1580 if (tls_sw_advance_skb(sk
, skb
, chunk
)) {
1581 /* Return full control message to
1582 * userspace before trying to parse
1583 * another message type
1585 msg
->msg_flags
|= MSG_EOR
;
1586 if (control
!= TLS_RECORD_TYPE_DATA
)
1592 /* MSG_PEEK right now cannot look beyond current skb
1593 * from strparser, meaning we cannot advance skb here
1594 * and thus unpause strparser since we'd lose original
1600 /* If we have a new message from strparser, continue now. */
1601 if (copied
>= target
&& !ctx
->recv_pkt
)
1607 /* Wait for all previously submitted records to be decrypted */
1608 smp_store_mb(ctx
->async_notify
, true);
1609 if (atomic_read(&ctx
->decrypt_pending
)) {
1610 err
= crypto_wait_req(-EINPROGRESS
, &ctx
->async_wait
);
1612 /* one of async decrypt failed */
1613 tls_err_abort(sk
, err
);
1617 reinit_completion(&ctx
->async_wait
.completion
);
1619 WRITE_ONCE(ctx
->async_notify
, false);
1624 sk_psock_put(sk
, psock
);
1625 return copied
? : err
;
1628 ssize_t
tls_sw_splice_read(struct socket
*sock
, loff_t
*ppos
,
1629 struct pipe_inode_info
*pipe
,
1630 size_t len
, unsigned int flags
)
1632 struct tls_context
*tls_ctx
= tls_get_ctx(sock
->sk
);
1633 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1634 struct strp_msg
*rxm
= NULL
;
1635 struct sock
*sk
= sock
->sk
;
1636 struct sk_buff
*skb
;
1645 timeo
= sock_rcvtimeo(sk
, flags
& MSG_DONTWAIT
);
1647 skb
= tls_wait_data(sk
, NULL
, flags
, timeo
, &err
);
1649 goto splice_read_end
;
1651 /* splice does not support reading control messages */
1652 if (ctx
->control
!= TLS_RECORD_TYPE_DATA
) {
1654 goto splice_read_end
;
1657 if (!ctx
->decrypted
) {
1658 err
= decrypt_skb_update(sk
, skb
, NULL
, &chunk
, &zc
);
1661 tls_err_abort(sk
, EBADMSG
);
1662 goto splice_read_end
;
1664 ctx
->decrypted
= true;
1666 rxm
= strp_msg(skb
);
1668 chunk
= min_t(unsigned int, rxm
->full_len
, len
);
1669 copied
= skb_splice_bits(skb
, sk
, rxm
->offset
, pipe
, chunk
, flags
);
1671 goto splice_read_end
;
1673 if (likely(!(flags
& MSG_PEEK
)))
1674 tls_sw_advance_skb(sk
, skb
, copied
);
1678 return copied
? : err
;
1681 bool tls_sw_stream_read(const struct sock
*sk
)
1683 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1684 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1685 bool ingress_empty
= true;
1686 struct sk_psock
*psock
;
1689 psock
= sk_psock(sk
);
1691 ingress_empty
= list_empty(&psock
->ingress_msg
);
1694 return !ingress_empty
|| ctx
->recv_pkt
;
1697 static int tls_read_size(struct strparser
*strp
, struct sk_buff
*skb
)
1699 struct tls_context
*tls_ctx
= tls_get_ctx(strp
->sk
);
1700 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1701 char header
[TLS_HEADER_SIZE
+ MAX_IV_SIZE
];
1702 struct strp_msg
*rxm
= strp_msg(skb
);
1703 size_t cipher_overhead
;
1704 size_t data_len
= 0;
1707 /* Verify that we have a full TLS header, or wait for more data */
1708 if (rxm
->offset
+ tls_ctx
->rx
.prepend_size
> skb
->len
)
1711 /* Sanity-check size of on-stack buffer. */
1712 if (WARN_ON(tls_ctx
->rx
.prepend_size
> sizeof(header
))) {
1717 /* Linearize header to local buffer */
1718 ret
= skb_copy_bits(skb
, rxm
->offset
, header
, tls_ctx
->rx
.prepend_size
);
1723 ctx
->control
= header
[0];
1725 data_len
= ((header
[4] & 0xFF) | (header
[3] << 8));
1727 cipher_overhead
= tls_ctx
->rx
.tag_size
+ tls_ctx
->rx
.iv_size
;
1729 if (data_len
> TLS_MAX_PAYLOAD_SIZE
+ cipher_overhead
) {
1733 if (data_len
< cipher_overhead
) {
1738 if (header
[1] != TLS_VERSION_MINOR(tls_ctx
->crypto_recv
.info
.version
) ||
1739 header
[2] != TLS_VERSION_MAJOR(tls_ctx
->crypto_recv
.info
.version
)) {
1744 #ifdef CONFIG_TLS_DEVICE
1745 handle_device_resync(strp
->sk
, TCP_SKB_CB(skb
)->seq
+ rxm
->offset
,
1746 *(u64
*)tls_ctx
->rx
.rec_seq
);
1748 return data_len
+ TLS_HEADER_SIZE
;
1751 tls_err_abort(strp
->sk
, ret
);
1756 static void tls_queue(struct strparser
*strp
, struct sk_buff
*skb
)
1758 struct tls_context
*tls_ctx
= tls_get_ctx(strp
->sk
);
1759 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1761 ctx
->decrypted
= false;
1763 ctx
->recv_pkt
= skb
;
1766 ctx
->saved_data_ready(strp
->sk
);
1769 static void tls_data_ready(struct sock
*sk
)
1771 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1772 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1773 struct sk_psock
*psock
;
1775 strp_data_ready(&ctx
->strp
);
1777 psock
= sk_psock_get(sk
);
1778 if (psock
&& !list_empty(&psock
->ingress_msg
)) {
1779 ctx
->saved_data_ready(sk
);
1780 sk_psock_put(sk
, psock
);
1784 void tls_sw_free_resources_tx(struct sock
*sk
)
1786 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1787 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
1788 struct tls_rec
*rec
, *tmp
;
1790 /* Wait for any pending async encryptions to complete */
1791 smp_store_mb(ctx
->async_notify
, true);
1792 if (atomic_read(&ctx
->encrypt_pending
))
1793 crypto_wait_req(-EINPROGRESS
, &ctx
->async_wait
);
1795 cancel_delayed_work_sync(&ctx
->tx_work
.work
);
1797 /* Tx whatever records we can transmit and abandon the rest */
1798 tls_tx_records(sk
, -1);
1800 /* Free up un-sent records in tx_list. First, free
1801 * the partially sent record if any at head of tx_list.
1803 if (tls_ctx
->partially_sent_record
) {
1804 struct scatterlist
*sg
= tls_ctx
->partially_sent_record
;
1807 put_page(sg_page(sg
));
1808 sk_mem_uncharge(sk
, sg
->length
);
1815 tls_ctx
->partially_sent_record
= NULL
;
1817 rec
= list_first_entry(&ctx
->tx_list
,
1818 struct tls_rec
, list
);
1819 list_del(&rec
->list
);
1820 sk_msg_free(sk
, &rec
->msg_plaintext
);
1824 list_for_each_entry_safe(rec
, tmp
, &ctx
->tx_list
, list
) {
1825 list_del(&rec
->list
);
1826 sk_msg_free(sk
, &rec
->msg_encrypted
);
1827 sk_msg_free(sk
, &rec
->msg_plaintext
);
1831 crypto_free_aead(ctx
->aead_send
);
1832 tls_free_open_rec(sk
);
1837 void tls_sw_release_resources_rx(struct sock
*sk
)
1839 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1840 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1842 if (ctx
->aead_recv
) {
1843 kfree_skb(ctx
->recv_pkt
);
1844 ctx
->recv_pkt
= NULL
;
1845 crypto_free_aead(ctx
->aead_recv
);
1846 strp_stop(&ctx
->strp
);
1847 write_lock_bh(&sk
->sk_callback_lock
);
1848 sk
->sk_data_ready
= ctx
->saved_data_ready
;
1849 write_unlock_bh(&sk
->sk_callback_lock
);
1851 strp_done(&ctx
->strp
);
1856 void tls_sw_free_resources_rx(struct sock
*sk
)
1858 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1859 struct tls_sw_context_rx
*ctx
= tls_sw_ctx_rx(tls_ctx
);
1861 tls_sw_release_resources_rx(sk
);
1866 /* The work handler to transmit the encrypted records in tx_list */
1867 static void tx_work_handler(struct work_struct
*work
)
1869 struct delayed_work
*delayed_work
= to_delayed_work(work
);
1870 struct tx_work
*tx_work
= container_of(delayed_work
,
1871 struct tx_work
, work
);
1872 struct sock
*sk
= tx_work
->sk
;
1873 struct tls_context
*tls_ctx
= tls_get_ctx(sk
);
1874 struct tls_sw_context_tx
*ctx
= tls_sw_ctx_tx(tls_ctx
);
1876 if (!test_and_clear_bit(BIT_TX_SCHEDULED
, &ctx
->tx_bitmask
))
1880 tls_tx_records(sk
, -1);
1884 int tls_set_sw_offload(struct sock
*sk
, struct tls_context
*ctx
, int tx
)
1886 struct tls_crypto_info
*crypto_info
;
1887 struct tls12_crypto_info_aes_gcm_128
*gcm_128_info
;
1888 struct tls_sw_context_tx
*sw_ctx_tx
= NULL
;
1889 struct tls_sw_context_rx
*sw_ctx_rx
= NULL
;
1890 struct cipher_context
*cctx
;
1891 struct crypto_aead
**aead
;
1892 struct strp_callbacks cb
;
1893 u16 nonce_size
, tag_size
, iv_size
, rec_seq_size
;
1903 if (!ctx
->priv_ctx_tx
) {
1904 sw_ctx_tx
= kzalloc(sizeof(*sw_ctx_tx
), GFP_KERNEL
);
1909 ctx
->priv_ctx_tx
= sw_ctx_tx
;
1912 (struct tls_sw_context_tx
*)ctx
->priv_ctx_tx
;
1915 if (!ctx
->priv_ctx_rx
) {
1916 sw_ctx_rx
= kzalloc(sizeof(*sw_ctx_rx
), GFP_KERNEL
);
1921 ctx
->priv_ctx_rx
= sw_ctx_rx
;
1924 (struct tls_sw_context_rx
*)ctx
->priv_ctx_rx
;
1929 crypto_init_wait(&sw_ctx_tx
->async_wait
);
1930 crypto_info
= &ctx
->crypto_send
.info
;
1932 aead
= &sw_ctx_tx
->aead_send
;
1933 INIT_LIST_HEAD(&sw_ctx_tx
->tx_list
);
1934 INIT_DELAYED_WORK(&sw_ctx_tx
->tx_work
.work
, tx_work_handler
);
1935 sw_ctx_tx
->tx_work
.sk
= sk
;
1937 crypto_init_wait(&sw_ctx_rx
->async_wait
);
1938 crypto_info
= &ctx
->crypto_recv
.info
;
1940 aead
= &sw_ctx_rx
->aead_recv
;
1943 switch (crypto_info
->cipher_type
) {
1944 case TLS_CIPHER_AES_GCM_128
: {
1945 nonce_size
= TLS_CIPHER_AES_GCM_128_IV_SIZE
;
1946 tag_size
= TLS_CIPHER_AES_GCM_128_TAG_SIZE
;
1947 iv_size
= TLS_CIPHER_AES_GCM_128_IV_SIZE
;
1948 iv
= ((struct tls12_crypto_info_aes_gcm_128
*)crypto_info
)->iv
;
1949 rec_seq_size
= TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE
;
1951 ((struct tls12_crypto_info_aes_gcm_128
*)crypto_info
)->rec_seq
;
1953 (struct tls12_crypto_info_aes_gcm_128
*)crypto_info
;
1961 /* Sanity-check the IV size for stack allocations. */
1962 if (iv_size
> MAX_IV_SIZE
|| nonce_size
> MAX_IV_SIZE
) {
1967 cctx
->prepend_size
= TLS_HEADER_SIZE
+ nonce_size
;
1968 cctx
->tag_size
= tag_size
;
1969 cctx
->overhead_size
= cctx
->prepend_size
+ cctx
->tag_size
;
1970 cctx
->iv_size
= iv_size
;
1971 cctx
->iv
= kmalloc(iv_size
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE
,
1977 memcpy(cctx
->iv
, gcm_128_info
->salt
, TLS_CIPHER_AES_GCM_128_SALT_SIZE
);
1978 memcpy(cctx
->iv
+ TLS_CIPHER_AES_GCM_128_SALT_SIZE
, iv
, iv_size
);
1979 cctx
->rec_seq_size
= rec_seq_size
;
1980 cctx
->rec_seq
= kmemdup(rec_seq
, rec_seq_size
, GFP_KERNEL
);
1981 if (!cctx
->rec_seq
) {
1987 *aead
= crypto_alloc_aead("gcm(aes)", 0, 0);
1988 if (IS_ERR(*aead
)) {
1989 rc
= PTR_ERR(*aead
);
1995 ctx
->push_pending_record
= tls_sw_push_pending_record
;
1997 rc
= crypto_aead_setkey(*aead
, gcm_128_info
->key
,
1998 TLS_CIPHER_AES_GCM_128_KEY_SIZE
);
2002 rc
= crypto_aead_setauthsize(*aead
, cctx
->tag_size
);
2007 /* Set up strparser */
2008 memset(&cb
, 0, sizeof(cb
));
2009 cb
.rcv_msg
= tls_queue
;
2010 cb
.parse_msg
= tls_read_size
;
2012 strp_init(&sw_ctx_rx
->strp
, sk
, &cb
);
2014 write_lock_bh(&sk
->sk_callback_lock
);
2015 sw_ctx_rx
->saved_data_ready
= sk
->sk_data_ready
;
2016 sk
->sk_data_ready
= tls_data_ready
;
2017 write_unlock_bh(&sk
->sk_callback_lock
);
2019 strp_check_rcv(&sw_ctx_rx
->strp
);
2025 crypto_free_aead(*aead
);
2028 kfree(cctx
->rec_seq
);
2029 cctx
->rec_seq
= NULL
;
2035 kfree(ctx
->priv_ctx_tx
);
2036 ctx
->priv_ctx_tx
= NULL
;
2038 kfree(ctx
->priv_ctx_rx
);
2039 ctx
->priv_ctx_rx
= NULL
;