1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* RxRPC recvmsg() implementation
4 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 #include <linux/net.h>
11 #include <linux/skbuff.h>
12 #include <linux/export.h>
13 #include <linux/sched/signal.h>
16 #include <net/af_rxrpc.h>
17 #include "ar-internal.h"
20 * Post a call for attention by the socket or kernel service. Further
21 * notifications are suppressed by putting recvmsg_link on a dummy queue.
/*
 * rxrpc_notify_socket - flag a call as needing attention from its owner.
 * @call: The call to post.
 *
 * From the statements visible here: the function first tests whether
 * call->recvmsg_link is already on a list (the suppression check mentioned
 * in the comment preceding this function), then fetches call->socket with
 * rcu_dereference().  When rx is non-NULL and sk->sk_state is below
 * RXRPC_CLOSE, it calls the call's notify_rx hook with call->notify_lock
 * held, links the call onto rx->recvmsg_q with rx->recvmsg_lock held, and
 * calls sk->sk_data_ready() unless the sock carries SOCK_DEAD.
 *
 * NOTE(review): this copy of the function has lost lines (braces, the
 * declaration and assignment of `sk`, and any else/return statements), so
 * whether the notify_rx hook and the recvmsg_q path are alternatives, and
 * what the first list_empty() test guards, cannot be confirmed from here.
 */
23 void rxrpc_notify_socket(struct rxrpc_call
*call
)
25 struct rxrpc_sock
*rx
;
28 _enter("%d", call
->debug_id
);
30 if (!list_empty(&call
->recvmsg_link
))
35 rx
= rcu_dereference(call
->socket
);
37 if (rx
&& sk
->sk_state
< RXRPC_CLOSE
) {
/* A notify_rx hook, when set, is invoked with notify_lock held. */
38 if (call
->notify_rx
) {
39 spin_lock_bh(&call
->notify_lock
);
40 call
->notify_rx(sk
, call
, call
->user_call_ID
);
41 spin_unlock_bh(&call
->notify_lock
);
/*
 * The list is re-tested under recvmsg_lock so the call is linked only once;
 * rxrpc_get_call() runs before the link is made (presumably the ref that
 * travels with the queue entry - confirm against rxrpc_recvmsg()).
 */
43 write_lock_bh(&rx
->recvmsg_lock
);
44 if (list_empty(&call
->recvmsg_link
)) {
45 rxrpc_get_call(call
, rxrpc_call_got
);
46 list_add_tail(&call
->recvmsg_link
, &rx
->recvmsg_q
);
48 write_unlock_bh(&rx
->recvmsg_lock
);
50 if (!sock_flag(sk
, SOCK_DEAD
)) {
51 _debug("call %ps", sk
->sk_data_ready
);
52 sk
->sk_data_ready(sk
);
62 * Pass a call terminating message to userspace.
/*
 * rxrpc_recvmsg_term - emit the control message describing how a call ended.
 * @call: The call; call->completion selects the message.
 * @msg: The msghdr the control message is added to with put_cmsg().
 *
 * Visible mapping (all at level SOL_RXRPC):
 *   RXRPC_CALL_SUCCEEDED        -> RXRPC_ACK, zero length, only when
 *                                  rxrpc_is_service_call(call) is true
 *   RXRPC_CALL_REMOTELY_ABORTED -> RXRPC_ABORT, 4 bytes, call->abort_code
 *   RXRPC_CALL_LOCALLY_ABORTED  -> RXRPC_ABORT, 4 bytes, call->abort_code
 *   RXRPC_CALL_NETWORK_ERROR    -> RXRPC_NET_ERROR, 4 bytes taken from tmp
 *   RXRPC_CALL_LOCAL_ERROR      -> RXRPC_LOCAL_ERROR, 4 bytes taken from tmp
 * The put_cmsg() result is kept in ret and passed to the closing tracepoint.
 *
 * NOTE(review): lines are missing from this copy (the declarations, whatever
 * terminates each case, the value stored in tmp for the two error cases and
 * the final return), so the exact return value cannot be confirmed here.
 */
64 static int rxrpc_recvmsg_term(struct rxrpc_call
*call
, struct msghdr
*msg
)
69 switch (call
->completion
) {
70 case RXRPC_CALL_SUCCEEDED
:
72 if (rxrpc_is_service_call(call
))
73 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_ACK
, 0, &tmp
);
75 case RXRPC_CALL_REMOTELY_ABORTED
:
76 tmp
= call
->abort_code
;
77 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_ABORT
, 4, &tmp
);
79 case RXRPC_CALL_LOCALLY_ABORTED
:
80 tmp
= call
->abort_code
;
81 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_ABORT
, 4, &tmp
);
83 case RXRPC_CALL_NETWORK_ERROR
:
85 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_NET_ERROR
, 4, &tmp
);
87 case RXRPC_CALL_LOCAL_ERROR
:
89 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_LOCAL_ERROR
, 4, &tmp
);
/*
 * NOTE(review): the message reports call->state although the switch above is
 * on call->completion - confirm which value is meant to be logged.
 */
92 pr_err("Invalid terminal call state %u\n", call
->state
);
97 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_terminal
, call
->rx_hard_ack
,
98 call
->rx_pkt_offset
, call
->rx_pkt_len
, ret
);
103 * Pass back notification of a new call. The call is added to the
104 * to-be-accepted list. This means that the next call to be accepted might not
105 * be the last call seen awaiting acceptance, but unless we leave this on the
106 * front of the queue and block all other messages until someone gives us a
107 * user_ID for it, there's not a lot we can do.
/*
 * rxrpc_recvmsg_new_call - report a new incoming call and park it for accept.
 * @rx: The socket the call arrived on.
 * @call: The newly arrived call.
 * @msg: The msghdr that receives the RXRPC_NEW_CALL control message.
 * @flags: recvmsg() flags; MSG_PEEK suppresses the list manipulation.
 *
 * A zero-length RXRPC_NEW_CALL control message is added to @msg.  If that
 * succeeded (ret == 0) and the caller is not peeking, the call is unlinked
 * from the recvmsg list under rx->recvmsg_lock and appended to
 * rx->to_be_accepted under rx->call_lock, with rxrpc_get_call() called in
 * between.  The result of put_cmsg() is traced.
 *
 * NOTE(review): the declarations and the return statement are not present
 * in this copy; presumably ret is returned - confirm.
 */
109 static int rxrpc_recvmsg_new_call(struct rxrpc_sock
*rx
,
110 struct rxrpc_call
*call
,
111 struct msghdr
*msg
, int flags
)
115 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_NEW_CALL
, 0, &tmp
);
117 if (ret
== 0 && !(flags
& MSG_PEEK
)) {
118 _debug("to be accepted");
/* Take the call off the recvmsg list... */
119 write_lock_bh(&rx
->recvmsg_lock
);
120 list_del_init(&call
->recvmsg_link
);
121 write_unlock_bh(&rx
->recvmsg_lock
);
/* ...and put it on the to-be-accepted list instead. */
123 rxrpc_get_call(call
, rxrpc_call_got
);
124 write_lock(&rx
->call_lock
);
125 list_add_tail(&call
->accept_link
, &rx
->to_be_accepted
);
126 write_unlock(&rx
->call_lock
);
129 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_to_be_accepted
, 1, 0, 0, ret
);
134 * End the packet reception phase.
/*
 * rxrpc_end_rx_phase - finish the receive phase of a call.
 * @call: The call whose last DATA packet has been consumed.
 * @serial: Serial number passed on to rxrpc_propose_ACK().
 *
 * Asserts that call->rx_hard_ack has reached call->rx_top.  Visible state
 * handling:
 *  - RXRPC_CALL_CLIENT_RECV_REPLY: an RXRPC_ACK_IDLE is proposed before the
 *    state lock is taken, then __rxrpc_call_completed() is called with
 *    state_lock write-held.
 *  - RXRPC_CALL_SERVER_RECV_REQUEST: with state_lock write-held, tx_phase is
 *    set, the state becomes RXRPC_CALL_SERVER_ACK_REQUEST and expect_req_by
 *    is set to jiffies + MAX_JIFFY_OFFSET; after unlocking, an
 *    RXRPC_ACK_DELAY is proposed.
 *  - The third write_unlock_bh() presumably belongs to a default arm whose
 *    label is missing from this copy - confirm.
 */
136 static void rxrpc_end_rx_phase(struct rxrpc_call
*call
, rxrpc_serial_t serial
)
138 _enter("%d,%s", call
->debug_id
, rxrpc_call_states
[call
->state
]);
140 trace_rxrpc_receive(call
, rxrpc_receive_end
, 0, call
->rx_top
);
141 ASSERTCMP(call
->rx_hard_ack
, ==, call
->rx_top
);
143 if (call
->state
== RXRPC_CALL_CLIENT_RECV_REPLY
) {
144 rxrpc_propose_ACK(call
, RXRPC_ACK_IDLE
, serial
, false, true,
145 rxrpc_propose_ack_terminal_ack
);
146 //rxrpc_send_ack_packet(call, false, NULL);
/* The state is re-read with state_lock held before it is changed. */
149 write_lock_bh(&call
->state_lock
);
151 switch (call
->state
) {
152 case RXRPC_CALL_CLIENT_RECV_REPLY
:
153 __rxrpc_call_completed(call
);
154 write_unlock_bh(&call
->state_lock
);
157 case RXRPC_CALL_SERVER_RECV_REQUEST
:
158 call
->tx_phase
= true;
159 call
->state
= RXRPC_CALL_SERVER_ACK_REQUEST
;
160 call
->expect_req_by
= jiffies
+ MAX_JIFFY_OFFSET
;
161 write_unlock_bh(&call
->state_lock
);
162 rxrpc_propose_ACK(call
, RXRPC_ACK_DELAY
, serial
, false, true,
163 rxrpc_propose_ack_processing_op
);
166 write_unlock_bh(&call
->state_lock
);
172 * Discard a packet we've used up and advance the Rx window by one.
/*
 * rxrpc_rotate_rx_window - drop a consumed packet from the Rx ring.
 * @call: The call whose receive window is being advanced.
 *
 * Visible steps: rx_hard_ack is read, rx_top is read with
 * smp_load_acquire() and hard_ack is asserted to be before top.  The ring
 * slot ix = hard_ack & RXRPC_RXTX_BUFF_MASK is looked up, the packet's header
 * flags and serial are saved, and for a jumbo annotation the serial is
 * advanced by the subpacket index minus one.  The slot and its annotation are
 * cleared, the new hard_ack is published with smp_store_release(), and the
 * skb is freed.  If the saved flags carry RXRPC_LAST_PACKET the receive phase
 * is ended; the ACK checks that follow it are described inline.
 *
 * NOTE(review): several lines are missing from this copy, including the
 * declarations of skb/flags/ix, the statement that advances hard_ack before
 * it is used as an index, the assignment of sp, the else that presumably
 * separates the last-packet case from the ACK checks, and one argument line
 * of the rxrpc_propose_ACK() call.
 */
174 static void rxrpc_rotate_rx_window(struct rxrpc_call
*call
)
176 struct rxrpc_skb_priv
*sp
;
178 rxrpc_serial_t serial
;
179 rxrpc_seq_t hard_ack
, top
;
183 _enter("%d", call
->debug_id
);
185 hard_ack
= call
->rx_hard_ack
;
186 top
= smp_load_acquire(&call
->rx_top
);
187 ASSERT(before(hard_ack
, top
));
190 ix
= hard_ack
& RXRPC_RXTX_BUFF_MASK
;
191 skb
= call
->rxtx_buffer
[ix
];
192 rxrpc_see_skb(skb
, rxrpc_skb_rx_rotated
);
/* Save what is needed from the header before the skb is freed below. */
194 flags
= sp
->hdr
.flags
;
195 serial
= sp
->hdr
.serial
;
196 if (call
->rxtx_annotations
[ix
] & RXRPC_RX_ANNO_JUMBO
)
197 serial
+= (call
->rxtx_annotations
[ix
] & RXRPC_RX_ANNO_JUMBO
) - 1;
199 call
->rxtx_buffer
[ix
] = NULL
;
200 call
->rxtx_annotations
[ix
] = 0;
201 /* Barrier against rxrpc_input_data(). */
202 smp_store_release(&call
->rx_hard_ack
, hard_ack
);
204 rxrpc_free_skb(skb
, rxrpc_skb_rx_freed
);
206 _debug("%u,%u,%02x", hard_ack
, top
, flags
);
207 trace_rxrpc_receive(call
, rxrpc_receive_rotate
, serial
, hard_ack
);
208 if (flags
& RXRPC_LAST_PACKET
) {
209 rxrpc_end_rx_phase(call
, serial
);
211 /* Check to see if there's an ACK that needs sending. */
/*
 * A delayed ACK is proposed when hard_ack is after_eq() ackr_consumed + 2,
 * when top is after_eq() ackr_seen + 2, or when the window is drained
 * (hard_ack == top) and hard_ack is after() ackr_consumed.
 */
212 if (after_eq(hard_ack
, call
->ackr_consumed
+ 2) ||
213 after_eq(top
, call
->ackr_seen
+ 2) ||
214 (hard_ack
== top
&& after(hard_ack
, call
->ackr_consumed
)))
215 rxrpc_propose_ACK(call
, RXRPC_ACK_DELAY
, serial
,
217 rxrpc_propose_ack_rotate_rx
);
/* Any pending ACK reason other than RXRPC_ACK_DELAY is sent right away. */
218 if (call
->ackr_reason
&& call
->ackr_reason
!= RXRPC_ACK_DELAY
)
219 rxrpc_send_ack_packet(call
, false, NULL
);
224 * Decrypt and verify a (sub)packet. The packet's length may be changed due to
225 * padding, but if this is the case, the packet length will be resident in the
226 * socket buffer. Note that we can't modify the master skb info as the skb may
227 * be the home to multiple subpackets.
/*
 * rxrpc_verify_packet - hand one (sub)packet to the connection's security
 * class for verification.
 * @call: The call the packet belongs to.
 * @skb: The socket buffer holding the packet.
 * @offset: Offset of the (sub)packet data within @skb.
 * @len: Length of the (sub)packet data.
 *
 * seq and cksum start from the values in the skb's rxrpc header.  When the
 * jumbo index held in `annotation` is greater than 1, two bytes are copied
 * from @skb at @offset - 2 into tmp and seq is advanced by the jumbo index
 * minus one.  The function returns whatever
 * call->conn->security->verify_packet() returns.
 *
 * NOTE(review): the line declaring `annotation` (used below, and passed as
 * the third argument by rxrpc_locate_data()), the handling of a failed
 * skb_copy_bits(), the use made of tmp and the trailing arguments of the
 * final call are all missing from this copy.
 */
229 static int rxrpc_verify_packet(struct rxrpc_call
*call
, struct sk_buff
*skb
,
231 unsigned int offset
, unsigned int len
)
233 struct rxrpc_skb_priv
*sp
= rxrpc_skb(skb
);
234 rxrpc_seq_t seq
= sp
->hdr
.seq
;
235 u16 cksum
= sp
->hdr
.cksum
;
239 /* For all but the head jumbo subpacket, the security checksum is in a
240 * jumbo header immediately prior to the data.
242 if ((annotation
& RXRPC_RX_ANNO_JUMBO
) > 1) {
244 if (skb_copy_bits(skb
, offset
- 2, &tmp
, 2) < 0)
247 seq
+= (annotation
& RXRPC_RX_ANNO_JUMBO
) - 1;
250 return call
->conn
->security
->verify_packet(call
, skb
, offset
, len
,
255 * Locate the data within a packet. This is complicated by:
257 * (1) An skb may contain a jumbo packet - so we have to find the appropriate subpacket.
260 * (2) The (sub)packets may be encrypted and, if so, the encrypted portion
261 * contains an extra header which includes the true length of the data,
262 * excluding any encrypted padding.
/*
 * rxrpc_locate_data - find the payload of a (sub)packet inside an skb.
 * @call: The call the packet belongs to.
 * @skb: The socket buffer holding the packet.
 * @_offset: Passed to the security class's locate_data() operation.
 * @_len: Passed to the security class's locate_data() operation.
 *
 * The working offset starts just past struct rxrpc_wire_header and the
 * working length is the rest of the skb.  For a jumbo annotation the offset
 * is advanced by (index - 1) * RXRPC_JUMBO_SUBPKTLEN and the length becomes
 * RXRPC_JUMBO_SUBPKTLEN, except for the subpacket marked
 * RXRPC_RX_ANNO_JLAST, which runs to the end of the skb.  A (sub)packet not
 * yet marked RXRPC_RX_ANNO_VERIFIED is passed to rxrpc_verify_packet() and
 * the flag is then set in *_annotation.  Finally the security class's
 * locate_data() operation is called.
 *
 * NOTE(review): the `_annotation` parameter line, the declarations of
 * len/ret, the check of rxrpc_verify_packet()'s result, the stores into
 * *_offset and *_len, and the return statement are missing from this copy.
 */
264 static int rxrpc_locate_data(struct rxrpc_call
*call
, struct sk_buff
*skb
,
266 unsigned int *_offset
, unsigned int *_len
)
268 unsigned int offset
= sizeof(struct rxrpc_wire_header
);
271 u8 annotation
= *_annotation
;
273 /* Locate the subpacket */
274 len
= skb
->len
- offset
;
275 if ((annotation
& RXRPC_RX_ANNO_JUMBO
) > 0) {
276 offset
+= (((annotation
& RXRPC_RX_ANNO_JUMBO
) - 1) *
277 RXRPC_JUMBO_SUBPKTLEN
);
278 len
= (annotation
& RXRPC_RX_ANNO_JLAST
) ?
279 skb
->len
- offset
: RXRPC_JUMBO_SUBPKTLEN
;
/* Verification is done once per (sub)packet and remembered in the ring. */
282 if (!(annotation
& RXRPC_RX_ANNO_VERIFIED
)) {
283 ret
= rxrpc_verify_packet(call
, skb
, annotation
, offset
, len
);
286 *_annotation
|= RXRPC_RX_ANNO_VERIFIED
;
291 call
->conn
->security
->locate_data(call
, skb
, _offset
, _len
);
296 * Deliver messages to a call. This keeps processing packets until the buffer
297 * is filled and we find either more DATA (returns 0) or the end of the DATA
298 * (returns 1). If more packets are required, it returns -EAGAIN.
/*
 * rxrpc_recvmsg_data - copy received DATA from a call's Rx ring to an iterator.
 * @sock: The socket; sock->sk is handed to sock_recv_timestamp().
 * @call: The call to read from.
 * @msg: Message header for the timestamp; rxrpc_kernel_recv_data() passes
 *       NULL here.
 * @iter: Destination iterator for skb_copy_datagram_iter().
 * @len: Size of the destination; compared against *@_offset.
 * @flags: recvmsg() flags; with MSG_PEEK the ring is not rotated and the
 *         per-call packet cursor is not written back.
 * @_offset: Running count of bytes already copied into the destination.
 *
 * The per-call cursor (rx_pkt_offset/rx_pkt_len) is loaded into locals, the
 * ring is walked slot by slot, and each packet is located (and verified) via
 * rxrpc_locate_data() when the cursor offset is 0.  Fully consumed packets
 * are rotated out with rxrpc_rotate_rx_window().  ret starts as -EAGAIN; the
 * return values are described in the comment preceding this function.
 *
 * NOTE(review): many short lines are missing from this copy (declarations of
 * skb/remain/last, the second half of the first condition, the loop bound,
 * the hole test, the clamping of copy, most assignments to ret, the exit
 * labels and the return).  In particular the guard that must protect
 * sock_recv_timestamp() from a NULL @msg is not visible - confirm it exists.
 */
300 static int rxrpc_recvmsg_data(struct socket
*sock
, struct rxrpc_call
*call
,
301 struct msghdr
*msg
, struct iov_iter
*iter
,
302 size_t len
, int flags
, size_t *_offset
)
304 struct rxrpc_skb_priv
*sp
;
306 rxrpc_seq_t hard_ack
, top
, seq
;
309 unsigned int rx_pkt_offset
, rx_pkt_len
;
310 int ix
, copy
, ret
= -EAGAIN
, ret2
;
/*
 * RXRPC_CALL_RX_UNDERRUN is set at the bottom of this function; a later
 * call clears it here and may send an ACK.
 */
312 if (test_and_clear_bit(RXRPC_CALL_RX_UNDERRUN
, &call
->flags
) &&
314 rxrpc_send_ack_packet(call
, false, NULL
);
316 rx_pkt_offset
= call
->rx_pkt_offset
;
317 rx_pkt_len
= call
->rx_pkt_len
;
319 if (call
->state
>= RXRPC_CALL_SERVER_ACK_REQUEST
) {
320 seq
= call
->rx_hard_ack
;
325 /* Barriers against rxrpc_input_data(). */
326 hard_ack
= call
->rx_hard_ack
;
/* rx_top is re-read with acquire semantics on every pass of the loop. */
328 while (top
= smp_load_acquire(&call
->rx_top
),
331 ix
= seq
& RXRPC_RXTX_BUFF_MASK
;
332 skb
= call
->rxtx_buffer
[ix
];
334 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_hole
, seq
,
335 rx_pkt_offset
, rx_pkt_len
, 0);
339 rxrpc_see_skb(skb
, rxrpc_skb_rx_seen
);
342 if (!(flags
& MSG_PEEK
))
343 trace_rxrpc_receive(call
, rxrpc_receive_front
,
344 sp
->hdr
.serial
, seq
);
347 sock_recv_timestamp(msg
, sock
->sk
, skb
);
/* A cursor offset of 0 means this packet has not been started yet. */
349 if (rx_pkt_offset
== 0) {
350 ret2
= rxrpc_locate_data(call
, skb
,
351 &call
->rxtx_annotations
[ix
],
352 &rx_pkt_offset
, &rx_pkt_len
);
353 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_next
, seq
,
354 rx_pkt_offset
, rx_pkt_len
, ret2
);
360 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_cont
, seq
,
361 rx_pkt_offset
, rx_pkt_len
, 0);
364 /* We have to handle short, empty and used-up DATA packets. */
365 remain
= len
- *_offset
;
370 ret2
= skb_copy_datagram_iter(skb
, rx_pkt_offset
, iter
,
377 /* handle piecemeal consumption of data packets */
378 rx_pkt_offset
+= copy
;
/* Data left in the packet: the destination is asserted to be full. */
383 if (rx_pkt_len
> 0) {
384 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_full
, seq
,
385 rx_pkt_offset
, rx_pkt_len
, 0);
386 ASSERTCMP(*_offset
, ==, len
);
391 /* The whole packet has been transferred. */
392 last
= sp
->hdr
.flags
& RXRPC_LAST_PACKET
;
393 if (!(flags
& MSG_PEEK
))
394 rxrpc_rotate_rx_window(call
);
399 ASSERTCMP(seq
, ==, READ_ONCE(call
->rx_top
));
/* The cursor is only written back when the caller is not peeking. */
408 if (!(flags
& MSG_PEEK
)) {
409 call
->rx_pkt_offset
= rx_pkt_offset
;
410 call
->rx_pkt_len
= rx_pkt_len
;
413 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_data_return
, seq
,
414 rx_pkt_offset
, rx_pkt_len
, ret
);
416 set_bit(RXRPC_CALL_RX_UNDERRUN
, &call
->flags
);
421 * Receive a message from an RxRPC socket
422 * - we need to be careful about two or more threads calling recvmsg
/*
 * rxrpc_recvmsg - recvmsg() handler for an RxRPC socket.
 * @sock: The socket being read.
 * @msg: Receives the data, the control messages and the peer address.
 * @len: Size of the data buffer, passed on to rxrpc_recvmsg_data().
 * A `flags` value is used throughout the body; the line declaring it is
 * missing from this copy.
 *
 * Outline of the visible steps:
 *  - flags is tested for MSG_OOB and MSG_TRUNC, and the receive timeout is
 *    taken from sock_rcvtimeo() (MSG_DONTWAIT is passed as the no-block
 *    argument).
 *  - If the socket has no calls, nothing queued and is not in state
 *    RXRPC_SERVER_LISTENING, the sock lock is released.
 *  - If recvmsg_q is empty the sock lock is released and the task waits on
 *    sk_sleep() via prepare_to_wait_exclusive() and schedule_timeout(),
 *    checking sock_error() and signal_pending().
 *  - The first call on recvmsg_q is picked with recvmsg_lock held; it is
 *    unlinked unless MSG_PEEK is set.  rxrpc_get_call() follows; presumably
 *    that is the peek-only branch, as the else line is missing - confirm.
 *  - call->user_mutex is taken with mutex_trylock(), falling back to
 *    mutex_lock_interruptible() unless MSG_DONTWAIT is set; failure goes to
 *    error_requeue_call.  The sock lock is then released.
 *  - If RXRPC_CALL_HAS_USERID is set, an RXRPC_USER_CALL_ID control message
 *    is added: unsigned int sized for MSG_CMSG_COMPAT, else unsigned long.
 *  - call->peer->srx is copied to msg->msg_name, srx_service is overwritten
 *    with call->service_id and msg_namelen is set.
 *  - By call state: RXRPC_CALL_SERVER_ACCEPTING goes to
 *    rxrpc_recvmsg_new_call(); the three receive states go to
 *    rxrpc_recvmsg_data(), after which the socket is re-notified if the ring
 *    slot following rx_hard_ack is occupied.
 *  - For RXRPC_CALL_COMPLETE, rxrpc_recvmsg_term() is called, the call is
 *    released unless peeking, and MSG_EOR is set.  MSG_MORE is then set or
 *    cleared.
 *
 * NOTE(review): on the exit path that unlocks user_mutex, rxrpc_put_call()
 * is called immediately before trace_rxrpc_recvmsg(call, ...), so the
 * tracepoint is handed a call whose reference was just dropped; the same
 * order occurs after the rxrpc_put_call() on the requeue path.  Confirm the
 * tracepoint cannot touch freed memory.
 * NOTE(review): the msg_name copy declares a local `len` that has the same
 * name as the @len parameter; the enclosing braces are missing from this
 * copy, so confirm the later rxrpc_recvmsg_data() call sees the parameter.
 * NOTE(review): labels, return statements and several conditions are
 * missing from this copy, so the return values cannot be confirmed here.
 */
425 int rxrpc_recvmsg(struct socket
*sock
, struct msghdr
*msg
, size_t len
,
428 struct rxrpc_call
*call
;
429 struct rxrpc_sock
*rx
= rxrpc_sk(sock
->sk
);
437 trace_rxrpc_recvmsg(NULL
, rxrpc_recvmsg_enter
, 0, 0, 0, 0);
439 if (flags
& (MSG_OOB
| MSG_TRUNC
))
442 timeo
= sock_rcvtimeo(&rx
->sk
, flags
& MSG_DONTWAIT
);
447 /* Return immediately if a client socket has no outstanding calls */
448 if (RB_EMPTY_ROOT(&rx
->calls
) &&
449 list_empty(&rx
->recvmsg_q
) &&
450 rx
->sk
.sk_state
!= RXRPC_SERVER_LISTENING
) {
451 release_sock(&rx
->sk
);
455 if (list_empty(&rx
->recvmsg_q
)) {
462 release_sock(&rx
->sk
);
464 /* Wait for something to happen */
465 prepare_to_wait_exclusive(sk_sleep(&rx
->sk
), &wait
,
467 ret
= sock_error(&rx
->sk
);
/* The queue is re-tested after registering on the wait queue. */
471 if (list_empty(&rx
->recvmsg_q
)) {
472 if (signal_pending(current
))
473 goto wait_interrupted
;
474 trace_rxrpc_recvmsg(NULL
, rxrpc_recvmsg_wait
,
476 timeo
= schedule_timeout(timeo
);
478 finish_wait(sk_sleep(&rx
->sk
), &wait
);
482 /* Find the next call and dequeue it if we're not just peeking. If we
483 * do dequeue it, that comes with a ref that we will need to release.
485 write_lock_bh(&rx
->recvmsg_lock
);
486 l
= rx
->recvmsg_q
.next
;
487 call
= list_entry(l
, struct rxrpc_call
, recvmsg_link
);
488 if (!(flags
& MSG_PEEK
))
489 list_del_init(&call
->recvmsg_link
);
491 rxrpc_get_call(call
, rxrpc_call_got
);
492 write_unlock_bh(&rx
->recvmsg_lock
);
494 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_dequeue
, 0, 0, 0, 0);
496 /* We're going to drop the socket lock, so we need to lock the call
497 * against interference by sendmsg.
499 if (!mutex_trylock(&call
->user_mutex
)) {
501 if (flags
& MSG_DONTWAIT
)
502 goto error_requeue_call
;
504 if (mutex_lock_interruptible(&call
->user_mutex
) < 0)
505 goto error_requeue_call
;
508 release_sock(&rx
->sk
);
510 if (test_bit(RXRPC_CALL_RELEASED
, &call
->flags
))
513 if (test_bit(RXRPC_CALL_HAS_USERID
, &call
->flags
)) {
514 if (flags
& MSG_CMSG_COMPAT
) {
515 unsigned int id32
= call
->user_call_ID
;
517 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_USER_CALL_ID
,
518 sizeof(unsigned int), &id32
);
520 unsigned long idl
= call
->user_call_ID
;
522 ret
= put_cmsg(msg
, SOL_RXRPC
, RXRPC_USER_CALL_ID
,
523 sizeof(unsigned long), &idl
);
526 goto error_unlock_call
;
530 struct sockaddr_rxrpc
*srx
= msg
->msg_name
;
531 size_t len
= sizeof(call
->peer
->srx
);
533 memcpy(msg
->msg_name
, &call
->peer
->srx
, len
);
534 srx
->srx_service
= call
->service_id
;
535 msg
->msg_namelen
= len
;
538 switch (READ_ONCE(call
->state
)) {
539 case RXRPC_CALL_SERVER_ACCEPTING
:
540 ret
= rxrpc_recvmsg_new_call(rx
, call
, msg
, flags
);
542 case RXRPC_CALL_CLIENT_RECV_REPLY
:
543 case RXRPC_CALL_SERVER_RECV_REQUEST
:
544 case RXRPC_CALL_SERVER_ACK_REQUEST
:
545 ret
= rxrpc_recvmsg_data(sock
, call
, msg
, &msg
->msg_iter
, len
,
550 if (after(call
->rx_top
, call
->rx_hard_ack
) &&
551 call
->rxtx_buffer
[(call
->rx_hard_ack
+ 1) & RXRPC_RXTX_BUFF_MASK
])
552 rxrpc_notify_socket(call
);
560 goto error_unlock_call
;
562 if (call
->state
== RXRPC_CALL_COMPLETE
) {
563 ret
= rxrpc_recvmsg_term(call
, msg
);
565 goto error_unlock_call
;
566 if (!(flags
& MSG_PEEK
))
567 rxrpc_release_call(rx
, call
);
568 msg
->msg_flags
|= MSG_EOR
;
573 msg
->msg_flags
|= MSG_MORE
;
575 msg
->msg_flags
&= ~MSG_MORE
;
579 mutex_unlock(&call
->user_mutex
);
580 rxrpc_put_call(call
, rxrpc_call_put
);
581 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_return
, 0, 0, 0, ret
);
585 if (!(flags
& MSG_PEEK
)) {
586 write_lock_bh(&rx
->recvmsg_lock
);
587 list_add(&call
->recvmsg_link
, &rx
->recvmsg_q
);
588 write_unlock_bh(&rx
->recvmsg_lock
);
589 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_requeue
, 0, 0, 0, 0);
591 rxrpc_put_call(call
, rxrpc_call_put
);
594 release_sock(&rx
->sk
);
596 trace_rxrpc_recvmsg(call
, rxrpc_recvmsg_return
, 0, 0, 0, ret
);
600 ret
= sock_intr_errno(timeo
);
602 finish_wait(sk_sleep(&rx
->sk
), &wait
);
608 * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info
609 * @sock: The socket that the call exists on
610 * @call: The call to send data through
611 * @iter: The buffer to receive into
612 * @want_more: True if more data is expected to be read
613 * @_abort: Where the abort code is stored if -ECONNABORTED is returned
614 * @_service: Where to store the actual service ID (may be upgraded)
616 * Allow a kernel service to receive data and pick up information about the
617 * state of a call. Returns 0 if got what was asked for and there's more
618 * available, 1 if we got what was asked for and we're at the end of the data
619 * and -EAGAIN if we need more data.
621 * Note that we may return -EAGAIN to drain empty packets at the end of the
622 * data, even if we've already copied over the requested data.
624 * *_abort should also be initialised to 0.
/*
 * Summary of the statements visible in rxrpc_kernel_recv_data() (the
 * kernel-doc block preceding this function documents the parameters and the
 * return values):
 *  - The call must not be in state RXRPC_CALL_SERVER_ACCEPTING (asserted).
 *  - call->user_mutex is held from the mutex_lock() to the mutex_unlock().
 *  - In the three receive states, data is pulled with rxrpc_recvmsg_data(),
 *    with a NULL msghdr, iov_iter_count(iter) as the length and 0 as flags.
 *  - Afterwards call->ackr_reason is examined and rxrpc_send_ack_packet()
 *    may be called.
 *  - *_service is set to call->service_id before the mutex is released.
 *  - The "short_data" and "excess_data" tracepoints mark two protocol-error
 *    exits; *_abort is loaded from call->abort_code on the completed-call
 *    path.
 *
 * NOTE(review): the declarations, most case bodies, the labels and every
 * return statement are missing from this copy, so the pairing of the exits
 * above with specific return codes cannot be confirmed here.
 */
626 int rxrpc_kernel_recv_data(struct socket
*sock
, struct rxrpc_call
*call
,
627 struct iov_iter
*iter
,
628 bool want_more
, u32
*_abort
, u16
*_service
)
633 _enter("{%d,%s},%zu,%d",
634 call
->debug_id
, rxrpc_call_states
[call
->state
],
635 iov_iter_count(iter
), want_more
);
637 ASSERTCMP(call
->state
, !=, RXRPC_CALL_SERVER_ACCEPTING
);
639 mutex_lock(&call
->user_mutex
);
641 switch (READ_ONCE(call
->state
)) {
642 case RXRPC_CALL_CLIENT_RECV_REPLY
:
643 case RXRPC_CALL_SERVER_RECV_REQUEST
:
644 case RXRPC_CALL_SERVER_ACK_REQUEST
:
645 ret
= rxrpc_recvmsg_data(sock
, call
, NULL
, iter
,
646 iov_iter_count(iter
), 0,
651 /* We can only reach here with a partially full buffer if we
652 * have reached the end of the data. We must otherwise have a
653 * full buffer or have been given -EAGAIN.
656 if (iov_iter_count(iter
) > 0)
659 goto read_phase_complete
;
668 case RXRPC_CALL_COMPLETE
:
679 switch (call
->ackr_reason
) {
682 case RXRPC_ACK_DELAY
:
687 rxrpc_send_ack_packet(call
, false, NULL
);
691 *_service
= call
->service_id
;
692 mutex_unlock(&call
->user_mutex
);
693 _leave(" = %d [%zu,%d]", ret
, iov_iter_count(iter
), *_abort
);
697 trace_rxrpc_rx_eproto(call
, 0, tracepoint_string("short_data"));
701 trace_rxrpc_rx_eproto(call
, 0, tracepoint_string("excess_data"));
705 *_abort
= call
->abort_code
;
707 if (call
->completion
== RXRPC_CALL_SUCCEEDED
) {
709 if (iov_iter_count(iter
) > 0)
714 EXPORT_SYMBOL(rxrpc_kernel_recv_data
);
717 * rxrpc_kernel_get_reply_time - Get timestamp on first reply packet
718 * @sock: The socket that the call exists on
719 * @call: The call to query
720 * @_ts: Where to put the timestamp
722 * Retrieve the timestamp from the first DATA packet of the reply if it is
723 * in the ring. Returns true if successful, false if not.
/*
 * Summary of the statements visible in rxrpc_kernel_get_reply_time() (the
 * kernel-doc block preceding this function documents the parameters and the
 * return value):
 *  - call->user_mutex is held for the whole lookup.
 *  - The call state is compared with RXRPC_CALL_CLIENT_RECV_REPLY.
 *  - rx_hard_ack is read, and rx_top is read with smp_load_acquire().
 *  - The skb is fetched from ring slot seq & RXRPC_RXTX_BUFF_MASK and its
 *    timestamp is stored through _ts with skb_get_ktime().
 *  - success starts out false.
 *
 * NOTE(review): the `_ts` parameter line, the declaration of skb, the
 * computation of seq, the range and NULL checks, the assignment to success
 * and the return statement are missing from this copy.
 */
725 bool rxrpc_kernel_get_reply_time(struct socket
*sock
, struct rxrpc_call
*call
,
729 rxrpc_seq_t hard_ack
, top
, seq
;
730 bool success
= false;
732 mutex_lock(&call
->user_mutex
);
734 if (READ_ONCE(call
->state
) != RXRPC_CALL_CLIENT_RECV_REPLY
)
737 hard_ack
= call
->rx_hard_ack
;
742 top
= smp_load_acquire(&call
->rx_top
);
746 skb
= call
->rxtx_buffer
[seq
& RXRPC_RXTX_BUFF_MASK
];
750 *_ts
= skb_get_ktime(skb
);
754 mutex_unlock(&call
->user_mutex
);
757 EXPORT_SYMBOL(rxrpc_kernel_get_reply_time
);