// SPDX-License-Identifier: GPL-2.0-only
/****************************************************************************
 * Driver for Solarflare network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2013 Solarflare Communications Inc.
 */

#include <linux/socket.h>
#include <linux/in.h>
#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/prefetch.h>
#include <linux/moduleparam.h>
#include <linux/iommu.h>
#include <net/ip.h>
#include <net/checksum.h>
#include <net/xdp.h>
#include <linux/bpf_trace.h>
#include "net_driver.h"
#include "efx.h"
#include "filter.h"
#include "nic.h"
#include "selftest.h"
#include "workarounds.h"

/* Preferred number of descriptors to fill at once */
#define EFX_RX_PREFERRED_BATCH 8U

/* Maximum rx prefix used by any architecture. */
#define EFX_MAX_RX_PREFIX_SIZE 16

/* Number of RX buffers to recycle pages for.  When creating the RX page
 * recycle ring, this number is divided by the number of buffers per page to
 * calculate the number of pages to store in the RX page recycle ring.
 */
#define EFX_RECYCLE_RING_SIZE_IOMMU 4096
#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)

/* Size of buffer allocated for skb header area. */
#define EFX_SKB_HEADERS 128u

/* This is the percentage fill level below which new RX descriptors
 * will be added to the RX descriptor ring.
 */
static unsigned int rx_refill_threshold;

/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
				      EFX_RX_USR_BUF_SIZE)

/*
 * RX maximum head room required.
 *
 * This must be at least 1 to prevent overflow, plus one packet-worth
 * to allow pipelined receives.
 */
#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)

static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
{
	return page_address(buf->page) + buf->page_offset;
}

static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
	return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset));
#else
	const u8 *data = eh + efx->rx_packet_hash_offset;

	return (u32)data[0]	  |
	       (u32)data[1] << 8  |
	       (u32)data[2] << 16 |
	       (u32)data[3] << 24;
#endif
}

static inline struct efx_rx_buffer *
efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
{
	if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
		return efx_rx_buffer(rx_queue, 0);
	else
		return rx_buf + 1;
}

static inline void efx_sync_rx_buffer(struct efx_nic *efx,
				      struct efx_rx_buffer *rx_buf,
				      unsigned int len)
{
	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len,
				DMA_FROM_DEVICE);
}

void efx_rx_config_page_split(struct efx_nic *efx)
{
	efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align,
				      EFX_RX_BUF_ALIGNMENT);
	efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
		((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
		 (efx->rx_page_buf_step + XDP_PACKET_HEADROOM));
	efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
		efx->rx_bufs_per_page;
	efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
					       efx->rx_bufs_per_page);
}

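/* Illustrative arithmetic for the split above, assuming XDP_PACKET_HEADROOM's
 * usual value of 256 bytes: with 4KiB pages, order-0 buffers and a ~2KiB
 * rx_page_buf_step, (PAGE_SIZE - page state) / (step + headroom) yields one
 * buffer per page, while a smaller step can yield two.  Higher-order
 * allocations always use a single buffer per page.
 */
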
/* Check the RX page recycle ring for a page that can be reused. */
static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	struct page *page;
	struct efx_rx_page_state *state;
	unsigned index;

	index = rx_queue->page_remove & rx_queue->page_ptr_mask;
	page = rx_queue->page_ring[index];
	if (page == NULL)
		return NULL;

	rx_queue->page_ring[index] = NULL;
	/* page_remove cannot exceed page_add. */
	if (rx_queue->page_remove != rx_queue->page_add)
		++rx_queue->page_remove;

	/* If page_count is 1 then we hold the only reference to this page. */
	if (page_count(page) == 1) {
		++rx_queue->page_recycle_count;
		return page;
	} else {
		state = page_address(page);
		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
			       PAGE_SIZE << efx->rx_buffer_order,
			       DMA_FROM_DEVICE);
		put_page(page);
		++rx_queue->page_recycle_failed;
	}

	return NULL;
}

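/* A page pulled from the ring can only be reused once the network stack has
 * dropped its reference, hence the page_count(page) == 1 test above;
 * otherwise the page is unmapped and released, counted as a recycle failure.
 */
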
/**
 * efx_init_rx_buffers - create EFX_RX_PREFERRED_BATCH page-based RX buffers
 *
 * @rx_queue:		Efx RX queue
 * @atomic:		control memory allocation flags
 *
 * This allocates a batch of pages, maps them for DMA, and populates
 * struct efx_rx_buffers for each one.  Returns a negative error code or
 * 0 on success.  If a single page can be used for multiple buffers,
 * then the page will either be inserted fully, or not at all.
 */
static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
{
	struct efx_nic *efx = rx_queue->efx;
	struct efx_rx_buffer *rx_buf;
	struct page *page;
	unsigned int page_offset;
	struct efx_rx_page_state *state;
	dma_addr_t dma_addr;
	unsigned index, count;

	count = 0;
	do {
		page = efx_reuse_page(rx_queue);
		if (page == NULL) {
			page = alloc_pages(__GFP_COMP |
					   (atomic ? GFP_ATOMIC : GFP_KERNEL),
					   efx->rx_buffer_order);
			if (unlikely(page == NULL))
				return -ENOMEM;
			dma_addr =
				dma_map_page(&efx->pci_dev->dev, page, 0,
					     PAGE_SIZE << efx->rx_buffer_order,
					     DMA_FROM_DEVICE);
			if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
						       dma_addr))) {
				__free_pages(page, efx->rx_buffer_order);
				return -EIO;
			}
			state = page_address(page);
			state->dma_addr = dma_addr;
		} else {
			state = page_address(page);
			dma_addr = state->dma_addr;
		}

		dma_addr += sizeof(struct efx_rx_page_state);
		page_offset = sizeof(struct efx_rx_page_state);

		do {
			page_offset += XDP_PACKET_HEADROOM;
			dma_addr += XDP_PACKET_HEADROOM;

			index = rx_queue->added_count & rx_queue->ptr_mask;
			rx_buf = efx_rx_buffer(rx_queue, index);
			rx_buf->dma_addr = dma_addr + efx->rx_ip_align;
			rx_buf->page = page;
			rx_buf->page_offset = page_offset + efx->rx_ip_align;
			rx_buf->len = efx->rx_dma_len;
			rx_buf->flags = 0;
			++rx_queue->added_count;
			get_page(page);
			dma_addr += efx->rx_page_buf_step;
			page_offset += efx->rx_page_buf_step;
		} while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);

		rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
	} while (++count < efx->rx_pages_per_batch);

	return 0;
}

/* Unmap a DMA-mapped page.  This function is only called for the final RX
 * buffer in a page.
 */
static void efx_unmap_rx_buffer(struct efx_nic *efx,
				struct efx_rx_buffer *rx_buf)
{
	struct page *page = rx_buf->page;

	if (page) {
		struct efx_rx_page_state *state = page_address(page);

		dma_unmap_page(&efx->pci_dev->dev,
			       state->dma_addr,
			       PAGE_SIZE << efx->rx_buffer_order,
			       DMA_FROM_DEVICE);
	}
}

static void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
				struct efx_rx_buffer *rx_buf,
				unsigned int num_bufs)
{
	do {
		if (rx_buf->page) {
			put_page(rx_buf->page);
			rx_buf->page = NULL;
		}
		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
	} while (--num_bufs);
}

/* Attempt to recycle the page if there is an RX recycle ring; the page can
 * only be added if this is the final RX buffer, to prevent pages being used in
 * the descriptor ring and appearing in the recycle ring simultaneously.
 */
static void efx_recycle_rx_page(struct efx_channel *channel,
				struct efx_rx_buffer *rx_buf)
{
	struct page *page = rx_buf->page;
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
	struct efx_nic *efx = rx_queue->efx;
	unsigned index;

	/* Only recycle the page after processing the final buffer. */
	if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
		return;

	index = rx_queue->page_add & rx_queue->page_ptr_mask;
	if (rx_queue->page_ring[index] == NULL) {
		unsigned read_index = rx_queue->page_remove &
			rx_queue->page_ptr_mask;

		/* The next slot in the recycle ring is available, but
		 * increment page_remove if the read pointer currently
		 * points here.
		 */
		if (read_index == index)
			++rx_queue->page_remove;
		rx_queue->page_ring[index] = page;
		++rx_queue->page_add;
		return;
	}
	++rx_queue->page_recycle_full;
	efx_unmap_rx_buffer(efx, rx_buf);
	put_page(rx_buf->page);
}

static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
			       struct efx_rx_buffer *rx_buf)
{
	/* Release the page reference we hold for the buffer. */
	if (rx_buf->page)
		put_page(rx_buf->page);

	/* If this is the last buffer in a page, unmap and free it. */
	if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
		efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
		efx_free_rx_buffers(rx_queue, rx_buf, 1);
	}
	rx_buf->page = NULL;
}

/* Recycle the pages that are used by buffers that have just been received. */
static void efx_recycle_rx_pages(struct efx_channel *channel,
				 struct efx_rx_buffer *rx_buf,
				 unsigned int n_frags)
{
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);

	do {
		efx_recycle_rx_page(channel, rx_buf);
		rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
	} while (--n_frags);
}

static void efx_discard_rx_packet(struct efx_channel *channel,
				  struct efx_rx_buffer *rx_buf,
				  unsigned int n_frags)
{
	struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);

	efx_recycle_rx_pages(channel, rx_buf, n_frags);

	efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
}

/**
 * efx_fast_push_rx_descriptors - push new RX descriptors quickly
 * @rx_queue:		RX descriptor queue
 * @atomic:		control memory allocation flags
 *
 * This will aim to fill the RX descriptor queue up to
 * @rx_queue->max_fill.  If there is insufficient atomic
 * memory to do so, a slow fill will be scheduled.
 *
 * The caller must provide serialisation (none is used here).  In practice,
 * this means this function must run from the NAPI handler, or be called
 * when NAPI is disabled.
 */
void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned int fill_level, batch_size;
	int space, rc = 0;

	if (!rx_queue->refill_enabled)
		return;

	/* Calculate current fill level, and exit if we don't need to fill */
	fill_level = (rx_queue->added_count - rx_queue->removed_count);
	EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
	if (fill_level >= rx_queue->fast_fill_trigger)
		goto out;

	/* Record minimum fill level */
	if (unlikely(fill_level < rx_queue->min_fill)) {
		if (fill_level)
			rx_queue->min_fill = fill_level;
	}

	batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
	space = rx_queue->max_fill - fill_level;
	EFX_WARN_ON_ONCE_PARANOID(space < batch_size);

	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
		   "RX queue %d fast-filling descriptor ring from"
		   " level %d to level %d\n",
		   efx_rx_queue_index(rx_queue), fill_level,
		   rx_queue->max_fill);

	do {
		rc = efx_init_rx_buffers(rx_queue, atomic);
		if (unlikely(rc)) {
			/* Ensure that we don't leave the rx queue empty */
			efx_schedule_slow_fill(rx_queue);
			goto out;
		}
	} while ((space -= batch_size) >= batch_size);

	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
		   "RX queue %d fast-filled descriptor ring "
		   "to level %d\n", efx_rx_queue_index(rx_queue),
		   rx_queue->added_count - rx_queue->removed_count);

 out:
	if (rx_queue->notified_count != rx_queue->added_count)
		efx_nic_notify_rx_desc(rx_queue);
}

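/* The refill loop above works in whole batches of
 * rx_pages_per_batch * rx_bufs_per_page descriptors, and stops once less than
 * one further batch of space remains, so the fast path never fills the ring
 * right up to max_fill.
 */
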
void efx_rx_slow_fill(struct timer_list *t)
{
	struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);

	/* Post an event to cause NAPI to run and refill the queue */
	efx_nic_generate_fill_event(rx_queue);
	++rx_queue->slow_fill_count;
}

static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
				     struct efx_rx_buffer *rx_buf,
				     int len)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding;

	if (likely(len <= max_len))
		return;

	/* The packet must be discarded, but this is only a fatal error
	 * if the caller indicated it was
	 */
	rx_buf->flags |= EFX_RX_PKT_DISCARD;

	if (net_ratelimit())
		netif_err(efx, rx_err, efx->net_dev,
			  "RX queue %d overlength RX event (%#x > %#x)\n",
			  efx_rx_queue_index(rx_queue), len, max_len);

	efx_rx_queue_channel(rx_queue)->n_rx_overlength++;
}

/* Pass a received packet up through GRO.  GRO can handle pages
 * regardless of checksum state and skbs with a good checksum.
 */
static void
efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
		  unsigned int n_frags, u8 *eh)
{
	struct napi_struct *napi = &channel->napi_str;
	struct efx_nic *efx = channel->efx;
	struct sk_buff *skb;

	skb = napi_get_frags(napi);
	if (unlikely(!skb)) {
		struct efx_rx_queue *rx_queue;

		rx_queue = efx_channel_get_rx_queue(channel);
		efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
		return;
	}

	if (efx->net_dev->features & NETIF_F_RXHASH)
		skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
			     PKT_HASH_TYPE_L3);
	skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
			  CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
	skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);

	for (;;) {
		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
				   rx_buf->page, rx_buf->page_offset,
				   rx_buf->len);
		rx_buf->page = NULL;
		skb->len += rx_buf->len;
		if (skb_shinfo(skb)->nr_frags == n_frags)
			break;

		rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
	}

	skb->data_len = skb->len;
	skb->truesize += n_frags * efx->rx_buffer_truesize;

	skb_record_rx_queue(skb, channel->rx_queue.core_index);

	napi_gro_frags(napi);
}

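/* Once napi_gro_frags() has been called, the skb and the page references
 * attached to it belong to the GRO layer; that is why rx_buf->page is
 * cleared as each fragment is attached above.
 */
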
/* Allocate and construct an SKB around page fragments */
static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
				     struct efx_rx_buffer *rx_buf,
				     unsigned int n_frags,
				     u8 *eh, int hdr_len)
{
	struct efx_nic *efx = channel->efx;
	struct sk_buff *skb;

	/* Allocate an SKB to store the headers */
	skb = netdev_alloc_skb(efx->net_dev,
			       efx->rx_ip_align + efx->rx_prefix_size +
			       hdr_len);
	if (unlikely(skb == NULL)) {
		atomic_inc(&efx->n_rx_noskb_drops);
		return NULL;
	}

	EFX_WARN_ON_ONCE_PARANOID(rx_buf->len < hdr_len);

	memcpy(skb->data + efx->rx_ip_align, eh - efx->rx_prefix_size,
	       efx->rx_prefix_size + hdr_len);
	skb_reserve(skb, efx->rx_ip_align + efx->rx_prefix_size);
	__skb_put(skb, hdr_len);

	/* Append the remaining page(s) onto the frag list */
	if (rx_buf->len > hdr_len) {
		rx_buf->page_offset += hdr_len;
		rx_buf->len -= hdr_len;

		for (;;) {
			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
					   rx_buf->page, rx_buf->page_offset,
					   rx_buf->len);
			rx_buf->page = NULL;
			skb->len += rx_buf->len;
			skb->data_len += rx_buf->len;
			if (skb_shinfo(skb)->nr_frags == n_frags)
				break;

			rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
		}
	} else {
		__free_pages(rx_buf->page, efx->rx_buffer_order);
		rx_buf->page = NULL;
		n_frags = 0;
	}

	skb->truesize += n_frags * efx->rx_buffer_truesize;

	/* Move past the ethernet header */
	skb->protocol = eth_type_trans(skb, efx->net_dev);

	skb_mark_napi_id(skb, &channel->napi_str);

	return skb;
}

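/* Copying hdr_len bytes (at most EFX_SKB_HEADERS) into the linear area keeps
 * the protocol headers directly accessible to the stack while the payload
 * stays in page fragments.  The hardware RX prefix is copied into the skb
 * headroom as well, so it remains readable (e.g. for timestamp extraction)
 * after the buffer pages are released.
 */
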
void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
		   unsigned int n_frags, unsigned int len, u16 flags)
{
	struct efx_nic *efx = rx_queue->efx;
	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
	struct efx_rx_buffer *rx_buf;

	rx_queue->rx_packets++;

	rx_buf = efx_rx_buffer(rx_queue, index);
	rx_buf->flags |= flags;

	/* Validate the number of fragments and completed length */
	if (n_frags == 1) {
		if (!(flags & EFX_RX_PKT_PREFIX_LEN))
			efx_rx_packet__check_len(rx_queue, rx_buf, len);
	} else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) ||
		   unlikely(len <= (n_frags - 1) * efx->rx_dma_len) ||
		   unlikely(len > n_frags * efx->rx_dma_len) ||
		   unlikely(!efx->rx_scatter)) {
		/* If this isn't an explicit discard request, either
		 * the hardware or the driver is broken.
		 */
		WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD));
		rx_buf->flags |= EFX_RX_PKT_DISCARD;
	}

	netif_vdbg(efx, rx_status, efx->net_dev,
		   "RX queue %d received ids %x-%x len %d %s%s\n",
		   efx_rx_queue_index(rx_queue), index,
		   (index + n_frags - 1) & rx_queue->ptr_mask, len,
		   (rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "",
		   (rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : "");

	/* Discard packet, if instructed to do so.  Process the
	 * previous receive first.
	 */
	if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) {
		efx_rx_flush_packet(channel);
		efx_discard_rx_packet(channel, rx_buf, n_frags);
		return;
	}

	if (n_frags == 1 && !(flags & EFX_RX_PKT_PREFIX_LEN))
		rx_buf->len = len;

	/* Release and/or sync the DMA mapping - assumes all RX buffers
	 * consumed in-order per RX queue.
	 */
	efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);

	/* Prefetch nice and early so data will (hopefully) be in cache by
	 * the time we look at it.
	 */
	prefetch(efx_rx_buf_va(rx_buf));

	rx_buf->page_offset += efx->rx_prefix_size;
	rx_buf->len -= efx->rx_prefix_size;

	if (n_frags > 1) {
		/* Release/sync DMA mapping for additional fragments.
		 * Fix length for last fragment.
		 */
		unsigned int tail_frags = n_frags - 1;

		for (;;) {
			rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
			if (--tail_frags == 0)
				break;
			efx_sync_rx_buffer(efx, rx_buf, efx->rx_dma_len);
		}
		rx_buf->len = len - (n_frags - 1) * efx->rx_dma_len;
		efx_sync_rx_buffer(efx, rx_buf, rx_buf->len);
	}

	/* All fragments have been DMA-synced, so recycle pages. */
	rx_buf = efx_rx_buffer(rx_queue, index);
	efx_recycle_rx_pages(channel, rx_buf, n_frags);

	/* Pipeline receives so that we give time for packet headers to be
	 * prefetched into cache.
	 */
	efx_rx_flush_packet(channel);
	channel->rx_pkt_n_frags = n_frags;
	channel->rx_pkt_index = index;
}

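/* Note that the receive path is split in two: efx_rx_packet() above touches
 * only descriptors and metadata, while __efx_rx_packet() (the "second half"
 * further below) touches the payload one packet later, giving the prefetch
 * issued above time to pull the headers into cache.
 */
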
static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
			   struct efx_rx_buffer *rx_buf,
			   unsigned int n_frags)
{
	struct sk_buff *skb;
	u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS);

	skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len);
	if (unlikely(skb == NULL)) {
		struct efx_rx_queue *rx_queue;

		rx_queue = efx_channel_get_rx_queue(channel);
		efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
		return;
	}
	skb_record_rx_queue(skb, channel->rx_queue.core_index);

	/* Set the SKB flags */
	skb_checksum_none_assert(skb);
	if (likely(rx_buf->flags & EFX_RX_PKT_CSUMMED)) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
	}

	efx_rx_skb_attach_timestamp(channel, skb);

	if (channel->type->receive_skb)
		if (channel->type->receive_skb(channel, skb))
			return;

	/* Pass the packet up */
	if (channel->rx_list != NULL)
		/* Add to list, will pass up later */
		list_add_tail(&skb->list, channel->rx_list);
	else
		/* No list, so pass it up now */
		netif_receive_skb(skb);
}

/* efx_do_xdp - perform XDP processing on a received packet
 *
 * Returns true if the packet should still be delivered.
 */
static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
		       struct efx_rx_buffer *rx_buf, u8 **ehp)
{
	u8 rx_prefix[EFX_MAX_RX_PREFIX_SIZE];
	struct efx_rx_queue *rx_queue;
	struct bpf_prog *xdp_prog;
	struct xdp_frame *xdpf;
	struct xdp_buff xdp;
	u32 xdp_act;
	s16 offset;
	int err;

	rcu_read_lock();
	xdp_prog = rcu_dereference(efx->xdp_prog);
	if (!xdp_prog) {
		rcu_read_unlock();
		return true;
	}

	rx_queue = efx_channel_get_rx_queue(channel);

	if (unlikely(channel->rx_pkt_n_frags > 1)) {
		/* We can't do XDP on fragmented packets - drop. */
		rcu_read_unlock();
		efx_free_rx_buffers(rx_queue, rx_buf,
				    channel->rx_pkt_n_frags);
		if (net_ratelimit())
			netif_err(efx, rx_err, efx->net_dev,
				  "XDP is not possible with multiple receive fragments (%d)\n",
				  channel->rx_pkt_n_frags);
		channel->n_rx_xdp_bad_drops++;
		return false;
	}

	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr,
				rx_buf->len, DMA_FROM_DEVICE);

	/* Save the rx prefix. */
	EFX_WARN_ON_PARANOID(efx->rx_prefix_size > EFX_MAX_RX_PREFIX_SIZE);
	memcpy(rx_prefix, *ehp - efx->rx_prefix_size,
	       efx->rx_prefix_size);

	xdp.data = *ehp;
	xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;

	/* No support yet for XDP metadata */
	xdp_set_data_meta_invalid(&xdp);
	xdp.data_end = xdp.data + rx_buf->len;
	xdp.rxq = &rx_queue->xdp_rxq_info;

	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
	rcu_read_unlock();

	offset = (u8 *)xdp.data - *ehp;

	switch (xdp_act) {
	case XDP_PASS:
		/* Fix up rx prefix. */
		if (offset) {
			*ehp += offset;
			rx_buf->page_offset += offset;
			rx_buf->len -= offset;
			memcpy(*ehp - efx->rx_prefix_size, rx_prefix,
			       efx->rx_prefix_size);
		}
		break;

	case XDP_TX:
		/* Buffer ownership passes to tx on success. */
		xdpf = convert_to_xdp_frame(&xdp);
		err = efx_xdp_tx_buffers(efx, 1, &xdpf, true);
		if (unlikely(err != 1)) {
			efx_free_rx_buffers(rx_queue, rx_buf, 1);
			if (net_ratelimit())
				netif_err(efx, rx_err, efx->net_dev,
					  "XDP TX failed (%d)\n", err);
			channel->n_rx_xdp_bad_drops++;
			trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
		} else {
			channel->n_rx_xdp_tx++;
		}
		break;

	case XDP_REDIRECT:
		err = xdp_do_redirect(efx->net_dev, &xdp, xdp_prog);
		if (unlikely(err)) {
			efx_free_rx_buffers(rx_queue, rx_buf, 1);
			if (net_ratelimit())
				netif_err(efx, rx_err, efx->net_dev,
					  "XDP redirect failed (%d)\n", err);
			channel->n_rx_xdp_bad_drops++;
			trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
		} else {
			channel->n_rx_xdp_redirect++;
		}
		break;

	default:
		bpf_warn_invalid_xdp_action(xdp_act);
		efx_free_rx_buffers(rx_queue, rx_buf, 1);
		channel->n_rx_xdp_bad_drops++;
		trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
		break;

	case XDP_ABORTED:
		trace_xdp_exception(efx->net_dev, xdp_prog, xdp_act);
		/* Fall through */
	case XDP_DROP:
		efx_free_rx_buffers(rx_queue, rx_buf, 1);
		channel->n_rx_xdp_drops++;
		break;
	}

	return xdp_act == XDP_PASS;
}

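/* Note on the rx_prefix save/restore above: the hardware RX prefix lives
 * immediately before the Ethernet header, inside the headroom an XDP program
 * may grow into via bpf_xdp_adjust_head(), so it is saved before the program
 * runs and, on XDP_PASS, copied back in front of the (possibly moved) packet
 * start.
 */
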
/* Handle a received packet.  Second half: Touches packet payload. */
void __efx_rx_packet(struct efx_channel *channel)
{
	struct efx_nic *efx = channel->efx;
	struct efx_rx_buffer *rx_buf =
		efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
	u8 *eh = efx_rx_buf_va(rx_buf);

	/* Read length from the prefix if necessary.  This already
	 * excludes the length of the prefix itself.
	 */
	if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN)
		rx_buf->len = le16_to_cpup((__le16 *)
					   (eh + efx->rx_packet_len_offset));

	/* If we're in loopback test, then pass the packet directly to the
	 * loopback layer, and free the rx_buf here
	 */
	if (unlikely(efx->loopback_selftest)) {
		struct efx_rx_queue *rx_queue;

		efx_loopback_rx_packet(efx, eh, rx_buf->len);
		rx_queue = efx_channel_get_rx_queue(channel);
		efx_free_rx_buffers(rx_queue, rx_buf,
				    channel->rx_pkt_n_frags);
		goto out;
	}

	if (!efx_do_xdp(efx, channel, rx_buf, &eh))
		goto out;

	if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
		rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;

	if ((rx_buf->flags & EFX_RX_PKT_TCP) && !channel->type->receive_skb)
		efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
	else
		efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
out:
	channel->rx_pkt_n_frags = 0;
}

int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned int entries;
	int rc;

	/* Create the smallest power-of-two aligned ring */
	entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
	EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
	rx_queue->ptr_mask = entries - 1;

	netif_dbg(efx, probe, efx->net_dev,
		  "creating RX queue %d size %#x mask %#x\n",
		  efx_rx_queue_index(rx_queue), efx->rxq_entries,
		  rx_queue->ptr_mask);

	/* Allocate RX buffers */
	rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
				   GFP_KERNEL);
	if (!rx_queue->buffer)
		return -ENOMEM;

	rc = efx_nic_probe_rx(rx_queue);
	if (rc) {
		kfree(rx_queue->buffer);
		rx_queue->buffer = NULL;
	}

	return rc;
}

static void efx_init_rx_recycle_ring(struct efx_nic *efx,
				     struct efx_rx_queue *rx_queue)
{
	unsigned int bufs_in_recycle_ring, page_ring_size;

	/* Set the RX recycle ring size */
#ifdef CONFIG_PPC64
	bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
#else
	if (iommu_present(&pci_bus_type))
		bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
	else
		bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;
#endif /* CONFIG_PPC64 */

	page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
					    efx->rx_bufs_per_page);
	rx_queue->page_ring = kcalloc(page_ring_size,
				      sizeof(*rx_queue->page_ring), GFP_KERNEL);
	rx_queue->page_ptr_mask = page_ring_size - 1;
}

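/* When an IOMMU is present, unmapping and remapping pages is expensive, so a
 * much deeper recycle ring (EFX_RECYCLE_RING_SIZE_IOMMU buffers' worth of
 * pages) is used than in the direct-mapping case.
 */
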
void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned int max_fill, trigger, max_trigger;
	int rc = 0;

	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
		  "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));

	/* Initialise ptr fields */
	rx_queue->added_count = 0;
	rx_queue->notified_count = 0;
	rx_queue->removed_count = 0;
	rx_queue->min_fill = -1U;
	efx_init_rx_recycle_ring(efx, rx_queue);

	rx_queue->page_remove = 0;
	rx_queue->page_add = rx_queue->page_ptr_mask + 1;
	rx_queue->page_recycle_count = 0;
	rx_queue->page_recycle_failed = 0;
	rx_queue->page_recycle_full = 0;

	/* Initialise limit fields */
	max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
	max_trigger =
		max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
	if (rx_refill_threshold != 0) {
		trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
		if (trigger > max_trigger)
			trigger = max_trigger;
	} else {
		trigger = max_trigger;
	}

	rx_queue->max_fill = max_fill;
	rx_queue->fast_fill_trigger = trigger;
	rx_queue->refill_enabled = true;

	/* Initialise XDP queue information */
	rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
			      rx_queue->core_index);

	if (rc) {
		netif_err(efx, rx_err, efx->net_dev,
			  "Failure to initialise XDP queue information rc=%d\n",
			  rc);
		efx->xdp_rxq_info_failed = true;
	} else {
		rx_queue->xdp_rxq_info_valid = true;
	}

	/* Set up RX descriptor ring */
	efx_nic_init_rx(rx_queue);
}

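/* The refill trigger defaults to max_trigger, i.e. one refill batch below
 * max_fill; the rx_refill_threshold module parameter can lower it to a
 * percentage of max_fill, deferring refills until the ring drains further.
 */
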
void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
{
	int i;
	struct efx_nic *efx = rx_queue->efx;
	struct efx_rx_buffer *rx_buf;

	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
		  "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));

	del_timer_sync(&rx_queue->slow_fill);

	/* Release RX buffers from the current read ptr to the write ptr */
	if (rx_queue->buffer) {
		for (i = rx_queue->removed_count; i < rx_queue->added_count;
		     i++) {
			unsigned index = i & rx_queue->ptr_mask;

			rx_buf = efx_rx_buffer(rx_queue, index);
			efx_fini_rx_buffer(rx_queue, rx_buf);
		}
	}

	/* Unmap and release the pages in the recycle ring.  Remove the ring. */
	for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
		struct page *page = rx_queue->page_ring[i];
		struct efx_rx_page_state *state;

		if (page == NULL)
			continue;

		state = page_address(page);
		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
			       PAGE_SIZE << efx->rx_buffer_order,
			       DMA_FROM_DEVICE);
		put_page(page);
	}
	kfree(rx_queue->page_ring);
	rx_queue->page_ring = NULL;

	if (rx_queue->xdp_rxq_info_valid)
		xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);

	rx_queue->xdp_rxq_info_valid = false;
}

void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
{
	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
		  "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));

	efx_nic_remove_rx(rx_queue);

	kfree(rx_queue->buffer);
	rx_queue->buffer = NULL;
}

module_param(rx_refill_threshold, uint, 0444);
MODULE_PARM_DESC(rx_refill_threshold,
		 "RX descriptor ring refill threshold (%)");

#ifdef CONFIG_RFS_ACCEL

static void efx_filter_rfs_work(struct work_struct *data)
{
	struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion,
							      work);
	struct efx_nic *efx = netdev_priv(req->net_dev);
	struct efx_channel *channel = efx_get_channel(efx, req->rxq_index);
	int slot_idx = req - efx->rps_slot;
	struct efx_arfs_rule *rule;
	u16 arfs_id = 0;
	int rc;

	rc = efx->type->filter_insert(efx, &req->spec, true);
	if (rc >= 0)
		/* Discard 'priority' part of EF10+ filter ID (mcdi_filters) */
		rc %= efx->type->max_rx_ip_filters;
	if (efx->rps_hash_table) {
		spin_lock_bh(&efx->rps_hash_lock);
		rule = efx_rps_hash_find(efx, &req->spec);
		/* The rule might have already gone, if someone else's request
		 * for the same spec was already worked and then expired before
		 * we got around to our work.  In that case we have nothing
		 * tying us to an arfs_id, meaning that as soon as the filter
		 * is considered for expiry it will be removed.
		 */
		if (rule) {
			if (rc < 0)
				rule->filter_id = EFX_ARFS_FILTER_ID_ERROR;
			else
				rule->filter_id = rc;
			arfs_id = rule->arfs_id;
		}
		spin_unlock_bh(&efx->rps_hash_lock);
	}
	if (rc >= 0) {
		/* Remember this so we can check whether to expire the filter
		 * later.
		 */
		mutex_lock(&efx->rps_mutex);
		if (channel->rps_flow_id[rc] == RPS_FLOW_ID_INVALID)
			channel->rfs_filter_count++;
		channel->rps_flow_id[rc] = req->flow_id;
		mutex_unlock(&efx->rps_mutex);

		if (req->spec.ether_type == htons(ETH_P_IP))
			netif_info(efx, rx_status, efx->net_dev,
				   "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d id %u]\n",
				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
				   req->spec.rem_host, ntohs(req->spec.rem_port),
				   req->spec.loc_host, ntohs(req->spec.loc_port),
				   req->rxq_index, req->flow_id, rc, arfs_id);
		else
			netif_info(efx, rx_status, efx->net_dev,
				   "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d id %u]\n",
				   (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
				   req->spec.rem_host, ntohs(req->spec.rem_port),
				   req->spec.loc_host, ntohs(req->spec.loc_port),
				   req->rxq_index, req->flow_id, rc, arfs_id);
		channel->n_rfs_succeeded++;
	} else {
		if (req->spec.ether_type == htons(ETH_P_IP))
			netif_dbg(efx, rx_status, efx->net_dev,
				  "failed to steer %s %pI4:%u:%pI4:%u to queue %u [flow %u rc %d id %u]\n",
				  (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
				  req->spec.rem_host, ntohs(req->spec.rem_port),
				  req->spec.loc_host, ntohs(req->spec.loc_port),
				  req->rxq_index, req->flow_id, rc, arfs_id);
		else
			netif_dbg(efx, rx_status, efx->net_dev,
				  "failed to steer %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u rc %d id %u]\n",
				  (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
				  req->spec.rem_host, ntohs(req->spec.rem_port),
				  req->spec.loc_host, ntohs(req->spec.loc_port),
				  req->rxq_index, req->flow_id, rc, arfs_id);
		channel->n_rfs_failed++;
		/* We're overloading the NIC's filter tables, so let's do a
		 * chunk of extra expiry work.
		 */
		__efx_filter_rfs_expire(channel, min(channel->rfs_filter_count,
						     100u));
	}

	/* Release references */
	clear_bit(slot_idx, &efx->rps_slot_map);
	dev_put(req->net_dev);
}

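/* Filter insertion is deferred to the work item above because it can sleep,
 * whereas the ndo_rx_flow_steer hook that calls efx_filter_rfs() below runs
 * in atomic context; the rps_slot_map bitmap bounds the number of requests
 * in flight to EFX_RPS_MAX_IN_FLIGHT.
 */
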
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
		   u16 rxq_index, u32 flow_id)
{
	struct efx_nic *efx = netdev_priv(net_dev);
	struct efx_async_filter_insertion *req;
	struct efx_arfs_rule *rule;
	struct flow_keys fk;
	int slot_idx;
	bool new;
	int rc;

	/* find a free slot */
	for (slot_idx = 0; slot_idx < EFX_RPS_MAX_IN_FLIGHT; slot_idx++)
		if (!test_and_set_bit(slot_idx, &efx->rps_slot_map))
			break;
	if (slot_idx >= EFX_RPS_MAX_IN_FLIGHT)
		return -EBUSY;

	if (flow_id == RPS_FLOW_ID_INVALID) {
		rc = -EINVAL;
		goto out_clear;
	}

	if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) {
		rc = -EPROTONOSUPPORT;
		goto out_clear;
	}

	if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6)) {
		rc = -EPROTONOSUPPORT;
		goto out_clear;
	}
	if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) {
		rc = -EPROTONOSUPPORT;
		goto out_clear;
	}

	req = efx->rps_slot + slot_idx;
	efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT,
			   efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
			   rxq_index);
	req->spec.match_flags =
		EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO |
		EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT |
		EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT;
	req->spec.ether_type = fk.basic.n_proto;
	req->spec.ip_proto = fk.basic.ip_proto;

	if (fk.basic.n_proto == htons(ETH_P_IP)) {
		req->spec.rem_host[0] = fk.addrs.v4addrs.src;
		req->spec.loc_host[0] = fk.addrs.v4addrs.dst;
	} else {
		memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src,
		       sizeof(struct in6_addr));
		memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst,
		       sizeof(struct in6_addr));
	}

	req->spec.rem_port = fk.ports.src;
	req->spec.loc_port = fk.ports.dst;

	if (efx->rps_hash_table) {
		/* Add it to ARFS hash table */
		spin_lock(&efx->rps_hash_lock);
		rule = efx_rps_hash_add(efx, &req->spec, &new);
		if (!rule) {
			rc = -ENOMEM;
			goto out_unlock;
		}
		if (new)
			rule->arfs_id = efx->rps_next_id++ % RPS_NO_FILTER;
		rc = rule->arfs_id;
		/* Skip if existing or pending filter already does the right thing */
		if (!new && rule->rxq_index == rxq_index &&
		    rule->filter_id >= EFX_ARFS_FILTER_ID_PENDING)
			goto out_unlock;
		rule->rxq_index = rxq_index;
		rule->filter_id = EFX_ARFS_FILTER_ID_PENDING;
		spin_unlock(&efx->rps_hash_lock);
	} else {
		/* Without an ARFS hash table, we just use arfs_id 0 for all
		 * filters.  This means if multiple flows hash to the same
		 * flow_id, all but the most recently touched will be eligible
		 * for expiry.
		 */
		rc = 0;
	}

	/* Queue the request */
	dev_hold(req->net_dev = net_dev);
	INIT_WORK(&req->work, efx_filter_rfs_work);
	req->rxq_index = rxq_index;
	req->flow_id = flow_id;
	schedule_work(&req->work);
	return rc;
out_unlock:
	spin_unlock(&efx->rps_hash_lock);
out_clear:
	clear_bit(slot_idx, &efx->rps_slot_map);
	return rc;
}

bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota)
{
	bool (*expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index);
	struct efx_nic *efx = channel->efx;
	unsigned int index, size, start;
	u32 flow_id;

	if (!mutex_trylock(&efx->rps_mutex))
		return false;
	expire_one = efx->type->filter_rfs_expire_one;
	index = channel->rfs_expire_index;
	start = index;
	size = efx->type->max_rx_ip_filters;
	while (quota) {
		flow_id = channel->rps_flow_id[index];

		if (flow_id != RPS_FLOW_ID_INVALID) {
			quota--;
			if (expire_one(efx, flow_id, index)) {
				netif_info(efx, rx_status, efx->net_dev,
					   "expired filter %d [channel %u flow %u]\n",
					   index, channel->channel, flow_id);
				channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
				channel->rfs_filter_count--;
			}
		}
		if (++index == size)
			index = 0;
		/* If we were called with a quota that exceeds the total number
		 * of filters in the table (which shouldn't happen, but could
		 * if two callers race), ensure that we don't loop forever -
		 * stop when we've examined every row of the table.
		 */
		if (index == start)
			break;
	}

	channel->rfs_expire_index = index;
	mutex_unlock(&efx->rps_mutex);
	return true;
}

#endif /* CONFIG_RFS_ACCEL */

/**
 * efx_filter_is_mc_recipient - test whether spec is a multicast recipient
 * @spec: Specification to test
 *
 * Return: %true if the specification is a non-drop RX filter that
 * matches a local MAC address I/G bit value of 1 or matches a local
 * IPv4 or IPv6 address value in the respective multicast address
 * range.  Otherwise %false.
 */
bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec)
{
	if (!(spec->flags & EFX_FILTER_FLAG_RX) ||
	    spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP)
		return false;

	if (spec->match_flags &
	    (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) &&
	    is_multicast_ether_addr(spec->loc_mac))
		return true;

	if ((spec->match_flags &
	     (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
	    (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
		if (spec->ether_type == htons(ETH_P_IP) &&
		    ipv4_is_multicast(spec->loc_host[0]))
			return true;
		if (spec->ether_type == htons(ETH_P_IPV6) &&
		    ((const u8 *)spec->loc_host)[0] == 0xff)
			return true;
	}

	return false;
}