#include <linux/iopoll.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
#define TSNEP_RX_OFFSET (max(NET_SKB_PAD, XDP_PACKET_HEADROOM) + NET_IP_ALIGN)
#define TSNEP_HEADROOM ALIGN(TSNEP_RX_OFFSET, 4)
#define TSNEP_MAX_RX_BUF_SIZE (PAGE_SIZE - TSNEP_HEADROOM - \
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+/* XSK buffer shall store at least Q-in-Q frame */
+#define TSNEP_XSK_RX_BUF_SIZE (ALIGN(TSNEP_RX_INLINE_METADATA_SIZE + \
+ ETH_FRAME_LEN + ETH_FCS_LEN + \
+ VLAN_HLEN * 2, 4))
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
#define DMA_ADDR_HIGH(dma_addr) ((u32)(((dma_addr) >> 32) & 0xFFFFFFFF))
for (i = 0; i < TSNEP_RING_SIZE; i++) {
entry = &rx->entry[i];
- if (entry->page)
+ if (!rx->xsk_pool && entry->page)
page_pool_put_full_page(rx->page_pool, entry->page,
false);
+ if (rx->xsk_pool && entry->xdp)
+ xsk_buff_free(entry->xdp);
+ /* xdp is union with page */
entry->page = NULL;
}
return rx->read - rx->write - 1;
}
+/* Hand every page kept in rx->page_buffer back to the page pool and clear the
+ * slots which have been released.
+ */
+static void tsnep_rx_free_page_buffer(struct tsnep_rx *rx)
+{
+	int slot;
+
+	/* the walk stops at the first empty slot; last entry of page_buffer is
+	 * always zero, because ring cannot be filled completely
+	 */
+	for (slot = 0; rx->page_buffer[slot]; slot++) {
+		page_pool_put_full_page(rx->page_pool, rx->page_buffer[slot],
+					false);
+		rx->page_buffer[slot] = NULL;
+	}
+}
+
+/* Populate rx->page_buffer with pages allocated from the page pool.
+ *
+ * Returns 0 on success. If an allocation fails, tsnep_rx_free_page_buffer() is
+ * called to release the buffered pages and -ENOMEM is returned.
+ */
+static int tsnep_rx_alloc_page_buffer(struct tsnep_rx *rx)
+{
+	const int count = TSNEP_RING_SIZE - 1;
+	struct page *page;
+	int slot;
+
+	/* one slot less than the ring size is populated, because ring cannot
+	 * be filled completely
+	 */
+	for (slot = 0; slot < count; slot++) {
+		page = page_pool_dev_alloc_pages(rx->page_pool);
+		rx->page_buffer[slot] = page;
+		if (page)
+			continue;
+
+		tsnep_rx_free_page_buffer(rx);
+
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
static void tsnep_rx_set_page(struct tsnep_rx *rx, struct tsnep_rx_entry *entry,
struct page *page)
{
{
struct tsnep_rx_entry *entry = &rx->entry[index];
- /* TSNEP_MAX_RX_BUF_SIZE is a multiple of 4 */
+ /* TSNEP_MAX_RX_BUF_SIZE and TSNEP_XSK_RX_BUF_SIZE are multiples of 4 */
entry->properties = entry->len & TSNEP_DESC_LENGTH_MASK;
entry->properties |= TSNEP_DESC_INTERRUPT_FLAG;
if (index == rx->increment_owner_counter) {
return desc_refilled;
}
+/* Attach an XSK buffer to an RX entry: store the buffer, set the entry length
+ * to TSNEP_XSK_RX_BUF_SIZE and write the DMA address of the buffer to the RX
+ * descriptor.
+ */
+static void tsnep_rx_set_xdp(struct tsnep_rx *rx, struct tsnep_rx_entry *entry,
+			     struct xdp_buff *xdp)
+{
+	entry->len = TSNEP_XSK_RX_BUF_SIZE;
+	entry->xdp = xdp;
+	entry->dma = xsk_buff_xdp_get_dma(xdp);
+
+	/* descriptor holds the address in little endian */
+	entry->desc->rx = __cpu_to_le64(entry->dma);
+}
+
+/* Move the XSK buffer of the entry at rx->read to the entry at @index; the
+ * entry at rx->read is left without a buffer afterwards.
+ */
+static void tsnep_rx_reuse_buffer_zc(struct tsnep_rx *rx, int index)
+{
+	struct tsnep_rx_entry *src = &rx->entry[rx->read];
+	struct xdp_buff *xdp = src->xdp;
+
+	tsnep_rx_set_xdp(rx, &rx->entry[index], xdp);
+	src->xdp = NULL;
+}
+
+/* Refill up to @count RX descriptors, starting at rx->write, with buffers
+ * allocated from the XSK pool.
+ *
+ * If the pool delivers no buffer at all, rx->alloc_failed is incremented and,
+ * when @reuse is set, the buffer of the entry at rx->read is moved to the
+ * entry at rx->write and that descriptor is activated instead.
+ *
+ * Returns the number of descriptors which have been activated; rx->write is
+ * advanced by the same amount.
+ */
+static int tsnep_rx_alloc_zc(struct tsnep_rx *rx, int count, bool reuse)
+{
+	u32 allocated;
+	int i;
+
+	allocated = xsk_buff_alloc_batch(rx->xsk_pool, rx->xdp_batch, count);
+	for (i = 0; i < allocated; i++) {
+		int index = (rx->write + i) & TSNEP_RING_MASK;
+		struct tsnep_rx_entry *entry = &rx->entry[index];
+
+		tsnep_rx_set_xdp(rx, entry, rx->xdp_batch[i]);
+		tsnep_rx_activate(rx, index);
+	}
+	if (i == 0) {
+		rx->alloc_failed++;
+
+		if (reuse) {
+			tsnep_rx_reuse_buffer_zc(rx, rx->write);
+			tsnep_rx_activate(rx, rx->write);
+
+			/* the reused descriptor has been activated, so it has
+			 * to be counted like an allocated one; otherwise
+			 * rx->write would still point to it and the next
+			 * refill would overwrite its buffer
+			 */
+			i = 1;
+		}
+	}
+
+	if (i)
+		rx->write = (rx->write + i) & TSNEP_RING_MASK;
+
+	return i;
+}
+
+/* Free all XSK buffers which are still attached to RX ring entries and clear
+ * the buffer reference of every entry.
+ */
+static void tsnep_rx_free_zc(struct tsnep_rx *rx)
+{
+	int i;
+
+	for (i = 0; i < TSNEP_RING_SIZE; i++) {
+		struct xdp_buff *xdp = rx->entry[i].xdp;
+
+		rx->entry[i].xdp = NULL;
+		if (xdp)
+			xsk_buff_free(xdp);
+	}
+}
+
+/* Refill RX descriptors via tsnep_rx_alloc_zc() and call tsnep_rx_enable() if
+ * it reported at least one descriptor. Returns the value reported by
+ * tsnep_rx_alloc_zc().
+ */
+static int tsnep_rx_refill_zc(struct tsnep_rx *rx, int count, bool reuse)
+{
+	int refilled = tsnep_rx_alloc_zc(rx, count, reuse);
+
+	if (!refilled)
+		return 0;
+
+	tsnep_rx_enable(rx);
+
+	return refilled;
+}
+
static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
struct xdp_buff *xdp, int *status,
struct netdev_queue *tx_nq, struct tsnep_tx *tx)
length = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
act = bpf_prog_run_xdp(prog, xdp);
-
- /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
- sync = xdp->data_end - xdp->data_hard_start - XDP_PACKET_HEADROOM;
- sync = max(sync, length);
-
switch (act) {
case XDP_PASS:
return false;
trace_xdp_exception(rx->adapter->netdev, prog, act);
fallthrough;
case XDP_DROP:
+ /* Due to xdp_adjust_tail: DMA sync for_device must cover the maximum
+ * length touched by the CPU
+ */
+ sync = xdp->data_end - xdp->data_hard_start -
+ XDP_PACKET_HEADROOM;
+ sync = max(sync, length);
page_pool_put_page(rx->page_pool, virt_to_head_page(xdp->data),
sync, true);
return true;
}
}
+/* Run the XDP program on a zero-copy buffer and act on the returned action.
+ *
+ * A successful XDP_REDIRECT or XDP_TX sets TSNEP_XDP_REDIRECT or TSNEP_XDP_TX
+ * in *status. XDP_DROP, XDP_ABORTED, unknown actions and a failed redirect or
+ * transmit free the buffer with xsk_buff_free().
+ *
+ * Returns false for XDP_PASS, in which case the buffer is left to the caller,
+ * and true for every other action.
+ */
+static bool tsnep_xdp_run_prog_zc(struct tsnep_rx *rx, struct bpf_prog *prog,
+ struct xdp_buff *xdp, int *status,
+ struct netdev_queue *tx_nq,
+ struct tsnep_tx *tx)
+{
+ u32 act;
+
+ act = bpf_prog_run_xdp(prog, xdp);
+
+ /* XDP_REDIRECT is the main action for zero-copy */
+ if (likely(act == XDP_REDIRECT)) {
+ if (xdp_do_redirect(rx->adapter->netdev, xdp, prog) < 0)
+ goto out_failure;
+ *status |= TSNEP_XDP_REDIRECT;
+ return true;
+ }
+
+ switch (act) {
+ case XDP_PASS:
+ return false;
+ case XDP_TX:
+ if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx))
+ goto out_failure;
+ *status |= TSNEP_XDP_TX;
+ return true;
+ default:
+ bpf_warn_invalid_xdp_action(rx->adapter->netdev, prog, act);
+ fallthrough;
+ /* out_failure is placed inside the switch, so a failed redirect or
+ * transmit shares the exception trace and the buffer release with
+ * XDP_ABORTED
+ */
+ case XDP_ABORTED:
+out_failure:
+ trace_xdp_exception(rx->adapter->netdev, prog, act);
+ fallthrough;
+ case XDP_DROP:
+ xsk_buff_free(xdp);
+ return true;
+ }
+}
+
static void tsnep_finalize_xdp(struct tsnep_adapter *adapter, int status,
struct netdev_queue *tx_nq, struct tsnep_tx *tx)
{
return done;
}
+/* Poll the RX ring in XSK zero-copy mode.
+ *
+ * Processes at most @budget written back descriptors. Each buffer is passed to
+ * the XDP program if one is attached; a buffer which is not consumed by the
+ * program is copied to a page from the page pool, handed to tsnep_rx_page()
+ * and freed. Descriptors are refilled from the XSK pool during and after
+ * processing.
+ *
+ * Returns the number of processed descriptors if the pool uses the need_wakeup
+ * flag. Otherwise returns @budget if descriptors are still missing after the
+ * final refill, else the number of processed descriptors.
+ */
+static int tsnep_rx_poll_zc(struct tsnep_rx *rx, struct napi_struct *napi,
+ int budget)
+{
+ struct tsnep_rx_entry *entry;
+ struct netdev_queue *tx_nq;
+ struct bpf_prog *prog;
+ struct tsnep_tx *tx;
+ int desc_available;
+ int xdp_status = 0;
+ struct page *page;
+ int done = 0;
+ int length;
+
+ desc_available = tsnep_rx_desc_available(rx);
+ prog = READ_ONCE(rx->adapter->xdp_prog);
+ /* tx_nq and tx are only set with a program; without a program
+ * xdp_status stays 0 and both are not used
+ */
+ if (prog) {
+ tx_nq = netdev_get_tx_queue(rx->adapter->netdev,
+ rx->tx_queue_index);
+ tx = &rx->adapter->tx[rx->tx_queue_index];
+ }
+
+ while (likely(done < budget) && (rx->read != rx->write)) {
+ entry = &rx->entry[rx->read];
+ /* stop at the first descriptor whose written back owner counter
+ * does not match the expected one
+ */
+ if ((__le32_to_cpu(entry->desc_wb->properties) &
+ TSNEP_DESC_OWNER_COUNTER_MASK) !=
+ (entry->properties & TSNEP_DESC_OWNER_COUNTER_MASK))
+ break;
+ done++;
+
+ if (desc_available >= TSNEP_RING_RX_REFILL) {
+ bool reuse = desc_available >= TSNEP_RING_RX_REUSE;
+
+ desc_available -= tsnep_rx_refill_zc(rx, desc_available,
+ reuse);
+ if (!entry->xdp) {
+ /* buffer has been reused for refill to prevent
+ * empty RX ring, thus buffer cannot be used for
+ * RX processing
+ */
+ rx->read = (rx->read + 1) & TSNEP_RING_MASK;
+ desc_available++;
+
+ rx->dropped++;
+
+ continue;
+ }
+ }
+
+ /* descriptor properties shall be read first, because valid data
+ * is signaled there
+ */
+ dma_rmb();
+
+ prefetch(entry->xdp->data);
+ length = __le32_to_cpu(entry->desc_wb->properties) &
+ TSNEP_DESC_LENGTH_MASK;
+ xsk_buff_set_size(entry->xdp, length);
+ xsk_buff_dma_sync_for_cpu(entry->xdp, rx->xsk_pool);
+
+ /* RX metadata with timestamps is in front of actual data,
+ * subtract metadata size to get length of actual data and
+ * consider metadata size as offset of actual data during RX
+ * processing
+ */
+ length -= TSNEP_RX_INLINE_METADATA_SIZE;
+
+ rx->read = (rx->read + 1) & TSNEP_RING_MASK;
+ desc_available++;
+
+ if (prog) {
+ bool consume;
+
+ /* let the program see the actual data, not the metadata */
+ entry->xdp->data += TSNEP_RX_INLINE_METADATA_SIZE;
+ entry->xdp->data_meta += TSNEP_RX_INLINE_METADATA_SIZE;
+
+ consume = tsnep_xdp_run_prog_zc(rx, prog, entry->xdp,
+ &xdp_status, tx_nq, tx);
+ if (consume) {
+ rx->packets++;
+ rx->bytes += length;
+
+ entry->xdp = NULL;
+
+ continue;
+ }
+ }
+
+ /* buffer has not been consumed by a program: copy metadata and
+ * data to a page of the page pool, so the XSK buffer can be
+ * freed
+ * NOTE(review): the copy source subtracts the metadata size
+ * from data, but data is only advanced in the if (prog) branch
+ * above - confirm the source address for the path without a
+ * program
+ */
+ page = page_pool_dev_alloc_pages(rx->page_pool);
+ if (page) {
+ memcpy(page_address(page) + TSNEP_RX_OFFSET,
+ entry->xdp->data - TSNEP_RX_INLINE_METADATA_SIZE,
+ length + TSNEP_RX_INLINE_METADATA_SIZE);
+ tsnep_rx_page(rx, napi, page, length);
+ } else {
+ rx->dropped++;
+ }
+ xsk_buff_free(entry->xdp);
+ entry->xdp = NULL;
+ }
+
+ if (xdp_status)
+ tsnep_finalize_xdp(rx->adapter, xdp_status, tx_nq, tx);
+
+ if (desc_available)
+ desc_available -= tsnep_rx_refill_zc(rx, desc_available, false);
+
+ /* with need_wakeup the flag is set as long as descriptors could not be
+ * refilled and cleared otherwise
+ */
+ if (xsk_uses_need_wakeup(rx->xsk_pool)) {
+ if (desc_available)
+ xsk_set_rx_need_wakeup(rx->xsk_pool);
+ else
+ xsk_clear_rx_need_wakeup(rx->xsk_pool);
+
+ return done;
+ }
+
+ /* without need_wakeup the full budget is reported while descriptors
+ * are still missing
+ */
+ return desc_available ? budget : done;
+}
+
static bool tsnep_rx_pending(struct tsnep_rx *rx)
{
struct tsnep_rx_entry *entry;
tsnep_rx_init(rx);
desc_available = tsnep_rx_desc_available(rx);
- retval = tsnep_rx_alloc(rx, desc_available, false);
+ if (rx->xsk_pool)
+ retval = tsnep_rx_alloc_zc(rx, desc_available, false);
+ else
+ retval = tsnep_rx_alloc(rx, desc_available, false);
if (retval != desc_available) {
- tsnep_rx_ring_cleanup(rx);
+ retval = -ENOMEM;
- return -ENOMEM;
+ goto alloc_failed;
+ }
+
+ /* prealloc pages to prevent allocation failures when XSK pool is
+ * disabled at runtime
+ */
+ if (rx->xsk_pool) {
+ retval = tsnep_rx_alloc_page_buffer(rx);
+ if (retval)
+ goto alloc_failed;
}
return 0;
+
+alloc_failed:
+ tsnep_rx_ring_cleanup(rx);
+ return retval;
}
static void tsnep_rx_close(struct tsnep_rx *rx)
{
+ /* with an XSK pool, pages are kept in page_buffer in addition to the
+ * ring; release them before the ring is cleaned up
+ */
+ if (rx->xsk_pool)
+ tsnep_rx_free_page_buffer(rx);
+
tsnep_rx_ring_cleanup(rx);
}
+/* Set up the RX ring again with the pages kept in rx->page_buffer.
+ *
+ * After tsnep_rx_init() the properties of all descriptors are reset. Pages
+ * are taken from page_buffer in order, each one is attached to the next ring
+ * entry, the descriptor at rx->write is activated and rx->write is advanced.
+ * Used slots of page_buffer are cleared.
+ */
+static void tsnep_rx_reopen(struct tsnep_rx *rx)
+{
+ struct page **page = rx->page_buffer;
+ int i;
+
+ tsnep_rx_init(rx);
+
+ for (i = 0; i < TSNEP_RING_SIZE; i++) {
+ struct tsnep_rx_entry *entry = &rx->entry[i];
+
+ /* defined initial values for properties are required for
+ * correct owner counter checking
+ */
+ entry->desc->properties = 0;
+ entry->desc_wb->properties = 0;
+
+ /* prevent allocation failures by reusing kept pages */
+ if (*page) {
+ tsnep_rx_set_page(rx, entry, *page);
+ tsnep_rx_activate(rx, rx->write);
+ rx->write++;
+
+ *page = NULL;
+ page++;
+ }
+ }
+}
+
+/* Set up the RX ring again with buffers from the XSK pool.
+ *
+ * Pages which are still attached to ring entries are moved to
+ * rx->page_buffer, the properties of all descriptors are reset and as many
+ * entries as XSK buffers have been allocated are filled and activated.
+ */
+static void tsnep_rx_reopen_xsk(struct tsnep_rx *rx)
+{
+ struct page **page = rx->page_buffer;
+ u32 allocated;
+ int i;
+
+ tsnep_rx_init(rx);
+
+ /* alloc all ring entries except the last one, because ring cannot be
+ * filled completely, as many buffers as possible is enough as wakeup is
+ * done if new buffers are available
+ */
+ allocated = xsk_buff_alloc_batch(rx->xsk_pool, rx->xdp_batch,
+ TSNEP_RING_SIZE - 1);
+
+ for (i = 0; i < TSNEP_RING_SIZE; i++) {
+ struct tsnep_rx_entry *entry = &rx->entry[i];
+
+ /* keep pages to prevent allocation failures when xsk is
+ * disabled
+ */
+ if (entry->page) {
+ *page = entry->page;
+ entry->page = NULL;
+
+ page++;
+ }
+
+ /* defined initial values for properties are required for
+ * correct owner counter checking
+ */
+ entry->desc->properties = 0;
+ entry->desc_wb->properties = 0;
+
+ /* buffers of the batch are taken from its end, allocated is
+ * the number of buffers which are not yet attached
+ */
+ if (allocated) {
+ tsnep_rx_set_xdp(rx, entry,
+ rx->xdp_batch[allocated - 1]);
+ tsnep_rx_activate(rx, rx->write);
+ rx->write++;
+
+ allocated--;
+ }
+ }
+}
+
static bool tsnep_pending(struct tsnep_queue *queue)
{
if (queue->tx && tsnep_tx_pending(queue->tx))
complete = tsnep_tx_poll(queue->tx, budget);
if (queue->rx) {
- done = tsnep_rx_poll(queue->rx, napi, budget);
+ done = queue->rx->xsk_pool ?
+ tsnep_rx_poll_zc(queue->rx, napi, budget) :
+ tsnep_rx_poll(queue->rx, napi, budget);
if (done >= budget)
complete = false;
}
tsnep_free_irq(queue, first);
- if (rx && xdp_rxq_info_is_reg(&rx->xdp_rxq))
- xdp_rxq_info_unreg(&rx->xdp_rxq);
+ if (rx) {
+ if (xdp_rxq_info_is_reg(&rx->xdp_rxq))
+ xdp_rxq_info_unreg(&rx->xdp_rxq);
+ if (xdp_rxq_info_is_reg(&rx->xdp_rxq_zc))
+ xdp_rxq_info_unreg(&rx->xdp_rxq_zc);
+ }
netif_napi_del(&queue->napi);
}
else
rx->tx_queue_index = 0;
+ /* prepare both memory models to eliminate possible registration
+ * errors when memory model is switched between page pool and
+ * XSK pool during runtime
+ */
retval = xdp_rxq_info_reg(&rx->xdp_rxq, adapter->netdev,
rx->queue_index, queue->napi.napi_id);
if (retval)
rx->page_pool);
if (retval)
goto failed;
+ retval = xdp_rxq_info_reg(&rx->xdp_rxq_zc, adapter->netdev,
+ rx->queue_index, queue->napi.napi_id);
+ if (retval)
+ goto failed;
+ retval = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq_zc,
+ MEM_TYPE_XSK_BUFF_POOL,
+ NULL);
+ if (retval)
+ goto failed;
+ if (rx->xsk_pool)
+ xsk_pool_set_rxq_info(rx->xsk_pool, &rx->xdp_rxq_zc);
}
retval = tsnep_request_irq(queue, first);
return 0;
}
+/* Enable XSK zero-copy RX with @pool for @queue.
+ *
+ * Allocates the page_buffer and xdp_batch arrays of the RX queue, sets the
+ * RX queue info of the pool and stores the pool in the RX queue. If the
+ * netdev is running, the queue is disabled, the RX ring is set up again with
+ * XSK buffers and the queue is enabled again.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if the RX frame size of the pool is
+ * smaller than TSNEP_XSK_RX_BUF_SIZE or -ENOMEM if an array cannot be
+ * allocated.
+ */
+int tsnep_enable_xsk(struct tsnep_queue *queue, struct xsk_buff_pool *pool)
+{
+ bool running = netif_running(queue->adapter->netdev);
+ u32 frame_size;
+
+ frame_size = xsk_pool_get_rx_frame_size(pool);
+ if (frame_size < TSNEP_XSK_RX_BUF_SIZE)
+ return -EOPNOTSUPP;
+
+ queue->rx->page_buffer = kcalloc(TSNEP_RING_SIZE,
+ sizeof(*queue->rx->page_buffer),
+ GFP_KERNEL);
+ if (!queue->rx->page_buffer)
+ return -ENOMEM;
+ queue->rx->xdp_batch = kcalloc(TSNEP_RING_SIZE,
+ sizeof(*queue->rx->xdp_batch),
+ GFP_KERNEL);
+ if (!queue->rx->xdp_batch) {
+ /* undo the first allocation, nothing else has been changed */
+ kfree(queue->rx->page_buffer);
+ queue->rx->page_buffer = NULL;
+
+ return -ENOMEM;
+ }
+
+ xsk_pool_set_rxq_info(pool, &queue->rx->xdp_rxq_zc);
+
+ if (running)
+ tsnep_queue_disable(queue);
+
+ /* pool is switched while the queue is disabled, if it was running */
+ queue->rx->xsk_pool = pool;
+
+ if (running) {
+ tsnep_rx_reopen_xsk(queue->rx);
+ tsnep_queue_enable(queue);
+ }
+
+ return 0;
+}
+
+/* Disable XSK zero-copy RX for @queue.
+ *
+ * Frees the XSK buffers attached to the RX ring and clears the pool of the RX
+ * queue. If the netdev is running, the queue is disabled before and the RX
+ * ring is set up again with the kept pages and enabled afterwards. Finally
+ * the xdp_batch and page_buffer arrays are freed.
+ */
+void tsnep_disable_xsk(struct tsnep_queue *queue)
+{
+ bool running = netif_running(queue->adapter->netdev);
+
+ if (running)
+ tsnep_queue_disable(queue);
+
+ tsnep_rx_free_zc(queue->rx);
+
+ queue->rx->xsk_pool = NULL;
+
+ if (running) {
+ tsnep_rx_reopen(queue->rx);
+ tsnep_queue_enable(queue);
+ }
+
+ /* arrays are freed after reopen, which reads page_buffer */
+ kfree(queue->rx->xdp_batch);
+ queue->rx->xdp_batch = NULL;
+ kfree(queue->rx->page_buffer);
+ queue->rx->page_buffer = NULL;
+}
+
static netdev_tx_t tsnep_netdev_xmit_frame(struct sk_buff *skb,
struct net_device *netdev)
{
switch (bpf->command) {
case XDP_SETUP_PROG:
return tsnep_xdp_setup_prog(adapter, bpf->prog, bpf->extack);
+ case XDP_SETUP_XSK_POOL:
+ return tsnep_xdp_setup_pool(adapter, bpf->xsk.pool,
+ bpf->xsk.queue_id);
default:
return -EOPNOTSUPP;
}
return nxmit;
}
+/* XSK wakeup handler: make sure NAPI of the queue addressed by @queue_id runs.
+ *
+ * Returns -EINVAL if @queue_id is not valid as RX and as TX queue index,
+ * otherwise 0. @flags is not evaluated.
+ */
+static int tsnep_netdev_xsk_wakeup(struct net_device *dev, u32 queue_id,
+				   u32 flags)
+{
+	struct tsnep_adapter *adapter = netdev_priv(dev);
+	struct napi_struct *napi;
+
+	if (queue_id >= adapter->num_tx_queues ||
+	    queue_id >= adapter->num_rx_queues)
+		return -EINVAL;
+
+	napi = &adapter->queue[queue_id].napi;
+
+	/* schedule NAPI unless napi_if_scheduled_mark_missed() reports that
+	 * it is scheduled already
+	 */
+	if (napi_if_scheduled_mark_missed(napi))
+		return 0;
+
+	napi_schedule(napi);
+
+	return 0;
+}
+
static const struct net_device_ops tsnep_netdev_ops = {
.ndo_open = tsnep_netdev_open,
.ndo_stop = tsnep_netdev_close,
.ndo_setup_tc = tsnep_tc_setup,
.ndo_bpf = tsnep_netdev_bpf,
.ndo_xdp_xmit = tsnep_netdev_xdp_xmit,
+ /* schedules NAPI of the requested queue for XSK */
+ .ndo_xsk_wakeup = tsnep_netdev_xsk_wakeup,
};
static int tsnep_mac_init(struct tsnep_adapter *adapter)