// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright(c) 2010-2014 Intel Corporation.
 *
 * This code is inspired from the book "Linux Device Drivers" by
 * Alessandro Rubini and Jonathan Corbet, published by O'Reilly & Associates
 */
11 #include <linux/device.h>
12 #include <linux/module.h>
13 #include <linux/version.h>
14 #include <linux/netdevice.h>
15 #include <linux/etherdevice.h> /* eth_type_trans */
16 #include <linux/skbuff.h>
17 #include <linux/kthread.h>
18 #include <linux/delay.h>
20 #include <rte_kni_common.h>
#define WD_TIMEOUT 5 /* jiffies */
28 #define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */
/* typedef for rx function */
typedef void (*kni_net_rx_t)(struct kni_dev *kni);

/* Forward declaration of the default (non-loopback) rx handler */
static void kni_net_rx_normal(struct kni_dev *kni);

/* kni rx function pointer, with default to normal rx */
static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
/*
 * Physical address to kernel virtual address.
 * The mbuf pointers exchanged over the fifos are physical addresses;
 * the kernel must map them before touching the data.
 */
static void *
pa2kva(void *pa)
{
	return phys_to_virt((unsigned long)pa);
}
45 /* physical address to virtual address */
47 pa2va(void *pa
, struct rte_kni_mbuf
*m
)
51 va
= (void *)((unsigned long)pa
+
52 (unsigned long)m
->buf_addr
-
53 (unsigned long)m
->buf_physaddr
);
57 /* mbuf data kernel virtual address from mbuf kernel virtual address */
59 kva2data_kva(struct rte_kni_mbuf
*m
)
61 return phys_to_virt(m
->buf_physaddr
+ m
->data_off
);
64 /* virtual address to physical address */
66 va2pa(void *va
, struct rte_kni_mbuf
*m
)
70 pa
= (void *)((unsigned long)va
-
71 ((unsigned long)m
->buf_addr
-
72 (unsigned long)m
->buf_physaddr
));
77 * It can be called to process the request.
80 kni_net_process_request(struct kni_dev
*kni
, struct rte_kni_request
*req
)
88 pr_err("No kni instance or request\n");
92 mutex_lock(&kni
->sync_lock
);
95 memcpy(kni
->sync_kva
, req
, sizeof(struct rte_kni_request
));
96 num
= kni_fifo_put(kni
->req_q
, &kni
->sync_va
, 1);
98 pr_err("Cannot send to req_q\n");
103 ret_val
= wait_event_interruptible_timeout(kni
->wq
,
104 kni_fifo_count(kni
->resp_q
), 3 * HZ
);
105 if (signal_pending(current
) || ret_val
<= 0) {
109 num
= kni_fifo_get(kni
->resp_q
, (void **)&resp_va
, 1);
110 if (num
!= 1 || resp_va
!= kni
->sync_va
) {
111 /* This should never happen */
112 pr_err("No data in resp_q\n");
117 memcpy(req
, kni
->sync_kva
, sizeof(struct rte_kni_request
));
121 mutex_unlock(&kni
->sync_lock
);
129 kni_net_open(struct net_device
*dev
)
132 struct rte_kni_request req
;
133 struct kni_dev
*kni
= netdev_priv(dev
);
135 netif_start_queue(dev
);
136 if (dflt_carrier
== 1)
137 netif_carrier_on(dev
);
139 netif_carrier_off(dev
);
141 memset(&req
, 0, sizeof(req
));
142 req
.req_id
= RTE_KNI_REQ_CFG_NETWORK_IF
;
144 /* Setting if_up to non-zero means up */
146 ret
= kni_net_process_request(kni
, &req
);
148 return (ret
== 0) ? req
.result
: ret
;
152 kni_net_release(struct net_device
*dev
)
155 struct rte_kni_request req
;
156 struct kni_dev
*kni
= netdev_priv(dev
);
158 netif_stop_queue(dev
); /* can't transmit any more */
159 netif_carrier_off(dev
);
161 memset(&req
, 0, sizeof(req
));
162 req
.req_id
= RTE_KNI_REQ_CFG_NETWORK_IF
;
164 /* Setting if_up to 0 means down */
166 ret
= kni_net_process_request(kni
, &req
);
168 return (ret
== 0) ? req
.result
: ret
;
172 kni_fifo_trans_pa2va(struct kni_dev
*kni
,
173 struct rte_kni_fifo
*src_pa
, struct rte_kni_fifo
*dst_va
)
175 uint32_t ret
, i
, num_dst
, num_rx
;
178 num_dst
= kni_fifo_free_count(dst_va
);
182 num_rx
= min_t(uint32_t, num_dst
, MBUF_BURST_SZ
);
184 num_rx
= kni_fifo_get(src_pa
, kni
->pa
, num_rx
);
188 for (i
= 0; i
< num_rx
; i
++) {
189 kva
= pa2kva(kni
->pa
[i
]);
190 kni
->va
[i
] = pa2va(kni
->pa
[i
], kva
);
193 ret
= kni_fifo_put(dst_va
, kni
->va
, num_rx
);
195 /* Failing should not happen */
196 pr_err("Fail to enqueue entries into dst_va\n");
202 /* Try to release mbufs when kni release */
203 void kni_net_release_fifo_phy(struct kni_dev
*kni
)
205 /* release rx_q first, because it can't release in userspace */
206 kni_fifo_trans_pa2va(kni
, kni
->rx_q
, kni
->free_q
);
207 /* release alloc_q for speeding up kni release in userspace */
208 kni_fifo_trans_pa2va(kni
, kni
->alloc_q
, kni
->free_q
);
212 * Configuration changes (passed on by ifconfig)
215 kni_net_config(struct net_device
*dev
, struct ifmap
*map
)
217 if (dev
->flags
& IFF_UP
) /* can't act on a running interface */
220 /* ignore other fields */
225 * Transmit a packet (called by the kernel)
228 kni_net_tx(struct sk_buff
*skb
, struct net_device
*dev
)
232 struct kni_dev
*kni
= netdev_priv(dev
);
233 struct rte_kni_mbuf
*pkt_kva
= NULL
;
237 /* save the timestamp */
238 #ifdef HAVE_TRANS_START_HELPER
239 netif_trans_update(dev
);
241 dev
->trans_start
= jiffies
;
244 /* Check if the length of skb is less than mbuf size */
245 if (skb
->len
> kni
->mbuf_size
)
249 * Check if it has at least one free entry in tx_q and
250 * one entry in alloc_q.
252 if (kni_fifo_free_count(kni
->tx_q
) == 0 ||
253 kni_fifo_count(kni
->alloc_q
) == 0) {
255 * If no free entry in tx_q or no entry in alloc_q,
256 * drops skb and goes out.
261 /* dequeue a mbuf from alloc_q */
262 ret
= kni_fifo_get(kni
->alloc_q
, &pkt_pa
, 1);
263 if (likely(ret
== 1)) {
266 pkt_kva
= pa2kva(pkt_pa
);
267 data_kva
= kva2data_kva(pkt_kva
);
268 pkt_va
= pa2va(pkt_pa
, pkt_kva
);
271 memcpy(data_kva
, skb
->data
, len
);
272 if (unlikely(len
< ETH_ZLEN
)) {
273 memset(data_kva
+ len
, 0, ETH_ZLEN
- len
);
276 pkt_kva
->pkt_len
= len
;
277 pkt_kva
->data_len
= len
;
279 /* enqueue mbuf into tx_q */
280 ret
= kni_fifo_put(kni
->tx_q
, &pkt_va
, 1);
281 if (unlikely(ret
!= 1)) {
282 /* Failing should not happen */
283 pr_err("Fail to enqueue mbuf into tx_q\n");
287 /* Failing should not happen */
288 pr_err("Fail to dequeue mbuf from alloc_q\n");
292 /* Free skb and update statistics */
294 kni
->stats
.tx_bytes
+= len
;
295 kni
->stats
.tx_packets
++;
300 /* Free skb and update statistics */
302 kni
->stats
.tx_dropped
++;
308 * RX: normal working mode
311 kni_net_rx_normal(struct kni_dev
*kni
)
315 uint32_t i
, num_rx
, num_fq
;
316 struct rte_kni_mbuf
*kva
;
319 struct net_device
*dev
= kni
->net_dev
;
321 /* Get the number of free entries in free_q */
322 num_fq
= kni_fifo_free_count(kni
->free_q
);
324 /* No room on the free_q, bail out */
328 /* Calculate the number of entries to dequeue from rx_q */
329 num_rx
= min_t(uint32_t, num_fq
, MBUF_BURST_SZ
);
331 /* Burst dequeue from rx_q */
332 num_rx
= kni_fifo_get(kni
->rx_q
, kni
->pa
, num_rx
);
336 /* Transfer received packets to netif */
337 for (i
= 0; i
< num_rx
; i
++) {
338 kva
= pa2kva(kni
->pa
[i
]);
340 data_kva
= kva2data_kva(kva
);
341 kni
->va
[i
] = pa2va(kni
->pa
[i
], kva
);
343 skb
= dev_alloc_skb(len
+ 2);
345 /* Update statistics */
346 kni
->stats
.rx_dropped
++;
350 /* Align IP on 16B boundary */
353 if (kva
->nb_segs
== 1) {
354 memcpy(skb_put(skb
, len
), data_kva
, len
);
357 int kva_nb_segs
= kva
->nb_segs
;
359 for (nb_segs
= 0; nb_segs
< kva_nb_segs
; nb_segs
++) {
360 memcpy(skb_put(skb
, kva
->data_len
),
361 data_kva
, kva
->data_len
);
366 kva
= pa2kva(va2pa(kva
->next
, kva
));
367 data_kva
= kva2data_kva(kva
);
372 skb
->protocol
= eth_type_trans(skb
, dev
);
373 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
375 /* Call netif interface */
378 /* Update statistics */
379 kni
->stats
.rx_bytes
+= len
;
380 kni
->stats
.rx_packets
++;
383 /* Burst enqueue mbufs into free_q */
384 ret
= kni_fifo_put(kni
->free_q
, kni
->va
, num_rx
);
386 /* Failing should not happen */
387 pr_err("Fail to enqueue entries into free_q\n");
391 * RX: loopback with enqueue/dequeue fifos.
394 kni_net_rx_lo_fifo(struct kni_dev
*kni
)
398 uint32_t i
, num
, num_rq
, num_tq
, num_aq
, num_fq
;
399 struct rte_kni_mbuf
*kva
;
401 struct rte_kni_mbuf
*alloc_kva
;
402 void *alloc_data_kva
;
404 /* Get the number of entries in rx_q */
405 num_rq
= kni_fifo_count(kni
->rx_q
);
407 /* Get the number of free entrie in tx_q */
408 num_tq
= kni_fifo_free_count(kni
->tx_q
);
410 /* Get the number of entries in alloc_q */
411 num_aq
= kni_fifo_count(kni
->alloc_q
);
413 /* Get the number of free entries in free_q */
414 num_fq
= kni_fifo_free_count(kni
->free_q
);
416 /* Calculate the number of entries to be dequeued from rx_q */
417 num
= min(num_rq
, num_tq
);
418 num
= min(num
, num_aq
);
419 num
= min(num
, num_fq
);
420 num
= min_t(uint32_t, num
, MBUF_BURST_SZ
);
422 /* Return if no entry to dequeue from rx_q */
426 /* Burst dequeue from rx_q */
427 ret
= kni_fifo_get(kni
->rx_q
, kni
->pa
, num
);
429 return; /* Failing should not happen */
431 /* Dequeue entries from alloc_q */
432 ret
= kni_fifo_get(kni
->alloc_q
, kni
->alloc_pa
, num
);
436 for (i
= 0; i
< num
; i
++) {
437 kva
= pa2kva(kni
->pa
[i
]);
439 data_kva
= kva2data_kva(kva
);
440 kni
->va
[i
] = pa2va(kni
->pa
[i
], kva
);
442 alloc_kva
= pa2kva(kni
->alloc_pa
[i
]);
443 alloc_data_kva
= kva2data_kva(alloc_kva
);
444 kni
->alloc_va
[i
] = pa2va(kni
->alloc_pa
[i
], alloc_kva
);
446 memcpy(alloc_data_kva
, data_kva
, len
);
447 alloc_kva
->pkt_len
= len
;
448 alloc_kva
->data_len
= len
;
450 kni
->stats
.tx_bytes
+= len
;
451 kni
->stats
.rx_bytes
+= len
;
454 /* Burst enqueue mbufs into tx_q */
455 ret
= kni_fifo_put(kni
->tx_q
, kni
->alloc_va
, num
);
457 /* Failing should not happen */
458 pr_err("Fail to enqueue mbufs into tx_q\n");
461 /* Burst enqueue mbufs into free_q */
462 ret
= kni_fifo_put(kni
->free_q
, kni
->va
, num
);
464 /* Failing should not happen */
465 pr_err("Fail to enqueue mbufs into free_q\n");
468 * Update statistic, and enqueue/dequeue failure is impossible,
469 * as all queues are checked at first.
471 kni
->stats
.tx_packets
+= num
;
472 kni
->stats
.rx_packets
+= num
;
476 * RX: loopback with enqueue/dequeue fifos and sk buffer copies.
479 kni_net_rx_lo_fifo_skb(struct kni_dev
*kni
)
483 uint32_t i
, num_rq
, num_fq
, num
;
484 struct rte_kni_mbuf
*kva
;
487 struct net_device
*dev
= kni
->net_dev
;
489 /* Get the number of entries in rx_q */
490 num_rq
= kni_fifo_count(kni
->rx_q
);
492 /* Get the number of free entries in free_q */
493 num_fq
= kni_fifo_free_count(kni
->free_q
);
495 /* Calculate the number of entries to dequeue from rx_q */
496 num
= min(num_rq
, num_fq
);
497 num
= min_t(uint32_t, num
, MBUF_BURST_SZ
);
499 /* Return if no entry to dequeue from rx_q */
503 /* Burst dequeue mbufs from rx_q */
504 ret
= kni_fifo_get(kni
->rx_q
, kni
->pa
, num
);
508 /* Copy mbufs to sk buffer and then call tx interface */
509 for (i
= 0; i
< num
; i
++) {
510 kva
= pa2kva(kni
->pa
[i
]);
512 data_kva
= kva2data_kva(kva
);
513 kni
->va
[i
] = pa2va(kni
->pa
[i
], kva
);
515 skb
= dev_alloc_skb(len
+ 2);
517 /* Align IP on 16B boundary */
519 memcpy(skb_put(skb
, len
), data_kva
, len
);
521 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
525 /* Simulate real usage, allocate/copy skb twice */
526 skb
= dev_alloc_skb(len
+ 2);
528 kni
->stats
.rx_dropped
++;
532 /* Align IP on 16B boundary */
535 if (kva
->nb_segs
== 1) {
536 memcpy(skb_put(skb
, len
), data_kva
, len
);
539 int kva_nb_segs
= kva
->nb_segs
;
541 for (nb_segs
= 0; nb_segs
< kva_nb_segs
; nb_segs
++) {
542 memcpy(skb_put(skb
, kva
->data_len
),
543 data_kva
, kva
->data_len
);
548 kva
= pa2kva(va2pa(kva
->next
, kva
));
549 data_kva
= kva2data_kva(kva
);
554 skb
->ip_summed
= CHECKSUM_UNNECESSARY
;
556 kni
->stats
.rx_bytes
+= len
;
557 kni
->stats
.rx_packets
++;
559 /* call tx interface */
560 kni_net_tx(skb
, dev
);
563 /* enqueue all the mbufs from rx_q into free_q */
564 ret
= kni_fifo_put(kni
->free_q
, kni
->va
, num
);
566 /* Failing should not happen */
567 pr_err("Fail to enqueue mbufs into free_q\n");
572 kni_net_rx(struct kni_dev
*kni
)
575 * It doesn't need to check if it is NULL pointer,
576 * as it has a default value
578 (*kni_net_rx_func
)(kni
);
582 * Deal with a transmit timeout.
585 kni_net_tx_timeout(struct net_device
*dev
)
587 struct kni_dev
*kni
= netdev_priv(dev
);
589 pr_debug("Transmit timeout at %ld, latency %ld\n", jiffies
,
590 jiffies
- dev_trans_start(dev
));
592 kni
->stats
.tx_errors
++;
593 netif_wake_queue(dev
);
600 kni_net_ioctl(struct net_device
*dev
, struct ifreq
*rq
, int cmd
)
602 pr_debug("kni_net_ioctl group:%d cmd:%d\n",
603 ((struct kni_dev
*)netdev_priv(dev
))->group_id
, cmd
);
/* Intentionally empty: rx filtering is handled by the userspace app */
static void
kni_net_set_rx_mode(struct net_device *dev)
{
}
614 kni_net_change_mtu(struct net_device
*dev
, int new_mtu
)
617 struct rte_kni_request req
;
618 struct kni_dev
*kni
= netdev_priv(dev
);
620 pr_debug("kni_net_change_mtu new mtu %d to be set\n", new_mtu
);
622 memset(&req
, 0, sizeof(req
));
623 req
.req_id
= RTE_KNI_REQ_CHANGE_MTU
;
624 req
.new_mtu
= new_mtu
;
625 ret
= kni_net_process_request(kni
, &req
);
626 if (ret
== 0 && req
.result
== 0)
629 return (ret
== 0) ? req
.result
: ret
;
633 kni_net_set_promiscusity(struct net_device
*netdev
, int flags
)
635 struct rte_kni_request req
;
636 struct kni_dev
*kni
= netdev_priv(netdev
);
638 memset(&req
, 0, sizeof(req
));
639 req
.req_id
= RTE_KNI_REQ_CHANGE_PROMISC
;
641 if (netdev
->flags
& IFF_PROMISC
)
642 req
.promiscusity
= 1;
644 req
.promiscusity
= 0;
645 kni_net_process_request(kni
, &req
);
649 * Checks if the user space application provided the resp message
652 kni_net_poll_resp(struct kni_dev
*kni
)
654 if (kni_fifo_count(kni
->resp_q
))
655 wake_up_interruptible(&kni
->wq
);
659 * Return statistics to the caller
661 static struct net_device_stats
*
662 kni_net_stats(struct net_device
*dev
)
664 struct kni_dev
*kni
= netdev_priv(dev
);
670 * Fill the eth header
673 kni_net_header(struct sk_buff
*skb
, struct net_device
*dev
,
674 unsigned short type
, const void *daddr
,
675 const void *saddr
, uint32_t len
)
677 struct ethhdr
*eth
= (struct ethhdr
*) skb_push(skb
, ETH_HLEN
);
679 memcpy(eth
->h_source
, saddr
? saddr
: dev
->dev_addr
, dev
->addr_len
);
680 memcpy(eth
->h_dest
, daddr
? daddr
: dev
->dev_addr
, dev
->addr_len
);
681 eth
->h_proto
= htons(type
);
683 return dev
->hard_header_len
;
/*
 * Re-fill the eth header: rewrites src/dst MAC with the device address.
 * Only compiled on kernels that still have header_ops->rebuild.
 */
#ifdef HAVE_REBUILD_HEADER
static int
kni_net_rebuild_header(struct sk_buff *skb)
{
	struct net_device *dev = skb->dev;
	struct ethhdr *eth = (struct ethhdr *) skb->data;

	memcpy(eth->h_source, dev->dev_addr, dev->addr_len);
	memcpy(eth->h_dest, dev->dev_addr, dev->addr_len);

	return 0;
}
#endif /* < 4.1.0 */
704 * kni_net_set_mac - Change the Ethernet Address of the KNI NIC
705 * @netdev: network interface device structure
706 * @p: pointer to an address structure
708 * Returns 0 on success, negative on failure
711 kni_net_set_mac(struct net_device
*netdev
, void *p
)
714 struct rte_kni_request req
;
716 struct sockaddr
*addr
= p
;
718 memset(&req
, 0, sizeof(req
));
719 req
.req_id
= RTE_KNI_REQ_CHANGE_MAC_ADDR
;
721 if (!is_valid_ether_addr((unsigned char *)(addr
->sa_data
)))
722 return -EADDRNOTAVAIL
;
724 memcpy(req
.mac_addr
, addr
->sa_data
, netdev
->addr_len
);
725 memcpy(netdev
->dev_addr
, addr
->sa_data
, netdev
->addr_len
);
727 kni
= netdev_priv(netdev
);
728 ret
= kni_net_process_request(kni
, &req
);
730 return (ret
== 0 ? req
.result
: ret
);
#ifdef HAVE_CHANGE_CARRIER_CB
/* Toggle the software carrier state on request from the stack */
static int
kni_net_change_carrier(struct net_device *dev, bool new_carrier)
{
	if (new_carrier)
		netif_carrier_on(dev);
	else
		netif_carrier_off(dev);
	return 0;
}
#endif
745 static const struct header_ops kni_net_header_ops
= {
746 .create
= kni_net_header
,
747 .parse
= eth_header_parse
,
748 #ifdef HAVE_REBUILD_HEADER
749 .rebuild
= kni_net_rebuild_header
,
751 .cache
= NULL
, /* disable caching */
754 static const struct net_device_ops kni_net_netdev_ops
= {
755 .ndo_open
= kni_net_open
,
756 .ndo_stop
= kni_net_release
,
757 .ndo_set_config
= kni_net_config
,
758 .ndo_change_rx_flags
= kni_net_set_promiscusity
,
759 .ndo_start_xmit
= kni_net_tx
,
760 .ndo_change_mtu
= kni_net_change_mtu
,
761 .ndo_do_ioctl
= kni_net_ioctl
,
762 .ndo_set_rx_mode
= kni_net_set_rx_mode
,
763 .ndo_get_stats
= kni_net_stats
,
764 .ndo_tx_timeout
= kni_net_tx_timeout
,
765 .ndo_set_mac_address
= kni_net_set_mac
,
766 #ifdef HAVE_CHANGE_CARRIER_CB
767 .ndo_change_carrier
= kni_net_change_carrier
,
772 kni_net_init(struct net_device
*dev
)
774 struct kni_dev
*kni
= netdev_priv(dev
);
776 init_waitqueue_head(&kni
->wq
);
777 mutex_init(&kni
->sync_lock
);
779 ether_setup(dev
); /* assign some of the fields */
780 dev
->netdev_ops
= &kni_net_netdev_ops
;
781 dev
->header_ops
= &kni_net_header_ops
;
782 dev
->watchdog_timeo
= WD_TIMEOUT
;
786 kni_net_config_lo_mode(char *lo_str
)
789 pr_debug("loopback disabled");
793 if (!strcmp(lo_str
, "lo_mode_none"))
794 pr_debug("loopback disabled");
795 else if (!strcmp(lo_str
, "lo_mode_fifo")) {
796 pr_debug("loopback mode=lo_mode_fifo enabled");
797 kni_net_rx_func
= kni_net_rx_lo_fifo
;
798 } else if (!strcmp(lo_str
, "lo_mode_fifo_skb")) {
799 pr_debug("loopback mode=lo_mode_fifo_skb enabled");
800 kni_net_rx_func
= kni_net_rx_lo_fifo_skb
;
802 pr_debug("Incognizant parameter, loopback disabled");