]> git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/dpdk/kernel/linux/kni/kni_misc.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / seastar / dpdk / kernel / linux / kni / kni_misc.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright(c) 2010-2014 Intel Corporation.
4 */
5
6 #include <linux/version.h>
7 #include <linux/module.h>
8 #include <linux/miscdevice.h>
9 #include <linux/netdevice.h>
10 #include <linux/etherdevice.h>
11 #include <linux/pci.h>
12 #include <linux/kthread.h>
13 #include <linux/rwsem.h>
14 #include <linux/mutex.h>
15 #include <linux/nsproxy.h>
16 #include <net/net_namespace.h>
17 #include <net/netns/generic.h>
18
19 #include <rte_kni_common.h>
20
21 #include "compat.h"
22 #include "kni_dev.h"
23
/* Module metadata exported to the kernel module loader. */
MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Kernel Module for managing kni devices");

/*
 * Number of rx/response polling passes a KNI kernel thread performs
 * before it re-checks kthread_should_stop() (see kni_thread_single()
 * and kni_thread_multiple()).
 */
#define KNI_RX_LOOP_NUM 1000

/* NOTE(review): not referenced anywhere in this file - confirm it is still needed. */
#define KNI_MAX_DEVICES 32

/*
 * PCI id tables defined outside this file; they are only consulted here
 * when RTE_KNI_KMOD_ETHTOOL is defined.
 */
extern const struct pci_device_id ixgbe_pci_tbl[];
extern const struct pci_device_id igb_pci_tbl[];

/* loopback mode (module parameter, handed to kni_net_config_lo_mode()) */
static char *lo_mode;

/*
 * Kernel thread mode: raw module parameter string and the flag that
 * kni_parse_kthread_mode() derives from it (0 = single, 1 = multiple).
 */
static char *kthread_mode;
static uint32_t multiple_kthread_on;

/*
 * Default carrier state for created KNI network interfaces: raw module
 * parameter string and the value kni_parse_carrier_state() derives from
 * it (0 = off, 1 = on). dflt_carrier has external linkage; it is not
 * read in this file - presumably consumed by another KNI source file.
 */
static char *carrier;
uint32_t dflt_carrier;

#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */

/* Id used with net_generic() to find the per-netns struct kni_net. */
static int kni_net_id;
49
/*
 * Per network namespace KNI state; obtained with
 * net_generic(net, kni_net_id).
 */
struct kni_net {
	unsigned long device_in_use; /* device in use flag */
	struct mutex kni_kthread_lock; /* serializes start/stop of kni_kthread */
	struct task_struct *kni_kthread; /* shared thread used in single kthread mode */
	struct rw_semaphore kni_list_lock; /* protects kni_list_head */
	struct list_head kni_list_head; /* list of struct kni_dev in this netns */
};
57
58 static int __net_init
59 kni_init_net(struct net *net)
60 {
61 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
62 struct kni_net *knet = net_generic(net, kni_net_id);
63
64 memset(knet, 0, sizeof(*knet));
65 #else
66 struct kni_net *knet;
67 int ret;
68
69 knet = kzalloc(sizeof(struct kni_net), GFP_KERNEL);
70 if (!knet) {
71 ret = -ENOMEM;
72 return ret;
73 }
74 #endif
75
76 /* Clear the bit of device in use */
77 clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
78
79 mutex_init(&knet->kni_kthread_lock);
80
81 init_rwsem(&knet->kni_list_lock);
82 INIT_LIST_HEAD(&knet->kni_list_head);
83
84 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
85 return 0;
86 #else
87 ret = net_assign_generic(net, kni_net_id, knet);
88 if (ret < 0)
89 kfree(knet);
90
91 return ret;
92 #endif
93 }
94
95 static void __net_exit
96 kni_exit_net(struct net *net)
97 {
98 struct kni_net *knet __maybe_unused;
99
100 knet = net_generic(net, kni_net_id);
101 mutex_destroy(&knet->kni_kthread_lock);
102
103 #ifndef HAVE_SIMPLIFIED_PERNET_OPERATIONS
104 kfree(knet);
105 #endif
106 }
107
/*
 * Per network namespace hooks. With HAVE_SIMPLIFIED_PERNET_OPERATIONS
 * the id and the size of the per-netns area are supplied here; without
 * it kni_init_net() allocates the state itself.
 */
static struct pernet_operations kni_net_ops = {
	.init = kni_init_net,
	.exit = kni_exit_net,
#ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
	.id = &kni_net_id,
	.size = sizeof(struct kni_net),
#endif
};
116
117 static int
118 kni_thread_single(void *data)
119 {
120 struct kni_net *knet = data;
121 int j;
122 struct kni_dev *dev;
123
124 while (!kthread_should_stop()) {
125 down_read(&knet->kni_list_lock);
126 for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
127 list_for_each_entry(dev, &knet->kni_list_head, list) {
128 kni_net_rx(dev);
129 kni_net_poll_resp(dev);
130 }
131 }
132 up_read(&knet->kni_list_lock);
133 #ifdef RTE_KNI_PREEMPT_DEFAULT
134 /* reschedule out for a while */
135 schedule_timeout_interruptible(
136 usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
137 #endif
138 }
139
140 return 0;
141 }
142
143 static int
144 kni_thread_multiple(void *param)
145 {
146 int j;
147 struct kni_dev *dev = param;
148
149 while (!kthread_should_stop()) {
150 for (j = 0; j < KNI_RX_LOOP_NUM; j++) {
151 kni_net_rx(dev);
152 kni_net_poll_resp(dev);
153 }
154 #ifdef RTE_KNI_PREEMPT_DEFAULT
155 schedule_timeout_interruptible(
156 usecs_to_jiffies(KNI_KTHREAD_RESCHEDULE_INTERVAL));
157 #endif
158 }
159
160 return 0;
161 }
162
163 static int
164 kni_open(struct inode *inode, struct file *file)
165 {
166 struct net *net = current->nsproxy->net_ns;
167 struct kni_net *knet = net_generic(net, kni_net_id);
168
169 /* kni device can be opened by one user only per netns */
170 if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use))
171 return -EBUSY;
172
173 file->private_data = get_net(net);
174 pr_debug("/dev/kni opened\n");
175
176 return 0;
177 }
178
179 static int
180 kni_dev_remove(struct kni_dev *dev)
181 {
182 if (!dev)
183 return -ENODEV;
184
185 #ifdef RTE_KNI_KMOD_ETHTOOL
186 if (dev->pci_dev) {
187 if (pci_match_id(ixgbe_pci_tbl, dev->pci_dev))
188 ixgbe_kni_remove(dev->pci_dev);
189 else if (pci_match_id(igb_pci_tbl, dev->pci_dev))
190 igb_kni_remove(dev->pci_dev);
191 }
192 #endif
193
194 if (dev->net_dev) {
195 unregister_netdev(dev->net_dev);
196 free_netdev(dev->net_dev);
197 }
198
199 kni_net_release_fifo_phy(dev);
200
201 return 0;
202 }
203
204 static int
205 kni_release(struct inode *inode, struct file *file)
206 {
207 struct net *net = file->private_data;
208 struct kni_net *knet = net_generic(net, kni_net_id);
209 struct kni_dev *dev, *n;
210
211 /* Stop kernel thread for single mode */
212 if (multiple_kthread_on == 0) {
213 mutex_lock(&knet->kni_kthread_lock);
214 /* Stop kernel thread */
215 if (knet->kni_kthread != NULL) {
216 kthread_stop(knet->kni_kthread);
217 knet->kni_kthread = NULL;
218 }
219 mutex_unlock(&knet->kni_kthread_lock);
220 }
221
222 down_write(&knet->kni_list_lock);
223 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
224 /* Stop kernel thread for multiple mode */
225 if (multiple_kthread_on && dev->pthread != NULL) {
226 kthread_stop(dev->pthread);
227 dev->pthread = NULL;
228 }
229
230 kni_dev_remove(dev);
231 list_del(&dev->list);
232 }
233 up_write(&knet->kni_list_lock);
234
235 /* Clear the bit of device in use */
236 clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use);
237
238 put_net(net);
239 pr_debug("/dev/kni closed\n");
240
241 return 0;
242 }
243
244 static int
245 kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev)
246 {
247 if (!kni || !dev)
248 return -1;
249
250 /* Check if network name has been used */
251 if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) {
252 pr_err("KNI name %s duplicated\n", dev->name);
253 return -1;
254 }
255
256 return 0;
257 }
258
259 static int
260 kni_run_thread(struct kni_net *knet, struct kni_dev *kni, uint8_t force_bind)
261 {
262 /**
263 * Create a new kernel thread for multiple mode, set its core affinity,
264 * and finally wake it up.
265 */
266 if (multiple_kthread_on) {
267 kni->pthread = kthread_create(kni_thread_multiple,
268 (void *)kni, "kni_%s", kni->name);
269 if (IS_ERR(kni->pthread)) {
270 kni_dev_remove(kni);
271 return -ECANCELED;
272 }
273
274 if (force_bind)
275 kthread_bind(kni->pthread, kni->core_id);
276 wake_up_process(kni->pthread);
277 } else {
278 mutex_lock(&knet->kni_kthread_lock);
279
280 if (knet->kni_kthread == NULL) {
281 knet->kni_kthread = kthread_create(kni_thread_single,
282 (void *)knet, "kni_single");
283 if (IS_ERR(knet->kni_kthread)) {
284 mutex_unlock(&knet->kni_kthread_lock);
285 kni_dev_remove(kni);
286 return -ECANCELED;
287 }
288
289 if (force_bind)
290 kthread_bind(knet->kni_kthread, kni->core_id);
291 wake_up_process(knet->kni_kthread);
292 }
293
294 mutex_unlock(&knet->kni_kthread_lock);
295 }
296
297 return 0;
298 }
299
300 static int
301 kni_ioctl_create(struct net *net, uint32_t ioctl_num,
302 unsigned long ioctl_param)
303 {
304 struct kni_net *knet = net_generic(net, kni_net_id);
305 int ret;
306 struct rte_kni_device_info dev_info;
307 struct net_device *net_dev = NULL;
308 struct kni_dev *kni, *dev, *n;
309 #ifdef RTE_KNI_KMOD_ETHTOOL
310 struct pci_dev *found_pci = NULL;
311 struct net_device *lad_dev = NULL;
312 struct pci_dev *pci = NULL;
313 #endif
314
315 pr_info("Creating kni...\n");
316 /* Check the buffer size, to avoid warning */
317 if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
318 return -EINVAL;
319
320 /* Copy kni info from user space */
321 ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
322 if (ret) {
323 pr_err("copy_from_user in kni_ioctl_create");
324 return -EIO;
325 }
326
327 /* Check if name is zero-ended */
328 if (strnlen(dev_info.name, sizeof(dev_info.name)) == sizeof(dev_info.name)) {
329 pr_err("kni.name not zero-terminated");
330 return -EINVAL;
331 }
332
333 /**
334 * Check if the cpu core id is valid for binding.
335 */
336 if (dev_info.force_bind && !cpu_online(dev_info.core_id)) {
337 pr_err("cpu %u is not online\n", dev_info.core_id);
338 return -EINVAL;
339 }
340
341 /* Check if it has been created */
342 down_read(&knet->kni_list_lock);
343 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
344 if (kni_check_param(dev, &dev_info) < 0) {
345 up_read(&knet->kni_list_lock);
346 return -EINVAL;
347 }
348 }
349 up_read(&knet->kni_list_lock);
350
351 net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name,
352 #ifdef NET_NAME_USER
353 NET_NAME_USER,
354 #endif
355 kni_net_init);
356 if (net_dev == NULL) {
357 pr_err("error allocating device \"%s\"\n", dev_info.name);
358 return -EBUSY;
359 }
360
361 dev_net_set(net_dev, net);
362
363 kni = netdev_priv(net_dev);
364
365 kni->net_dev = net_dev;
366 kni->group_id = dev_info.group_id;
367 kni->core_id = dev_info.core_id;
368 strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
369
370 /* Translate user space info into kernel space info */
371 kni->tx_q = phys_to_virt(dev_info.tx_phys);
372 kni->rx_q = phys_to_virt(dev_info.rx_phys);
373 kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
374 kni->free_q = phys_to_virt(dev_info.free_phys);
375
376 kni->req_q = phys_to_virt(dev_info.req_phys);
377 kni->resp_q = phys_to_virt(dev_info.resp_phys);
378 kni->sync_va = dev_info.sync_va;
379 kni->sync_kva = phys_to_virt(dev_info.sync_phys);
380
381 kni->mbuf_size = dev_info.mbuf_size;
382
383 pr_debug("tx_phys: 0x%016llx, tx_q addr: 0x%p\n",
384 (unsigned long long) dev_info.tx_phys, kni->tx_q);
385 pr_debug("rx_phys: 0x%016llx, rx_q addr: 0x%p\n",
386 (unsigned long long) dev_info.rx_phys, kni->rx_q);
387 pr_debug("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n",
388 (unsigned long long) dev_info.alloc_phys, kni->alloc_q);
389 pr_debug("free_phys: 0x%016llx, free_q addr: 0x%p\n",
390 (unsigned long long) dev_info.free_phys, kni->free_q);
391 pr_debug("req_phys: 0x%016llx, req_q addr: 0x%p\n",
392 (unsigned long long) dev_info.req_phys, kni->req_q);
393 pr_debug("resp_phys: 0x%016llx, resp_q addr: 0x%p\n",
394 (unsigned long long) dev_info.resp_phys, kni->resp_q);
395 pr_debug("mbuf_size: %u\n", kni->mbuf_size);
396
397 pr_debug("PCI: %02x:%02x.%02x %04x:%04x\n",
398 dev_info.bus,
399 dev_info.devid,
400 dev_info.function,
401 dev_info.vendor_id,
402 dev_info.device_id);
403 #ifdef RTE_KNI_KMOD_ETHTOOL
404 pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL);
405
406 /* Support Ethtool */
407 while (pci) {
408 pr_debug("pci_bus: %02x:%02x:%02x\n",
409 pci->bus->number,
410 PCI_SLOT(pci->devfn),
411 PCI_FUNC(pci->devfn));
412
413 if ((pci->bus->number == dev_info.bus) &&
414 (PCI_SLOT(pci->devfn) == dev_info.devid) &&
415 (PCI_FUNC(pci->devfn) == dev_info.function)) {
416 found_pci = pci;
417
418 if (pci_match_id(ixgbe_pci_tbl, found_pci))
419 ret = ixgbe_kni_probe(found_pci, &lad_dev);
420 else if (pci_match_id(igb_pci_tbl, found_pci))
421 ret = igb_kni_probe(found_pci, &lad_dev);
422 else
423 ret = -1;
424
425 pr_debug("PCI found: pci=0x%p, lad_dev=0x%p\n",
426 pci, lad_dev);
427 if (ret == 0) {
428 kni->lad_dev = lad_dev;
429 kni_set_ethtool_ops(kni->net_dev);
430 } else {
431 pr_err("Device not supported by ethtool");
432 kni->lad_dev = NULL;
433 }
434
435 kni->pci_dev = found_pci;
436 kni->device_id = dev_info.device_id;
437 break;
438 }
439 pci = pci_get_device(dev_info.vendor_id,
440 dev_info.device_id, pci);
441 }
442 if (pci)
443 pci_dev_put(pci);
444 #endif
445
446 if (kni->lad_dev)
447 ether_addr_copy(net_dev->dev_addr, kni->lad_dev->dev_addr);
448 else {
449 /* if user has provided a valid mac address */
450 if (is_valid_ether_addr(dev_info.mac_addr))
451 memcpy(net_dev->dev_addr, dev_info.mac_addr, ETH_ALEN);
452 else
453 /*
454 * Generate random mac address. eth_random_addr() is the
455 * newer version of generating mac address in kernel.
456 */
457 random_ether_addr(net_dev->dev_addr);
458 }
459
460 if (dev_info.mtu)
461 net_dev->mtu = dev_info.mtu;
462 #ifdef HAVE_MAX_MTU_PARAM
463 net_dev->max_mtu = net_dev->mtu;
464 #endif
465
466 ret = register_netdev(net_dev);
467 if (ret) {
468 pr_err("error %i registering device \"%s\"\n",
469 ret, dev_info.name);
470 kni->net_dev = NULL;
471 kni_dev_remove(kni);
472 free_netdev(net_dev);
473 return -ENODEV;
474 }
475
476 netif_carrier_off(net_dev);
477
478 ret = kni_run_thread(knet, kni, dev_info.force_bind);
479 if (ret != 0)
480 return ret;
481
482 down_write(&knet->kni_list_lock);
483 list_add(&kni->list, &knet->kni_list_head);
484 up_write(&knet->kni_list_lock);
485
486 return 0;
487 }
488
489 static int
490 kni_ioctl_release(struct net *net, uint32_t ioctl_num,
491 unsigned long ioctl_param)
492 {
493 struct kni_net *knet = net_generic(net, kni_net_id);
494 int ret = -EINVAL;
495 struct kni_dev *dev, *n;
496 struct rte_kni_device_info dev_info;
497
498 if (_IOC_SIZE(ioctl_num) > sizeof(dev_info))
499 return -EINVAL;
500
501 ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info));
502 if (ret) {
503 pr_err("copy_from_user in kni_ioctl_release");
504 return -EIO;
505 }
506
507 /* Release the network device according to its name */
508 if (strlen(dev_info.name) == 0)
509 return ret;
510
511 down_write(&knet->kni_list_lock);
512 list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) {
513 if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0)
514 continue;
515
516 if (multiple_kthread_on && dev->pthread != NULL) {
517 kthread_stop(dev->pthread);
518 dev->pthread = NULL;
519 }
520
521 kni_dev_remove(dev);
522 list_del(&dev->list);
523 ret = 0;
524 break;
525 }
526 up_write(&knet->kni_list_lock);
527 pr_info("%s release kni named %s\n",
528 (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name);
529
530 return ret;
531 }
532
533 static int
534 kni_ioctl(struct inode *inode, uint32_t ioctl_num, unsigned long ioctl_param)
535 {
536 int ret = -EINVAL;
537 struct net *net = current->nsproxy->net_ns;
538
539 pr_debug("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param);
540
541 /*
542 * Switch according to the ioctl called
543 */
544 switch (_IOC_NR(ioctl_num)) {
545 case _IOC_NR(RTE_KNI_IOCTL_TEST):
546 /* For test only, not used */
547 break;
548 case _IOC_NR(RTE_KNI_IOCTL_CREATE):
549 ret = kni_ioctl_create(net, ioctl_num, ioctl_param);
550 break;
551 case _IOC_NR(RTE_KNI_IOCTL_RELEASE):
552 ret = kni_ioctl_release(net, ioctl_num, ioctl_param);
553 break;
554 default:
555 pr_debug("IOCTL default\n");
556 break;
557 }
558
559 return ret;
560 }
561
562 static int
563 kni_compat_ioctl(struct inode *inode, uint32_t ioctl_num,
564 unsigned long ioctl_param)
565 {
566 /* 32 bits app on 64 bits OS to be supported later */
567 pr_debug("Not implemented.\n");
568
569 return -EINVAL;
570 }
571
/*
 * File operations of the KNI misc device.
 *
 * NOTE(review): the (void *) casts bypass the compiler's prototype check
 * for the two ioctl members - confirm that the handlers' signatures
 * match what struct file_operations expects.
 */
static const struct file_operations kni_fops = {
	.owner = THIS_MODULE,
	.open = kni_open,
	.release = kni_release,
	.unlocked_ioctl = (void *)kni_ioctl,
	.compat_ioctl = (void *)kni_compat_ioctl,
};
579
/*
 * Misc character device named KNI_DEVICE with a dynamically assigned
 * minor number; registered in kni_init() and removed in kni_exit().
 */
static struct miscdevice kni_misc = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = KNI_DEVICE,
	.fops = &kni_fops,
};
585
586 static int __init
587 kni_parse_kthread_mode(void)
588 {
589 if (!kthread_mode)
590 return 0;
591
592 if (strcmp(kthread_mode, "single") == 0)
593 return 0;
594 else if (strcmp(kthread_mode, "multiple") == 0)
595 multiple_kthread_on = 1;
596 else
597 return -1;
598
599 return 0;
600 }
601
602 static int __init
603 kni_parse_carrier_state(void)
604 {
605 if (!carrier) {
606 dflt_carrier = 0;
607 return 0;
608 }
609
610 if (strcmp(carrier, "off") == 0)
611 dflt_carrier = 0;
612 else if (strcmp(carrier, "on") == 0)
613 dflt_carrier = 1;
614 else
615 return -1;
616
617 return 0;
618 }
619
620 static int __init
621 kni_init(void)
622 {
623 int rc;
624
625 if (kni_parse_kthread_mode() < 0) {
626 pr_err("Invalid parameter for kthread_mode\n");
627 return -EINVAL;
628 }
629
630 if (multiple_kthread_on == 0)
631 pr_debug("Single kernel thread for all KNI devices\n");
632 else
633 pr_debug("Multiple kernel thread mode enabled\n");
634
635 if (kni_parse_carrier_state() < 0) {
636 pr_err("Invalid parameter for carrier\n");
637 return -EINVAL;
638 }
639
640 if (dflt_carrier == 0)
641 pr_debug("Default carrier state set to off.\n");
642 else
643 pr_debug("Default carrier state set to on.\n");
644
645 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
646 rc = register_pernet_subsys(&kni_net_ops);
647 #else
648 rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops);
649 #endif
650 if (rc)
651 return -EPERM;
652
653 rc = misc_register(&kni_misc);
654 if (rc != 0) {
655 pr_err("Misc registration failed\n");
656 goto out;
657 }
658
659 /* Configure the lo mode according to the input parameter */
660 kni_net_config_lo_mode(lo_mode);
661
662 return 0;
663
664 out:
665 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
666 unregister_pernet_subsys(&kni_net_ops);
667 #else
668 unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
669 #endif
670 return rc;
671 }
672
673 static void __exit
674 kni_exit(void)
675 {
676 misc_deregister(&kni_misc);
677 #ifdef HAVE_SIMPLIFIED_PERNET_OPERATIONS
678 unregister_pernet_subsys(&kni_net_ops);
679 #else
680 unregister_pernet_gen_subsys(kni_net_id, &kni_net_ops);
681 #endif
682 }
683
/* Module load/unload hooks. */
module_init(kni_init);
module_exit(kni_exit);

/*
 * Module parameters. In this file their values are only consumed from
 * kni_init(): lo_mode is passed to kni_net_config_lo_mode(), while
 * kthread_mode and carrier are parsed by the __init helpers.
 */
module_param(lo_mode, charp, 0644);
MODULE_PARM_DESC(lo_mode,
"KNI loopback mode (default=lo_mode_none):\n"
"\t\tlo_mode_none Kernel loopback disabled\n"
"\t\tlo_mode_fifo Enable kernel loopback with fifo\n"
"\t\tlo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n"
"\t\t"
);

module_param(kthread_mode, charp, 0644);
MODULE_PARM_DESC(kthread_mode,
"Kernel thread mode (default=single):\n"
"\t\tsingle Single kernel thread mode enabled.\n"
"\t\tmultiple Multiple kernel thread mode enabled.\n"
"\t\t"
);

module_param(carrier, charp, 0644);
MODULE_PARM_DESC(carrier,
"Default carrier state for KNI interface (default=off):\n"
"\t\toff Interfaces will be created with carrier state set to off.\n"
"\t\ton Interfaces will be created with carrier state set to on.\n"
"\t\t"
);
708 "\t\ton Interfaces will be created with carrier state set to on.\n"
709 "\t\t"
710 );