]> git.proxmox.com Git - ceph.git/blob - ceph/src/dpdk/examples/vhost_xen/vhost_monitor.c
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / dpdk / examples / vhost_xen / vhost_monitor.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <stdlib.h>
35 #include <stdio.h>
36 #include <dirent.h>
37 #include <unistd.h>
38 #include <sys/eventfd.h>
39 #include <sys/ioctl.h>
40 #include <sys/mman.h>
41 #include <xen/xen-compat.h>
42 #if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
43 #include <xs.h>
44 #else
45 #include <xenstore.h>
46 #endif
47 #include <linux/virtio_ring.h>
48 #include <linux/virtio_pci.h>
49 #include <linux/virtio_net.h>
50
51 #include <rte_ethdev.h>
52 #include <rte_log.h>
53 #include <rte_malloc.h>
54 #include <rte_string_fns.h>
55
56 #include "virtio-net.h"
57 #include "xen_vhost.h"
58
/* XenStore connection state used by the monitor. */
struct virtio_watch {
	struct xs_handle *xs;	/* handle obtained from xs_daemon_open() */
	int watch_fd;		/* descriptor obtained from xs_fileno() for that handle */
};
63
64
65 /* device ops to add/remove device to/from data core. */
66 static struct virtio_net_device_ops const *notify_ops;
67
68 /* root address of the linked list in the configuration core. */
69 static struct virtio_net_config_ll *ll_root = NULL;
70
71 /* root address of VM. */
72 static struct xen_guestlist guest_root;
73
74 static struct virtio_watch watch;
75
76 static void
77 vq_vring_init(struct vhost_virtqueue *vq, unsigned int num, uint8_t *p,
78 unsigned long align)
79 {
80 vq->size = num;
81 vq->desc = (struct vring_desc *) p;
82 vq->avail = (struct vring_avail *) (p +
83 num * sizeof(struct vring_desc));
84 vq->used = (void *)
85 RTE_ALIGN_CEIL( (uintptr_t)(&vq->avail->ring[num]), align);
86
87 }
88
89 static int
90 init_watch(void)
91 {
92 struct xs_handle *xs;
93 int ret;
94 int fd;
95
96 /* get a connection to the daemon */
97 xs = xs_daemon_open();
98 if (xs == NULL) {
99 RTE_LOG(ERR, XENHOST, "xs_daemon_open failed\n");
100 return -1;
101 }
102
103 ret = xs_watch(xs, "/local/domain", "mytoken");
104 if (ret == 0) {
105 RTE_LOG(ERR, XENHOST, "%s: xs_watch failed\n", __func__);
106 xs_daemon_close(xs);
107 return -1;
108 }
109
110 /* We are notified of read availability on the watch via the file descriptor. */
111 fd = xs_fileno(xs);
112 watch.xs = xs;
113 watch.watch_fd = fd;
114
115 TAILQ_INIT(&guest_root);
116 return 0;
117 }
118
119 static struct xen_guest *
120 get_xen_guest(int dom_id)
121 {
122 struct xen_guest *guest = NULL;
123
124 TAILQ_FOREACH(guest, &guest_root, next) {
125 if(guest->dom_id == dom_id)
126 return guest;
127 }
128
129 return NULL;
130 }
131
132
133 static struct xen_guest *
134 add_xen_guest(int32_t dom_id)
135 {
136 struct xen_guest *guest = NULL;
137
138 if ((guest = get_xen_guest(dom_id)) != NULL)
139 return guest;
140
141 guest = calloc(1, sizeof(struct xen_guest));
142 if (guest) {
143 RTE_LOG(ERR, XENHOST, " %s: return newly created guest with %d rings\n", __func__, guest->vring_num);
144 TAILQ_INSERT_TAIL(&guest_root, guest, next);
145 guest->dom_id = dom_id;
146 }
147
148 return guest;
149 }
150
151 static void
152 cleanup_device(struct virtio_net_config_ll *ll_dev)
153 {
154 if (ll_dev == NULL)
155 return;
156 if (ll_dev->dev.virtqueue_rx) {
157 rte_free(ll_dev->dev.virtqueue_rx);
158 ll_dev->dev.virtqueue_rx = NULL;
159 }
160 if (ll_dev->dev.virtqueue_tx) {
161 rte_free(ll_dev->dev.virtqueue_tx);
162 ll_dev->dev.virtqueue_tx = NULL;
163 }
164 free(ll_dev);
165 }
166
167 /*
168 * Add entry containing a device to the device configuration linked list.
169 */
170 static void
171 add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
172 {
173 struct virtio_net_config_ll *ll_dev = ll_root;
174
175 /* If ll_dev == NULL then this is the first device so go to else */
176 if (ll_dev) {
177 /* If the 1st device_id != 0 then we insert our device here. */
178 if (ll_dev->dev.device_fh != 0) {
179 new_ll_dev->dev.device_fh = 0;
180 new_ll_dev->next = ll_dev;
181 ll_root = new_ll_dev;
182 } else {
183 /* increment through the ll until we find un unused device_id,
184 * insert the device at that entry
185 */
186 while ((ll_dev->next != NULL) && (ll_dev->dev.device_fh == (ll_dev->next->dev.device_fh - 1)))
187 ll_dev = ll_dev->next;
188
189 new_ll_dev->dev.device_fh = ll_dev->dev.device_fh + 1;
190 new_ll_dev->next = ll_dev->next;
191 ll_dev->next = new_ll_dev;
192 }
193 } else {
194 ll_root = new_ll_dev;
195 ll_root->dev.device_fh = 0;
196 }
197 }
198
199
200 /*
201 * Remove an entry from the device configuration linked list.
202 */
203 static struct virtio_net_config_ll *
204 rm_config_ll_entry(struct virtio_net_config_ll *ll_dev, struct virtio_net_config_ll *ll_dev_last)
205 {
206 /* First remove the device and then clean it up. */
207 if (ll_dev == ll_root) {
208 ll_root = ll_dev->next;
209 cleanup_device(ll_dev);
210 return ll_root;
211 } else {
212 ll_dev_last->next = ll_dev->next;
213 cleanup_device(ll_dev);
214 return ll_dev_last->next;
215 }
216 }
217
218 /*
219 * Retrieves an entry from the devices configuration linked list.
220 */
221 static struct virtio_net_config_ll *
222 get_config_ll_entry(unsigned int virtio_idx, unsigned int dom_id)
223 {
224 struct virtio_net_config_ll *ll_dev = ll_root;
225
226 /* Loop through linked list until the dom_id is found. */
227 while (ll_dev != NULL) {
228 if (ll_dev->dev.dom_id == dom_id && ll_dev->dev.virtio_idx == virtio_idx)
229 return ll_dev;
230 ll_dev = ll_dev->next;
231 }
232
233 return NULL;
234 }
235
/*
 * Hook for initialising the fields of a device structure.
 * Currently a no-op: the argument is only marked as used.
 */
static void
init_dev(struct virtio_net *dev)
{
	RTE_SET_USED(dev);
}
244
245
246 static struct
247 virtio_net_config_ll *new_device(unsigned int virtio_idx, struct xen_guest *guest)
248 {
249 struct virtio_net_config_ll *new_ll_dev;
250 struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
251 size_t size, vq_ring_size, vq_size = VQ_DESC_NUM;
252 void *vq_ring_virt_mem;
253 uint64_t gpa;
254 uint32_t i;
255
256 /* Setup device and virtqueues. */
257 new_ll_dev = calloc(1, sizeof(struct virtio_net_config_ll));
258 virtqueue_rx = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), RTE_CACHE_LINE_SIZE);
259 virtqueue_tx = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), RTE_CACHE_LINE_SIZE);
260 if (new_ll_dev == NULL || virtqueue_rx == NULL || virtqueue_tx == NULL)
261 goto err;
262
263 new_ll_dev->dev.virtqueue_rx = virtqueue_rx;
264 new_ll_dev->dev.virtqueue_tx = virtqueue_tx;
265 new_ll_dev->dev.dom_id = guest->dom_id;
266 new_ll_dev->dev.virtio_idx = virtio_idx;
267 /* Initialise device and virtqueues. */
268 init_dev(&new_ll_dev->dev);
269
270 size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
271 vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
272 (void)vq_ring_size;
273
274 vq_ring_virt_mem = guest->vring[virtio_idx].rxvring_addr;
275 vq_vring_init(virtqueue_rx, vq_size, vq_ring_virt_mem, VIRTIO_PCI_VRING_ALIGN);
276 virtqueue_rx->size = vq_size;
277 virtqueue_rx->vhost_hlen = sizeof(struct virtio_net_hdr);
278
279 vq_ring_virt_mem = guest->vring[virtio_idx].txvring_addr;
280 vq_vring_init(virtqueue_tx, vq_size, vq_ring_virt_mem, VIRTIO_PCI_VRING_ALIGN);
281 virtqueue_tx->size = vq_size;
282 memcpy(&new_ll_dev->dev.mac_address, &guest->vring[virtio_idx].addr, sizeof(struct ether_addr));
283
284 /* virtio_memory has to be one per domid */
285 new_ll_dev->dev.mem = malloc(sizeof(struct virtio_memory) + sizeof(struct virtio_memory_regions) * MAX_XENVIRT_MEMPOOL);
286 new_ll_dev->dev.mem->nregions = guest->pool_num;
287 for (i = 0; i < guest->pool_num; i++) {
288 gpa = new_ll_dev->dev.mem->regions[i].guest_phys_address =
289 (uint64_t)((uintptr_t)guest->mempool[i].gva);
290 new_ll_dev->dev.mem->regions[i].guest_phys_address_end =
291 gpa + guest->mempool[i].mempfn_num * getpagesize();
292 new_ll_dev->dev.mem->regions[i].address_offset =
293 (uint64_t)((uintptr_t)guest->mempool[i].hva -
294 (uintptr_t)gpa);
295 }
296
297 new_ll_dev->next = NULL;
298
299 /* Add entry to device configuration linked list. */
300 add_config_ll_entry(new_ll_dev);
301 return new_ll_dev;
302 err:
303 free(new_ll_dev);
304 rte_free(virtqueue_rx);
305 rte_free(virtqueue_tx);
306
307 return NULL;
308 }
309
310 static void
311 destroy_guest(struct xen_guest *guest)
312 {
313 uint32_t i;
314
315 for (i = 0; i < guest->vring_num; i++)
316 cleanup_vring(&guest->vring[i]);
317 /* clean mempool */
318 for (i = 0; i < guest->pool_num; i++)
319 cleanup_mempool(&guest->mempool[i]);
320 free(guest);
321
322 return;
323 }
324
325 /*
326 * This function will cleanup the device and remove it from device configuration linked list.
327 */
328 static void
329 destroy_device(unsigned int virtio_idx, unsigned int dom_id)
330 {
331 struct virtio_net_config_ll *ll_dev_cur_ctx, *ll_dev_last = NULL;
332 struct virtio_net_config_ll *ll_dev_cur = ll_root;
333
334 /* clean virtio device */
335 struct xen_guest *guest = NULL;
336 guest = get_xen_guest(dom_id);
337 if (guest == NULL)
338 return;
339
340 /* Find the linked list entry for the device to be removed. */
341 ll_dev_cur_ctx = get_config_ll_entry(virtio_idx, dom_id);
342 while (ll_dev_cur != NULL) {
343 /* If the device is found or a device that doesn't exist is found then it is removed. */
344 if (ll_dev_cur == ll_dev_cur_ctx) {
345 if ((ll_dev_cur->dev.flags & VIRTIO_DEV_RUNNING))
346 notify_ops->destroy_device(&(ll_dev_cur->dev));
347 ll_dev_cur = rm_config_ll_entry(ll_dev_cur, ll_dev_last);
348 } else {
349 ll_dev_last = ll_dev_cur;
350 ll_dev_cur = ll_dev_cur->next;
351 }
352 }
353 RTE_LOG(INFO, XENHOST, " %s guest:%p vring:%p rxvring:%p txvring:%p flag:%p\n",
354 __func__, guest, &guest->vring[virtio_idx], guest->vring[virtio_idx].rxvring_addr, guest->vring[virtio_idx].txvring_addr, guest->vring[virtio_idx].flag);
355 cleanup_vring(&guest->vring[virtio_idx]);
356 guest->vring[virtio_idx].removed = 1;
357 guest->vring_num -= 1;
358 }
359
360
361
362
363 static void
364 watch_unmap_event(void)
365 {
366 int i;
367 struct xen_guest *guest = NULL;
368 bool remove_request;
369
370 TAILQ_FOREACH(guest, &guest_root, next) {
371 for (i = 0; i < MAX_VIRTIO; i++) {
372 if (guest->vring[i].dom_id && guest->vring[i].removed == 0 && *guest->vring[i].flag == 0) {
373 RTE_LOG(INFO, XENHOST, "\n\n");
374 RTE_LOG(INFO, XENHOST, " #####%s: (%d, %d) to be removed\n",
375 __func__,
376 guest->vring[i].dom_id,
377 i);
378 destroy_device(i, guest->dom_id);
379 RTE_LOG(INFO, XENHOST, " %s: DOM %u, vring num: %d\n",
380 __func__,
381 guest->dom_id,
382 guest->vring_num);
383 }
384 }
385 }
386
387 _find_next_remove:
388 guest = NULL;
389 remove_request = false;
390 TAILQ_FOREACH(guest, &guest_root, next) {
391 if (guest->vring_num == 0) {
392 remove_request = true;
393 break;
394 }
395 }
396 if (remove_request == true) {
397 TAILQ_REMOVE(&guest_root, guest, next);
398 RTE_LOG(INFO, XENHOST, " #####%s: destroy guest (%d)\n", __func__, guest->dom_id);
399 destroy_guest(guest);
400 goto _find_next_remove;
401 }
402 return;
403 }
404
/*
 * Start-up ordering:
 *  - if the guest starts first, that is OK;
 *  - if the host starts first, that is OK;
 *  - open question: if the guest has been running for some time and the host
 *    then stops and restarts, is last_used_idx 0 again? How should this case
 *    be handled?
 */
410
411 static void virtio_init(void)
412 {
413 uint32_t len, e_num;
414 uint32_t i,j;
415 char **dom;
416 char *status;
417 int dom_id;
418 char path[PATH_MAX];
419 char node[PATH_MAX];
420 xs_transaction_t th;
421 struct xen_guest *guest;
422 struct virtio_net_config_ll *net_config;
423 char *end;
424 int val;
425
426 /* init env for watch the node */
427 if (init_watch() < 0)
428 return;
429
430 dom = xs_directory(watch.xs, XBT_NULL, "/local/domain", &e_num);
431
432 for (i = 0; i < e_num; i++) {
433 errno = 0;
434 dom_id = strtol(dom[i], &end, 0);
435 if (errno != 0 || end == NULL || dom_id == 0)
436 continue;
437
438 for (j = 0; j < RTE_MAX_ETHPORTS; j++) {
439 snprintf(node, PATH_MAX, "%s%d", VIRTIO_START, j);
440 snprintf(path, PATH_MAX, XEN_VM_NODE_FMT,
441 dom_id, node);
442
443 th = xs_transaction_start(watch.xs);
444 status = xs_read(watch.xs, th, path, &len);
445 xs_transaction_end(watch.xs, th, false);
446
447 if (status == NULL)
448 break;
449
450 /* if there's any valid virtio device */
451 errno = 0;
452 val = strtol(status, &end, 0);
453 if (errno != 0 || end == NULL || dom_id == 0)
454 val = 0;
455 if (val == 1) {
456 guest = add_xen_guest(dom_id);
457 if (guest == NULL)
458 continue;
459 RTE_LOG(INFO, XENHOST, " there's a new virtio existed, new a virtio device\n\n");
460
461 RTE_LOG(INFO, XENHOST, " parse_vringnode dom_id %d virtioidx %d\n",dom_id,j);
462 if (parse_vringnode(guest, j)) {
463 RTE_LOG(ERR, XENHOST, " there is invalid information in xenstore\n");
464 TAILQ_REMOVE(&guest_root, guest, next);
465 destroy_guest(guest);
466
467 continue;
468 }
469
470 /*if pool_num > 0, then mempool has already been parsed*/
471 if (guest->pool_num == 0 && parse_mempoolnode(guest)) {
472 RTE_LOG(ERR, XENHOST, " there is error information in xenstore\n");
473 TAILQ_REMOVE(&guest_root, guest, next);
474 destroy_guest(guest);
475 continue;
476 }
477
478 net_config = new_device(j, guest);
479 /* every thing is ready now, added into data core */
480 notify_ops->new_device(&net_config->dev);
481 }
482 }
483 }
484
485 free(dom);
486 return;
487 }
488
489 void
490 virtio_monitor_loop(void)
491 {
492 char **vec;
493 xs_transaction_t th;
494 char *buf;
495 unsigned int len;
496 unsigned int dom_id;
497 uint32_t virtio_idx;
498 struct xen_guest *guest;
499 struct virtio_net_config_ll *net_config;
500 enum fieldnames {
501 FLD_NULL = 0,
502 FLD_LOCAL,
503 FLD_DOMAIN,
504 FLD_ID,
505 FLD_CONTROL,
506 FLD_DPDK,
507 FLD_NODE,
508 _NUM_FLD
509 };
510 char *str_fld[_NUM_FLD];
511 char *str;
512 char *end;
513
514 virtio_init();
515 while (1) {
516 watch_unmap_event();
517
518 usleep(50);
519 vec = xs_check_watch(watch.xs);
520
521 if (vec == NULL)
522 continue;
523
524 th = xs_transaction_start(watch.xs);
525
526 buf = xs_read(watch.xs, th, vec[XS_WATCH_PATH],&len);
527 xs_transaction_end(watch.xs, th, false);
528
529 if (buf) {
530 /* theres' some node for vhost existed */
531 if (rte_strsplit(vec[XS_WATCH_PATH], strnlen(vec[XS_WATCH_PATH], PATH_MAX),
532 str_fld, _NUM_FLD, '/') == _NUM_FLD) {
533 if (strstr(str_fld[FLD_NODE], VIRTIO_START)) {
534 errno = 0;
535 str = str_fld[FLD_ID];
536 dom_id = strtoul(str, &end, 0);
537 if (errno != 0 || end == NULL || end == str ) {
538 RTE_LOG(INFO, XENHOST, "invalid domain id\n");
539 continue;
540 }
541
542 errno = 0;
543 str = str_fld[FLD_NODE] + sizeof(VIRTIO_START) - 1;
544 virtio_idx = strtoul(str, &end, 0);
545 if (errno != 0 || end == NULL || end == str
546 || virtio_idx > MAX_VIRTIO) {
547 RTE_LOG(INFO, XENHOST, "invalid virtio idx\n");
548 continue;
549 }
550 RTE_LOG(INFO, XENHOST, " #####virtio dev (%d, %d) is started\n", dom_id, virtio_idx);
551
552 guest = add_xen_guest(dom_id);
553 if (guest == NULL)
554 continue;
555 guest->dom_id = dom_id;
556 if (parse_vringnode(guest, virtio_idx)) {
557 RTE_LOG(ERR, XENHOST, " there is invalid information in xenstore\n");
558 /*guest newly created? guest existed ?*/
559 TAILQ_REMOVE(&guest_root, guest, next);
560 destroy_guest(guest);
561 continue;
562 }
563 /*if pool_num > 0, then mempool has already been parsed*/
564 if (guest->pool_num == 0 && parse_mempoolnode(guest)) {
565 RTE_LOG(ERR, XENHOST, " there is error information in xenstore\n");
566 TAILQ_REMOVE(&guest_root, guest, next);
567 destroy_guest(guest);
568 continue;
569 }
570
571
572 net_config = new_device(virtio_idx, guest);
573 RTE_LOG(INFO, XENHOST, " Add to dataplane core\n");
574 notify_ops->new_device(&net_config->dev);
575
576 }
577 }
578 }
579
580 free(vec);
581 }
582 return;
583 }
584
585 /*
586 * Register ops so that we can add/remove device to data core.
587 */
588 int
589 init_virtio_xen(struct virtio_net_device_ops const *const ops)
590 {
591 notify_ops = ops;
592 if (xenhost_init())
593 return -1;
594 return 0;
595 }