drivers/misc/vmw_vmci/vmci_guest.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * VMware VMCI Driver
4 *
5 * Copyright (C) 2012 VMware, Inc. All rights reserved.
6 */
7
8 #include <linux/vmw_vmci_defs.h>
9 #include <linux/vmw_vmci_api.h>
10 #include <linux/moduleparam.h>
11 #include <linux/interrupt.h>
12 #include <linux/highmem.h>
13 #include <linux/kernel.h>
14 #include <linux/mm.h>
15 #include <linux/module.h>
16 #include <linux/sched.h>
17 #include <linux/slab.h>
18 #include <linux/init.h>
19 #include <linux/pci.h>
20 #include <linux/smp.h>
21 #include <linux/io.h>
22 #include <linux/vmalloc.h>
23
24 #include "vmci_datagram.h"
25 #include "vmci_doorbell.h"
26 #include "vmci_context.h"
27 #include "vmci_driver.h"
28 #include "vmci_event.h"
29
30 #define PCI_DEVICE_ID_VMWARE_VMCI 0x0740
31
32 #define VMCI_UTIL_NUM_RESOURCES 1
33
34 /*
35 * Datagram buffers for DMA send/receive must accommodate at least
36 * a maximum-sized datagram and the header.
37 */
38 #define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)
39
40 static bool vmci_disable_msi;
41 module_param_named(disable_msi, vmci_disable_msi, bool, 0);
42 MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");
43
44 static bool vmci_disable_msix;
45 module_param_named(disable_msix, vmci_disable_msix, bool, 0);
46 MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");
47
48 static u32 ctx_update_sub_id = VMCI_INVALID_ID;
49 static u32 vm_context_id = VMCI_INVALID_ID;
50
51 struct vmci_guest_device {
52 struct device *dev; /* PCI device we are attached to */
53 void __iomem *iobase;
54 void __iomem *mmio_base;
55
56 bool exclusive_vectors;
57
58 struct tasklet_struct datagram_tasklet;
59 struct tasklet_struct bm_tasklet;
60
61 void *data_buffer;
62 dma_addr_t data_buffer_base;
63 void *tx_buffer;
64 dma_addr_t tx_buffer_base;
65 void *notification_bitmap;
66 dma_addr_t notification_base;
67 };
68
69 static bool use_ppn64;
70
71 bool vmci_use_ppn64(void)
72 {
73 return use_ppn64;
74 }
75
76 /* vmci_dev singleton device and supporting data */
77 struct pci_dev *vmci_pdev;
78 static struct vmci_guest_device *vmci_dev_g;
79 static DEFINE_SPINLOCK(vmci_dev_spinlock);
80
81 static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);
82
83 bool vmci_guest_code_active(void)
84 {
85 return atomic_read(&vmci_num_guest_devices) != 0;
86 }
87
88 u32 vmci_get_vm_context_id(void)
89 {
90 if (vm_context_id == VMCI_INVALID_ID) {
91 struct vmci_datagram get_cid_msg;
92 get_cid_msg.dst =
93 vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
94 VMCI_GET_CONTEXT_ID);
95 get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
96 get_cid_msg.payload_size = 0;
97 vm_context_id = vmci_send_datagram(&get_cid_msg);
98 }
99 return vm_context_id;
100 }
101
102 static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
103 {
104 if (dev->mmio_base != NULL)
105 return readl(dev->mmio_base + reg);
106 return ioread32(dev->iobase + reg);
107 }
108
109 static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
110 {
111 if (dev->mmio_base != NULL)
112 writel(val, dev->mmio_base + reg);
113 else
114 iowrite32(val, dev->iobase + reg);
115 }
116
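/*
 * All register access funnels through the two helpers above: newer
 * devices expose their registers through the MMIO window mapped from
 * BAR1 (mmio_base), while older devices only provide the port I/O
 * range from BAR0 (iobase). Which path is used is decided once, at
 * probe time, when mmio_base is either mapped or left NULL.
 */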
117 /*
118 * VM to hypervisor call mechanism. We use the standard VMware naming
119 * convention since shared code is calling this function as well.
120 */
121 int vmci_send_datagram(struct vmci_datagram *dg)
122 {
123 unsigned long flags;
124 int result;
125
126 /* Check args. */
127 if (dg == NULL)
128 return VMCI_ERROR_INVALID_ARGS;
129
130 /*
131 * Need to acquire spinlock on the device because the datagram
132 * data may be spread over multiple pages and the monitor may
133 * interleave device user rpc calls from multiple
134 * VCPUs. Acquiring the spinlock precludes that
135 * possibility. Interrupts are disabled to avoid incoming
136 * datagrams during a "rep out" and possibly ending up back in
137 * this function.
138 */
139 spin_lock_irqsave(&vmci_dev_spinlock, flags);
140
141 if (vmci_dev_g) {
142 iowrite8_rep(vmci_dev_g->iobase + VMCI_DATA_OUT_ADDR,
143 dg, VMCI_DG_SIZE(dg));
144 result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
145 } else {
146 result = VMCI_ERROR_UNAVAILABLE;
147 }
148
149 spin_unlock_irqrestore(&vmci_dev_spinlock, flags);
150
151 return result;
152 }
153 EXPORT_SYMBOL_GPL(vmci_send_datagram);
154
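/*
 * Illustrative sketch (not part of the driver): a caller fills in a
 * struct vmci_datagram, points dst at a hypervisor resource and hands
 * it to vmci_send_datagram(); the return value is either the result
 * read back from VMCI_RESULT_LOW_ADDR or a VMCI_ERROR_* code.
 * vmci_get_vm_context_id() above does essentially this:
 *
 *	struct vmci_datagram dg;
 *
 *	dg.dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
 *				  VMCI_GET_CONTEXT_ID);
 *	dg.src = VMCI_ANON_SRC_HANDLE;
 *	dg.payload_size = 0;
 *	result = vmci_send_datagram(&dg);
 */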
155 /*
156 * Gets called with the new context id when the VM's context id is
157 * updated or the VM is resumed.
158 */
159 static void vmci_guest_cid_update(u32 sub_id,
160 const struct vmci_event_data *event_data,
161 void *client_data)
162 {
163 const struct vmci_event_payld_ctx *ev_payload =
164 vmci_event_data_const_payload(event_data);
165
166 if (sub_id != ctx_update_sub_id) {
167 pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
168 return;
169 }
170
171 if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
172 pr_devel("Invalid event data\n");
173 return;
174 }
175
176 pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
177 vm_context_id, ev_payload->context_id, event_data->event);
178
179 vm_context_id = ev_payload->context_id;
180 }
181
182 /*
183 * Verify that the host supports the hypercalls we need. If it does not,
184 * try to find fallback hypercalls and use those instead. Returns
185 * 0 if the required hypercalls (or fallback hypercalls) are
186 * supported by the host, an error code otherwise.
187 */
188 static int vmci_check_host_caps(struct pci_dev *pdev)
189 {
190 bool result;
191 struct vmci_resource_query_msg *msg;
192 u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
193 VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
194 struct vmci_datagram *check_msg;
195
196 check_msg = kzalloc(msg_size, GFP_KERNEL);
197 if (!check_msg) {
198 dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
199 return -ENOMEM;
200 }
201
202 check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
203 VMCI_RESOURCES_QUERY);
204 check_msg->src = VMCI_ANON_SRC_HANDLE;
205 check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
206 msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);
207
208 msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
209 msg->resources[0] = VMCI_GET_CONTEXT_ID;
210
211 /* Check that hypercalls are supported */
212 result = vmci_send_datagram(check_msg) == 0x01;
213 kfree(check_msg);
214
215 dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
216 __func__, result ? "PASSED" : "FAILED");
217
218 /* We need the vector. There are no fallbacks. */
219 return result ? 0 : -ENXIO;
220 }
221
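/*
 * The capability check above rides on the datagram hypercall itself:
 * a VMCI_RESOURCES_QUERY datagram is sent to the hypervisor with a
 * payload naming the VMCI_UTIL_NUM_RESOURCES resources we care about
 * (only VMCI_GET_CONTEXT_ID here), and a reply of 0x01 is taken to
 * mean the queried resource is available. Because the query can only
 * succeed if vmci_send_datagram() works, a passing check also proves
 * basic hypercall support.
 */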
222 /*
223 * Reads datagrams from the data in port and dispatches them. We
224 * always start reading datagrams into only the first page of the
225 * datagram buffer. If the datagrams don't fit into one page, we
226 * use the maximum datagram buffer size for the remainder of the
227 * invocation. This is a simple heuristic for not penalizing
228 * small datagrams.
229 *
230 * This function assumes that it has exclusive access to the data
231 * in port for the duration of the call.
232 */
233 static void vmci_dispatch_dgs(unsigned long data)
234 {
235 struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
236 u8 *dg_in_buffer = vmci_dev->data_buffer;
237 struct vmci_datagram *dg;
238 size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
239 size_t current_dg_in_buffer_size = PAGE_SIZE;
240 size_t remaining_bytes;
241
242 BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);
243
244 ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
245 vmci_dev->data_buffer, current_dg_in_buffer_size);
246 dg = (struct vmci_datagram *)dg_in_buffer;
247 remaining_bytes = current_dg_in_buffer_size;
248
249 while (dg->dst.resource != VMCI_INVALID_ID ||
250 remaining_bytes > PAGE_SIZE) {
251 unsigned dg_in_size;
252
253 /*
254 * When the input buffer spans multiple pages, a datagram can
255 * start on any page boundary in the buffer.
256 */
257 if (dg->dst.resource == VMCI_INVALID_ID) {
258 dg = (struct vmci_datagram *)roundup(
259 (uintptr_t)dg + 1, PAGE_SIZE);
260 remaining_bytes =
261 (size_t)(dg_in_buffer +
262 current_dg_in_buffer_size -
263 (u8 *)dg);
264 continue;
265 }
266
267 dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);
268
269 if (dg_in_size <= dg_in_buffer_size) {
270 int result;
271
272 /*
273 * If the remaining bytes in the datagram
274 * buffer don't contain the complete
275 * datagram, we first make sure we have enough
276 * room for it and then we read the remainder
277 * of the datagram and possibly any following
278 * datagrams.
279 */
280 if (dg_in_size > remaining_bytes) {
281 if (remaining_bytes !=
282 current_dg_in_buffer_size) {
283
284 /*
285 * We move the partial
286 * datagram to the front and
287 * read the remainder of the
288 * datagram and possibly any
289 * following datagrams into the
290 * following bytes.
291 */
292 memmove(dg_in_buffer, dg_in_buffer +
293 current_dg_in_buffer_size -
294 remaining_bytes,
295 remaining_bytes);
296 dg = (struct vmci_datagram *)
297 dg_in_buffer;
298 }
299
300 if (current_dg_in_buffer_size !=
301 dg_in_buffer_size)
302 current_dg_in_buffer_size =
303 dg_in_buffer_size;
304
305 ioread8_rep(vmci_dev->iobase +
306 VMCI_DATA_IN_ADDR,
307 vmci_dev->data_buffer +
308 remaining_bytes,
309 current_dg_in_buffer_size -
310 remaining_bytes);
311 }
312
313 /*
314 * We special case event datagrams from the
315 * hypervisor.
316 */
317 if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
318 dg->dst.resource == VMCI_EVENT_HANDLER) {
319 result = vmci_event_dispatch(dg);
320 } else {
321 result = vmci_datagram_invoke_guest_handler(dg);
322 }
323 if (result < VMCI_SUCCESS)
324 dev_dbg(vmci_dev->dev,
325 "Datagram with resource (ID=0x%x) failed (err=%d)\n",
326 dg->dst.resource, result);
327
328 /* On to the next datagram. */
329 dg = (struct vmci_datagram *)((u8 *)dg +
330 dg_in_size);
331 } else {
332 size_t bytes_to_skip;
333
334 /*
335 * The datagram doesn't fit in a datagram buffer of
336 * maximal size, so we drop it.
337 */
338 dev_dbg(vmci_dev->dev,
339 "Failed to receive datagram (size=%u bytes)\n",
340 dg_in_size);
341
342 bytes_to_skip = dg_in_size - remaining_bytes;
343 if (current_dg_in_buffer_size != dg_in_buffer_size)
344 current_dg_in_buffer_size = dg_in_buffer_size;
345
346 for (;;) {
347 ioread8_rep(vmci_dev->iobase +
348 VMCI_DATA_IN_ADDR,
349 vmci_dev->data_buffer,
350 current_dg_in_buffer_size);
351 if (bytes_to_skip <= current_dg_in_buffer_size)
352 break;
353
354 bytes_to_skip -= current_dg_in_buffer_size;
355 }
356 dg = (struct vmci_datagram *)(dg_in_buffer +
357 bytes_to_skip);
358 }
359
360 remaining_bytes =
361 (size_t) (dg_in_buffer + current_dg_in_buffer_size -
362 (u8 *)dg);
363
364 if (remaining_bytes < VMCI_DG_HEADERSIZE) {
365 /* Get the next batch of datagrams. */
366
367 ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
368 vmci_dev->data_buffer,
369 current_dg_in_buffer_size);
370 dg = (struct vmci_datagram *)dg_in_buffer;
371 remaining_bytes = current_dg_in_buffer_size;
372 }
373 }
374 }
375
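/*
 * Worked example of the heuristic above: the first read pulls in one
 * PAGE_SIZE of data. If a datagram's header says it is larger than
 * the bytes remaining in the buffer, the partial datagram is moved to
 * the front, the working buffer size is bumped to VMCI_MAX_DG_SIZE,
 * and the rest of the datagram (plus anything following it) is read
 * in. A datagram that exceeds even the maximal buffer is drained from
 * the port in buffer-sized chunks and dropped. Once fewer than
 * VMCI_DG_HEADERSIZE bytes remain, the next batch is fetched.
 */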
376 /*
377 * Scans the notification bitmap for raised flags, clears them
378 * and handles the notifications.
379 */
380 static void vmci_process_bitmap(unsigned long data)
381 {
382 struct vmci_guest_device *dev = (struct vmci_guest_device *)data;
383
384 if (!dev->notification_bitmap) {
385 dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
386 return;
387 }
388
389 vmci_dbell_scan_notification_entries(dev->notification_bitmap);
390 }
391
392 /*
393 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
394 * interrupt (vector VMCI_INTR_DATAGRAM).
395 */
396 static irqreturn_t vmci_interrupt(int irq, void *_dev)
397 {
398 struct vmci_guest_device *dev = _dev;
399
400 /*
401 * If we are using MSI-X with exclusive vectors then we simply schedule
402 * the datagram tasklet, since we know the interrupt was meant for us.
403 * Otherwise we must read the ICR to determine what to do.
404 */
405
406 if (dev->exclusive_vectors) {
407 tasklet_schedule(&dev->datagram_tasklet);
408 } else {
409 unsigned int icr;
410
411 /* Acknowledge interrupt and determine what needs doing. */
412 icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
413 if (icr == 0 || icr == ~0)
414 return IRQ_NONE;
415
416 if (icr & VMCI_ICR_DATAGRAM) {
417 tasklet_schedule(&dev->datagram_tasklet);
418 icr &= ~VMCI_ICR_DATAGRAM;
419 }
420
421 if (icr & VMCI_ICR_NOTIFICATION) {
422 tasklet_schedule(&dev->bm_tasklet);
423 icr &= ~VMCI_ICR_NOTIFICATION;
424 }
425
426 if (icr & VMCI_ICR_DMA_DATAGRAM)
427 icr &= ~VMCI_ICR_DMA_DATAGRAM;
428
429 if (icr != 0)
430 dev_warn(dev->dev,
431 "Ignoring unknown interrupt cause (%d)\n",
432 icr);
433 }
434
435 return IRQ_HANDLED;
436 }
437
438 /*
439 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
440 * which is for the notification bitmap. Will only get called if we are
441 * using MSI-X with exclusive vectors.
442 */
443 static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
444 {
445 struct vmci_guest_device *dev = _dev;
446
447 /* For MSI-X we can just assume it was meant for us. */
448 tasklet_schedule(&dev->bm_tasklet);
449
450 return IRQ_HANDLED;
451 }
452
453 /*
454 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
455 * which is for the completion of a DMA datagram send or receive operation.
456 * Will only get called if we are using MSI-X with exclusive vectors.
457 */
458 static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
459 {
460 return IRQ_HANDLED;
461 }
462
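/*
 * Summary of the interrupt fan-out: with exclusive MSI-X vectors,
 * vector 0 handles incoming datagrams, vector 1 the notification
 * bitmap and vector 2 DMA datagram completion. With a shared MSI or
 * legacy interrupt there is only vector 0, and vmci_interrupt() reads
 * and acknowledges ICR to work out which of those causes fired.
 */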
463 static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
464 {
465 if (vmci_dev->mmio_base != NULL) {
466 if (vmci_dev->tx_buffer != NULL)
467 dma_free_coherent(vmci_dev->dev,
468 VMCI_DMA_DG_BUFFER_SIZE,
469 vmci_dev->tx_buffer,
470 vmci_dev->tx_buffer_base);
471 if (vmci_dev->data_buffer != NULL)
472 dma_free_coherent(vmci_dev->dev,
473 VMCI_DMA_DG_BUFFER_SIZE,
474 vmci_dev->data_buffer,
475 vmci_dev->data_buffer_base);
476 } else {
477 vfree(vmci_dev->data_buffer);
478 }
479 }
480
481 /*
482 * Most of the initialization at device probe time is done here.
483 */
484 static int vmci_guest_probe_device(struct pci_dev *pdev,
485 const struct pci_device_id *id)
486 {
487 struct vmci_guest_device *vmci_dev;
488 void __iomem *iobase = NULL;
489 void __iomem *mmio_base = NULL;
490 unsigned int num_irq_vectors;
491 unsigned int capabilities;
492 unsigned int caps_in_use;
493 unsigned long cmd;
494 int vmci_err;
495 int error;
496
497 dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");
498
499 error = pcim_enable_device(pdev);
500 if (error) {
501 dev_err(&pdev->dev,
502 "Failed to enable VMCI device: %d\n", error);
503 return error;
504 }
505
506 /*
507 * A VMCI device with MMIO access to its registers requests 256KB
508 * for BAR1. If present, the driver uses the new VMCI device
509 * functionality for register access and datagram send/recv.
510 */
511
512 if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
513 dev_info(&pdev->dev, "MMIO register access is available\n");
514 mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
515 VMCI_MMIO_ACCESS_SIZE);
516 /* If the map fails, we fall back to IOIO access. */
517 if (!mmio_base)
518 dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
519 }
520
521 if (!mmio_base) {
522 error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
523 if (error) {
524 dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
525 return error;
526 }
527 iobase = pcim_iomap_table(pdev)[0];
528 }
529
530 vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
531 if (!vmci_dev) {
532 dev_err(&pdev->dev,
533 "Can't allocate memory for VMCI device\n");
534 return -ENOMEM;
535 }
536
537 vmci_dev->dev = &pdev->dev;
538 vmci_dev->exclusive_vectors = false;
539 vmci_dev->iobase = iobase;
540 vmci_dev->mmio_base = mmio_base;
541
542 tasklet_init(&vmci_dev->datagram_tasklet,
543 vmci_dispatch_dgs, (unsigned long)vmci_dev);
544 tasklet_init(&vmci_dev->bm_tasklet,
545 vmci_process_bitmap, (unsigned long)vmci_dev);
546
547 if (mmio_base != NULL) {
548 vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
549 &vmci_dev->tx_buffer_base,
550 GFP_KERNEL);
551 if (!vmci_dev->tx_buffer) {
552 dev_err(&pdev->dev,
553 "Can't allocate memory for datagram tx buffer\n");
554 return -ENOMEM;
555 }
556
557 vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
558 &vmci_dev->data_buffer_base,
559 GFP_KERNEL);
560 } else {
561 vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
562 }
563 if (!vmci_dev->data_buffer) {
564 dev_err(&pdev->dev,
565 "Can't allocate memory for datagram buffer\n");
566 error = -ENOMEM;
567 goto err_free_data_buffers;
568 }
569
570 pci_set_master(pdev); /* To enable queue_pair functionality. */
571
572 /*
573 * Verify that the VMCI Device supports the capabilities that
574 * we need. If the device is missing capabilities that we would
575 * like to use, check for fallback capabilities and use those
576 * instead (so we can run a new VM on old hosts). Fail the load if
577 * a required capability is missing and there is no fallback.
578 *
579 * Right now, we need datagrams. There are no fallbacks.
580 */
581 capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
582 if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
583 dev_err(&pdev->dev, "Device does not support datagrams\n");
584 error = -ENXIO;
585 goto err_free_data_buffers;
586 }
587 caps_in_use = VMCI_CAPS_DATAGRAM;
588
589 /*
590 * Use 64-bit PPNs if the device supports them.
591 *
592 * There is no check for the return value of dma_set_mask_and_coherent
593 * since this driver can handle the default mask values if
594 * dma_set_mask_and_coherent fails.
595 */
596 if (capabilities & VMCI_CAPS_PPN64) {
597 dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
598 use_ppn64 = true;
599 caps_in_use |= VMCI_CAPS_PPN64;
600 } else {
601 dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
602 use_ppn64 = false;
603 }
604
605 /*
606 * If the hardware supports notifications, we will use that as
607 * well.
608 */
609 if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
610 vmci_dev->notification_bitmap = dma_alloc_coherent(
611 &pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
612 GFP_KERNEL);
613 if (!vmci_dev->notification_bitmap) {
614 dev_warn(&pdev->dev,
615 "Unable to allocate notification bitmap\n");
616 } else {
617 memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE);
618 caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
619 }
620 }
621
622 if (mmio_base != NULL) {
623 if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
624 caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
625 } else {
626 dev_err(&pdev->dev,
627 "Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
628 error = -ENXIO;
629 goto err_free_data_buffers;
630 }
631 }
632
633 dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);
634
635 /* Let the host know which capabilities we intend to use. */
636 vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);
637
638 if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
639 /* Let the device know the size for pages passed down. */
640 vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);
641
642 /* Configure the high order parts of the data in/out buffers. */
643 vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
644 VMCI_DATA_IN_HIGH_ADDR);
645 vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
646 VMCI_DATA_OUT_HIGH_ADDR);
647 }
648
649 /* Set up global device so that we can start sending datagrams */
650 spin_lock_irq(&vmci_dev_spinlock);
651 vmci_dev_g = vmci_dev;
652 vmci_pdev = pdev;
653 spin_unlock_irq(&vmci_dev_spinlock);
654
655 /*
656 * Register notification bitmap with device if that capability is
657 * used.
658 */
659 if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
660 unsigned long bitmap_ppn =
661 vmci_dev->notification_base >> PAGE_SHIFT;
662 if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
663 dev_warn(&pdev->dev,
664 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
665 bitmap_ppn);
666 error = -ENXIO;
667 goto err_remove_vmci_dev_g;
668 }
669 }
670
671 /* Check host capabilities. */
672 error = vmci_check_host_caps(pdev);
673 if (error)
674 goto err_remove_bitmap;
675
676 /* Enable device. */
677
678 /*
679 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
680 * update the internal context id when needed.
681 */
682 vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
683 vmci_guest_cid_update, NULL,
684 &ctx_update_sub_id);
685 if (vmci_err < VMCI_SUCCESS)
686 dev_warn(&pdev->dev,
687 "Failed to subscribe to event (type=%d): %d\n",
688 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);
689
690 /*
691 * Enable interrupts. Try MSI-X first, then MSI, and then fallback on
692 * legacy interrupts.
693 */
694 if (vmci_dev->mmio_base != NULL)
695 num_irq_vectors = VMCI_MAX_INTRS;
696 else
697 num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
698 error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
699 PCI_IRQ_MSIX);
700 if (error < 0) {
701 error = pci_alloc_irq_vectors(pdev, 1, 1,
702 PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
703 if (error < 0)
704 goto err_remove_bitmap;
705 } else {
706 vmci_dev->exclusive_vectors = true;
707 }
708
709 /*
710 * Request IRQ for legacy or MSI interrupts, or for first
711 * MSI-X vector.
712 */
713 error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt,
714 IRQF_SHARED, KBUILD_MODNAME, vmci_dev);
715 if (error) {
716 dev_err(&pdev->dev, "Irq %u in use: %d\n",
717 pci_irq_vector(pdev, 0), error);
718 goto err_disable_msi;
719 }
720
721 /*
722 * For MSI-X with exclusive vectors we need to request an
723 * interrupt for each vector so that we get a separate
724 * interrupt handler routine. This allows us to distinguish
725 * between the vectors.
726 */
727 if (vmci_dev->exclusive_vectors) {
728 error = request_irq(pci_irq_vector(pdev, 1),
729 vmci_interrupt_bm, 0, KBUILD_MODNAME,
730 vmci_dev);
731 if (error) {
732 dev_err(&pdev->dev,
733 "Failed to allocate irq %u: %d\n",
734 pci_irq_vector(pdev, 1), error);
735 goto err_free_irq;
736 }
737 if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
738 error = request_irq(pci_irq_vector(pdev, 2),
739 vmci_interrupt_dma_datagram,
740 0, KBUILD_MODNAME, vmci_dev);
741 if (error) {
742 dev_err(&pdev->dev,
743 "Failed to allocate irq %u: %d\n",
744 pci_irq_vector(pdev, 2), error);
745 goto err_free_bm_irq;
746 }
747 }
748 }
749
750 dev_dbg(&pdev->dev, "Registered device\n");
751
752 atomic_inc(&vmci_num_guest_devices);
753
754 /* Enable specific interrupt bits. */
755 cmd = VMCI_IMR_DATAGRAM;
756 if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
757 cmd |= VMCI_IMR_NOTIFICATION;
758 if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
759 cmd |= VMCI_IMR_DMA_DATAGRAM;
760 vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);
761
762 /* Enable interrupts. */
763 vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);
764
765 pci_set_drvdata(pdev, vmci_dev);
766
767 vmci_call_vsock_callback(false);
768 return 0;
769
770 err_free_bm_irq:
771 free_irq(pci_irq_vector(pdev, 1), vmci_dev);
772 err_free_irq:
773 free_irq(pci_irq_vector(pdev, 0), vmci_dev);
774 tasklet_kill(&vmci_dev->datagram_tasklet);
775 tasklet_kill(&vmci_dev->bm_tasklet);
776
777 err_disable_msi:
778 pci_free_irq_vectors(pdev);
779
780 vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
781 if (vmci_err < VMCI_SUCCESS)
782 dev_warn(&pdev->dev,
783 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
784 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
785
786 err_remove_bitmap:
787 if (vmci_dev->notification_bitmap) {
788 vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
789 dma_free_coherent(&pdev->dev, PAGE_SIZE,
790 vmci_dev->notification_bitmap,
791 vmci_dev->notification_base);
792 }
793
794 err_remove_vmci_dev_g:
795 spin_lock_irq(&vmci_dev_spinlock);
796 vmci_pdev = NULL;
797 vmci_dev_g = NULL;
798 spin_unlock_irq(&vmci_dev_spinlock);
799
800 err_free_data_buffers:
801 vmci_free_dg_buffers(vmci_dev);
802
803 /* The rest are managed resources and will be freed by PCI core */
804 return error;
805 }
806
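/*
 * Teardown below runs roughly in reverse of probe: guest queue pair
 * endpoints are torn down, the context-id event subscription is
 * dropped, the global device pointers are cleared, the device is
 * reset (which also clears its notification bitmap state), IRQs and
 * vectors are freed, the tasklets are killed, and the datagram
 * buffers and MMIO mapping are released. pcim/devm-managed resources
 * are left to the PCI core.
 */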
807 static void vmci_guest_remove_device(struct pci_dev *pdev)
808 {
809 struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
810 int vmci_err;
811
812 dev_dbg(&pdev->dev, "Removing device\n");
813
814 atomic_dec(&vmci_num_guest_devices);
815
816 vmci_qp_guest_endpoints_exit();
817
818 vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
819 if (vmci_err < VMCI_SUCCESS)
820 dev_warn(&pdev->dev,
821 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
822 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);
823
824 spin_lock_irq(&vmci_dev_spinlock);
825 vmci_dev_g = NULL;
826 vmci_pdev = NULL;
827 spin_unlock_irq(&vmci_dev_spinlock);
828
829 dev_dbg(&pdev->dev, "Resetting vmci device\n");
830 vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
831
832 /*
833 * Free IRQ and then disable MSI/MSI-X as appropriate. For
834 * MSI-X, we might have multiple vectors, each with their own
835 * IRQ, which we must free too.
836 */
837 if (vmci_dev->exclusive_vectors) {
838 free_irq(pci_irq_vector(pdev, 1), vmci_dev);
839 if (vmci_dev->mmio_base != NULL)
840 free_irq(pci_irq_vector(pdev, 2), vmci_dev);
841 }
842 free_irq(pci_irq_vector(pdev, 0), vmci_dev);
843 pci_free_irq_vectors(pdev);
844
845 tasklet_kill(&vmci_dev->datagram_tasklet);
846 tasklet_kill(&vmci_dev->bm_tasklet);
847
848 if (vmci_dev->notification_bitmap) {
849 /*
850 * The device reset above cleared the bitmap state of the
851 * device, so we can safely free it here.
852 */
853
854 dma_free_coherent(&pdev->dev, PAGE_SIZE,
855 vmci_dev->notification_bitmap,
856 vmci_dev->notification_base);
857 }
858
859 vmci_free_dg_buffers(vmci_dev);
860
861 if (vmci_dev->mmio_base != NULL)
862 pci_iounmap(pdev, vmci_dev->mmio_base);
863
864 /* The rest are managed resources and will be freed by PCI core */
865 }
866
867 static const struct pci_device_id vmci_ids[] = {
868 { PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
869 { 0 },
870 };
871 MODULE_DEVICE_TABLE(pci, vmci_ids);
872
873 static struct pci_driver vmci_guest_driver = {
874 .name = KBUILD_MODNAME,
875 .id_table = vmci_ids,
876 .probe = vmci_guest_probe_device,
877 .remove = vmci_guest_remove_device,
878 };
879
880 int __init vmci_guest_init(void)
881 {
882 return pci_register_driver(&vmci_guest_driver);
883 }
884
885 void __exit vmci_guest_exit(void)
886 {
887 pci_unregister_driver(&vmci_guest_driver);
888 }
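/*
 * Hypothetical wiring sketch (assumption, not code from this file):
 * vmci_guest_init()/vmci_guest_exit() are not hooked up with
 * module_init()/module_exit() here; the common VMCI driver code is
 * expected to call them from its own init/exit path. Minimal glue
 * would look like:
 *
 *	static int __init example_init(void)
 *	{
 *		return vmci_guest_init();
 *	}
 *	module_init(example_init);
 *
 *	static void __exit example_exit(void)
 *	{
 *		vmci_guest_exit();
 *	}
 *	module_exit(example_exit);
 */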