drivers/misc/vmw_vmci/vmci_guest.c (at commit "VMCI: dma dg: add support for DMA datagrams sends", mirror_ubuntu-jammy-kernel.git)

// SPDX-License-Identifier: GPL-2.0-only
/*
 * VMware VMCI Driver
 *
 * Copyright (C) 2012 VMware, Inc. All rights reserved.
 */

#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/moduleparam.h>
#include <linux/interrupt.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/processor.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/io.h>
#include <linux/vmalloc.h>

#include "vmci_datagram.h"
#include "vmci_doorbell.h"
#include "vmci_context.h"
#include "vmci_driver.h"
#include "vmci_event.h"

#define PCI_DEVICE_ID_VMWARE_VMCI	0x0740

#define VMCI_UTIL_NUM_RESOURCES 1

/*
 * Datagram buffers for DMA send/receive must accommodate at least
 * a maximum sized datagram and the header.
 */
#define VMCI_DMA_DG_BUFFER_SIZE (VMCI_MAX_DG_SIZE + PAGE_SIZE)

static bool vmci_disable_msi;
module_param_named(disable_msi, vmci_disable_msi, bool, 0);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

static bool vmci_disable_msix;
module_param_named(disable_msix, vmci_disable_msix, bool, 0);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

static u32 ctx_update_sub_id = VMCI_INVALID_ID;
static u32 vm_context_id = VMCI_INVALID_ID;

struct vmci_guest_device {
	struct device *dev;	/* PCI device we are attached to */
	void __iomem *iobase;	/* I/O port register window (BAR 0), legacy access */
	void __iomem *mmio_base;	/* MMIO register window (BAR 1) on newer devices */

	bool exclusive_vectors;

	struct tasklet_struct datagram_tasklet;
	struct tasklet_struct bm_tasklet;

	void *data_buffer;	/* Buffer for incoming datagrams */
	dma_addr_t data_buffer_base;
	void *tx_buffer;	/* Buffer for outgoing (DMA) datagrams */
	dma_addr_t tx_buffer_base;
	void *notification_bitmap;
	dma_addr_t notification_base;
};

static bool use_ppn64;

bool vmci_use_ppn64(void)
{
	return use_ppn64;
}

/* vmci_dev singleton device and supporting data */
struct pci_dev *vmci_pdev;
static struct vmci_guest_device *vmci_dev_g;
static DEFINE_SPINLOCK(vmci_dev_spinlock);

static atomic_t vmci_num_guest_devices = ATOMIC_INIT(0);

bool vmci_guest_code_active(void)
{
	return atomic_read(&vmci_num_guest_devices) != 0;
}

u32 vmci_get_vm_context_id(void)
{
	if (vm_context_id == VMCI_INVALID_ID) {
		struct vmci_datagram get_cid_msg;
		get_cid_msg.dst =
		    vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
				     VMCI_GET_CONTEXT_ID);
		get_cid_msg.src = VMCI_ANON_SRC_HANDLE;
		get_cid_msg.payload_size = 0;
		vm_context_id = vmci_send_datagram(&get_cid_msg);
	}
	return vm_context_id;
}

static unsigned int vmci_read_reg(struct vmci_guest_device *dev, u32 reg)
{
	if (dev->mmio_base != NULL)
		return readl(dev->mmio_base + reg);
	return ioread32(dev->iobase + reg);
}

static void vmci_write_reg(struct vmci_guest_device *dev, u32 val, u32 reg)
{
	if (dev->mmio_base != NULL)
		writel(val, dev->mmio_base + reg);
	else
		iowrite32(val, dev->iobase + reg);
}

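/*
 * Register access summary (see the probe path below): newer VMCI
 * devices expose their registers through an MMIO window on BAR 1,
 * mapped into mmio_base; when that window is absent or cannot be
 * mapped, the driver falls back to the I/O port registers on BAR 0
 * via iobase. The two helpers above hide that difference from the
 * rest of the driver.
 */
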
static int vmci_write_data(struct vmci_guest_device *dev,
			   struct vmci_datagram *dg)
{
	int result;

	if (dev->mmio_base != NULL) {
		struct vmci_data_in_out_header *buffer_header = dev->tx_buffer;
		u8 *dg_out_buffer = (u8 *)(buffer_header + 1);

		if (VMCI_DG_SIZE(dg) > VMCI_MAX_DG_SIZE)
			return VMCI_ERROR_INVALID_ARGS;

		/*
		 * Initialize send buffer with outgoing datagram
		 * and set up header for inline data. Device will
		 * not access buffer asynchronously - only after
		 * the write to VMCI_DATA_OUT_LOW_ADDR.
		 */
		memcpy(dg_out_buffer, dg, VMCI_DG_SIZE(dg));
		buffer_header->opcode = 0;
		buffer_header->size = VMCI_DG_SIZE(dg);
		buffer_header->busy = 1;

		vmci_write_reg(dev, lower_32_bits(dev->tx_buffer_base),
			       VMCI_DATA_OUT_LOW_ADDR);

		/* Caller holds a spinlock, so cannot block. */
		spin_until_cond(buffer_header->busy == 0);

		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
		if (result == VMCI_SUCCESS)
			result = (int)buffer_header->result;
	} else {
		iowrite8_rep(dev->iobase + VMCI_DATA_OUT_ADDR,
			     dg, VMCI_DG_SIZE(dg));
		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
	}

	return result;
}

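/*
 * The MMIO/DMA send handshake above works as follows: the datagram is
 * copied inline after the struct vmci_data_in_out_header at the start
 * of the coherent tx_buffer, the header's busy flag is set, and the
 * lower 32 bits of the buffer's DMA address are written to
 * VMCI_DATA_OUT_LOW_ADDR (the upper 32 bits are programmed once at
 * probe time via VMCI_DATA_OUT_HIGH_ADDR). The CPU then spins until
 * the device clears the busy flag and fills in the per-datagram
 * result.
 */
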
/*
 * VM to hypervisor call mechanism. We use the standard VMware naming
 * convention since shared code is calling this function as well.
 */
int vmci_send_datagram(struct vmci_datagram *dg)
{
	unsigned long flags;
	int result;

	/* Check args. */
	if (dg == NULL)
		return VMCI_ERROR_INVALID_ARGS;

	/*
	 * Need to acquire spinlock on the device because the datagram
	 * data may be spread over multiple pages and the monitor may
	 * interleave device user rpc calls from multiple
	 * VCPUs. Acquiring the spinlock precludes that
	 * possibility. Interrupts are disabled to avoid incoming
	 * datagrams during a "rep out" and possibly ending up back in
	 * this function.
	 */
	spin_lock_irqsave(&vmci_dev_spinlock, flags);

	if (vmci_dev_g) {
		vmci_write_data(vmci_dev_g, dg);
		result = vmci_read_reg(vmci_dev_g, VMCI_RESULT_LOW_ADDR);
	} else {
		result = VMCI_ERROR_UNAVAILABLE;
	}

	spin_unlock_irqrestore(&vmci_dev_spinlock, flags);

	return result;
}
EXPORT_SYMBOL_GPL(vmci_send_datagram);

/*
 * Gets called with the new context id when the guest context is
 * updated or resumed.
 */
static void vmci_guest_cid_update(u32 sub_id,
				  const struct vmci_event_data *event_data,
				  void *client_data)
{
	const struct vmci_event_payld_ctx *ev_payload =
				vmci_event_data_const_payload(event_data);

	if (sub_id != ctx_update_sub_id) {
		pr_devel("Invalid subscriber (ID=0x%x)\n", sub_id);
		return;
	}

	if (!event_data || ev_payload->context_id == VMCI_INVALID_ID) {
		pr_devel("Invalid event data\n");
		return;
	}

	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event (type=%d)\n",
		 vm_context_id, ev_payload->context_id, event_data->event);

	vm_context_id = ev_payload->context_id;
}

/*
 * Verify that the host supports the hypercalls we need. If it does not,
 * try to find fallback hypercalls and use those instead. Returns 0 if
 * the required hypercalls (or fallback hypercalls) are supported by the
 * host, an error code otherwise.
 */
static int vmci_check_host_caps(struct pci_dev *pdev)
{
	bool result;
	struct vmci_resource_query_msg *msg;
	u32 msg_size = sizeof(struct vmci_resource_query_hdr) +
			VMCI_UTIL_NUM_RESOURCES * sizeof(u32);
	struct vmci_datagram *check_msg;

	check_msg = kzalloc(msg_size, GFP_KERNEL);
	if (!check_msg) {
		dev_err(&pdev->dev, "%s: Insufficient memory\n", __func__);
		return -ENOMEM;
	}

	check_msg->dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
					  VMCI_RESOURCES_QUERY);
	check_msg->src = VMCI_ANON_SRC_HANDLE;
	check_msg->payload_size = msg_size - VMCI_DG_HEADERSIZE;
	msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(check_msg);

	msg->num_resources = VMCI_UTIL_NUM_RESOURCES;
	msg->resources[0] = VMCI_GET_CONTEXT_ID;

	/* Checks that hypercalls are supported */
	result = vmci_send_datagram(check_msg) == 0x01;
	kfree(check_msg);

	dev_dbg(&pdev->dev, "%s: Host capability check: %s\n",
		__func__, result ? "PASSED" : "FAILED");

	/* We need the vector. There are no fallbacks. */
	return result ? 0 : -ENXIO;
}

/*
 * Reads datagrams from the data in port and dispatches them. We
 * always start reading datagrams into only the first page of the
 * datagram buffer. If the datagrams don't fit into one page, we
 * use the maximum datagram buffer size for the remainder of the
 * invocation. This is a simple heuristic for not penalizing
 * small datagrams.
 *
 * This function assumes that it has exclusive access to the data
 * in port for the duration of the call.
 */
static void vmci_dispatch_dgs(unsigned long data)
{
	struct vmci_guest_device *vmci_dev = (struct vmci_guest_device *)data;
	u8 *dg_in_buffer = vmci_dev->data_buffer;
	struct vmci_datagram *dg;
	size_t dg_in_buffer_size = VMCI_MAX_DG_SIZE;
	size_t current_dg_in_buffer_size = PAGE_SIZE;
	size_t remaining_bytes;

	BUILD_BUG_ON(VMCI_MAX_DG_SIZE < PAGE_SIZE);

	ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
		    vmci_dev->data_buffer, current_dg_in_buffer_size);
	dg = (struct vmci_datagram *)dg_in_buffer;
	remaining_bytes = current_dg_in_buffer_size;

	while (dg->dst.resource != VMCI_INVALID_ID ||
	       remaining_bytes > PAGE_SIZE) {
		unsigned dg_in_size;

		/*
		 * When the input buffer spans multiple pages, a datagram can
		 * start on any page boundary in the buffer.
		 */
		if (dg->dst.resource == VMCI_INVALID_ID) {
			dg = (struct vmci_datagram *)roundup(
				(uintptr_t)dg + 1, PAGE_SIZE);
			remaining_bytes =
				(size_t)(dg_in_buffer +
					 current_dg_in_buffer_size -
					 (u8 *)dg);
			continue;
		}

		dg_in_size = VMCI_DG_SIZE_ALIGNED(dg);

		if (dg_in_size <= dg_in_buffer_size) {
			int result;

			/*
			 * If the remaining bytes in the datagram
			 * buffer don't contain the complete
			 * datagram, we first make sure we have enough
			 * room for it and then we read the remainder
			 * of the datagram and possibly any following
			 * datagrams.
			 */
			if (dg_in_size > remaining_bytes) {
				if (remaining_bytes !=
				    current_dg_in_buffer_size) {

					/*
					 * We move the partial
					 * datagram to the front and
					 * read the remainder of the
					 * datagram and possibly
					 * following calls into the
					 * following bytes.
					 */
					memmove(dg_in_buffer, dg_in_buffer +
						current_dg_in_buffer_size -
						remaining_bytes,
						remaining_bytes);
					dg = (struct vmci_datagram *)
					    dg_in_buffer;
				}

				if (current_dg_in_buffer_size !=
				    dg_in_buffer_size)
					current_dg_in_buffer_size =
					    dg_in_buffer_size;

				ioread8_rep(vmci_dev->iobase +
						VMCI_DATA_IN_ADDR,
					vmci_dev->data_buffer +
						remaining_bytes,
					current_dg_in_buffer_size -
						remaining_bytes);
			}

			/*
			 * We special case event datagrams from the
			 * hypervisor.
			 */
			if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
			    dg->dst.resource == VMCI_EVENT_HANDLER) {
				result = vmci_event_dispatch(dg);
			} else {
				result = vmci_datagram_invoke_guest_handler(dg);
			}
			if (result < VMCI_SUCCESS)
				dev_dbg(vmci_dev->dev,
					"Datagram with resource (ID=0x%x) failed (err=%d)\n",
					dg->dst.resource, result);

			/* On to the next datagram. */
			dg = (struct vmci_datagram *)((u8 *)dg +
						      dg_in_size);
		} else {
			size_t bytes_to_skip;

			/*
			 * Datagram doesn't fit in datagram buffer of maximal
			 * size. We drop it.
			 */
			dev_dbg(vmci_dev->dev,
				"Failed to receive datagram (size=%u bytes)\n",
				dg_in_size);

			bytes_to_skip = dg_in_size - remaining_bytes;
			if (current_dg_in_buffer_size != dg_in_buffer_size)
				current_dg_in_buffer_size = dg_in_buffer_size;

			for (;;) {
				ioread8_rep(vmci_dev->iobase +
						VMCI_DATA_IN_ADDR,
					vmci_dev->data_buffer,
					current_dg_in_buffer_size);
				if (bytes_to_skip <= current_dg_in_buffer_size)
					break;

				bytes_to_skip -= current_dg_in_buffer_size;
			}
			dg = (struct vmci_datagram *)(dg_in_buffer +
						      bytes_to_skip);
		}

		remaining_bytes =
			(size_t) (dg_in_buffer + current_dg_in_buffer_size -
				  (u8 *)dg);

		if (remaining_bytes < VMCI_DG_HEADERSIZE) {
			/* Get the next batch of datagrams. */

			ioread8_rep(vmci_dev->iobase + VMCI_DATA_IN_ADDR,
				    vmci_dev->data_buffer,
				    current_dg_in_buffer_size);
			dg = (struct vmci_datagram *)dg_in_buffer;
			remaining_bytes = current_dg_in_buffer_size;
		}
	}
}

/*
 * Scans the notification bitmap for raised flags, clears them
 * and handles the notifications.
 */
static void vmci_process_bitmap(unsigned long data)
{
	struct vmci_guest_device *dev = (struct vmci_guest_device *)data;

	if (!dev->notification_bitmap) {
		dev_dbg(dev->dev, "No bitmap present in %s\n", __func__);
		return;
	}

	vmci_dbell_scan_notification_entries(dev->notification_bitmap);
}

/*
 * Interrupt handler for legacy or MSI interrupt, or for first MSI-X
 * interrupt (vector VMCI_INTR_DATAGRAM).
 */
static irqreturn_t vmci_interrupt(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/*
	 * If we are using MSI-X with exclusive vectors then we simply schedule
	 * the datagram tasklet, since we know the interrupt was meant for us.
	 * Otherwise we must read the ICR to determine what to do.
	 */

	if (dev->exclusive_vectors) {
		tasklet_schedule(&dev->datagram_tasklet);
	} else {
		unsigned int icr;

		/* Acknowledge interrupt and determine what needs doing. */
		icr = vmci_read_reg(dev, VMCI_ICR_ADDR);
		if (icr == 0 || icr == ~0)
			return IRQ_NONE;

		if (icr & VMCI_ICR_DATAGRAM) {
			tasklet_schedule(&dev->datagram_tasklet);
			icr &= ~VMCI_ICR_DATAGRAM;
		}

		if (icr & VMCI_ICR_NOTIFICATION) {
			tasklet_schedule(&dev->bm_tasklet);
			icr &= ~VMCI_ICR_NOTIFICATION;
		}

		if (icr & VMCI_ICR_DMA_DATAGRAM)
			icr &= ~VMCI_ICR_DMA_DATAGRAM;

		if (icr != 0)
			dev_warn(dev->dev,
				 "Ignoring unknown interrupt cause (%d)\n",
				 icr);
	}

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_NOTIFICATION,
 * which is for the notification bitmap. Will only get called if we are
 * using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_bm(int irq, void *_dev)
{
	struct vmci_guest_device *dev = _dev;

	/* For MSI-X we can just assume it was meant for us. */
	tasklet_schedule(&dev->bm_tasklet);

	return IRQ_HANDLED;
}

/*
 * Interrupt handler for MSI-X interrupt vector VMCI_INTR_DMA_DATAGRAM,
 * which is for the completion of a DMA datagram send or receive operation.
 * Will only get called if we are using MSI-X with exclusive vectors.
 */
static irqreturn_t vmci_interrupt_dma_datagram(int irq, void *_dev)
{
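	/*
	 * Nothing to do here at this point: the DMA send path in
	 * vmci_write_data() polls the buffer header's busy flag rather
	 * than waiting for this interrupt, and DMA datagram receive is
	 * not wired up yet in this version of the driver.
	 */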
	return IRQ_HANDLED;
}

static void vmci_free_dg_buffers(struct vmci_guest_device *vmci_dev)
{
	if (vmci_dev->mmio_base != NULL) {
		if (vmci_dev->tx_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->tx_buffer,
					  vmci_dev->tx_buffer_base);
		if (vmci_dev->data_buffer != NULL)
			dma_free_coherent(vmci_dev->dev,
					  VMCI_DMA_DG_BUFFER_SIZE,
					  vmci_dev->data_buffer,
					  vmci_dev->data_buffer_base);
	} else {
		vfree(vmci_dev->data_buffer);
	}
}

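/*
 * The asymmetry above mirrors the probe path: with MMIO register access
 * both data_buffer and tx_buffer are coherent DMA allocations of
 * VMCI_DMA_DG_BUFFER_SIZE, while on the legacy I/O port path only
 * data_buffer exists and it comes from vmalloc().
 */
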
/*
 * Most of the initialization at module load time is done here.
 */
static int vmci_guest_probe_device(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	struct vmci_guest_device *vmci_dev;
	void __iomem *iobase = NULL;
	void __iomem *mmio_base = NULL;
	unsigned int num_irq_vectors;
	unsigned int capabilities;
	unsigned int caps_in_use;
	unsigned long cmd;
	int vmci_err;
	int error;

	dev_dbg(&pdev->dev, "Probing for vmci/PCI guest device\n");

	error = pcim_enable_device(pdev);
	if (error) {
		dev_err(&pdev->dev,
			"Failed to enable VMCI device: %d\n", error);
		return error;
	}

	/*
	 * The VMCI device with MMIO access to registers requests 256KB
	 * for BAR1. If present, the driver will use the new VMCI device
	 * functionality for register access and datagram send/recv.
	 */

	if (pci_resource_len(pdev, 1) == VMCI_WITH_MMIO_ACCESS_BAR_SIZE) {
		dev_info(&pdev->dev, "MMIO register access is available\n");
		mmio_base = pci_iomap_range(pdev, 1, VMCI_MMIO_ACCESS_OFFSET,
					    VMCI_MMIO_ACCESS_SIZE);
		/* If the map fails, we fall back to IOIO access. */
		if (!mmio_base)
			dev_warn(&pdev->dev, "Failed to map MMIO register access\n");
	}

	if (!mmio_base) {
		error = pcim_iomap_regions(pdev, BIT(0), KBUILD_MODNAME);
		if (error) {
			dev_err(&pdev->dev, "Failed to reserve/map IO regions\n");
			return error;
		}
		iobase = pcim_iomap_table(pdev)[0];
	}

	vmci_dev = devm_kzalloc(&pdev->dev, sizeof(*vmci_dev), GFP_KERNEL);
	if (!vmci_dev) {
		dev_err(&pdev->dev,
			"Can't allocate memory for VMCI device\n");
		return -ENOMEM;
	}

	vmci_dev->dev = &pdev->dev;
	vmci_dev->exclusive_vectors = false;
	vmci_dev->iobase = iobase;
	vmci_dev->mmio_base = mmio_base;

	tasklet_init(&vmci_dev->datagram_tasklet,
		     vmci_dispatch_dgs, (unsigned long)vmci_dev);
	tasklet_init(&vmci_dev->bm_tasklet,
		     vmci_process_bitmap, (unsigned long)vmci_dev);

	if (mmio_base != NULL) {
		vmci_dev->tx_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							 &vmci_dev->tx_buffer_base,
							 GFP_KERNEL);
		if (!vmci_dev->tx_buffer) {
			dev_err(&pdev->dev,
				"Can't allocate memory for datagram tx buffer\n");
			return -ENOMEM;
		}

		vmci_dev->data_buffer = dma_alloc_coherent(&pdev->dev, VMCI_DMA_DG_BUFFER_SIZE,
							   &vmci_dev->data_buffer_base,
							   GFP_KERNEL);
	} else {
		vmci_dev->data_buffer = vmalloc(VMCI_MAX_DG_SIZE);
	}
	if (!vmci_dev->data_buffer) {
		dev_err(&pdev->dev,
			"Can't allocate memory for datagram buffer\n");
		error = -ENOMEM;
		goto err_free_data_buffers;
	}

	pci_set_master(pdev);	/* To enable queue_pair functionality. */

	/*
	 * Verify that the VMCI Device supports the capabilities that
	 * we need. If the device is missing capabilities that we would
	 * like to use, check for fallback capabilities and use those
	 * instead (so we can run a new VM on old hosts). Fail the load if
	 * a required capability is missing and there is no fallback.
	 *
	 * Right now, we need datagrams. There are no fallbacks.
	 */
	capabilities = vmci_read_reg(vmci_dev, VMCI_CAPS_ADDR);
	if (!(capabilities & VMCI_CAPS_DATAGRAM)) {
		dev_err(&pdev->dev, "Device does not support datagrams\n");
		error = -ENXIO;
		goto err_free_data_buffers;
	}
	caps_in_use = VMCI_CAPS_DATAGRAM;

	/*
	 * Use 64-bit PPNs if the device supports them.
	 *
	 * There is no check for the return value of dma_set_mask_and_coherent
	 * since this driver can handle the default mask values if
	 * dma_set_mask_and_coherent fails.
	 */
	if (capabilities & VMCI_CAPS_PPN64) {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
		use_ppn64 = true;
		caps_in_use |= VMCI_CAPS_PPN64;
	} else {
		dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44));
		use_ppn64 = false;
	}

	/*
	 * If the hardware supports notifications, we will use that as
	 * well.
	 */
	if (capabilities & VMCI_CAPS_NOTIFICATIONS) {
		vmci_dev->notification_bitmap = dma_alloc_coherent(
			&pdev->dev, PAGE_SIZE, &vmci_dev->notification_base,
			GFP_KERNEL);
		if (!vmci_dev->notification_bitmap) {
			dev_warn(&pdev->dev,
				 "Unable to allocate notification bitmap\n");
		} else {
			memset(vmci_dev->notification_bitmap, 0, PAGE_SIZE);
			caps_in_use |= VMCI_CAPS_NOTIFICATIONS;
		}
	}

	if (mmio_base != NULL) {
		if (capabilities & VMCI_CAPS_DMA_DATAGRAM) {
			caps_in_use |= VMCI_CAPS_DMA_DATAGRAM;
		} else {
			dev_err(&pdev->dev,
				"Missing capability: VMCI_CAPS_DMA_DATAGRAM\n");
			error = -ENXIO;
			goto err_free_data_buffers;
		}
	}

	dev_info(&pdev->dev, "Using capabilities 0x%x\n", caps_in_use);

	/* Let the host know which capabilities we intend to use. */
	vmci_write_reg(vmci_dev, caps_in_use, VMCI_CAPS_ADDR);

	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
		/* Let the device know the size for pages passed down. */
		vmci_write_reg(vmci_dev, PAGE_SHIFT, VMCI_GUEST_PAGE_SHIFT);

		/* Configure the high order parts of the data in/out buffers. */
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->data_buffer_base),
			       VMCI_DATA_IN_HIGH_ADDR);
		vmci_write_reg(vmci_dev, upper_32_bits(vmci_dev->tx_buffer_base),
			       VMCI_DATA_OUT_HIGH_ADDR);
	}
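	/*
	 * Note that only the upper 32 bits of the DMA addresses are
	 * programmed above; the lower 32 bits of the send buffer address
	 * are written per datagram in vmci_write_data(), and that write
	 * is what triggers the device to process the buffer.
	 */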

	/* Set up global device so that we can start sending datagrams */
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = vmci_dev;
	vmci_pdev = pdev;
	spin_unlock_irq(&vmci_dev_spinlock);

	/*
	 * Register notification bitmap with device if that capability is
	 * used.
	 */
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS) {
		unsigned long bitmap_ppn =
			vmci_dev->notification_base >> PAGE_SHIFT;
		if (!vmci_dbell_register_notification_bitmap(bitmap_ppn)) {
			dev_warn(&pdev->dev,
				 "VMCI device unable to register notification bitmap with PPN 0x%lx\n",
				 bitmap_ppn);
			error = -ENXIO;
			goto err_remove_vmci_dev_g;
		}
	}

	/* Check host capabilities. */
	error = vmci_check_host_caps(pdev);
	if (error)
		goto err_remove_bitmap;

	/* Enable device. */

	/*
	 * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
	 * update the internal context id when needed.
	 */
	vmci_err = vmci_event_subscribe(VMCI_EVENT_CTX_ID_UPDATE,
					vmci_guest_cid_update, NULL,
					&ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to subscribe to event (type=%d): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, vmci_err);

	/*
	 * Enable interrupts. Try MSI-X first, then MSI, and then fall back
	 * on legacy interrupts. Devices with MMIO register access (and thus
	 * DMA datagram support) expose an extra MSI-X vector for DMA
	 * datagram completion.
	 */
	if (vmci_dev->mmio_base != NULL)
		num_irq_vectors = VMCI_MAX_INTRS;
	else
		num_irq_vectors = VMCI_MAX_INTRS_NOTIFICATION;
	error = pci_alloc_irq_vectors(pdev, num_irq_vectors, num_irq_vectors,
				      PCI_IRQ_MSIX);
	if (error < 0) {
		error = pci_alloc_irq_vectors(pdev, 1, 1,
				PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY);
		if (error < 0)
			goto err_remove_bitmap;
	} else {
		vmci_dev->exclusive_vectors = true;
	}

	/*
	 * Request IRQ for legacy or MSI interrupts, or for first
	 * MSI-X vector.
	 */
	error = request_irq(pci_irq_vector(pdev, 0), vmci_interrupt,
			    IRQF_SHARED, KBUILD_MODNAME, vmci_dev);
	if (error) {
		dev_err(&pdev->dev, "Irq %u in use: %d\n",
			pci_irq_vector(pdev, 0), error);
		goto err_disable_msi;
	}

	/*
	 * For MSI-X with exclusive vectors we need to request an
	 * interrupt for each vector so that we get a separate
	 * interrupt handler routine. This allows us to distinguish
	 * between the vectors.
	 */
	if (vmci_dev->exclusive_vectors) {
		error = request_irq(pci_irq_vector(pdev, 1),
				    vmci_interrupt_bm, 0, KBUILD_MODNAME,
				    vmci_dev);
		if (error) {
			dev_err(&pdev->dev,
				"Failed to allocate irq %u: %d\n",
				pci_irq_vector(pdev, 1), error);
			goto err_free_irq;
		}
		if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM) {
			error = request_irq(pci_irq_vector(pdev, 2),
					    vmci_interrupt_dma_datagram,
					    0, KBUILD_MODNAME, vmci_dev);
			if (error) {
				dev_err(&pdev->dev,
					"Failed to allocate irq %u: %d\n",
					pci_irq_vector(pdev, 2), error);
				goto err_free_bm_irq;
			}
		}
	}

	dev_dbg(&pdev->dev, "Registered device\n");

	atomic_inc(&vmci_num_guest_devices);

	/* Enable specific interrupt bits. */
	cmd = VMCI_IMR_DATAGRAM;
	if (caps_in_use & VMCI_CAPS_NOTIFICATIONS)
		cmd |= VMCI_IMR_NOTIFICATION;
	if (caps_in_use & VMCI_CAPS_DMA_DATAGRAM)
		cmd |= VMCI_IMR_DMA_DATAGRAM;
	vmci_write_reg(vmci_dev, cmd, VMCI_IMR_ADDR);

	/* Enable interrupts. */
	vmci_write_reg(vmci_dev, VMCI_CONTROL_INT_ENABLE, VMCI_CONTROL_ADDR);

	pci_set_drvdata(pdev, vmci_dev);

	vmci_call_vsock_callback(false);
	return 0;

err_free_bm_irq:
	free_irq(pci_irq_vector(pdev, 1), vmci_dev);
err_free_irq:
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	tasklet_kill(&vmci_dev->datagram_tasklet);
	tasklet_kill(&vmci_dev->bm_tasklet);

err_disable_msi:
	pci_free_irq_vectors(pdev);

	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

err_remove_bitmap:
	if (vmci_dev->notification_bitmap) {
		vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);
		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

err_remove_vmci_dev_g:
	spin_lock_irq(&vmci_dev_spinlock);
	vmci_pdev = NULL;
	vmci_dev_g = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

err_free_data_buffers:
	vmci_free_dg_buffers(vmci_dev);

	/* The rest are managed resources and will be freed by PCI core */
	return error;
}

static void vmci_guest_remove_device(struct pci_dev *pdev)
{
	struct vmci_guest_device *vmci_dev = pci_get_drvdata(pdev);
	int vmci_err;

	dev_dbg(&pdev->dev, "Removing device\n");

	atomic_dec(&vmci_num_guest_devices);

	vmci_qp_guest_endpoints_exit();

	vmci_err = vmci_event_unsubscribe(ctx_update_sub_id);
	if (vmci_err < VMCI_SUCCESS)
		dev_warn(&pdev->dev,
			 "Failed to unsubscribe from event (type=%d) with subscriber (ID=0x%x): %d\n",
			 VMCI_EVENT_CTX_ID_UPDATE, ctx_update_sub_id, vmci_err);

	spin_lock_irq(&vmci_dev_spinlock);
	vmci_dev_g = NULL;
	vmci_pdev = NULL;
	spin_unlock_irq(&vmci_dev_spinlock);

	dev_dbg(&pdev->dev, "Resetting vmci device\n");
	vmci_write_reg(vmci_dev, VMCI_CONTROL_RESET, VMCI_CONTROL_ADDR);

	/*
	 * Free IRQ and then disable MSI/MSI-X as appropriate. For
	 * MSI-X, we might have multiple vectors, each with their own
	 * IRQ, which we must free too.
	 */
	if (vmci_dev->exclusive_vectors) {
		free_irq(pci_irq_vector(pdev, 1), vmci_dev);
		if (vmci_dev->mmio_base != NULL)
			free_irq(pci_irq_vector(pdev, 2), vmci_dev);
	}
	free_irq(pci_irq_vector(pdev, 0), vmci_dev);
	pci_free_irq_vectors(pdev);

	tasklet_kill(&vmci_dev->datagram_tasklet);
	tasklet_kill(&vmci_dev->bm_tasklet);

	if (vmci_dev->notification_bitmap) {
		/*
		 * The device reset above cleared the bitmap state of the
		 * device, so we can safely free it here.
		 */
		dma_free_coherent(&pdev->dev, PAGE_SIZE,
				  vmci_dev->notification_bitmap,
				  vmci_dev->notification_base);
	}

	vmci_free_dg_buffers(vmci_dev);

	if (vmci_dev->mmio_base != NULL)
		pci_iounmap(pdev, vmci_dev->mmio_base);

	/* The rest are managed resources and will be freed by PCI core */
}

static const struct pci_device_id vmci_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI), },
	{ 0 },
};
MODULE_DEVICE_TABLE(pci, vmci_ids);

static struct pci_driver vmci_guest_driver = {
	.name		= KBUILD_MODNAME,
	.id_table	= vmci_ids,
	.probe		= vmci_guest_probe_device,
	.remove		= vmci_guest_remove_device,
};

int __init vmci_guest_init(void)
{
	return pci_register_driver(&vmci_guest_driver);
}

void __exit vmci_guest_exit(void)
{
	pci_unregister_driver(&vmci_guest_driver);
}