]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/lib/virtio/virtio_pci.c
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / lib / virtio / virtio_pci.c
CommitLineData
11fdf7f2
TL
1/*-
2 * BSD LICENSE
3 *
4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34#include "spdk/stdinc.h"
35
f67539c2 36#include "spdk/memory.h"
11fdf7f2
TL
37#include "spdk/mmio.h"
38#include "spdk/string.h"
39#include "spdk/env.h"
40
41#include "spdk_internal/virtio.h"
42
/**
 * Per-device state of a virtio device that is accessed over PCI.
 */
struct virtio_hw {
	/** Set to 1 when an MSI-X capability is found in the PCI capability list. */
	uint8_t use_msix;

	/** Scale factor applied to a queue's notify offset to locate its notify address. */
	uint32_t notify_off_multiplier;

	/** Location of the ISR configuration structure inside a mapped BAR. */
	uint8_t *isr;

	/** Start of the queue notification area inside a mapped BAR. */
	uint16_t *notify_base;

	struct {
		/** Mem-mapped resources from given PCI BAR */
		void *vaddr;

		/** Length of the address space */
		uint32_t len;
	} pci_bar[6];

	/** Location of the common configuration structure inside a mapped BAR. */
	struct virtio_pci_common_cfg *common_cfg;

	/** PCI device this state belongs to. */
	struct spdk_pci_device *pci_dev;

	/** Device-specific PCI config space */
	void *dev_cfg;
};
63
64struct virtio_pci_probe_ctx {
65 virtio_pci_create_cb enum_cb;
66 void *enum_ctx;
67 uint16_t device_id;
68};
69
/*
 * The following macros are derived from linux/pci_regs.h. That header
 * cannot simply be included here, because it does not exist on
 * non-Linux platforms.
 */
/* Config space offset holding the position of the first capability. */
#define PCI_CAPABILITY_LIST	0x34
/* Capability ID: vendor-specific. */
#define PCI_CAP_ID_VNDR		0x09
/* Capability ID: MSI-X. */
#define PCI_CAP_ID_MSIX		0x11
78
79static inline int
80check_vq_phys_addr_ok(struct virtqueue *vq)
81{
82 /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit,
83 * and only accepts 32 bit page frame number.
84 * Check if the allocated physical memory exceeds 16TB.
85 */
86 if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >>
87 (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) {
88 SPDK_ERRLOG("vring address shouldn't be above 16TB!\n");
89 return 0;
90 }
91
92 return 1;
93}
94
95static void
96free_virtio_hw(struct virtio_hw *hw)
97{
98 unsigned i;
99
100 for (i = 0; i < 6; ++i) {
101 if (hw->pci_bar[i].vaddr == NULL) {
102 continue;
103 }
104
105 spdk_pci_device_unmap_bar(hw->pci_dev, i, hw->pci_bar[i].vaddr);
106 }
107
108 free(hw);
109}
110
111static void
112pci_dump_json_info(struct virtio_dev *dev, struct spdk_json_write_ctx *w)
113{
114 struct virtio_hw *hw = dev->ctx;
115 struct spdk_pci_addr pci_addr = spdk_pci_device_get_addr((struct spdk_pci_device *)hw->pci_dev);
116 char addr[32];
117
118 spdk_json_write_name(w, "type");
119 if (dev->modern) {
120 spdk_json_write_string(w, "pci-modern");
121 } else {
122 spdk_json_write_string(w, "pci-legacy");
123 }
124
11fdf7f2 125 spdk_pci_addr_fmt(addr, sizeof(addr), &pci_addr);
9f95a23c 126 spdk_json_write_named_string(w, "pci_address", addr);
11fdf7f2
TL
127}
128
129static void
130pci_write_json_config(struct virtio_dev *dev, struct spdk_json_write_ctx *w)
131{
132 struct virtio_hw *hw = dev->ctx;
133 struct spdk_pci_addr pci_addr = spdk_pci_device_get_addr(hw->pci_dev);
134 char addr[32];
135
136 spdk_pci_addr_fmt(addr, sizeof(addr), &pci_addr);
137
138 spdk_json_write_named_string(w, "trtype", "pci");
139 spdk_json_write_named_string(w, "traddr", addr);
140}
141
/**
 * Write a 64-bit value to a pair of 32-bit MMIO registers, low half first.
 *
 * \param val value to write.
 * \param lo register receiving bits 0-31.
 * \param hi register receiving bits 32-63.
 */
static inline void
io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
{
	uint32_t low_half = (uint32_t)(val & UINT32_MAX);
	uint32_t high_half = (uint32_t)(val >> 32);

	spdk_mmio_write_4(lo, low_half);
	spdk_mmio_write_4(hi, high_half);
}
148
149static int
150modern_read_dev_config(struct virtio_dev *dev, size_t offset,
151 void *dst, int length)
152{
153 struct virtio_hw *hw = dev->ctx;
154 int i;
155 uint8_t *p;
156 uint8_t old_gen, new_gen;
157
158 do {
159 old_gen = spdk_mmio_read_1(&hw->common_cfg->config_generation);
160
161 p = dst;
162 for (i = 0; i < length; i++) {
163 *p++ = spdk_mmio_read_1((uint8_t *)hw->dev_cfg + offset + i);
164 }
165
166 new_gen = spdk_mmio_read_1(&hw->common_cfg->config_generation);
167 } while (old_gen != new_gen);
168
169 return 0;
170}
171
172static int
173modern_write_dev_config(struct virtio_dev *dev, size_t offset,
174 const void *src, int length)
175{
176 struct virtio_hw *hw = dev->ctx;
177 int i;
178 const uint8_t *p = src;
179
180 for (i = 0; i < length; i++) {
181 spdk_mmio_write_1(((uint8_t *)hw->dev_cfg) + offset + i, *p++);
182 }
183
184 return 0;
185}
186
187static uint64_t
188modern_get_features(struct virtio_dev *dev)
189{
190 struct virtio_hw *hw = dev->ctx;
191 uint32_t features_lo, features_hi;
192
193 spdk_mmio_write_4(&hw->common_cfg->device_feature_select, 0);
194 features_lo = spdk_mmio_read_4(&hw->common_cfg->device_feature);
195
196 spdk_mmio_write_4(&hw->common_cfg->device_feature_select, 1);
197 features_hi = spdk_mmio_read_4(&hw->common_cfg->device_feature);
198
199 return ((uint64_t)features_hi << 32) | features_lo;
200}
201
202static int
203modern_set_features(struct virtio_dev *dev, uint64_t features)
204{
205 struct virtio_hw *hw = dev->ctx;
206
207 if ((features & (1ULL << VIRTIO_F_VERSION_1)) == 0) {
208 SPDK_ERRLOG("VIRTIO_F_VERSION_1 feature is not enabled.\n");
209 return -EINVAL;
210 }
211
212 spdk_mmio_write_4(&hw->common_cfg->guest_feature_select, 0);
213 spdk_mmio_write_4(&hw->common_cfg->guest_feature, features & ((1ULL << 32) - 1));
214
215 spdk_mmio_write_4(&hw->common_cfg->guest_feature_select, 1);
216 spdk_mmio_write_4(&hw->common_cfg->guest_feature, features >> 32);
217
218 dev->negotiated_features = features;
219
220 return 0;
221}
222
223static void
224modern_destruct_dev(struct virtio_dev *vdev)
225{
226 struct virtio_hw *hw = vdev->ctx;
227 struct spdk_pci_device *pci_dev = hw->pci_dev;
228
229 free_virtio_hw(hw);
230 spdk_pci_device_detach(pci_dev);
231}
232
233static uint8_t
234modern_get_status(struct virtio_dev *dev)
235{
236 struct virtio_hw *hw = dev->ctx;
237
238 return spdk_mmio_read_1(&hw->common_cfg->device_status);
239}
240
241static void
242modern_set_status(struct virtio_dev *dev, uint8_t status)
243{
244 struct virtio_hw *hw = dev->ctx;
245
246 spdk_mmio_write_1(&hw->common_cfg->device_status, status);
247}
248
249static uint16_t
250modern_get_queue_size(struct virtio_dev *dev, uint16_t queue_id)
251{
252 struct virtio_hw *hw = dev->ctx;
253
254 spdk_mmio_write_2(&hw->common_cfg->queue_select, queue_id);
255 return spdk_mmio_read_2(&hw->common_cfg->queue_size);
256}
257
258static int
259modern_setup_queue(struct virtio_dev *dev, struct virtqueue *vq)
260{
261 struct virtio_hw *hw = dev->ctx;
262 uint64_t desc_addr, avail_addr, used_addr;
263 uint16_t notify_off;
264 void *queue_mem;
265 uint64_t queue_mem_phys_addr;
266
267 /* To ensure physical address contiguity we make the queue occupy
268 * only a single hugepage (2MB). As of Virtio 1.0, the queue size
269 * always falls within this limit.
270 */
9f95a23c 271 if (vq->vq_ring_size > VALUE_2MB) {
11fdf7f2
TL
272 return -ENOMEM;
273 }
274
9f95a23c
TL
275 queue_mem = spdk_zmalloc(vq->vq_ring_size, VALUE_2MB, NULL,
276 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
11fdf7f2
TL
277 if (queue_mem == NULL) {
278 return -ENOMEM;
279 }
280
9f95a23c
TL
281 queue_mem_phys_addr = spdk_vtophys(queue_mem, NULL);
282 if (queue_mem_phys_addr == SPDK_VTOPHYS_ERROR) {
283 spdk_free(queue_mem);
284 return -EFAULT;
285 }
286
11fdf7f2
TL
287 vq->vq_ring_mem = queue_mem_phys_addr;
288 vq->vq_ring_virt_mem = queue_mem;
289
290 if (!check_vq_phys_addr_ok(vq)) {
9f95a23c 291 spdk_free(queue_mem);
11fdf7f2
TL
292 return -ENOMEM;
293 }
294
295 desc_addr = vq->vq_ring_mem;
296 avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
297 used_addr = (avail_addr + offsetof(struct vring_avail, ring[vq->vq_nentries])
298 + VIRTIO_PCI_VRING_ALIGN - 1) & ~(VIRTIO_PCI_VRING_ALIGN - 1);
299
300 spdk_mmio_write_2(&hw->common_cfg->queue_select, vq->vq_queue_index);
301
302 io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
303 &hw->common_cfg->queue_desc_hi);
304 io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
305 &hw->common_cfg->queue_avail_hi);
306 io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
307 &hw->common_cfg->queue_used_hi);
308
309 notify_off = spdk_mmio_read_2(&hw->common_cfg->queue_notify_off);
310 vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
311 notify_off * hw->notify_off_multiplier);
312
313 spdk_mmio_write_2(&hw->common_cfg->queue_enable, 1);
314
315 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "queue %"PRIu16" addresses:\n", vq->vq_queue_index);
316 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "\t desc_addr: %" PRIx64 "\n", desc_addr);
317 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "\t aval_addr: %" PRIx64 "\n", avail_addr);
318 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "\t used_addr: %" PRIx64 "\n", used_addr);
319 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "\t notify addr: %p (notify offset: %"PRIu16")\n",
320 vq->notify_addr, notify_off);
321
322 return 0;
323}
324
325static void
326modern_del_queue(struct virtio_dev *dev, struct virtqueue *vq)
327{
328 struct virtio_hw *hw = dev->ctx;
329
330 spdk_mmio_write_2(&hw->common_cfg->queue_select, vq->vq_queue_index);
331
332 io_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
333 &hw->common_cfg->queue_desc_hi);
334 io_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
335 &hw->common_cfg->queue_avail_hi);
336 io_write64_twopart(0, &hw->common_cfg->queue_used_lo,
337 &hw->common_cfg->queue_used_hi);
338
339 spdk_mmio_write_2(&hw->common_cfg->queue_enable, 0);
340
9f95a23c 341 spdk_free(vq->vq_ring_virt_mem);
11fdf7f2
TL
342}
343
344static void
345modern_notify_queue(struct virtio_dev *dev, struct virtqueue *vq)
346{
347 spdk_mmio_write_2(vq->notify_addr, vq->vq_queue_index);
348}
349
350static const struct virtio_dev_ops modern_ops = {
351 .read_dev_cfg = modern_read_dev_config,
352 .write_dev_cfg = modern_write_dev_config,
353 .get_status = modern_get_status,
354 .set_status = modern_set_status,
355 .get_features = modern_get_features,
356 .set_features = modern_set_features,
357 .destruct_dev = modern_destruct_dev,
358 .get_queue_size = modern_get_queue_size,
359 .setup_queue = modern_setup_queue,
360 .del_queue = modern_del_queue,
361 .notify_queue = modern_notify_queue,
362 .dump_json_info = pci_dump_json_info,
363 .write_json_config = pci_write_json_config,
364};
365
366static void *
367get_cfg_addr(struct virtio_hw *hw, struct virtio_pci_cap *cap)
368{
369 uint8_t bar = cap->bar;
370 uint32_t length = cap->length;
371 uint32_t offset = cap->offset;
372
373 if (bar > 5) {
374 SPDK_ERRLOG("invalid bar: %"PRIu8"\n", bar);
375 return NULL;
376 }
377
378 if (offset + length < offset) {
379 SPDK_ERRLOG("offset(%"PRIu32") + length(%"PRIu32") overflows\n",
380 offset, length);
381 return NULL;
382 }
383
384 if (offset + length > hw->pci_bar[bar].len) {
385 SPDK_ERRLOG("invalid cap: overflows bar space: %"PRIu32" > %"PRIu32"\n",
386 offset + length, hw->pci_bar[bar].len);
387 return NULL;
388 }
389
390 if (hw->pci_bar[bar].vaddr == NULL) {
391 SPDK_ERRLOG("bar %"PRIu8" base addr is NULL\n", bar);
392 return NULL;
393 }
394
395 return hw->pci_bar[bar].vaddr + offset;
396}
397
398static int
399virtio_read_caps(struct virtio_hw *hw)
400{
401 uint8_t pos;
402 struct virtio_pci_cap cap;
403 int ret;
404
405 ret = spdk_pci_device_cfg_read(hw->pci_dev, &pos, 1, PCI_CAPABILITY_LIST);
406 if (ret < 0) {
407 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "failed to read pci capability list\n");
408 return ret;
409 }
410
411 while (pos) {
412 ret = spdk_pci_device_cfg_read(hw->pci_dev, &cap, sizeof(cap), pos);
413 if (ret < 0) {
414 SPDK_ERRLOG("failed to read pci cap at pos: %"PRIx8"\n", pos);
415 break;
416 }
417
418 if (cap.cap_vndr == PCI_CAP_ID_MSIX) {
419 hw->use_msix = 1;
420 }
421
422 if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
423 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI,
424 "[%2"PRIx8"] skipping non VNDR cap id: %02"PRIx8"\n",
425 pos, cap.cap_vndr);
426 goto next;
427 }
428
429 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI,
430 "[%2"PRIx8"] cfg type: %"PRIu8", bar: %"PRIu8", offset: %04"PRIx32", len: %"PRIu32"\n",
431 pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
432
433 switch (cap.cfg_type) {
434 case VIRTIO_PCI_CAP_COMMON_CFG:
435 hw->common_cfg = get_cfg_addr(hw, &cap);
436 break;
437 case VIRTIO_PCI_CAP_NOTIFY_CFG:
438 spdk_pci_device_cfg_read(hw->pci_dev, &hw->notify_off_multiplier,
439 4, pos + sizeof(cap));
440 hw->notify_base = get_cfg_addr(hw, &cap);
441 break;
442 case VIRTIO_PCI_CAP_DEVICE_CFG:
443 hw->dev_cfg = get_cfg_addr(hw, &cap);
444 break;
445 case VIRTIO_PCI_CAP_ISR_CFG:
446 hw->isr = get_cfg_addr(hw, &cap);
447 break;
448 }
449
450next:
451 pos = cap.cap_next;
452 }
453
454 if (hw->common_cfg == NULL || hw->notify_base == NULL ||
455 hw->dev_cfg == NULL || hw->isr == NULL) {
456 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "no modern virtio pci device found.\n");
457 if (ret < 0) {
458 return ret;
459 } else {
460 return -EINVAL;
461 }
462 }
463
464 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "found modern virtio pci device.\n");
465
466 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "common cfg mapped at: %p\n", hw->common_cfg);
467 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "device cfg mapped at: %p\n", hw->dev_cfg);
468 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "isr cfg mapped at: %p\n", hw->isr);
469 SPDK_DEBUGLOG(SPDK_LOG_VIRTIO_PCI, "notify base: %p, notify off multiplier: %u\n",
470 hw->notify_base, hw->notify_off_multiplier);
471
472 return 0;
473}
474
475static int
476virtio_pci_dev_probe(struct spdk_pci_device *pci_dev, struct virtio_pci_probe_ctx *ctx)
477{
478 struct virtio_hw *hw;
479 uint8_t *bar_vaddr;
480 uint64_t bar_paddr, bar_len;
481 int rc;
482 unsigned i;
483 char bdf[32];
484 struct spdk_pci_addr addr;
485
486 addr = spdk_pci_device_get_addr(pci_dev);
487 rc = spdk_pci_addr_fmt(bdf, sizeof(bdf), &addr);
488 if (rc != 0) {
489 SPDK_ERRLOG("Ignoring a device with non-parseable PCI address\n");
490 return -1;
491 }
492
493 hw = calloc(1, sizeof(*hw));
494 if (hw == NULL) {
495 SPDK_ERRLOG("%s: calloc failed\n", bdf);
496 return -1;
497 }
498
499 hw->pci_dev = pci_dev;
500
501 for (i = 0; i < 6; ++i) {
502 rc = spdk_pci_device_map_bar(pci_dev, i, (void *) &bar_vaddr, &bar_paddr,
503 &bar_len);
504 if (rc != 0) {
505 SPDK_ERRLOG("%s: failed to memmap PCI BAR %u\n", bdf, i);
506 free_virtio_hw(hw);
507 return -1;
508 }
509
510 hw->pci_bar[i].vaddr = bar_vaddr;
511 hw->pci_bar[i].len = bar_len;
512 }
513
514 /* Virtio PCI caps exist only on modern PCI devices.
515 * Legacy devices are not supported.
516 */
517 if (virtio_read_caps(hw) != 0) {
518 SPDK_NOTICELOG("Ignoring legacy PCI device at %s\n", bdf);
519 free_virtio_hw(hw);
520 return -1;
521 }
522
523 rc = ctx->enum_cb((struct virtio_pci_ctx *)hw, ctx->enum_ctx);
524 if (rc != 0) {
525 free_virtio_hw(hw);
526 }
527
528 return rc;
529}
530
531static int
532virtio_pci_dev_probe_cb(void *probe_ctx, struct spdk_pci_device *pci_dev)
533{
534 struct virtio_pci_probe_ctx *ctx = probe_ctx;
535 uint16_t pci_device_id = spdk_pci_device_get_device_id(pci_dev);
536
537 if (pci_device_id != ctx->device_id) {
538 return 1;
539 }
540
541 return virtio_pci_dev_probe(pci_dev, ctx);
542}
543
544int
545virtio_pci_dev_enumerate(virtio_pci_create_cb enum_cb, void *enum_ctx,
546 uint16_t pci_device_id)
547{
548 struct virtio_pci_probe_ctx ctx;
549
550 if (!spdk_process_is_primary()) {
551 SPDK_WARNLOG("virtio_pci secondary process support is not implemented yet.\n");
552 return 0;
553 }
554
555 ctx.enum_cb = enum_cb;
556 ctx.enum_ctx = enum_ctx;
557 ctx.device_id = pci_device_id;
558
9f95a23c
TL
559 return spdk_pci_enumerate(spdk_pci_virtio_get_driver(),
560 virtio_pci_dev_probe_cb, &ctx);
11fdf7f2
TL
561}
562
563int
564virtio_pci_dev_attach(virtio_pci_create_cb enum_cb, void *enum_ctx,
565 uint16_t pci_device_id, struct spdk_pci_addr *pci_address)
566{
567 struct virtio_pci_probe_ctx ctx;
568
569 if (!spdk_process_is_primary()) {
570 SPDK_WARNLOG("virtio_pci secondary process support is not implemented yet.\n");
571 return 0;
572 }
573
574 ctx.enum_cb = enum_cb;
575 ctx.enum_ctx = enum_ctx;
576 ctx.device_id = pci_device_id;
577
9f95a23c
TL
578 return spdk_pci_device_attach(spdk_pci_virtio_get_driver(),
579 virtio_pci_dev_probe_cb, &ctx, pci_address);
11fdf7f2
TL
580}
581
582int
583virtio_pci_dev_init(struct virtio_dev *vdev, const char *name,
584 struct virtio_pci_ctx *pci_ctx)
585{
586 int rc;
587
588 rc = virtio_dev_construct(vdev, name, &modern_ops, pci_ctx);
589 if (rc != 0) {
590 return rc;
591 }
592
593 vdev->is_hw = 1;
594 vdev->modern = 1;
595
596 return 0;
597}
598
599SPDK_LOG_REGISTER_COMPONENT("virtio_pci", SPDK_LOG_VIRTIO_PCI)