/*
 * Source: DPDK examples/vhost_scsi/vhost_scsi.c, as vendored in Ceph 15.2.0
 * "Octopus" under ceph/src/spdk/dpdk/ (extracted from a git blame web view).
 */
1/* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
3 */
4
5#include <stdint.h>
6#include <unistd.h>
7#include <stdbool.h>
8#include <signal.h>
9#include <assert.h>
10#include <semaphore.h>
11#include <linux/virtio_scsi.h>
12#include <linux/virtio_ring.h>
13
14#include <rte_atomic.h>
15#include <rte_cycles.h>
16#include <rte_log.h>
17#include <rte_malloc.h>
18#include <rte_vhost.h>
19
20#include "vhost_scsi.h"
21#include "scsi_spec.h"
22
/* Feature bits offered to the guest: notify-on-empty, bidirectional
 * ("INOUT") requests, and media-change event support.
 */
#define VIRTIO_SCSI_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) |\
			      (1 << VIRTIO_SCSI_F_INOUT) |\
			      (1 << VIRTIO_SCSI_F_CHANGE))

/* Path to folder where character device will be created. Can be set by user. */
static char dev_pathname[PATH_MAX] = "";

/* The single controller instance; only one vhost socket is supported. */
static struct vhost_scsi_ctrlr *g_vhost_ctrlr;
/* Set by destroy_device() to ask the worker thread to exit.
 * NOTE(review): plain int shared across threads without atomics;
 * presumably relies on eventual visibility - confirm acceptable.
 */
static int g_should_stop;
/* Posted by the worker thread on exit; destroy_device() waits on it. */
static sem_t exit_sem;
34static struct vhost_scsi_ctrlr *
35vhost_scsi_ctrlr_find(__rte_unused const char *ctrlr_name)
36{
37 /* currently we only support 1 socket file fd */
38 return g_vhost_ctrlr;
39}
40
41static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len)
42{
43 char path[PATH_MAX];
44 struct vhost_scsi_ctrlr *ctrlr;
45 int ret = 0;
46
47 ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
48 if (ret) {
49 fprintf(stderr, "Cannot get socket name\n");
50 assert(ret != 0);
51 }
52
53 ctrlr = vhost_scsi_ctrlr_find(path);
54 if (!ctrlr) {
55 fprintf(stderr, "Controller is not ready\n");
56 assert(ctrlr != NULL);
57 }
58
59 assert(ctrlr->mem != NULL);
60
61 return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len);
62}
63
static struct vring_desc *
descriptor_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc)
{
	/* 'next' is an index into the descriptor table; follow the chain. */
	uint16_t next_idx = cur_desc->next;

	return vq_desc + next_idx;
}
69
static bool
descriptor_has_next(struct vring_desc *cur_desc)
{
	/* A descriptor chains onward iff the NEXT flag is set. */
	return (cur_desc->flags & VRING_DESC_F_NEXT) != 0;
}
75
static bool
descriptor_is_wr(struct vring_desc *cur_desc)
{
	/* WRITE flag set means the buffer is device-writable
	 * (i.e. data flowing toward the guest).
	 */
	return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
}
81
/* Publish a completed task to the used ring of its vring and kick the
 * guest. Must be called after the task's response structure has been
 * filled in.
 */
static void
submit_completion(struct vhost_scsi_task *task, uint32_t q_idx)
{
	struct rte_vhost_vring *vq;
	struct vring_used *used;

	vq = task->vq;
	used = vq->used;
	/* Fill out the next entry in the "used" ring. id = the
	 * index of the descriptor that contained the SCSI request.
	 * len = the total amount of data transferred for the SCSI
	 * request. We must report the correct len, for variable
	 * length SCSI CDBs, where we may return less data than
	 * allocated by the guest VM.
	 */
	/* NOTE(review): the mask assumes vq->size is a power of two, and
	 * used->idx is published without a write barrier - presumably fine
	 * on x86's strong memory model; verify for weaker architectures.
	 */
	used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
	used->ring[used->idx & (vq->size - 1)].len = task->data_len;
	used->idx++;

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->bdev->vid, q_idx);
}
106
/* Walk the descriptor chain for a guest READ (device-to-guest data):
 * the first descriptor seen here is the virtio-scsi response header,
 * each subsequent descriptor is a data buffer collected into task->iovs.
 * On translation failure the function logs and returns early; task->resp
 * may then still be NULL.
 */
static void
vhost_process_read_payload_chain(struct vhost_scsi_task *task)
{
	void *data;
	uint64_t chunck_len;

	task->iovs_cnt = 0;
	chunck_len = task->desc->len;
	task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						   task->desc->addr,
						   &chunck_len);
	/* A shorter mapped length means the buffer straddles a guest
	 * memory region boundary; we require it to be contiguous.
	 */
	if (!task->resp || chunck_len != task->desc->len) {
		fprintf(stderr, "failed to translate desc address.\n");
		return;
	}

	while (descriptor_has_next(task->desc)) {
		task->desc = descriptor_get_next(task->vq->desc, task->desc);
		chunck_len = task->desc->len;
		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc->addr,
						     &chunck_len);
		if (!data || chunck_len != task->desc->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		/* NOTE(review): no bound check against the capacity of
		 * task->iovs - a long guest-supplied chain could overrun
		 * it; confirm the array size in vhost_scsi.h covers the
		 * maximum chain length.
		 */
		task->iovs[task->iovs_cnt].iov_base = data;
		task->iovs[task->iovs_cnt].iov_len = task->desc->len;
		task->data_len += task->desc->len;
		task->iovs_cnt++;
	}
}
140
/* Walk the descriptor chain for a guest WRITE (guest-to-device data):
 * every descriptor before the last is a data buffer gathered into
 * task->iovs; the final descriptor holds the virtio-scsi response
 * header. On translation failure the function logs and returns early;
 * task->resp may then still be NULL.
 */
static void
vhost_process_write_payload_chain(struct vhost_scsi_task *task)
{
	void *data;
	uint64_t chunck_len;

	task->iovs_cnt = 0;

	do {
		chunck_len = task->desc->len;
		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc->addr,
						     &chunck_len);
		/* Require each buffer to map contiguously in host memory. */
		if (!data || chunck_len != task->desc->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		/* NOTE(review): as in the read path, there is no bound
		 * check on task->iovs capacity - verify against the array
		 * size in vhost_scsi.h.
		 */
		task->iovs[task->iovs_cnt].iov_base = data;
		task->iovs[task->iovs_cnt].iov_len = task->desc->len;
		task->data_len += task->desc->len;
		task->iovs_cnt++;
		task->desc = descriptor_get_next(task->vq->desc, task->desc);
	} while (descriptor_has_next(task->desc));

	/* Last descriptor in the chain: the response header. */
	chunck_len = task->desc->len;
	task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						   task->desc->addr,
						   &chunck_len);
	if (!task->resp || chunck_len != task->desc->len)
		fprintf(stderr, "failed to translate desc address.\n");
}
173
174static struct vhost_block_dev *
175vhost_scsi_bdev_construct(const char *bdev_name, const char *bdev_serial,
176 uint32_t blk_size, uint64_t blk_cnt,
177 bool wce_enable)
178{
179 struct vhost_block_dev *bdev;
180
181 bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE);
182 if (!bdev)
183 return NULL;
184
185 strncpy(bdev->name, bdev_name, sizeof(bdev->name));
186 strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name));
187 bdev->blocklen = blk_size;
188 bdev->blockcnt = blk_cnt;
189 bdev->write_cache = wce_enable;
190
191 /* use memory as disk storage space */
192 bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0);
193 if (!bdev->data) {
194 fprintf(stderr, "no enough reseverd huge memory for disk\n");
195 return NULL;
196 }
197
198 return bdev;
199}
200
201static void
202process_requestq(struct vhost_scsi_ctrlr *ctrlr, uint32_t q_idx)
203{
204 int ret;
205 struct vhost_scsi_queue *scsi_vq;
206 struct rte_vhost_vring *vq;
207
208 scsi_vq = &ctrlr->bdev->queues[q_idx];
209 vq = &scsi_vq->vq;
210 ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, q_idx, vq);
211 assert(ret == 0);
212
213 while (vq->avail->idx != scsi_vq->last_used_idx) {
214 int req_idx;
215 uint16_t last_idx;
216 struct vhost_scsi_task *task;
217 uint64_t chunck_len;
218
219 last_idx = scsi_vq->last_used_idx & (vq->size - 1);
220 req_idx = vq->avail->ring[last_idx];
221
222 task = rte_zmalloc(NULL, sizeof(*task), 0);
223 assert(task != NULL);
224
225 task->ctrlr = ctrlr;
226 task->bdev = ctrlr->bdev;
227 task->vq = vq;
228 task->req_idx = req_idx;
229 task->desc = &task->vq->desc[task->req_idx];
230
231 /* does not support indirect descriptors */
232 assert((task->desc->flags & VRING_DESC_F_INDIRECT) == 0);
233 scsi_vq->last_used_idx++;
234
235 chunck_len = task->desc->len;
236 task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
237 task->desc->addr,
238 &chunck_len);
239 if (!task->req || chunck_len != task->desc->len) {
240 fprintf(stderr, "failed to translate desc address.\n");
241 return;
242 }
243
244 task->desc = descriptor_get_next(task->vq->desc, task->desc);
245 if (!descriptor_has_next(task->desc)) {
246 task->dxfer_dir = SCSI_DIR_NONE;
247 chunck_len = task->desc->len;
248 task->resp = (void *)(uintptr_t)
249 gpa_to_vva(task->bdev->vid,
250 task->desc->addr,
251 &chunck_len);
252 if (!task->resp || chunck_len != task->desc->len) {
253 fprintf(stderr, "failed to translate desc address.\n");
254 return;
255 }
256 } else if (!descriptor_is_wr(task->desc)) {
257 task->dxfer_dir = SCSI_DIR_TO_DEV;
258 vhost_process_write_payload_chain(task);
259 } else {
260 task->dxfer_dir = SCSI_DIR_FROM_DEV;
261 vhost_process_read_payload_chain(task);
262 }
263
264 ret = vhost_bdev_process_scsi_commands(ctrlr->bdev, task);
265 if (ret) {
266 /* invalid response */
267 task->resp->response = VIRTIO_SCSI_S_BAD_TARGET;
268 } else {
269 /* successfully */
270 task->resp->response = VIRTIO_SCSI_S_OK;
271 task->resp->status = 0;
272 task->resp->resid = 0;
273 }
274 submit_completion(task, q_idx);
275 rte_free(task);
276 }
277}
278
/* Main framework for processing IOs */
/* Worker thread entry point: pins itself to core 0 and polls the IO
 * queues of the controller passed as 'arg' until g_should_stop is set
 * or the bdev is detached, then posts exit_sem and returns.
 */
static void *
ctrlr_worker(void *arg)
{
	uint32_t idx, num;
	struct vhost_scsi_ctrlr *ctrlr = (struct vhost_scsi_ctrlr *)arg;
	cpu_set_t cpuset;
	pthread_t thread;

	if (ctrlr == NULL || ctrlr->bdev == NULL) {
		/* NOTE(review): exits with status 0 on an error condition;
		 * presumably intentional for this example - confirm.
		 */
		fprintf(stderr, "%s: Error, invalid argument passed to worker thread\n",
			__func__);
		exit(0);
	}

	/* Pin the polling thread to core 0. */
	thread = pthread_self();
	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);
	pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);

	num = rte_vhost_get_vring_num(ctrlr->bdev->vid);
	fprintf(stdout, "Ctrlr Worker Thread Started with %u Vring\n", num);

	if (num != NUM_OF_SCSI_QUEUES) {
		fprintf(stderr, "Only 1 IO queue are supported\n");
		exit(0);
	}

	while (!g_should_stop && ctrlr->bdev != NULL) {
		/* At least 3 vrings, currently only can support 1 IO queue
		 * Queue 2 for IO queue, does not support TMF and hotplug
		 * for the example application now
		 */
		for (idx = 2; idx < num; idx++)
			process_requestq(ctrlr, idx);
	}

	fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
	sem_post(&exit_sem);
	return NULL;
}
320
/* vhost library callback: a guest driver has connected. Caches the
 * guest memory table, constructs the backing bdev, initializes each
 * vring's bookkeeping, and launches the polling worker thread.
 *
 * @param vid  vhost device id assigned by the library.
 * @return 0 on success, -1 on any failure.
 */
static int
new_device(int vid)
{
	char path[PATH_MAX];
	struct vhost_scsi_ctrlr *ctrlr;
	struct vhost_scsi_queue *scsi_vq;
	struct rte_vhost_vring *vq;
	pthread_t tid;
	int i, ret;

	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
	if (ret) {
		fprintf(stderr, "Cannot get socket name\n");
		return -1;
	}

	ctrlr = vhost_scsi_ctrlr_find(path);
	if (!ctrlr) {
		fprintf(stderr, "Controller is not ready\n");
		return -1;
	}

	/* Cache the guest memory table used by gpa_to_vva(). */
	ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
	if (ret) {
		fprintf(stderr, "Get Controller memory region failed\n");
		return -1;
	}
	assert(ctrlr->mem != NULL);

	/* hardcoded block device information with 128MiB */
	ctrlr->bdev = vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0",
						4096, 32768, 0);
	if (!ctrlr->bdev)
		return -1;

	ctrlr->bdev->vid = vid;

	/* Disable Notifications */
	for (i = 0; i < NUM_OF_SCSI_QUEUES; i++) {
		rte_vhost_enable_guest_notification(vid, i, 0);
		/* restore used index */
		scsi_vq = &ctrlr->bdev->queues[i];
		vq = &scsi_vq->vq;
		ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
		assert(ret == 0);	/* compiled out under NDEBUG; ret then unused */
		scsi_vq->last_used_idx = vq->used->idx;
		/* NOTE(review): last_avail_idx is seeded from used->idx,
		 * not avail->idx - presumably "resume where the device
		 * left off"; verify this is intended.
		 */
		scsi_vq->last_avail_idx = vq->used->idx;
	}

	g_should_stop = 0;
	fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
	if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) {
		fprintf(stderr, "Worker Thread Started Failed\n");
		return -1;
	}
	pthread_detach(tid);
	return 0;
}
379
380static void
381destroy_device(int vid)
382{
383 char path[PATH_MAX];
384 struct vhost_scsi_ctrlr *ctrlr;
385
386 rte_vhost_get_ifname(vid, path, PATH_MAX);
387 fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
388 ctrlr = vhost_scsi_ctrlr_find(path);
389 if (!ctrlr) {
390 fprintf(stderr, "Destroy Ctrlr Failed\n");
391 return;
392 }
393 ctrlr->bdev = NULL;
394 g_should_stop = 1;
395
396 sem_wait(&exit_sem);
397}
398
/* Callbacks invoked by the vhost library on guest connect/disconnect. */
static const struct vhost_device_ops vhost_scsi_device_ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
};
403
404static struct vhost_scsi_ctrlr *
405vhost_scsi_ctrlr_construct(const char *ctrlr_name)
406{
407 int ret;
408 struct vhost_scsi_ctrlr *ctrlr;
409 char *path;
410 char cwd[PATH_MAX];
411
412 /* always use current directory */
413 path = getcwd(cwd, PATH_MAX);
414 if (!path) {
415 fprintf(stderr, "Cannot get current working directory\n");
416 return NULL;
417 }
418 snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name);
419
420 if (access(dev_pathname, F_OK) != -1) {
421 if (unlink(dev_pathname) != 0)
422 rte_exit(EXIT_FAILURE, "Cannot remove %s.\n",
423 dev_pathname);
424 }
425
426 if (rte_vhost_driver_register(dev_pathname, 0) != 0) {
427 fprintf(stderr, "socket %s already exists\n", dev_pathname);
428 return NULL;
429 }
430
431 fprintf(stdout, "socket file: %s created\n", dev_pathname);
432
433 ret = rte_vhost_driver_set_features(dev_pathname, VIRTIO_SCSI_FEATURES);
434 if (ret != 0) {
435 fprintf(stderr, "Set vhost driver features failed\n");
436 return NULL;
437 }
438
439 ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE);
440 if (!ctrlr)
441 return NULL;
442
443 rte_vhost_driver_callback_register(dev_pathname,
444 &vhost_scsi_device_ops);
445
446 return ctrlr;
447}
448
449static void
450signal_handler(__rte_unused int signum)
451{
452
453 if (access(dev_pathname, F_OK) == 0)
454 unlink(dev_pathname);
455 exit(0);
456}
457
458int main(int argc, char *argv[])
459{
460 int ret;
461
462 signal(SIGINT, signal_handler);
463
464 /* init EAL */
465 ret = rte_eal_init(argc, argv);
466 if (ret < 0)
467 rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
468
469 g_vhost_ctrlr = vhost_scsi_ctrlr_construct("vhost.socket");
470 if (g_vhost_ctrlr == NULL) {
471 fprintf(stderr, "Construct vhost scsi controller failed\n");
472 return 0;
473 }
474
475 if (sem_init(&exit_sem, 0, 0) < 0) {
476 fprintf(stderr, "Error init exit_sem\n");
477 return -1;
478 }
479
480 rte_vhost_driver_start(dev_pathname);
481
482 /* loop for exit the application */
483 while (1)
484 sleep(1);
485
486 return 0;
487}
488