1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2017 Intel Corporation
10 #include <semaphore.h>
11 #include <linux/virtio_scsi.h>
12 #include <linux/virtio_ring.h>
14 #include <rte_atomic.h>
15 #include <rte_cycles.h>
17 #include <rte_malloc.h>
18 #include <rte_vhost.h>
20 #include "vhost_scsi.h"
21 #include "scsi_spec.h"
23 #define VIRTIO_SCSI_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) |\
24 (1 << VIRTIO_SCSI_F_INOUT) |\
25 (1 << VIRTIO_SCSI_F_CHANGE))
27 /* Path to folder where character device will be created. Can be set by user. */
28 static char dev_pathname
[PATH_MAX
] = "";
30 static struct vhost_scsi_ctrlr
*g_vhost_ctrlr
;
31 static int g_should_stop
;
32 static sem_t exit_sem
;
34 static struct vhost_scsi_ctrlr
*
35 vhost_scsi_ctrlr_find(__rte_unused
const char *ctrlr_name
)
37 /* currently we only support 1 socket file fd */
41 static uint64_t gpa_to_vva(int vid
, uint64_t gpa
, uint64_t *len
)
44 struct vhost_scsi_ctrlr
*ctrlr
;
47 ret
= rte_vhost_get_ifname(vid
, path
, PATH_MAX
);
49 fprintf(stderr
, "Cannot get socket name\n");
53 ctrlr
= vhost_scsi_ctrlr_find(path
);
55 fprintf(stderr
, "Controller is not ready\n");
56 assert(ctrlr
!= NULL
);
59 assert(ctrlr
->mem
!= NULL
);
61 return rte_vhost_va_from_guest_pa(ctrlr
->mem
, gpa
, len
);
64 static struct vring_desc
*
65 descriptor_get_next(struct vring_desc
*vq_desc
, struct vring_desc
*cur_desc
)
67 return &vq_desc
[cur_desc
->next
];
71 descriptor_has_next(struct vring_desc
*cur_desc
)
73 return !!(cur_desc
->flags
& VRING_DESC_F_NEXT
);
77 descriptor_is_wr(struct vring_desc
*cur_desc
)
79 return !!(cur_desc
->flags
& VRING_DESC_F_WRITE
);
83 submit_completion(struct vhost_scsi_task
*task
, uint32_t q_idx
)
85 struct rte_vhost_vring
*vq
;
86 struct vring_used
*used
;
90 /* Fill out the next entry in the "used" ring. id = the
91 * index of the descriptor that contained the SCSI request.
92 * len = the total amount of data transferred for the SCSI
93 * request. We must report the correct len, for variable
94 * length SCSI CDBs, where we may return less data than
95 * allocated by the guest VM.
97 used
->ring
[used
->idx
& (vq
->size
- 1)].id
= task
->req_idx
;
98 used
->ring
[used
->idx
& (vq
->size
- 1)].len
= task
->data_len
;
101 /* Send an interrupt back to the guest VM so that it knows
102 * a completion is ready to be processed.
104 rte_vhost_vring_call(task
->bdev
->vid
, q_idx
);
108 vhost_process_read_payload_chain(struct vhost_scsi_task
*task
)
114 chunck_len
= task
->desc
->len
;
115 task
->resp
= (void *)(uintptr_t)gpa_to_vva(task
->bdev
->vid
,
118 if (!task
->resp
|| chunck_len
!= task
->desc
->len
) {
119 fprintf(stderr
, "failed to translate desc address.\n");
123 while (descriptor_has_next(task
->desc
)) {
124 task
->desc
= descriptor_get_next(task
->vq
->desc
, task
->desc
);
125 chunck_len
= task
->desc
->len
;
126 data
= (void *)(uintptr_t)gpa_to_vva(task
->bdev
->vid
,
129 if (!data
|| chunck_len
!= task
->desc
->len
) {
130 fprintf(stderr
, "failed to translate desc address.\n");
134 task
->iovs
[task
->iovs_cnt
].iov_base
= data
;
135 task
->iovs
[task
->iovs_cnt
].iov_len
= task
->desc
->len
;
136 task
->data_len
+= task
->desc
->len
;
142 vhost_process_write_payload_chain(struct vhost_scsi_task
*task
)
150 chunck_len
= task
->desc
->len
;
151 data
= (void *)(uintptr_t)gpa_to_vva(task
->bdev
->vid
,
154 if (!data
|| chunck_len
!= task
->desc
->len
) {
155 fprintf(stderr
, "failed to translate desc address.\n");
159 task
->iovs
[task
->iovs_cnt
].iov_base
= data
;
160 task
->iovs
[task
->iovs_cnt
].iov_len
= task
->desc
->len
;
161 task
->data_len
+= task
->desc
->len
;
163 task
->desc
= descriptor_get_next(task
->vq
->desc
, task
->desc
);
164 } while (descriptor_has_next(task
->desc
));
166 chunck_len
= task
->desc
->len
;
167 task
->resp
= (void *)(uintptr_t)gpa_to_vva(task
->bdev
->vid
,
170 if (!task
->resp
|| chunck_len
!= task
->desc
->len
)
171 fprintf(stderr
, "failed to translate desc address.\n");
174 static struct vhost_block_dev
*
175 vhost_scsi_bdev_construct(const char *bdev_name
, const char *bdev_serial
,
176 uint32_t blk_size
, uint64_t blk_cnt
,
179 struct vhost_block_dev
*bdev
;
181 bdev
= rte_zmalloc(NULL
, sizeof(*bdev
), RTE_CACHE_LINE_SIZE
);
185 strncpy(bdev
->name
, bdev_name
, sizeof(bdev
->name
));
186 strncpy(bdev
->product_name
, bdev_serial
, sizeof(bdev
->product_name
));
187 bdev
->blocklen
= blk_size
;
188 bdev
->blockcnt
= blk_cnt
;
189 bdev
->write_cache
= wce_enable
;
191 /* use memory as disk storage space */
192 bdev
->data
= rte_zmalloc(NULL
, blk_cnt
* blk_size
, 0);
194 fprintf(stderr
, "no enough reseverd huge memory for disk\n");
202 process_requestq(struct vhost_scsi_ctrlr
*ctrlr
, uint32_t q_idx
)
205 struct vhost_scsi_queue
*scsi_vq
;
206 struct rte_vhost_vring
*vq
;
208 scsi_vq
= &ctrlr
->bdev
->queues
[q_idx
];
210 ret
= rte_vhost_get_vhost_vring(ctrlr
->bdev
->vid
, q_idx
, vq
);
213 while (vq
->avail
->idx
!= scsi_vq
->last_used_idx
) {
216 struct vhost_scsi_task
*task
;
219 last_idx
= scsi_vq
->last_used_idx
& (vq
->size
- 1);
220 req_idx
= vq
->avail
->ring
[last_idx
];
222 task
= rte_zmalloc(NULL
, sizeof(*task
), 0);
223 assert(task
!= NULL
);
226 task
->bdev
= ctrlr
->bdev
;
228 task
->req_idx
= req_idx
;
229 task
->desc
= &task
->vq
->desc
[task
->req_idx
];
231 /* does not support indirect descriptors */
232 assert((task
->desc
->flags
& VRING_DESC_F_INDIRECT
) == 0);
233 scsi_vq
->last_used_idx
++;
235 chunck_len
= task
->desc
->len
;
236 task
->req
= (void *)(uintptr_t)gpa_to_vva(task
->bdev
->vid
,
239 if (!task
->req
|| chunck_len
!= task
->desc
->len
) {
240 fprintf(stderr
, "failed to translate desc address.\n");
244 task
->desc
= descriptor_get_next(task
->vq
->desc
, task
->desc
);
245 if (!descriptor_has_next(task
->desc
)) {
246 task
->dxfer_dir
= SCSI_DIR_NONE
;
247 chunck_len
= task
->desc
->len
;
248 task
->resp
= (void *)(uintptr_t)
249 gpa_to_vva(task
->bdev
->vid
,
252 if (!task
->resp
|| chunck_len
!= task
->desc
->len
) {
253 fprintf(stderr
, "failed to translate desc address.\n");
256 } else if (!descriptor_is_wr(task
->desc
)) {
257 task
->dxfer_dir
= SCSI_DIR_TO_DEV
;
258 vhost_process_write_payload_chain(task
);
260 task
->dxfer_dir
= SCSI_DIR_FROM_DEV
;
261 vhost_process_read_payload_chain(task
);
264 ret
= vhost_bdev_process_scsi_commands(ctrlr
->bdev
, task
);
266 /* invalid response */
267 task
->resp
->response
= VIRTIO_SCSI_S_BAD_TARGET
;
270 task
->resp
->response
= VIRTIO_SCSI_S_OK
;
271 task
->resp
->status
= 0;
272 task
->resp
->resid
= 0;
274 submit_completion(task
, q_idx
);
279 /* Main framework for processing IOs */
281 ctrlr_worker(void *arg
)
284 struct vhost_scsi_ctrlr
*ctrlr
= (struct vhost_scsi_ctrlr
*)arg
;
288 thread
= pthread_self();
291 pthread_setaffinity_np(thread
, sizeof(cpu_set_t
), &cpuset
);
293 num
= rte_vhost_get_vring_num(ctrlr
->bdev
->vid
);
294 fprintf(stdout
, "Ctrlr Worker Thread Started with %u Vring\n", num
);
296 if (num
!= NUM_OF_SCSI_QUEUES
) {
297 fprintf(stderr
, "Only 1 IO queue are supported\n");
301 while (!g_should_stop
&& ctrlr
->bdev
!= NULL
) {
302 /* At least 3 vrings, currently only can support 1 IO queue
303 * Queue 2 for IO queue, does not support TMF and hotplug
304 * for the example application now
306 for (idx
= 2; idx
< num
; idx
++)
307 process_requestq(ctrlr
, idx
);
310 fprintf(stdout
, "Ctrlr Worker Thread Exiting\n");
319 struct vhost_scsi_ctrlr
*ctrlr
;
320 struct vhost_scsi_queue
*scsi_vq
;
321 struct rte_vhost_vring
*vq
;
325 ret
= rte_vhost_get_ifname(vid
, path
, PATH_MAX
);
327 fprintf(stderr
, "Cannot get socket name\n");
331 ctrlr
= vhost_scsi_ctrlr_find(path
);
333 fprintf(stderr
, "Controller is not ready\n");
337 ret
= rte_vhost_get_mem_table(vid
, &ctrlr
->mem
);
339 fprintf(stderr
, "Get Controller memory region failed\n");
342 assert(ctrlr
->mem
!= NULL
);
344 /* hardcoded block device information with 128MiB */
345 ctrlr
->bdev
= vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0",
350 ctrlr
->bdev
->vid
= vid
;
352 /* Disable Notifications */
353 for (i
= 0; i
< NUM_OF_SCSI_QUEUES
; i
++) {
354 rte_vhost_enable_guest_notification(vid
, i
, 0);
355 /* restore used index */
356 scsi_vq
= &ctrlr
->bdev
->queues
[i
];
358 ret
= rte_vhost_get_vhost_vring(ctrlr
->bdev
->vid
, i
, vq
);
360 scsi_vq
->last_used_idx
= vq
->used
->idx
;
361 scsi_vq
->last_avail_idx
= vq
->used
->idx
;
365 fprintf(stdout
, "New Device %s, Device ID %d\n", path
, vid
);
366 if (pthread_create(&tid
, NULL
, &ctrlr_worker
, ctrlr
) < 0) {
367 fprintf(stderr
, "Worker Thread Started Failed\n");
375 destroy_device(int vid
)
378 struct vhost_scsi_ctrlr
*ctrlr
;
380 rte_vhost_get_ifname(vid
, path
, PATH_MAX
);
381 fprintf(stdout
, "Destroy %s Device ID %d\n", path
, vid
);
382 ctrlr
= vhost_scsi_ctrlr_find(path
);
384 fprintf(stderr
, "Destroy Ctrlr Failed\n");
393 static const struct vhost_device_ops vhost_scsi_device_ops
= {
394 .new_device
= new_device
,
395 .destroy_device
= destroy_device
,
398 static struct vhost_scsi_ctrlr
*
399 vhost_scsi_ctrlr_construct(const char *ctrlr_name
)
402 struct vhost_scsi_ctrlr
*ctrlr
;
406 /* always use current directory */
407 path
= getcwd(cwd
, PATH_MAX
);
409 fprintf(stderr
, "Cannot get current working directory\n");
412 snprintf(dev_pathname
, sizeof(dev_pathname
), "%s/%s", path
, ctrlr_name
);
414 if (access(dev_pathname
, F_OK
) != -1) {
415 if (unlink(dev_pathname
) != 0)
416 rte_exit(EXIT_FAILURE
, "Cannot remove %s.\n",
420 if (rte_vhost_driver_register(dev_pathname
, 0) != 0) {
421 fprintf(stderr
, "socket %s already exists\n", dev_pathname
);
425 fprintf(stdout
, "socket file: %s created\n", dev_pathname
);
427 ret
= rte_vhost_driver_set_features(dev_pathname
, VIRTIO_SCSI_FEATURES
);
429 fprintf(stderr
, "Set vhost driver features failed\n");
433 ctrlr
= rte_zmalloc(NULL
, sizeof(*ctrlr
), RTE_CACHE_LINE_SIZE
);
437 rte_vhost_driver_callback_register(dev_pathname
,
438 &vhost_scsi_device_ops
);
444 signal_handler(__rte_unused
int signum
)
447 if (access(dev_pathname
, F_OK
) == 0)
448 unlink(dev_pathname
);
452 int main(int argc
, char *argv
[])
456 signal(SIGINT
, signal_handler
);
459 ret
= rte_eal_init(argc
, argv
);
461 rte_exit(EXIT_FAILURE
, "Error with EAL initialization\n");
463 g_vhost_ctrlr
= vhost_scsi_ctrlr_construct("vhost.socket");
464 if (g_vhost_ctrlr
== NULL
) {
465 fprintf(stderr
, "Construct vhost scsi controller failed\n");
469 if (sem_init(&exit_sem
, 0, 0) < 0) {
470 fprintf(stderr
, "Error init exit_sem\n");
474 rte_vhost_driver_start(dev_pathname
);
476 /* loop for exit the application */