]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2010-2017 Intel Corporation | |
3 | */ | |
4 | ||
5 | #include <stdint.h> | |
6 | #include <unistd.h> | |
7 | #include <stdbool.h> | |
8 | #include <signal.h> | |
9 | #include <assert.h> | |
10 | #include <semaphore.h> | |
11 | #include <linux/virtio_scsi.h> | |
12 | #include <linux/virtio_ring.h> | |
13 | ||
14 | #include <rte_atomic.h> | |
15 | #include <rte_cycles.h> | |
16 | #include <rte_log.h> | |
17 | #include <rte_malloc.h> | |
18 | #include <rte_vhost.h> | |
19 | ||
20 | #include "vhost_scsi.h" | |
21 | #include "scsi_spec.h" | |
22 | ||
23 | #define VIRTIO_SCSI_FEATURES ((1 << VIRTIO_F_NOTIFY_ON_EMPTY) |\ | |
24 | (1 << VIRTIO_SCSI_F_INOUT) |\ | |
25 | (1 << VIRTIO_SCSI_F_CHANGE)) | |
26 | ||
27 | /* Path to folder where character device will be created. Can be set by user. */ | |
28 | static char dev_pathname[PATH_MAX] = ""; | |
29 | ||
30 | static struct vhost_scsi_ctrlr *g_vhost_ctrlr; | |
31 | static int g_should_stop; | |
32 | static sem_t exit_sem; | |
33 | ||
34 | static struct vhost_scsi_ctrlr * | |
35 | vhost_scsi_ctrlr_find(__rte_unused const char *ctrlr_name) | |
36 | { | |
37 | /* currently we only support 1 socket file fd */ | |
38 | return g_vhost_ctrlr; | |
39 | } | |
40 | ||
41 | static uint64_t gpa_to_vva(int vid, uint64_t gpa, uint64_t *len) | |
42 | { | |
43 | char path[PATH_MAX]; | |
44 | struct vhost_scsi_ctrlr *ctrlr; | |
45 | int ret = 0; | |
46 | ||
47 | ret = rte_vhost_get_ifname(vid, path, PATH_MAX); | |
48 | if (ret) { | |
49 | fprintf(stderr, "Cannot get socket name\n"); | |
50 | assert(ret != 0); | |
51 | } | |
52 | ||
53 | ctrlr = vhost_scsi_ctrlr_find(path); | |
54 | if (!ctrlr) { | |
55 | fprintf(stderr, "Controller is not ready\n"); | |
56 | assert(ctrlr != NULL); | |
57 | } | |
58 | ||
59 | assert(ctrlr->mem != NULL); | |
60 | ||
61 | return rte_vhost_va_from_guest_pa(ctrlr->mem, gpa, len); | |
62 | } | |
63 | ||
/* Return the descriptor that follows @cur_desc in its chain, within the
 * descriptor table @vq_desc.
 */
static struct vring_desc *
descriptor_get_next(struct vring_desc *vq_desc, struct vring_desc *cur_desc)
{
	uint16_t next_idx = cur_desc->next;

	return vq_desc + next_idx;
}
69 | ||
/* True when @cur_desc is chained to a following descriptor. */
static bool
descriptor_has_next(struct vring_desc *cur_desc)
{
	return (cur_desc->flags & VRING_DESC_F_NEXT) != 0;
}
75 | ||
/* True when @cur_desc refers to a buffer the device writes (guest reads). */
static bool
descriptor_is_wr(struct vring_desc *cur_desc)
{
	return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
}
81 | ||
/*
 * Post a finished task into the "used" ring of its vring and notify the
 * guest.
 *
 * @task:  completed SCSI task; task->req_idx is the head descriptor
 *         index of the request chain and task->data_len the number of
 *         bytes transferred.
 * @q_idx: virtqueue index, used for the guest notification call.
 */
static void
submit_completion(struct vhost_scsi_task *task, uint32_t q_idx)
{
	struct rte_vhost_vring *vq;
	struct vring_used *used;

	vq = task->vq;
	used = vq->used;
	/* Fill out the next entry in the "used" ring. id = the
	 * index of the descriptor that contained the SCSI request.
	 * len = the total amount of data transferred for the SCSI
	 * request. We must report the correct len, for variable
	 * length SCSI CDBs, where we may return less data than
	 * allocated by the guest VM.
	 */
	/* NOTE(review): the (vq->size - 1) mask assumes the ring size is a
	 * power of two -- TODO confirm this is guaranteed for every guest.
	 */
	used->ring[used->idx & (vq->size - 1)].id = task->req_idx;
	used->ring[used->idx & (vq->size - 1)].len = task->data_len;
	/* NOTE(review): the new used index is published with no write
	 * barrier after the ring entry stores -- presumably relying on a
	 * strongly-ordered architecture; verify for weakly-ordered CPUs.
	 */
	used->idx++;

	/* Send an interrupt back to the guest VM so that it knows
	 * a completion is ready to be processed.
	 */
	rte_vhost_vring_call(task->bdev->vid, q_idx);
}
106 | ||
/*
 * Build the iovec list for a device-to-guest (read) transfer.
 *
 * On entry task->desc points at the descriptor holding the virtio-scsi
 * response; the remaining descriptors in the chain are the guest data
 * buffers to fill.  Each buffer's guest physical address is translated
 * and recorded in task->iovs, accumulating task->data_len and
 * task->iovs_cnt.  On translation failure the function returns early;
 * task->resp may then remain NULL and the iovec list partially built.
 */
static void
vhost_process_read_payload_chain(struct vhost_scsi_task *task)
{
	void *data;
	uint64_t chunck_len;

	task->iovs_cnt = 0;
	chunck_len = task->desc->len;
	/* First descriptor of this chain: the response buffer. */
	task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						   task->desc->addr,
						   &chunck_len);
	/* A shorter chunck_len means the region is not contiguously mapped. */
	if (!task->resp || chunck_len != task->desc->len) {
		fprintf(stderr, "failed to translate desc address.\n");
		return;
	}

	/* Walk the rest of the chain collecting the data buffers. */
	while (descriptor_has_next(task->desc)) {
		task->desc = descriptor_get_next(task->vq->desc, task->desc);
		chunck_len = task->desc->len;
		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc->addr,
						     &chunck_len);
		if (!data || chunck_len != task->desc->len) {
			/* NOTE(review): returns with a partially built iovec
			 * list and no error indication to the caller -- TODO
			 * confirm this is acceptable for the example app.
			 */
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		task->iovs[task->iovs_cnt].iov_base = data;
		task->iovs[task->iovs_cnt].iov_len = task->desc->len;
		task->data_len += task->desc->len;
		task->iovs_cnt++;
	}
}
140 | ||
/*
 * Build the iovec list for a guest-to-device (write) transfer.
 *
 * On entry task->desc points at the first data descriptor.  Each data
 * buffer is translated into task->iovs (accumulating task->data_len and
 * task->iovs_cnt); the final descriptor of the chain holds the
 * virtio-scsi response and is translated into task->resp.  On
 * translation failure the function returns early and task->resp may
 * remain NULL.
 */
static void
vhost_process_write_payload_chain(struct vhost_scsi_task *task)
{
	void *data;
	uint64_t chunck_len;

	task->iovs_cnt = 0;

	do {
		chunck_len = task->desc->len;
		data = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						     task->desc->addr,
						     &chunck_len);
		/* Shorter chunck_len: region not contiguously mapped. */
		if (!data || chunck_len != task->desc->len) {
			fprintf(stderr, "failed to translate desc address.\n");
			return;
		}

		task->iovs[task->iovs_cnt].iov_base = data;
		task->iovs[task->iovs_cnt].iov_len = task->desc->len;
		task->data_len += task->desc->len;
		task->iovs_cnt++;
		task->desc = descriptor_get_next(task->vq->desc, task->desc);
	} while (descriptor_has_next(task->desc));

	/* Loop exited on the descriptor without a NEXT flag: the chain's
	 * final descriptor, which carries the response buffer.
	 */
	chunck_len = task->desc->len;
	task->resp = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid,
						   task->desc->addr,
						   &chunck_len);
	if (!task->resp || chunck_len != task->desc->len)
		fprintf(stderr, "failed to translate desc address.\n");
}
173 | ||
174 | static struct vhost_block_dev * | |
175 | vhost_scsi_bdev_construct(const char *bdev_name, const char *bdev_serial, | |
176 | uint32_t blk_size, uint64_t blk_cnt, | |
177 | bool wce_enable) | |
178 | { | |
179 | struct vhost_block_dev *bdev; | |
180 | ||
181 | bdev = rte_zmalloc(NULL, sizeof(*bdev), RTE_CACHE_LINE_SIZE); | |
182 | if (!bdev) | |
183 | return NULL; | |
184 | ||
185 | strncpy(bdev->name, bdev_name, sizeof(bdev->name)); | |
186 | strncpy(bdev->product_name, bdev_serial, sizeof(bdev->product_name)); | |
187 | bdev->blocklen = blk_size; | |
188 | bdev->blockcnt = blk_cnt; | |
189 | bdev->write_cache = wce_enable; | |
190 | ||
191 | /* use memory as disk storage space */ | |
192 | bdev->data = rte_zmalloc(NULL, blk_cnt * blk_size, 0); | |
193 | if (!bdev->data) { | |
194 | fprintf(stderr, "no enough reseverd huge memory for disk\n"); | |
195 | return NULL; | |
196 | } | |
197 | ||
198 | return bdev; | |
199 | } | |
200 | ||
201 | static void | |
202 | process_requestq(struct vhost_scsi_ctrlr *ctrlr, uint32_t q_idx) | |
203 | { | |
204 | int ret; | |
205 | struct vhost_scsi_queue *scsi_vq; | |
206 | struct rte_vhost_vring *vq; | |
207 | ||
208 | scsi_vq = &ctrlr->bdev->queues[q_idx]; | |
209 | vq = &scsi_vq->vq; | |
210 | ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, q_idx, vq); | |
211 | assert(ret == 0); | |
212 | ||
213 | while (vq->avail->idx != scsi_vq->last_used_idx) { | |
214 | int req_idx; | |
215 | uint16_t last_idx; | |
216 | struct vhost_scsi_task *task; | |
217 | uint64_t chunck_len; | |
218 | ||
219 | last_idx = scsi_vq->last_used_idx & (vq->size - 1); | |
220 | req_idx = vq->avail->ring[last_idx]; | |
221 | ||
222 | task = rte_zmalloc(NULL, sizeof(*task), 0); | |
223 | assert(task != NULL); | |
224 | ||
225 | task->ctrlr = ctrlr; | |
226 | task->bdev = ctrlr->bdev; | |
227 | task->vq = vq; | |
228 | task->req_idx = req_idx; | |
229 | task->desc = &task->vq->desc[task->req_idx]; | |
230 | ||
231 | /* does not support indirect descriptors */ | |
232 | assert((task->desc->flags & VRING_DESC_F_INDIRECT) == 0); | |
233 | scsi_vq->last_used_idx++; | |
234 | ||
235 | chunck_len = task->desc->len; | |
236 | task->req = (void *)(uintptr_t)gpa_to_vva(task->bdev->vid, | |
237 | task->desc->addr, | |
238 | &chunck_len); | |
239 | if (!task->req || chunck_len != task->desc->len) { | |
240 | fprintf(stderr, "failed to translate desc address.\n"); | |
241 | return; | |
242 | } | |
243 | ||
244 | task->desc = descriptor_get_next(task->vq->desc, task->desc); | |
245 | if (!descriptor_has_next(task->desc)) { | |
246 | task->dxfer_dir = SCSI_DIR_NONE; | |
247 | chunck_len = task->desc->len; | |
248 | task->resp = (void *)(uintptr_t) | |
249 | gpa_to_vva(task->bdev->vid, | |
250 | task->desc->addr, | |
251 | &chunck_len); | |
252 | if (!task->resp || chunck_len != task->desc->len) { | |
253 | fprintf(stderr, "failed to translate desc address.\n"); | |
254 | return; | |
255 | } | |
256 | } else if (!descriptor_is_wr(task->desc)) { | |
257 | task->dxfer_dir = SCSI_DIR_TO_DEV; | |
258 | vhost_process_write_payload_chain(task); | |
259 | } else { | |
260 | task->dxfer_dir = SCSI_DIR_FROM_DEV; | |
261 | vhost_process_read_payload_chain(task); | |
262 | } | |
263 | ||
264 | ret = vhost_bdev_process_scsi_commands(ctrlr->bdev, task); | |
265 | if (ret) { | |
266 | /* invalid response */ | |
267 | task->resp->response = VIRTIO_SCSI_S_BAD_TARGET; | |
268 | } else { | |
269 | /* successfully */ | |
270 | task->resp->response = VIRTIO_SCSI_S_OK; | |
271 | task->resp->status = 0; | |
272 | task->resp->resid = 0; | |
273 | } | |
274 | submit_completion(task, q_idx); | |
275 | rte_free(task); | |
276 | } | |
277 | } | |
278 | ||
/* Main framework for processing IOs */
/*
 * Worker thread body.  Pins itself to CPU 0, validates the number of
 * vrings, then busy-polls the request queues (index 2 and up -- see the
 * comment in the loop) until g_should_stop is raised or the bdev is
 * torn down by destroy_device().  Posts exit_sem on the way out so
 * destroy_device() can synchronize with thread exit.
 *
 * @arg: the struct vhost_scsi_ctrlr * to service.
 */
static void *
ctrlr_worker(void *arg)
{
	uint32_t idx, num;
	struct vhost_scsi_ctrlr *ctrlr = (struct vhost_scsi_ctrlr *)arg;
	cpu_set_t cpuset;
	pthread_t thread;

	if (ctrlr == NULL || ctrlr->bdev == NULL) {
		fprintf(stderr, "%s: Error, invalid argument passed to worker thread\n",
			__func__);
		exit(0);
	}

	/* Pin this worker to CPU 0. */
	thread = pthread_self();
	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);
	pthread_setaffinity_np(thread, sizeof(cpu_set_t), &cpuset);

	num = rte_vhost_get_vring_num(ctrlr->bdev->vid);
	fprintf(stdout, "Ctrlr Worker Thread Started with %u Vring\n", num);

	if (num != NUM_OF_SCSI_QUEUES) {
		fprintf(stderr, "Only 1 IO queue are supported\n");
		exit(0);
	}

	while (!g_should_stop && ctrlr->bdev != NULL) {
		/* At least 3 vrings, currently only can support 1 IO queue
		 * Queue 2 for IO queue, does not support TMF and hotplug
		 * for the example application now
		 */
		for (idx = 2; idx < num; idx++)
			process_requestq(ctrlr, idx);
	}

	fprintf(stdout, "Ctrlr Worker Thread Exiting\n");
	sem_post(&exit_sem);
	return NULL;
}
320 | ||
/*
 * vhost "new device" callback, invoked when a guest connects to the
 * socket.  Captures the guest memory table, constructs the in-memory
 * block device backing the controller, primes the per-queue indices,
 * disables guest notifications (the worker polls), and launches the
 * detached worker thread.
 *
 * @vid: vhost device id of the connecting guest.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
new_device(int vid)
{
	char path[PATH_MAX];
	struct vhost_scsi_ctrlr *ctrlr;
	struct vhost_scsi_queue *scsi_vq;
	struct rte_vhost_vring *vq;
	pthread_t tid;
	int i, ret;

	ret = rte_vhost_get_ifname(vid, path, PATH_MAX);
	if (ret) {
		fprintf(stderr, "Cannot get socket name\n");
		return -1;
	}

	ctrlr = vhost_scsi_ctrlr_find(path);
	if (!ctrlr) {
		fprintf(stderr, "Controller is not ready\n");
		return -1;
	}

	/* Cache the guest memory table used by gpa_to_vva(). */
	ret = rte_vhost_get_mem_table(vid, &ctrlr->mem);
	if (ret) {
		fprintf(stderr, "Get Controller memory region failed\n");
		return -1;
	}
	assert(ctrlr->mem != NULL);

	/* hardcoded block device information with 128MiB */
	ctrlr->bdev = vhost_scsi_bdev_construct("malloc0", "vhost_scsi_malloc0",
						4096, 32768, 0);
	if (!ctrlr->bdev)
		return -1;

	ctrlr->bdev->vid = vid;

	/* Disable Notifications */
	for (i = 0; i < NUM_OF_SCSI_QUEUES; i++) {
		rte_vhost_enable_guest_notification(vid, i, 0);
		/* restore used index */
		scsi_vq = &ctrlr->bdev->queues[i];
		vq = &scsi_vq->vq;
		ret = rte_vhost_get_vhost_vring(ctrlr->bdev->vid, i, vq);
		assert(ret == 0);
		scsi_vq->last_used_idx = vq->used->idx;
		/* NOTE(review): the avail index is restored from used->idx --
		 * presumably assumes every previously available request was
		 * already consumed; verify behavior on guest reconnect.
		 */
		scsi_vq->last_avail_idx = vq->used->idx;
	}

	g_should_stop = 0;
	fprintf(stdout, "New Device %s, Device ID %d\n", path, vid);
	/* Spawn the detached worker that services the request queues. */
	if (pthread_create(&tid, NULL, &ctrlr_worker, ctrlr) < 0) {
		fprintf(stderr, "Worker Thread Started Failed\n");
		return -1;
	}
	pthread_detach(tid);
	return 0;
}
379 | ||
/*
 * vhost "destroy device" callback, invoked when the guest disconnects.
 * Signals the worker thread to stop and blocks on exit_sem until the
 * worker has posted it on its way out.
 *
 * @vid: vhost device id of the departing guest.
 */
static void
destroy_device(int vid)
{
	char path[PATH_MAX];
	struct vhost_scsi_ctrlr *ctrlr;

	rte_vhost_get_ifname(vid, path, PATH_MAX);
	fprintf(stdout, "Destroy %s Device ID %d\n", path, vid);
	ctrlr = vhost_scsi_ctrlr_find(path);
	if (!ctrlr) {
		fprintf(stderr, "Destroy Ctrlr Failed\n");
		return;
	}
	/* NOTE(review): bdev is cleared while the worker may still be
	 * polling it; the worker re-checks bdev and g_should_stop each
	 * iteration -- confirm there is no window where it dereferences
	 * the stale pointer mid-iteration.
	 */
	ctrlr->bdev = NULL;
	g_should_stop = 1;

	/* Wait for the worker thread to exit. */
	sem_wait(&exit_sem);
}
398 | ||
/* Callbacks the vhost library invokes on guest connect/disconnect. */
static const struct vhost_device_ops vhost_scsi_device_ops = {
	.new_device = new_device,
	.destroy_device = destroy_device,
};
403 | ||
404 | static struct vhost_scsi_ctrlr * | |
405 | vhost_scsi_ctrlr_construct(const char *ctrlr_name) | |
406 | { | |
407 | int ret; | |
408 | struct vhost_scsi_ctrlr *ctrlr; | |
409 | char *path; | |
410 | char cwd[PATH_MAX]; | |
411 | ||
412 | /* always use current directory */ | |
413 | path = getcwd(cwd, PATH_MAX); | |
414 | if (!path) { | |
415 | fprintf(stderr, "Cannot get current working directory\n"); | |
416 | return NULL; | |
417 | } | |
418 | snprintf(dev_pathname, sizeof(dev_pathname), "%s/%s", path, ctrlr_name); | |
419 | ||
420 | if (access(dev_pathname, F_OK) != -1) { | |
421 | if (unlink(dev_pathname) != 0) | |
422 | rte_exit(EXIT_FAILURE, "Cannot remove %s.\n", | |
423 | dev_pathname); | |
424 | } | |
425 | ||
426 | if (rte_vhost_driver_register(dev_pathname, 0) != 0) { | |
427 | fprintf(stderr, "socket %s already exists\n", dev_pathname); | |
428 | return NULL; | |
429 | } | |
430 | ||
431 | fprintf(stdout, "socket file: %s created\n", dev_pathname); | |
432 | ||
433 | ret = rte_vhost_driver_set_features(dev_pathname, VIRTIO_SCSI_FEATURES); | |
434 | if (ret != 0) { | |
435 | fprintf(stderr, "Set vhost driver features failed\n"); | |
436 | return NULL; | |
437 | } | |
438 | ||
439 | ctrlr = rte_zmalloc(NULL, sizeof(*ctrlr), RTE_CACHE_LINE_SIZE); | |
440 | if (!ctrlr) | |
441 | return NULL; | |
442 | ||
443 | rte_vhost_driver_callback_register(dev_pathname, | |
444 | &vhost_scsi_device_ops); | |
445 | ||
446 | return ctrlr; | |
447 | } | |
448 | ||
449 | static void | |
450 | signal_handler(__rte_unused int signum) | |
451 | { | |
452 | ||
453 | if (access(dev_pathname, F_OK) == 0) | |
454 | unlink(dev_pathname); | |
455 | exit(0); | |
456 | } | |
457 | ||
458 | int main(int argc, char *argv[]) | |
459 | { | |
460 | int ret; | |
461 | ||
462 | signal(SIGINT, signal_handler); | |
463 | ||
464 | /* init EAL */ | |
465 | ret = rte_eal_init(argc, argv); | |
466 | if (ret < 0) | |
467 | rte_exit(EXIT_FAILURE, "Error with EAL initialization\n"); | |
468 | ||
469 | g_vhost_ctrlr = vhost_scsi_ctrlr_construct("vhost.socket"); | |
470 | if (g_vhost_ctrlr == NULL) { | |
471 | fprintf(stderr, "Construct vhost scsi controller failed\n"); | |
472 | return 0; | |
473 | } | |
474 | ||
475 | if (sem_init(&exit_sem, 0, 0) < 0) { | |
476 | fprintf(stderr, "Error init exit_sem\n"); | |
477 | return -1; | |
478 | } | |
479 | ||
480 | rte_vhost_driver_start(dev_pathname); | |
481 | ||
482 | /* loop for exit the application */ | |
483 | while (1) | |
484 | sleep(1); | |
485 | ||
486 | return 0; | |
487 | } | |
488 |