]> git.proxmox.com Git - mirror_qemu.git/blob - block/iscsi.c
block: Switch discard length bounds to byte-based
[mirror_qemu.git] / block / iscsi.c
1 /*
2 * QEMU Block driver for iSCSI images
3 *
4 * Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
5 * Copyright (c) 2012-2015 Peter Lieven <pl@kamp.de>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "qemu/osdep.h"
27
28 #include <poll.h>
29 #include <math.h>
30 #include <arpa/inet.h>
31 #include "qemu-common.h"
32 #include "qemu/config-file.h"
33 #include "qemu/error-report.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "block/block_int.h"
37 #include "block/scsi.h"
38 #include "qemu/iov.h"
39 #include "sysemu/sysemu.h"
40 #include "qmp-commands.h"
41 #include "qapi/qmp/qstring.h"
42 #include "crypto/secret.h"
43
44 #include <iscsi/iscsi.h>
45 #include <iscsi/scsi-lowlevel.h>
46
47 #ifdef __linux__
48 #include <scsi/sg.h>
49 #include <block/scsi.h>
50 #endif
51
/* Per-LUN driver state; this is what bs->opaque points to for iSCSI nodes. */
typedef struct IscsiLun {
    struct iscsi_context *iscsi;    /* libiscsi context all commands go through */
    AioContext *aio_context;        /* context for fd handlers, BHs and timers */
    int lun;                        /* LUN number passed to every libiscsi call */
    enum scsi_inquiry_peripheral_device_type type; /* reported by SG_GET_SCSI_ID */
    int block_size;                 /* logical block size of the LUN in bytes */
    uint64_t num_blocks;            /* LUN size in logical blocks */
    int events;                     /* event mask last registered by iscsi_set_events() */
    QEMUTimer *nop_timer;
    QEMUTimer *event_timer;         /* re-armed every EVENT_INTERVAL ms */
    struct scsi_inquiry_logical_block_provisioning lbp; /* lbpu/lbpws/lbpws10 gate UNMAP use */
    struct scsi_inquiry_block_limits bl;
    unsigned char *zeroblock;       /* lazily allocated zeroed block, WRITE SAME payload */
    unsigned long *allocationmap;   /* one bit per cluster; NULL disables tracking */
    int cluster_sectors;            /* allocationmap granularity in qemu sectors */
    bool use_16_for_rw;             /* use the 16-byte READ/WRITE CDB variants */
    bool write_protected;
    bool lbpme;                     /* if false, block status reports everything allocated */
    bool lbprz;                     /* deallocated blocks are reported as BDRV_BLOCK_ZERO */
    bool dpofua;                    /* must be set for BDRV_REQ_FUA writes (asserted) */
    bool has_write_same;            /* cleared once the target rejects WRITE SAME */
    bool request_timed_out;         /* set on SCSI_STATUS_TIMEOUT; triggers a reconnect */
} IscsiLun;
75
/* State of one coroutine-based request, filled in by iscsi_co_generic_cb(). */
typedef struct IscsiTask {
    int status;             /* SCSI status handed to the completion callback */
    int complete;           /* non-zero once the waiting coroutine may continue */
    int retries;            /* number of failed attempts so far */
    int do_retry;           /* non-zero if the request has to be issued again */
    struct scsi_task *task; /* libiscsi task; freed by the issuing coroutine */
    Coroutine *co;          /* coroutine to re-enter on completion */
    QEMUBH *bh;             /* bottom half used to re-enter the coroutine */
    IscsiLun *iscsilun;
    QEMUTimer retry_timer;  /* delays the retry of busy/timed out requests */
    int err_code;           /* negative errno derived from the sense data */
} IscsiTask;
88
/* AIO control block used by the ioctl (SG_IO) path. */
typedef struct IscsiAIOCB {
    BlockAIOCB common;
    QEMUIOVector *qiov;
    QEMUBH *bh;             /* completion bottom half; NULL until scheduled */
    IscsiLun *iscsilun;
    struct scsi_task *task; /* task submitted for an SG_IO request */
    uint8_t *buf;           /* released with g_free() on completion */
    int status;             /* -EINPROGRESS while pending, then the result */
    int64_t sector_num;
    int nb_sectors;
    int ret;                /* result reported for emulated ioctls */
#ifdef __linux__
    sg_io_hdr_t *ioh;       /* caller's SG_IO header, updated on completion */
#endif
} IscsiAIOCB;
104
/* libiscsi uses time_t so it's enough to process events every second */
#define EVENT_INTERVAL 1000     /* in ms */
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
/* a failed command is retried at most once per entry of iscsi_retry_times */
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
/* mean delay in ms before the n-th retry of a busy/task-set-full command */
static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
111
/* This threshold is a trade-off knob to choose between
 * the potential additional overhead of an extra GET_LBA_STATUS request
 * vs. unnecessarily reading a lot of zero sectors over the wire.
 * If a read request is greater than or equal to ISCSI_CHECKALLOC_THRES
 * sectors, we first check the allocation status of the area covered by
 * the request if the allocationmap indicates that the area might be
 * unallocated. */
#define ISCSI_CHECKALLOC_THRES 64
120
121 static void
122 iscsi_bh_cb(void *p)
123 {
124 IscsiAIOCB *acb = p;
125
126 qemu_bh_delete(acb->bh);
127
128 g_free(acb->buf);
129 acb->buf = NULL;
130
131 acb->common.cb(acb->common.opaque, acb->status);
132
133 if (acb->task != NULL) {
134 scsi_free_scsi_task(acb->task);
135 acb->task = NULL;
136 }
137
138 qemu_aio_unref(acb);
139 }
140
141 static void
142 iscsi_schedule_bh(IscsiAIOCB *acb)
143 {
144 if (acb->bh) {
145 return;
146 }
147 acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb);
148 qemu_bh_schedule(acb->bh);
149 }
150
151 static void iscsi_co_generic_bh_cb(void *opaque)
152 {
153 struct IscsiTask *iTask = opaque;
154 iTask->complete = 1;
155 qemu_bh_delete(iTask->bh);
156 qemu_coroutine_enter(iTask->co, NULL);
157 }
158
159 static void iscsi_retry_timer_expired(void *opaque)
160 {
161 struct IscsiTask *iTask = opaque;
162 iTask->complete = 1;
163 if (iTask->co) {
164 qemu_coroutine_enter(iTask->co, NULL);
165 }
166 }
167
168 static inline unsigned exp_random(double mean)
169 {
170 return -mean * log((double)rand() / RAND_MAX);
171 }
172
/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
 * libiscsi 1.10.0, together with other constants we need.  Use it as
 * a hint that we have to define them ourselves if needed, to keep the
 * minimum required libiscsi version at 1.9.0.  We use an ASCQ macro for
 * the test because SCSI_STATUS_* is an enum.
 *
 * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
 * an enum, check against the LIBISCSI_API_VERSION macro, which was
 * introduced in 1.11.0.  If it is present, there is no need to define
 * anything.
 */
#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
    !defined(LIBISCSI_API_VERSION)
/* fallback definitions for libiscsi versions that lack these constants */
#define SCSI_STATUS_TASK_SET_FULL                          0x28
#define SCSI_STATUS_TIMEOUT                                0x0f000002
#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST    0x2600
#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR        0x1a00
#endif
191
192 static int iscsi_translate_sense(struct scsi_sense *sense)
193 {
194 int ret;
195
196 switch (sense->key) {
197 case SCSI_SENSE_NOT_READY:
198 return -EBUSY;
199 case SCSI_SENSE_DATA_PROTECTION:
200 return -EACCES;
201 case SCSI_SENSE_COMMAND_ABORTED:
202 return -ECANCELED;
203 case SCSI_SENSE_ILLEGAL_REQUEST:
204 /* Parse ASCQ */
205 break;
206 default:
207 return -EIO;
208 }
209 switch (sense->ascq) {
210 case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
211 case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
212 case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
213 case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
214 ret = -EINVAL;
215 break;
216 case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
217 ret = -ENOSPC;
218 break;
219 case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
220 ret = -ENOTSUP;
221 break;
222 case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
223 case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
224 case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
225 ret = -ENOMEDIUM;
226 break;
227 case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
228 ret = -EACCES;
229 break;
230 default:
231 ret = -EIO;
232 break;
233 }
234 return ret;
235 }
236
237 static void
238 iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
239 void *command_data, void *opaque)
240 {
241 struct IscsiTask *iTask = opaque;
242 struct scsi_task *task = command_data;
243
244 iTask->status = status;
245 iTask->do_retry = 0;
246 iTask->task = task;
247
248 if (status != SCSI_STATUS_GOOD) {
249 if (iTask->retries++ < ISCSI_CMD_RETRIES) {
250 if (status == SCSI_STATUS_CHECK_CONDITION
251 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
252 error_report("iSCSI CheckCondition: %s",
253 iscsi_get_error(iscsi));
254 iTask->do_retry = 1;
255 goto out;
256 }
257 if (status == SCSI_STATUS_BUSY ||
258 status == SCSI_STATUS_TIMEOUT ||
259 status == SCSI_STATUS_TASK_SET_FULL) {
260 unsigned retry_time =
261 exp_random(iscsi_retry_times[iTask->retries - 1]);
262 if (status == SCSI_STATUS_TIMEOUT) {
263 /* make sure the request is rescheduled AFTER the
264 * reconnect is initiated */
265 retry_time = EVENT_INTERVAL * 2;
266 iTask->iscsilun->request_timed_out = true;
267 }
268 error_report("iSCSI Busy/TaskSetFull/TimeOut"
269 " (retry #%u in %u ms): %s",
270 iTask->retries, retry_time,
271 iscsi_get_error(iscsi));
272 aio_timer_init(iTask->iscsilun->aio_context,
273 &iTask->retry_timer, QEMU_CLOCK_REALTIME,
274 SCALE_MS, iscsi_retry_timer_expired, iTask);
275 timer_mod(&iTask->retry_timer,
276 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time);
277 iTask->do_retry = 1;
278 return;
279 }
280 }
281 iTask->err_code = iscsi_translate_sense(&task->sense);
282 error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
283 }
284
285 out:
286 if (iTask->co) {
287 iTask->bh = aio_bh_new(iTask->iscsilun->aio_context,
288 iscsi_co_generic_bh_cb, iTask);
289 qemu_bh_schedule(iTask->bh);
290 } else {
291 iTask->complete = 1;
292 }
293 }
294
295 static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask)
296 {
297 *iTask = (struct IscsiTask) {
298 .co = qemu_coroutine_self(),
299 .iscsilun = iscsilun,
300 };
301 }
302
303 static void
304 iscsi_abort_task_cb(struct iscsi_context *iscsi, int status, void *command_data,
305 void *private_data)
306 {
307 IscsiAIOCB *acb = private_data;
308
309 acb->status = -ECANCELED;
310 iscsi_schedule_bh(acb);
311 }
312
313 static void
314 iscsi_aio_cancel(BlockAIOCB *blockacb)
315 {
316 IscsiAIOCB *acb = (IscsiAIOCB *)blockacb;
317 IscsiLun *iscsilun = acb->iscsilun;
318
319 if (acb->status != -EINPROGRESS) {
320 return;
321 }
322
323 /* send a task mgmt call to the target to cancel the task on the target */
324 iscsi_task_mgmt_abort_task_async(iscsilun->iscsi, acb->task,
325 iscsi_abort_task_cb, acb);
326
327 }
328
/* AIOCB description handed to qemu_aio_get() when creating an IscsiAIOCB. */
static const AIOCBInfo iscsi_aiocb_info = {
    .aiocb_size         = sizeof(IscsiAIOCB),
    .cancel_async       = iscsi_aio_cancel,
};
333
334
/* fd handlers, defined below; iscsi_set_events() needs to reference them */
static void iscsi_process_read(void *arg);
static void iscsi_process_write(void *arg);
337
338 static void
339 iscsi_set_events(IscsiLun *iscsilun)
340 {
341 struct iscsi_context *iscsi = iscsilun->iscsi;
342 int ev = iscsi_which_events(iscsi);
343
344 if (ev != iscsilun->events) {
345 aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
346 false,
347 (ev & POLLIN) ? iscsi_process_read : NULL,
348 (ev & POLLOUT) ? iscsi_process_write : NULL,
349 iscsilun);
350 iscsilun->events = ev;
351 }
352 }
353
354 static void iscsi_timed_check_events(void *opaque)
355 {
356 IscsiLun *iscsilun = opaque;
357
358 /* check for timed out requests */
359 iscsi_service(iscsilun->iscsi, 0);
360
361 if (iscsilun->request_timed_out) {
362 iscsilun->request_timed_out = false;
363 iscsi_reconnect(iscsilun->iscsi);
364 }
365
366 /* newer versions of libiscsi may return zero events. Ensure we are able
367 * to return to service once this situation changes. */
368 iscsi_set_events(iscsilun);
369
370 timer_mod(iscsilun->event_timer,
371 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
372 }
373
374 static void
375 iscsi_process_read(void *arg)
376 {
377 IscsiLun *iscsilun = arg;
378 struct iscsi_context *iscsi = iscsilun->iscsi;
379
380 iscsi_service(iscsi, POLLIN);
381 iscsi_set_events(iscsilun);
382 }
383
384 static void
385 iscsi_process_write(void *arg)
386 {
387 IscsiLun *iscsilun = arg;
388 struct iscsi_context *iscsi = iscsilun->iscsi;
389
390 iscsi_service(iscsi, POLLOUT);
391 iscsi_set_events(iscsilun);
392 }
393
394 static int64_t sector_lun2qemu(int64_t sector, IscsiLun *iscsilun)
395 {
396 return sector * iscsilun->block_size / BDRV_SECTOR_SIZE;
397 }
398
399 static int64_t sector_qemu2lun(int64_t sector, IscsiLun *iscsilun)
400 {
401 return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
402 }
403
404 static bool is_byte_request_lun_aligned(int64_t offset, int count,
405 IscsiLun *iscsilun)
406 {
407 if (offset % iscsilun->block_size || count % iscsilun->block_size) {
408 error_report("iSCSI misaligned request: "
409 "iscsilun->block_size %u, offset %" PRIi64
410 ", count %d",
411 iscsilun->block_size, offset, count);
412 return false;
413 }
414 return true;
415 }
416
417 static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
418 IscsiLun *iscsilun)
419 {
420 assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS);
421 return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS,
422 nb_sectors << BDRV_SECTOR_BITS,
423 iscsilun);
424 }
425
426 static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
427 {
428 return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
429 iscsilun),
430 iscsilun->cluster_sectors));
431 }
432
433 static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
434 int nb_sectors)
435 {
436 if (iscsilun->allocationmap == NULL) {
437 return;
438 }
439 bitmap_set(iscsilun->allocationmap,
440 sector_num / iscsilun->cluster_sectors,
441 DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
442 }
443
444 static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
445 int nb_sectors)
446 {
447 int64_t cluster_num, nb_clusters;
448 if (iscsilun->allocationmap == NULL) {
449 return;
450 }
451 cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
452 nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
453 - cluster_num;
454 if (nb_clusters > 0) {
455 bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
456 }
457 }
458
459 static int coroutine_fn
460 iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
461 QEMUIOVector *iov, int flags)
462 {
463 IscsiLun *iscsilun = bs->opaque;
464 struct IscsiTask iTask;
465 uint64_t lba;
466 uint32_t num_sectors;
467 bool fua = flags & BDRV_REQ_FUA;
468
469 if (fua) {
470 assert(iscsilun->dpofua);
471 }
472 if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
473 return -EINVAL;
474 }
475
476 if (bs->bl.max_transfer &&
477 nb_sectors << BDRV_SECTOR_BITS > bs->bl.max_transfer) {
478 error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
479 "of %" PRIu32 " bytes", nb_sectors, bs->bl.max_transfer);
480 return -EINVAL;
481 }
482
483 lba = sector_qemu2lun(sector_num, iscsilun);
484 num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
485 iscsi_co_init_iscsitask(iscsilun, &iTask);
486 retry:
487 if (iscsilun->use_16_for_rw) {
488 iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
489 NULL, num_sectors * iscsilun->block_size,
490 iscsilun->block_size, 0, 0, fua, 0, 0,
491 iscsi_co_generic_cb, &iTask);
492 } else {
493 iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
494 NULL, num_sectors * iscsilun->block_size,
495 iscsilun->block_size, 0, 0, fua, 0, 0,
496 iscsi_co_generic_cb, &iTask);
497 }
498 if (iTask.task == NULL) {
499 return -ENOMEM;
500 }
501 scsi_task_set_iov_out(iTask.task, (struct scsi_iovec *) iov->iov,
502 iov->niov);
503 while (!iTask.complete) {
504 iscsi_set_events(iscsilun);
505 qemu_coroutine_yield();
506 }
507
508 if (iTask.task != NULL) {
509 scsi_free_scsi_task(iTask.task);
510 iTask.task = NULL;
511 }
512
513 if (iTask.do_retry) {
514 iTask.complete = 0;
515 goto retry;
516 }
517
518 if (iTask.status != SCSI_STATUS_GOOD) {
519 return iTask.err_code;
520 }
521
522 iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
523
524 return 0;
525 }
526
527
528 static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
529 int64_t sector_num, int nb_sectors)
530 {
531 unsigned long size;
532 if (iscsilun->allocationmap == NULL) {
533 return true;
534 }
535 size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
536 return !(find_next_bit(iscsilun->allocationmap, size,
537 sector_num / iscsilun->cluster_sectors) == size);
538 }
539
540 static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
541 int64_t sector_num,
542 int nb_sectors, int *pnum,
543 BlockDriverState **file)
544 {
545 IscsiLun *iscsilun = bs->opaque;
546 struct scsi_get_lba_status *lbas = NULL;
547 struct scsi_lba_status_descriptor *lbasd = NULL;
548 struct IscsiTask iTask;
549 int64_t ret;
550
551 iscsi_co_init_iscsitask(iscsilun, &iTask);
552
553 if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
554 ret = -EINVAL;
555 goto out;
556 }
557
558 /* default to all sectors allocated */
559 ret = BDRV_BLOCK_DATA;
560 ret |= (sector_num << BDRV_SECTOR_BITS) | BDRV_BLOCK_OFFSET_VALID;
561 *pnum = nb_sectors;
562
563 /* LUN does not support logical block provisioning */
564 if (!iscsilun->lbpme) {
565 goto out;
566 }
567
568 retry:
569 if (iscsi_get_lba_status_task(iscsilun->iscsi, iscsilun->lun,
570 sector_qemu2lun(sector_num, iscsilun),
571 8 + 16, iscsi_co_generic_cb,
572 &iTask) == NULL) {
573 ret = -ENOMEM;
574 goto out;
575 }
576
577 while (!iTask.complete) {
578 iscsi_set_events(iscsilun);
579 qemu_coroutine_yield();
580 }
581
582 if (iTask.do_retry) {
583 if (iTask.task != NULL) {
584 scsi_free_scsi_task(iTask.task);
585 iTask.task = NULL;
586 }
587 iTask.complete = 0;
588 goto retry;
589 }
590
591 if (iTask.status != SCSI_STATUS_GOOD) {
592 /* in case the get_lba_status_callout fails (i.e.
593 * because the device is busy or the cmd is not
594 * supported) we pretend all blocks are allocated
595 * for backwards compatibility */
596 goto out;
597 }
598
599 lbas = scsi_datain_unmarshall(iTask.task);
600 if (lbas == NULL) {
601 ret = -EIO;
602 goto out;
603 }
604
605 lbasd = &lbas->descriptors[0];
606
607 if (sector_qemu2lun(sector_num, iscsilun) != lbasd->lba) {
608 ret = -EIO;
609 goto out;
610 }
611
612 *pnum = sector_lun2qemu(lbasd->num_blocks, iscsilun);
613
614 if (lbasd->provisioning == SCSI_PROVISIONING_TYPE_DEALLOCATED ||
615 lbasd->provisioning == SCSI_PROVISIONING_TYPE_ANCHORED) {
616 ret &= ~BDRV_BLOCK_DATA;
617 if (iscsilun->lbprz) {
618 ret |= BDRV_BLOCK_ZERO;
619 }
620 }
621
622 if (ret & BDRV_BLOCK_ZERO) {
623 iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
624 } else {
625 iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
626 }
627
628 if (*pnum > nb_sectors) {
629 *pnum = nb_sectors;
630 }
631 out:
632 if (iTask.task != NULL) {
633 scsi_free_scsi_task(iTask.task);
634 }
635 if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
636 *file = bs;
637 }
638 return ret;
639 }
640
641 static int coroutine_fn iscsi_co_readv(BlockDriverState *bs,
642 int64_t sector_num, int nb_sectors,
643 QEMUIOVector *iov)
644 {
645 IscsiLun *iscsilun = bs->opaque;
646 struct IscsiTask iTask;
647 uint64_t lba;
648 uint32_t num_sectors;
649
650 if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
651 return -EINVAL;
652 }
653
654 if (bs->bl.max_transfer &&
655 nb_sectors << BDRV_SECTOR_BITS > bs->bl.max_transfer) {
656 error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
657 "of %" PRIu32 " bytes", nb_sectors, bs->bl.max_transfer);
658 return -EINVAL;
659 }
660
661 if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
662 !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
663 int64_t ret;
664 int pnum;
665 BlockDriverState *file;
666 ret = iscsi_co_get_block_status(bs, sector_num,
667 BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
668 if (ret < 0) {
669 return ret;
670 }
671 if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
672 qemu_iovec_memset(iov, 0, 0x00, iov->size);
673 return 0;
674 }
675 }
676
677 lba = sector_qemu2lun(sector_num, iscsilun);
678 num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
679
680 iscsi_co_init_iscsitask(iscsilun, &iTask);
681 retry:
682 if (iscsilun->use_16_for_rw) {
683 iTask.task = iscsi_read16_task(iscsilun->iscsi, iscsilun->lun, lba,
684 num_sectors * iscsilun->block_size,
685 iscsilun->block_size, 0, 0, 0, 0, 0,
686 iscsi_co_generic_cb, &iTask);
687 } else {
688 iTask.task = iscsi_read10_task(iscsilun->iscsi, iscsilun->lun, lba,
689 num_sectors * iscsilun->block_size,
690 iscsilun->block_size,
691 0, 0, 0, 0, 0,
692 iscsi_co_generic_cb, &iTask);
693 }
694 if (iTask.task == NULL) {
695 return -ENOMEM;
696 }
697 scsi_task_set_iov_in(iTask.task, (struct scsi_iovec *) iov->iov, iov->niov);
698
699 while (!iTask.complete) {
700 iscsi_set_events(iscsilun);
701 qemu_coroutine_yield();
702 }
703
704 if (iTask.task != NULL) {
705 scsi_free_scsi_task(iTask.task);
706 iTask.task = NULL;
707 }
708
709 if (iTask.do_retry) {
710 iTask.complete = 0;
711 goto retry;
712 }
713
714 if (iTask.status != SCSI_STATUS_GOOD) {
715 return iTask.err_code;
716 }
717
718 return 0;
719 }
720
721 static int coroutine_fn iscsi_co_flush(BlockDriverState *bs)
722 {
723 IscsiLun *iscsilun = bs->opaque;
724 struct IscsiTask iTask;
725
726 iscsi_co_init_iscsitask(iscsilun, &iTask);
727 retry:
728 if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
729 0, iscsi_co_generic_cb, &iTask) == NULL) {
730 return -ENOMEM;
731 }
732
733 while (!iTask.complete) {
734 iscsi_set_events(iscsilun);
735 qemu_coroutine_yield();
736 }
737
738 if (iTask.task != NULL) {
739 scsi_free_scsi_task(iTask.task);
740 iTask.task = NULL;
741 }
742
743 if (iTask.do_retry) {
744 iTask.complete = 0;
745 goto retry;
746 }
747
748 if (iTask.status != SCSI_STATUS_GOOD) {
749 return iTask.err_code;
750 }
751
752 return 0;
753 }
754
755 #ifdef __linux__
756 static void
757 iscsi_aio_ioctl_cb(struct iscsi_context *iscsi, int status,
758 void *command_data, void *opaque)
759 {
760 IscsiAIOCB *acb = opaque;
761
762 g_free(acb->buf);
763 acb->buf = NULL;
764
765 acb->status = 0;
766 if (status < 0) {
767 error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
768 iscsi_get_error(iscsi));
769 acb->status = iscsi_translate_sense(&acb->task->sense);
770 }
771
772 acb->ioh->driver_status = 0;
773 acb->ioh->host_status = 0;
774 acb->ioh->resid = 0;
775 acb->ioh->status = status;
776
777 #define SG_ERR_DRIVER_SENSE 0x08
778
779 if (status == SCSI_STATUS_CHECK_CONDITION && acb->task->datain.size >= 2) {
780 int ss;
781
782 acb->ioh->driver_status |= SG_ERR_DRIVER_SENSE;
783
784 acb->ioh->sb_len_wr = acb->task->datain.size - 2;
785 ss = (acb->ioh->mx_sb_len >= acb->ioh->sb_len_wr) ?
786 acb->ioh->mx_sb_len : acb->ioh->sb_len_wr;
787 memcpy(acb->ioh->sbp, &acb->task->datain.data[2], ss);
788 }
789
790 iscsi_schedule_bh(acb);
791 }
792
793 static void iscsi_ioctl_bh_completion(void *opaque)
794 {
795 IscsiAIOCB *acb = opaque;
796
797 qemu_bh_delete(acb->bh);
798 acb->common.cb(acb->common.opaque, acb->ret);
799 qemu_aio_unref(acb);
800 }
801
802 static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
803 {
804 BlockDriverState *bs = acb->common.bs;
805 IscsiLun *iscsilun = bs->opaque;
806 int ret = 0;
807
808 switch (req) {
809 case SG_GET_VERSION_NUM:
810 *(int *)buf = 30000;
811 break;
812 case SG_GET_SCSI_ID:
813 ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
814 break;
815 default:
816 ret = -EINVAL;
817 }
818 assert(!acb->bh);
819 acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
820 iscsi_ioctl_bh_completion, acb);
821 acb->ret = ret;
822 qemu_bh_schedule(acb->bh);
823 }
824
825 static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
826 unsigned long int req, void *buf,
827 BlockCompletionFunc *cb, void *opaque)
828 {
829 IscsiLun *iscsilun = bs->opaque;
830 struct iscsi_context *iscsi = iscsilun->iscsi;
831 struct iscsi_data data;
832 IscsiAIOCB *acb;
833
834 acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
835
836 acb->iscsilun = iscsilun;
837 acb->bh = NULL;
838 acb->status = -EINPROGRESS;
839 acb->buf = NULL;
840 acb->ioh = buf;
841
842 if (req != SG_IO) {
843 iscsi_ioctl_handle_emulated(acb, req, buf);
844 return &acb->common;
845 }
846
847 if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
848 error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
849 acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
850 qemu_aio_unref(acb);
851 return NULL;
852 }
853
854 acb->task = malloc(sizeof(struct scsi_task));
855 if (acb->task == NULL) {
856 error_report("iSCSI: Failed to allocate task for scsi command. %s",
857 iscsi_get_error(iscsi));
858 qemu_aio_unref(acb);
859 return NULL;
860 }
861 memset(acb->task, 0, sizeof(struct scsi_task));
862
863 switch (acb->ioh->dxfer_direction) {
864 case SG_DXFER_TO_DEV:
865 acb->task->xfer_dir = SCSI_XFER_WRITE;
866 break;
867 case SG_DXFER_FROM_DEV:
868 acb->task->xfer_dir = SCSI_XFER_READ;
869 break;
870 default:
871 acb->task->xfer_dir = SCSI_XFER_NONE;
872 break;
873 }
874
875 acb->task->cdb_size = acb->ioh->cmd_len;
876 memcpy(&acb->task->cdb[0], acb->ioh->cmdp, acb->ioh->cmd_len);
877 acb->task->expxferlen = acb->ioh->dxfer_len;
878
879 data.size = 0;
880 if (acb->task->xfer_dir == SCSI_XFER_WRITE) {
881 if (acb->ioh->iovec_count == 0) {
882 data.data = acb->ioh->dxferp;
883 data.size = acb->ioh->dxfer_len;
884 } else {
885 scsi_task_set_iov_out(acb->task,
886 (struct scsi_iovec *) acb->ioh->dxferp,
887 acb->ioh->iovec_count);
888 }
889 }
890
891 if (iscsi_scsi_command_async(iscsi, iscsilun->lun, acb->task,
892 iscsi_aio_ioctl_cb,
893 (data.size > 0) ? &data : NULL,
894 acb) != 0) {
895 scsi_free_scsi_task(acb->task);
896 qemu_aio_unref(acb);
897 return NULL;
898 }
899
900 /* tell libiscsi to read straight into the buffer we got from ioctl */
901 if (acb->task->xfer_dir == SCSI_XFER_READ) {
902 if (acb->ioh->iovec_count == 0) {
903 scsi_task_add_data_in_buffer(acb->task,
904 acb->ioh->dxfer_len,
905 acb->ioh->dxferp);
906 } else {
907 scsi_task_set_iov_in(acb->task,
908 (struct scsi_iovec *) acb->ioh->dxferp,
909 acb->ioh->iovec_count);
910 }
911 }
912
913 iscsi_set_events(iscsilun);
914
915 return &acb->common;
916 }
917
918 #endif
919
920 static int64_t
921 iscsi_getlength(BlockDriverState *bs)
922 {
923 IscsiLun *iscsilun = bs->opaque;
924 int64_t len;
925
926 len = iscsilun->num_blocks;
927 len *= iscsilun->block_size;
928
929 return len;
930 }
931
932 static int
933 coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
934 int nb_sectors)
935 {
936 IscsiLun *iscsilun = bs->opaque;
937 struct IscsiTask iTask;
938 struct unmap_list list;
939
940 if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
941 return -EINVAL;
942 }
943
944 if (!iscsilun->lbp.lbpu) {
945 /* UNMAP is not supported by the target */
946 return 0;
947 }
948
949 list.lba = sector_qemu2lun(sector_num, iscsilun);
950 list.num = sector_qemu2lun(nb_sectors, iscsilun);
951
952 iscsi_co_init_iscsitask(iscsilun, &iTask);
953 retry:
954 if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
955 iscsi_co_generic_cb, &iTask) == NULL) {
956 return -ENOMEM;
957 }
958
959 while (!iTask.complete) {
960 iscsi_set_events(iscsilun);
961 qemu_coroutine_yield();
962 }
963
964 if (iTask.task != NULL) {
965 scsi_free_scsi_task(iTask.task);
966 iTask.task = NULL;
967 }
968
969 if (iTask.do_retry) {
970 iTask.complete = 0;
971 goto retry;
972 }
973
974 if (iTask.status == SCSI_STATUS_CHECK_CONDITION) {
975 /* the target might fail with a check condition if it
976 is not happy with the alignment of the UNMAP request
977 we silently fail in this case */
978 return 0;
979 }
980
981 if (iTask.status != SCSI_STATUS_GOOD) {
982 return iTask.err_code;
983 }
984
985 iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
986
987 return 0;
988 }
989
990 static int
991 coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
992 int count, BdrvRequestFlags flags)
993 {
994 IscsiLun *iscsilun = bs->opaque;
995 struct IscsiTask iTask;
996 uint64_t lba;
997 uint32_t nb_blocks;
998 bool use_16_for_ws = iscsilun->use_16_for_rw;
999
1000 if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
1001 return -ENOTSUP;
1002 }
1003
1004 if (flags & BDRV_REQ_MAY_UNMAP) {
1005 if (!use_16_for_ws && !iscsilun->lbp.lbpws10) {
1006 /* WRITESAME10 with UNMAP is unsupported try WRITESAME16 */
1007 use_16_for_ws = true;
1008 }
1009 if (use_16_for_ws && !iscsilun->lbp.lbpws) {
1010 /* WRITESAME16 with UNMAP is not supported by the target,
1011 * fall back and try WRITESAME10/16 without UNMAP */
1012 flags &= ~BDRV_REQ_MAY_UNMAP;
1013 use_16_for_ws = iscsilun->use_16_for_rw;
1014 }
1015 }
1016
1017 if (!(flags & BDRV_REQ_MAY_UNMAP) && !iscsilun->has_write_same) {
1018 /* WRITESAME without UNMAP is not supported by the target */
1019 return -ENOTSUP;
1020 }
1021
1022 lba = offset / iscsilun->block_size;
1023 nb_blocks = count / iscsilun->block_size;
1024
1025 if (iscsilun->zeroblock == NULL) {
1026 iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
1027 if (iscsilun->zeroblock == NULL) {
1028 return -ENOMEM;
1029 }
1030 }
1031
1032 iscsi_co_init_iscsitask(iscsilun, &iTask);
1033 retry:
1034 if (use_16_for_ws) {
1035 iTask.task = iscsi_writesame16_task(iscsilun->iscsi, iscsilun->lun, lba,
1036 iscsilun->zeroblock, iscsilun->block_size,
1037 nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1038 0, 0, iscsi_co_generic_cb, &iTask);
1039 } else {
1040 iTask.task = iscsi_writesame10_task(iscsilun->iscsi, iscsilun->lun, lba,
1041 iscsilun->zeroblock, iscsilun->block_size,
1042 nb_blocks, 0, !!(flags & BDRV_REQ_MAY_UNMAP),
1043 0, 0, iscsi_co_generic_cb, &iTask);
1044 }
1045 if (iTask.task == NULL) {
1046 return -ENOMEM;
1047 }
1048
1049 while (!iTask.complete) {
1050 iscsi_set_events(iscsilun);
1051 qemu_coroutine_yield();
1052 }
1053
1054 if (iTask.status == SCSI_STATUS_CHECK_CONDITION &&
1055 iTask.task->sense.key == SCSI_SENSE_ILLEGAL_REQUEST &&
1056 (iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE ||
1057 iTask.task->sense.ascq == SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB)) {
1058 /* WRITE SAME is not supported by the target */
1059 iscsilun->has_write_same = false;
1060 scsi_free_scsi_task(iTask.task);
1061 return -ENOTSUP;
1062 }
1063
1064 if (iTask.task != NULL) {
1065 scsi_free_scsi_task(iTask.task);
1066 iTask.task = NULL;
1067 }
1068
1069 if (iTask.do_retry) {
1070 iTask.complete = 0;
1071 goto retry;
1072 }
1073
1074 if (iTask.status != SCSI_STATUS_GOOD) {
1075 return iTask.err_code;
1076 }
1077
1078 if (flags & BDRV_REQ_MAY_UNMAP) {
1079 iscsi_allocationmap_clear(iscsilun, offset >> BDRV_SECTOR_BITS,
1080 count >> BDRV_SECTOR_BITS);
1081 } else {
1082 iscsi_allocationmap_set(iscsilun, offset >> BDRV_SECTOR_BITS,
1083 count >> BDRV_SECTOR_BITS);
1084 }
1085
1086 return 0;
1087 }
1088
1089 static void parse_chap(struct iscsi_context *iscsi, const char *target,
1090 Error **errp)
1091 {
1092 QemuOptsList *list;
1093 QemuOpts *opts;
1094 const char *user = NULL;
1095 const char *password = NULL;
1096 const char *secretid;
1097 char *secret = NULL;
1098
1099 list = qemu_find_opts("iscsi");
1100 if (!list) {
1101 return;
1102 }
1103
1104 opts = qemu_opts_find(list, target);
1105 if (opts == NULL) {
1106 opts = QTAILQ_FIRST(&list->head);
1107 if (!opts) {
1108 return;
1109 }
1110 }
1111
1112 user = qemu_opt_get(opts, "user");
1113 if (!user) {
1114 return;
1115 }
1116
1117 secretid = qemu_opt_get(opts, "password-secret");
1118 password = qemu_opt_get(opts, "password");
1119 if (secretid && password) {
1120 error_setg(errp, "'password' and 'password-secret' properties are "
1121 "mutually exclusive");
1122 return;
1123 }
1124 if (secretid) {
1125 secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
1126 if (!secret) {
1127 return;
1128 }
1129 password = secret;
1130 } else if (!password) {
1131 error_setg(errp, "CHAP username specified but no password was given");
1132 return;
1133 }
1134
1135 if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
1136 error_setg(errp, "Failed to set initiator username and password");
1137 }
1138
1139 g_free(secret);
1140 }
1141
1142 static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
1143 Error **errp)
1144 {
1145 QemuOptsList *list;
1146 QemuOpts *opts;
1147 const char *digest = NULL;
1148
1149 list = qemu_find_opts("iscsi");
1150 if (!list) {
1151 return;
1152 }
1153
1154 opts = qemu_opts_find(list, target);
1155 if (opts == NULL) {
1156 opts = QTAILQ_FIRST(&list->head);
1157 if (!opts) {
1158 return;
1159 }
1160 }
1161
1162 digest = qemu_opt_get(opts, "header-digest");
1163 if (!digest) {
1164 return;
1165 }
1166
1167 if (!strcmp(digest, "CRC32C")) {
1168 iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C);
1169 } else if (!strcmp(digest, "NONE")) {
1170 iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE);
1171 } else if (!strcmp(digest, "CRC32C-NONE")) {
1172 iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_CRC32C_NONE);
1173 } else if (!strcmp(digest, "NONE-CRC32C")) {
1174 iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1175 } else {
1176 error_setg(errp, "Invalid header-digest setting : %s", digest);
1177 }
1178 }
1179
1180 static char *parse_initiator_name(const char *target)
1181 {
1182 QemuOptsList *list;
1183 QemuOpts *opts;
1184 const char *name;
1185 char *iscsi_name;
1186 UuidInfo *uuid_info;
1187
1188 list = qemu_find_opts("iscsi");
1189 if (list) {
1190 opts = qemu_opts_find(list, target);
1191 if (!opts) {
1192 opts = QTAILQ_FIRST(&list->head);
1193 }
1194 if (opts) {
1195 name = qemu_opt_get(opts, "initiator-name");
1196 if (name) {
1197 return g_strdup(name);
1198 }
1199 }
1200 }
1201
1202 uuid_info = qmp_query_uuid(NULL);
1203 if (strcmp(uuid_info->UUID, UUID_NONE) == 0) {
1204 name = qemu_get_vm_name();
1205 } else {
1206 name = uuid_info->UUID;
1207 }
1208 iscsi_name = g_strdup_printf("iqn.2008-11.org.linux-kvm%s%s",
1209 name ? ":" : "", name ? name : "");
1210 qapi_free_UuidInfo(uuid_info);
1211 return iscsi_name;
1212 }
1213
1214 static int parse_timeout(const char *target)
1215 {
1216 QemuOptsList *list;
1217 QemuOpts *opts;
1218 const char *timeout;
1219
1220 list = qemu_find_opts("iscsi");
1221 if (list) {
1222 opts = qemu_opts_find(list, target);
1223 if (!opts) {
1224 opts = QTAILQ_FIRST(&list->head);
1225 }
1226 if (opts) {
1227 timeout = qemu_opt_get(opts, "timeout");
1228 if (timeout) {
1229 return atoi(timeout);
1230 }
1231 }
1232 }
1233
1234 return 0;
1235 }
1236
1237 static void iscsi_nop_timed_event(void *opaque)
1238 {
1239 IscsiLun *iscsilun = opaque;
1240
1241 if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
1242 error_report("iSCSI: NOP timeout. Reconnecting...");
1243 iscsilun->request_timed_out = true;
1244 } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
1245 error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
1246 return;
1247 }
1248
1249 timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1250 iscsi_set_events(iscsilun);
1251 }
1252
1253 static void iscsi_readcapacity_sync(IscsiLun *iscsilun, Error **errp)
1254 {
1255 struct scsi_task *task = NULL;
1256 struct scsi_readcapacity10 *rc10 = NULL;
1257 struct scsi_readcapacity16 *rc16 = NULL;
1258 int retries = ISCSI_CMD_RETRIES;
1259
1260 do {
1261 if (task != NULL) {
1262 scsi_free_scsi_task(task);
1263 task = NULL;
1264 }
1265
1266 switch (iscsilun->type) {
1267 case TYPE_DISK:
1268 task = iscsi_readcapacity16_sync(iscsilun->iscsi, iscsilun->lun);
1269 if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1270 rc16 = scsi_datain_unmarshall(task);
1271 if (rc16 == NULL) {
1272 error_setg(errp, "iSCSI: Failed to unmarshall readcapacity16 data.");
1273 } else {
1274 iscsilun->block_size = rc16->block_length;
1275 iscsilun->num_blocks = rc16->returned_lba + 1;
1276 iscsilun->lbpme = !!rc16->lbpme;
1277 iscsilun->lbprz = !!rc16->lbprz;
1278 iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
1279 }
1280 break;
1281 }
1282 if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1283 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
1284 break;
1285 }
1286 /* Fall through and try READ CAPACITY(10) instead. */
1287 case TYPE_ROM:
1288 task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
1289 if (task != NULL && task->status == SCSI_STATUS_GOOD) {
1290 rc10 = scsi_datain_unmarshall(task);
1291 if (rc10 == NULL) {
1292 error_setg(errp, "iSCSI: Failed to unmarshall readcapacity10 data.");
1293 } else {
1294 iscsilun->block_size = rc10->block_size;
1295 if (rc10->lba == 0) {
1296 /* blank disk loaded */
1297 iscsilun->num_blocks = 0;
1298 } else {
1299 iscsilun->num_blocks = rc10->lba + 1;
1300 }
1301 }
1302 }
1303 break;
1304 default:
1305 return;
1306 }
1307 } while (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
1308 && task->sense.key == SCSI_SENSE_UNIT_ATTENTION
1309 && retries-- > 0);
1310
1311 if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1312 error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
1313 } else if (!iscsilun->block_size ||
1314 iscsilun->block_size % BDRV_SECTOR_SIZE) {
1315 error_setg(errp, "iSCSI: the target returned an invalid "
1316 "block size of %d.", iscsilun->block_size);
1317 }
1318 if (task) {
1319 scsi_free_scsi_task(task);
1320 }
1321 }
1322
1323 /* TODO Convert to fine grained options */
1324 static QemuOptsList runtime_opts = {
1325 .name = "iscsi",
1326 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
1327 .desc = {
1328 {
1329 .name = "filename",
1330 .type = QEMU_OPT_STRING,
1331 .help = "URL to the iscsi image",
1332 },
1333 { /* end of list */ }
1334 },
1335 };
1336
1337 static struct scsi_task *iscsi_do_inquiry(struct iscsi_context *iscsi, int lun,
1338 int evpd, int pc, void **inq, Error **errp)
1339 {
1340 int full_size;
1341 struct scsi_task *task = NULL;
1342 task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, 64);
1343 if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1344 goto fail;
1345 }
1346 full_size = scsi_datain_getfullsize(task);
1347 if (full_size > task->datain.size) {
1348 scsi_free_scsi_task(task);
1349
1350 /* we need more data for the full list */
1351 task = iscsi_inquiry_sync(iscsi, lun, evpd, pc, full_size);
1352 if (task == NULL || task->status != SCSI_STATUS_GOOD) {
1353 goto fail;
1354 }
1355 }
1356
1357 *inq = scsi_datain_unmarshall(task);
1358 if (*inq == NULL) {
1359 error_setg(errp, "iSCSI: failed to unmarshall inquiry datain blob");
1360 goto fail_with_err;
1361 }
1362
1363 return task;
1364
1365 fail:
1366 error_setg(errp, "iSCSI: Inquiry command failed : %s",
1367 iscsi_get_error(iscsi));
1368 fail_with_err:
1369 if (task != NULL) {
1370 scsi_free_scsi_task(task);
1371 }
1372 return NULL;
1373 }
1374
1375 static void iscsi_detach_aio_context(BlockDriverState *bs)
1376 {
1377 IscsiLun *iscsilun = bs->opaque;
1378
1379 aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
1380 false, NULL, NULL, NULL);
1381 iscsilun->events = 0;
1382
1383 if (iscsilun->nop_timer) {
1384 timer_del(iscsilun->nop_timer);
1385 timer_free(iscsilun->nop_timer);
1386 iscsilun->nop_timer = NULL;
1387 }
1388 if (iscsilun->event_timer) {
1389 timer_del(iscsilun->event_timer);
1390 timer_free(iscsilun->event_timer);
1391 iscsilun->event_timer = NULL;
1392 }
1393 }
1394
1395 static void iscsi_attach_aio_context(BlockDriverState *bs,
1396 AioContext *new_context)
1397 {
1398 IscsiLun *iscsilun = bs->opaque;
1399
1400 iscsilun->aio_context = new_context;
1401 iscsi_set_events(iscsilun);
1402
1403 /* Set up a timer for sending out iSCSI NOPs */
1404 iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context,
1405 QEMU_CLOCK_REALTIME, SCALE_MS,
1406 iscsi_nop_timed_event, iscsilun);
1407 timer_mod(iscsilun->nop_timer,
1408 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
1409
1410 /* Set up a timer for periodic calls to iscsi_set_events and to
1411 * scan for command timeout */
1412 iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
1413 QEMU_CLOCK_REALTIME, SCALE_MS,
1414 iscsi_timed_check_events, iscsilun);
1415 timer_mod(iscsilun->event_timer,
1416 qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
1417 }
1418
1419 static void iscsi_modesense_sync(IscsiLun *iscsilun)
1420 {
1421 struct scsi_task *task;
1422 struct scsi_mode_sense *ms = NULL;
1423 iscsilun->write_protected = false;
1424 iscsilun->dpofua = false;
1425
1426 task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1427 1, SCSI_MODESENSE_PC_CURRENT,
1428 0x3F, 0, 255);
1429 if (task == NULL) {
1430 error_report("iSCSI: Failed to send MODE_SENSE(6) command: %s",
1431 iscsi_get_error(iscsilun->iscsi));
1432 goto out;
1433 }
1434
1435 if (task->status != SCSI_STATUS_GOOD) {
1436 error_report("iSCSI: Failed MODE_SENSE(6), LUN assumed writable");
1437 goto out;
1438 }
1439 ms = scsi_datain_unmarshall(task);
1440 if (!ms) {
1441 error_report("iSCSI: Failed to unmarshall MODE_SENSE(6) data: %s",
1442 iscsi_get_error(iscsilun->iscsi));
1443 goto out;
1444 }
1445 iscsilun->write_protected = ms->device_specific_parameter & 0x80;
1446 iscsilun->dpofua = ms->device_specific_parameter & 0x10;
1447
1448 out:
1449 if (task) {
1450 scsi_free_scsi_task(task);
1451 }
1452 }
1453
1454 /*
1455 * We support iscsi url's on the form
1456 * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
1457 */
1458 static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
1459 Error **errp)
1460 {
1461 IscsiLun *iscsilun = bs->opaque;
1462 struct iscsi_context *iscsi = NULL;
1463 struct iscsi_url *iscsi_url = NULL;
1464 struct scsi_task *task = NULL;
1465 struct scsi_inquiry_standard *inq = NULL;
1466 struct scsi_inquiry_supported_pages *inq_vpd;
1467 char *initiator_name = NULL;
1468 QemuOpts *opts;
1469 Error *local_err = NULL;
1470 const char *filename;
1471 int i, ret = 0, timeout = 0;
1472
1473 opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
1474 qemu_opts_absorb_qdict(opts, options, &local_err);
1475 if (local_err) {
1476 error_propagate(errp, local_err);
1477 ret = -EINVAL;
1478 goto out;
1479 }
1480
1481 filename = qemu_opt_get(opts, "filename");
1482
1483 iscsi_url = iscsi_parse_full_url(iscsi, filename);
1484 if (iscsi_url == NULL) {
1485 error_setg(errp, "Failed to parse URL : %s", filename);
1486 ret = -EINVAL;
1487 goto out;
1488 }
1489
1490 memset(iscsilun, 0, sizeof(IscsiLun));
1491
1492 initiator_name = parse_initiator_name(iscsi_url->target);
1493
1494 iscsi = iscsi_create_context(initiator_name);
1495 if (iscsi == NULL) {
1496 error_setg(errp, "iSCSI: Failed to create iSCSI context.");
1497 ret = -ENOMEM;
1498 goto out;
1499 }
1500
1501 if (iscsi_set_targetname(iscsi, iscsi_url->target)) {
1502 error_setg(errp, "iSCSI: Failed to set target name.");
1503 ret = -EINVAL;
1504 goto out;
1505 }
1506
1507 if (iscsi_url->user[0] != '\0') {
1508 ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
1509 iscsi_url->passwd);
1510 if (ret != 0) {
1511 error_setg(errp, "Failed to set initiator username and password");
1512 ret = -EINVAL;
1513 goto out;
1514 }
1515 }
1516
1517 /* check if we got CHAP username/password via the options */
1518 parse_chap(iscsi, iscsi_url->target, &local_err);
1519 if (local_err != NULL) {
1520 error_propagate(errp, local_err);
1521 ret = -EINVAL;
1522 goto out;
1523 }
1524
1525 if (iscsi_set_session_type(iscsi, ISCSI_SESSION_NORMAL) != 0) {
1526 error_setg(errp, "iSCSI: Failed to set session type to normal.");
1527 ret = -EINVAL;
1528 goto out;
1529 }
1530
1531 iscsi_set_header_digest(iscsi, ISCSI_HEADER_DIGEST_NONE_CRC32C);
1532
1533 /* check if we got HEADER_DIGEST via the options */
1534 parse_header_digest(iscsi, iscsi_url->target, &local_err);
1535 if (local_err != NULL) {
1536 error_propagate(errp, local_err);
1537 ret = -EINVAL;
1538 goto out;
1539 }
1540
1541 /* timeout handling is broken in libiscsi before 1.15.0 */
1542 timeout = parse_timeout(iscsi_url->target);
1543 #if defined(LIBISCSI_API_VERSION) && LIBISCSI_API_VERSION >= 20150621
1544 iscsi_set_timeout(iscsi, timeout);
1545 #else
1546 if (timeout) {
1547 error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
1548 }
1549 #endif
1550
1551 if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
1552 error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
1553 iscsi_get_error(iscsi));
1554 ret = -EINVAL;
1555 goto out;
1556 }
1557
1558 iscsilun->iscsi = iscsi;
1559 iscsilun->aio_context = bdrv_get_aio_context(bs);
1560 iscsilun->lun = iscsi_url->lun;
1561 iscsilun->has_write_same = true;
1562
1563 task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 0, 0,
1564 (void **) &inq, errp);
1565 if (task == NULL) {
1566 ret = -EINVAL;
1567 goto out;
1568 }
1569 iscsilun->type = inq->periperal_device_type;
1570 scsi_free_scsi_task(task);
1571 task = NULL;
1572
1573 iscsi_modesense_sync(iscsilun);
1574 if (iscsilun->dpofua) {
1575 bs->supported_write_flags = BDRV_REQ_FUA;
1576 }
1577 bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
1578
1579 /* Check the write protect flag of the LUN if we want to write */
1580 if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
1581 iscsilun->write_protected) {
1582 error_setg(errp, "Cannot open a write protected LUN as read-write");
1583 ret = -EACCES;
1584 goto out;
1585 }
1586
1587 iscsi_readcapacity_sync(iscsilun, &local_err);
1588 if (local_err != NULL) {
1589 error_propagate(errp, local_err);
1590 ret = -EINVAL;
1591 goto out;
1592 }
1593 bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
1594
1595 /* We don't have any emulation for devices other than disks and CD-ROMs, so
1596 * this must be sg ioctl compatible. We force it to be sg, otherwise qemu
1597 * will try to read from the device to guess the image format.
1598 */
1599 if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
1600 bs->sg = 1;
1601 }
1602
1603 task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1604 SCSI_INQUIRY_PAGECODE_SUPPORTED_VPD_PAGES,
1605 (void **) &inq_vpd, errp);
1606 if (task == NULL) {
1607 ret = -EINVAL;
1608 goto out;
1609 }
1610 for (i = 0; i < inq_vpd->num_pages; i++) {
1611 struct scsi_task *inq_task;
1612 struct scsi_inquiry_logical_block_provisioning *inq_lbp;
1613 struct scsi_inquiry_block_limits *inq_bl;
1614 switch (inq_vpd->pages[i]) {
1615 case SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING:
1616 inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1617 SCSI_INQUIRY_PAGECODE_LOGICAL_BLOCK_PROVISIONING,
1618 (void **) &inq_lbp, errp);
1619 if (inq_task == NULL) {
1620 ret = -EINVAL;
1621 goto out;
1622 }
1623 memcpy(&iscsilun->lbp, inq_lbp,
1624 sizeof(struct scsi_inquiry_logical_block_provisioning));
1625 scsi_free_scsi_task(inq_task);
1626 break;
1627 case SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS:
1628 inq_task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
1629 SCSI_INQUIRY_PAGECODE_BLOCK_LIMITS,
1630 (void **) &inq_bl, errp);
1631 if (inq_task == NULL) {
1632 ret = -EINVAL;
1633 goto out;
1634 }
1635 memcpy(&iscsilun->bl, inq_bl,
1636 sizeof(struct scsi_inquiry_block_limits));
1637 scsi_free_scsi_task(inq_task);
1638 break;
1639 default:
1640 break;
1641 }
1642 }
1643 scsi_free_scsi_task(task);
1644 task = NULL;
1645
1646 iscsi_attach_aio_context(bs, iscsilun->aio_context);
1647
1648 /* Guess the internal cluster (page) size of the iscsi target by the means
1649 * of opt_unmap_gran. Transfer the unmap granularity only if it has a
1650 * reasonable size */
1651 if (iscsilun->bl.opt_unmap_gran * iscsilun->block_size >= 4 * 1024 &&
1652 iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
1653 iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
1654 iscsilun->block_size) >> BDRV_SECTOR_BITS;
1655 if (iscsilun->lbprz) {
1656 iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1657 if (iscsilun->allocationmap == NULL) {
1658 ret = -ENOMEM;
1659 }
1660 }
1661 }
1662
1663 out:
1664 qemu_opts_del(opts);
1665 g_free(initiator_name);
1666 if (iscsi_url != NULL) {
1667 iscsi_destroy_url(iscsi_url);
1668 }
1669 if (task != NULL) {
1670 scsi_free_scsi_task(task);
1671 }
1672
1673 if (ret) {
1674 if (iscsi != NULL) {
1675 if (iscsi_is_logged_in(iscsi)) {
1676 iscsi_logout_sync(iscsi);
1677 }
1678 iscsi_destroy_context(iscsi);
1679 }
1680 memset(iscsilun, 0, sizeof(IscsiLun));
1681 }
1682 return ret;
1683 }
1684
1685 static void iscsi_close(BlockDriverState *bs)
1686 {
1687 IscsiLun *iscsilun = bs->opaque;
1688 struct iscsi_context *iscsi = iscsilun->iscsi;
1689
1690 iscsi_detach_aio_context(bs);
1691 if (iscsi_is_logged_in(iscsi)) {
1692 iscsi_logout_sync(iscsi);
1693 }
1694 iscsi_destroy_context(iscsi);
1695 g_free(iscsilun->zeroblock);
1696 g_free(iscsilun->allocationmap);
1697 memset(iscsilun, 0, sizeof(IscsiLun));
1698 }
1699
1700 static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
1701 {
1702 /* We don't actually refresh here, but just return data queried in
1703 * iscsi_open(): iscsi targets don't change their limits. */
1704
1705 IscsiLun *iscsilun = bs->opaque;
1706 uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
1707
1708 bs->request_alignment = iscsilun->block_size;
1709
1710 if (iscsilun->bl.max_xfer_len) {
1711 max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
1712 }
1713
1714 if (max_xfer_len * iscsilun->block_size < INT_MAX) {
1715 bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
1716 }
1717
1718 if (iscsilun->lbp.lbpu) {
1719 if (iscsilun->bl.max_unmap < 0xffffffff / iscsilun->block_size) {
1720 bs->bl.max_pdiscard =
1721 iscsilun->bl.max_unmap * iscsilun->block_size;
1722 }
1723 bs->bl.pdiscard_alignment =
1724 iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
1725 } else {
1726 bs->bl.pdiscard_alignment = iscsilun->block_size;
1727 }
1728
1729 if (iscsilun->bl.max_ws_len < 0xffffffff / iscsilun->block_size) {
1730 bs->bl.max_pwrite_zeroes =
1731 iscsilun->bl.max_ws_len * iscsilun->block_size;
1732 }
1733 if (iscsilun->lbp.lbpws) {
1734 bs->bl.pwrite_zeroes_alignment =
1735 iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
1736 } else {
1737 bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
1738 }
1739 if (iscsilun->bl.opt_xfer_len &&
1740 iscsilun->bl.opt_xfer_len < INT_MAX / iscsilun->block_size) {
1741 bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
1742 iscsilun->block_size);
1743 }
1744 }
1745
1746 /* Note that this will not re-establish a connection with an iSCSI target - it
1747 * is effectively a NOP. */
1748 static int iscsi_reopen_prepare(BDRVReopenState *state,
1749 BlockReopenQueue *queue, Error **errp)
1750 {
1751 IscsiLun *iscsilun = state->bs->opaque;
1752
1753 if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
1754 error_setg(errp, "Cannot open a write protected LUN as read-write");
1755 return -EACCES;
1756 }
1757 return 0;
1758 }
1759
1760 static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
1761 {
1762 IscsiLun *iscsilun = bs->opaque;
1763 Error *local_err = NULL;
1764
1765 if (iscsilun->type != TYPE_DISK) {
1766 return -ENOTSUP;
1767 }
1768
1769 iscsi_readcapacity_sync(iscsilun, &local_err);
1770 if (local_err != NULL) {
1771 error_free(local_err);
1772 return -EIO;
1773 }
1774
1775 if (offset > iscsi_getlength(bs)) {
1776 return -EINVAL;
1777 }
1778
1779 if (iscsilun->allocationmap != NULL) {
1780 g_free(iscsilun->allocationmap);
1781 iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
1782 }
1783
1784 return 0;
1785 }
1786
1787 static int iscsi_create(const char *filename, QemuOpts *opts, Error **errp)
1788 {
1789 int ret = 0;
1790 int64_t total_size = 0;
1791 BlockDriverState *bs;
1792 IscsiLun *iscsilun = NULL;
1793 QDict *bs_options;
1794
1795 bs = bdrv_new();
1796
1797 /* Read out options */
1798 total_size = DIV_ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
1799 BDRV_SECTOR_SIZE);
1800 bs->opaque = g_new0(struct IscsiLun, 1);
1801 iscsilun = bs->opaque;
1802
1803 bs_options = qdict_new();
1804 qdict_put(bs_options, "filename", qstring_from_str(filename));
1805 ret = iscsi_open(bs, bs_options, 0, NULL);
1806 QDECREF(bs_options);
1807
1808 if (ret != 0) {
1809 goto out;
1810 }
1811 iscsi_detach_aio_context(bs);
1812 if (iscsilun->type != TYPE_DISK) {
1813 ret = -ENODEV;
1814 goto out;
1815 }
1816 if (bs->total_sectors < total_size) {
1817 ret = -ENOSPC;
1818 goto out;
1819 }
1820
1821 ret = 0;
1822 out:
1823 if (iscsilun->iscsi != NULL) {
1824 iscsi_destroy_context(iscsilun->iscsi);
1825 }
1826 g_free(bs->opaque);
1827 bs->opaque = NULL;
1828 bdrv_unref(bs);
1829 return ret;
1830 }
1831
1832 static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
1833 {
1834 IscsiLun *iscsilun = bs->opaque;
1835 bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
1836 bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
1837 bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
1838 return 0;
1839 }
1840
1841 static QemuOptsList iscsi_create_opts = {
1842 .name = "iscsi-create-opts",
1843 .head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
1844 .desc = {
1845 {
1846 .name = BLOCK_OPT_SIZE,
1847 .type = QEMU_OPT_SIZE,
1848 .help = "Virtual disk size"
1849 },
1850 { /* end of list */ }
1851 }
1852 };
1853
1854 static BlockDriver bdrv_iscsi = {
1855 .format_name = "iscsi",
1856 .protocol_name = "iscsi",
1857
1858 .instance_size = sizeof(IscsiLun),
1859 .bdrv_needs_filename = true,
1860 .bdrv_file_open = iscsi_open,
1861 .bdrv_close = iscsi_close,
1862 .bdrv_create = iscsi_create,
1863 .create_opts = &iscsi_create_opts,
1864 .bdrv_reopen_prepare = iscsi_reopen_prepare,
1865
1866 .bdrv_getlength = iscsi_getlength,
1867 .bdrv_get_info = iscsi_get_info,
1868 .bdrv_truncate = iscsi_truncate,
1869 .bdrv_refresh_limits = iscsi_refresh_limits,
1870
1871 .bdrv_co_get_block_status = iscsi_co_get_block_status,
1872 .bdrv_co_discard = iscsi_co_discard,
1873 .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
1874 .bdrv_co_readv = iscsi_co_readv,
1875 .bdrv_co_writev_flags = iscsi_co_writev_flags,
1876 .bdrv_co_flush_to_disk = iscsi_co_flush,
1877
1878 #ifdef __linux__
1879 .bdrv_aio_ioctl = iscsi_aio_ioctl,
1880 #endif
1881
1882 .bdrv_detach_aio_context = iscsi_detach_aio_context,
1883 .bdrv_attach_aio_context = iscsi_attach_aio_context,
1884 };
1885
1886 static QemuOptsList qemu_iscsi_opts = {
1887 .name = "iscsi",
1888 .head = QTAILQ_HEAD_INITIALIZER(qemu_iscsi_opts.head),
1889 .desc = {
1890 {
1891 .name = "user",
1892 .type = QEMU_OPT_STRING,
1893 .help = "username for CHAP authentication to target",
1894 },{
1895 .name = "password",
1896 .type = QEMU_OPT_STRING,
1897 .help = "password for CHAP authentication to target",
1898 },{
1899 .name = "password-secret",
1900 .type = QEMU_OPT_STRING,
1901 .help = "ID of the secret providing password for CHAP "
1902 "authentication to target",
1903 },{
1904 .name = "header-digest",
1905 .type = QEMU_OPT_STRING,
1906 .help = "HeaderDigest setting. "
1907 "{CRC32C|CRC32C-NONE|NONE-CRC32C|NONE}",
1908 },{
1909 .name = "initiator-name",
1910 .type = QEMU_OPT_STRING,
1911 .help = "Initiator iqn name to use when connecting",
1912 },{
1913 .name = "timeout",
1914 .type = QEMU_OPT_NUMBER,
1915 .help = "Request timeout in seconds (default 0 = no timeout)",
1916 },
1917 { /* end of list */ }
1918 },
1919 };
1920
1921 static void iscsi_block_init(void)
1922 {
1923 bdrv_register(&bdrv_iscsi);
1924 qemu_add_opts(&qemu_iscsi_opts);
1925 }
1926
1927 block_init(iscsi_block_init);