]> git.proxmox.com Git - mirror_qemu.git/blame - block/nbd.c
qemu-socket: Make socket_optslist public
[mirror_qemu.git] / block / nbd.c
CommitLineData
75818250
TS
1/*
2 * QEMU Block driver for NBD
3 *
4 * Copyright (C) 2008 Bull S.A.S.
bd5921b4 5 * Author: Laurent Vivier <Laurent.Vivier@bull.net>
75818250
TS
6 *
7 * Some parts:
8 * Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
28
29#include "qemu-common.h"
737e150e 30#include "block/nbd.h"
1de7afc9 31#include "qemu/uri.h"
737e150e 32#include "block/block_int.h"
1de7afc9
PB
33#include "qemu/module.h"
34#include "qemu/sockets.h"
75818250
TS
35
36#include <sys/types.h>
37#include <unistd.h>
75818250 38
1d45f8b5
LV
/* Separator that introduces the export name in legacy "nbd:" filenames. */
#define EN_OPTSTR ":exportname="

/* #define DEBUG_NBD */

/* Debug tracing to stderr; expands to nothing unless DEBUG_NBD is defined. */
#if defined(DEBUG_NBD)
#define logout(fmt, ...) \
    fprintf(stderr, "nbd\t%-24s" fmt, __func__, ##__VA_ARGS__)
#else
#define logout(fmt, ...) ((void)0)
#endif

/* Number of slots in the per-device table of receiving coroutines. */
#define MAX_NBD_REQUESTS 16

/*
 * A request handle is the slot index XORed with the address of the state
 * object passed as 'bs'; applying the same XOR again recovers the index.
 * Every macro argument is parenthesized so that arbitrary expressions can
 * be passed safely.
 */
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)(bs)))
#define INDEX_TO_HANDLE(bs, index)  ((index) ^ ((uint64_t)(intptr_t)(bs)))
53
75818250
TS
54typedef struct BDRVNBDState {
55 int sock;
b90fb4b8 56 uint32_t nbdflags;
75818250
TS
57 off_t size;
58 size_t blocksize;
33897dc7 59
ecda3447
PB
60 CoMutex send_mutex;
61 CoMutex free_sema;
62 Coroutine *send_coroutine;
63 int in_flight;
ae255e52 64
ecda3447 65 Coroutine *recv_coroutine[MAX_NBD_REQUESTS];
ae255e52
PB
66 struct nbd_reply reply;
67
d04b0bbb 68 int is_unix;
33897dc7 69 char *host_spec;
d04b0bbb 70 char *export_name; /* An NBD server may export several devices */
75818250
TS
71} BDRVNBDState;
72
1d7d2a9d
PB
73static int nbd_parse_uri(BDRVNBDState *s, const char *filename)
74{
75 URI *uri;
76 const char *p;
77 QueryParams *qp = NULL;
78 int ret = 0;
79
80 uri = uri_parse(filename);
81 if (!uri) {
82 return -EINVAL;
83 }
84
85 /* transport */
86 if (!strcmp(uri->scheme, "nbd")) {
87 s->is_unix = false;
88 } else if (!strcmp(uri->scheme, "nbd+tcp")) {
89 s->is_unix = false;
90 } else if (!strcmp(uri->scheme, "nbd+unix")) {
91 s->is_unix = true;
92 } else {
93 ret = -EINVAL;
94 goto out;
95 }
96
97 p = uri->path ? uri->path : "/";
98 p += strspn(p, "/");
99 if (p[0]) {
100 s->export_name = g_strdup(p);
101 }
102
103 qp = query_params_parse(uri->query);
104 if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) {
105 ret = -EINVAL;
106 goto out;
107 }
108
109 if (s->is_unix) {
110 /* nbd+unix:///export?socket=path */
111 if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) {
112 ret = -EINVAL;
113 goto out;
114 }
115 s->host_spec = g_strdup(qp->p[0].value);
116 } else {
117 /* nbd[+tcp]://host:port/export */
118 if (!uri->server) {
119 ret = -EINVAL;
120 goto out;
121 }
122 if (!uri->port) {
123 uri->port = NBD_DEFAULT_PORT;
124 }
125 s->host_spec = g_strdup_printf("%s:%d", uri->server, uri->port);
126 }
127
128out:
129 if (qp) {
130 query_params_free(qp);
131 }
132 uri_free(uri);
133 return ret;
134}
135
d04b0bbb 136static int nbd_config(BDRVNBDState *s, const char *filename)
75818250 137{
1d45f8b5 138 char *file;
33897dc7
NT
139 char *export_name;
140 const char *host_spec;
75818250 141 const char *unixpath;
1d45f8b5 142 int err = -EINVAL;
75818250 143
1d7d2a9d
PB
144 if (strstr(filename, "://")) {
145 return nbd_parse_uri(s, filename);
146 }
147
7267c094 148 file = g_strdup(filename);
1d45f8b5 149
33897dc7
NT
150 export_name = strstr(file, EN_OPTSTR);
151 if (export_name) {
152 if (export_name[strlen(EN_OPTSTR)] == 0) {
1d45f8b5
LV
153 goto out;
154 }
33897dc7
NT
155 export_name[0] = 0; /* truncate 'file' */
156 export_name += strlen(EN_OPTSTR);
7267c094 157 s->export_name = g_strdup(export_name);
1d45f8b5
LV
158 }
159
33897dc7
NT
160 /* extract the host_spec - fail if it's not nbd:... */
161 if (!strstart(file, "nbd:", &host_spec)) {
1d45f8b5
LV
162 goto out;
163 }
75818250 164
33897dc7
NT
165 /* are we a UNIX or TCP socket? */
166 if (strstart(host_spec, "unix:", &unixpath)) {
d04b0bbb 167 s->is_unix = true;
7267c094 168 s->host_spec = g_strdup(unixpath);
75818250 169 } else {
d04b0bbb 170 s->is_unix = false;
7267c094 171 s->host_spec = g_strdup(host_spec);
33897dc7 172 }
75818250 173
33897dc7 174 err = 0;
75818250 175
33897dc7 176out:
7267c094 177 g_free(file);
33897dc7 178 if (err != 0) {
7267c094
AL
179 g_free(s->export_name);
180 g_free(s->host_spec);
33897dc7
NT
181 }
182 return err;
183}
1d45f8b5 184
ae255e52
PB
185static void nbd_coroutine_start(BDRVNBDState *s, struct nbd_request *request)
186{
ecda3447
PB
187 int i;
188
189 /* Poor man semaphore. The free_sema is locked when no other request
190 * can be accepted, and unlocked after receiving one reply. */
191 if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
192 qemu_co_mutex_lock(&s->free_sema);
193 assert(s->in_flight < MAX_NBD_REQUESTS);
194 }
195 s->in_flight++;
196
197 for (i = 0; i < MAX_NBD_REQUESTS; i++) {
198 if (s->recv_coroutine[i] == NULL) {
199 s->recv_coroutine[i] = qemu_coroutine_self();
200 break;
201 }
202 }
203
204 assert(i < MAX_NBD_REQUESTS);
205 request->handle = INDEX_TO_HANDLE(s, i);
ae255e52
PB
206}
207
208static int nbd_have_request(void *opaque)
209{
210 BDRVNBDState *s = opaque;
211
ecda3447 212 return s->in_flight > 0;
ae255e52
PB
213}
214
215static void nbd_reply_ready(void *opaque)
216{
217 BDRVNBDState *s = opaque;
dd3e8ac4 218 uint64_t i;
7fe7b68b 219 int ret;
ae255e52
PB
220
221 if (s->reply.handle == 0) {
7fe7b68b
PB
222 /* No reply already in flight. Fetch a header. It is possible
223 * that another thread has done the same thing in parallel, so
224 * the socket is not readable anymore.
225 */
226 ret = nbd_receive_reply(s->sock, &s->reply);
227 if (ret == -EAGAIN) {
228 return;
229 }
230 if (ret < 0) {
ae255e52 231 s->reply.handle = 0;
ecda3447 232 goto fail;
ae255e52
PB
233 }
234 }
235
236 /* There's no need for a mutex on the receive side, because the
237 * handler acts as a synchronization point and ensures that only
238 * one coroutine is called until the reply finishes. */
ecda3447 239 i = HANDLE_TO_INDEX(s, s->reply.handle);
dd3e8ac4
PB
240 if (i >= MAX_NBD_REQUESTS) {
241 goto fail;
242 }
243
ecda3447
PB
244 if (s->recv_coroutine[i]) {
245 qemu_coroutine_enter(s->recv_coroutine[i], NULL);
246 return;
247 }
248
249fail:
250 for (i = 0; i < MAX_NBD_REQUESTS; i++) {
251 if (s->recv_coroutine[i]) {
252 qemu_coroutine_enter(s->recv_coroutine[i], NULL);
253 }
ae255e52
PB
254 }
255}
256
257static void nbd_restart_write(void *opaque)
258{
259 BDRVNBDState *s = opaque;
ecda3447 260 qemu_coroutine_enter(s->send_coroutine, NULL);
ae255e52
PB
261}
262
263static int nbd_co_send_request(BDRVNBDState *s, struct nbd_request *request,
2fc8ae1d 264 QEMUIOVector *qiov, int offset)
ae255e52
PB
265{
266 int rc, ret;
267
ecda3447
PB
268 qemu_co_mutex_lock(&s->send_mutex);
269 s->send_coroutine = qemu_coroutine_self();
ae255e52 270 qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, nbd_restart_write,
bafbd6a1 271 nbd_have_request, s);
ae255e52 272 rc = nbd_send_request(s->sock, request);
2fc8ae1d
MT
273 if (rc >= 0 && qiov) {
274 ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
275 offset, request->len);
ae255e52 276 if (ret != request->len) {
185b4338 277 return -EIO;
ae255e52
PB
278 }
279 }
280 qemu_aio_set_fd_handler(s->sock, nbd_reply_ready, NULL,
bafbd6a1 281 nbd_have_request, s);
ecda3447
PB
282 s->send_coroutine = NULL;
283 qemu_co_mutex_unlock(&s->send_mutex);
ae255e52
PB
284 return rc;
285}
286
287static void nbd_co_receive_reply(BDRVNBDState *s, struct nbd_request *request,
288 struct nbd_reply *reply,
2fc8ae1d 289 QEMUIOVector *qiov, int offset)
ae255e52
PB
290{
291 int ret;
292
ecda3447
PB
293 /* Wait until we're woken up by the read handler. TODO: perhaps
294 * peek at the next reply and avoid yielding if it's ours? */
ae255e52
PB
295 qemu_coroutine_yield();
296 *reply = s->reply;
297 if (reply->handle != request->handle) {
298 reply->error = EIO;
299 } else {
2fc8ae1d
MT
300 if (qiov && reply->error == 0) {
301 ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
302 offset, request->len);
ae255e52
PB
303 if (ret != request->len) {
304 reply->error = EIO;
305 }
306 }
307
308 /* Tell the read handler to read another header. */
309 s->reply.handle = 0;
310 }
311}
312
313static void nbd_coroutine_end(BDRVNBDState *s, struct nbd_request *request)
314{
ecda3447
PB
315 int i = HANDLE_TO_INDEX(s, request->handle);
316 s->recv_coroutine[i] = NULL;
317 if (s->in_flight-- == MAX_NBD_REQUESTS) {
318 qemu_co_mutex_unlock(&s->free_sema);
319 }
ae255e52
PB
320}
321
33897dc7
NT
322static int nbd_establish_connection(BlockDriverState *bs)
323{
324 BDRVNBDState *s = bs->opaque;
325 int sock;
326 int ret;
327 off_t size;
328 size_t blocksize;
75818250 329
d04b0bbb 330 if (s->is_unix) {
33897dc7
NT
331 sock = unix_socket_outgoing(s->host_spec);
332 } else {
333 sock = tcp_socket_outgoing_spec(s->host_spec);
75818250
TS
334 }
335
33897dc7 336 /* Failed to establish connection */
fc19f8a0 337 if (sock < 0) {
33897dc7
NT
338 logout("Failed to establish connection to NBD server\n");
339 return -errno;
1d45f8b5 340 }
75818250 341
33897dc7 342 /* NBD handshake */
b90fb4b8 343 ret = nbd_receive_negotiate(sock, s->export_name, &s->nbdflags, &size,
33897dc7 344 &blocksize);
fc19f8a0 345 if (ret < 0) {
33897dc7
NT
346 logout("Failed to negotiate with the NBD server\n");
347 closesocket(sock);
185b4338 348 return ret;
1d45f8b5 349 }
75818250 350
ae255e52
PB
351 /* Now that we're connected, set the socket to be non-blocking and
352 * kick the reply mechanism. */
33897dc7 353 socket_set_nonblock(sock);
b3adf53a 354 qemu_aio_set_fd_handler(sock, nbd_reply_ready, NULL,
bafbd6a1 355 nbd_have_request, s);
33897dc7 356
75818250
TS
357 s->sock = sock;
358 s->size = size;
359 s->blocksize = blocksize;
360
33897dc7
NT
361 logout("Established connection with NBD server\n");
362 return 0;
363}
364
365static void nbd_teardown_connection(BlockDriverState *bs)
366{
367 BDRVNBDState *s = bs->opaque;
368 struct nbd_request request;
369
370 request.type = NBD_CMD_DISC;
33897dc7
NT
371 request.from = 0;
372 request.len = 0;
373 nbd_send_request(s->sock, &request);
374
bafbd6a1 375 qemu_aio_set_fd_handler(s->sock, NULL, NULL, NULL, NULL);
33897dc7
NT
376 closesocket(s->sock);
377}
378
787e4a85
KW
379static int nbd_open(BlockDriverState *bs, const char* filename,
380 QDict *options, int flags)
33897dc7
NT
381{
382 BDRVNBDState *s = bs->opaque;
383 int result;
384
ecda3447
PB
385 qemu_co_mutex_init(&s->send_mutex);
386 qemu_co_mutex_init(&s->free_sema);
ae255e52 387
33897dc7 388 /* Pop the config into our state object. Exit if invalid. */
d04b0bbb 389 result = nbd_config(s, filename);
33897dc7
NT
390 if (result != 0) {
391 return result;
392 }
393
394 /* establish TCP connection, return error if it fails
395 * TODO: Configurable retry-until-timeout behaviour.
396 */
397 result = nbd_establish_connection(bs);
398
399 return result;
75818250
TS
400}
401
d9b09f13
PB
402static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
403 int nb_sectors, QEMUIOVector *qiov,
404 int offset)
75818250
TS
405{
406 BDRVNBDState *s = bs->opaque;
407 struct nbd_request request;
408 struct nbd_reply reply;
fc19f8a0 409 ssize_t ret;
75818250
TS
410
411 request.type = NBD_CMD_READ;
3a93113a 412 request.from = sector_num * 512;
75818250
TS
413 request.len = nb_sectors * 512;
414
ae255e52 415 nbd_coroutine_start(s, &request);
fc19f8a0
PB
416 ret = nbd_co_send_request(s, &request, NULL, 0);
417 if (ret < 0) {
185b4338 418 reply.error = -ret;
ae255e52 419 } else {
2fc8ae1d 420 nbd_co_receive_reply(s, &request, &reply, qiov, offset);
ae255e52
PB
421 }
422 nbd_coroutine_end(s, &request);
423 return -reply.error;
75818250 424
75818250
TS
425}
426
d9b09f13
PB
427static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
428 int nb_sectors, QEMUIOVector *qiov,
429 int offset)
75818250
TS
430{
431 BDRVNBDState *s = bs->opaque;
432 struct nbd_request request;
433 struct nbd_reply reply;
fc19f8a0 434 ssize_t ret;
75818250
TS
435
436 request.type = NBD_CMD_WRITE;
2c7989a9
PB
437 if (!bdrv_enable_write_cache(bs) && (s->nbdflags & NBD_FLAG_SEND_FUA)) {
438 request.type |= NBD_CMD_FLAG_FUA;
439 }
440
3a93113a 441 request.from = sector_num * 512;
75818250
TS
442 request.len = nb_sectors * 512;
443
ae255e52 444 nbd_coroutine_start(s, &request);
2fc8ae1d 445 ret = nbd_co_send_request(s, &request, qiov, offset);
fc19f8a0 446 if (ret < 0) {
185b4338 447 reply.error = -ret;
ae255e52
PB
448 } else {
449 nbd_co_receive_reply(s, &request, &reply, NULL, 0);
450 }
451 nbd_coroutine_end(s, &request);
452 return -reply.error;
e183ef75
PB
453}
454
d9b09f13
PB
455/* qemu-nbd has a limit of slightly less than 1M per request. Try to
456 * remain aligned to 4K. */
457#define NBD_MAX_SECTORS 2040
458
459static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
460 int nb_sectors, QEMUIOVector *qiov)
461{
462 int offset = 0;
463 int ret;
464 while (nb_sectors > NBD_MAX_SECTORS) {
465 ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
466 if (ret < 0) {
467 return ret;
468 }
469 offset += NBD_MAX_SECTORS * 512;
470 sector_num += NBD_MAX_SECTORS;
471 nb_sectors -= NBD_MAX_SECTORS;
472 }
473 return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
474}
475
476static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
477 int nb_sectors, QEMUIOVector *qiov)
478{
479 int offset = 0;
480 int ret;
481 while (nb_sectors > NBD_MAX_SECTORS) {
482 ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
483 if (ret < 0) {
484 return ret;
485 }
486 offset += NBD_MAX_SECTORS * 512;
487 sector_num += NBD_MAX_SECTORS;
488 nb_sectors -= NBD_MAX_SECTORS;
489 }
490 return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
491}
492
1486d04a
PB
493static int nbd_co_flush(BlockDriverState *bs)
494{
495 BDRVNBDState *s = bs->opaque;
496 struct nbd_request request;
497 struct nbd_reply reply;
fc19f8a0 498 ssize_t ret;
1486d04a
PB
499
500 if (!(s->nbdflags & NBD_FLAG_SEND_FLUSH)) {
501 return 0;
502 }
503
504 request.type = NBD_CMD_FLUSH;
505 if (s->nbdflags & NBD_FLAG_SEND_FUA) {
506 request.type |= NBD_CMD_FLAG_FUA;
507 }
508
509 request.from = 0;
510 request.len = 0;
511
512 nbd_coroutine_start(s, &request);
fc19f8a0
PB
513 ret = nbd_co_send_request(s, &request, NULL, 0);
514 if (ret < 0) {
185b4338 515 reply.error = -ret;
1486d04a
PB
516 } else {
517 nbd_co_receive_reply(s, &request, &reply, NULL, 0);
518 }
519 nbd_coroutine_end(s, &request);
520 return -reply.error;
521}
522
7a706633
PB
523static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
524 int nb_sectors)
525{
526 BDRVNBDState *s = bs->opaque;
527 struct nbd_request request;
528 struct nbd_reply reply;
fc19f8a0 529 ssize_t ret;
7a706633
PB
530
531 if (!(s->nbdflags & NBD_FLAG_SEND_TRIM)) {
532 return 0;
533 }
534 request.type = NBD_CMD_TRIM;
535 request.from = sector_num * 512;;
536 request.len = nb_sectors * 512;
537
538 nbd_coroutine_start(s, &request);
fc19f8a0
PB
539 ret = nbd_co_send_request(s, &request, NULL, 0);
540 if (ret < 0) {
185b4338 541 reply.error = -ret;
7a706633
PB
542 } else {
543 nbd_co_receive_reply(s, &request, &reply, NULL, 0);
544 }
545 nbd_coroutine_end(s, &request);
546 return -reply.error;
547}
548
75818250
TS
549static void nbd_close(BlockDriverState *bs)
550{
d2d979c6 551 BDRVNBDState *s = bs->opaque;
7267c094
AL
552 g_free(s->export_name);
553 g_free(s->host_spec);
d2d979c6 554
33897dc7 555 nbd_teardown_connection(bs);
75818250
TS
556}
557
558static int64_t nbd_getlength(BlockDriverState *bs)
559{
560 BDRVNBDState *s = bs->opaque;
561
562 return s->size;
563}
564
5efa9d5a 565static BlockDriver bdrv_nbd = {
1486d04a 566 .format_name = "nbd",
1d7d2a9d
PB
567 .protocol_name = "nbd",
568 .instance_size = sizeof(BDRVNBDState),
569 .bdrv_file_open = nbd_open,
570 .bdrv_co_readv = nbd_co_readv,
571 .bdrv_co_writev = nbd_co_writev,
572 .bdrv_close = nbd_close,
573 .bdrv_co_flush_to_os = nbd_co_flush,
574 .bdrv_co_discard = nbd_co_discard,
575 .bdrv_getlength = nbd_getlength,
576};
577
578static BlockDriver bdrv_nbd_tcp = {
579 .format_name = "nbd",
580 .protocol_name = "nbd+tcp",
581 .instance_size = sizeof(BDRVNBDState),
582 .bdrv_file_open = nbd_open,
583 .bdrv_co_readv = nbd_co_readv,
584 .bdrv_co_writev = nbd_co_writev,
585 .bdrv_close = nbd_close,
586 .bdrv_co_flush_to_os = nbd_co_flush,
587 .bdrv_co_discard = nbd_co_discard,
588 .bdrv_getlength = nbd_getlength,
589};
590
591static BlockDriver bdrv_nbd_unix = {
592 .format_name = "nbd",
593 .protocol_name = "nbd+unix",
1486d04a
PB
594 .instance_size = sizeof(BDRVNBDState),
595 .bdrv_file_open = nbd_open,
596 .bdrv_co_readv = nbd_co_readv,
597 .bdrv_co_writev = nbd_co_writev,
598 .bdrv_close = nbd_close,
599 .bdrv_co_flush_to_os = nbd_co_flush,
7a706633 600 .bdrv_co_discard = nbd_co_discard,
1486d04a 601 .bdrv_getlength = nbd_getlength,
75818250 602};
5efa9d5a
AL
603
604static void bdrv_nbd_init(void)
605{
606 bdrv_register(&bdrv_nbd);
1d7d2a9d
PB
607 bdrv_register(&bdrv_nbd_tcp);
608 bdrv_register(&bdrv_nbd_unix);
5efa9d5a
AL
609}
610
611block_init(bdrv_nbd_init);