/*
 * QEMU Block driver for NBD
 *
 * Copyright (C) 2016 Red Hat, Inc.
 * Copyright (C) 2008 Bull S.A.S.
 *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
 *
 * Some parts:
 *    Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

#include "trace.h"
#include "qapi/error.h"
#include "nbd-client.h"

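/*
 * Request handles are derived by XOR-ing the request slot index with the
 * BlockDriverState pointer, so a reply's handle maps straight back to its
 * slot in s->requests[]; a stale or corrupt handle will usually map outside
 * the valid index range checked by the reply loop.
 */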
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ (uint64_t)(intptr_t)(bs))
#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ (uint64_t)(intptr_t)(bs))

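/* Wake every request coroutine that is currently parked in its receive yield */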
static void nbd_recv_coroutines_wake_all(NBDClientSession *s)
{
    int i;

    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        NBDClientRequest *req = &s->requests[i];

        if (req->coroutine && req->receiving) {
            aio_co_wake(req->coroutine);
        }
    }
}

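/*
 * Shut down the channel in both directions, wait for connection_co to
 * finish, and drop the socket/channel references.
 */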
static void nbd_teardown_connection(BlockDriverState *bs)
{
    NBDClientSession *client = nbd_get_client_session(bs);

    assert(client->ioc);

    /* finish any pending coroutines */
    qio_channel_shutdown(client->ioc,
                         QIO_CHANNEL_SHUTDOWN_BOTH,
                         NULL);
    BDRV_POLL_WHILE(bs, client->connection_co);

    nbd_client_detach_aio_context(bs);
    object_unref(OBJECT(client->sioc));
    client->sioc = NULL;
    object_unref(OBJECT(client->ioc));
    client->ioc = NULL;
}

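/*
 * Connection-wide reply reader: runs until the connection quits or breaks,
 * reading one reply header at a time and handing control to the request
 * coroutine that owns the matching handle.
 */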
static coroutine_fn void nbd_connection_entry(void *opaque)
{
    NBDClientSession *s = opaque;
    uint64_t i;
    int ret = 0;
    Error *local_err = NULL;

    while (!s->quit) {
        /*
         * The NBD client can only really be considered idle when it has
         * yielded from qio_channel_readv_all_eof(), waiting for data. This is
         * the point where the additional scheduled coroutine entry happens
         * after nbd_client_attach_aio_context().
         *
         * Therefore we keep an additional in_flight reference all the time and
         * only drop it temporarily here.
         */
        assert(s->reply.handle == 0);
        ret = nbd_receive_reply(s->bs, s->ioc, &s->reply, &local_err);

        if (local_err) {
            trace_nbd_read_reply_entry_fail(ret, error_get_pretty(local_err));
            error_free(local_err);
        }
        if (ret <= 0) {
            break;
        }

        /* There's no need for a mutex on the receive side, because the
         * handler acts as a synchronization point and ensures that only
         * one coroutine is called until the reply finishes.
         */
        i = HANDLE_TO_INDEX(s, s->reply.handle);
        if (i >= MAX_NBD_REQUESTS ||
            !s->requests[i].coroutine ||
            !s->requests[i].receiving ||
            (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply))
        {
            break;
        }

        /* We're woken up again by the request itself. Note that there
         * is no race between yielding and reentering connection_co. This
         * is because:
         *
         * - if the request runs on the same AioContext, it is only
         *   entered after we yield
         *
         * - if the request runs on a different AioContext, reentering
         *   connection_co happens through a bottom half, which can only
         *   run after we yield.
         */
        aio_co_wake(s->requests[i].coroutine);
        qemu_coroutine_yield();
    }

    s->quit = true;
    nbd_recv_coroutines_wake_all(s);
    bdrv_dec_in_flight(s->bs);

    s->connection_co = NULL;
    aio_wait_kick();
}

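/*
 * Reserve a free slot in s->requests[] (waiting on free_sema while all
 * MAX_NBD_REQUESTS slots are in flight) and send the request header,
 * corked together with the write payload when one is given.
 */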
static int nbd_co_send_request(BlockDriverState *bs,
                               NBDRequest *request,
                               QEMUIOVector *qiov)
{
    NBDClientSession *s = nbd_get_client_session(bs);
    int rc, i;

    qemu_co_mutex_lock(&s->send_mutex);
    while (s->in_flight == MAX_NBD_REQUESTS) {
        qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
    }
    s->in_flight++;

    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        if (s->requests[i].coroutine == NULL) {
            break;
        }
    }

    g_assert(qemu_in_coroutine());
    assert(i < MAX_NBD_REQUESTS);

    s->requests[i].coroutine = qemu_coroutine_self();
    s->requests[i].offset = request->from;
    s->requests[i].receiving = false;

    request->handle = INDEX_TO_HANDLE(s, i);

    if (s->quit) {
        rc = -EIO;
        goto err;
    }
    assert(s->ioc);

    if (qiov) {
        qio_channel_set_cork(s->ioc, true);
        rc = nbd_send_request(s->ioc, request);
        if (rc >= 0 && !s->quit) {
            if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
                                       NULL) < 0) {
                rc = -EIO;
            }
        } else if (rc >= 0) {
            rc = -EIO;
        }
        qio_channel_set_cork(s->ioc, false);
    } else {
        rc = nbd_send_request(s->ioc, request);
    }

err:
    if (rc < 0) {
        s->quit = true;
        s->requests[i].coroutine = NULL;
        s->in_flight--;
        qemu_co_queue_next(&s->free_sema);
    }
    qemu_co_mutex_unlock(&s->send_mutex);
    return rc;
}

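/* Read a big-endian value at *payload and advance the cursor past it */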
static inline uint16_t payload_advance16(uint8_t **payload)
{
    *payload += 2;
    return lduw_be_p(*payload - 2);
}

static inline uint32_t payload_advance32(uint8_t **payload)
{
    *payload += 4;
    return ldl_be_p(*payload - 4);
}

static inline uint64_t payload_advance64(uint8_t **payload)
{
    *payload += 8;
    return ldq_be_p(*payload - 8);
}

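/*
 * Parse an NBD_REPLY_TYPE_OFFSET_HOLE payload and zero the matching part
 * of @qiov, after checking that the hole fits in the original request.
 */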
static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
                                         uint8_t *payload, uint64_t orig_offset,
                                         QEMUIOVector *qiov, Error **errp)
{
    uint64_t offset;
    uint32_t hole_size;

    if (chunk->length != sizeof(offset) + sizeof(hole_size)) {
        error_setg(errp, "Protocol error: invalid payload for "
                         "NBD_REPLY_TYPE_OFFSET_HOLE");
        return -EINVAL;
    }

    offset = payload_advance64(&payload);
    hole_size = payload_advance32(&payload);

    if (!hole_size || offset < orig_offset || hole_size > qiov->size ||
        offset > orig_offset + qiov->size - hole_size) {
        error_setg(errp, "Protocol error: server sent chunk exceeding requested"
                         " region");
        return -EINVAL;
    }

    qemu_iovec_memset(qiov, offset - orig_offset, 0, hole_size);

    return 0;
}

/* nbd_parse_blockstatus_payload
 * Supports only one extent per reply, and only for the
 * base:allocation metadata context.
 */
static int nbd_parse_blockstatus_payload(NBDClientSession *client,
                                         NBDStructuredReplyChunk *chunk,
                                         uint8_t *payload, uint64_t orig_length,
                                         NBDExtent *extent, Error **errp)
{
    uint32_t context_id;

    if (chunk->length != sizeof(context_id) + sizeof(*extent)) {
        error_setg(errp, "Protocol error: invalid payload for "
                         "NBD_REPLY_TYPE_BLOCK_STATUS");
        return -EINVAL;
    }

    context_id = payload_advance32(&payload);
    if (client->info.context_id != context_id) {
        error_setg(errp, "Protocol error: unexpected context id %d for "
                         "NBD_REPLY_TYPE_BLOCK_STATUS, when negotiated context "
                         "id is %d", context_id,
                   client->info.context_id);
        return -EINVAL;
    }

    extent->length = payload_advance32(&payload);
    extent->flags = payload_advance32(&payload);

    if (extent->length == 0 ||
        (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
                                                    client->info.min_block))) {
        error_setg(errp, "Protocol error: server sent status chunk with "
                         "invalid length");
        return -EINVAL;
    }

    /* The server is allowed to send us extra information on the final
     * extent; just clamp it to the length we requested. */
    if (extent->length > orig_length) {
        extent->length = orig_length;
    }

    return 0;
}

/* nbd_parse_error_payload
 * On success, @errp contains a message describing the NBD error reply.
 */
static int nbd_parse_error_payload(NBDStructuredReplyChunk *chunk,
                                   uint8_t *payload, int *request_ret,
                                   Error **errp)
{
    uint32_t error;
    uint16_t message_size;

    assert(chunk->type & (1 << 15));

    if (chunk->length < sizeof(error) + sizeof(message_size)) {
        error_setg(errp,
                   "Protocol error: invalid payload for structured error");
        return -EINVAL;
    }

    error = nbd_errno_to_system_errno(payload_advance32(&payload));
    if (error == 0) {
        error_setg(errp, "Protocol error: server sent structured error chunk "
                         "with error = 0");
        return -EINVAL;
    }

    *request_ret = -error;
    message_size = payload_advance16(&payload);

    if (message_size > chunk->length - sizeof(error) - sizeof(message_size)) {
        error_setg(errp, "Protocol error: server sent structured error chunk "
                         "with incorrect message size");
        return -EINVAL;
    }

    /* TODO: Add a trace point to mention the server complaint */

    /* TODO handle ERROR_OFFSET */

    return 0;
}

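/*
 * Read an NBD_REPLY_TYPE_OFFSET_DATA payload straight into the matching
 * slice of @qiov, after checking that it fits in the original request.
 */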
static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
                                              uint64_t orig_offset,
                                              QEMUIOVector *qiov, Error **errp)
{
    QEMUIOVector sub_qiov;
    uint64_t offset;
    size_t data_size;
    int ret;
    NBDStructuredReplyChunk *chunk = &s->reply.structured;

    assert(nbd_reply_is_structured(&s->reply));

    /* The NBD spec requires at least one byte of payload */
    if (chunk->length <= sizeof(offset)) {
        error_setg(errp, "Protocol error: invalid payload for "
                         "NBD_REPLY_TYPE_OFFSET_DATA");
        return -EINVAL;
    }

    if (nbd_read64(s->ioc, &offset, "OFFSET_DATA offset", errp) < 0) {
        return -EIO;
    }

    data_size = chunk->length - sizeof(offset);
    assert(data_size);
    if (offset < orig_offset || data_size > qiov->size ||
        offset > orig_offset + qiov->size - data_size) {
        error_setg(errp, "Protocol error: server sent chunk exceeding requested"
                         " region");
        return -EINVAL;
    }

    qemu_iovec_init(&sub_qiov, qiov->niov);
    qemu_iovec_concat(&sub_qiov, qiov, offset - orig_offset, data_size);
    ret = qio_channel_readv_all(s->ioc, sub_qiov.iov, sub_qiov.niov, errp);
    qemu_iovec_destroy(&sub_qiov);

    return ret < 0 ? -EIO : 0;
}

#define NBD_MAX_MALLOC_PAYLOAD 1000
/* nbd_co_receive_structured_payload
 * Read the payload of the current structured reply chunk into a freshly
 * allocated buffer returned via @payload; the caller must g_free() it.
 */
static coroutine_fn int nbd_co_receive_structured_payload(
    NBDClientSession *s, void **payload, Error **errp)
{
    int ret;
    uint32_t len;

    assert(nbd_reply_is_structured(&s->reply));

    len = s->reply.structured.length;

    if (len == 0) {
        return 0;
    }

    if (payload == NULL) {
        error_setg(errp, "Unexpected structured payload");
        return -EINVAL;
    }

    if (len > NBD_MAX_MALLOC_PAYLOAD) {
        error_setg(errp, "Payload too large");
        return -EINVAL;
    }

    *payload = g_new(char, len);
    ret = nbd_read(s->ioc, *payload, len, "structured payload", errp);
    if (ret < 0) {
        g_free(*payload);
        *payload = NULL;
        return ret;
    }

    return 0;
}

/* nbd_co_do_receive_one_chunk
 * for simple reply:
 *   set request_ret to received reply error
 *   if qiov is not NULL: read payload to @qiov
 * for structured reply chunk:
 *   if error chunk: read payload, set @request_ret, do not set @payload
 *   else if offset_data chunk: read payload data to @qiov, do not set @payload
 *   else: read payload to @payload
 *
 * If the function fails, @errp contains the corresponding error message, and
 * the connection with the server is suspect. If it returns 0, then the
 * transaction succeeded (although @request_ret may be a negative errno
 * corresponding to the server's error reply), and errp is unchanged.
 */
static coroutine_fn int nbd_co_do_receive_one_chunk(
    NBDClientSession *s, uint64_t handle, bool only_structured,
    int *request_ret, QEMUIOVector *qiov, void **payload, Error **errp)
{
    int ret;
    int i = HANDLE_TO_INDEX(s, handle);
    void *local_payload = NULL;
    NBDStructuredReplyChunk *chunk;

    if (payload) {
        *payload = NULL;
    }
    *request_ret = 0;

    /* Wait until we're woken up by nbd_connection_entry. */
    s->requests[i].receiving = true;
    qemu_coroutine_yield();
    s->requests[i].receiving = false;
    if (s->quit) {
        error_setg(errp, "Connection closed");
        return -EIO;
    }
    assert(s->ioc);

    assert(s->reply.handle == handle);

    if (nbd_reply_is_simple(&s->reply)) {
        if (only_structured) {
            error_setg(errp, "Protocol error: simple reply when structured "
                             "reply chunk was expected");
            return -EINVAL;
        }

        *request_ret = -nbd_errno_to_system_errno(s->reply.simple.error);
        if (*request_ret < 0 || !qiov) {
            return 0;
        }

        return qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
                                     errp) < 0 ? -EIO : 0;
    }

    /* handle structured reply chunk */
    assert(s->info.structured_reply);
    chunk = &s->reply.structured;

    if (chunk->type == NBD_REPLY_TYPE_NONE) {
        if (!(chunk->flags & NBD_REPLY_FLAG_DONE)) {
            error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk without"
                             " NBD_REPLY_FLAG_DONE flag set");
            return -EINVAL;
        }
        if (chunk->length) {
            error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk with"
                             " nonzero length");
            return -EINVAL;
        }
        return 0;
    }

    if (chunk->type == NBD_REPLY_TYPE_OFFSET_DATA) {
        if (!qiov) {
            error_setg(errp, "Unexpected NBD_REPLY_TYPE_OFFSET_DATA chunk");
            return -EINVAL;
        }

        return nbd_co_receive_offset_data_payload(s, s->requests[i].offset,
                                                  qiov, errp);
    }

    if (nbd_reply_type_is_error(chunk->type)) {
        payload = &local_payload;
    }

    ret = nbd_co_receive_structured_payload(s, payload, errp);
    if (ret < 0) {
        return ret;
    }

    if (nbd_reply_type_is_error(chunk->type)) {
        ret = nbd_parse_error_payload(chunk, local_payload, request_ret, errp);
        g_free(local_payload);
        return ret;
    }

    return 0;
}

/* nbd_co_receive_one_chunk
 * Read a reply chunk, wake up connection_co, and set s->quit if needed.
 * Returns a fatal (connection-level) error code, or 0 on success; the
 * server's per-request error, if any, is returned in @request_ret.
 */
static coroutine_fn int nbd_co_receive_one_chunk(
    NBDClientSession *s, uint64_t handle, bool only_structured,
    int *request_ret, QEMUIOVector *qiov, NBDReply *reply, void **payload,
    Error **errp)
{
    int ret = nbd_co_do_receive_one_chunk(s, handle, only_structured,
                                          request_ret, qiov, payload, errp);

    if (ret < 0) {
        s->quit = true;
    } else {
        /* For assert at loop start in nbd_connection_entry */
        if (reply) {
            *reply = s->reply;
        }
        s->reply.handle = 0;
    }

    if (s->connection_co) {
        aio_co_wake(s->connection_co);
    }

    return ret;
}

typedef struct NBDReplyChunkIter {
    int ret;
    int request_ret;
    Error *err;
    bool done, only_structured;
} NBDReplyChunkIter;

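/* Record a fatal connection error in the iterator; only the first is kept */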
static void nbd_iter_channel_error(NBDReplyChunkIter *iter,
                                   int ret, Error **local_err)
{
    assert(ret < 0);

    if (!iter->ret) {
        iter->ret = ret;
        error_propagate(&iter->err, *local_err);
    } else {
        error_free(*local_err);
    }

    *local_err = NULL;
}

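/* Record a server-reported request error; only the first one is kept */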
static void nbd_iter_request_error(NBDReplyChunkIter *iter, int ret)
{
    assert(ret < 0);

    if (!iter->request_ret) {
        iter->request_ret = ret;
    }
}

/* NBD_FOREACH_REPLY_CHUNK
 * Iterate over all reply chunks belonging to @handle, accumulating errors
 * in @iter; the loop body runs once per structured (non-simple) chunk.
 */
#define NBD_FOREACH_REPLY_CHUNK(s, iter, handle, structured, \
                                qiov, reply, payload) \
    for (iter = (NBDReplyChunkIter) { .only_structured = structured }; \
         nbd_reply_chunk_iter_receive(s, &iter, handle, qiov, reply, payload);)

/* nbd_reply_chunk_iter_receive
 * Receive the next chunk for @handle and decide whether the
 * NBD_FOREACH_REPLY_CHUNK loop body should run for it.
 */
static bool nbd_reply_chunk_iter_receive(NBDClientSession *s,
                                         NBDReplyChunkIter *iter,
                                         uint64_t handle,
                                         QEMUIOVector *qiov, NBDReply *reply,
                                         void **payload)
{
    int ret, request_ret;
    NBDReply local_reply;
    NBDStructuredReplyChunk *chunk;
    Error *local_err = NULL;
    if (s->quit) {
        error_setg(&local_err, "Connection closed");
        nbd_iter_channel_error(iter, -EIO, &local_err);
        goto break_loop;
    }

    if (iter->done) {
        /* Previous iteration was last. */
        goto break_loop;
    }

    if (reply == NULL) {
        reply = &local_reply;
    }

    ret = nbd_co_receive_one_chunk(s, handle, iter->only_structured,
                                   &request_ret, qiov, reply, payload,
                                   &local_err);
    if (ret < 0) {
        nbd_iter_channel_error(iter, ret, &local_err);
    } else if (request_ret < 0) {
        nbd_iter_request_error(iter, request_ret);
    }

    /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */
    if (nbd_reply_is_simple(reply) || s->quit) {
        goto break_loop;
    }

    chunk = &reply->structured;
    iter->only_structured = true;

    if (chunk->type == NBD_REPLY_TYPE_NONE) {
        /* NBD_REPLY_FLAG_DONE is already checked in nbd_co_receive_one_chunk */
        assert(chunk->flags & NBD_REPLY_FLAG_DONE);
        goto break_loop;
    }

    if (chunk->flags & NBD_REPLY_FLAG_DONE) {
        /* This iteration is last. */
        iter->done = true;
    }

    /* Execute the loop body */
    return true;

break_loop:
    s->requests[HANDLE_TO_INDEX(s, handle)].coroutine = NULL;

    qemu_co_mutex_lock(&s->send_mutex);
    s->in_flight--;
    qemu_co_queue_next(&s->free_sema);
    qemu_co_mutex_unlock(&s->send_mutex);

    return false;
}

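/* Drain all remaining reply chunks for @handle, collecting only return codes */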
static int nbd_co_receive_return_code(NBDClientSession *s, uint64_t handle,
                                      int *request_ret, Error **errp)
{
    NBDReplyChunkIter iter;

    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, NULL, NULL) {
        /* nbd_reply_chunk_iter_receive does all the work */
    }

    error_propagate(errp, iter.err);
    *request_ret = iter.request_ret;
    return iter.ret;
}

static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
                                        uint64_t offset, QEMUIOVector *qiov,
                                        int *request_ret, Error **errp)
{
    NBDReplyChunkIter iter;
    NBDReply reply;
    void *payload = NULL;
    Error *local_err = NULL;

    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
                            qiov, &reply, &payload)
    {
        int ret;
        NBDStructuredReplyChunk *chunk = &reply.structured;

        assert(nbd_reply_is_structured(&reply));

        switch (chunk->type) {
        case NBD_REPLY_TYPE_OFFSET_DATA:
            /* special cased in nbd_co_receive_one_chunk, data is already
             * in qiov */
            break;
        case NBD_REPLY_TYPE_OFFSET_HOLE:
            ret = nbd_parse_offset_hole_payload(&reply.structured, payload,
                                                offset, qiov, &local_err);
            if (ret < 0) {
                s->quit = true;
                nbd_iter_channel_error(&iter, ret, &local_err);
            }
            break;
        default:
            if (!nbd_reply_type_is_error(chunk->type)) {
                /* not allowed reply type */
                s->quit = true;
                error_setg(&local_err,
                           "Unexpected reply type: %d (%s) for CMD_READ",
                           chunk->type, nbd_reply_type_lookup(chunk->type));
                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
            }
        }

        g_free(payload);
        payload = NULL;
    }

    error_propagate(errp, iter.err);
    *request_ret = iter.request_ret;
    return iter.ret;
}

static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
                                            uint64_t handle, uint64_t length,
                                            NBDExtent *extent,
                                            int *request_ret, Error **errp)
{
    NBDReplyChunkIter iter;
    NBDReply reply;
    void *payload = NULL;
    Error *local_err = NULL;
    bool received = false;

    assert(!extent->length);
    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
                            NULL, &reply, &payload)
    {
        int ret;
        NBDStructuredReplyChunk *chunk = &reply.structured;

        assert(nbd_reply_is_structured(&reply));

        switch (chunk->type) {
        case NBD_REPLY_TYPE_BLOCK_STATUS:
            if (received) {
                s->quit = true;
                error_setg(&local_err, "Several BLOCK_STATUS chunks in reply");
                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
            }
            received = true;

            ret = nbd_parse_blockstatus_payload(s, &reply.structured,
                                                payload, length, extent,
                                                &local_err);
            if (ret < 0) {
                s->quit = true;
                nbd_iter_channel_error(&iter, ret, &local_err);
            }
            break;
        default:
            if (!nbd_reply_type_is_error(chunk->type)) {
                s->quit = true;
                error_setg(&local_err,
                           "Unexpected reply type: %d (%s) "
                           "for CMD_BLOCK_STATUS",
                           chunk->type, nbd_reply_type_lookup(chunk->type));
                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
            }
        }

        g_free(payload);
        payload = NULL;
    }

    if (!extent->length && !iter.err) {
        error_setg(&iter.err,
                   "Server did not reply with any status extents");
        if (!iter.ret) {
            iter.ret = -EIO;
        }
    }

    error_propagate(errp, iter.err);
    *request_ret = iter.request_ret;
    return iter.ret;
}

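/*
 * Send a request without read data and wait for its complete reply.
 * @write_qiov must be non-NULL exactly for NBD_CMD_WRITE requests.
 */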
static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
                          QEMUIOVector *write_qiov)
{
    int ret, request_ret;
    Error *local_err = NULL;
    NBDClientSession *client = nbd_get_client_session(bs);

    assert(request->type != NBD_CMD_READ);
    if (write_qiov) {
        assert(request->type == NBD_CMD_WRITE);
        assert(request->len == iov_size(write_qiov->iov, write_qiov->niov));
    } else {
        assert(request->type != NBD_CMD_WRITE);
    }
    ret = nbd_co_send_request(bs, request, write_qiov);
    if (ret < 0) {
        return ret;
    }

    ret = nbd_co_receive_return_code(client, request->handle,
                                     &request_ret, &local_err);
    if (local_err) {
        trace_nbd_co_request_fail(request->from, request->len, request->handle,
                                  request->flags, request->type,
                                  nbd_cmd_lookup(request->type),
                                  ret, error_get_pretty(local_err));
        error_free(local_err);
    }
    return ret ? ret : request_ret;
}

int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags)
{
    int ret, request_ret;
    Error *local_err = NULL;
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_READ,
        .from = offset,
        .len = bytes,
    };

    assert(bytes <= NBD_MAX_BUFFER_SIZE);
    assert(!flags);

    if (!bytes) {
        return 0;
    }
    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        return ret;
    }

    ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov,
                                       &request_ret, &local_err);
    if (local_err) {
        trace_nbd_co_request_fail(request.from, request.len, request.handle,
                                  request.flags, request.type,
                                  nbd_cmd_lookup(request.type),
                                  ret, error_get_pretty(local_err));
        error_free(local_err);
    }
    return ret ? ret : request_ret;
}

839
70c4fb26
EB
840int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
841 uint64_t bytes, QEMUIOVector *qiov, int flags)
2302c1ca 842{
10676b81 843 NBDClientSession *client = nbd_get_client_session(bs);
ed2dd912 844 NBDRequest request = {
70c4fb26
EB
845 .type = NBD_CMD_WRITE,
846 .from = offset,
847 .len = bytes,
848 };
2302c1ca 849
1104d83c 850 assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
52a46505 851 if (flags & BDRV_REQ_FUA) {
004a89fc 852 assert(client->info.flags & NBD_FLAG_SEND_FUA);
b626b51a 853 request.flags |= NBD_CMD_FLAG_FUA;
2302c1ca
MAL
854 }
855
70c4fb26 856 assert(bytes <= NBD_MAX_BUFFER_SIZE);
2302c1ca 857
9d8f818c
EB
858 if (!bytes) {
859 return 0;
860 }
f35dff7e 861 return nbd_co_request(bs, &request, qiov);
2302c1ca
MAL
862}
863
int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                                int bytes, BdrvRequestFlags flags)
{
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_WRITE_ZEROES,
        .from = offset,
        .len = bytes,
    };

    assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
    if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
        return -ENOTSUP;
    }

    if (flags & BDRV_REQ_FUA) {
        assert(client->info.flags & NBD_FLAG_SEND_FUA);
        request.flags |= NBD_CMD_FLAG_FUA;
    }
    if (!(flags & BDRV_REQ_MAY_UNMAP)) {
        request.flags |= NBD_CMD_FLAG_NO_HOLE;
    }

    if (!bytes) {
        return 0;
    }
    return nbd_co_request(bs, &request, NULL);
}

int nbd_client_co_flush(BlockDriverState *bs)
{
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = { .type = NBD_CMD_FLUSH };

    if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) {
        return 0;
    }

    request.from = 0;
    request.len = 0;

    return nbd_co_request(bs, &request, NULL);
}

int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
{
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_TRIM,
        .from = offset,
        .len = bytes,
    };

    assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
    if (!(client->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) {
        return 0;
    }

    return nbd_co_request(bs, &request, NULL);
}

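/*
 * Implement block-status queries as a single NBD_CMD_BLOCK_STATUS request
 * with NBD_CMD_FLAG_REQ_ONE, reporting everything as data when the server
 * did not negotiate the base:allocation context.
 */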
int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
                                            bool want_zero,
                                            int64_t offset, int64_t bytes,
                                            int64_t *pnum, int64_t *map,
                                            BlockDriverState **file)
{
    int ret, request_ret;
    NBDExtent extent = { 0 };
    NBDClientSession *client = nbd_get_client_session(bs);
    Error *local_err = NULL;

    NBDRequest request = {
        .type = NBD_CMD_BLOCK_STATUS,
        .from = offset,
        .len = MIN(MIN_NON_ZERO(QEMU_ALIGN_DOWN(INT_MAX,
                                                bs->bl.request_alignment),
                                client->info.max_block), bytes),
        .flags = NBD_CMD_FLAG_REQ_ONE,
    };

    if (!client->info.base_allocation) {
        *pnum = bytes;
        return BDRV_BLOCK_DATA;
    }

    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        return ret;
    }

    ret = nbd_co_receive_blockstatus_reply(client, request.handle, bytes,
                                           &extent, &request_ret, &local_err);
    if (local_err) {
        trace_nbd_co_request_fail(request.from, request.len, request.handle,
                                  request.flags, request.type,
                                  nbd_cmd_lookup(request.type),
                                  ret, error_get_pretty(local_err));
        error_free(local_err);
    }
    if (ret < 0 || request_ret < 0) {
        return ret ? ret : request_ret;
    }

    assert(extent.length);
    *pnum = extent.length;
    return (extent.flags & NBD_STATE_HOLE ? 0 : BDRV_BLOCK_DATA) |
           (extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0);
}

void nbd_client_detach_aio_context(BlockDriverState *bs)
{
    NBDClientSession *client = nbd_get_client_session(bs);
    qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
}

static void nbd_client_attach_aio_context_bh(void *opaque)
{
    BlockDriverState *bs = opaque;
    NBDClientSession *client = nbd_get_client_session(bs);

    /* The node is still drained, so we know the coroutine has yielded in
     * nbd_read_eof(), the only place where bs->in_flight can reach 0, or it is
     * entered for the first time. Both places are safe for entering the
     * coroutine. */
    qemu_aio_coroutine_enter(bs->aio_context, client->connection_co);
    bdrv_dec_in_flight(bs);
}

void nbd_client_attach_aio_context(BlockDriverState *bs,
                                   AioContext *new_context)
{
    NBDClientSession *client = nbd_get_client_session(bs);
    qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context);

    bdrv_inc_in_flight(bs);

    /* Need to wait here for the BH to run because the BH must run while the
     * node is still drained. */
    aio_wait_bh_oneshot(new_context, nbd_client_attach_aio_context_bh, bs);
}

void nbd_client_close(BlockDriverState *bs)
{
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = { .type = NBD_CMD_DISC };

    assert(client->ioc);

    nbd_send_request(client->ioc, &request);

    nbd_teardown_connection(bs);
}

static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
                                                  Error **errp)
{
    QIOChannelSocket *sioc;
    Error *local_err = NULL;

    sioc = qio_channel_socket_new();
    qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");

    qio_channel_socket_connect_sync(sioc, saddr, &local_err);
    if (local_err) {
        object_unref(OBJECT(sioc));
        error_propagate(errp, local_err);
        return NULL;
    }

    qio_channel_set_delay(QIO_CHANNEL(sioc), false);

    return sioc;
}

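/*
 * Establish the connection and run the NBD handshake, then start the
 * reply-reading coroutine. On post-handshake failure, NBD_CMD_DISC is
 * sent as a courtesy before the socket is released.
 */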
static int nbd_client_connect(BlockDriverState *bs,
                              SocketAddress *saddr,
                              const char *export,
                              QCryptoTLSCreds *tlscreds,
                              const char *hostname,
                              const char *x_dirty_bitmap,
                              Error **errp)
{
    NBDClientSession *client = nbd_get_client_session(bs);
    int ret;

    /*
     * establish TCP connection, return error if it fails
     * TODO: Configurable retry-until-timeout behaviour.
     */
    QIOChannelSocket *sioc = nbd_establish_connection(saddr, errp);

    if (!sioc) {
        return -ECONNREFUSED;
    }

    /* NBD handshake */
    logout("session init %s\n", export);
    qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);

    client->info.request_sizes = true;
    client->info.structured_reply = true;
    client->info.base_allocation = true;
    client->info.x_dirty_bitmap = g_strdup(x_dirty_bitmap);
    client->info.name = g_strdup(export ?: "");
    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), tlscreds, hostname,
                                &client->ioc, &client->info, errp);
    g_free(client->info.x_dirty_bitmap);
    g_free(client->info.name);
    if (ret < 0) {
        logout("Failed to negotiate with the NBD server\n");
        object_unref(OBJECT(sioc));
        return ret;
    }
    if (x_dirty_bitmap && !client->info.base_allocation) {
        error_setg(errp, "requested x-dirty-bitmap %s not found",
                   x_dirty_bitmap);
        ret = -EINVAL;
        goto fail;
    }
    if (client->info.flags & NBD_FLAG_READ_ONLY) {
        ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp);
        if (ret < 0) {
            goto fail;
        }
    }
    if (client->info.flags & NBD_FLAG_SEND_FUA) {
        bs->supported_write_flags = BDRV_REQ_FUA;
        bs->supported_zero_flags |= BDRV_REQ_FUA;
    }
    if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
        bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
    }

    client->sioc = sioc;

    if (!client->ioc) {
        client->ioc = QIO_CHANNEL(sioc);
        object_ref(OBJECT(client->ioc));
    }

    /* Now that we're connected, set the socket to be non-blocking and
     * kick the reply mechanism. */
    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
    client->connection_co = qemu_coroutine_create(nbd_connection_entry, client);
    bdrv_inc_in_flight(bs);
    nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));

    logout("Established connection with NBD server\n");
    return 0;

 fail:
    /*
     * We have connected, but must fail for other reasons. The
     * connection is still blocking; send NBD_CMD_DISC as a courtesy
     * to the server.
     */
    {
        NBDRequest request = { .type = NBD_CMD_DISC };

        nbd_send_request(client->ioc ?: QIO_CHANNEL(sioc), &request);

        object_unref(OBJECT(sioc));

        return ret;
    }
}

int nbd_client_init(BlockDriverState *bs,
                    SocketAddress *saddr,
                    const char *export,
                    QCryptoTLSCreds *tlscreds,
                    const char *hostname,
                    const char *x_dirty_bitmap,
                    Error **errp)
{
    NBDClientSession *client = nbd_get_client_session(bs);

    client->bs = bs;
    qemu_co_mutex_init(&client->send_mutex);
    qemu_co_queue_init(&client->free_sema);

    return nbd_client_connect(bs, saddr, export, tlscreds, hostname,
                              x_dirty_bitmap, errp);
}