]> git.proxmox.com Git - mirror_qemu.git/blame - block/nbd-client.c
block/nbd-client: don't check ioc
[mirror_qemu.git] / block / nbd-client.c
CommitLineData
2302c1ca
MAL
1/*
2 * QEMU Block driver for NBD
3 *
b626b51a 4 * Copyright (C) 2016 Red Hat, Inc.
2302c1ca
MAL
5 * Copyright (C) 2008 Bull S.A.S.
6 * Author: Laurent Vivier <Laurent.Vivier@bull.net>
7 *
8 * Some parts:
9 * Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
10 *
11 * Permission is hereby granted, free of charge, to any person obtaining a copy
12 * of this software and associated documentation files (the "Software"), to deal
13 * in the Software without restriction, including without limitation the rights
14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15 * copies of the Software, and to permit persons to whom the Software is
16 * furnished to do so, subject to the following conditions:
17 *
18 * The above copyright notice and this permission notice shall be included in
19 * all copies or substantial portions of the Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
24 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 * THE SOFTWARE.
28 */
29
80c71a24 30#include "qemu/osdep.h"
d8b4bad8
VSO
31
32#include "trace.h"
be41c100 33#include "qapi/error.h"
2302c1ca 34#include "nbd-client.h"
2302c1ca 35
cfa3ad63
EB
/* Request handles embed the BDS pointer: XOR-ing a slot index with the
 * BlockDriverState address yields a handle unique to this device, and the
 * same XOR recovers the slot index from a reply's handle. */
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ (uint64_t)(intptr_t)(bs))
#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ (uint64_t)(intptr_t)(bs))
2302c1ca 38
07b1b99c 39static void nbd_recv_coroutines_wake_all(NBDClientSession *s)
69152c09
MAL
40{
41 int i;
42
43 for (i = 0; i < MAX_NBD_REQUESTS; i++) {
40f4a218
SH
44 NBDClientRequest *req = &s->requests[i];
45
46 if (req->coroutine && req->receiving) {
47 aio_co_wake(req->coroutine);
69152c09
MAL
48 }
49 }
50}
51
/*
 * Tear down the server connection: shut the channel down so any blocked
 * read returns, wait for the reply coroutine to exit, detach from the
 * AioContext, and release both channel references.  client->ioc must
 * still be set on entry.
 */
static void nbd_teardown_connection(BlockDriverState *bs)
{
    NBDClientSession *client = nbd_get_client_session(bs);

    assert(client->ioc);

    /* finish any pending coroutines */
    qio_channel_shutdown(client->ioc,
                         QIO_CHANNEL_SHUTDOWN_BOTH,
                         NULL);
    /* nbd_read_reply_entry clears read_reply_co when it exits. */
    BDRV_POLL_WHILE(bs, client->read_reply_co);

    nbd_client_detach_aio_context(bs);
    object_unref(OBJECT(client->sioc));
    client->sioc = NULL;
    object_unref(OBJECT(client->ioc));
    client->ioc = NULL;
}
70
ff82911c 71static coroutine_fn void nbd_read_reply_entry(void *opaque)
2302c1ca 72{
ff82911c 73 NBDClientSession *s = opaque;
2302c1ca 74 uint64_t i;
d0a18013 75 int ret = 0;
be41c100 76 Error *local_err = NULL;
2302c1ca 77
72b6ffc7 78 while (!s->quit) {
ff82911c 79 assert(s->reply.handle == 0);
be41c100 80 ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
08ace1d7 81 if (local_err) {
d8b4bad8
VSO
82 trace_nbd_read_reply_entry_fail(ret, error_get_pretty(local_err));
83 error_free(local_err);
be41c100 84 }
a12a712a 85 if (ret <= 0) {
ff82911c 86 break;
2302c1ca 87 }
2302c1ca 88
ff82911c
PB
89 /* There's no need for a mutex on the receive side, because the
90 * handler acts as a synchronization point and ensures that only
91 * one coroutine is called until the reply finishes.
92 */
93 i = HANDLE_TO_INDEX(s, s->reply.handle);
40f4a218
SH
94 if (i >= MAX_NBD_REQUESTS ||
95 !s->requests[i].coroutine ||
d2febedb 96 !s->requests[i].receiving ||
f140e300 97 (nbd_reply_is_structured(&s->reply) && !s->info.structured_reply))
d2febedb 98 {
ff82911c
PB
99 break;
100 }
2302c1ca 101
40f4a218 102 /* We're woken up again by the request itself. Note that there
ff82911c
PB
103 * is no race between yielding and reentering read_reply_co. This
104 * is because:
105 *
40f4a218 106 * - if the request runs on the same AioContext, it is only
ff82911c
PB
107 * entered after we yield
108 *
40f4a218 109 * - if the request runs on a different AioContext, reentering
ff82911c
PB
110 * read_reply_co happens through a bottom half, which can only
111 * run after we yield.
112 */
40f4a218 113 aio_co_wake(s->requests[i].coroutine);
ff82911c 114 qemu_coroutine_yield();
2302c1ca 115 }
a12a712a 116
40f4a218 117 s->quit = true;
07b1b99c 118 nbd_recv_coroutines_wake_all(s);
ff82911c 119 s->read_reply_co = NULL;
4720cbee 120 aio_wait_kick();
2302c1ca
MAL
121}
122
/*
 * Transmit @request (plus optional write payload @qiov) to the server.
 * Blocks until a request slot is free, claims one, and sends under
 * send_mutex.  Returns 0 on success or a negative errno; on failure the
 * slot is released and the whole session is marked broken (s->quit).
 */
static int nbd_co_send_request(BlockDriverState *bs,
                               NBDRequest *request,
                               QEMUIOVector *qiov)
{
    NBDClientSession *s = nbd_get_client_session(bs);
    int rc, i;

    qemu_co_mutex_lock(&s->send_mutex);
    /* Throttle: at most MAX_NBD_REQUESTS commands in flight at a time. */
    while (s->in_flight == MAX_NBD_REQUESTS) {
        qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
    }
    s->in_flight++;

    /* Find a free slot in the request table. */
    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        if (s->requests[i].coroutine == NULL) {
            break;
        }
    }

    g_assert(qemu_in_coroutine());
    /* A free slot must exist: in_flight was below the limit above. */
    assert(i < MAX_NBD_REQUESTS);

    s->requests[i].coroutine = qemu_coroutine_self();
    s->requests[i].offset = request->from;
    s->requests[i].receiving = false;

    request->handle = INDEX_TO_HANDLE(s, i);

    if (s->quit) {
        rc = -EIO;
        goto err;
    }
    assert(s->ioc);

    if (qiov) {
        /* Cork so the header and payload leave in as few packets as
         * possible. */
        qio_channel_set_cork(s->ioc, true);
        rc = nbd_send_request(s->ioc, request);
        if (rc >= 0 && !s->quit) {
            if (qio_channel_writev_all(s->ioc, qiov->iov, qiov->niov,
                                       NULL) < 0) {
                rc = -EIO;
            }
        } else if (rc >= 0) {
            /* Another coroutine set s->quit while we were sending. */
            rc = -EIO;
        }
        qio_channel_set_cork(s->ioc, false);
    } else {
        rc = nbd_send_request(s->ioc, request);
    }

err:
    if (rc < 0) {
        /* Sending failed: poison the session and give the slot back. */
        s->quit = true;
        s->requests[i].coroutine = NULL;
        s->in_flight--;
        qemu_co_queue_next(&s->free_sema);
    }
    qemu_co_mutex_unlock(&s->send_mutex);
    return rc;
}
183
f140e300
VSO
184static inline uint16_t payload_advance16(uint8_t **payload)
185{
186 *payload += 2;
187 return lduw_be_p(*payload - 2);
188}
189
190static inline uint32_t payload_advance32(uint8_t **payload)
191{
192 *payload += 4;
193 return ldl_be_p(*payload - 4);
194}
195
196static inline uint64_t payload_advance64(uint8_t **payload)
197{
198 *payload += 8;
199 return ldq_be_p(*payload - 8);
200}
201
/*
 * Parse an NBD_REPLY_TYPE_OFFSET_HOLE chunk and zero the corresponding
 * slice of @qiov.  @orig_offset is the offset of the client's original
 * read request.  Returns 0 on success, -EINVAL (with @errp set) for a
 * malformed or out-of-range chunk.
 */
static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
                                         uint8_t *payload, uint64_t orig_offset,
                                         QEMUIOVector *qiov, Error **errp)
{
    uint64_t offset;
    uint32_t hole_size;

    /* Payload is exactly a 64-bit offset plus a 32-bit hole size. */
    if (chunk->length != sizeof(offset) + sizeof(hole_size)) {
        error_setg(errp, "Protocol error: invalid payload for "
                         "NBD_REPLY_TYPE_OFFSET_HOLE");
        return -EINVAL;
    }

    offset = payload_advance64(&payload);
    hole_size = payload_advance32(&payload);

    /* The hole must be non-empty and lie entirely inside the requested
     * region. */
    if (!hole_size || offset < orig_offset || hole_size > qiov->size ||
        offset > orig_offset + qiov->size - hole_size) {
        error_setg(errp, "Protocol error: server sent chunk exceeding requested"
                   " region");
        return -EINVAL;
    }

    qemu_iovec_memset(qiov, offset - orig_offset, 0, hole_size);

    return 0;
}
229
78a33ab5
VSO
/* nbd_parse_blockstatus_payload
 * support only one extent in reply and only for
 * base:allocation context
 */
static int nbd_parse_blockstatus_payload(NBDClientSession *client,
                                         NBDStructuredReplyChunk *chunk,
                                         uint8_t *payload, uint64_t orig_length,
                                         NBDExtent *extent, Error **errp)
{
    uint32_t context_id;

    /* Payload is exactly one context id plus one extent descriptor. */
    if (chunk->length != sizeof(context_id) + sizeof(*extent)) {
        error_setg(errp, "Protocol error: invalid payload for "
                         "NBD_REPLY_TYPE_BLOCK_STATUS");
        return -EINVAL;
    }

    /* Only the context id negotiated at handshake time is acceptable. */
    context_id = payload_advance32(&payload);
    if (client->info.context_id != context_id) {
        error_setg(errp, "Protocol error: unexpected context id %d for "
                         "NBD_REPLY_TYPE_BLOCK_STATUS, when negotiated context "
                         "id is %d", context_id,
                         client->info.context_id);
        return -EINVAL;
    }

    extent->length = payload_advance32(&payload);
    extent->flags = payload_advance32(&payload);

    /* Zero-length extents are forbidden; extent lengths must respect the
     * server's advertised minimum block size (when it gave one). */
    if (extent->length == 0 ||
        (client->info.min_block && !QEMU_IS_ALIGNED(extent->length,
                                                    client->info.min_block))) {
        error_setg(errp, "Protocol error: server sent status chunk with "
                   "invalid length");
        return -EINVAL;
    }

    /* The server is allowed to send us extra information on the final
     * extent; just clamp it to the length we requested. */
    if (extent->length > orig_length) {
        extent->length = orig_length;
    }

    return 0;
}
275
f140e300
VSO
/* nbd_parse_error_payload
 * on success @errp contains message describing nbd error reply
 */
static int nbd_parse_error_payload(NBDStructuredReplyChunk *chunk,
                                   uint8_t *payload, int *request_ret,
                                   Error **errp)
{
    uint32_t error;
    uint16_t message_size;

    /* Bit 15 of the chunk type marks an error chunk. */
    assert(chunk->type & (1 << 15));

    /* Payload must at least hold the error code and message length. */
    if (chunk->length < sizeof(error) + sizeof(message_size)) {
        error_setg(errp,
                   "Protocol error: invalid payload for structured error");
        return -EINVAL;
    }

    error = nbd_errno_to_system_errno(payload_advance32(&payload));
    if (error == 0) {
        /* An "error" chunk with error 0 contradicts itself. */
        error_setg(errp, "Protocol error: server sent structured error chunk "
                         "with error = 0");
        return -EINVAL;
    }

    *request_ret = -error;
    message_size = payload_advance16(&payload);

    /* The message must fit in the remainder of the chunk payload. */
    if (message_size > chunk->length - sizeof(error) - sizeof(message_size)) {
        error_setg(errp, "Protocol error: server sent structured error chunk "
                         "with incorrect message size");
        return -EINVAL;
    }

    /* TODO: Add a trace point to mention the server complaint */

    /* TODO handle ERROR_OFFSET */

    return 0;
}
316
/*
 * Read an NBD_REPLY_TYPE_OFFSET_DATA chunk directly into @qiov at the
 * position corresponding to @orig_offset (the original read offset).
 * Returns 0 on success, -EINVAL for a malformed chunk, -EIO on channel
 * failure.
 */
static int nbd_co_receive_offset_data_payload(NBDClientSession *s,
                                              uint64_t orig_offset,
                                              QEMUIOVector *qiov, Error **errp)
{
    QEMUIOVector sub_qiov;
    uint64_t offset;
    size_t data_size;
    int ret;
    NBDStructuredReplyChunk *chunk = &s->reply.structured;

    assert(nbd_reply_is_structured(&s->reply));

    /* The NBD spec requires at least one byte of payload */
    if (chunk->length <= sizeof(offset)) {
        error_setg(errp, "Protocol error: invalid payload for "
                         "NBD_REPLY_TYPE_OFFSET_DATA");
        return -EINVAL;
    }

    if (nbd_read64(s->ioc, &offset, "OFFSET_DATA offset", errp) < 0) {
        return -EIO;
    }

    data_size = chunk->length - sizeof(offset);
    assert(data_size);
    /* The data must lie entirely inside the requested region. */
    if (offset < orig_offset || data_size > qiov->size ||
        offset > orig_offset + qiov->size - data_size) {
        error_setg(errp, "Protocol error: server sent chunk exceeding requested"
                   " region");
        return -EINVAL;
    }

    /* Read straight into the matching slice of the caller's iovec —
     * no bounce buffer. */
    qemu_iovec_init(&sub_qiov, qiov->niov);
    qemu_iovec_concat(&sub_qiov, qiov, offset - orig_offset, data_size);
    ret = qio_channel_readv_all(s->ioc, sub_qiov.iov, sub_qiov.niov, errp);
    qemu_iovec_destroy(&sub_qiov);

    return ret < 0 ? -EIO : 0;
}
356
/* Upper bound on the payload size we are willing to allocate for one
 * structured chunk. */
#define NBD_MAX_MALLOC_PAYLOAD 1000
/* nbd_co_receive_structured_payload
 * Allocate *payload and read the current structured chunk's payload into
 * it.  A zero-length chunk returns 0 without touching *payload; the
 * caller owns (and must g_free) the buffer on success.
 */
static coroutine_fn int nbd_co_receive_structured_payload(
        NBDClientSession *s, void **payload, Error **errp)
{
    int ret;
    uint32_t len;

    assert(nbd_reply_is_structured(&s->reply));

    len = s->reply.structured.length;

    if (len == 0) {
        return 0;
    }

    /* payload == NULL means the caller expected no payload for this
     * chunk type. */
    if (payload == NULL) {
        error_setg(errp, "Unexpected structured payload");
        return -EINVAL;
    }

    if (len > NBD_MAX_MALLOC_PAYLOAD) {
        error_setg(errp, "Payload too large");
        return -EINVAL;
    }

    *payload = g_new(char, len);
    ret = nbd_read(s->ioc, *payload, len, "structured payload", errp);
    if (ret < 0) {
        g_free(*payload);
        *payload = NULL;
        return ret;
    }

    return 0;
}
394
/* nbd_co_do_receive_one_chunk
 * for simple reply:
 *   set request_ret to received reply error
 *   if qiov is not NULL: read payload to @qiov
 * for structured reply chunk:
 *   if error chunk: read payload, set @request_ret, do not set @payload
 *   else if offset_data chunk: read payload data to @qiov, do not set @payload
 *   else: read payload to @payload
 *
 * If function fails, @errp contains corresponding error message, and the
 * connection with the server is suspect.  If it returns 0, then the
 * transaction succeeded (although @request_ret may be a negative errno
 * corresponding to the server's error reply), and errp is unchanged.
 */
static coroutine_fn int nbd_co_do_receive_one_chunk(
        NBDClientSession *s, uint64_t handle, bool only_structured,
        int *request_ret, QEMUIOVector *qiov, void **payload, Error **errp)
{
    int ret;
    int i = HANDLE_TO_INDEX(s, handle);
    void *local_payload = NULL;
    NBDStructuredReplyChunk *chunk;

    if (payload) {
        *payload = NULL;
    }
    *request_ret = 0;

    /* Wait until we're woken up by nbd_read_reply_entry.  */
    s->requests[i].receiving = true;
    qemu_coroutine_yield();
    s->requests[i].receiving = false;
    if (s->quit) {
        /* Session died (or nbd_read_reply_entry saw a violation) while
         * we slept. */
        error_setg(errp, "Connection closed");
        return -EIO;
    }
    assert(s->ioc);

    /* nbd_read_reply_entry only wakes the coroutine owning this handle. */
    assert(s->reply.handle == handle);

    if (nbd_reply_is_simple(&s->reply)) {
        if (only_structured) {
            error_setg(errp, "Protocol error: simple reply when structured "
                       "reply chunk was expected");
            return -EINVAL;
        }

        *request_ret = -nbd_errno_to_system_errno(s->reply.simple.error);
        if (*request_ret < 0 || !qiov) {
            return 0;
        }

        return qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
                                     errp) < 0 ? -EIO : 0;
    }

    /* handle structured reply chunk */
    assert(s->info.structured_reply);
    chunk = &s->reply.structured;

    if (chunk->type == NBD_REPLY_TYPE_NONE) {
        /* NONE chunks must be final and carry no payload. */
        if (!(chunk->flags & NBD_REPLY_FLAG_DONE)) {
            error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk without"
                       " NBD_REPLY_FLAG_DONE flag set");
            return -EINVAL;
        }
        if (chunk->length) {
            error_setg(errp, "Protocol error: NBD_REPLY_TYPE_NONE chunk with"
                       " nonzero length");
            return -EINVAL;
        }
        return 0;
    }

    if (chunk->type == NBD_REPLY_TYPE_OFFSET_DATA) {
        if (!qiov) {
            error_setg(errp, "Unexpected NBD_REPLY_TYPE_OFFSET_DATA chunk");
            return -EINVAL;
        }

        return nbd_co_receive_offset_data_payload(s, s->requests[i].offset,
                                                  qiov, errp);
    }

    /* Error chunks carry a payload that we parse here instead of handing
     * to the caller. */
    if (nbd_reply_type_is_error(chunk->type)) {
        payload = &local_payload;
    }

    ret = nbd_co_receive_structured_payload(s, payload, errp);
    if (ret < 0) {
        return ret;
    }

    if (nbd_reply_type_is_error(chunk->type)) {
        ret = nbd_parse_error_payload(chunk, local_payload, request_ret, errp);
        g_free(local_payload);
        return ret;
    }

    return 0;
}
496
/* nbd_co_receive_one_chunk
 * Read reply, wake up read_reply_co and set s->quit if needed.
 * Return value is a fatal error code or normal nbd reply error code
 */
static coroutine_fn int nbd_co_receive_one_chunk(
        NBDClientSession *s, uint64_t handle, bool only_structured,
        int *request_ret, QEMUIOVector *qiov, NBDReply *reply, void **payload,
        Error **errp)
{
    int ret = nbd_co_do_receive_one_chunk(s, handle, only_structured,
                                          request_ret, qiov, payload, errp);

    if (ret < 0) {
        /* Fatal channel error: poison the whole session. */
        s->quit = true;
    } else {
        /* For assert at loop start in nbd_read_reply_entry */
        if (reply) {
            *reply = s->reply;
        }
        s->reply.handle = 0;
    }

    /* Hand control back to the reader coroutine for the next reply. */
    if (s->read_reply_co) {
        aio_co_wake(s->read_reply_co);
    }

    return ret;
}
525
/*
 * State carried across NBD_FOREACH_REPLY_CHUNK iterations.  ret/err
 * record the first fatal channel error; request_ret records the first
 * error the server reported in a well-formed reply.
 */
typedef struct NBDReplyChunkIter {
    int ret;
    int request_ret;
    Error *err;
    /* done: previous chunk carried NBD_REPLY_FLAG_DONE, stop iterating.
     * only_structured: a simple reply would now be a protocol error. */
    bool done, only_structured;
} NBDReplyChunkIter;
532
7f86068d
VSO
533static void nbd_iter_channel_error(NBDReplyChunkIter *iter,
534 int ret, Error **local_err)
f140e300
VSO
535{
536 assert(ret < 0);
537
7f86068d 538 if (!iter->ret) {
f140e300
VSO
539 iter->ret = ret;
540 error_propagate(&iter->err, *local_err);
541 } else {
542 error_free(*local_err);
543 }
544
545 *local_err = NULL;
546}
547
7f86068d
VSO
548static void nbd_iter_request_error(NBDReplyChunkIter *iter, int ret)
549{
550 assert(ret < 0);
551
552 if (!iter->request_ret) {
553 iter->request_ret = ret;
554 }
555}
556
f140e300
VSO
/* NBD_FOREACH_REPLY_CHUNK
 * Iterate over every reply chunk for @handle; the loop body runs once
 * per structured (non-simple) chunk.  When the loop terminates,
 * iter.ret / iter.request_ret / iter.err hold the final outcome.
 */
#define NBD_FOREACH_REPLY_CHUNK(s, iter, handle, structured, \
                                qiov, reply, payload) \
    for (iter = (NBDReplyChunkIter) { .only_structured = structured }; \
         nbd_reply_chunk_iter_receive(s, &iter, handle, qiov, reply, payload);)
563
/* nbd_reply_chunk_iter_receive
 * Fetch the next reply chunk for @handle and decide whether the FOREACH
 * body should run for it.  Returns false once iteration is over; at that
 * point the request slot has been released and a waiting sender woken.
 */
static bool nbd_reply_chunk_iter_receive(NBDClientSession *s,
                                         NBDReplyChunkIter *iter,
                                         uint64_t handle,
                                         QEMUIOVector *qiov, NBDReply *reply,
                                         void **payload)
{
    int ret, request_ret;
    NBDReply local_reply;
    NBDStructuredReplyChunk *chunk;
    Error *local_err = NULL;
    if (s->quit) {
        error_setg(&local_err, "Connection closed");
        nbd_iter_channel_error(iter, -EIO, &local_err);
        goto break_loop;
    }

    if (iter->done) {
        /* Previous iteration was last.  */
        goto break_loop;
    }

    if (reply == NULL) {
        /* Caller doesn't care about the reply header; use scratch space. */
        reply = &local_reply;
    }

    ret = nbd_co_receive_one_chunk(s, handle, iter->only_structured,
                                   &request_ret, qiov, reply, payload,
                                   &local_err);
    if (ret < 0) {
        nbd_iter_channel_error(iter, ret, &local_err);
    } else if (request_ret < 0) {
        nbd_iter_request_error(iter, request_ret);
    }

    /* Do not execute the body of NBD_FOREACH_REPLY_CHUNK for simple reply. */
    if (nbd_reply_is_simple(reply) || s->quit) {
        goto break_loop;
    }

    chunk = &reply->structured;
    iter->only_structured = true;

    if (chunk->type == NBD_REPLY_TYPE_NONE) {
        /* NBD_REPLY_FLAG_DONE is already checked in nbd_co_receive_one_chunk */
        assert(chunk->flags & NBD_REPLY_FLAG_DONE);
        goto break_loop;
    }

    if (chunk->flags & NBD_REPLY_FLAG_DONE) {
        /* This iteration is last.  */
        iter->done = true;
    }

    /* Execute the loop body */
    return true;

break_loop:
    /* Iteration finished: free the request slot and wake one sender
     * waiting on the in-flight limit. */
    s->requests[HANDLE_TO_INDEX(s, handle)].coroutine = NULL;

    qemu_co_mutex_lock(&s->send_mutex);
    s->in_flight--;
    qemu_co_queue_next(&s->free_sema);
    qemu_co_mutex_unlock(&s->send_mutex);

    return false;
}
632
f140e300 633static int nbd_co_receive_return_code(NBDClientSession *s, uint64_t handle,
7f86068d 634 int *request_ret, Error **errp)
f140e300
VSO
635{
636 NBDReplyChunkIter iter;
637
638 NBD_FOREACH_REPLY_CHUNK(s, iter, handle, false, NULL, NULL, NULL) {
639 /* nbd_reply_chunk_iter_receive does all the work */
640 }
641
642 error_propagate(errp, iter.err);
7f86068d 643 *request_ret = iter.request_ret;
f140e300
VSO
644 return iter.ret;
645}
646
/*
 * Receive the reply chunks of a CMD_READ request into @qiov (original
 * request offset @offset).  OFFSET_DATA chunks were already copied into
 * @qiov by nbd_co_receive_one_chunk; OFFSET_HOLE chunks are zeroed here.
 */
static int nbd_co_receive_cmdread_reply(NBDClientSession *s, uint64_t handle,
                                        uint64_t offset, QEMUIOVector *qiov,
                                        int *request_ret, Error **errp)
{
    NBDReplyChunkIter iter;
    NBDReply reply;
    void *payload = NULL;
    Error *local_err = NULL;

    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
                            qiov, &reply, &payload)
    {
        int ret;
        NBDStructuredReplyChunk *chunk = &reply.structured;

        assert(nbd_reply_is_structured(&reply));

        switch (chunk->type) {
        case NBD_REPLY_TYPE_OFFSET_DATA:
            /* special cased in nbd_co_receive_one_chunk, data is already
             * in qiov */
            break;
        case NBD_REPLY_TYPE_OFFSET_HOLE:
            ret = nbd_parse_offset_hole_payload(&reply.structured, payload,
                                                offset, qiov, &local_err);
            if (ret < 0) {
                s->quit = true;
                nbd_iter_channel_error(&iter, ret, &local_err);
            }
            break;
        default:
            if (!nbd_reply_type_is_error(chunk->type)) {
                /* not allowed reply type */
                s->quit = true;
                error_setg(&local_err,
                           "Unexpected reply type: %d (%s) for CMD_READ",
                           chunk->type, nbd_reply_type_lookup(chunk->type));
                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
            }
        }

        /* Each iteration owns (and releases) its chunk payload. */
        g_free(payload);
        payload = NULL;
    }

    error_propagate(errp, iter.err);
    *request_ret = iter.request_ret;
    return iter.ret;
}
696
78a33ab5
VSO
/*
 * Receive the reply chunks of a CMD_BLOCK_STATUS request.  Exactly one
 * BLOCK_STATUS chunk is expected (NBD_CMD_FLAG_REQ_ONE was set); its
 * single extent is stored in @extent.
 */
static int nbd_co_receive_blockstatus_reply(NBDClientSession *s,
                                            uint64_t handle, uint64_t length,
                                            NBDExtent *extent,
                                            int *request_ret, Error **errp)
{
    NBDReplyChunkIter iter;
    NBDReply reply;
    void *payload = NULL;
    Error *local_err = NULL;
    bool received = false;

    assert(!extent->length);
    NBD_FOREACH_REPLY_CHUNK(s, iter, handle, s->info.structured_reply,
                            NULL, &reply, &payload)
    {
        int ret;
        NBDStructuredReplyChunk *chunk = &reply.structured;

        assert(nbd_reply_is_structured(&reply));

        switch (chunk->type) {
        case NBD_REPLY_TYPE_BLOCK_STATUS:
            if (received) {
                /* REQ_ONE was set: more than one status chunk is a
                 * protocol violation. */
                s->quit = true;
                error_setg(&local_err, "Several BLOCK_STATUS chunks in reply");
                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
            }
            received = true;

            ret = nbd_parse_blockstatus_payload(s, &reply.structured,
                                                payload, length, extent,
                                                &local_err);
            if (ret < 0) {
                s->quit = true;
                nbd_iter_channel_error(&iter, ret, &local_err);
            }
            break;
        default:
            if (!nbd_reply_type_is_error(chunk->type)) {
                s->quit = true;
                error_setg(&local_err,
                           "Unexpected reply type: %d (%s) "
                           "for CMD_BLOCK_STATUS",
                           chunk->type, nbd_reply_type_lookup(chunk->type));
                nbd_iter_channel_error(&iter, -EINVAL, &local_err);
            }
        }

        /* Each iteration owns (and releases) its chunk payload. */
        g_free(payload);
        payload = NULL;
    }

    if (!extent->length && !iter.err) {
        /* Neither an extent nor an error arrived: protocol violation. */
        error_setg(&iter.err,
                   "Server did not reply with any status extents");
        if (!iter.ret) {
            iter.ret = -EIO;
        }
    }

    error_propagate(errp, iter.err);
    *request_ret = iter.request_ret;
    return iter.ret;
}
761
f140e300
VSO
/*
 * Send a request and wait for its completion status.  Used for every
 * command except CMD_READ, which needs its own chunk handling.  Returns
 * the fatal channel error if any, otherwise the server's reply status.
 */
static int nbd_co_request(BlockDriverState *bs, NBDRequest *request,
                          QEMUIOVector *write_qiov)
{
    int ret, request_ret;
    Error *local_err = NULL;
    NBDClientSession *client = nbd_get_client_session(bs);

    assert(request->type != NBD_CMD_READ);
    if (write_qiov) {
        /* Only writes carry a payload, and its size must match. */
        assert(request->type == NBD_CMD_WRITE);
        assert(request->len == iov_size(write_qiov->iov, write_qiov->niov));
    } else {
        assert(request->type != NBD_CMD_WRITE);
    }
    ret = nbd_co_send_request(bs, request, write_qiov);
    if (ret < 0) {
        return ret;
    }

    ret = nbd_co_receive_return_code(client, request->handle,
                                     &request_ret, &local_err);
    if (local_err) {
        trace_nbd_co_request_fail(request->from, request->len, request->handle,
                                  request->flags, request->type,
                                  nbd_cmd_lookup(request->type),
                                  ret, error_get_pretty(local_err));
        error_free(local_err);
    }
    /* Fatal errors take precedence over the server's error reply. */
    return ret ? ret : request_ret;
}
792
70c4fb26
EB
/* Read @bytes at @offset into @qiov via NBD_CMD_READ. */
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags)
{
    int ret, request_ret;
    Error *local_err = NULL;
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_READ,
        .from = offset,
        .len = bytes,
    };

    assert(bytes <= NBD_MAX_BUFFER_SIZE);
    /* No request flags are supported for reads. */
    assert(!flags);

    if (!bytes) {
        return 0;
    }
    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        return ret;
    }

    ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov,
                                       &request_ret, &local_err);
    if (local_err) {
        trace_nbd_co_request_fail(request.from, request.len, request.handle,
                                  request.flags, request.type,
                                  nbd_cmd_lookup(request.type),
                                  ret, error_get_pretty(local_err));
        error_free(local_err);
    }
    /* Fatal errors take precedence over the server's error reply. */
    return ret ? ret : request_ret;
}
827
70c4fb26
EB
/* Write @bytes at @offset from @qiov via NBD_CMD_WRITE.  BDRV_REQ_FUA is
 * honored when the server advertised FUA support. */
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags)
{
    NBDClientSession *client = nbd_get_client_session(bs);
    NBDRequest request = {
        .type = NBD_CMD_WRITE,
        .from = offset,
        .len = bytes,
    };

    assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
    if (flags & BDRV_REQ_FUA) {
        assert(client->info.flags & NBD_FLAG_SEND_FUA);
        request.flags |= NBD_CMD_FLAG_FUA;
    }

    assert(bytes <= NBD_MAX_BUFFER_SIZE);

    if (!bytes) {
        return 0;
    }
    return nbd_co_request(bs, &request, qiov);
}
851
fa778fff 852int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
f5a5ca79 853 int bytes, BdrvRequestFlags flags)
fa778fff 854{
fa778fff
EB
855 NBDClientSession *client = nbd_get_client_session(bs);
856 NBDRequest request = {
857 .type = NBD_CMD_WRITE_ZEROES,
858 .from = offset,
f5a5ca79 859 .len = bytes,
fa778fff 860 };
fa778fff 861
1104d83c 862 assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
004a89fc 863 if (!(client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES)) {
fa778fff
EB
864 return -ENOTSUP;
865 }
866
867 if (flags & BDRV_REQ_FUA) {
004a89fc 868 assert(client->info.flags & NBD_FLAG_SEND_FUA);
fa778fff
EB
869 request.flags |= NBD_CMD_FLAG_FUA;
870 }
871 if (!(flags & BDRV_REQ_MAY_UNMAP)) {
872 request.flags |= NBD_CMD_FLAG_NO_HOLE;
873 }
874
9d8f818c
EB
875 if (!bytes) {
876 return 0;
877 }
f35dff7e 878 return nbd_co_request(bs, &request, NULL);
fa778fff
EB
879}
880
f53a829b 881int nbd_client_co_flush(BlockDriverState *bs)
2302c1ca 882{
10676b81 883 NBDClientSession *client = nbd_get_client_session(bs);
ed2dd912 884 NBDRequest request = { .type = NBD_CMD_FLUSH };
2302c1ca 885
004a89fc 886 if (!(client->info.flags & NBD_FLAG_SEND_FLUSH)) {
2302c1ca
MAL
887 return 0;
888 }
889
2302c1ca
MAL
890 request.from = 0;
891 request.len = 0;
892
f35dff7e 893 return nbd_co_request(bs, &request, NULL);
2302c1ca
MAL
894}
895
f5a5ca79 896int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes)
2302c1ca 897{
10676b81 898 NBDClientSession *client = nbd_get_client_session(bs);
ed2dd912 899 NBDRequest request = {
447e57c3
EB
900 .type = NBD_CMD_TRIM,
901 .from = offset,
f5a5ca79 902 .len = bytes,
447e57c3 903 };
2302c1ca 904
1104d83c 905 assert(!(client->info.flags & NBD_FLAG_READ_ONLY));
9d8f818c 906 if (!(client->info.flags & NBD_FLAG_SEND_TRIM) || !bytes) {
2302c1ca
MAL
907 return 0;
908 }
2302c1ca 909
f35dff7e 910 return nbd_co_request(bs, &request, NULL);
2302c1ca
MAL
911}
912
78a33ab5
VSO
/*
 * Implement block-status via NBD_CMD_BLOCK_STATUS (base:allocation
 * context, one extent per request).  Reports everything as data when the
 * server did not negotiate base:allocation.
 */
int coroutine_fn nbd_client_co_block_status(BlockDriverState *bs,
                                            bool want_zero,
                                            int64_t offset, int64_t bytes,
                                            int64_t *pnum, int64_t *map,
                                            BlockDriverState **file)
{
    int ret, request_ret;
    NBDExtent extent = { 0 };
    NBDClientSession *client = nbd_get_client_session(bs);
    Error *local_err = NULL;

    NBDRequest request = {
        .type = NBD_CMD_BLOCK_STATUS,
        .from = offset,
        /* Clamp to what both the server and the block layer allow. */
        .len = MIN(MIN_NON_ZERO(QEMU_ALIGN_DOWN(INT_MAX,
                                                bs->bl.request_alignment),
                                client->info.max_block), bytes),
        .flags = NBD_CMD_FLAG_REQ_ONE,
    };

    if (!client->info.base_allocation) {
        *pnum = bytes;
        return BDRV_BLOCK_DATA;
    }

    ret = nbd_co_send_request(bs, &request, NULL);
    if (ret < 0) {
        return ret;
    }

    ret = nbd_co_receive_blockstatus_reply(client, request.handle, bytes,
                                           &extent, &request_ret, &local_err);
    if (local_err) {
        trace_nbd_co_request_fail(request.from, request.len, request.handle,
                                  request.flags, request.type,
                                  nbd_cmd_lookup(request.type),
                                  ret, error_get_pretty(local_err));
        error_free(local_err);
    }
    if (ret < 0 || request_ret < 0) {
        return ret ? ret : request_ret;
    }

    /* nbd_co_receive_blockstatus_reply fails rather than returning a
     * zero-length extent. */
    assert(extent.length);
    *pnum = extent.length;
    return (extent.flags & NBD_STATE_HOLE ? 0 : BDRV_BLOCK_DATA) |
           (extent.flags & NBD_STATE_ZERO ? BDRV_BLOCK_ZERO : 0);
}
961
f53a829b 962void nbd_client_detach_aio_context(BlockDriverState *bs)
69447cd8 963{
ff82911c 964 NBDClientSession *client = nbd_get_client_session(bs);
96d06835 965 qio_channel_detach_aio_context(QIO_CHANNEL(client->ioc));
69447cd8
SH
966}
967
f53a829b
HR
968void nbd_client_attach_aio_context(BlockDriverState *bs,
969 AioContext *new_context)
69447cd8 970{
ff82911c 971 NBDClientSession *client = nbd_get_client_session(bs);
96d06835 972 qio_channel_attach_aio_context(QIO_CHANNEL(client->ioc), new_context);
ff82911c 973 aio_co_schedule(new_context, client->read_reply_co);
69447cd8
SH
974}
975
f53a829b 976void nbd_client_close(BlockDriverState *bs)
2302c1ca 977{
10676b81 978 NBDClientSession *client = nbd_get_client_session(bs);
ed2dd912 979 NBDRequest request = { .type = NBD_CMD_DISC };
2302c1ca 980
88ed4e1b 981 assert(client->ioc);
4a41a2d6 982
1c778ef7 983 nbd_send_request(client->ioc, &request);
5ad283eb 984
f53a829b 985 nbd_teardown_connection(bs);
2302c1ca
MAL
986}
987
d42f78e9
VSO
988static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
989 Error **errp)
990{
991 QIOChannelSocket *sioc;
992 Error *local_err = NULL;
993
994 sioc = qio_channel_socket_new();
995 qio_channel_set_name(QIO_CHANNEL(sioc), "nbd-client");
996
997 qio_channel_socket_connect_sync(sioc, saddr, &local_err);
998 if (local_err) {
999 object_unref(OBJECT(sioc));
1000 error_propagate(errp, local_err);
1001 return NULL;
1002 }
1003
1004 qio_channel_set_delay(QIO_CHANNEL(sioc), false);
1005
1006 return sioc;
1007}
1008
b0e4b5a5
VSO
/*
 * Connect to the server at @saddr, run the NBD handshake for @export
 * (optionally over TLS / with an x-dirty-bitmap meta context), apply the
 * negotiated capabilities to @bs, and start the reply-reading coroutine.
 * Returns 0 on success or a negative errno (with @errp set).
 */
static int nbd_client_connect(BlockDriverState *bs,
                              SocketAddress *saddr,
                              const char *export,
                              QCryptoTLSCreds *tlscreds,
                              const char *hostname,
                              const char *x_dirty_bitmap,
                              Error **errp)
{
    NBDClientSession *client = nbd_get_client_session(bs);
    int ret;

    /*
     * establish TCP connection, return error if it fails
     * TODO: Configurable retry-until-timeout behaviour.
     */
    QIOChannelSocket *sioc = nbd_establish_connection(saddr, errp);

    if (!sioc) {
        return -ECONNREFUSED;
    }

    /* NBD handshake */
    logout("session init %s\n", export);
    qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);

    /* Advertise the optional features we can use. */
    client->info.request_sizes = true;
    client->info.structured_reply = true;
    client->info.base_allocation = true;
    client->info.x_dirty_bitmap = g_strdup(x_dirty_bitmap);
    client->info.name = g_strdup(export ?: "");
    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), tlscreds, hostname,
                                &client->ioc, &client->info, errp);
    g_free(client->info.x_dirty_bitmap);
    g_free(client->info.name);
    if (ret < 0) {
        logout("Failed to negotiate with the NBD server\n");
        object_unref(OBJECT(sioc));
        return ret;
    }
    if (x_dirty_bitmap && !client->info.base_allocation) {
        error_setg(errp, "requested x-dirty-bitmap %s not found",
                   x_dirty_bitmap);
        ret = -EINVAL;
        goto fail;
    }
    if (client->info.flags & NBD_FLAG_READ_ONLY) {
        ret = bdrv_apply_auto_read_only(bs, "NBD export is read-only", errp);
        if (ret < 0) {
            goto fail;
        }
    }
    /* Mirror the server's capabilities into the BDS feature flags. */
    if (client->info.flags & NBD_FLAG_SEND_FUA) {
        bs->supported_write_flags = BDRV_REQ_FUA;
        bs->supported_zero_flags |= BDRV_REQ_FUA;
    }
    if (client->info.flags & NBD_FLAG_SEND_WRITE_ZEROES) {
        bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
    }

    client->sioc = sioc;

    /* Without TLS, negotiation left client->ioc unset; use the raw socket. */
    if (!client->ioc) {
        client->ioc = QIO_CHANNEL(sioc);
        object_ref(OBJECT(client->ioc));
    }

    /* Now that we're connected, set the socket to be non-blocking and
     * kick the reply mechanism.  */
    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
    client->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, client);
    nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));

    logout("Established connection with NBD server\n");
    return 0;

 fail:
    /*
     * We have connected, but must fail for other reasons.  The
     * connection is still blocking; send NBD_CMD_DISC as a courtesy
     * to the server.
     */
    {
        NBDRequest request = { .type = NBD_CMD_DISC };

        nbd_send_request(client->ioc ?: QIO_CHANNEL(sioc), &request);

        object_unref(OBJECT(sioc));

        return ret;
    }
}
b0e4b5a5
VSO
1100
/* One-time session setup: initialize the send mutex and the free-slot
 * wait queue, then connect and negotiate with the server. */
int nbd_client_init(BlockDriverState *bs,
                    SocketAddress *saddr,
                    const char *export,
                    QCryptoTLSCreds *tlscreds,
                    const char *hostname,
                    const char *x_dirty_bitmap,
                    Error **errp)
{
    NBDClientSession *client = nbd_get_client_session(bs);

    qemu_co_mutex_init(&client->send_mutex);
    qemu_co_queue_init(&client->free_sema);

    return nbd_client_connect(bs, saddr, export, tlscreds, hostname,
                              x_dirty_bitmap, errp);
}