]> git.proxmox.com Git - qemu.git/blame - nbd.c
nbd: consistently check for <0 or >=0
[qemu.git] / nbd.c
CommitLineData
75818250 1/*
7a5ca864
FB
2 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
3 *
4 * Network Block Device
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; under version 2 of the License.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
8167ee88 16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
75818250 17 */
7a5ca864
FB
18
19#include "nbd.h"
ab359cd1 20#include "block.h"
af49bbbe 21#include "block_int.h"
7a5ca864 22
262db388
PB
23#include "qemu-coroutine.h"
24
7a5ca864
FB
25#include <errno.h>
26#include <string.h>
03ff3ca3 27#ifndef _WIN32
7a5ca864 28#include <sys/ioctl.h>
03ff3ca3 29#endif
5dc2eec9 30#if defined(__sun__) || defined(__HAIKU__)
7e00eb9b
AL
31#include <sys/ioccom.h>
32#endif
7a5ca864
FB
33#include <ctype.h>
34#include <inttypes.h>
75818250 35
b90fb4b8
PB
36#ifdef __linux__
37#include <linux/fs.h>
38#endif
39
03ff3ca3 40#include "qemu_socket.h"
d9a73806 41#include "qemu-queue.h"
03ff3ca3
AL
42
/* Uncomment to turn TRACE() into LOG(); otherwise TRACE() expands to nothing. */
//#define DEBUG_NBD

/*
 * LOG(fmt, ...): print a message to stderr, prefixed with the source file,
 * function name and line number, and terminated by a newline.
 */
#define LOG(msg, ...) do { \
    fprintf(stderr, "%s:%s():L%d: " msg "\n", \
            __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
} while (0)

/*
 * TRACE(fmt, ...): debug-only variant of LOG().  The arguments are not
 * evaluated at all unless DEBUG_NBD is defined.
 */
#ifdef DEBUG_NBD
#define TRACE(msg, ...) do { \
    LOG(msg, ## __VA_ARGS__); \
} while (0)
#else
#define TRACE(msg, ...) \
    do { } while (0)
#endif
58
7a5ca864
FB
/* This is all part of the "official" NBD API */

/* Wire size of a reply header: magic (4) + error (4) + handle (8). */
#define NBD_REPLY_SIZE          (4 + 4 + 8)
#define NBD_REQUEST_MAGIC       0x25609513
#define NBD_REPLY_MAGIC         0x67446698

/* ioctl requests issued on the NBD device file descriptor. */
#define NBD_SET_SOCK            _IO(0xab, 0)
#define NBD_SET_BLKSIZE         _IO(0xab, 1)
#define NBD_SET_SIZE            _IO(0xab, 2)
#define NBD_DO_IT               _IO(0xab, 3)
#define NBD_CLEAR_SOCK          _IO(0xab, 4)
#define NBD_CLEAR_QUE           _IO(0xab, 5)
#define NBD_PRINT_DEBUG         _IO(0xab, 6)
#define NBD_SET_SIZE_BLOCKS     _IO(0xab, 7)
#define NBD_DISCONNECT          _IO(0xab, 8)
#define NBD_SET_TIMEOUT         _IO(0xab, 9)
#define NBD_SET_FLAGS           _IO(0xab, 10)

/* Option code sent by the client to select an export by name. */
#define NBD_OPT_EXPORT_NAME     (1 << 0)

/* That's all folks */
80
75818250
TS
/* Directional shorthands for nbd_wr_sync(). */
#define read_sync(fd, buffer, size) nbd_wr_sync(fd, buffer, size, true)
#define write_sync(fd, buffer, size) nbd_wr_sync(fd, buffer, size, false)

/*
 * Read or write up to @size bytes on socket @fd.
 *
 * @fd:      socket descriptor
 * @buffer:  destination (read) or source (write) buffer
 * @size:    number of bytes the caller wants transferred
 * @do_read: true to receive into @buffer, false to send from it
 *
 * Inside a coroutine the transfer is delegated to qemu_co_recv() /
 * qemu_co_send() and their result is returned as-is.  Otherwise the
 * function loops until @size bytes have moved, retrying on EINTR/EAGAIN.
 *
 * Returns the number of bytes transferred; this is less than @size if the
 * peer closed the connection, and 0 on an unrecoverable socket error
 * (errno is then set from socket_error()).
 */
size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
{
    size_t done = 0;

    if (qemu_in_coroutine()) {
        return do_read ? qemu_co_recv(fd, buffer, size)
                       : qemu_co_send(fd, buffer, size);
    }

    while (done < size) {
        char *cursor = (char *)buffer + done;
        size_t left = size - done;
        ssize_t n;

        if (do_read) {
            n = qemu_recv(fd, cursor, left, 0);
        } else {
            n = send(fd, cursor, left, 0);
        }

        if (n == 0) {
            /* eof */
            break;
        }

        if (n > 0) {
            done += n;
            continue;
        }

        errno = socket_error();
        if (errno != EINTR && errno != EAGAIN) {
            /* unrecoverable error */
            return 0;
        }
        /* recoverable error: retry the same chunk */
    }

    return done;
}
127
c12504ce
NT
/*
 * Format @address and @port as "host:port" into @buf (at most @len bytes,
 * always NUL-terminated by snprintf when @len > 0).  An address containing
 * a colon is taken to be an IPv6 literal and is wrapped in square brackets.
 * Truncation is not reported.
 */
static void combine_addr(char *buf, size_t len, const char* address,
                         uint16_t port)
{
    if (strchr(address, ':') == NULL) {
        snprintf(buf, len, "%s:%u", address, port);
        return;
    }

    /* If the address-part contains a colon, it's an IPv6 IP so needs [] */
    snprintf(buf, len, "[%s]:%u", address, port);
}
138
/*
 * Connect to @address:@port over TCP.  Builds the "host:port" string and
 * hands it to tcp_socket_outgoing_spec(), whose result is returned.
 */
int tcp_socket_outgoing(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);
    return tcp_socket_outgoing_spec(spec);
}
145
/*
 * Connect a stream socket to the "host:port" string @address_and_port.
 * Returns whatever inet_connect() returns.
 */
int tcp_socket_outgoing_spec(const char *address_and_port)
{
    int fd = inet_connect(address_and_port, SOCK_STREAM);

    return fd;
}
150
/*
 * Listen on @address:@port over TCP.  Builds the "host:port" string and
 * hands it to tcp_socket_incoming_spec(), whose result is returned.
 */
int tcp_socket_incoming(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);
    return tcp_socket_incoming_spec(spec);
}
cd831bd7 157
c12504ce
NT
/*
 * Open a listening stream socket for the "host:port" string
 * @address_and_port.  No output string buffer is passed to inet_listen()
 * (NULL / length 0) and the port offset is 0.
 * Returns whatever inet_listen() returns.
 */
int tcp_socket_incoming_spec(const char *address_and_port)
{
    return inet_listen(address_and_port, NULL, 0, SOCK_STREAM, 0);
}
c12504ce 164
03ff3ca3
AL
/*
 * Open a listening UNIX-domain socket at @path.  No output string buffer
 * is passed to unix_listen() (NULL / length 0).
 * Returns whatever unix_listen() returns.
 */
int unix_socket_incoming(const char *path)
{
    return unix_listen(path, NULL, 0);
}
172
03ff3ca3
AL
/*
 * Connect to the UNIX-domain socket at @path.
 * Returns whatever unix_connect() returns.
 */
int unix_socket_outgoing(const char *path)
{
    int fd = unix_connect(path);

    return fd;
}
cd831bd7 177
7a5ca864
FB
/* Basic flow

   Server         Client

   Negotiate
                  Request
   Response
                  Request
   Response
   ...
                  ...
                  Request (type == 2)
*/
191
af49bbbe 192static int nbd_send_negotiate(int csock, off_t size, uint32_t flags)
7a5ca864 193{
b2e3d87f
NT
194 char buf[8 + 8 + 8 + 128];
195
196 /* Negotiate
197 [ 0 .. 7] passwd ("NBDMAGIC")
198 [ 8 .. 15] magic (0x00420281861253)
199 [16 .. 23] size
b90fb4b8
PB
200 [24 .. 27] flags
201 [28 .. 151] reserved (0)
b2e3d87f
NT
202 */
203
204 TRACE("Beginning negotiation.");
205 memcpy(buf, "NBDMAGIC", 8);
206 cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
207 cpu_to_be64w((uint64_t*)(buf + 16), size);
2c7989a9 208 cpu_to_be32w((uint32_t*)(buf + 24),
7a706633
PB
209 flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
210 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
b90fb4b8 211 memset(buf + 28, 0, 124);
b2e3d87f
NT
212
213 if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
214 LOG("write failed");
215 errno = EINVAL;
216 return -1;
217 }
218
07f35073 219 TRACE("Negotiation succeeded.");
b2e3d87f
NT
220
221 return 0;
7a5ca864
FB
222}
223
1d45f8b5
LV
224int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
225 off_t *size, size_t *blocksize)
7a5ca864 226{
b2e3d87f
NT
227 char buf[256];
228 uint64_t magic, s;
229 uint16_t tmp;
230
07f35073 231 TRACE("Receiving negotiation.");
b2e3d87f
NT
232
233 if (read_sync(csock, buf, 8) != 8) {
234 LOG("read failed");
235 errno = EINVAL;
236 return -1;
237 }
238
239 buf[8] = '\0';
240 if (strlen(buf) == 0) {
241 LOG("server connection closed");
242 errno = EINVAL;
243 return -1;
244 }
245
246 TRACE("Magic is %c%c%c%c%c%c%c%c",
247 qemu_isprint(buf[0]) ? buf[0] : '.',
248 qemu_isprint(buf[1]) ? buf[1] : '.',
249 qemu_isprint(buf[2]) ? buf[2] : '.',
250 qemu_isprint(buf[3]) ? buf[3] : '.',
251 qemu_isprint(buf[4]) ? buf[4] : '.',
252 qemu_isprint(buf[5]) ? buf[5] : '.',
253 qemu_isprint(buf[6]) ? buf[6] : '.',
254 qemu_isprint(buf[7]) ? buf[7] : '.');
255
256 if (memcmp(buf, "NBDMAGIC", 8) != 0) {
257 LOG("Invalid magic received");
258 errno = EINVAL;
259 return -1;
260 }
261
262 if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
263 LOG("read failed");
264 errno = EINVAL;
265 return -1;
266 }
267 magic = be64_to_cpu(magic);
268 TRACE("Magic is 0x%" PRIx64, magic);
269
270 if (name) {
271 uint32_t reserved = 0;
272 uint32_t opt;
273 uint32_t namesize;
274
275 TRACE("Checking magic (opts_magic)");
276 if (magic != 0x49484156454F5054LL) {
277 LOG("Bad magic received");
278 errno = EINVAL;
279 return -1;
280 }
281 if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
282 LOG("flags read failed");
283 errno = EINVAL;
284 return -1;
285 }
286 *flags = be16_to_cpu(tmp) << 16;
287 /* reserved for future use */
288 if (write_sync(csock, &reserved, sizeof(reserved)) !=
289 sizeof(reserved)) {
290 LOG("write failed (reserved)");
291 errno = EINVAL;
292 return -1;
293 }
294 /* write the export name */
295 magic = cpu_to_be64(magic);
296 if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
297 LOG("write failed (magic)");
298 errno = EINVAL;
299 return -1;
300 }
301 opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
302 if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
303 LOG("write failed (opt)");
304 errno = EINVAL;
305 return -1;
306 }
307 namesize = cpu_to_be32(strlen(name));
308 if (write_sync(csock, &namesize, sizeof(namesize)) !=
309 sizeof(namesize)) {
310 LOG("write failed (namesize)");
311 errno = EINVAL;
312 return -1;
313 }
314 if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) {
315 LOG("write failed (name)");
316 errno = EINVAL;
317 return -1;
318 }
319 } else {
320 TRACE("Checking magic (cli_magic)");
321
322 if (magic != 0x00420281861253LL) {
323 LOG("Bad magic received");
324 errno = EINVAL;
325 return -1;
326 }
327 }
328
329 if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) {
330 LOG("read failed");
331 errno = EINVAL;
332 return -1;
333 }
334 *size = be64_to_cpu(s);
335 *blocksize = 1024;
336 TRACE("Size is %" PRIu64, *size);
337
338 if (!name) {
339 if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) {
340 LOG("read failed (flags)");
341 errno = EINVAL;
342 return -1;
343 }
344 *flags = be32_to_cpup(flags);
345 } else {
346 if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
347 LOG("read failed (tmp)");
348 errno = EINVAL;
349 return -1;
350 }
351 *flags |= be32_to_cpu(tmp);
352 }
353 if (read_sync(csock, &buf, 124) != 124) {
354 LOG("read failed (buf)");
355 errno = EINVAL;
356 return -1;
357 }
cd831bd7
TS
358 return 0;
359}
7a5ca864 360
b90fb4b8
PB
361#ifdef __linux__
362int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
cd831bd7 363{
3e05c785
CL
364 TRACE("Setting NBD socket");
365
fc19f8a0 366 if (ioctl(fd, NBD_SET_SOCK, csock) < 0) {
3e05c785
CL
367 int serrno = errno;
368 LOG("Failed to set NBD socket");
369 errno = serrno;
370 return -1;
371 }
372
b2e3d87f 373 TRACE("Setting block size to %lu", (unsigned long)blocksize);
7a5ca864 374
fc19f8a0 375 if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) < 0) {
b2e3d87f
NT
376 int serrno = errno;
377 LOG("Failed setting NBD block size");
378 errno = serrno;
379 return -1;
380 }
7a5ca864 381
0bfcd599 382 TRACE("Setting size to %zd block(s)", (size_t)(size / blocksize));
7a5ca864 383
fc19f8a0 384 if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) < 0) {
b2e3d87f
NT
385 int serrno = errno;
386 LOG("Failed setting size (in blocks)");
387 errno = serrno;
388 return -1;
389 }
7a5ca864 390
b90fb4b8
PB
391 if (flags & NBD_FLAG_READ_ONLY) {
392 int read_only = 1;
393 TRACE("Setting readonly attribute");
394
395 if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
396 int serrno = errno;
397 LOG("Failed setting read-only attribute");
398 errno = serrno;
399 return -1;
400 }
401 }
402
973b3d0a
PB
403 if (ioctl(fd, NBD_SET_FLAGS, flags) < 0
404 && errno != ENOTTY) {
405 int serrno = errno;
406 LOG("Failed setting flags");
407 errno = serrno;
408 return -1;
409 }
410
b2e3d87f 411 TRACE("Negotiation ended");
7a5ca864 412
b2e3d87f 413 return 0;
7a5ca864
FB
414}
415
416int nbd_disconnect(int fd)
417{
b2e3d87f
NT
418 ioctl(fd, NBD_CLEAR_QUE);
419 ioctl(fd, NBD_DISCONNECT);
420 ioctl(fd, NBD_CLEAR_SOCK);
421 return 0;
7a5ca864
FB
422}
423
0a4eb864 424int nbd_client(int fd)
7a5ca864 425{
b2e3d87f
NT
426 int ret;
427 int serrno;
7a5ca864 428
b2e3d87f 429 TRACE("Doing NBD loop");
7a5ca864 430
b2e3d87f 431 ret = ioctl(fd, NBD_DO_IT);
fc19f8a0 432 if (ret < 0 && errno == EPIPE) {
74624688
PB
433 /* NBD_DO_IT normally returns EPIPE when someone has disconnected
434 * the socket via NBD_DISCONNECT. We do not want to return 1 in
435 * that case.
436 */
437 ret = 0;
438 }
b2e3d87f 439 serrno = errno;
7a5ca864 440
b2e3d87f 441 TRACE("NBD loop returned %d: %s", ret, strerror(serrno));
7a5ca864 442
b2e3d87f
NT
443 TRACE("Clearing NBD queue");
444 ioctl(fd, NBD_CLEAR_QUE);
7a5ca864 445
b2e3d87f
NT
446 TRACE("Clearing NBD socket");
447 ioctl(fd, NBD_CLEAR_SOCK);
7a5ca864 448
b2e3d87f
NT
449 errno = serrno;
450 return ret;
7a5ca864 451}
03ff3ca3 452#else
/*
 * Non-Linux stub: there is no kernel NBD device to configure.
 * Always fails with errno = ENOTSUP.
 */
int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
{
    (void)fd;
    (void)csock;
    (void)flags;
    (void)size;
    (void)blocksize;

    errno = ENOTSUP;
    return -1;
}
458
/*
 * Non-Linux stub: there is no kernel NBD device to disconnect.
 * Always fails with errno = ENOTSUP.
 */
int nbd_disconnect(int fd)
{
    (void)fd;

    errno = ENOTSUP;
    return -1;
}
464
/*
 * Non-Linux stub: there is no kernel NBD client loop to run.
 * Always fails with errno = ENOTSUP.
 */
int nbd_client(int fd)
{
    (void)fd;

    errno = ENOTSUP;
    return -1;
}
470#endif
7a5ca864 471
94e7340b 472ssize_t nbd_send_request(int csock, struct nbd_request *request)
7a5ca864 473{
b2e3d87f
NT
474 uint8_t buf[4 + 4 + 8 + 8 + 4];
475
476 cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
477 cpu_to_be32w((uint32_t*)(buf + 4), request->type);
478 cpu_to_be64w((uint64_t*)(buf + 8), request->handle);
479 cpu_to_be64w((uint64_t*)(buf + 16), request->from);
480 cpu_to_be32w((uint32_t*)(buf + 24), request->len);
75818250 481
b2e3d87f
NT
482 TRACE("Sending request to client: "
483 "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}",
484 request->from, request->len, request->handle, request->type);
485
486 if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
487 LOG("writing to socket failed");
488 errno = EINVAL;
489 return -1;
490 }
491 return 0;
492}
75818250 493
94e7340b 494static ssize_t nbd_receive_request(int csock, struct nbd_request *request)
75818250 495{
b2e3d87f
NT
496 uint8_t buf[4 + 4 + 8 + 8 + 4];
497 uint32_t magic;
498
499 if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
500 LOG("read failed");
501 errno = EINVAL;
502 return -1;
503 }
504
505 /* Request
506 [ 0 .. 3] magic (NBD_REQUEST_MAGIC)
507 [ 4 .. 7] type (0 == READ, 1 == WRITE)
508 [ 8 .. 15] handle
509 [16 .. 23] from
510 [24 .. 27] len
511 */
512
513 magic = be32_to_cpup((uint32_t*)buf);
514 request->type = be32_to_cpup((uint32_t*)(buf + 4));
515 request->handle = be64_to_cpup((uint64_t*)(buf + 8));
516 request->from = be64_to_cpup((uint64_t*)(buf + 16));
517 request->len = be32_to_cpup((uint32_t*)(buf + 24));
518
519 TRACE("Got request: "
520 "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }",
521 magic, request->type, request->from, request->len);
522
523 if (magic != NBD_REQUEST_MAGIC) {
524 LOG("invalid magic (got 0x%x)", magic);
525 errno = EINVAL;
526 return -1;
527 }
528 return 0;
75818250
TS
529}
530
94e7340b 531ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply)
75818250 532{
b2e3d87f
NT
533 uint8_t buf[NBD_REPLY_SIZE];
534 uint32_t magic;
535
b2e3d87f
NT
536 if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
537 LOG("read failed");
538 errno = EINVAL;
539 return -1;
540 }
541
542 /* Reply
543 [ 0 .. 3] magic (NBD_REPLY_MAGIC)
544 [ 4 .. 7] error (0 == no error)
545 [ 7 .. 15] handle
546 */
547
548 magic = be32_to_cpup((uint32_t*)buf);
549 reply->error = be32_to_cpup((uint32_t*)(buf + 4));
550 reply->handle = be64_to_cpup((uint64_t*)(buf + 8));
551
552 TRACE("Got reply: "
553 "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }",
554 magic, reply->error, reply->handle);
555
556 if (magic != NBD_REPLY_MAGIC) {
557 LOG("invalid magic (got 0x%x)", magic);
558 errno = EINVAL;
559 return -1;
560 }
561 return 0;
75818250
TS
562}
563
94e7340b 564static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply)
75818250 565{
b2e3d87f
NT
566 uint8_t buf[4 + 4 + 8];
567
568 /* Reply
569 [ 0 .. 3] magic (NBD_REPLY_MAGIC)
570 [ 4 .. 7] error (0 == no error)
571 [ 7 .. 15] handle
572 */
573 cpu_to_be32w((uint32_t*)buf, NBD_REPLY_MAGIC);
574 cpu_to_be32w((uint32_t*)(buf + 4), reply->error);
575 cpu_to_be64w((uint64_t*)(buf + 8), reply->handle);
576
577 TRACE("Sending response to client");
578
579 if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
580 LOG("writing to socket failed");
581 errno = EINVAL;
582 return -1;
583 }
584 return 0;
75818250 585}
7a5ca864 586
41996e38
PB
587#define MAX_NBD_REQUESTS 16
588
d9a73806
PB
589typedef struct NBDRequest NBDRequest;
590
591struct NBDRequest {
592 QSIMPLEQ_ENTRY(NBDRequest) entry;
72deddc5 593 NBDClient *client;
d9a73806
PB
594 uint8_t *data;
595};
596
af49bbbe
PB
597struct NBDExport {
598 BlockDriverState *bs;
599 off_t dev_offset;
600 off_t size;
af49bbbe 601 uint32_t nbdflags;
d9a73806 602 QSIMPLEQ_HEAD(, NBDRequest) requests;
af49bbbe
PB
603};
604
1743b515
PB
605struct NBDClient {
606 int refcount;
607 void (*close)(NBDClient *client);
608
609 NBDExport *exp;
610 int sock;
262db388
PB
611
612 Coroutine *recv_coroutine;
613
614 CoMutex send_lock;
615 Coroutine *send_coroutine;
41996e38
PB
616
617 int nb_requests;
1743b515
PB
618};
619
620static void nbd_client_get(NBDClient *client)
621{
622 client->refcount++;
623}
624
625static void nbd_client_put(NBDClient *client)
626{
627 if (--client->refcount == 0) {
628 g_free(client);
629 }
630}
631
632static void nbd_client_close(NBDClient *client)
633{
634 qemu_set_fd_handler2(client->sock, NULL, NULL, NULL, NULL);
635 close(client->sock);
636 client->sock = -1;
637 if (client->close) {
638 client->close(client);
639 }
640 nbd_client_put(client);
641}
642
72deddc5 643static NBDRequest *nbd_request_get(NBDClient *client)
d9a73806
PB
644{
645 NBDRequest *req;
72deddc5
PB
646 NBDExport *exp = client->exp;
647
41996e38
PB
648 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
649 client->nb_requests++;
650
d9a73806
PB
651 if (QSIMPLEQ_EMPTY(&exp->requests)) {
652 req = g_malloc0(sizeof(NBDRequest));
653 req->data = qemu_blockalign(exp->bs, NBD_BUFFER_SIZE);
654 } else {
655 req = QSIMPLEQ_FIRST(&exp->requests);
656 QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
657 }
72deddc5
PB
658 nbd_client_get(client);
659 req->client = client;
d9a73806
PB
660 return req;
661}
662
72deddc5 663static void nbd_request_put(NBDRequest *req)
d9a73806 664{
72deddc5
PB
665 NBDClient *client = req->client;
666 QSIMPLEQ_INSERT_HEAD(&client->exp->requests, req, entry);
41996e38
PB
667 if (client->nb_requests-- == MAX_NBD_REQUESTS) {
668 qemu_notify_event();
669 }
72deddc5 670 nbd_client_put(client);
d9a73806
PB
671}
672
af49bbbe
PB
673NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
674 off_t size, uint32_t nbdflags)
675{
676 NBDExport *exp = g_malloc0(sizeof(NBDExport));
d9a73806 677 QSIMPLEQ_INIT(&exp->requests);
af49bbbe
PB
678 exp->bs = bs;
679 exp->dev_offset = dev_offset;
680 exp->nbdflags = nbdflags;
681 exp->size = size == -1 ? exp->bs->total_sectors * 512 : size;
af49bbbe
PB
682 return exp;
683}
684
685void nbd_export_close(NBDExport *exp)
686{
d9a73806
PB
687 while (!QSIMPLEQ_EMPTY(&exp->requests)) {
688 NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests);
689 QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
690 qemu_vfree(first->data);
691 g_free(first);
692 }
693
af49bbbe
PB
694 bdrv_close(exp->bs);
695 g_free(exp);
696}
697
/* fd-handler callbacks, defined further down; @opaque is the NBDClient. */
static int nbd_can_read(void *opaque);
static void nbd_read(void *opaque);
static void nbd_restart_write(void *opaque);
701
94e7340b
PB
702static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
703 int len)
22045592 704{
72deddc5
PB
705 NBDClient *client = req->client;
706 int csock = client->sock;
94e7340b 707 ssize_t rc, ret;
22045592 708
262db388 709 qemu_co_mutex_lock(&client->send_lock);
41996e38
PB
710 qemu_set_fd_handler2(csock, nbd_can_read, nbd_read,
711 nbd_restart_write, client);
262db388
PB
712 client->send_coroutine = qemu_coroutine_self();
713
22045592
PB
714 if (!len) {
715 rc = nbd_send_reply(csock, reply);
fc19f8a0 716 if (rc < 0) {
22045592
PB
717 rc = -errno;
718 }
719 } else {
720 socket_set_cork(csock, 1);
721 rc = nbd_send_reply(csock, reply);
fc19f8a0 722 if (rc >= 0) {
262db388 723 ret = qemu_co_send(csock, req->data, len);
22045592
PB
724 if (ret != len) {
725 errno = EIO;
726 rc = -1;
727 }
728 }
fc19f8a0 729 if (rc < 0) {
22045592
PB
730 rc = -errno;
731 }
732 socket_set_cork(csock, 0);
733 }
262db388
PB
734
735 client->send_coroutine = NULL;
41996e38 736 qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
262db388 737 qemu_co_mutex_unlock(&client->send_lock);
22045592
PB
738 return rc;
739}
740
94e7340b 741static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request)
a030b347 742{
72deddc5
PB
743 NBDClient *client = req->client;
744 int csock = client->sock;
94e7340b 745 ssize_t rc;
a030b347 746
262db388 747 client->recv_coroutine = qemu_coroutine_self();
fc19f8a0 748 if (nbd_receive_request(csock, request) < 0) {
a030b347
PB
749 rc = -EIO;
750 goto out;
751 }
752
753 if (request->len > NBD_BUFFER_SIZE) {
754 LOG("len (%u) is larger than max len (%u)",
755 request->len, NBD_BUFFER_SIZE);
756 rc = -EINVAL;
757 goto out;
758 }
759
760 if ((request->from + request->len) < request->from) {
761 LOG("integer overflow detected! "
762 "you're probably being attacked");
763 rc = -EINVAL;
764 goto out;
765 }
766
767 TRACE("Decoding type");
768
769 if ((request->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
770 TRACE("Reading %u byte(s)", request->len);
771
262db388 772 if (qemu_co_recv(csock, req->data, request->len) != request->len) {
a030b347
PB
773 LOG("reading from socket failed");
774 rc = -EIO;
775 goto out;
776 }
777 }
778 rc = 0;
779
780out:
262db388 781 client->recv_coroutine = NULL;
a030b347
PB
782 return rc;
783}
784
262db388 785static void nbd_trip(void *opaque)
75818250 786{
262db388 787 NBDClient *client = opaque;
72deddc5 788 NBDRequest *req = nbd_request_get(client);
1743b515 789 NBDExport *exp = client->exp;
b2e3d87f
NT
790 struct nbd_request request;
791 struct nbd_reply reply;
94e7340b 792 ssize_t ret;
b2e3d87f
NT
793
794 TRACE("Reading request.");
795
262db388 796 ret = nbd_co_receive_request(req, &request);
a030b347 797 if (ret == -EIO) {
d9a73806 798 goto out;
a030b347 799 }
b2e3d87f 800
fae69416
PB
801 reply.handle = request.handle;
802 reply.error = 0;
803
a030b347
PB
804 if (ret < 0) {
805 reply.error = -ret;
806 goto error_reply;
b2e3d87f
NT
807 }
808
af49bbbe 809 if ((request.from + request.len) > exp->size) {
b2e3d87f
NT
810 LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
811 ", Offset: %" PRIu64 "\n",
af49bbbe 812 request.from, request.len,
0fee8f34 813 (uint64_t)exp->size, (uint64_t)exp->dev_offset);
b2e3d87f 814 LOG("requested operation past EOF--bad client?");
fae69416 815 goto invalid_request;
b2e3d87f
NT
816 }
817
2c7989a9 818 switch (request.type & NBD_CMD_MASK_COMMAND) {
b2e3d87f
NT
819 case NBD_CMD_READ:
820 TRACE("Request type is READ");
821
af49bbbe 822 ret = bdrv_read(exp->bs, (request.from + exp->dev_offset) / 512,
d9a73806 823 req->data, request.len / 512);
adcf6302 824 if (ret < 0) {
b2e3d87f 825 LOG("reading from file failed");
adcf6302 826 reply.error = -ret;
fae69416 827 goto error_reply;
b2e3d87f 828 }
b2e3d87f
NT
829
830 TRACE("Read %u byte(s)", request.len);
262db388 831 if (nbd_co_send_reply(req, &reply, request.len) < 0)
d9a73806 832 goto out;
b2e3d87f
NT
833 break;
834 case NBD_CMD_WRITE:
835 TRACE("Request type is WRITE");
836
af49bbbe 837 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
b2e3d87f 838 TRACE("Server is read-only, return error");
fae69416
PB
839 reply.error = EROFS;
840 goto error_reply;
841 }
842
843 TRACE("Writing to device");
844
af49bbbe 845 ret = bdrv_write(exp->bs, (request.from + exp->dev_offset) / 512,
d9a73806 846 req->data, request.len / 512);
fae69416
PB
847 if (ret < 0) {
848 LOG("writing to file failed");
849 reply.error = -ret;
850 goto error_reply;
851 }
b2e3d87f 852
fae69416 853 if (request.type & NBD_CMD_FLAG_FUA) {
262db388 854 ret = bdrv_co_flush(exp->bs);
adcf6302 855 if (ret < 0) {
fae69416 856 LOG("flush failed");
adcf6302 857 reply.error = -ret;
fae69416 858 goto error_reply;
2c7989a9 859 }
b2e3d87f
NT
860 }
861
fc19f8a0 862 if (nbd_co_send_reply(req, &reply, 0) < 0) {
d9a73806 863 goto out;
fc19f8a0 864 }
b2e3d87f
NT
865 break;
866 case NBD_CMD_DISC:
867 TRACE("Request type is DISCONNECT");
868 errno = 0;
262db388 869 goto out;
1486d04a
PB
870 case NBD_CMD_FLUSH:
871 TRACE("Request type is FLUSH");
872
262db388 873 ret = bdrv_co_flush(exp->bs);
1486d04a
PB
874 if (ret < 0) {
875 LOG("flush failed");
876 reply.error = -ret;
877 }
fc19f8a0 878 if (nbd_co_send_reply(req, &reply, 0) < 0) {
d9a73806 879 goto out;
fc19f8a0 880 }
7a706633
PB
881 break;
882 case NBD_CMD_TRIM:
883 TRACE("Request type is TRIM");
262db388
PB
884 ret = bdrv_co_discard(exp->bs, (request.from + exp->dev_offset) / 512,
885 request.len / 512);
7a706633
PB
886 if (ret < 0) {
887 LOG("discard failed");
888 reply.error = -ret;
889 }
fc19f8a0 890 if (nbd_co_send_reply(req, &reply, 0) < 0) {
d9a73806 891 goto out;
fc19f8a0 892 }
1486d04a 893 break;
b2e3d87f
NT
894 default:
895 LOG("invalid request type (%u) received", request.type);
fae69416
PB
896 invalid_request:
897 reply.error = -EINVAL;
898 error_reply:
fc19f8a0 899 if (nbd_co_send_reply(req, &reply, 0) < 0) {
d9a73806 900 goto out;
fc19f8a0 901 }
fae69416 902 break;
b2e3d87f
NT
903 }
904
905 TRACE("Request/Reply complete");
906
262db388
PB
907 nbd_request_put(req);
908 return;
909
d9a73806 910out:
72deddc5 911 nbd_request_put(req);
262db388 912 nbd_client_close(client);
7a5ca864 913}
af49bbbe 914
41996e38
PB
915static int nbd_can_read(void *opaque)
916{
917 NBDClient *client = opaque;
918
919 return client->recv_coroutine || client->nb_requests < MAX_NBD_REQUESTS;
920}
921
1743b515
PB
922static void nbd_read(void *opaque)
923{
924 NBDClient *client = opaque;
925
262db388
PB
926 if (client->recv_coroutine) {
927 qemu_coroutine_enter(client->recv_coroutine, NULL);
928 } else {
929 qemu_coroutine_enter(qemu_coroutine_create(nbd_trip), client);
1743b515 930 }
1743b515
PB
931}
932
262db388
PB
933static void nbd_restart_write(void *opaque)
934{
935 NBDClient *client = opaque;
936
937 qemu_coroutine_enter(client->send_coroutine, NULL);
938}
939
1743b515
PB
940NBDClient *nbd_client_new(NBDExport *exp, int csock,
941 void (*close)(NBDClient *))
af49bbbe 942{
1743b515 943 NBDClient *client;
fc19f8a0 944 if (nbd_send_negotiate(csock, exp->size, exp->nbdflags) < 0) {
1743b515
PB
945 return NULL;
946 }
947 client = g_malloc0(sizeof(NBDClient));
948 client->refcount = 1;
949 client->exp = exp;
950 client->sock = csock;
951 client->close = close;
262db388 952 qemu_co_mutex_init(&client->send_lock);
41996e38 953 qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
1743b515 954 return client;
af49bbbe 955}