X-Git-Url: https://git.proxmox.com/?p=qemu.git;a=blobdiff_plain;f=qemu-nbd.c;h=c26c98ef1d97ae7b8a2b375572cdb2ba1e71213f;hp=d8d3e15a843acd7852a7b8b502ac37abe85d4a54;hb=30c367ed446b6ea53245589a5cf373578ac075d7;hpb=03c39eb558472ab3b13a39e9159b2e1ffa6c3a43 diff --git a/qemu-nbd.c b/qemu-nbd.c index d8d3e15a8..c26c98ef1 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -17,8 +17,9 @@ */ #include "qemu-common.h" -#include "block_int.h" -#include "nbd.h" +#include "block/block.h" +#include "block/nbd.h" +#include "qemu/main-loop.h" #include #include @@ -31,37 +32,61 @@ #include #include #include +#include -#define SOCKET_PATH "/var/lock/qemu-nbd-%s" - -#define NBD_BUFFER_SIZE (1024*1024) +#define SOCKET_PATH "/var/lock/qemu-nbd-%s" +#define QEMU_NBD_OPT_CACHE 1 +#define QEMU_NBD_OPT_AIO 2 +#define QEMU_NBD_OPT_DISCARD 3 +static NBDExport *exp; static int verbose; +static char *srcpath; +static char *sockpath; +static int persistent = 0; +static enum { RUNNING, TERMINATE, TERMINATING, TERMINATED } state; +static int shared = 1; +static int nb_fds; static void usage(const char *name) { - printf( + (printf) ( "Usage: %s [OPTIONS] FILE\n" "QEMU Disk Network Block Device Server\n" "\n" +" -h, --help display this help and exit\n" +" -V, --version output version information and exit\n" +"\n" +"Connection properties:\n" " -p, --port=PORT port to listen on (default `%d')\n" -" -o, --offset=OFFSET offset into the image\n" " -b, --bind=IFACE interface to bind to (default `0.0.0.0')\n" " -k, --socket=PATH path to the unix socket\n" " (default '"SOCKET_PATH"')\n" -" -r, --read-only export read-only\n" -" -P, --partition=NUM only expose partition NUM\n" -" -s, --snapshot use snapshot file\n" -" -n, --nocache disable host cache\n" -" -c, --connect=DEV connect FILE to the local NBD device DEV\n" -" -d, --disconnect disconnect the specified device\n" " -e, --shared=NUM device can be shared by NUM clients (default '1')\n" " -t, --persistent don't exit on the last connection\n" " -v, --verbose display extra debugging information\n" -" -h, --help display this help and exit\n" -" -V, --version output version information and exit\n" "\n" -"Report bugs to \n" +"Exposing part of the image:\n" +" -o, --offset=OFFSET offset into the image\n" +" -P, --partition=NUM only expose partition NUM\n" +"\n" +#ifdef __linux__ +"Kernel NBD client support:\n" +" -c, --connect=DEV connect FILE to the local NBD device DEV\n" +" -d, --disconnect disconnect the specified device\n" +"\n" +#endif +"\n" +"Block device options:\n" +" -r, --read-only export read-only\n" +" -s, --snapshot use snapshot file\n" +" -n, --nocache disable host cache\n" +" --cache=MODE set cache mode (none, writeback, ...)\n" +#ifdef CONFIG_LINUX_AIO +" --aio=MODE set AIO mode (native or threads)\n" +#endif +"\n" +"Report bugs to \n" , name, NBD_DEFAULT_PORT, "DEVICE"); } @@ -120,8 +145,7 @@ static int find_partition(BlockDriverState *bs, int partition, } if (data[510] != 0x55 || data[511] != 0xaa) { - errno = -EINVAL; - return -1; + return -EINVAL; } for (i = 0; i < 4; i++) { @@ -159,43 +183,139 @@ static int find_partition(BlockDriverState *bs, int partition, } } - errno = -ENOENT; - return -1; + return -ENOENT; } -static void show_parts(const char *device) +static void termsig_handler(int signum) { - if (fork() == 0) { - int nbd; + state = TERMINATE; + qemu_notify_event(); +} - /* linux just needs an open() to trigger - * the partition table update - * but remember to load the module with max_part != 0 : - * modprobe nbd max_part=63 - */ - nbd = open(device, O_RDWR); - if (nbd != -1) - close(nbd); - exit(0); +static void *show_parts(void *arg) +{ + char *device = arg; + int nbd; + + /* linux just needs an open() to trigger + * the partition table update + * but remember to load the module with max_part != 0 : + * modprobe nbd max_part=63 + */ + nbd = open(device, O_RDWR); + if (nbd >= 0) { + close(nbd); + } + return NULL; +} + +static void *nbd_client_thread(void *arg) +{ + char *device = arg; + off_t size; + size_t blocksize; + uint32_t nbdflags; + int fd, sock; + int ret; + pthread_t show_parts_thread; + + sock = unix_socket_outgoing(sockpath); + if (sock < 0) { + goto out; + } + + ret = nbd_receive_negotiate(sock, NULL, &nbdflags, + &size, &blocksize); + if (ret < 0) { + goto out; + } + + fd = open(device, O_RDWR); + if (fd < 0) { + /* Linux-only, we can use %m in printf. */ + fprintf(stderr, "Failed to open %s: %m", device); + goto out; + } + + ret = nbd_init(fd, sock, nbdflags, size, blocksize); + if (ret < 0) { + goto out; + } + + /* update partition table */ + pthread_create(&show_parts_thread, NULL, show_parts, device); + + if (verbose) { + fprintf(stderr, "NBD device %s is now connected to %s\n", + device, srcpath); + } else { + /* Close stderr so that the qemu-nbd process exits. */ + dup2(STDOUT_FILENO, STDERR_FILENO); + } + + ret = nbd_client(fd); + if (ret) { + goto out; + } + close(fd); + kill(getpid(), SIGTERM); + return (void *) EXIT_SUCCESS; + +out: + kill(getpid(), SIGTERM); + return (void *) EXIT_FAILURE; +} + +static int nbd_can_accept(void *opaque) +{ + return nb_fds < shared; +} + +static void nbd_export_closed(NBDExport *exp) +{ + assert(state == TERMINATING); + state = TERMINATED; +} + +static void nbd_client_closed(NBDClient *client) +{ + nb_fds--; + if (nb_fds == 0 && !persistent && state == RUNNING) { + state = TERMINATE; + } + qemu_notify_event(); + nbd_client_put(client); +} + +static void nbd_accept(void *opaque) +{ + int server_fd = (uintptr_t) opaque; + struct sockaddr_in addr; + socklen_t addr_len = sizeof(addr); + + int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len); + if (state >= TERMINATE) { + close(fd); + return; + } + + if (fd >= 0 && nbd_client_new(exp, fd, nbd_client_closed)) { + nb_fds++; } } int main(int argc, char **argv) { BlockDriverState *bs; + BlockDriver *drv; off_t dev_offset = 0; - off_t offset = 0; uint32_t nbdflags = 0; bool disconnect = false; const char *bindto = "0.0.0.0"; + char *device = NULL; int port = NBD_DEFAULT_PORT; - struct sockaddr_in addr; - socklen_t addr_len = sizeof(addr); off_t fd_size; - char *device = NULL; - char *socket = NULL; - char sockpath[128]; - const char *sopt = "hVb:o:p:rsnP:c:dvk:e:t"; + const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:t"; struct option lopt[] = { { "help", 0, NULL, 'h' }, { "version", 0, NULL, 'V' }, @@ -209,7 +329,13 @@ int main(int argc, char **argv) { "disconnect", 0, NULL, 'd' }, { "snapshot", 0, NULL, 's' }, { "nocache", 0, NULL, 'n' }, + { "cache", 1, NULL, QEMU_NBD_OPT_CACHE }, +#ifdef CONFIG_LINUX_AIO + { "aio", 1, NULL, QEMU_NBD_OPT_AIO }, +#endif + { "discard", 1, NULL, QEMU_NBD_OPT_DISCARD }, { "shared", 1, NULL, 'e' }, + { "format", 1, NULL, 'f' }, { "persistent", 0, NULL, 't' }, { "verbose", 0, NULL, 'v' }, { NULL, 0, NULL, 0 } @@ -221,15 +347,23 @@ int main(int argc, char **argv) int flags = BDRV_O_RDWR; int partition = -1; int ret; - int shared = 1; - uint8_t *data; - fd_set fds; - int *sharing_fds; int fd; - int i; - int nb_fds = 0; - int max_fd; - int persistent = 0; + bool seen_cache = false; + bool seen_discard = false; +#ifdef CONFIG_LINUX_AIO + bool seen_aio = false; +#endif + pthread_t client_thread; + const char *fmt = NULL; + Error *local_err = NULL; + + /* The client thread uses SIGTERM to interrupt the server. A signal + * handler ensures that "qemu-nbd -v -c" exits with a nice status code. + */ + struct sigaction sa_sigterm; + memset(&sa_sigterm, 0, sizeof(sa_sigterm)); + sa_sigterm.sa_handler = termsig_handler; + sigaction(SIGTERM, &sa_sigterm, NULL); while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) { switch (ch) { @@ -237,7 +371,40 @@ int main(int argc, char **argv) flags |= BDRV_O_SNAPSHOT; break; case 'n': - flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB; + optarg = (char *) "none"; + /* fallthrough */ + case QEMU_NBD_OPT_CACHE: + if (seen_cache) { + errx(EXIT_FAILURE, "-n and --cache can only be specified once"); + } + seen_cache = true; + if (bdrv_parse_cache_flags(optarg, &flags) == -1) { + errx(EXIT_FAILURE, "Invalid cache mode `%s'", optarg); + } + break; +#ifdef CONFIG_LINUX_AIO + case QEMU_NBD_OPT_AIO: + if (seen_aio) { + errx(EXIT_FAILURE, "--aio can only be specified once"); + } + seen_aio = true; + if (!strcmp(optarg, "native")) { + flags |= BDRV_O_NATIVE_AIO; + } else if (!strcmp(optarg, "threads")) { + /* this is the default */ + } else { + errx(EXIT_FAILURE, "invalid aio mode `%s'", optarg); + } + break; +#endif + case QEMU_NBD_OPT_DISCARD: + if (seen_discard) { + errx(EXIT_FAILURE, "--discard can only be specified once"); + } + seen_discard = true; + if (bdrv_parse_discard_flags(optarg, &flags) == -1) { + errx(EXIT_FAILURE, "Invalid discard mode `%s'", optarg); + } break; case 'b': bindto = optarg; @@ -273,8 +440,8 @@ int main(int argc, char **argv) errx(EXIT_FAILURE, "Invalid partition %d", partition); break; case 'k': - socket = optarg; - if (socket[0] != '/') + sockpath = optarg; + if (sockpath[0] != '/') errx(EXIT_FAILURE, "socket path must be absolute\n"); break; case 'd': @@ -292,6 +459,9 @@ int main(int argc, char **argv) errx(EXIT_FAILURE, "Shared device number must be greater than 0\n"); } break; + case 'f': + fmt = optarg; + break; case 't': persistent = 1; break; @@ -320,9 +490,9 @@ int main(int argc, char **argv) if (disconnect) { fd = open(argv[optind], O_RDWR); - if (fd == -1) + if (fd < 0) { err(EXIT_FAILURE, "Cannot open %s", argv[optind]); - + } nbd_disconnect(fd); close(fd); @@ -332,167 +502,150 @@ int main(int argc, char **argv) return 0; } - bdrv_init(); - - bs = bdrv_new("hda"); - - if ((ret = bdrv_open(bs, argv[optind], flags, NULL)) < 0) { - errno = -ret; - err(EXIT_FAILURE, "Failed to bdrv_open '%s'", argv[optind]); - } - - fd_size = bs->total_sectors * 512; - - if (partition != -1 && - find_partition(bs, partition, &dev_offset, &fd_size)) - err(EXIT_FAILURE, "Could not find partition %d", partition); - - if (device) { + if (device && !verbose) { + int stderr_fd[2]; pid_t pid; - int sock; + int ret; - /* want to fail before daemonizing */ - if (access(device, R_OK|W_OK) == -1) { - err(EXIT_FAILURE, "Could not access '%s'", device); + if (qemu_pipe(stderr_fd) < 0) { + err(EXIT_FAILURE, "Error setting up communication pipe"); } - if (!verbose) { - /* detach client and server */ - if (qemu_daemon(0, 0) == -1) { + /* Now daemonize, but keep a communication channel open to + * print errors and exit with the proper status code. + */ + pid = fork(); + if (pid == 0) { + close(stderr_fd[0]); + ret = qemu_daemon(1, 0); + + /* Temporarily redirect stderr to the parent's pipe... */ + dup2(stderr_fd[1], STDERR_FILENO); + if (ret < 0) { err(EXIT_FAILURE, "Failed to daemonize"); } - } - - if (socket == NULL) { - snprintf(sockpath, sizeof(sockpath), SOCKET_PATH, - basename(device)); - socket = sockpath; - } - pid = fork(); - if (pid < 0) - return 1; - if (pid != 0) { - off_t size; - size_t blocksize; - - ret = 0; - bdrv_close(bs); - - do { - sock = unix_socket_outgoing(socket); - if (sock == -1) { - if (errno != ENOENT && errno != ECONNREFUSED) { - ret = 1; - goto out; - } - sleep(1); /* wait children */ + /* ... close the descriptor we inherited and go on. */ + close(stderr_fd[1]); + } else { + bool errors = false; + char *buf; + + /* In the parent. Print error messages from the child until + * it closes the pipe. + */ + close(stderr_fd[1]); + buf = g_malloc(1024); + while ((ret = read(stderr_fd[0], buf, 1024)) > 0) { + errors = true; + ret = qemu_write_full(STDERR_FILENO, buf, ret); + if (ret < 0) { + exit(EXIT_FAILURE); } - } while (sock == -1); - - fd = open(device, O_RDWR); - if (fd == -1) { - ret = 1; - goto out; } - - ret = nbd_receive_negotiate(sock, NULL, &nbdflags, - &size, &blocksize); - if (ret == -1) { - ret = 1; - goto out; + if (ret < 0) { + err(EXIT_FAILURE, "Cannot read from daemon"); } - ret = nbd_init(fd, sock, nbdflags, size, blocksize); - if (ret == -1) { - ret = 1; - goto out; - } + /* Usually the daemon should not print any message. + * Exit with zero status in that case. + */ + exit(errors); + } + } - printf("NBD device %s is now connected to file %s\n", - device, argv[optind]); + if (device != NULL && sockpath == NULL) { + sockpath = g_malloc(128); + snprintf(sockpath, 128, SOCKET_PATH, basename(device)); + } - /* update partition table */ + qemu_init_main_loop(); + bdrv_init(); + atexit(bdrv_close_all); - show_parts(device); + if (fmt) { + drv = bdrv_find_format(fmt); + if (!drv) { + errx(EXIT_FAILURE, "Unknown file format '%s'", fmt); + } + } else { + drv = NULL; + } - ret = nbd_client(fd); - if (ret) { - ret = 1; - } - close(fd); - out: - kill(pid, SIGTERM); - unlink(socket); + bs = bdrv_new("hda"); + srcpath = argv[optind]; + ret = bdrv_open(bs, srcpath, NULL, flags, drv, &local_err); + if (ret < 0) { + errno = -ret; + err(EXIT_FAILURE, "Failed to bdrv_open '%s': %s", argv[optind], + error_get_pretty(local_err)); + } + + fd_size = bdrv_getlength(bs); - return ret; + if (partition != -1) { + ret = find_partition(bs, partition, &dev_offset, &fd_size); + if (ret < 0) { + errno = -ret; + err(EXIT_FAILURE, "Could not find partition %d", partition); } - /* children */ } - sharing_fds = g_malloc((shared + 1) * sizeof(int)); + exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags, nbd_export_closed); - if (socket) { - sharing_fds[0] = unix_socket_incoming(socket); + if (sockpath) { + fd = unix_socket_incoming(sockpath); } else { - sharing_fds[0] = tcp_socket_incoming(bindto, port); + fd = tcp_socket_incoming(bindto, port); } - if (sharing_fds[0] == -1) + if (fd < 0) { return 1; - max_fd = sharing_fds[0]; - nb_fds++; + } - data = qemu_blockalign(bs, NBD_BUFFER_SIZE); - if (data == NULL) - errx(EXIT_FAILURE, "Cannot allocate data buffer"); + if (device) { + int ret; - do { + ret = pthread_create(&client_thread, NULL, nbd_client_thread, device); + if (ret != 0) { + errx(EXIT_FAILURE, "Failed to create client thread: %s", + strerror(ret)); + } + } else { + /* Shut up GCC warnings. */ + memset(&client_thread, 0, sizeof(client_thread)); + } - FD_ZERO(&fds); - for (i = 0; i < nb_fds; i++) - FD_SET(sharing_fds[i], &fds); + qemu_set_fd_handler2(fd, nbd_can_accept, nbd_accept, NULL, + (void *)(uintptr_t)fd); - ret = select(max_fd + 1, &fds, NULL, NULL, NULL); - if (ret == -1) - break; + /* now when the initialization is (almost) complete, chdir("/") + * to free any busy filesystems */ + if (chdir("/") < 0) { + err(EXIT_FAILURE, "Could not chdir to root directory"); + } - if (FD_ISSET(sharing_fds[0], &fds)) - ret--; - for (i = 1; i < nb_fds && ret; i++) { - if (FD_ISSET(sharing_fds[i], &fds)) { - if (nbd_trip(bs, sharing_fds[i], fd_size, dev_offset, - &offset, nbdflags, data, NBD_BUFFER_SIZE) != 0) { - close(sharing_fds[i]); - nb_fds--; - sharing_fds[i] = sharing_fds[nb_fds]; - i--; - } - ret--; - } - } - /* new connection ? */ - if (FD_ISSET(sharing_fds[0], &fds)) { - if (nb_fds < shared + 1) { - sharing_fds[nb_fds] = accept(sharing_fds[0], - (struct sockaddr *)&addr, - &addr_len); - if (sharing_fds[nb_fds] != -1 && - nbd_negotiate(sharing_fds[nb_fds], fd_size, nbdflags) != -1) { - if (sharing_fds[nb_fds] > max_fd) - max_fd = sharing_fds[nb_fds]; - nb_fds++; - } - } + state = RUNNING; + do { + main_loop_wait(false); + if (state == TERMINATE) { + state = TERMINATING; + nbd_export_close(exp); + nbd_export_put(exp); + exp = NULL; } - } while (persistent || nb_fds > 1); - qemu_vfree(data); + } while (state != TERMINATED); - close(sharing_fds[0]); bdrv_close(bs); - g_free(sharing_fds); - if (socket) - unlink(socket); + if (sockpath) { + unlink(sockpath); + } - return 0; + if (device) { + void *ret; + pthread_join(client_thread, &ret); + exit(ret != NULL); + } else { + exit(EXIT_SUCCESS); + } }