]> git.proxmox.com Git - ceph.git/blame - ceph/src/common/compat.cc
update ceph source to reef 18.2.0
[ceph.git] / ceph / src / common / compat.cc
CommitLineData
28e407b8
AA
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
91327a77
AA
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2011 New Dream Network
7 * Copyright (C) 2018 Red Hat, Inc.
8 *
9 * This is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License version 2.1, as published by the Free Software
12 * Foundation. See file COPYING.
13 *
14 */
28e407b8 15
9f95a23c
TL
16#include <cstdio>
17
91327a77 18#include <errno.h>
28e407b8
AA
19#include <fcntl.h>
20#include <stdint.h>
f67539c2 21#include <stdio.h>
20effc67
TL
22#include "acconfig.h"
23#ifdef HAVE_MEMSET_S
24# define __STDC_WANT_LIB_EXT1__ 1
25#endif
28e407b8 26#include <string.h>
f67539c2
TL
27#include <thread>
28#ifndef _WIN32
28e407b8 29#include <sys/mount.h>
f67539c2
TL
30#else
31#include <stdlib.h>
32#endif
91327a77
AA
33#include <sys/param.h>
34#include <sys/socket.h>
35#include <sys/stat.h>
36#include <sys/types.h>
37#include <unistd.h>
28e407b8
AA
38#if defined(__linux__)
39#include <sys/vfs.h>
40#endif
41
91327a77
AA
42#include "include/compat.h"
43#include "include/sock_compat.h"
28e407b8
AA
44#include "common/safe_io.h"
45
46// The type-value for a ZFS FS in fstatfs.
47#define FS_ZFS_TYPE 0xde
48
49// On FreeBSD, ZFS fallocate always fails since it is considered impossible to
50// reserve space on a COW filesystem. posix_fallocate() returns EINVAL
51// Linux in this case already emulates the reservation in glibc
52// In which case it is allocated manually, and still that is not a real guarantee
53// that a full buffer is allocated on disk, since it could be compressed.
54// To prevent this the written buffer needs to be loaded with random data.
55int manual_fallocate(int fd, off_t offset, off_t len) {
56 int r = lseek(fd, offset, SEEK_SET);
57 if (r == -1)
58 return errno;
59 char data[1024*128];
60 // TODO: compressing filesystems would require random data
92f5a8d4 61 // FIPS zeroization audit 20191115: this memset is not security related.
28e407b8
AA
62 memset(data, 0x42, sizeof(data));
63 for (off_t off = 0; off < len; off += sizeof(data)) {
11fdf7f2 64 if (off + static_cast<off_t>(sizeof(data)) > len)
28e407b8
AA
65 r = safe_write(fd, data, len - off);
66 else
67 r = safe_write(fd, data, sizeof(data));
68 if (r == -1) {
69 return errno;
70 }
71 }
72 return 0;
73}
74
75int on_zfs(int basedir_fd) {
f67539c2 76 #ifndef _WIN32
28e407b8
AA
77 struct statfs basefs;
78 (void)fstatfs(basedir_fd, &basefs);
79 return (basefs.f_type == FS_ZFS_TYPE);
f67539c2
TL
80 #else
81 return 0;
82 #endif
28e407b8
AA
83}
84
85int ceph_posix_fallocate(int fd, off_t offset, off_t len) {
86 // Return 0 if oke, otherwise errno > 0
87
88#ifdef HAVE_POSIX_FALLOCATE
89 if (on_zfs(fd)) {
90 return manual_fallocate(fd, offset, len);
91 } else {
92 return posix_fallocate(fd, offset, len);
93 }
94#elif defined(__APPLE__)
95 fstore_t store;
96 store.fst_flags = F_ALLOCATECONTIG;
97 store.fst_posmode = F_PEOFPOSMODE;
98 store.fst_offset = offset;
99 store.fst_length = len;
100
101 int ret = fcntl(fd, F_PREALLOCATE, &store);
102 if (ret == -1) {
103 ret = errno;
104 }
105 return ret;
106#else
107 return manual_fallocate(fd, offset, len);
108#endif
109}
110
9f95a23c 111int pipe_cloexec(int pipefd[2], int flags)
91327a77
AA
112{
113#if defined(HAVE_PIPE2)
9f95a23c 114 return pipe2(pipefd, O_CLOEXEC | flags);
91327a77
AA
115#else
116 if (pipe(pipefd) == -1)
117 return -1;
118
f67539c2 119 #ifndef _WIN32
91327a77
AA
120 /*
121 * The old-fashioned, race-condition prone way that we have to fall
122 * back on if pipe2 does not exist.
123 */
124 if (fcntl(pipefd[0], F_SETFD, FD_CLOEXEC) < 0) {
125 goto fail;
126 }
127
128 if (fcntl(pipefd[1], F_SETFD, FD_CLOEXEC) < 0) {
129 goto fail;
130 }
f67539c2 131 #endif
91327a77
AA
132
133 return 0;
134fail:
135 int save_errno = errno;
136 VOID_TEMP_FAILURE_RETRY(close(pipefd[0]));
137 VOID_TEMP_FAILURE_RETRY(close(pipefd[1]));
138 return (errno = save_errno, -1);
139#endif
140}
141
142
/*
 * Create a socket that is marked close-on-exec.
 *
 * Returns the new descriptor, or -1 on failure with errno set.
 */
int socket_cloexec(int domain, int type, int protocol)
{
#ifdef SOCK_CLOEXEC
  // Ask for close-on-exec at creation time.
  const int type_with_cloexec = type | SOCK_CLOEXEC;
  return socket(domain, type_with_cloexec, protocol);
#else
  const int sock = socket(domain, type, protocol);
  if (sock == -1) {
    return -1;
  }

#ifndef _WIN32
  if (fcntl(sock, F_SETFD, FD_CLOEXEC) < 0) {
    // Preserve the fcntl() error across close().
    const int saved_errno = errno;
    VOID_TEMP_FAILURE_RETRY(close(sock));
    errno = saved_errno;
    return -1;
  }
#endif

  return sock;
#endif
}
164
/*
 * Create a connected socket pair with both ends marked close-on-exec.
 *
 * Returns 0 on success and -1 (errno set) on failure; the Windows build
 * without SOCK_CLOEXEC returns -ENOTSUP instead.
 */
int socketpair_cloexec(int domain, int type, int protocol, int sv[2])
{
#ifdef SOCK_CLOEXEC
  const int type_with_cloexec = type | SOCK_CLOEXEC;
  return socketpair(domain, type_with_cloexec, protocol, sv);
#elif _WIN32
  /* TODO */
  return -ENOTSUP;
#else
  if (socketpair(domain, type, protocol, sv) == -1) {
    return -1;
  }

  // This branch is only compiled for non-Windows targets. The second
  // fcntl() is only attempted when the first one succeeded.
  const bool marked = fcntl(sv[0], F_SETFD, FD_CLOEXEC) >= 0 &&
                      fcntl(sv[1], F_SETFD, FD_CLOEXEC) >= 0;
  if (!marked) {
    // Preserve the fcntl() error across the close() calls.
    const int saved_errno = errno;
    VOID_TEMP_FAILURE_RETRY(close(sv[0]));
    VOID_TEMP_FAILURE_RETRY(close(sv[1]));
    errno = saved_errno;
    return -1;
  }

  return 0;
#endif
}
193
194int accept_cloexec(int sockfd, struct sockaddr* addr, socklen_t* addrlen)
195{
196#ifdef HAVE_ACCEPT4
197 return accept4(sockfd, addr, addrlen, SOCK_CLOEXEC);
198#else
199 int fd = accept(sockfd, addr, addrlen);
200 if (fd == -1)
201 return -1;
202
f67539c2 203 #ifndef _WIN32
91327a77
AA
204 if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0)
205 goto fail;
f67539c2 206 #endif
91327a77
AA
207
208 return fd;
209fail:
210 int save_errno = errno;
211 VOID_TEMP_FAILURE_RETRY(close(fd));
212 return (errno = save_errno, -1);
213#endif
214}
11fdf7f2
TL
215
216#if defined(__FreeBSD__)
217int sched_setaffinity(pid_t pid, size_t cpusetsize,
218 cpu_set_t *mask)
219{
220 return 0;
221}
222#endif
223
9f95a23c
TL
/*
 * Portable wrapper over the platform's reentrant strerror variant.
 *
 * Returns a pointer to the message for errnum. On the Windows and int-
 * returning strerror_r builds this is always `buf`; with
 * STRERROR_R_CHAR_P it is whatever strerror_r() returns.
 */
char *ceph_strerror_r(int errnum, char *buf, size_t buflen)
{
#ifdef _WIN32
  strerror_s(buf, buflen, errnum);
  return buf;
#elif defined(STRERROR_R_CHAR_P)
  return strerror_r(errnum, buf, buflen);
#else
  if (strerror_r(errnum, buf, buflen) == 0) {
    return buf;
  }
  // strerror_r() reported a failure: fall back to a generic message.
  snprintf(buf, buflen, "Unknown error %d", errnum);
  return buf;
#endif
}
f67539c2
TL
238
/*
 * Zero `count` bytes at `dest` in a way the compiler may not optimize away.
 *
 * dest   - buffer to clear
 * destsz - size of the buffer behind dest
 * count  - number of bytes to clear
 *
 * Returns 0 on success and a non-zero error value otherwise. With
 * memset_s() the result is whatever memset_s() returns. The fallback
 * paths never write past destsz: when count > destsz only destsz bytes are
 * cleared and ERANGE is returned; a NULL dest with a non-zero count yields
 * EINVAL.
 */
int ceph_memzero_s(void *dest, size_t destsz, size_t count) {
#ifdef HAVE_MEMSET_S
  return memset_s(dest, destsz, 0, count);
#else
  if (dest == NULL) {
    return count == 0 ? 0 : EINVAL;
  }
  // SecureZeroMemory()/explicit_bzero() know nothing about the buffer
  // size, so clamp here instead of overrunning the destination.
  const size_t n = count > destsz ? destsz : count;
# if defined(_WIN32)
  SecureZeroMemory(dest, n);
# else
  explicit_bzero(dest, n);
# endif
  return count > destsz ? ERANGE : 0;
#endif
}
249
250#ifdef _WIN32
251
252#include <iomanip>
253#include <ctime>
254
255// chown is not available on Windows. Plus, changing file owners is not
256// a common practice on Windows.
// Windows stub: ignores all arguments and reports success.
int chown(const char *path, uid_t owner, gid_t group) {
  (void)path;
  (void)owner;
  (void)group;
  return 0;
}
260
// Windows stub: ignores all arguments and reports success.
int fchown(int fd, uid_t owner, gid_t group) {
  (void)fd;
  (void)owner;
  (void)group;
  return 0;
}
264
// Windows stub: ignores all arguments and reports success.
int lchown(const char *path, uid_t owner, gid_t group) {
  (void)path;
  (void)owner;
  (void)group;
  return 0;
}
268
269int posix_memalign(void **memptr, size_t alignment, size_t size) {
270 *memptr = _aligned_malloc(size, alignment);
271 return *memptr ? 0 : errno;
272}
273
/*
 * Windows replacement for strptime() built on std::get_time.
 *
 * Parses `s` according to `format` into `tm`, using the locale reported by
 * setlocale(LC_ALL, nullptr).
 *
 * Returns a pointer to the first character of `s` that was not consumed
 * (the terminating NUL when everything was consumed), or nullptr when
 * parsing failed.
 */
char *strptime(const char *s, const char *format, struct tm *tm) {
  std::istringstream input(s);
  input.imbue(std::locale(setlocale(LC_ALL, nullptr)));
  input >> std::get_time(tm, format);
  if (input.fail()) {
    return nullptr;
  }
  // Once eofbit is set, tellg() reports -1 instead of a position, which
  // would make the result point one byte before `s`. Reaching end-of-input
  // without a failure means the whole string was consumed.
  if (input.eof()) {
    return const_cast<char*>(s) + strlen(s);
  }
  const std::streamoff consumed = input.tellg();
  if (consumed < 0) {
    return nullptr;
  }
  return const_cast<char*>(s) + consumed;
}
283
284int pipe(int pipefd[2]) {
285 // We'll use the same pipe size as Linux (64kb).
286 return _pipe(pipefd, 0x10000, O_NOINHERIT);
287}
288
// lrand48 is not available on Windows. We'll generate a pseudo-random
// value in the [0, 2^31) range by combining three rand() calls.
//
// Only the low 15 bits of each rand() result are used (the Windows CRT
// documents RAND_MAX as 0x7fff). Two calls shifted by 16 would leave bit 15
// permanently zero, so the value is assembled as 15 + 15 + 1 bits.
long int lrand48(void) {
  long int val = 0;
  val |= (long int)(rand() & 0x7fff) << 16;  // bits 16..30
  val |= (long int)(rand() & 0x7fff) << 1;   // bits 1..15
  val |= (long int)(rand() & 0x1);           // bit 0
  return val;
}
298
299int random() {
300 return rand();
301}
302
303int fsync(int fd) {
304 HANDLE handle = (HANDLE*)_get_osfhandle(fd);
305 if (handle == INVALID_HANDLE_VALUE)
306 return -1;
307 if (!FlushFileBuffers(handle))
308 return -1;
309 return 0;
310}
311
312ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset) {
313 DWORD bytes_written = 0;
314
315 HANDLE handle = (HANDLE*)_get_osfhandle(fd);
316 if (handle == INVALID_HANDLE_VALUE)
317 return -1;
318
319 OVERLAPPED overlapped = { 0 };
320 ULARGE_INTEGER offsetUnion;
321 offsetUnion.QuadPart = offset;
322
323 overlapped.Offset = offsetUnion.LowPart;
324 overlapped.OffsetHigh = offsetUnion.HighPart;
325
326 if (!WriteFile(handle, buf, count, &bytes_written, &overlapped))
327 // we may consider mapping error codes, although that may
328 // not be exhaustive.
329 return -1;
330
331 return bytes_written;
332}
333
334ssize_t pread(int fd, void *buf, size_t count, off_t offset) {
335 DWORD bytes_read = 0;
336
337 HANDLE handle = (HANDLE*)_get_osfhandle(fd);
338 if (handle == INVALID_HANDLE_VALUE)
339 return -1;
340
341 OVERLAPPED overlapped = { 0 };
342 ULARGE_INTEGER offsetUnion;
343 offsetUnion.QuadPart = offset;
344
345 overlapped.Offset = offsetUnion.LowPart;
346 overlapped.OffsetHigh = offsetUnion.HighPart;
347
348 if (!ReadFile(handle, buf, count, &bytes_read, &overlapped)) {
349 if (GetLastError() != ERROR_HANDLE_EOF)
350 return -1;
351 }
352
353 return bytes_read;
354}
355
356ssize_t preadv(int fd, const struct iovec *iov, int iov_cnt) {
357 ssize_t read = 0;
358
359 for (int i = 0; i < iov_cnt; i++) {
360 int r = ::read(fd, iov[i].iov_base, iov[i].iov_len);
361 if (r < 0)
362 return r;
363 read += r;
364 if (r < iov[i].iov_len)
365 break;
366 }
367
368 return read;
369}
370
371ssize_t writev(int fd, const struct iovec *iov, int iov_cnt) {
372 ssize_t written = 0;
373
374 for (int i = 0; i < iov_cnt; i++) {
375 int r = ::write(fd, iov[i].iov_base, iov[i].iov_len);
376 if (r < 0)
377 return r;
378 written += r;
379 if (r < iov[i].iov_len)
380 break;
381 }
382
383 return written;
384}
385
// Increments a function-local thread-local counter and returns a reference
// to it.
int &alloc_tls() {
  static __thread int tlsvar;
  return ++tlsvar;
}
391
392void apply_tls_workaround() {
393 // Workaround for the following Mingw bugs:
394 // https://sourceforge.net/p/mingw-w64/bugs/727/
395 // https://sourceforge.net/p/mingw-w64/bugs/527/
396 // https://sourceforge.net/p/mingw-w64/bugs/445/
397 // https://gcc.gnu.org/bugzilla/attachment.cgi?id=41382
398 pthread_key_t key;
399 pthread_key_create(&key, nullptr);
400 // Use a TLS slot for emutls
401 alloc_tls();
402 // Free up a slot that can now be used for c++ destructors
403 pthread_key_delete(key);
404}
405
406CEPH_CONSTRUCTOR(ceph_windows_init) {
407 // This will run at startup time before invoking main().
408 WSADATA wsaData;
409 int error;
410
411 #ifdef __MINGW32__
412 apply_tls_workaround();
413 #endif
414
415 error = WSAStartup(MAKEWORD(2, 2), &wsaData);
416 if (error != 0) {
417 fprintf(stderr, "WSAStartup failed: %d", WSAGetLastError());
418 exit(error);
419 }
420}
421
422int _win_socketpair(int socks[2])
423{
424 union {
425 struct sockaddr_in inaddr;
426 struct sockaddr addr;
427 } a;
428 SOCKET listener;
429 int e;
430 socklen_t addrlen = sizeof(a.inaddr);
431 int reuse = 1;
432
433 if (socks == 0) {
434 WSASetLastError(WSAEINVAL);
435 return -1;
436 }
437
438 listener = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
439 if (listener == INVALID_SOCKET) {
440 return -1;
441 }
442
443 memset(&a, 0, sizeof(a));
444 a.inaddr.sin_family = AF_INET;
445 a.inaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
446 a.inaddr.sin_port = 0;
447
448 socks[0] = socks[1] = -1;
449 SOCKET s[2] = { INVALID_SOCKET, INVALID_SOCKET };
450
451 do {
452 if (setsockopt(listener, SOL_SOCKET, SO_REUSEADDR,
453 (char*) &reuse, (socklen_t) sizeof(reuse)) == -1)
454 break;
455 if (bind(listener, &a.addr, sizeof(a.inaddr)) == SOCKET_ERROR)
456 break;
457 if (getsockname(listener, &a.addr, &addrlen) == SOCKET_ERROR)
458 break;
459 if (listen(listener, 1) == SOCKET_ERROR)
460 break;
461 s[0] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
462 if (s[0] == INVALID_SOCKET)
463 break;
464 if (connect(s[0], &a.addr, sizeof(a.inaddr)) == SOCKET_ERROR)
465 break;
466 s[1] = accept(listener, NULL, NULL);
467 if (s[1] == INVALID_SOCKET)
468 break;
469
470 closesocket(listener);
471
472 // The Windows socket API is mostly compatible with the Berkeley
473 // API, with a few exceptions. The Windows socket functions use
474 // SOCKET instead of int. The issue is that on x64 systems,
475 // SOCKET uses 64b while int uses 32b. There's been much debate
476 // whether casting a Windows socket to an int is safe or not.
477 // Worth noting that Windows kernel objects use 32b. For now,
478 // we're just adding a check.
479 //
480 // Ideally, we should update ceph to use the right type but this
481 // can be quite difficult, especially considering that there are
482 // a significant number of functions that accept both sockets and
483 // file descriptors.
484 if (s[0] >> 32 || s[1] >> 32) {
485 WSASetLastError(WSAENAMETOOLONG);
486 break;
487 }
488
489 socks[0] = s[0];
490 socks[1] = s[1];
491
492 return 0;
493
494 } while (0);
495
496 e = WSAGetLastError();
497 closesocket(listener);
498 closesocket(s[0]);
499 closesocket(s[1]);
500 WSASetLastError(e);
501 return -1;
502}
503
504int win_socketpair(int socks[2]) {
505 int r = 0;
506 for (int i = 0; i < 15; i++) {
507 r = _win_socketpair(socks);
508 if (r && WSAGetLastError() == WSAEADDRINUSE) {
509 sleep(2);
510 continue;
511 }
512 else {
513 break;
514 }
515 }
516 return r;
517}
518
519unsigned get_page_size() {
520 SYSTEM_INFO system_info;
521 GetSystemInfo(&system_info);
522 return system_info.dwPageSize;
523}
524
525int setenv(const char *name, const char *value, int overwrite) {
526 if (!overwrite && getenv(name)) {
527 return 0;
528 }
529 return _putenv_s(name, value);
530}
531
532ssize_t get_self_exe_path(char* path, int buff_length) {
533 return GetModuleFileName(NULL, path, buff_length - 1);
534}
535
// Windows stub: always reports id 0.
int geteuid()
{
  const int id = 0;
  return id;
}
540
// Windows stub: always reports id 0.
int getegid()
{
  const int id = 0;
  return id;
}
545
// Windows stub: always reports id 0.
int getuid()
{
  const int id = 0;
  return id;
}
550
// Windows stub: always reports id 0.
int getgid()
{
  const int id = 0;
  return id;
}
555
556#else
557
// Returns the page size reported by sysconf(_SC_PAGESIZE), converted to
// unsigned.
unsigned get_page_size() {
  const long page_size = sysconf(_SC_PAGESIZE);
  return (unsigned)page_size;
}
561
/*
 * Resolve the path of the running executable from /proc/self/exe.
 *
 * path        - destination buffer
 * buff_length - size of `path` in bytes
 *
 * At most buff_length - 1 bytes are stored and the result is
 * NUL-terminated. Returns the number of bytes stored (excluding the
 * terminator), or -1 with errno set on failure.
 */
ssize_t get_self_exe_path(char* path, int buff_length) {
  if (path == NULL || buff_length < 1) {
    errno = EINVAL;
    return -1;
  }
  // The limit must come from the caller's buffer length;
  // sizeof(buff_length) is just sizeof(int) and capped the result at 3 bytes.
  const ssize_t len = readlink("/proc/self/exe", path,
                               (size_t)buff_length - 1);
  if (len >= 0) {
    // readlink() does not append a terminator; one byte was reserved above.
    path[len] = '\0';
  }
  return len;
}
566
567#endif /* _WIN32 */