]> git.proxmox.com Git - ceph.git/blob - ceph/src/pmdk/src/common/file.c
import ceph 16.2.7
[ceph.git] / ceph / src / pmdk / src / common / file.c
1 // SPDX-License-Identifier: BSD-3-Clause
2 /* Copyright 2014-2020, Intel Corporation */
3
4 /*
5 * file.c -- file utilities
6 */
7
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <errno.h>
12 #include <fcntl.h>
13 #include <unistd.h>
14 #include <limits.h>
15 #include <sys/file.h>
16 #include <sys/mman.h>
17
18 #if !defined(_WIN32) && !defined(__FreeBSD__)
19 #include <sys/sysmacros.h>
20 #endif
21
22 #include "../libpmem2/config.h"
23 #include "../libpmem2/pmem2_utils.h"
24 #include "file.h"
25 #include "os.h"
26 #include "out.h"
27 #include "mmap.h"
28
29 #define DEVICE_DAX_ZERO_LEN (2 * MEGABYTE) /* length passed to util_file_zero() when util_unlink() is called on a device dax */
30
/*
 * util_file_exists -- tests whether the given path exists
 *
 * Returns 1 when os_access(path, F_OK) succeeds, 0 when it fails with
 * ENOENT, and -1 (after logging the error) for any other failure.
 */
int
util_file_exists(const char *path)
{
	LOG(3, "path \"%s\"", path);

	if (os_access(path, F_OK) != 0) {
		if (errno == ENOENT) {
			/*
			 * ENOENT means that some component of the pathname
			 * does not exist.
			 *
			 * XXX - we should also call os_access on the parent
			 * directory and return -1 if that also results in
			 * ENOENT.
			 *
			 * The problem is that we would need to use realpath,
			 * which fails if the file does not exist.
			 */
			return 0;
		}

		ERR("!os_access \"%s\"", path);
		return -1;
	}

	return 1;
}
59
60 /*
61 * util_stat_get_type -- checks whether stat structure describes
62 * device dax or a normal file
63 */
64 enum file_type
65 util_stat_get_type(const os_stat_t *st)
66 {
67 enum pmem2_file_type type;
68
69 int ret = pmem2_get_type_from_stat(st, &type);
70 if (ret) {
71 errno = pmem2_err_to_errno(ret);
72 return OTHER_ERROR;
73 }
74
75 if (type == PMEM2_FTYPE_REG || type == PMEM2_FTYPE_DIR)
76 return TYPE_NORMAL;
77
78 if (type == PMEM2_FTYPE_DEVDAX)
79 return TYPE_DEVDAX;
80
81 ASSERTinfo(0, "unhandled file type in util_stat_get_type");
82 return OTHER_ERROR;
83 }
84
85 /*
86 * util_fd_get_type -- checks whether a file descriptor is associated
87 * with a device dax or a normal file
88 */
89 enum file_type
90 util_fd_get_type(int fd)
91 {
92 LOG(3, "fd %d", fd);
93
94 #ifdef _WIN32
95 return TYPE_NORMAL;
96 #else
97 os_stat_t st;
98
99 if (os_fstat(fd, &st) < 0) {
100 ERR("!fstat");
101 return OTHER_ERROR;
102 }
103
104 return util_stat_get_type(&st);
105 #endif
106 }
107
108 /*
109 * util_file_get_type -- checks whether the path points to a device dax,
110 * normal file or non-existent file
111 */
112 enum file_type
113 util_file_get_type(const char *path)
114 {
115 LOG(3, "path \"%s\"", path);
116
117 if (path == NULL) {
118 ERR("invalid (NULL) path");
119 errno = EINVAL;
120 return OTHER_ERROR;
121 }
122
123 int exists = util_file_exists(path);
124 if (exists < 0)
125 return OTHER_ERROR;
126
127 if (!exists)
128 return NOT_EXISTS;
129
130 #ifdef _WIN32
131 return TYPE_NORMAL;
132 #else
133 os_stat_t st;
134
135 if (os_stat(path, &st) < 0) {
136 ERR("!stat");
137 return OTHER_ERROR;
138 }
139
140 return util_stat_get_type(&st);
141 #endif
142 }
143
/*
 * util_file_get_size -- returns size of a file
 *
 * Opens the path read-only, queries the size with util_fd_get_size and
 * closes the descriptor again. Returns the size, or -1 when the file
 * cannot be opened or its size cannot be determined.
 */
ssize_t
util_file_get_size(const char *path)
{
	LOG(3, "path \"%s\"", path);

	int fd = os_open(path, O_RDONLY);
	if (fd < 0) {
		ERR("!open");
		return -1;
	}

	ssize_t size = util_fd_get_size(fd);

	/*
	 * Close through the os_* wrapper like the rest of this file, and
	 * keep the errno set by util_fd_get_size from being overwritten
	 * by the close.
	 */
	int olderrno = errno;
	(void) os_close(fd);
	errno = olderrno;

	return size;
}
163
/*
 * util_fd_get_size -- returns size of a file behind a given file descriptor
 *
 * The size is obtained through a temporary pmem2 source created from the
 * descriptor. Returns the size on success. On failure returns -1 with errno
 * set either from the pmem2 error code or to ERANGE when the size does not
 * fit the signed return type.
 */
ssize_t
util_fd_get_size(int fd)
{
	LOG(3, "fd %d", fd);

	struct pmem2_source *src;
	size_t size;
	int ret;

	if ((ret = pmem2_source_from_fd(&src, fd)) != 0) {
		errno = pmem2_err_to_errno(ret);
		return -1;
	}

	ret = pmem2_source_size(src, &size);

	/* the source is only needed for the size query */
	pmem2_source_delete(&src);

	if (ret) {
		errno = pmem2_err_to_errno(ret);
		return -1;
	}

	/* size is unsigned, this function returns signed */
	if (size >= INT64_MAX) {
		errno = ERANGE;
		/* size is a size_t, so it must be printed with %zu */
		ERR(
			"file size (%zu) too big to be represented in 64-bit signed integer",
			size);
		return -1;
	}

	LOG(4, "file length %zu", size);
	return (ssize_t)size;
}
202
/*
 * util_file_map_whole -- opens the file read-write and maps all of it
 *
 * Returns the address of a MAP_SHARED mapping covering the whole file, or
 * NULL when the file cannot be opened, sized or mapped. The descriptor is
 * closed before returning and errno is preserved across that close.
 */
void *
util_file_map_whole(const char *path)
{
	LOG(3, "path \"%s\"", path);

	int oflags = O_RDWR;
#ifdef _WIN32
	oflags |= O_BINARY;
#endif

	int fd = os_open(path, oflags);
	if (fd < 0) {
		ERR("!open \"%s\"", path);
		return NULL;
	}

	void *base = NULL;
	ssize_t fsize = util_fd_get_size(fd);

	if (fsize < 0) {
		LOG(2, "cannot determine file length \"%s\"", path);
	} else {
		base = util_map(fd, 0, (size_t)fsize, MAP_SHARED, 0, 0, NULL);
		if (base == NULL)
			LOG(2, "failed to map entire file \"%s\"", path);
	}

	/* closing the descriptor must not disturb the reported errno */
	int saved_errno = errno;
	(void) os_close(fd);
	errno = saved_errno;

	return base;
}
243
244 /*
245 * util_file_zero -- zeroes the specified region of the file
246 */
247 int
248 util_file_zero(const char *path, os_off_t off, size_t len)
249 {
250 LOG(3, "path \"%s\" off %ju len %zu", path, off, len);
251
252 int fd;
253 int olderrno;
254 int ret = 0;
255 int flags = O_RDWR;
256 #ifdef _WIN32
257 flags |= O_BINARY;
258 #endif
259
260 if ((fd = os_open(path, flags)) < 0) {
261 ERR("!open \"%s\"", path);
262 return -1;
263 }
264
265 ssize_t size = util_fd_get_size(fd);
266 if (size < 0) {
267 LOG(2, "cannot determine file length \"%s\"", path);
268 ret = -1;
269 goto out;
270 }
271
272 if (off > size) {
273 LOG(2, "offset beyond file length, %ju > %ju", off, size);
274 ret = -1;
275 goto out;
276 }
277
278 if ((size_t)off + len > (size_t)size) {
279 LOG(2, "requested size of write goes beyond the file length, "
280 "%zu > %zu", (size_t)off + len, size);
281 LOG(4, "adjusting len to %zu", size - off);
282 len = (size_t)(size - off);
283 }
284
285 void *addr = util_map(fd, 0, (size_t)size, MAP_SHARED, 0, 0, NULL);
286 if (addr == NULL) {
287 LOG(2, "failed to map entire file \"%s\"", path);
288 ret = -1;
289 goto out;
290 }
291
292 /* zero initialize the specified region */
293 memset((char *)addr + off, 0, len);
294
295 util_unmap(addr, (size_t)size);
296
297 out:
298 olderrno = errno;
299 (void) os_close(fd);
300 errno = olderrno;
301
302 return ret;
303 }
304
305 /*
306 * util_file_pwrite -- writes to a file with an offset
307 */
308 ssize_t
309 util_file_pwrite(const char *path, const void *buffer, size_t size,
310 os_off_t offset)
311 {
312 LOG(3, "path \"%s\" buffer %p size %zu offset %ju",
313 path, buffer, size, offset);
314
315 enum file_type type = util_file_get_type(path);
316 if (type < 0)
317 return -1;
318
319 if (type == TYPE_NORMAL) {
320 int fd = util_file_open(path, NULL, 0, O_RDWR);
321 if (fd < 0) {
322 LOG(2, "failed to open file \"%s\"", path);
323 return -1;
324 }
325
326 ssize_t write_len = pwrite(fd, buffer, size, offset);
327 int olderrno = errno;
328 (void) os_close(fd);
329 errno = olderrno;
330 return write_len;
331 }
332
333 ssize_t file_size = util_file_get_size(path);
334 if (file_size < 0) {
335 LOG(2, "cannot determine file length \"%s\"", path);
336 return -1;
337 }
338
339 size_t max_size = (size_t)(file_size - offset);
340 if (size > max_size) {
341 LOG(2, "requested size of write goes beyond the file length, "
342 "%zu > %zu", size, max_size);
343 LOG(4, "adjusting size to %zu", max_size);
344 size = max_size;
345 }
346
347 void *addr = util_file_map_whole(path);
348 if (addr == NULL) {
349 LOG(2, "failed to map entire file \"%s\"", path);
350 return -1;
351 }
352
353 memcpy(ADDR_SUM(addr, offset), buffer, size);
354 util_unmap(addr, (size_t)file_size);
355 return (ssize_t)size;
356 }
357
358 /*
359 * util_file_pread -- reads from a file with an offset
360 */
361 ssize_t
362 util_file_pread(const char *path, void *buffer, size_t size,
363 os_off_t offset)
364 {
365 LOG(3, "path \"%s\" buffer %p size %zu offset %ju",
366 path, buffer, size, offset);
367
368 enum file_type type = util_file_get_type(path);
369 if (type < 0)
370 return -1;
371
372 if (type == TYPE_NORMAL) {
373 int fd = util_file_open(path, NULL, 0, O_RDONLY);
374 if (fd < 0) {
375 LOG(2, "failed to open file \"%s\"", path);
376 return -1;
377 }
378
379 ssize_t read_len = pread(fd, buffer, size, offset);
380 int olderrno = errno;
381 (void) os_close(fd);
382 errno = olderrno;
383 return read_len;
384 }
385
386 ssize_t file_size = util_file_get_size(path);
387 if (file_size < 0) {
388 LOG(2, "cannot determine file length \"%s\"", path);
389 return -1;
390 }
391
392 size_t max_size = (size_t)(file_size - offset);
393 if (size > max_size) {
394 LOG(2, "requested size of read goes beyond the file length, "
395 "%zu > %zu", size, max_size);
396 LOG(4, "adjusting size to %zu", max_size);
397 size = max_size;
398 }
399
400 void *addr = util_file_map_whole(path);
401 if (addr == NULL) {
402 LOG(2, "failed to map entire file \"%s\"", path);
403 return -1;
404 }
405
406 memcpy(buffer, ADDR_SUM(addr, offset), size);
407 util_unmap(addr, (size_t)file_size);
408 return (ssize_t)size;
409 }
410
411 /*
412 * util_file_create -- create a new memory pool file
413 */
414 int
415 util_file_create(const char *path, size_t size, size_t minsize)
416 {
417 LOG(3, "path \"%s\" size %zu minsize %zu", path, size, minsize);
418
419 ASSERTne(size, 0);
420
421 if (size < minsize) {
422 ERR("size %zu smaller than %zu", size, minsize);
423 errno = EINVAL;
424 return -1;
425 }
426
427 if (((os_off_t)size) < 0) {
428 ERR("invalid size (%zu) for os_off_t", size);
429 errno = EFBIG;
430 return -1;
431 }
432
433 int fd;
434 int mode;
435 int flags = O_RDWR | O_CREAT | O_EXCL;
436 #ifndef _WIN32
437 mode = 0;
438 #else
439 mode = S_IWRITE | S_IREAD;
440 flags |= O_BINARY;
441 #endif
442
443 /*
444 * Create file without any permission. It will be granted once
445 * initialization completes.
446 */
447 if ((fd = os_open(path, flags, mode)) < 0) {
448 ERR("!open \"%s\"", path);
449 return -1;
450 }
451
452 if ((errno = os_posix_fallocate(fd, 0, (os_off_t)size)) != 0) {
453 ERR("!posix_fallocate \"%s\", %zu", path, size);
454 goto err;
455 }
456
457 /* for windows we can't flock until after we fallocate */
458 if (os_flock(fd, OS_LOCK_EX | OS_LOCK_NB) < 0) {
459 ERR("!flock \"%s\"", path);
460 goto err;
461 }
462
463 return fd;
464
465 err:
466 LOG(4, "error clean up");
467 int oerrno = errno;
468 if (fd != -1)
469 (void) os_close(fd);
470 os_unlink(path);
471 errno = oerrno;
472 return -1;
473 }
474
475 /*
476 * util_file_open -- open a memory pool file
477 */
478 int
479 util_file_open(const char *path, size_t *size, size_t minsize, int flags)
480 {
481 LOG(3, "path \"%s\" size %p minsize %zu flags %d", path, size, minsize,
482 flags);
483
484 int oerrno;
485 int fd;
486
487 #ifdef _WIN32
488 flags |= O_BINARY;
489 #endif
490
491 if ((fd = os_open(path, flags)) < 0) {
492 ERR("!open \"%s\"", path);
493 return -1;
494 }
495
496 if (os_flock(fd, OS_LOCK_EX | OS_LOCK_NB) < 0) {
497 ERR("!flock \"%s\"", path);
498 (void) os_close(fd);
499 return -1;
500 }
501
502 if (size || minsize) {
503 if (size)
504 ASSERTeq(*size, 0);
505
506 ssize_t actual_size = util_fd_get_size(fd);
507 if (actual_size < 0) {
508 ERR("stat \"%s\": negative size", path);
509 errno = EINVAL;
510 goto err;
511 }
512
513 if ((size_t)actual_size < minsize) {
514 ERR("size %zu smaller than %zu",
515 (size_t)actual_size, minsize);
516 errno = EINVAL;
517 goto err;
518 }
519
520 if (size) {
521 *size = (size_t)actual_size;
522 LOG(4, "actual file size %zu", *size);
523 }
524 }
525
526 return fd;
527 err:
528 oerrno = errno;
529 if (os_flock(fd, OS_LOCK_UN))
530 ERR("!flock unlock");
531 (void) os_close(fd);
532 errno = oerrno;
533 return -1;
534 }
535
536 /*
537 * util_unlink -- unlinks a file or zeroes a device dax
538 */
539 int
540 util_unlink(const char *path)
541 {
542 LOG(3, "path \"%s\"", path);
543
544 enum file_type type = util_file_get_type(path);
545 if (type < 0)
546 return -1;
547
548 if (type == TYPE_DEVDAX) {
549 return util_file_zero(path, 0, DEVICE_DAX_ZERO_LEN);
550 } else {
551 #ifdef _WIN32
552 /* on Windows we can not unlink Read-Only files */
553 if (os_chmod(path, S_IREAD | S_IWRITE) == -1) {
554 ERR("!chmod \"%s\"", path);
555 return -1;
556 }
557 #endif
558 return os_unlink(path);
559 }
560 }
561
/*
 * util_unlink_flock -- flocks the file and unlinks it
 *
 * The unlink(2) call on a file which is opened and locked using flock(2)
 * by different process works on linux. Thus in order to forbid removing a
 * pool when in use by different process we need to flock(2) the pool files
 * first before unlinking.
 *
 * Returns the result of util_unlink, or -1 when the file cannot be opened
 * and locked. errno from util_unlink is preserved across the final close.
 */
int
util_unlink_flock(const char *path)
{
	LOG(3, "path \"%s\"", path);

	/* use _WIN32 like every other platform check in this file */
#ifdef _WIN32
	/*
	 * On Windows it is not possible to unlink the
	 * file if it is flocked.
	 */
	return util_unlink(path);
#else
	/* util_file_open takes the exclusive, non-blocking flock */
	int fd = util_file_open(path, NULL, 0, O_RDONLY);
	if (fd < 0) {
		LOG(2, "failed to open file \"%s\"", path);
		return -1;
	}

	int ret = util_unlink(path);

	/* closing the descriptor must not disturb the reported errno */
	int olderrno = errno;
	(void) os_close(fd);
	errno = olderrno;

	return ret;
#endif
}
595
/*
 * util_write_all -- keeps calling util_write until everything is written
 *
 * Writes exactly count bytes from buf to the file referred to by fd.
 * Returns 0 once all bytes were written and -1 as soon as util_write
 * reports an error or writes nothing.
 */
int
util_write_all(int fd, const char *buf, size_t count)
{
	size_t done = 0;

	while (done < count) {
		ssize_t chunk = util_write(fd, buf + done, count - done);

		if (chunk <= 0)
			return -1;

		done += (size_t)chunk;
	}

	return 0;
}
618 }