]> git.proxmox.com Git - ceph.git/blame - ceph/src/arrow/cpp/src/arrow/util/io_util.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / util / io_util.cc
CommitLineData
1d09f67e
TL
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// Ensure 64-bit off_t for platforms where it matters
19#ifdef _FILE_OFFSET_BITS
20#undef _FILE_OFFSET_BITS
21#endif
22
23#define _FILE_OFFSET_BITS 64
24
25#if defined(sun) || defined(__sun)
26// According to https://bugs.python.org/issue1759169#msg82201, __EXTENSIONS__
27// is the best way to enable modern POSIX APIs, such as posix_madvise(), on Solaris.
28// (see also
29// https://github.com/illumos/illumos-gate/blob/master/usr/src/uts/common/sys/mman.h)
30#undef __EXTENSIONS__
31#define __EXTENSIONS__
32#endif
33
34#include "arrow/util/windows_compatibility.h" // IWYU pragma: keep
35
36#include <algorithm>
37#include <cerrno>
38#include <cstdint>
39#include <cstring>
40#include <iostream>
41#include <random>
42#include <sstream>
43#include <string>
44#include <thread>
45#include <utility>
46#include <vector>
47
48#include <fcntl.h>
49#include <signal.h>
50#include <stdlib.h>
51#include <sys/stat.h>
52#include <sys/types.h> // IWYU pragma: keep
53
54// ----------------------------------------------------------------------
55// file compatibility stuff
56
57#ifdef _WIN32
58#include <io.h>
59#include <share.h>
60#else // POSIX-like platforms
61#include <dirent.h>
62#endif
63
64#ifdef _WIN32
65#include "arrow/io/mman.h"
66#undef Realloc
67#undef Free
68#else // POSIX-like platforms
69#include <sys/mman.h>
70#include <unistd.h>
71#endif
72
73// define max read/write count
74#ifdef _WIN32
75#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX
76#else
77
78#ifdef __APPLE__
79// due to macOS bug, we need to set read/write max
80#define ARROW_MAX_IO_CHUNKSIZE INT32_MAX
81#else
82// see notes on Linux read/write manpage
83#define ARROW_MAX_IO_CHUNKSIZE 0x7ffff000
84#endif
85
86#endif
87
88#include "arrow/buffer.h"
89#include "arrow/result.h"
90#include "arrow/util/checked_cast.h"
91#include "arrow/util/io_util.h"
92#include "arrow/util/logging.h"
93
94// For filename conversion
95#if defined(_WIN32)
96#include "arrow/util/utf8.h"
97#endif
98
99namespace arrow {
100
101using internal::checked_cast;
102
103namespace internal {
104
105namespace {
106
// Return a copy of `s` in which every occurrence of `find` has been
// substituted with `rep`.
template <typename CharT>
std::basic_string<CharT> ReplaceChars(std::basic_string<CharT> s, CharT find, CharT rep) {
  if (find == rep) {
    // Substitution would be a no-op, hand the string back untouched
    return s;
  }
  std::replace(s.begin(), s.end(), find, rep);
  return s;
}
118
119Result<NativePathString> StringToNative(const std::string& s) {
120#if _WIN32
121 return ::arrow::util::UTF8ToWideString(s);
122#else
123 return s;
124#endif
125}
126
#if _WIN32
// Convert a native (wide) path string to a std::string using WideStringToUTF8().
Result<std::string> NativeToString(const NativePathString& ws) {
  auto utf8 = ::arrow::util::WideStringToUTF8(ws);
  return utf8;
}
#endif
132
// Path separator constants for the current platform:
// - kNativeSep: the separator stored in native paths
// - kGenericSep: the '/' separator used in generic paths
// - kAllSeps: every character recognized as a separator when parsing a path
// All of them are compile-time constants; in particular kAllSeps can no longer
// be re-pointed at runtime.
#if _WIN32
constexpr wchar_t kNativeSep = L'\\';
constexpr wchar_t kGenericSep = L'/';
constexpr const wchar_t* kAllSeps = L"\\/";
#else
constexpr char kNativeSep = '/';
constexpr char kGenericSep = '/';
constexpr const char* kAllSeps = "/";
#endif
142
143NativePathString NativeSlashes(NativePathString s) {
144 return ReplaceChars(std::move(s), kGenericSep, kNativeSep);
145}
146
147NativePathString GenericSlashes(NativePathString s) {
148 return ReplaceChars(std::move(s), kNativeSep, kGenericSep);
149}
150
151NativePathString NativeParent(const NativePathString& s) {
152 auto last_sep = s.find_last_of(kAllSeps);
153 if (last_sep == s.length() - 1) {
154 // Last separator is a trailing separator, skip all trailing separators
155 // and try again
156 auto before_last_seps = s.find_last_not_of(kAllSeps);
157 if (before_last_seps == NativePathString::npos) {
158 // Only separators in path
159 return s;
160 }
161 last_sep = s.find_last_of(kAllSeps, before_last_seps);
162 }
163 if (last_sep == NativePathString::npos) {
164 // No (other) separator in path
165 return s;
166 }
167 // There may be multiple contiguous separators, skip all of them
168 auto before_last_seps = s.find_last_not_of(kAllSeps, last_sep);
169 if (before_last_seps == NativePathString::npos) {
170 // All separators are at start of string, keep them all
171 return s.substr(0, last_sep + 1);
172 } else {
173 return s.substr(0, before_last_seps + 1);
174 }
175}
176
177Status ValidatePath(const std::string& s) {
178 if (s.find_first_of('\0') != std::string::npos) {
179 return Status::Invalid("Embedded NUL char in path: '", s, "'");
180 }
181 return Status::OK();
182}
183
184} // namespace
185
// Return the std::strerror() description of the given errno value.
std::string ErrnoMessage(int errnum) {
  const char* description = std::strerror(errnum);
  return std::string(description);
}
187
#if _WIN32
// Return the message FormatMessageA() produces for a Windows error code, or a
// generic "Windows error #N" string when no message could be formatted.
std::string WinErrorMessage(int errnum) {
  char buf[1024];
  const auto nchars = FormatMessageA(
      FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, errnum, 0, buf,
      sizeof(buf), NULL);
  if (nchars != 0) {
    return std::string(buf, nchars);
  }
  // Fallback
  std::stringstream fallback;
  fallback << "Windows error #" << errnum;
  return fallback.str();
}
#endif
202
203namespace {
204
205const char kErrnoDetailTypeId[] = "arrow::ErrnoDetail";
206
207class ErrnoDetail : public StatusDetail {
208 public:
209 explicit ErrnoDetail(int errnum) : errnum_(errnum) {}
210
211 const char* type_id() const override { return kErrnoDetailTypeId; }
212
213 std::string ToString() const override {
214 std::stringstream ss;
215 ss << "[errno " << errnum_ << "] " << ErrnoMessage(errnum_);
216 return ss.str();
217 }
218
219 int errnum() const { return errnum_; }
220
221 protected:
222 int errnum_;
223};
224
#if _WIN32
// Tag returned by WinErrorDetail::type_id()
const char kWinErrorDetailTypeId[] = "arrow::WinErrorDetail";

// StatusDetail carrying a Windows error code.
class WinErrorDetail : public StatusDetail {
 public:
  explicit WinErrorDetail(int errnum) : errnum_(errnum) {}

  // The Windows error code this detail was built from
  int errnum() const { return errnum_; }

  const char* type_id() const override { return kWinErrorDetailTypeId; }

  // Format as "[Windows error N] <message>"
  std::string ToString() const override {
    std::ostringstream out;
    out << "[Windows error " << errnum_ << "] ";
    out << WinErrorMessage(errnum_);
    return out.str();
  }

 protected:
  int errnum_;
};
#endif
246
247const char kSignalDetailTypeId[] = "arrow::SignalDetail";
248
249class SignalDetail : public StatusDetail {
250 public:
251 explicit SignalDetail(int signum) : signum_(signum) {}
252
253 const char* type_id() const override { return kSignalDetailTypeId; }
254
255 std::string ToString() const override {
256 std::stringstream ss;
257 ss << "received signal " << signum_;
258 return ss.str();
259 }
260
261 int signum() const { return signum_; }
262
263 protected:
264 int signum_;
265};
266
267} // namespace
268
269std::shared_ptr<StatusDetail> StatusDetailFromErrno(int errnum) {
270 return std::make_shared<ErrnoDetail>(errnum);
271}
272
#if _WIN32
// Build a StatusDetail wrapping the given Windows error code.
std::shared_ptr<StatusDetail> StatusDetailFromWinError(int errnum) {
  std::shared_ptr<StatusDetail> detail = std::make_shared<WinErrorDetail>(errnum);
  return detail;
}
#endif
278
279std::shared_ptr<StatusDetail> StatusDetailFromSignal(int signum) {
280 return std::make_shared<SignalDetail>(signum);
281}
282
283int ErrnoFromStatus(const Status& status) {
284 const auto detail = status.detail();
285 if (detail != nullptr && detail->type_id() == kErrnoDetailTypeId) {
286 return checked_cast<const ErrnoDetail&>(*detail).errnum();
287 }
288 return 0;
289}
290
291int WinErrorFromStatus(const Status& status) {
292#if _WIN32
293 const auto detail = status.detail();
294 if (detail != nullptr && detail->type_id() == kWinErrorDetailTypeId) {
295 return checked_cast<const WinErrorDetail&>(*detail).errnum();
296 }
297#endif
298 return 0;
299}
300
301int SignalFromStatus(const Status& status) {
302 const auto detail = status.detail();
303 if (detail != nullptr && detail->type_id() == kSignalDetailTypeId) {
304 return checked_cast<const SignalDetail&>(*detail).signum();
305 }
306 return 0;
307}
308
309//
310// PlatformFilename implementation
311//
312
313struct PlatformFilename::Impl {
314 Impl() = default;
315 explicit Impl(NativePathString p) : native_(NativeSlashes(std::move(p))) {}
316
317 NativePathString native_;
318
319 // '/'-separated
320 NativePathString generic() const { return GenericSlashes(native_); }
321};
322
323PlatformFilename::PlatformFilename() : impl_(new Impl{}) {}
324
325PlatformFilename::~PlatformFilename() {}
326
327PlatformFilename::PlatformFilename(Impl impl) : impl_(new Impl(std::move(impl))) {}
328
329PlatformFilename::PlatformFilename(const PlatformFilename& other)
330 : PlatformFilename(Impl{other.impl_->native_}) {}
331
332PlatformFilename::PlatformFilename(PlatformFilename&& other)
333 : impl_(std::move(other.impl_)) {}
334
335PlatformFilename& PlatformFilename::operator=(const PlatformFilename& other) {
336 this->impl_.reset(new Impl{other.impl_->native_});
337 return *this;
338}
339
340PlatformFilename& PlatformFilename::operator=(PlatformFilename&& other) {
341 this->impl_ = std::move(other.impl_);
342 return *this;
343}
344
345PlatformFilename::PlatformFilename(const NativePathString& path)
346 : PlatformFilename(Impl{path}) {}
347
348PlatformFilename::PlatformFilename(const NativePathString::value_type* path)
349 : PlatformFilename(NativePathString(path)) {}
350
351bool PlatformFilename::operator==(const PlatformFilename& other) const {
352 return impl_->native_ == other.impl_->native_;
353}
354
355bool PlatformFilename::operator!=(const PlatformFilename& other) const {
356 return impl_->native_ != other.impl_->native_;
357}
358
359const NativePathString& PlatformFilename::ToNative() const { return impl_->native_; }
360
361std::string PlatformFilename::ToString() const {
362#if _WIN32
363 auto result = NativeToString(impl_->generic());
364 if (!result.ok()) {
365 std::stringstream ss;
366 ss << "<Unrepresentable filename: " << result.status().ToString() << ">";
367 return ss.str();
368 }
369 return *std::move(result);
370#else
371 return impl_->generic();
372#endif
373}
374
375PlatformFilename PlatformFilename::Parent() const {
376 return PlatformFilename(NativeParent(ToNative()));
377}
378
379Result<PlatformFilename> PlatformFilename::FromString(const std::string& file_name) {
380 RETURN_NOT_OK(ValidatePath(file_name));
381 ARROW_ASSIGN_OR_RAISE(auto ns, StringToNative(file_name));
382 return PlatformFilename(std::move(ns));
383}
384
385PlatformFilename PlatformFilename::Join(const PlatformFilename& child) const {
386 if (impl_->native_.empty() || impl_->native_.back() == kNativeSep) {
387 return PlatformFilename(Impl{impl_->native_ + child.impl_->native_});
388 } else {
389 return PlatformFilename(Impl{impl_->native_ + kNativeSep + child.impl_->native_});
390 }
391}
392
393Result<PlatformFilename> PlatformFilename::Join(const std::string& child_name) const {
394 ARROW_ASSIGN_OR_RAISE(auto child, PlatformFilename::FromString(child_name));
395 return Join(child);
396}
397
398//
399// Filesystem access routines
400//
401
402namespace {
403
404Result<bool> DoCreateDir(const PlatformFilename& dir_path, bool create_parents) {
405#ifdef _WIN32
406 const auto s = dir_path.ToNative().c_str();
407 if (CreateDirectoryW(s, nullptr)) {
408 return true;
409 }
410 int errnum = GetLastError();
411 if (errnum == ERROR_ALREADY_EXISTS) {
412 const auto attrs = GetFileAttributesW(s);
413 if (attrs == INVALID_FILE_ATTRIBUTES || !(attrs & FILE_ATTRIBUTE_DIRECTORY)) {
414 // Note we propagate the original error, not the GetFileAttributesW() error
415 return IOErrorFromWinError(ERROR_ALREADY_EXISTS, "Cannot create directory '",
416 dir_path.ToString(), "': non-directory entry exists");
417 }
418 return false;
419 }
420 if (create_parents && errnum == ERROR_PATH_NOT_FOUND) {
421 auto parent_path = dir_path.Parent();
422 if (parent_path != dir_path) {
423 RETURN_NOT_OK(DoCreateDir(parent_path, create_parents));
424 return DoCreateDir(dir_path, false); // Retry
425 }
426 }
427 return IOErrorFromWinError(GetLastError(), "Cannot create directory '",
428 dir_path.ToString(), "'");
429#else
430 const auto s = dir_path.ToNative().c_str();
431 if (mkdir(s, S_IRWXU | S_IRWXG | S_IRWXO) == 0) {
432 return true;
433 }
434 if (errno == EEXIST) {
435 struct stat st;
436 if (stat(s, &st) || !S_ISDIR(st.st_mode)) {
437 // Note we propagate the original errno, not the stat() errno
438 return IOErrorFromErrno(EEXIST, "Cannot create directory '", dir_path.ToString(),
439 "': non-directory entry exists");
440 }
441 return false;
442 }
443 if (create_parents && errno == ENOENT) {
444 auto parent_path = dir_path.Parent();
445 if (parent_path != dir_path) {
446 RETURN_NOT_OK(DoCreateDir(parent_path, create_parents));
447 return DoCreateDir(dir_path, false); // Retry
448 }
449 }
450 return IOErrorFromErrno(errno, "Cannot create directory '", dir_path.ToString(), "'");
451#endif
452}
453
454} // namespace
455
456Result<bool> CreateDir(const PlatformFilename& dir_path) {
457 return DoCreateDir(dir_path, false);
458}
459
460Result<bool> CreateDirTree(const PlatformFilename& dir_path) {
461 return DoCreateDir(dir_path, true);
462}
463
464#ifdef _WIN32
465
466namespace {
467
468void FindHandleDeleter(HANDLE* handle) {
469 if (!FindClose(*handle)) {
470 ARROW_LOG(WARNING) << "Cannot close directory handle: "
471 << WinErrorMessage(GetLastError());
472 }
473}
474
475std::wstring PathWithoutTrailingSlash(const PlatformFilename& fn) {
476 std::wstring path = fn.ToNative();
477 while (!path.empty() && path.back() == kNativeSep) {
478 path.pop_back();
479 }
480 return path;
481}
482
483Result<std::vector<WIN32_FIND_DATAW>> ListDirInternal(const PlatformFilename& dir_path) {
484 WIN32_FIND_DATAW find_data;
485 std::wstring pattern = PathWithoutTrailingSlash(dir_path) + L"\\*.*";
486 HANDLE handle = FindFirstFileW(pattern.c_str(), &find_data);
487 if (handle == INVALID_HANDLE_VALUE) {
488 return IOErrorFromWinError(GetLastError(), "Cannot list directory '",
489 dir_path.ToString(), "'");
490 }
491
492 std::unique_ptr<HANDLE, decltype(&FindHandleDeleter)> handle_guard(&handle,
493 FindHandleDeleter);
494
495 std::vector<WIN32_FIND_DATAW> results;
496 do {
497 // Skip "." and ".."
498 if (find_data.cFileName[0] == L'.') {
499 if (find_data.cFileName[1] == L'\0' ||
500 (find_data.cFileName[1] == L'.' && find_data.cFileName[2] == L'\0')) {
501 continue;
502 }
503 }
504 results.push_back(find_data);
505 } while (FindNextFileW(handle, &find_data));
506
507 int errnum = GetLastError();
508 if (errnum != ERROR_NO_MORE_FILES) {
509 return IOErrorFromWinError(GetLastError(), "Cannot list directory '",
510 dir_path.ToString(), "'");
511 }
512 return results;
513}
514
515Status FindOneFile(const PlatformFilename& fn, WIN32_FIND_DATAW* find_data,
516 bool* exists = nullptr) {
517 HANDLE handle = FindFirstFileW(PathWithoutTrailingSlash(fn).c_str(), find_data);
518 if (handle == INVALID_HANDLE_VALUE) {
519 int errnum = GetLastError();
520 if (exists == nullptr ||
521 (errnum != ERROR_PATH_NOT_FOUND && errnum != ERROR_FILE_NOT_FOUND)) {
522 return IOErrorFromWinError(GetLastError(), "Cannot get information for path '",
523 fn.ToString(), "'");
524 }
525 *exists = false;
526 } else {
527 if (exists != nullptr) {
528 *exists = true;
529 }
530 FindHandleDeleter(&handle);
531 }
532 return Status::OK();
533}
534
535} // namespace
536
537Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path) {
538 ARROW_ASSIGN_OR_RAISE(auto entries, ListDirInternal(dir_path));
539
540 std::vector<PlatformFilename> results;
541 results.reserve(entries.size());
542 for (const auto& entry : entries) {
543 results.emplace_back(std::wstring(entry.cFileName));
544 }
545 return results;
546}
547
548#else
549
550Result<std::vector<PlatformFilename>> ListDir(const PlatformFilename& dir_path) {
551 DIR* dir = opendir(dir_path.ToNative().c_str());
552 if (dir == nullptr) {
553 return IOErrorFromErrno(errno, "Cannot list directory '", dir_path.ToString(), "'");
554 }
555
556 auto dir_deleter = [](DIR* dir) -> void {
557 if (closedir(dir) != 0) {
558 ARROW_LOG(WARNING) << "Cannot close directory handle: " << ErrnoMessage(errno);
559 }
560 };
561 std::unique_ptr<DIR, decltype(dir_deleter)> dir_guard(dir, dir_deleter);
562
563 std::vector<PlatformFilename> results;
564 errno = 0;
565 struct dirent* entry = readdir(dir);
566 while (entry != nullptr) {
567 std::string path = entry->d_name;
568 if (path != "." && path != "..") {
569 results.emplace_back(std::move(path));
570 }
571 entry = readdir(dir);
572 }
573 if (errno != 0) {
574 return IOErrorFromErrno(errno, "Cannot list directory '", dir_path.ToString(), "'");
575 }
576 return results;
577}
578
579#endif
580
581namespace {
582
583#ifdef _WIN32
584
585Status DeleteDirTreeInternal(const PlatformFilename& dir_path);
586
587// Remove a directory entry that's always a directory
588Status DeleteDirEntryDir(const PlatformFilename& path, const WIN32_FIND_DATAW& entry,
589 bool remove_top_dir = true) {
590 if ((entry.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) == 0) {
591 // It's a directory that doesn't have a reparse point => recurse
592 RETURN_NOT_OK(DeleteDirTreeInternal(path));
593 }
594 if (remove_top_dir) {
595 // Remove now empty directory or reparse point (e.g. symlink to dir)
596 if (!RemoveDirectoryW(path.ToNative().c_str())) {
597 return IOErrorFromWinError(GetLastError(), "Cannot delete directory entry '",
598 path.ToString(), "': ");
599 }
600 }
601 return Status::OK();
602}
603
604Status DeleteDirEntry(const PlatformFilename& path, const WIN32_FIND_DATAW& entry) {
605 if ((entry.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0) {
606 return DeleteDirEntryDir(path, entry);
607 }
608 // It's a non-directory entry, most likely a regular file
609 if (!DeleteFileW(path.ToNative().c_str())) {
610 return IOErrorFromWinError(GetLastError(), "Cannot delete file '", path.ToString(),
611 "': ");
612 }
613 return Status::OK();
614}
615
616Status DeleteDirTreeInternal(const PlatformFilename& dir_path) {
617 ARROW_ASSIGN_OR_RAISE(auto entries, ListDirInternal(dir_path));
618 for (const auto& entry : entries) {
619 PlatformFilename path = dir_path.Join(PlatformFilename(entry.cFileName));
620 RETURN_NOT_OK(DeleteDirEntry(path, entry));
621 }
622 return Status::OK();
623}
624
625Result<bool> DeleteDirContents(const PlatformFilename& dir_path, bool allow_not_found,
626 bool remove_top_dir) {
627 bool exists = true;
628 WIN32_FIND_DATAW entry;
629 if (allow_not_found) {
630 RETURN_NOT_OK(FindOneFile(dir_path, &entry, &exists));
631 } else {
632 // Will raise if dir_path does not exist
633 RETURN_NOT_OK(FindOneFile(dir_path, &entry));
634 }
635 if (exists) {
636 RETURN_NOT_OK(DeleteDirEntryDir(dir_path, entry, remove_top_dir));
637 }
638 return exists;
639}
640
641#else // POSIX
642
643Status LinkStat(const PlatformFilename& path, struct stat* lst, bool* exists = nullptr) {
644 if (lstat(path.ToNative().c_str(), lst) != 0) {
645 if (exists == nullptr || (errno != ENOENT && errno != ENOTDIR && errno != ELOOP)) {
646 return IOErrorFromErrno(errno, "Cannot get information for path '", path.ToString(),
647 "'");
648 }
649 *exists = false;
650 } else if (exists != nullptr) {
651 *exists = true;
652 }
653 return Status::OK();
654}
655
656Status DeleteDirTreeInternal(const PlatformFilename& dir_path);
657
658Status DeleteDirEntryDir(const PlatformFilename& path, const struct stat& lst,
659 bool remove_top_dir = true) {
660 if (!S_ISLNK(lst.st_mode)) {
661 // Not a symlink => delete contents recursively
662 DCHECK(S_ISDIR(lst.st_mode));
663 RETURN_NOT_OK(DeleteDirTreeInternal(path));
664 if (remove_top_dir && rmdir(path.ToNative().c_str()) != 0) {
665 return IOErrorFromErrno(errno, "Cannot delete directory entry '", path.ToString(),
666 "'");
667 }
668 } else {
669 // Remove symlink
670 if (remove_top_dir && unlink(path.ToNative().c_str()) != 0) {
671 return IOErrorFromErrno(errno, "Cannot delete directory entry '", path.ToString(),
672 "'");
673 }
674 }
675 return Status::OK();
676}
677
678Status DeleteDirEntry(const PlatformFilename& path, const struct stat& lst) {
679 if (S_ISDIR(lst.st_mode)) {
680 return DeleteDirEntryDir(path, lst);
681 }
682 if (unlink(path.ToNative().c_str()) != 0) {
683 return IOErrorFromErrno(errno, "Cannot delete directory entry '", path.ToString(),
684 "'");
685 }
686 return Status::OK();
687}
688
689Status DeleteDirTreeInternal(const PlatformFilename& dir_path) {
690 ARROW_ASSIGN_OR_RAISE(auto children, ListDir(dir_path));
691 for (const auto& child : children) {
692 struct stat lst;
693 PlatformFilename full_path = dir_path.Join(child);
694 RETURN_NOT_OK(LinkStat(full_path, &lst));
695 RETURN_NOT_OK(DeleteDirEntry(full_path, lst));
696 }
697 return Status::OK();
698}
699
700Result<bool> DeleteDirContents(const PlatformFilename& dir_path, bool allow_not_found,
701 bool remove_top_dir) {
702 bool exists = true;
703 struct stat lst;
704 if (allow_not_found) {
705 RETURN_NOT_OK(LinkStat(dir_path, &lst, &exists));
706 } else {
707 // Will raise if dir_path does not exist
708 RETURN_NOT_OK(LinkStat(dir_path, &lst));
709 }
710 if (exists) {
711 if (!S_ISDIR(lst.st_mode) && !S_ISLNK(lst.st_mode)) {
712 return Status::IOError("Cannot delete directory '", dir_path.ToString(),
713 "': not a directory");
714 }
715 RETURN_NOT_OK(DeleteDirEntryDir(dir_path, lst, remove_top_dir));
716 }
717 return exists;
718}
719
720#endif
721
722} // namespace
723
724Result<bool> DeleteDirContents(const PlatformFilename& dir_path, bool allow_not_found) {
725 return DeleteDirContents(dir_path, allow_not_found, /*remove_top_dir=*/false);
726}
727
728Result<bool> DeleteDirTree(const PlatformFilename& dir_path, bool allow_not_found) {
729 return DeleteDirContents(dir_path, allow_not_found, /*remove_top_dir=*/true);
730}
731
732Result<bool> DeleteFile(const PlatformFilename& file_path, bool allow_not_found) {
733#ifdef _WIN32
734 if (DeleteFileW(file_path.ToNative().c_str())) {
735 return true;
736 } else {
737 int errnum = GetLastError();
738 if (!allow_not_found || errnum != ERROR_FILE_NOT_FOUND) {
739 return IOErrorFromWinError(GetLastError(), "Cannot delete file '",
740 file_path.ToString(), "'");
741 }
742 }
743#else
744 if (unlink(file_path.ToNative().c_str()) == 0) {
745 return true;
746 } else {
747 if (!allow_not_found || errno != ENOENT) {
748 return IOErrorFromErrno(errno, "Cannot delete file '", file_path.ToString(), "'");
749 }
750 }
751#endif
752 return false;
753}
754
755Result<bool> FileExists(const PlatformFilename& path) {
756#ifdef _WIN32
757 if (GetFileAttributesW(path.ToNative().c_str()) != INVALID_FILE_ATTRIBUTES) {
758 return true;
759 } else {
760 int errnum = GetLastError();
761 if (errnum != ERROR_PATH_NOT_FOUND && errnum != ERROR_FILE_NOT_FOUND) {
762 return IOErrorFromWinError(GetLastError(), "Failed getting information for path '",
763 path.ToString(), "'");
764 }
765 return false;
766 }
767#else
768 struct stat st;
769 if (stat(path.ToNative().c_str(), &st) == 0) {
770 return true;
771 } else {
772 if (errno != ENOENT && errno != ENOTDIR) {
773 return IOErrorFromErrno(errno, "Failed getting information for path '",
774 path.ToString(), "'");
775 }
776 return false;
777 }
778#endif
779}
780
781//
782// Functions for creating file descriptors
783//
784
// Return an IOError from the enclosing function if `retval` (the result of a
// seek call) is -1.
#define CHECK_LSEEK(retval) \
  if ((retval) == -1) return Status::IOError("lseek failed");

// Reposition the offset of `fd` using the platform's seek primitive
// (_lseeki64() on Windows, lseek() elsewhere) and return its result.
static inline int64_t lseek64_compat(int fd, int64_t pos, int whence) {
#if !defined(_WIN32)
  return lseek(fd, pos, whence);
#else
  return _lseeki64(fd, pos, whence);
#endif
}
795
796static inline Result<int> CheckFileOpResult(int fd_ret, int errno_actual,
797 const PlatformFilename& file_name,
798 const char* opname) {
799 if (fd_ret == -1) {
800#ifdef _WIN32
801 int winerr = GetLastError();
802 if (winerr != ERROR_SUCCESS) {
803 return IOErrorFromWinError(GetLastError(), "Failed to ", opname, " file '",
804 file_name.ToString(), "'");
805 }
806#endif
807 return IOErrorFromErrno(errno_actual, "Failed to ", opname, " file '",
808 file_name.ToString(), "'");
809 }
810 return fd_ret;
811}
812
813Result<int> FileOpenReadable(const PlatformFilename& file_name) {
814 int fd, errno_actual;
815#if defined(_WIN32)
816 SetLastError(0);
817 HANDLE file_handle = CreateFileW(file_name.ToNative().c_str(), GENERIC_READ,
818 FILE_SHARE_READ | FILE_SHARE_WRITE, NULL,
819 OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
820
821 DWORD last_error = GetLastError();
822 if (last_error == ERROR_SUCCESS) {
823 errno_actual = 0;
824 fd = _open_osfhandle(reinterpret_cast<intptr_t>(file_handle),
825 _O_RDONLY | _O_BINARY | _O_NOINHERIT);
826 } else {
827 return IOErrorFromWinError(last_error, "Failed to open local file '",
828 file_name.ToString(), "'");
829 }
830#else
831 fd = open(file_name.ToNative().c_str(), O_RDONLY);
832 errno_actual = errno;
833
834 if (fd >= 0) {
835 // open(O_RDONLY) succeeds on directories, check for it
836 struct stat st;
837 int ret = fstat(fd, &st);
838 if (ret == -1) {
839 ARROW_UNUSED(FileClose(fd));
840 // Will propagate error below
841 } else if (S_ISDIR(st.st_mode)) {
842 ARROW_UNUSED(FileClose(fd));
843 return Status::IOError("Cannot open for reading: path '", file_name.ToString(),
844 "' is a directory");
845 }
846 }
847#endif
848
849 return CheckFileOpResult(fd, errno_actual, file_name, "open local");
850}
851
852Result<int> FileOpenWritable(const PlatformFilename& file_name, bool write_only,
853 bool truncate, bool append) {
854 int fd, errno_actual;
855
856#if defined(_WIN32)
857 SetLastError(0);
858 int oflag = _O_CREAT | _O_BINARY | _O_NOINHERIT;
859 DWORD desired_access = GENERIC_WRITE;
860 DWORD share_mode = FILE_SHARE_READ | FILE_SHARE_WRITE;
861 DWORD creation_disposition = OPEN_ALWAYS;
862
863 if (append) {
864 oflag |= _O_APPEND;
865 }
866
867 if (truncate) {
868 oflag |= _O_TRUNC;
869 creation_disposition = CREATE_ALWAYS;
870 }
871
872 if (write_only) {
873 oflag |= _O_WRONLY;
874 } else {
875 oflag |= _O_RDWR;
876 desired_access |= GENERIC_READ;
877 }
878
879 HANDLE file_handle =
880 CreateFileW(file_name.ToNative().c_str(), desired_access, share_mode, NULL,
881 creation_disposition, FILE_ATTRIBUTE_NORMAL, NULL);
882
883 DWORD last_error = GetLastError();
884 if (last_error == ERROR_SUCCESS || last_error == ERROR_ALREADY_EXISTS) {
885 errno_actual = 0;
886 fd = _open_osfhandle(reinterpret_cast<intptr_t>(file_handle), oflag);
887 } else {
888 return IOErrorFromWinError(last_error, "Failed to open local file '",
889 file_name.ToString(), "'");
890 }
891#else
892 int oflag = O_CREAT;
893
894 if (truncate) {
895 oflag |= O_TRUNC;
896 }
897 if (append) {
898 oflag |= O_APPEND;
899 }
900
901 if (write_only) {
902 oflag |= O_WRONLY;
903 } else {
904 oflag |= O_RDWR;
905 }
906
907 fd = open(file_name.ToNative().c_str(), oflag, 0666);
908 errno_actual = errno;
909#endif
910
911 RETURN_NOT_OK(CheckFileOpResult(fd, errno_actual, file_name, "open local"));
912 if (append) {
913 // Seek to end, as O_APPEND does not necessarily do it
914 auto ret = lseek64_compat(fd, 0, SEEK_END);
915 if (ret == -1) {
916 ARROW_UNUSED(FileClose(fd));
917 return Status::IOError("lseek failed");
918 }
919 }
920 return fd;
921}
922
923Result<int64_t> FileTell(int fd) {
924 int64_t current_pos;
925#if defined(_WIN32)
926 current_pos = _telli64(fd);
927 if (current_pos == -1) {
928 return Status::IOError("_telli64 failed");
929 }
930#else
931 current_pos = lseek64_compat(fd, 0, SEEK_CUR);
932 CHECK_LSEEK(current_pos);
933#endif
934 return current_pos;
935}
936
937Result<Pipe> CreatePipe() {
938 int ret;
939 int fd[2];
940#if defined(_WIN32)
941 ret = _pipe(fd, 4096, _O_BINARY);
942#else
943 ret = pipe(fd);
944#endif
945
946 if (ret == -1) {
947 return IOErrorFromErrno(errno, "Error creating pipe");
948 }
949 return Pipe{fd[0], fd[1]};
950}
951
952static Status StatusFromMmapErrno(const char* prefix) {
953#ifdef _WIN32
954 errno = __map_mman_error(GetLastError(), EPERM);
955#endif
956 return IOErrorFromErrno(errno, prefix);
957}
958
959namespace {
960
961int64_t GetPageSizeInternal() {
962#if defined(__APPLE__)
963 return getpagesize();
964#elif defined(_WIN32)
965 SYSTEM_INFO si;
966 GetSystemInfo(&si);
967 return si.dwPageSize;
968#else
969 errno = 0;
970 const auto ret = sysconf(_SC_PAGESIZE);
971 if (ret == -1) {
972 ARROW_LOG(FATAL) << "sysconf(_SC_PAGESIZE) failed: " << ErrnoMessage(errno);
973 }
974 return static_cast<int64_t>(ret);
975#endif
976}
977
978} // namespace
979
980int64_t GetPageSize() {
981 static const int64_t kPageSize = GetPageSizeInternal(); // cache it
982 return kPageSize;
983}
984
985//
986// Compatible way to remap a memory map
987//
988
989Status MemoryMapRemap(void* addr, size_t old_size, size_t new_size, int fildes,
990 void** new_addr) {
991 // should only be called with writable files
992 *new_addr = MAP_FAILED;
993#ifdef _WIN32
994 // flags are ignored on windows
995 HANDLE fm, h;
996
997 if (!UnmapViewOfFile(addr)) {
998 return StatusFromMmapErrno("UnmapViewOfFile failed");
999 }
1000
1001 h = reinterpret_cast<HANDLE>(_get_osfhandle(fildes));
1002 if (h == INVALID_HANDLE_VALUE) {
1003 return StatusFromMmapErrno("Cannot get file handle");
1004 }
1005
1006 uint64_t new_size64 = new_size;
1007 LONG new_size_low = static_cast<LONG>(new_size64 & 0xFFFFFFFFUL);
1008 LONG new_size_high = static_cast<LONG>((new_size64 >> 32) & 0xFFFFFFFFUL);
1009
1010 SetFilePointer(h, new_size_low, &new_size_high, FILE_BEGIN);
1011 SetEndOfFile(h);
1012 fm = CreateFileMapping(h, NULL, PAGE_READWRITE, 0, 0, "");
1013 if (fm == NULL) {
1014 return StatusFromMmapErrno("CreateFileMapping failed");
1015 }
1016 *new_addr = MapViewOfFile(fm, FILE_MAP_WRITE, 0, 0, new_size);
1017 CloseHandle(fm);
1018 if (new_addr == NULL) {
1019 return StatusFromMmapErrno("MapViewOfFile failed");
1020 }
1021 return Status::OK();
1022#elif defined(__linux__)
1023 if (ftruncate(fildes, new_size) == -1) {
1024 return StatusFromMmapErrno("ftruncate failed");
1025 }
1026 *new_addr = mremap(addr, old_size, new_size, MREMAP_MAYMOVE);
1027 if (*new_addr == MAP_FAILED) {
1028 return StatusFromMmapErrno("mremap failed");
1029 }
1030 return Status::OK();
1031#else
1032 // we have to close the mmap first, truncate the file to the new size
1033 // and recreate the mmap
1034 if (munmap(addr, old_size) == -1) {
1035 return StatusFromMmapErrno("munmap failed");
1036 }
1037 if (ftruncate(fildes, new_size) == -1) {
1038 return StatusFromMmapErrno("ftruncate failed");
1039 }
1040 // we set READ / WRITE flags on the new map, since we could only have
1041 // unlarged a RW map in the first place
1042 *new_addr = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, fildes, 0);
1043 if (*new_addr == MAP_FAILED) {
1044 return StatusFromMmapErrno("mmap failed");
1045 }
1046 return Status::OK();
1047#endif
1048}
1049
1050Status MemoryAdviseWillNeed(const std::vector<MemoryRegion>& regions) {
1051 const auto page_size = static_cast<size_t>(GetPageSize());
1052 DCHECK_GT(page_size, 0);
1053 const size_t page_mask = ~(page_size - 1);
1054 DCHECK_EQ(page_mask & page_size, page_size);
1055
1056 auto align_region = [=](const MemoryRegion& region) -> MemoryRegion {
1057 const auto addr = reinterpret_cast<uintptr_t>(region.addr);
1058 const auto aligned_addr = addr & page_mask;
1059 DCHECK_LT(addr - aligned_addr, page_size);
1060 return {reinterpret_cast<void*>(aligned_addr),
1061 region.size + static_cast<size_t>(addr - aligned_addr)};
1062 };
1063
1064#ifdef _WIN32
1065 // PrefetchVirtualMemory() is available on Windows 8 or later
1066 struct PrefetchEntry { // Like WIN32_MEMORY_RANGE_ENTRY
1067 void* VirtualAddress;
1068 size_t NumberOfBytes;
1069
1070 PrefetchEntry(const MemoryRegion& region) // NOLINT runtime/explicit
1071 : VirtualAddress(region.addr), NumberOfBytes(region.size) {}
1072 };
1073 using PrefetchVirtualMemoryFunc = BOOL (*)(HANDLE, ULONG_PTR, PrefetchEntry*, ULONG);
1074 static const auto prefetch_virtual_memory = reinterpret_cast<PrefetchVirtualMemoryFunc>(
1075 GetProcAddress(GetModuleHandleW(L"kernel32.dll"), "PrefetchVirtualMemory"));
1076 if (prefetch_virtual_memory != nullptr) {
1077 std::vector<PrefetchEntry> entries;
1078 entries.reserve(regions.size());
1079 for (const auto& region : regions) {
1080 if (region.size != 0) {
1081 entries.emplace_back(align_region(region));
1082 }
1083 }
1084 if (!entries.empty() &&
1085 !prefetch_virtual_memory(GetCurrentProcess(),
1086 static_cast<ULONG_PTR>(entries.size()), entries.data(),
1087 0)) {
1088 return IOErrorFromWinError(GetLastError(), "PrefetchVirtualMemory failed");
1089 }
1090 }
1091 return Status::OK();
1092#elif defined(POSIX_MADV_WILLNEED)
1093 for (const auto& region : regions) {
1094 if (region.size != 0) {
1095 const auto aligned = align_region(region);
1096 int err = posix_madvise(aligned.addr, aligned.size, POSIX_MADV_WILLNEED);
1097 // EBADF can be returned on Linux in the following cases:
1098 // - the kernel version is older than 3.9
1099 // - the kernel was compiled with CONFIG_SWAP disabled (ARROW-9577)
1100 if (err != 0 && err != EBADF) {
1101 return IOErrorFromErrno(err, "posix_madvise failed");
1102 }
1103 }
1104 }
1105 return Status::OK();
1106#else
1107 return Status::OK();
1108#endif
1109}
1110
1111//
1112// Closing files
1113//
1114
1115Status FileClose(int fd) {
1116 int ret;
1117
1118#if defined(_WIN32)
1119 ret = static_cast<int>(_close(fd));
1120#else
1121 ret = static_cast<int>(close(fd));
1122#endif
1123
1124 if (ret == -1) {
1125 return Status::IOError("error closing file");
1126 }
1127 return Status::OK();
1128}
1129
1130//
1131// Seeking and telling
1132//
1133
1134Status FileSeek(int fd, int64_t pos, int whence) {
1135 int64_t ret = lseek64_compat(fd, pos, whence);
1136 CHECK_LSEEK(ret);
1137 return Status::OK();
1138}
1139
1140Status FileSeek(int fd, int64_t pos) { return FileSeek(fd, pos, SEEK_SET); }
1141
1142Result<int64_t> FileGetSize(int fd) {
1143#if defined(_WIN32)
1144 struct __stat64 st;
1145#else
1146 struct stat st;
1147#endif
1148 st.st_size = -1;
1149
1150#if defined(_WIN32)
1151 int ret = _fstat64(fd, &st);
1152#else
1153 int ret = fstat(fd, &st);
1154#endif
1155
1156 if (ret == -1) {
1157 return Status::IOError("error stat()ing file");
1158 }
1159 if (st.st_size == 0) {
1160 // Maybe the file doesn't support getting its size, double-check by
1161 // trying to tell() (seekable files usually have a size, while
1162 // non-seekable files don't)
1163 RETURN_NOT_OK(FileTell(fd));
1164 } else if (st.st_size < 0) {
1165 return Status::IOError("error getting file size");
1166 }
1167 return st.st_size;
1168}
1169
1170//
1171// Reading data
1172//
1173
// Positional read: read up to `nbytes` bytes from `fd` at offset `pos` into `buf`.
// Returns the number of bytes read (0 when reading at end of file) or -1 on error.
static inline int64_t pread_compat(int fd, void* buf, int64_t nbytes, int64_t pos) {
#if defined(_WIN32)
  // The 64-bit offset is passed as the two 32-bit halves of an OVERLAPPED
  OVERLAPPED ov = {0};
  ov.Offset = static_cast<uint32_t>(pos);
  ov.OffsetHigh = static_cast<uint32_t>(pos >> 32);

  HANDLE file_handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
  DWORD num_read = 0;

  // Note: ReadFile() will update the file position
  const BOOL succeeded =
      ReadFile(file_handle, buf, static_cast<uint32_t>(nbytes), &num_read, &ov);
  // Hitting end of file is reported as a failure but is not an error here
  if (!succeeded && GetLastError() != ERROR_HANDLE_EOF) {
    return -1;
  }
  return num_read;
#else
  const auto num_read =
      pread(fd, buf, static_cast<size_t>(nbytes), static_cast<off_t>(pos));
  return static_cast<int64_t>(num_read);
#endif
}
1195
1196Result<int64_t> FileRead(int fd, uint8_t* buffer, int64_t nbytes) {
1197 int64_t bytes_read = 0;
1198
1199 while (bytes_read < nbytes) {
1200 int64_t chunksize =
1201 std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_read);
1202#if defined(_WIN32)
1203 int64_t ret =
1204 static_cast<int64_t>(_read(fd, buffer, static_cast<uint32_t>(chunksize)));
1205#else
1206 int64_t ret = static_cast<int64_t>(read(fd, buffer, static_cast<size_t>(chunksize)));
1207#endif
1208
1209 if (ret == -1) {
1210 return IOErrorFromErrno(errno, "Error reading bytes from file");
1211 }
1212 if (ret == 0) {
1213 // EOF
1214 break;
1215 }
1216 buffer += ret;
1217 bytes_read += ret;
1218 }
1219 return bytes_read;
1220}
1221
1222Result<int64_t> FileReadAt(int fd, uint8_t* buffer, int64_t position, int64_t nbytes) {
1223 int64_t bytes_read = 0;
1224
1225 while (bytes_read < nbytes) {
1226 int64_t chunksize =
1227 std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_read);
1228 int64_t ret = pread_compat(fd, buffer, chunksize, position);
1229
1230 if (ret == -1) {
1231 return IOErrorFromErrno(errno, "Error reading bytes from file");
1232 }
1233 if (ret == 0) {
1234 // EOF
1235 break;
1236 }
1237 buffer += ret;
1238 position += ret;
1239 bytes_read += ret;
1240 }
1241 return bytes_read;
1242}
1243
1244//
1245// Writing data
1246//
1247
1248Status FileWrite(int fd, const uint8_t* buffer, const int64_t nbytes) {
1249 int ret = 0;
1250 int64_t bytes_written = 0;
1251
1252 while (ret != -1 && bytes_written < nbytes) {
1253 int64_t chunksize =
1254 std::min(static_cast<int64_t>(ARROW_MAX_IO_CHUNKSIZE), nbytes - bytes_written);
1255#if defined(_WIN32)
1256 ret = static_cast<int>(
1257 _write(fd, buffer + bytes_written, static_cast<uint32_t>(chunksize)));
1258#else
1259 ret = static_cast<int>(
1260 write(fd, buffer + bytes_written, static_cast<size_t>(chunksize)));
1261#endif
1262
1263 if (ret != -1) {
1264 bytes_written += ret;
1265 }
1266 }
1267
1268 if (ret == -1) {
1269 return IOErrorFromErrno(errno, "Error writing bytes to file");
1270 }
1271 return Status::OK();
1272}
1273
1274Status FileTruncate(int fd, const int64_t size) {
1275 int ret, errno_actual;
1276
1277#ifdef _WIN32
1278 errno_actual = _chsize_s(fd, static_cast<size_t>(size));
1279 ret = errno_actual == 0 ? 0 : -1;
1280#else
1281 ret = ftruncate(fd, static_cast<size_t>(size));
1282 errno_actual = errno;
1283#endif
1284
1285 if (ret == -1) {
1286 return IOErrorFromErrno(errno_actual, "Error writing bytes to file");
1287 }
1288 return Status::OK();
1289}
1290
1291//
1292// Environment variables
1293//
1294
1295Result<std::string> GetEnvVar(const char* name) {
1296#ifdef _WIN32
1297 // On Windows, getenv() reads an early copy of the process' environment
1298 // which doesn't get updated when SetEnvironmentVariable() is called.
1299 constexpr int32_t bufsize = 2000;
1300 char c_str[bufsize];
1301 auto res = GetEnvironmentVariableA(name, c_str, bufsize);
1302 if (res >= bufsize) {
1303 return Status::CapacityError("environment variable value too long");
1304 } else if (res == 0) {
1305 return Status::KeyError("environment variable undefined");
1306 }
1307 return std::string(c_str);
1308#else
1309 char* c_str = getenv(name);
1310 if (c_str == nullptr) {
1311 return Status::KeyError("environment variable undefined");
1312 }
1313 return std::string(c_str);
1314#endif
1315}
1316
1317Result<std::string> GetEnvVar(const std::string& name) { return GetEnvVar(name.c_str()); }
1318
1319#ifdef _WIN32
1320Result<NativePathString> GetEnvVarNative(const std::string& name) {
1321 NativePathString w_name;
1322 constexpr int32_t bufsize = 2000;
1323 wchar_t w_str[bufsize];
1324
1325 ARROW_ASSIGN_OR_RAISE(w_name, StringToNative(name));
1326 auto res = GetEnvironmentVariableW(w_name.c_str(), w_str, bufsize);
1327 if (res >= bufsize) {
1328 return Status::CapacityError("environment variable value too long");
1329 } else if (res == 0) {
1330 return Status::KeyError("environment variable undefined");
1331 }
1332 return NativePathString(w_str);
1333}
1334
1335Result<NativePathString> GetEnvVarNative(const char* name) {
1336 return GetEnvVarNative(std::string(name));
1337}
1338
1339#else
1340
1341Result<NativePathString> GetEnvVarNative(const std::string& name) {
1342 return GetEnvVar(name);
1343}
1344
1345Result<NativePathString> GetEnvVarNative(const char* name) { return GetEnvVar(name); }
1346#endif
1347
1348Status SetEnvVar(const char* name, const char* value) {
1349#ifdef _WIN32
1350 if (SetEnvironmentVariableA(name, value)) {
1351 return Status::OK();
1352 } else {
1353 return Status::Invalid("failed setting environment variable");
1354 }
1355#else
1356 if (setenv(name, value, 1) == 0) {
1357 return Status::OK();
1358 } else {
1359 return Status::Invalid("failed setting environment variable");
1360 }
1361#endif
1362}
1363
1364Status SetEnvVar(const std::string& name, const std::string& value) {
1365 return SetEnvVar(name.c_str(), value.c_str());
1366}
1367
1368Status DelEnvVar(const char* name) {
1369#ifdef _WIN32
1370 if (SetEnvironmentVariableA(name, nullptr)) {
1371 return Status::OK();
1372 } else {
1373 return Status::Invalid("failed deleting environment variable");
1374 }
1375#else
1376 if (unsetenv(name) == 0) {
1377 return Status::OK();
1378 } else {
1379 return Status::Invalid("failed deleting environment variable");
1380 }
1381#endif
1382}
1383
1384Status DelEnvVar(const std::string& name) { return DelEnvVar(name.c_str()); }
1385
1386//
1387// Temporary directories
1388//
1389
1390namespace {
1391
1392#if _WIN32
1393NativePathString GetWindowsDirectoryPath() {
1394 auto size = GetWindowsDirectoryW(nullptr, 0);
1395 ARROW_CHECK_GT(size, 0) << "GetWindowsDirectoryW failed";
1396 std::vector<wchar_t> w_str(size);
1397 size = GetWindowsDirectoryW(w_str.data(), size);
1398 ARROW_CHECK_GT(size, 0) << "GetWindowsDirectoryW failed";
1399 return {w_str.data(), size};
1400}
1401#endif
1402
1403// Return a list of preferred locations for temporary files
1404std::vector<NativePathString> GetPlatformTemporaryDirs() {
1405 struct TempDirSelector {
1406 std::string env_var;
1407 NativePathString path_append;
1408 };
1409
1410 std::vector<TempDirSelector> selectors;
1411 NativePathString fallback_tmp;
1412
1413#if _WIN32
1414 selectors = {
1415 {"TMP", L""}, {"TEMP", L""}, {"LOCALAPPDATA", L"Temp"}, {"USERPROFILE", L"Temp"}};
1416 fallback_tmp = GetWindowsDirectoryPath();
1417
1418#else
1419 selectors = {{"TMPDIR", ""}, {"TMP", ""}, {"TEMP", ""}, {"TEMPDIR", ""}};
1420#ifdef __ANDROID__
1421 fallback_tmp = "/data/local/tmp";
1422#else
1423 fallback_tmp = "/tmp";
1424#endif
1425#endif
1426
1427 std::vector<NativePathString> temp_dirs;
1428 for (const auto& sel : selectors) {
1429 auto result = GetEnvVarNative(sel.env_var);
1430 if (result.status().IsKeyError()) {
1431 // Environment variable absent, skip
1432 continue;
1433 }
1434 if (!result.ok()) {
1435 ARROW_LOG(WARNING) << "Failed getting env var '" << sel.env_var
1436 << "': " << result.status().ToString();
1437 continue;
1438 }
1439 NativePathString p = *std::move(result);
1440 if (p.empty()) {
1441 // Environment variable set to empty string, skip
1442 continue;
1443 }
1444 if (sel.path_append.empty()) {
1445 temp_dirs.push_back(p);
1446 } else {
1447 temp_dirs.push_back(p + kNativeSep + sel.path_append);
1448 }
1449 }
1450 temp_dirs.push_back(fallback_tmp);
1451 return temp_dirs;
1452}
1453
1454std::string MakeRandomName(int num_chars) {
1455 static const std::string chars = "0123456789abcdefghijklmnopqrstuvwxyz";
1456 std::default_random_engine gen(
1457 static_cast<std::default_random_engine::result_type>(GetRandomSeed()));
1458 std::uniform_int_distribution<int> dist(0, static_cast<int>(chars.length() - 1));
1459
1460 std::string s;
1461 s.reserve(num_chars);
1462 for (int i = 0; i < num_chars; ++i) {
1463 s += chars[dist(gen)];
1464 }
1465 return s;
1466}
1467
1468} // namespace
1469
1470Result<std::unique_ptr<TemporaryDir>> TemporaryDir::Make(const std::string& prefix) {
1471 const int kNumChars = 8;
1472
1473 NativePathString base_name;
1474
1475 auto MakeBaseName = [&]() {
1476 std::string suffix = MakeRandomName(kNumChars);
1477 return StringToNative(prefix + suffix);
1478 };
1479
1480 auto TryCreatingDirectory =
1481 [&](const NativePathString& base_dir) -> Result<std::unique_ptr<TemporaryDir>> {
1482 Status st;
1483 for (int attempt = 0; attempt < 3; ++attempt) {
1484 PlatformFilename fn(base_dir + kNativeSep + base_name + kNativeSep);
1485 auto result = CreateDir(fn);
1486 if (!result.ok()) {
1487 // Probably a permissions error or a non-existing base_dir
1488 return nullptr;
1489 }
1490 if (*result) {
1491 return std::unique_ptr<TemporaryDir>(new TemporaryDir(std::move(fn)));
1492 }
1493 // The random name already exists in base_dir, try with another name
1494 st = Status::IOError("Path already exists: '", fn.ToString(), "'");
1495 ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
1496 }
1497 return st;
1498 };
1499
1500 ARROW_ASSIGN_OR_RAISE(base_name, MakeBaseName());
1501
1502 auto base_dirs = GetPlatformTemporaryDirs();
1503 DCHECK_NE(base_dirs.size(), 0);
1504
1505 for (const auto& base_dir : base_dirs) {
1506 ARROW_ASSIGN_OR_RAISE(auto ptr, TryCreatingDirectory(base_dir));
1507 if (ptr) {
1508 return std::move(ptr);
1509 }
1510 // Cannot create in this directory, try the next one
1511 }
1512
1513 return Status::IOError(
1514 "Cannot create temporary subdirectory in any "
1515 "of the platform temporary directories");
1516}
1517
1518TemporaryDir::TemporaryDir(PlatformFilename&& path) : path_(std::move(path)) {}
1519
1520TemporaryDir::~TemporaryDir() {
1521 Status st = DeleteDirTree(path_).status();
1522 if (!st.ok()) {
1523 ARROW_LOG(WARNING) << "When trying to delete temporary directory: " << st;
1524 }
1525}
1526
1527SignalHandler::SignalHandler() : SignalHandler(static_cast<Callback>(nullptr)) {}
1528
1529SignalHandler::SignalHandler(Callback cb) {
1530#if ARROW_HAVE_SIGACTION
1531 sa_.sa_handler = cb;
1532 sa_.sa_flags = 0;
1533 sigemptyset(&sa_.sa_mask);
1534#else
1535 cb_ = cb;
1536#endif
1537}
1538
#if ARROW_HAVE_SIGACTION
// Build a handler holding a byte-wise copy of the sigaction structure `sa`.
SignalHandler::SignalHandler(const struct sigaction& sa) {
  memcpy(&sa_, &sa, sizeof(sa_));
}
#endif
1544
1545SignalHandler::Callback SignalHandler::callback() const {
1546#if ARROW_HAVE_SIGACTION
1547 return sa_.sa_handler;
1548#else
1549 return cb_;
1550#endif
1551}
1552
#if ARROW_HAVE_SIGACTION
// Return a reference to the stored sigaction structure.
const struct sigaction& SignalHandler::action() const {
  return sa_;
}
#endif
1556
1557Result<SignalHandler> GetSignalHandler(int signum) {
1558#if ARROW_HAVE_SIGACTION
1559 struct sigaction sa;
1560 int ret = sigaction(signum, nullptr, &sa);
1561 if (ret != 0) {
1562 // TODO more detailed message using errno
1563 return Status::IOError("sigaction call failed");
1564 }
1565 return SignalHandler(sa);
1566#else
1567 // To read the old handler, set the signal handler to something else temporarily
1568 SignalHandler::Callback cb = signal(signum, SIG_IGN);
1569 if (cb == SIG_ERR || signal(signum, cb) == SIG_ERR) {
1570 // TODO more detailed message using errno
1571 return Status::IOError("signal call failed");
1572 }
1573 return SignalHandler(cb);
1574#endif
1575}
1576
1577Result<SignalHandler> SetSignalHandler(int signum, const SignalHandler& handler) {
1578#if ARROW_HAVE_SIGACTION
1579 struct sigaction old_sa;
1580 int ret = sigaction(signum, &handler.action(), &old_sa);
1581 if (ret != 0) {
1582 // TODO more detailed message using errno
1583 return Status::IOError("sigaction call failed");
1584 }
1585 return SignalHandler(old_sa);
1586#else
1587 SignalHandler::Callback cb = signal(signum, handler.callback());
1588 if (cb == SIG_ERR) {
1589 // TODO more detailed message using errno
1590 return Status::IOError("signal call failed");
1591 }
1592 return SignalHandler(cb);
1593#endif
1594 return Status::OK();
1595}
1596
1597void ReinstateSignalHandler(int signum, SignalHandler::Callback handler) {
1598#if !ARROW_HAVE_SIGACTION
1599 // Cannot report any errors from signal() (but there shouldn't be any)
1600 signal(signum, handler);
1601#endif
1602}
1603
1604Status SendSignal(int signum) {
1605 if (raise(signum) == 0) {
1606 return Status::OK();
1607 }
1608 if (errno == EINVAL) {
1609 return Status::Invalid("Invalid signal number ", signum);
1610 }
1611 return IOErrorFromErrno(errno, "Failed to raise signal");
1612}
1613
1614Status SendSignalToThread(int signum, uint64_t thread_id) {
1615#ifdef _WIN32
1616 return Status::NotImplemented("Cannot send signal to specific thread on Windows");
1617#else
1618 // Have to use a C-style cast because pthread_t can be a pointer *or* integer type
1619 int r = pthread_kill((pthread_t)thread_id, signum); // NOLINT readability-casting
1620 if (r == 0) {
1621 return Status::OK();
1622 }
1623 if (r == EINVAL) {
1624 return Status::Invalid("Invalid signal number ", signum);
1625 }
1626 return IOErrorFromErrno(r, "Failed to raise signal");
1627#endif
1628}
1629
namespace {

// Return the id of the current process.
int64_t GetPid() {
#ifdef _WIN32
  const auto pid = GetCurrentProcessId();
#else
  const auto pid = getpid();
#endif
  return pid;
}

// Build the Mersenne Twister engine that random seeds are drawn from.
std::mt19937_64 GetSeedGenerator() {
  // Initialize Mersenne Twister PRNG with a true random seed.
  // Make sure to mix in process id to minimize risks of clashes when parallel testing.
#ifdef ARROW_VALGRIND
  // Valgrind can crash, hang or enter an infinite loop on std::random_device,
  // use a crude initializer instead.
  const uint8_t dummy = 0;
  ARROW_UNUSED(dummy);
  // The address of a local variable stands in for the random bits
  const auto address_bits = reinterpret_cast<uintptr_t>(&dummy);
  const auto pid_bits = static_cast<uintptr_t>(GetPid());
  return std::mt19937_64(address_bits ^ pid_bits);
#else
  std::random_device true_random;
  // Two draws from the device fill the low and high halves of the 64-bit seed
  const auto low_bits = static_cast<uint64_t>(true_random());
  const auto high_bits = static_cast<uint64_t>(true_random()) << 32;
  const auto pid_bits = static_cast<uint64_t>(GetPid());
  return std::mt19937_64(low_bits ^ high_bits ^ pid_bits);
#endif
}

}  // namespace
1660
1661int64_t GetRandomSeed() {
1662 // The process-global seed generator to aims to avoid calling std::random_device
1663 // unless truly necessary (it can block on some systems, see ARROW-10287).
1664 static auto seed_gen = GetSeedGenerator();
1665 return static_cast<int64_t>(seed_gen());
1666}
1667
// Return an integer representation of the calling thread's std::thread::id.
uint64_t GetThreadId() {
  // std::thread::id is trivially copyable as per C++ spec,
  // so type punning as a uint64_t should work
  static_assert(sizeof(std::thread::id) <= sizeof(uint64_t),
                "std::thread::id can't fit into uint64_t");
  const std::thread::id current = std::this_thread::get_id();
  // Start from zero so that bytes not covered by the id stay cleared
  uint64_t as_integer = 0;
  memcpy(&as_integer, &current, sizeof(current));
  return as_integer;
}
1678
1679uint64_t GetOptionalThreadId() {
1680 auto tid = GetThreadId();
1681 return (tid == 0) ? tid - 1 : tid;
1682}
1683
1684} // namespace internal
1685} // namespace arrow