]> git.proxmox.com Git - ceph.git/blob - ceph/src/arrow/cpp/src/arrow/filesystem/localfs.cc
import quincy 17.2.0
[ceph.git] / ceph / src / arrow / cpp / src / arrow / filesystem / localfs.cc
1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17
18 #include <chrono>
19 #include <cstring>
20 #include <sstream>
21 #include <utility>
22
23 #ifdef _WIN32
24 #include "arrow/util/windows_compatibility.h"
25 #else
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <stdio.h>
29 #include <sys/stat.h>
30 #endif
31
32 #include "arrow/filesystem/localfs.h"
33 #include "arrow/filesystem/path_util.h"
34 #include "arrow/filesystem/util_internal.h"
35 #include "arrow/io/file.h"
36 #include "arrow/util/io_util.h"
37 #include "arrow/util/logging.h"
38 #include "arrow/util/uri.h"
39 #include "arrow/util/windows_fixup.h"
40
41 namespace arrow {
42 namespace fs {
43
44 using ::arrow::internal::IOErrorFromErrno;
45 #ifdef _WIN32
46 using ::arrow::internal::IOErrorFromWinError;
47 #endif
48 using ::arrow::internal::NativePathString;
49 using ::arrow::internal::PlatformFilename;
50
51 namespace internal {
52
53 #ifdef _WIN32
// Tell whether `c` is an ASCII letter (A-Z or a-z), i.e. a character that
// can name a Windows drive.
static bool IsDriveLetter(char c) {
  // Plain range comparisons are used on purpose: the C/C++ stdlib
  // classification functions are locale-dependent.
  const bool is_upper = 'A' <= c && c <= 'Z';
  const bool is_lower = 'a' <= c && c <= 'z';
  return is_upper || is_lower;
}
58 #endif
59
// Tell whether `s` looks like an absolute local filesystem path.
//
// A path starting with '/' is absolute on every platform.  On Windows a
// path starting with '\' is absolute too, and so is a path made of a drive
// letter, a colon and a (back)slash, e.g. "C:\...".
bool DetectAbsolutePath(const std::string& s) {
  if (s.empty()) {
    return false;
  }
  const char first = s[0];
  // Is it a /-prefixed local path?
  if (first == '/') {
    return true;
  }
#ifdef _WIN32
  // Is it a \-prefixed local path?
  if (first == '\\') {
    return true;
  }
  // Does it start with a drive letter in addition to being /- or \-prefixed?
  const bool has_drive_prefix = s.length() >= 3 && s[1] == ':' &&
                                (s[2] == '/' || s[2] == '\\') && IsDriveLetter(first);
  if (has_drive_prefix) {
    return true;
  }
#endif
  return false;
}
79
80 } // namespace internal
81
82 namespace {
83
84 #ifdef _WIN32
85
86 std::string NativeToString(const NativePathString& ns) {
87 PlatformFilename fn(ns);
88 return fn.ToString();
89 }
90
91 TimePoint ToTimePoint(FILETIME ft) {
92 // Hundreds of nanoseconds between January 1, 1601 (UTC) and the Unix epoch.
93 static constexpr int64_t kFileTimeEpoch = 11644473600LL * 10000000;
94
95 int64_t hundreds = (static_cast<int64_t>(ft.dwHighDateTime) << 32) + ft.dwLowDateTime -
96 kFileTimeEpoch; // hundreds of ns since Unix epoch
97 std::chrono::nanoseconds ns_count(100 * hundreds);
98 return TimePoint(std::chrono::duration_cast<TimePoint::duration>(ns_count));
99 }
100
101 FileInfo FileInformationToFileInfo(const BY_HANDLE_FILE_INFORMATION& information) {
102 FileInfo info;
103 if (information.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
104 info.set_type(FileType::Directory);
105 info.set_size(kNoSize);
106 } else {
107 // Regular file
108 info.set_type(FileType::File);
109 info.set_size((static_cast<int64_t>(information.nFileSizeHigh) << 32) +
110 information.nFileSizeLow);
111 }
112 info.set_mtime(ToTimePoint(information.ftLastWriteTime));
113 return info;
114 }
115
116 Result<FileInfo> StatFile(const std::wstring& path) {
117 HANDLE h;
118 std::string bytes_path = NativeToString(path);
119 FileInfo info;
120
121 /* Inspired by CPython, see Modules/posixmodule.c */
122 h = CreateFileW(path.c_str(), FILE_READ_ATTRIBUTES, /* desired access */
123 0, /* share mode */
124 NULL, /* security attributes */
125 OPEN_EXISTING,
126 /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */
127 FILE_ATTRIBUTE_NORMAL | FILE_FLAG_BACKUP_SEMANTICS, NULL);
128
129 if (h == INVALID_HANDLE_VALUE) {
130 DWORD err = GetLastError();
131 if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) {
132 info.set_path(bytes_path);
133 info.set_type(FileType::NotFound);
134 info.set_mtime(kNoTime);
135 info.set_size(kNoSize);
136 return info;
137 } else {
138 return IOErrorFromWinError(GetLastError(), "Failed querying information for path '",
139 bytes_path, "'");
140 }
141 }
142 BY_HANDLE_FILE_INFORMATION information;
143 if (!GetFileInformationByHandle(h, &information)) {
144 CloseHandle(h);
145 return IOErrorFromWinError(GetLastError(), "Failed querying information for path '",
146 bytes_path, "'");
147 }
148 CloseHandle(h);
149 info = FileInformationToFileInfo(information);
150 info.set_path(bytes_path);
151 return info;
152 }
153
154 #else // POSIX systems
155
156 TimePoint ToTimePoint(const struct timespec& s) {
157 std::chrono::nanoseconds ns_count(static_cast<int64_t>(s.tv_sec) * 1000000000 +
158 static_cast<int64_t>(s.tv_nsec));
159 return TimePoint(std::chrono::duration_cast<TimePoint::duration>(ns_count));
160 }
161
162 FileInfo StatToFileInfo(const struct stat& s) {
163 FileInfo info;
164 if (S_ISREG(s.st_mode)) {
165 info.set_type(FileType::File);
166 info.set_size(static_cast<int64_t>(s.st_size));
167 } else if (S_ISDIR(s.st_mode)) {
168 info.set_type(FileType::Directory);
169 info.set_size(kNoSize);
170 } else {
171 info.set_type(FileType::Unknown);
172 info.set_size(kNoSize);
173 }
174 #ifdef __APPLE__
175 // macOS doesn't use the POSIX-compliant spelling
176 info.set_mtime(ToTimePoint(s.st_mtimespec));
177 #else
178 info.set_mtime(ToTimePoint(s.st_mtim));
179 #endif
180 return info;
181 }
182
183 Result<FileInfo> StatFile(const std::string& path) {
184 FileInfo info;
185 struct stat s;
186 int r = stat(path.c_str(), &s);
187 if (r == -1) {
188 if (errno == ENOENT || errno == ENOTDIR || errno == ELOOP) {
189 info.set_type(FileType::NotFound);
190 info.set_mtime(kNoTime);
191 info.set_size(kNoSize);
192 } else {
193 return IOErrorFromErrno(errno, "Failed stat()ing path '", path, "'");
194 }
195 } else {
196 info = StatToFileInfo(s);
197 }
198 info.set_path(path);
199 return info;
200 }
201
202 #endif
203
204 Status StatSelector(const PlatformFilename& dir_fn, const FileSelector& select,
205 int32_t nesting_depth, std::vector<FileInfo>* out) {
206 auto result = ListDir(dir_fn);
207 if (!result.ok()) {
208 auto status = result.status();
209 if (select.allow_not_found && status.IsIOError()) {
210 ARROW_ASSIGN_OR_RAISE(bool exists, FileExists(dir_fn));
211 if (!exists) {
212 return Status::OK();
213 }
214 }
215 return status;
216 }
217
218 for (const auto& child_fn : *result) {
219 PlatformFilename full_fn = dir_fn.Join(child_fn);
220 ARROW_ASSIGN_OR_RAISE(FileInfo info, StatFile(full_fn.ToNative()));
221 if (info.type() != FileType::NotFound) {
222 out->push_back(std::move(info));
223 }
224 if (nesting_depth < select.max_recursion && select.recursive &&
225 info.type() == FileType::Directory) {
226 RETURN_NOT_OK(StatSelector(full_fn, select, nesting_depth + 1, out));
227 }
228 }
229 return Status::OK();
230 }
231
232 } // namespace
233
234 LocalFileSystemOptions LocalFileSystemOptions::Defaults() {
235 return LocalFileSystemOptions();
236 }
237
238 bool LocalFileSystemOptions::Equals(const LocalFileSystemOptions& other) const {
239 return use_mmap == other.use_mmap;
240 }
241
242 Result<LocalFileSystemOptions> LocalFileSystemOptions::FromUri(
243 const ::arrow::internal::Uri& uri, std::string* out_path) {
244 if (!uri.username().empty() || !uri.password().empty()) {
245 return Status::Invalid("Unsupported username or password in local URI: '",
246 uri.ToString(), "'");
247 }
248 std::string path;
249 const auto host = uri.host();
250 if (!host.empty()) {
251 #ifdef _WIN32
252 std::stringstream ss;
253 ss << "//" << host << "/" << internal::RemoveLeadingSlash(uri.path());
254 *out_path = ss.str();
255 #else
256 return Status::Invalid("Unsupported hostname in non-Windows local URI: '",
257 uri.ToString(), "'");
258 #endif
259 } else {
260 *out_path = uri.path();
261 }
262
263 // TODO handle use_mmap option
264 return LocalFileSystemOptions();
265 }
266
267 LocalFileSystem::LocalFileSystem(const io::IOContext& io_context)
268 : FileSystem(io_context), options_(LocalFileSystemOptions::Defaults()) {}
269
270 LocalFileSystem::LocalFileSystem(const LocalFileSystemOptions& options,
271 const io::IOContext& io_context)
272 : FileSystem(io_context), options_(options) {}
273
274 LocalFileSystem::~LocalFileSystem() {}
275
276 Result<std::string> LocalFileSystem::NormalizePath(std::string path) {
277 ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
278 return fn.ToString();
279 }
280
281 bool LocalFileSystem::Equals(const FileSystem& other) const {
282 if (other.type_name() != type_name()) {
283 return false;
284 } else {
285 const auto& localfs = ::arrow::internal::checked_cast<const LocalFileSystem&>(other);
286 return options_.Equals(localfs.options());
287 }
288 }
289
290 Result<FileInfo> LocalFileSystem::GetFileInfo(const std::string& path) {
291 ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
292 return StatFile(fn.ToNative());
293 }
294
295 Result<std::vector<FileInfo>> LocalFileSystem::GetFileInfo(const FileSelector& select) {
296 ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(select.base_dir));
297 std::vector<FileInfo> results;
298 RETURN_NOT_OK(StatSelector(fn, select, 0, &results));
299 return results;
300 }
301
302 Status LocalFileSystem::CreateDir(const std::string& path, bool recursive) {
303 ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
304 if (recursive) {
305 return ::arrow::internal::CreateDirTree(fn).status();
306 } else {
307 return ::arrow::internal::CreateDir(fn).status();
308 }
309 }
310
311 Status LocalFileSystem::DeleteDir(const std::string& path) {
312 ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
313 auto st = ::arrow::internal::DeleteDirTree(fn, /*allow_not_found=*/false).status();
314 if (!st.ok()) {
315 // TODO Status::WithPrefix()?
316 std::stringstream ss;
317 ss << "Cannot delete directory '" << path << "': " << st.message();
318 return st.WithMessage(ss.str());
319 }
320 return Status::OK();
321 }
322
323 Status LocalFileSystem::DeleteDirContents(const std::string& path) {
324 if (internal::IsEmptyPath(path)) {
325 return internal::InvalidDeleteDirContents(path);
326 }
327 ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
328 auto st = ::arrow::internal::DeleteDirContents(fn, /*allow_not_found=*/false).status();
329 if (!st.ok()) {
330 std::stringstream ss;
331 ss << "Cannot delete directory contents in '" << path << "': " << st.message();
332 return st.WithMessage(ss.str());
333 }
334 return Status::OK();
335 }
336
337 Status LocalFileSystem::DeleteRootDirContents() {
338 return Status::Invalid("LocalFileSystem::DeleteRootDirContents is strictly forbidden");
339 }
340
341 Status LocalFileSystem::DeleteFile(const std::string& path) {
342 ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
343 return ::arrow::internal::DeleteFile(fn, /*allow_not_found=*/false).status();
344 }
345
346 Status LocalFileSystem::Move(const std::string& src, const std::string& dest) {
347 ARROW_ASSIGN_OR_RAISE(auto sfn, PlatformFilename::FromString(src));
348 ARROW_ASSIGN_OR_RAISE(auto dfn, PlatformFilename::FromString(dest));
349
350 #ifdef _WIN32
351 if (!MoveFileExW(sfn.ToNative().c_str(), dfn.ToNative().c_str(),
352 MOVEFILE_REPLACE_EXISTING)) {
353 return IOErrorFromWinError(GetLastError(), "Failed renaming '", sfn.ToString(),
354 "' to '", dfn.ToString(), "'");
355 }
356 #else
357 if (rename(sfn.ToNative().c_str(), dfn.ToNative().c_str()) == -1) {
358 return IOErrorFromErrno(errno, "Failed renaming '", sfn.ToString(), "' to '",
359 dfn.ToString(), "'");
360 }
361 #endif
362 return Status::OK();
363 }
364
365 Status LocalFileSystem::CopyFile(const std::string& src, const std::string& dest) {
366 ARROW_ASSIGN_OR_RAISE(auto sfn, PlatformFilename::FromString(src));
367 ARROW_ASSIGN_OR_RAISE(auto dfn, PlatformFilename::FromString(dest));
368 // XXX should we use fstat() to compare inodes?
369 if (sfn.ToNative() == dfn.ToNative()) {
370 return Status::OK();
371 }
372
373 #ifdef _WIN32
374 if (!CopyFileW(sfn.ToNative().c_str(), dfn.ToNative().c_str(),
375 FALSE /* bFailIfExists */)) {
376 return IOErrorFromWinError(GetLastError(), "Failed copying '", sfn.ToString(),
377 "' to '", dfn.ToString(), "'");
378 }
379 return Status::OK();
380 #else
381 ARROW_ASSIGN_OR_RAISE(auto is, OpenInputStream(src));
382 ARROW_ASSIGN_OR_RAISE(auto os, OpenOutputStream(dest));
383 RETURN_NOT_OK(internal::CopyStream(is, os, 1024 * 1024 /* chunk_size */, io_context()));
384 RETURN_NOT_OK(os->Close());
385 return is->Close();
386 #endif
387 }
388
389 namespace {
390
391 template <typename InputStreamType>
392 Result<std::shared_ptr<InputStreamType>> OpenInputStreamGeneric(
393 const std::string& path, const LocalFileSystemOptions& options,
394 const io::IOContext& io_context) {
395 if (options.use_mmap) {
396 return io::MemoryMappedFile::Open(path, io::FileMode::READ);
397 } else {
398 return io::ReadableFile::Open(path, io_context.pool());
399 }
400 }
401
402 } // namespace
403
404 Result<std::shared_ptr<io::InputStream>> LocalFileSystem::OpenInputStream(
405 const std::string& path) {
406 return OpenInputStreamGeneric<io::InputStream>(path, options_, io_context());
407 }
408
409 Result<std::shared_ptr<io::RandomAccessFile>> LocalFileSystem::OpenInputFile(
410 const std::string& path) {
411 return OpenInputStreamGeneric<io::RandomAccessFile>(path, options_, io_context());
412 }
413
414 namespace {
415
416 Result<std::shared_ptr<io::OutputStream>> OpenOutputStreamGeneric(const std::string& path,
417 bool truncate,
418 bool append) {
419 int fd;
420 bool write_only = true;
421 ARROW_ASSIGN_OR_RAISE(auto fn, PlatformFilename::FromString(path));
422 ARROW_ASSIGN_OR_RAISE(
423 fd, ::arrow::internal::FileOpenWritable(fn, write_only, truncate, append));
424 auto maybe_stream = io::FileOutputStream::Open(fd);
425 if (!maybe_stream.ok()) {
426 ARROW_UNUSED(::arrow::internal::FileClose(fd));
427 }
428 return maybe_stream;
429 }
430
431 } // namespace
432
433 Result<std::shared_ptr<io::OutputStream>> LocalFileSystem::OpenOutputStream(
434 const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
435 bool truncate = true;
436 bool append = false;
437 return OpenOutputStreamGeneric(path, truncate, append);
438 }
439
440 Result<std::shared_ptr<io::OutputStream>> LocalFileSystem::OpenAppendStream(
441 const std::string& path, const std::shared_ptr<const KeyValueMetadata>& metadata) {
442 bool truncate = false;
443 bool append = true;
444 return OpenOutputStreamGeneric(path, truncate, append);
445 }
446
447 } // namespace fs
448 } // namespace arrow