1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements. See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership. The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License. You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied. See the License for the
15 // specific language governing permissions and limitations
24 #include "arrow/util/windows_compatibility.h"
32 #include "arrow/filesystem/localfs.h"
33 #include "arrow/filesystem/path_util.h"
34 #include "arrow/filesystem/util_internal.h"
35 #include "arrow/io/file.h"
36 #include "arrow/util/io_util.h"
37 #include "arrow/util/logging.h"
38 #include "arrow/util/uri.h"
39 #include "arrow/util/windows_fixup.h"
44 using ::arrow::internal::IOErrorFromErrno
;
46 using ::arrow::internal::IOErrorFromWinError
;
48 using ::arrow::internal::NativePathString
;
49 using ::arrow::internal::PlatformFilename
;
// Returns true when `c` is an ASCII letter (A-Z or a-z).
static bool IsDriveLetter(char c) {
  // Compare against literal ranges: the C/C++ stdlib classification functions
  // are locale-dependent and therefore unsuitable here.
  const bool is_upper = 'A' <= c && c <= 'Z';
  const bool is_lower = 'a' <= c && c <= 'z';
  return is_upper || is_lower;
}
// Tells whether `s` looks like an absolute local filesystem path.
//
// A leading '/' is always recognised.  When built for Windows, a leading '\'
// and a drive-letter prefix ("C:\..." or "C:/...") are recognised as well.
// Returns false for the empty string.
bool DetectAbsolutePath(const std::string& s) {
  if (s.empty()) {
    return false;
  }
  // Is it a /-prefixed local path?
  if (s.front() == '/') {
    return true;
  }
#ifdef _WIN32
  // Is it a \-prefixed local path?
  if (s.front() == '\\') {
    return true;
  }
  // Does it start with a drive letter in addition to being /- or \-prefixed,
  // e.g. "C:\..."?
  if (s.length() >= 3 && s[1] == ':' && (s[2] == '/' || s[2] == '\\') &&
      IsDriveLetter(s[0])) {
    return true;
  }
#endif
  return false;
}
80 } // namespace internal
86 std::string
NativeToString(const NativePathString
& ns
) {
87 PlatformFilename
fn(ns
);
91 TimePoint
ToTimePoint(FILETIME ft
) {
92 // Hundreds of nanoseconds between January 1, 1601 (UTC) and the Unix epoch.
93 static constexpr int64_t kFileTimeEpoch
= 11644473600LL * 10000000;
95 int64_t hundreds
= (static_cast<int64_t>(ft
.dwHighDateTime
) << 32) + ft
.dwLowDateTime
-
96 kFileTimeEpoch
; // hundreds of ns since Unix epoch
97 std::chrono::nanoseconds
ns_count(100 * hundreds
);
98 return TimePoint(std::chrono::duration_cast
<TimePoint::duration
>(ns_count
));
101 FileInfo
FileInformationToFileInfo(const BY_HANDLE_FILE_INFORMATION
& information
) {
103 if (information
.dwFileAttributes
& FILE_ATTRIBUTE_DIRECTORY
) {
104 info
.set_type(FileType::Directory
);
105 info
.set_size(kNoSize
);
108 info
.set_type(FileType::File
);
109 info
.set_size((static_cast<int64_t>(information
.nFileSizeHigh
) << 32) +
110 information
.nFileSizeLow
);
112 info
.set_mtime(ToTimePoint(information
.ftLastWriteTime
));
116 Result
<FileInfo
> StatFile(const std::wstring
& path
) {
118 std::string bytes_path
= NativeToString(path
);
121 /* Inspired by CPython, see Modules/posixmodule.c */
122 h
= CreateFileW(path
.c_str(), FILE_READ_ATTRIBUTES
, /* desired access */
124 NULL
, /* security attributes */
126 /* FILE_FLAG_BACKUP_SEMANTICS is required to open a directory */
127 FILE_ATTRIBUTE_NORMAL
| FILE_FLAG_BACKUP_SEMANTICS
, NULL
);
129 if (h
== INVALID_HANDLE_VALUE
) {
130 DWORD err
= GetLastError();
131 if (err
== ERROR_FILE_NOT_FOUND
|| err
== ERROR_PATH_NOT_FOUND
) {
132 info
.set_path(bytes_path
);
133 info
.set_type(FileType::NotFound
);
134 info
.set_mtime(kNoTime
);
135 info
.set_size(kNoSize
);
138 return IOErrorFromWinError(GetLastError(), "Failed querying information for path '",
142 BY_HANDLE_FILE_INFORMATION information
;
143 if (!GetFileInformationByHandle(h
, &information
)) {
145 return IOErrorFromWinError(GetLastError(), "Failed querying information for path '",
149 info
= FileInformationToFileInfo(information
);
150 info
.set_path(bytes_path
);
154 #else // POSIX systems
156 TimePoint
ToTimePoint(const struct timespec
& s
) {
157 std::chrono::nanoseconds
ns_count(static_cast<int64_t>(s
.tv_sec
) * 1000000000 +
158 static_cast<int64_t>(s
.tv_nsec
));
159 return TimePoint(std::chrono::duration_cast
<TimePoint::duration
>(ns_count
));
162 FileInfo
StatToFileInfo(const struct stat
& s
) {
164 if (S_ISREG(s
.st_mode
)) {
165 info
.set_type(FileType::File
);
166 info
.set_size(static_cast<int64_t>(s
.st_size
));
167 } else if (S_ISDIR(s
.st_mode
)) {
168 info
.set_type(FileType::Directory
);
169 info
.set_size(kNoSize
);
171 info
.set_type(FileType::Unknown
);
172 info
.set_size(kNoSize
);
175 // macOS doesn't use the POSIX-compliant spelling
176 info
.set_mtime(ToTimePoint(s
.st_mtimespec
));
178 info
.set_mtime(ToTimePoint(s
.st_mtim
));
183 Result
<FileInfo
> StatFile(const std::string
& path
) {
186 int r
= stat(path
.c_str(), &s
);
188 if (errno
== ENOENT
|| errno
== ENOTDIR
|| errno
== ELOOP
) {
189 info
.set_type(FileType::NotFound
);
190 info
.set_mtime(kNoTime
);
191 info
.set_size(kNoSize
);
193 return IOErrorFromErrno(errno
, "Failed stat()ing path '", path
, "'");
196 info
= StatToFileInfo(s
);
204 Status
StatSelector(const PlatformFilename
& dir_fn
, const FileSelector
& select
,
205 int32_t nesting_depth
, std::vector
<FileInfo
>* out
) {
206 auto result
= ListDir(dir_fn
);
208 auto status
= result
.status();
209 if (select
.allow_not_found
&& status
.IsIOError()) {
210 ARROW_ASSIGN_OR_RAISE(bool exists
, FileExists(dir_fn
));
218 for (const auto& child_fn
: *result
) {
219 PlatformFilename full_fn
= dir_fn
.Join(child_fn
);
220 ARROW_ASSIGN_OR_RAISE(FileInfo info
, StatFile(full_fn
.ToNative()));
221 if (info
.type() != FileType::NotFound
) {
222 out
->push_back(std::move(info
));
224 if (nesting_depth
< select
.max_recursion
&& select
.recursive
&&
225 info
.type() == FileType::Directory
) {
226 RETURN_NOT_OK(StatSelector(full_fn
, select
, nesting_depth
+ 1, out
));
234 LocalFileSystemOptions
LocalFileSystemOptions::Defaults() {
235 return LocalFileSystemOptions();
238 bool LocalFileSystemOptions::Equals(const LocalFileSystemOptions
& other
) const {
239 return use_mmap
== other
.use_mmap
;
242 Result
<LocalFileSystemOptions
> LocalFileSystemOptions::FromUri(
243 const ::arrow::internal::Uri
& uri
, std::string
* out_path
) {
244 if (!uri
.username().empty() || !uri
.password().empty()) {
245 return Status::Invalid("Unsupported username or password in local URI: '",
246 uri
.ToString(), "'");
249 const auto host
= uri
.host();
252 std::stringstream ss
;
253 ss
<< "//" << host
<< "/" << internal::RemoveLeadingSlash(uri
.path());
254 *out_path
= ss
.str();
256 return Status::Invalid("Unsupported hostname in non-Windows local URI: '",
257 uri
.ToString(), "'");
260 *out_path
= uri
.path();
263 // TODO handle use_mmap option
264 return LocalFileSystemOptions();
267 LocalFileSystem::LocalFileSystem(const io::IOContext
& io_context
)
268 : FileSystem(io_context
), options_(LocalFileSystemOptions::Defaults()) {}
270 LocalFileSystem::LocalFileSystem(const LocalFileSystemOptions
& options
,
271 const io::IOContext
& io_context
)
272 : FileSystem(io_context
), options_(options
) {}
274 LocalFileSystem::~LocalFileSystem() {}
276 Result
<std::string
> LocalFileSystem::NormalizePath(std::string path
) {
277 ARROW_ASSIGN_OR_RAISE(auto fn
, PlatformFilename::FromString(path
));
278 return fn
.ToString();
281 bool LocalFileSystem::Equals(const FileSystem
& other
) const {
282 if (other
.type_name() != type_name()) {
285 const auto& localfs
= ::arrow::internal::checked_cast
<const LocalFileSystem
&>(other
);
286 return options_
.Equals(localfs
.options());
290 Result
<FileInfo
> LocalFileSystem::GetFileInfo(const std::string
& path
) {
291 ARROW_ASSIGN_OR_RAISE(auto fn
, PlatformFilename::FromString(path
));
292 return StatFile(fn
.ToNative());
295 Result
<std::vector
<FileInfo
>> LocalFileSystem::GetFileInfo(const FileSelector
& select
) {
296 ARROW_ASSIGN_OR_RAISE(auto fn
, PlatformFilename::FromString(select
.base_dir
));
297 std::vector
<FileInfo
> results
;
298 RETURN_NOT_OK(StatSelector(fn
, select
, 0, &results
));
302 Status
LocalFileSystem::CreateDir(const std::string
& path
, bool recursive
) {
303 ARROW_ASSIGN_OR_RAISE(auto fn
, PlatformFilename::FromString(path
));
305 return ::arrow::internal::CreateDirTree(fn
).status();
307 return ::arrow::internal::CreateDir(fn
).status();
311 Status
LocalFileSystem::DeleteDir(const std::string
& path
) {
312 ARROW_ASSIGN_OR_RAISE(auto fn
, PlatformFilename::FromString(path
));
313 auto st
= ::arrow::internal::DeleteDirTree(fn
, /*allow_not_found=*/false).status();
315 // TODO Status::WithPrefix()?
316 std::stringstream ss
;
317 ss
<< "Cannot delete directory '" << path
<< "': " << st
.message();
318 return st
.WithMessage(ss
.str());
323 Status
LocalFileSystem::DeleteDirContents(const std::string
& path
) {
324 if (internal::IsEmptyPath(path
)) {
325 return internal::InvalidDeleteDirContents(path
);
327 ARROW_ASSIGN_OR_RAISE(auto fn
, PlatformFilename::FromString(path
));
328 auto st
= ::arrow::internal::DeleteDirContents(fn
, /*allow_not_found=*/false).status();
330 std::stringstream ss
;
331 ss
<< "Cannot delete directory contents in '" << path
<< "': " << st
.message();
332 return st
.WithMessage(ss
.str());
337 Status
LocalFileSystem::DeleteRootDirContents() {
338 return Status::Invalid("LocalFileSystem::DeleteRootDirContents is strictly forbidden");
341 Status
LocalFileSystem::DeleteFile(const std::string
& path
) {
342 ARROW_ASSIGN_OR_RAISE(auto fn
, PlatformFilename::FromString(path
));
343 return ::arrow::internal::DeleteFile(fn
, /*allow_not_found=*/false).status();
346 Status
LocalFileSystem::Move(const std::string
& src
, const std::string
& dest
) {
347 ARROW_ASSIGN_OR_RAISE(auto sfn
, PlatformFilename::FromString(src
));
348 ARROW_ASSIGN_OR_RAISE(auto dfn
, PlatformFilename::FromString(dest
));
351 if (!MoveFileExW(sfn
.ToNative().c_str(), dfn
.ToNative().c_str(),
352 MOVEFILE_REPLACE_EXISTING
)) {
353 return IOErrorFromWinError(GetLastError(), "Failed renaming '", sfn
.ToString(),
354 "' to '", dfn
.ToString(), "'");
357 if (rename(sfn
.ToNative().c_str(), dfn
.ToNative().c_str()) == -1) {
358 return IOErrorFromErrno(errno
, "Failed renaming '", sfn
.ToString(), "' to '",
359 dfn
.ToString(), "'");
365 Status
LocalFileSystem::CopyFile(const std::string
& src
, const std::string
& dest
) {
366 ARROW_ASSIGN_OR_RAISE(auto sfn
, PlatformFilename::FromString(src
));
367 ARROW_ASSIGN_OR_RAISE(auto dfn
, PlatformFilename::FromString(dest
));
368 // XXX should we use fstat() to compare inodes?
369 if (sfn
.ToNative() == dfn
.ToNative()) {
374 if (!CopyFileW(sfn
.ToNative().c_str(), dfn
.ToNative().c_str(),
375 FALSE
/* bFailIfExists */)) {
376 return IOErrorFromWinError(GetLastError(), "Failed copying '", sfn
.ToString(),
377 "' to '", dfn
.ToString(), "'");
381 ARROW_ASSIGN_OR_RAISE(auto is
, OpenInputStream(src
));
382 ARROW_ASSIGN_OR_RAISE(auto os
, OpenOutputStream(dest
));
383 RETURN_NOT_OK(internal::CopyStream(is
, os
, 1024 * 1024 /* chunk_size */, io_context()));
384 RETURN_NOT_OK(os
->Close());
391 template <typename InputStreamType
>
392 Result
<std::shared_ptr
<InputStreamType
>> OpenInputStreamGeneric(
393 const std::string
& path
, const LocalFileSystemOptions
& options
,
394 const io::IOContext
& io_context
) {
395 if (options
.use_mmap
) {
396 return io::MemoryMappedFile::Open(path
, io::FileMode::READ
);
398 return io::ReadableFile::Open(path
, io_context
.pool());
404 Result
<std::shared_ptr
<io::InputStream
>> LocalFileSystem::OpenInputStream(
405 const std::string
& path
) {
406 return OpenInputStreamGeneric
<io::InputStream
>(path
, options_
, io_context());
409 Result
<std::shared_ptr
<io::RandomAccessFile
>> LocalFileSystem::OpenInputFile(
410 const std::string
& path
) {
411 return OpenInputStreamGeneric
<io::RandomAccessFile
>(path
, options_
, io_context());
416 Result
<std::shared_ptr
<io::OutputStream
>> OpenOutputStreamGeneric(const std::string
& path
,
420 bool write_only
= true;
421 ARROW_ASSIGN_OR_RAISE(auto fn
, PlatformFilename::FromString(path
));
422 ARROW_ASSIGN_OR_RAISE(
423 fd
, ::arrow::internal::FileOpenWritable(fn
, write_only
, truncate
, append
));
424 auto maybe_stream
= io::FileOutputStream::Open(fd
);
425 if (!maybe_stream
.ok()) {
426 ARROW_UNUSED(::arrow::internal::FileClose(fd
));
433 Result
<std::shared_ptr
<io::OutputStream
>> LocalFileSystem::OpenOutputStream(
434 const std::string
& path
, const std::shared_ptr
<const KeyValueMetadata
>& metadata
) {
435 bool truncate
= true;
437 return OpenOutputStreamGeneric(path
, truncate
, append
);
440 Result
<std::shared_ptr
<io::OutputStream
>> LocalFileSystem::OpenAppendStream(
441 const std::string
& path
, const std::shared_ptr
<const KeyValueMetadata
>& metadata
) {
442 bool truncate
= false;
444 return OpenOutputStreamGeneric(path
, truncate
, append
);