1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under the BSD-style license found in the
3 // LICENSE file in the root directory of this source tree. An additional grant
4 // of patent rights can be found in the PATENTS file in the same directory.
5 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
6 // Use of this source code is governed by a BSD-style license that can be
7 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 // An Env is an interface used by the rocksdb implementation to access
10 // operating system functionality like the filesystem etc. Callers
11 // may wish to provide a custom Env object when opening a database to
12 // get fine-grained control; e.g., to rate limit file system operations.
14 // All Env implementations are safe for concurrent access from
15 // multiple threads without any external synchronization.
17 #ifndef STORAGE_ROCKSDB_INCLUDE_ENV_H_
18 #define STORAGE_ROCKSDB_INCLUDE_ENV_H_
27 #include "rocksdb/status.h"
28 #include "rocksdb/thread_status.h"
31 // Windows API macro interference
40 class RandomAccessFile
;
47 struct ImmutableDBOptions
;
49 class ThreadStatusUpdater
;
52 using std::unique_ptr
;
53 using std::shared_ptr
;
// Default page size (4 * 1024 = 4096, presumably bytes). The default
// GetRequiredBufferAlignment() implementations in this header return this
// value as the buffer alignment to use for Direct I/O.
const size_t kDefaultPageSize = 4096;
57 // Options while opening a file to read/write
60 // construct with default Options
63 // construct from Options
64 explicit EnvOptions(const DBOptions
& options
);
66 // If true, then use mmap to read data
67 bool use_mmap_reads
= false;
69 // If true, then use mmap to write data
70 bool use_mmap_writes
= true;
72 // If true, then use O_DIRECT for reading data
73 bool use_direct_reads
= false;
75 // If true, then use O_DIRECT for writing data
76 bool use_direct_writes
= false;
78 // If false, fallocate() calls are bypassed
79 bool allow_fallocate
= true;
81 // If true, set the FD_CLOEXEC on open fd.
82 bool set_fd_cloexec
= true;
84 // Allows OS to incrementally sync files to disk while they are being
85 // written, in the background. Issue one request for every bytes_per_sync
86 // written. 0 turns it off.
88 uint64_t bytes_per_sync
= 0;
90 // If true, we will preallocate the file with FALLOC_FL_KEEP_SIZE flag, which
91 // means that file size won't change as part of preallocation.
92 // If false, preallocation will also change the file size. This option will
93 // improve the performance in workloads where you sync the data on every
94 // write. By default, we set it to true for MANIFEST writes and false for
96 bool fallocate_with_keep_size
= true;
99 size_t compaction_readahead_size
;
102 size_t random_access_max_buffer_size
;
105 size_t writable_file_max_buffer_size
= 1024 * 1024;
107 // If not nullptr, write rate limiting is enabled for flush and compaction
108 RateLimiter
* rate_limiter
= nullptr;
113 struct FileAttributes
{
117 // Size of file in bytes
121 Env() : thread_status_updater_(nullptr) {}
125 // Return a default environment suitable for the current operating
126 // system. Sophisticated users may wish to provide their own Env
127 // implementation instead of relying on this default environment.
129 // The result of Default() belongs to rocksdb and must never be deleted.
130 static Env
* Default();
132 // Create a brand new sequentially-readable file with the specified name.
133 // On success, stores a pointer to the new file in *result and returns OK.
134 // On failure stores nullptr in *result and returns non-OK. If the file does
135 // not exist, returns a non-OK status.
137 // The returned file will only be accessed by one thread at a time.
138 virtual Status
NewSequentialFile(const std::string
& fname
,
139 unique_ptr
<SequentialFile
>* result
,
140 const EnvOptions
& options
)
143 // Create a brand new random access read-only file with the
144 // specified name. On success, stores a pointer to the new file in
145 // *result and returns OK. On failure stores nullptr in *result and
146 // returns non-OK. If the file does not exist, returns a non-OK
149 // The returned file may be concurrently accessed by multiple threads.
150 virtual Status
NewRandomAccessFile(const std::string
& fname
,
151 unique_ptr
<RandomAccessFile
>* result
,
152 const EnvOptions
& options
)
155 // Create an object that writes to a new file with the specified
156 // name. Deletes any existing file with the same name and creates a
157 // new file. On success, stores a pointer to the new file in
158 // *result and returns OK. On failure stores nullptr in *result and
161 // The returned file will only be accessed by one thread at a time.
162 virtual Status
NewWritableFile(const std::string
& fname
,
163 unique_ptr
<WritableFile
>* result
,
164 const EnvOptions
& options
) = 0;
166 // Create an object that writes to a new file with the specified
167 // name. Deletes any existing file with the same name and creates a
168 // new file. On success, stores a pointer to the new file in
169 // *result and returns OK. On failure stores nullptr in *result and
172 // The returned file will only be accessed by one thread at a time.
173 virtual Status
ReopenWritableFile(const std::string
& fname
,
174 unique_ptr
<WritableFile
>* result
,
175 const EnvOptions
& options
) {
180 // Reuse an existing file by renaming it and opening it as writable.
181 virtual Status
ReuseWritableFile(const std::string
& fname
,
182 const std::string
& old_fname
,
183 unique_ptr
<WritableFile
>* result
,
184 const EnvOptions
& options
);
186 // Open `fname` for random read and write; if the file doesn't exist, it
187 // will be created. On success, stores a pointer to the new file in
188 // *result and returns OK. On failure returns non-OK.
190 // The returned file will only be accessed by one thread at a time.
191 virtual Status
NewRandomRWFile(const std::string
& fname
,
192 unique_ptr
<RandomRWFile
>* result
,
193 const EnvOptions
& options
) {
194 return Status::NotSupported("RandomRWFile is not implemented in this Env");
197 // Create an object that represents a directory. Will fail if directory
198 // doesn't exist. If the directory exists, it will open the directory
199 // and create a new Directory object.
201 // On success, stores a pointer to the new Directory in
202 // *result and returns OK. On failure stores nullptr in *result and
204 virtual Status
NewDirectory(const std::string
& name
,
205 unique_ptr
<Directory
>* result
) = 0;
207 // Returns OK if the named file exists.
208 // NotFound if the named file does not exist,
209 // the calling process does not have permission to determine
210 // whether this file exists, or if the path is invalid.
211 // IOError if an IO Error was encountered
212 virtual Status
FileExists(const std::string
& fname
) = 0;
214 // Store in *result the names of the children of the specified directory.
215 // The names are relative to "dir".
216 // Original contents of *result are dropped.
217 // Returns OK if "dir" exists and "*result" contains its children.
218 // NotFound if "dir" does not exist, the calling process does not have
219 // permission to access "dir", or if "dir" is invalid.
220 // IOError if an IO Error was encountered
221 virtual Status
GetChildren(const std::string
& dir
,
222 std::vector
<std::string
>* result
) = 0;
224 // Store in *result the attributes of the children of the specified directory.
225 // In case the implementation lists the directory prior to iterating the files
226 // and files are concurrently deleted, the deleted files will be omitted from
228 // The name attributes are relative to "dir".
229 // Original contents of *result are dropped.
230 // Returns OK if "dir" exists and "*result" contains its children.
231 // NotFound if "dir" does not exist, the calling process does not have
232 // permission to access "dir", or if "dir" is invalid.
233 // IOError if an IO Error was encountered
234 virtual Status
GetChildrenFileAttributes(const std::string
& dir
,
235 std::vector
<FileAttributes
>* result
);
237 // Delete the named file.
238 virtual Status
DeleteFile(const std::string
& fname
) = 0;
240 // Create the specified directory. Returns error if directory exists.
241 virtual Status
CreateDir(const std::string
& dirname
) = 0;
243 // Creates directory if missing. Return Ok if it exists, or successful in
245 virtual Status
CreateDirIfMissing(const std::string
& dirname
) = 0;
247 // Delete the specified directory.
248 virtual Status
DeleteDir(const std::string
& dirname
) = 0;
250 // Store the size of fname in *file_size.
251 virtual Status
GetFileSize(const std::string
& fname
, uint64_t* file_size
) = 0;
253 // Store the last modification time of fname in *file_mtime.
254 virtual Status
GetFileModificationTime(const std::string
& fname
,
255 uint64_t* file_mtime
) = 0;
256 // Rename file src to target.
257 virtual Status
RenameFile(const std::string
& src
,
258 const std::string
& target
) = 0;
260 // Hard Link file src to target.
261 virtual Status
LinkFile(const std::string
& src
, const std::string
& target
) {
262 return Status::NotSupported("LinkFile is not supported for this Env");
265 // Lock the specified file. Used to prevent concurrent access to
266 // the same db by multiple processes. On failure, stores nullptr in
267 // *lock and returns non-OK.
269 // On success, stores a pointer to the object that represents the
270 // acquired lock in *lock and returns OK. The caller should call
271 // UnlockFile(*lock) to release the lock. If the process exits,
272 // the lock will be automatically released.
274 // If somebody else already holds the lock, finishes immediately
275 // with a failure. I.e., this call does not wait for existing locks
278 // May create the named file if it does not already exist.
279 virtual Status
LockFile(const std::string
& fname
, FileLock
** lock
) = 0;
281 // Release the lock acquired by a previous successful call to LockFile.
282 // REQUIRES: lock was returned by a successful LockFile() call
283 // REQUIRES: lock has not already been unlocked.
284 virtual Status
UnlockFile(FileLock
* lock
) = 0;
// Selects the thread pool in which a background job is scheduled.
enum Priority {
  LOW,   // default pool for Schedule() and the other pool-related calls
  HIGH,
  TOTAL  // NOTE(review): presumably the number of pools, not a pool -- confirm
};
289 // Priority for requesting bytes in rate limiter scheduler
296 // Arrange to run "(*function)(arg)" once in a background thread, in
297 // the thread pool specified by pri. By default, jobs go to the 'LOW'
298 // priority thread pool.
300 // "function" may run in an unspecified thread. Multiple functions
301 // added to the same Env may run concurrently in different threads.
302 // I.e., the caller may not assume that background work items are
304 // When the UnSchedule function is called, the unschedFunction
305 // registered at the time of Schedule is invoked with arg as a parameter.
306 virtual void Schedule(void (*function
)(void* arg
), void* arg
,
307 Priority pri
= LOW
, void* tag
= nullptr,
308 void (*unschedFunction
)(void* arg
) = 0) = 0;
310 // Arrange to remove jobs for given arg from the queue_ if they are not
311 // already scheduled. Caller is expected to have exclusive lock on arg.
312 virtual int UnSchedule(void* arg
, Priority pri
) { return 0; }
314 // Start a new thread, invoking "function(arg)" within the new thread.
315 // When "function(arg)" returns, the thread will be destroyed.
316 virtual void StartThread(void (*function
)(void* arg
), void* arg
) = 0;
318 // Wait for all threads started by StartThread to terminate.
319 virtual void WaitForJoin() {}
321 // Get thread pool queue length for a specific thread pool.
322 virtual unsigned int GetThreadPoolQueueLen(Priority pri
= LOW
) const {
326 // *path is set to a temporary directory that can be used for testing. It may
327 // or may not have just been created. The directory may or may not differ
328 // between runs of the same process, but subsequent calls will return the
330 virtual Status
GetTestDirectory(std::string
* path
) = 0;
332 // Create and return a log file for storing informational messages.
333 virtual Status
NewLogger(const std::string
& fname
,
334 shared_ptr
<Logger
>* result
) = 0;
336 // Returns the number of micro-seconds since some fixed point in time.
337 // It is often used as system time such as in GenericRateLimiter
338 // and other places so a port needs to return system time in order to work.
339 virtual uint64_t NowMicros() = 0;
341 // Returns the number of nano-seconds since some fixed point in time. Only
342 // useful for computing deltas of time in one run.
343 // Default implementation simply relies on NowMicros.
344 // In platform-specific implementations, NowNanos() should return time points
345 // that are MONOTONIC.
346 virtual uint64_t NowNanos() {
347 return NowMicros() * 1000;
350 // Sleep/delay the thread for the prescribed number of micro-seconds.
351 virtual void SleepForMicroseconds(int micros
) = 0;
353 // Get the current host name.
354 virtual Status
GetHostName(char* name
, uint64_t len
) = 0;
356 // Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
357 virtual Status
GetCurrentTime(int64_t* unix_time
) = 0;
359 // Get full directory name for this db.
360 virtual Status
GetAbsolutePath(const std::string
& db_path
,
361 std::string
* output_path
) = 0;
363 // The number of background worker threads of a specific thread pool
364 // for this environment. 'LOW' is the default pool.
366 virtual void SetBackgroundThreads(int number
, Priority pri
= LOW
) = 0;
368 // Enlarge number of background worker threads of a specific thread pool
369 // for this environment if it is smaller than specified. 'LOW' is the default
371 virtual void IncBackgroundThreadsIfNeeded(int number
, Priority pri
) = 0;
373 // Lower IO priority for threads from the specified pool.
374 virtual void LowerThreadPoolIOPriority(Priority pool
= LOW
) {}
376 // Converts seconds-since-Jan-01-1970 to a printable string
377 virtual std::string
TimeToString(uint64_t time
) = 0;
379 // Generates a unique id that can be used to identify a db
380 virtual std::string
GenerateUniqueId();
382 // OptimizeForLogWrite will create a new EnvOptions object that is a copy of
383 // the EnvOptions in the parameters, but is optimized for writing log files.
384 // Default implementation returns the copy of the same object.
385 virtual EnvOptions
OptimizeForLogWrite(const EnvOptions
& env_options
,
386 const DBOptions
& db_options
) const;
387 // OptimizeForManifestWrite will create a new EnvOptions object that is a copy
388 // of the EnvOptions in the parameters, but is optimized for writing manifest
389 // files. Default implementation returns the copy of the same object.
390 virtual EnvOptions
OptimizeForManifestWrite(
391 const EnvOptions
& env_options
) const;
393 // OptimizeForCompactionTableWrite will create a new EnvOptions object that is a copy
394 // of the EnvOptions in the parameters, but is optimized for writing table
395 // files. Default implementation returns the copy of the same object.
396 virtual EnvOptions
OptimizeForCompactionTableWrite(
397 const EnvOptions
& env_options
,
398 const ImmutableDBOptions
& db_options
) const;
400 // OptimizeForCompactionTableRead will create a new EnvOptions object that is a copy
401 // of the EnvOptions in the parameters, but is optimized for reading table
402 // files. Default implementation returns the copy of the same object.
403 virtual EnvOptions
OptimizeForCompactionTableRead(
404 const EnvOptions
& env_options
,
405 const ImmutableDBOptions
& db_options
) const;
407 // Returns the status of all threads that belong to the current Env.
408 virtual Status
GetThreadList(std::vector
<ThreadStatus
>* thread_list
) {
409 return Status::NotSupported("Not supported.");
412 // Returns the pointer to ThreadStatusUpdater. This function will be
413 // used in RocksDB internally to update thread status and supports
415 virtual ThreadStatusUpdater
* GetThreadStatusUpdater() const {
416 return thread_status_updater_
;
419 // Returns the ID of the current thread.
420 virtual uint64_t GetThreadID() const;
423 // The pointer to an internal structure that will update the
424 // status of each thread.
425 ThreadStatusUpdater
* thread_status_updater_
;
428 // No copying allowed
430 void operator=(const Env
&);
433 // The factory function to construct a ThreadStatusUpdater. Any Env
434 // that supports GetThreadList() feature should call this function in its
435 // constructor to initialize thread_status_updater_.
436 ThreadStatusUpdater
* CreateThreadStatusUpdater();
438 // A file abstraction for reading sequentially through a file
439 class SequentialFile
{
442 virtual ~SequentialFile();
444 // Read up to "n" bytes from the file. "scratch[0..n-1]" may be
445 // written by this routine. Sets "*result" to the data that was
446 // read (including if fewer than "n" bytes were successfully read).
447 // May set "*result" to point at data in "scratch[0..n-1]", so
448 // "scratch[0..n-1]" must be live when "*result" is used.
449 // If an error was encountered, returns a non-OK status.
451 // REQUIRES: External synchronization
452 virtual Status
Read(size_t n
, Slice
* result
, char* scratch
) = 0;
454 // Skip "n" bytes from the file. This is guaranteed to be no
455 // slower than reading the same data, but may be faster.
457 // If end of file is reached, skipping will stop at the end of the
458 // file, and Skip will return OK.
460 // REQUIRES: External synchronization
461 virtual Status
Skip(uint64_t n
) = 0;
463 // Indicates the upper layers if the current SequentialFile implementation
465 virtual bool use_direct_io() const { return false; }
467 // Use the returned alignment value to allocate
468 // aligned buffer for Direct I/O
469 virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize
; }
471 virtual void Rewind() {}
473 // Remove any kind of caching of data from the offset to offset+length
474 // of this file. If the length is 0, then it refers to the end of file.
475 // If the system is not caching the file contents, then this is a noop.
476 virtual Status
InvalidateCache(size_t offset
, size_t length
) {
477 return Status::NotSupported("InvalidateCache not supported.");
480 // Positioned Read for direct I/O
481 // If Direct I/O enabled, offset, n, and scratch should be properly aligned
482 virtual Status
PositionedRead(uint64_t offset
, size_t n
, Slice
* result
,
484 return Status::NotSupported();
488 // A file abstraction for randomly reading the contents of a file.
489 class RandomAccessFile
{
492 RandomAccessFile() { }
493 virtual ~RandomAccessFile();
495 // Read up to "n" bytes from the file starting at "offset".
496 // "scratch[0..n-1]" may be written by this routine. Sets "*result"
497 // to the data that was read (including if fewer than "n" bytes were
498 // successfully read). May set "*result" to point at data in
499 // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
500 // "*result" is used. If an error was encountered, returns a non-OK
503 // Safe for concurrent use by multiple threads.
504 // If Direct I/O enabled, offset, n, and scratch should be aligned properly.
505 virtual Status
Read(uint64_t offset
, size_t n
, Slice
* result
,
506 char* scratch
) const = 0;
508 // Readahead the file starting from offset by n bytes for caching.
509 virtual Status
Prefetch(uint64_t offset
, size_t n
) {
513 // Used by the file_reader_writer to decide if the ReadAhead wrapper
514 // should simply forward the call and do not enact buffering or locking.
515 virtual bool ShouldForwardRawRequest() const {
519 // For cases when read-ahead is implemented in the platform dependent
521 virtual void EnableReadAhead() {}
523 // Tries to get a unique ID for this file that will be the same each time
524 // the file is opened (and will stay the same while the file is open).
525 // Furthermore, it tries to make this ID at most "max_size" bytes. If such an
526 // ID can be created this function returns the length of the ID and places it
527 // in "id"; otherwise, this function returns 0, in which case "id"
528 // may not have been modified.
530 // This function guarantees, for IDs from a given environment, two unique ids
531 // cannot be made equal to each other by adding arbitrary bytes to one of
532 // them. That is, no unique ID is the prefix of another.
534 // This function guarantees that the returned ID will not be interpretable as
537 // Note: these IDs are only valid for the duration of the process.
538 virtual size_t GetUniqueId(char* id
, size_t max_size
) const {
539 return 0; // Default implementation to prevent issues with backwards
// Access-pattern values accepted by Hint().
enum AccessPattern {
  NORMAL,
  RANDOM,
  SEQUENTIAL,
  WILLNEED,
  DONTNEED
};
545 virtual void Hint(AccessPattern pattern
) {}
547 // Indicates the upper layers if the current RandomAccessFile implementation
549 virtual bool use_direct_io() const { return false; }
551 // Use the returned alignment value to allocate
552 // aligned buffer for Direct I/O
553 virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize
; }
555 // Remove any kind of caching of data from the offset to offset+length
556 // of this file. If the length is 0, then it refers to the end of file.
557 // If the system is not caching the file contents, then this is a noop.
558 virtual Status
InvalidateCache(size_t offset
, size_t length
) {
559 return Status::NotSupported("InvalidateCache not supported.");
563 // A file abstraction for sequential writing. The implementation
564 // must provide buffering since callers may append small fragments
565 // at a time to the file.
569 : last_preallocated_block_(0),
570 preallocation_block_size_(0),
571 io_priority_(Env::IO_TOTAL
) {
573 virtual ~WritableFile();
575 // Append data to the end of the file
576 // Note: A WritableFile object must support either Append or
577 // PositionedAppend, so the users cannot mix the two.
578 virtual Status
Append(const Slice
& data
) = 0;
580 // PositionedAppend data to the specified offset. The new EOF after append
581 // must be larger than the previous EOF. This is to be used when writes are
582 // not backed by OS buffers and hence has to always start from the start of
583 // the sector. The implementation thus needs to also rewrite the last
585 // Note: PositionedAppend does not guarantee moving the file offset after the
586 // write. A WritableFile object must support either Append or
587 // PositionedAppend, so the users cannot mix the two.
589 // PositionedAppend() can only happen on the page/sector boundaries. For that
590 // reason, if the last write was an incomplete sector we still need to rewind
591 // back to the nearest sector/page and rewrite the portion of it with whatever
592 // we need to add. We need to keep where we stop writing.
594 // PositionedAppend() can only write whole sectors. For that reason we have to
595 // pad with zeros for the last write and trim the file when closing according
596 // to the position we keep in the previous step.
598 // PositionedAppend() requires aligned buffer to be passed in. The alignment
599 // required is queried via GetRequiredBufferAlignment()
600 virtual Status
PositionedAppend(const Slice
& /* data */, uint64_t /* offset */) {
601 return Status::NotSupported();
604 // Truncate is necessary to trim the file to the correct size
605 // before closing. It is not always possible to keep track of the file
606 // size due to whole pages writes. The behavior is undefined if called
607 // with other writes to follow.
608 virtual Status
Truncate(uint64_t size
) {
611 virtual Status
Close() = 0;
612 virtual Status
Flush() = 0;
613 virtual Status
Sync() = 0; // sync data
616 * Sync data and/or metadata as well.
617 * By default, sync only data.
618 * Override this method for environments where we need to sync
621 virtual Status
Fsync() {
625 // true if Sync() and Fsync() are safe to call concurrently with Append()
627 virtual bool IsSyncThreadSafe() const {
631 // Indicates the upper layers if the current WritableFile implementation
633 virtual bool use_direct_io() const { return false; }
635 // Use the returned alignment value to allocate
636 // aligned buffer for Direct I/O
637 virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize
; }
639 * Change the priority in rate limiter if rate limiting is enabled.
640 * If rate limiting is not enabled, this call has no effect.
642 virtual void SetIOPriority(Env::IOPriority pri
) {
646 virtual Env::IOPriority
GetIOPriority() { return io_priority_
; }
649 * Get the size of valid data in the file.
651 virtual uint64_t GetFileSize() {
656 * Get and set the default pre-allocation block size for writes to
657 * this file. If non-zero, then Allocate will be used to extend the
658 * underlying storage of a file (generally via fallocate) if the Env
659 * instance supports it.
661 virtual void SetPreallocationBlockSize(size_t size
) {
662 preallocation_block_size_
= size
;
665 virtual void GetPreallocationStatus(size_t* block_size
,
666 size_t* last_allocated_block
) {
667 *last_allocated_block
= last_preallocated_block_
;
668 *block_size
= preallocation_block_size_
;
671 // For documentation, refer to RandomAccessFile::GetUniqueId()
672 virtual size_t GetUniqueId(char* id
, size_t max_size
) const {
673 return 0; // Default implementation to prevent issues with backwards
676 // Remove any kind of caching of data from the offset to offset+length
677 // of this file. If the length is 0, then it refers to the end of file.
678 // If the system is not caching the file contents, then this is a noop.
679 // This call has no effect on dirty pages in the cache.
680 virtual Status
InvalidateCache(size_t offset
, size_t length
) {
681 return Status::NotSupported("InvalidateCache not supported.");
684 // Sync a file range with disk.
685 // offset is the starting byte of the file range to be synchronized.
686 // nbytes specifies the length of the range to be synchronized.
687 // This asks the OS to initiate flushing the cached data to disk,
688 // without waiting for completion.
689 // Default implementation does nothing.
690 virtual Status
RangeSync(uint64_t offset
, uint64_t nbytes
) { return Status::OK(); }
692 // PrepareWrite performs any necessary preparation for a write
693 // before the write actually occurs. This allows for pre-allocation
694 // of space on devices where it can result in less file
695 // fragmentation and/or less waste from over-zealous filesystem
697 virtual void PrepareWrite(size_t offset
, size_t len
) {
698 if (preallocation_block_size_
== 0) {
701 // If this write would cross one or more preallocation blocks,
702 // determine what the last preallocation block necessary to
703 // cover this write would be and Allocate to that point.
704 const auto block_size
= preallocation_block_size_
;
705 size_t new_last_preallocated_block
=
706 (offset
+ len
+ block_size
- 1) / block_size
;
707 if (new_last_preallocated_block
> last_preallocated_block_
) {
708 size_t num_spanned_blocks
=
709 new_last_preallocated_block
- last_preallocated_block_
;
710 Allocate(block_size
* last_preallocated_block_
,
711 block_size
* num_spanned_blocks
);
712 last_preallocated_block_
= new_last_preallocated_block
;
718 * Pre-allocate space for a file.
720 virtual Status
Allocate(uint64_t offset
, uint64_t len
) {
724 size_t preallocation_block_size() { return preallocation_block_size_
; }
727 size_t last_preallocated_block_
;
728 size_t preallocation_block_size_
;
729 // No copying allowed
730 WritableFile(const WritableFile
&);
731 void operator=(const WritableFile
&);
734 friend class WritableFileWrapper
;
735 friend class WritableFileMirror
;
737 Env::IOPriority io_priority_
;
740 // A file abstraction for random reading and writing.
744 virtual ~RandomRWFile() {}
746 // Indicates if the class makes use of direct I/O
747 // If true, you must pass an aligned buffer to Write()
748 virtual bool use_direct_io() const { return false; }
750 // Use the returned alignment value to allocate
751 // aligned buffer for Direct I/O
752 virtual size_t GetRequiredBufferAlignment() const { return kDefaultPageSize
; }
754 // Used by the file_reader_writer to decide if the ReadAhead wrapper
755 // should simply forward the call and do not enact read_ahead buffering or locking.
756 // The implementation below takes care of reading ahead
757 virtual bool ShouldForwardRawRequest() const {
761 // For cases when read-ahead is implemented in the platform dependent
762 // layer. This is when ShouldForwardRawRequest() returns true.
763 virtual void EnableReadAhead() {}
765 // Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
766 // Pass aligned buffer when use_direct_io() returns true.
767 virtual Status
Write(uint64_t offset
, const Slice
& data
) = 0;
769 // Read up to `n` bytes starting from offset `offset` and store them in
770 // result, provided `scratch` size should be at least `n`.
771 // Returns Status::OK() on success.
772 virtual Status
Read(uint64_t offset
, size_t n
, Slice
* result
,
773 char* scratch
) const = 0;
775 virtual Status
Flush() = 0;
777 virtual Status
Sync() = 0;
779 virtual Status
Fsync() { return Sync(); }
781 virtual Status
Close() = 0;
783 // No copying allowed
784 RandomRWFile(const RandomRWFile
&) = delete;
785 RandomRWFile
& operator=(const RandomRWFile
&) = delete;
788 // Directory object represents collection of files and implements
789 // filesystem operations that can be executed on directories.
792 virtual ~Directory() {}
793 // Fsync directory. Can be called concurrently from multiple threads.
794 virtual Status
Fsync() = 0;
797 enum InfoLogLevel
: unsigned char {
807 // An interface for writing log messages.
810 size_t kDoNotSupportGetLogFileSize
= std::numeric_limits
<size_t>::max();
812 explicit Logger(const InfoLogLevel log_level
= InfoLogLevel::INFO_LEVEL
)
813 : log_level_(log_level
) {}
816 // Write a header to the log file with the specified format
817 // It is recommended that you log all header information at the start of the
818 // application. But it is not enforced.
819 virtual void LogHeader(const char* format
, va_list ap
) {
820 // Default implementation does a simple INFO level log write.
821 // Please override as per the logger class requirement.
825 // Write an entry to the log file with the specified format.
826 virtual void Logv(const char* format
, va_list ap
) = 0;
828 // Write an entry to the log file with the specified log level
829 // and format. Any log with level under the internal log level
830 // of *this (see @SetInfoLogLevel and @GetInfoLogLevel) will not be
832 virtual void Logv(const InfoLogLevel log_level
, const char* format
, va_list ap
);
834 virtual size_t GetLogFileSize() const { return kDoNotSupportGetLogFileSize
; }
835 // Flush to the OS buffers
836 virtual void Flush() {}
837 virtual InfoLogLevel
GetInfoLogLevel() const { return log_level_
; }
838 virtual void SetInfoLogLevel(const InfoLogLevel log_level
) {
839 log_level_
= log_level
;
843 // No copying allowed
844 Logger(const Logger
&);
845 void operator=(const Logger
&);
846 InfoLogLevel log_level_
;
850 // Identifies a locked file.
856 // No copying allowed
857 FileLock(const FileLock
&);
858 void operator=(const FileLock
&);
861 extern void LogFlush(const shared_ptr
<Logger
>& info_log
);
863 extern void Log(const InfoLogLevel log_level
,
864 const shared_ptr
<Logger
>& info_log
, const char* format
, ...);
866 // a set of log functions with different log levels.
867 extern void Header(const shared_ptr
<Logger
>& info_log
, const char* format
, ...);
868 extern void Debug(const shared_ptr
<Logger
>& info_log
, const char* format
, ...);
869 extern void Info(const shared_ptr
<Logger
>& info_log
, const char* format
, ...);
870 extern void Warn(const shared_ptr
<Logger
>& info_log
, const char* format
, ...);
871 extern void Error(const shared_ptr
<Logger
>& info_log
, const char* format
, ...);
872 extern void Fatal(const shared_ptr
<Logger
>& info_log
, const char* format
, ...);
874 // Log the specified data to *info_log if info_log is non-nullptr.
875 // The default info log level is InfoLogLevel::INFO_LEVEL.
876 extern void Log(const shared_ptr
<Logger
>& info_log
, const char* format
, ...)
877 # if defined(__GNUC__) || defined(__clang__)
878 __attribute__((__format__ (__printf__
, 2, 3)))
882 extern void LogFlush(Logger
*info_log
);
884 extern void Log(const InfoLogLevel log_level
, Logger
* info_log
,
885 const char* format
, ...);
887 // The default info log level is InfoLogLevel::INFO_LEVEL.
888 extern void Log(Logger
* info_log
, const char* format
, ...)
889 # if defined(__GNUC__) || defined(__clang__)
890 __attribute__((__format__ (__printf__
, 2, 3)))
894 // a set of log functions with different log levels.
895 extern void Header(Logger
* info_log
, const char* format
, ...);
896 extern void Debug(Logger
* info_log
, const char* format
, ...);
897 extern void Info(Logger
* info_log
, const char* format
, ...);
898 extern void Warn(Logger
* info_log
, const char* format
, ...);
899 extern void Error(Logger
* info_log
, const char* format
, ...);
900 extern void Fatal(Logger
* info_log
, const char* format
, ...);
902 // A utility routine: write "data" to the named file.
903 extern Status
WriteStringToFile(Env
* env
, const Slice
& data
,
904 const std::string
& fname
,
905 bool should_sync
= false);
907 // A utility routine: read contents of named file into *data
908 extern Status
ReadFileToString(Env
* env
, const std::string
& fname
,
911 // An implementation of Env that forwards all calls to another Env.
912 // May be useful to clients who wish to override just part of the
913 // functionality of another Env.
914 class EnvWrapper
: public Env
{
916 // Initialize an EnvWrapper that delegates all calls to *t
917 explicit EnvWrapper(Env
* t
) : target_(t
) { }
918 virtual ~EnvWrapper();
920 // Return the target to which this Env forwards all calls
921 Env
* target() const { return target_
; }
923 // The following text is boilerplate that forwards all methods to target()
924 Status
NewSequentialFile(const std::string
& f
, unique_ptr
<SequentialFile
>* r
,
925 const EnvOptions
& options
) override
{
926 return target_
->NewSequentialFile(f
, r
, options
);
928 Status
NewRandomAccessFile(const std::string
& f
,
929 unique_ptr
<RandomAccessFile
>* r
,
930 const EnvOptions
& options
) override
{
931 return target_
->NewRandomAccessFile(f
, r
, options
);
933 Status
NewWritableFile(const std::string
& f
, unique_ptr
<WritableFile
>* r
,
934 const EnvOptions
& options
) override
{
935 return target_
->NewWritableFile(f
, r
, options
);
937 Status
ReopenWritableFile(const std::string
& fname
,
938 unique_ptr
<WritableFile
>* result
,
939 const EnvOptions
& options
) override
{
940 return target_
->ReopenWritableFile(fname
, result
, options
);
942 Status
ReuseWritableFile(const std::string
& fname
,
943 const std::string
& old_fname
,
944 unique_ptr
<WritableFile
>* r
,
945 const EnvOptions
& options
) override
{
946 return target_
->ReuseWritableFile(fname
, old_fname
, r
, options
);
948 Status
NewRandomRWFile(const std::string
& fname
,
949 unique_ptr
<RandomRWFile
>* result
,
950 const EnvOptions
& options
) override
{
951 return target_
->NewRandomRWFile(fname
, result
, options
);
953 virtual Status
NewDirectory(const std::string
& name
,
954 unique_ptr
<Directory
>* result
) override
{
955 return target_
->NewDirectory(name
, result
);
957 Status
FileExists(const std::string
& f
) override
{
958 return target_
->FileExists(f
);
960 Status
GetChildren(const std::string
& dir
,
961 std::vector
<std::string
>* r
) override
{
962 return target_
->GetChildren(dir
, r
);
964 Status
GetChildrenFileAttributes(
965 const std::string
& dir
, std::vector
<FileAttributes
>* result
) override
{
966 return target_
->GetChildrenFileAttributes(dir
, result
);
968 Status
DeleteFile(const std::string
& f
) override
{
969 return target_
->DeleteFile(f
);
971 Status
CreateDir(const std::string
& d
) override
{
972 return target_
->CreateDir(d
);
974 Status
CreateDirIfMissing(const std::string
& d
) override
{
975 return target_
->CreateDirIfMissing(d
);
977 Status
DeleteDir(const std::string
& d
) override
{
978 return target_
->DeleteDir(d
);
980 Status
GetFileSize(const std::string
& f
, uint64_t* s
) override
{
981 return target_
->GetFileSize(f
, s
);
984 Status
GetFileModificationTime(const std::string
& fname
,
985 uint64_t* file_mtime
) override
{
986 return target_
->GetFileModificationTime(fname
, file_mtime
);
989 Status
RenameFile(const std::string
& s
, const std::string
& t
) override
{
990 return target_
->RenameFile(s
, t
);
993 Status
LinkFile(const std::string
& s
, const std::string
& t
) override
{
994 return target_
->LinkFile(s
, t
);
997 Status
LockFile(const std::string
& f
, FileLock
** l
) override
{
998 return target_
->LockFile(f
, l
);
1001 Status
UnlockFile(FileLock
* l
) override
{ return target_
->UnlockFile(l
); }
1003 void Schedule(void (*f
)(void* arg
), void* a
, Priority pri
,
1004 void* tag
= nullptr, void (*u
)(void* arg
) = 0) override
{
1005 return target_
->Schedule(f
, a
, pri
, tag
, u
);
1008 int UnSchedule(void* tag
, Priority pri
) override
{
1009 return target_
->UnSchedule(tag
, pri
);
1012 void StartThread(void (*f
)(void*), void* a
) override
{
1013 return target_
->StartThread(f
, a
);
1015 void WaitForJoin() override
{ return target_
->WaitForJoin(); }
1016 virtual unsigned int GetThreadPoolQueueLen(
1017 Priority pri
= LOW
) const override
{
1018 return target_
->GetThreadPoolQueueLen(pri
);
1020 virtual Status
GetTestDirectory(std::string
* path
) override
{
1021 return target_
->GetTestDirectory(path
);
1023 virtual Status
NewLogger(const std::string
& fname
,
1024 shared_ptr
<Logger
>* result
) override
{
1025 return target_
->NewLogger(fname
, result
);
1027 uint64_t NowMicros() override
{ return target_
->NowMicros(); }
1029 void SleepForMicroseconds(int micros
) override
{
1030 target_
->SleepForMicroseconds(micros
);
1032 Status
GetHostName(char* name
, uint64_t len
) override
{
1033 return target_
->GetHostName(name
, len
);
1035 Status
GetCurrentTime(int64_t* unix_time
) override
{
1036 return target_
->GetCurrentTime(unix_time
);
1038 Status
GetAbsolutePath(const std::string
& db_path
,
1039 std::string
* output_path
) override
{
1040 return target_
->GetAbsolutePath(db_path
, output_path
);
1042 void SetBackgroundThreads(int num
, Priority pri
) override
{
1043 return target_
->SetBackgroundThreads(num
, pri
);
1046 void IncBackgroundThreadsIfNeeded(int num
, Priority pri
) override
{
1047 return target_
->IncBackgroundThreadsIfNeeded(num
, pri
);
1050 void LowerThreadPoolIOPriority(Priority pool
= LOW
) override
{
1051 target_
->LowerThreadPoolIOPriority(pool
);
1054 std::string
TimeToString(uint64_t time
) override
{
1055 return target_
->TimeToString(time
);
1058 Status
GetThreadList(std::vector
<ThreadStatus
>* thread_list
) override
{
1059 return target_
->GetThreadList(thread_list
);
1062 ThreadStatusUpdater
* GetThreadStatusUpdater() const override
{
1063 return target_
->GetThreadStatusUpdater();
1066 uint64_t GetThreadID() const override
{
1067 return target_
->GetThreadID();
1074 // An implementation of WritableFile that forwards all calls to another
1075 // WritableFile. May be useful to clients who wish to override just part of the
1076 // functionality of another WritableFile.
1077 // It's declared as friend of WritableFile to allow forwarding calls to
1078 // protected virtual methods.
1079 class WritableFileWrapper
: public WritableFile
{
1081 explicit WritableFileWrapper(WritableFile
* t
) : target_(t
) { }
1083 Status
Append(const Slice
& data
) override
{ return target_
->Append(data
); }
1084 Status
PositionedAppend(const Slice
& data
, uint64_t offset
) override
{
1085 return target_
->PositionedAppend(data
, offset
);
1087 Status
Truncate(uint64_t size
) override
{ return target_
->Truncate(size
); }
1088 Status
Close() override
{ return target_
->Close(); }
1089 Status
Flush() override
{ return target_
->Flush(); }
1090 Status
Sync() override
{ return target_
->Sync(); }
1091 Status
Fsync() override
{ return target_
->Fsync(); }
1092 bool IsSyncThreadSafe() const override
{ return target_
->IsSyncThreadSafe(); }
1093 void SetIOPriority(Env::IOPriority pri
) override
{
1094 target_
->SetIOPriority(pri
);
1096 Env::IOPriority
GetIOPriority() override
{ return target_
->GetIOPriority(); }
1097 uint64_t GetFileSize() override
{ return target_
->GetFileSize(); }
1098 void GetPreallocationStatus(size_t* block_size
,
1099 size_t* last_allocated_block
) override
{
1100 target_
->GetPreallocationStatus(block_size
, last_allocated_block
);
1102 size_t GetUniqueId(char* id
, size_t max_size
) const override
{
1103 return target_
->GetUniqueId(id
, max_size
);
1105 Status
InvalidateCache(size_t offset
, size_t length
) override
{
1106 return target_
->InvalidateCache(offset
, length
);
1109 virtual void SetPreallocationBlockSize(size_t size
) override
{
1110 target_
->SetPreallocationBlockSize(size
);
1112 virtual void PrepareWrite(size_t offset
, size_t len
) override
{
1113 target_
->PrepareWrite(offset
, len
);
1117 Status
Allocate(uint64_t offset
, uint64_t len
) override
{
1118 return target_
->Allocate(offset
, len
);
1120 Status
RangeSync(uint64_t offset
, uint64_t nbytes
) override
{
1121 return target_
->RangeSync(offset
, nbytes
);
1125 WritableFile
* target_
;
// Returns a new environment that stores its data in memory and delegates
// all non-file-storage tasks to base_env. The caller must delete the result
// when it is no longer needed.
// *base_env must remain live while the result is in use.
Env* NewMemEnv(Env* base_env);

// Returns a new environment that is used for HDFS environment.
// This is a factory method for HdfsEnv declared in hdfs/env_hdfs.h
//
//   hdfs_env - output parameter.
//              NOTE(review): presumably receives the newly created Env on
//              success; confirm against the implementation.
//   fsname   - NOTE(review): presumably names the HDFS file system to
//              connect to; confirm against the implementation.
Status NewHdfsEnv(Env** hdfs_env, const std::string& fsname);

// Returns a new environment that measures function call times for filesystem
// operations, reporting results to variables in PerfContext.
// This is a factory method for TimedEnv defined in utilities/env_timed.cc.
Env* NewTimedEnv(Env* base_env);
1143 } // namespace rocksdb
1145 #endif // STORAGE_ROCKSDB_INCLUDE_ENV_H_