]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/env/env_encryption.cc
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / env / env_encryption.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #ifndef ROCKSDB_LITE
7
8 #include <algorithm>
9 #include <cctype>
10 #include <iostream>
11 #include <cassert>
12
13 #include "rocksdb/env_encryption.h"
14 #include "util/aligned_buffer.h"
15 #include "util/coding.h"
16 #include "util/random.h"
17
18 #endif
19
20 namespace rocksdb {
21
22 #ifndef ROCKSDB_LITE
23
24 class EncryptedSequentialFile : public SequentialFile {
25 private:
26 std::unique_ptr<SequentialFile> file_;
27 std::unique_ptr<BlockAccessCipherStream> stream_;
28 uint64_t offset_;
29 size_t prefixLength_;
30
31 public:
32 // Default ctor. Given underlying sequential file is supposed to be at
33 // offset == prefixLength.
34 EncryptedSequentialFile(SequentialFile* f, BlockAccessCipherStream* s, size_t prefixLength)
35 : file_(f), stream_(s), offset_(prefixLength), prefixLength_(prefixLength) {
36 }
37
38 // Read up to "n" bytes from the file. "scratch[0..n-1]" may be
39 // written by this routine. Sets "*result" to the data that was
40 // read (including if fewer than "n" bytes were successfully read).
41 // May set "*result" to point at data in "scratch[0..n-1]", so
42 // "scratch[0..n-1]" must be live when "*result" is used.
43 // If an error was encountered, returns a non-OK status.
44 //
45 // REQUIRES: External synchronization
46 Status Read(size_t n, Slice* result, char* scratch) override {
47 assert(scratch);
48 Status status = file_->Read(n, result, scratch);
49 if (!status.ok()) {
50 return status;
51 }
52 status = stream_->Decrypt(offset_, (char*)result->data(), result->size());
53 offset_ += result->size(); // We've already ready data from disk, so update offset_ even if decryption fails.
54 return status;
55 }
56
57 // Skip "n" bytes from the file. This is guaranteed to be no
58 // slower that reading the same data, but may be faster.
59 //
60 // If end of file is reached, skipping will stop at the end of the
61 // file, and Skip will return OK.
62 //
63 // REQUIRES: External synchronization
64 Status Skip(uint64_t n) override {
65 auto status = file_->Skip(n);
66 if (!status.ok()) {
67 return status;
68 }
69 offset_ += n;
70 return status;
71 }
72
73 // Indicates the upper layers if the current SequentialFile implementation
74 // uses direct IO.
75 bool use_direct_io() const override { return file_->use_direct_io(); }
76
77 // Use the returned alignment value to allocate
78 // aligned buffer for Direct I/O
79 size_t GetRequiredBufferAlignment() const override {
80 return file_->GetRequiredBufferAlignment();
81 }
82
83 // Remove any kind of caching of data from the offset to offset+length
84 // of this file. If the length is 0, then it refers to the end of file.
85 // If the system is not caching the file contents, then this is a noop.
86 Status InvalidateCache(size_t offset, size_t length) override {
87 return file_->InvalidateCache(offset + prefixLength_, length);
88 }
89
90 // Positioned Read for direct I/O
91 // If Direct I/O enabled, offset, n, and scratch should be properly aligned
92 Status PositionedRead(uint64_t offset, size_t n, Slice* result,
93 char* scratch) override {
94 assert(scratch);
95 offset += prefixLength_; // Skip prefix
96 auto status = file_->PositionedRead(offset, n, result, scratch);
97 if (!status.ok()) {
98 return status;
99 }
100 offset_ = offset + result->size();
101 status = stream_->Decrypt(offset, (char*)result->data(), result->size());
102 return status;
103 }
104 };
105
106 // A file abstraction for randomly reading the contents of a file.
107 class EncryptedRandomAccessFile : public RandomAccessFile {
108 private:
109 std::unique_ptr<RandomAccessFile> file_;
110 std::unique_ptr<BlockAccessCipherStream> stream_;
111 size_t prefixLength_;
112
113 public:
114 EncryptedRandomAccessFile(RandomAccessFile* f, BlockAccessCipherStream* s, size_t prefixLength)
115 : file_(f), stream_(s), prefixLength_(prefixLength) { }
116
117 // Read up to "n" bytes from the file starting at "offset".
118 // "scratch[0..n-1]" may be written by this routine. Sets "*result"
119 // to the data that was read (including if fewer than "n" bytes were
120 // successfully read). May set "*result" to point at data in
121 // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
122 // "*result" is used. If an error was encountered, returns a non-OK
123 // status.
124 //
125 // Safe for concurrent use by multiple threads.
126 // If Direct I/O enabled, offset, n, and scratch should be aligned properly.
127 Status Read(uint64_t offset, size_t n, Slice* result,
128 char* scratch) const override {
129 assert(scratch);
130 offset += prefixLength_;
131 auto status = file_->Read(offset, n, result, scratch);
132 if (!status.ok()) {
133 return status;
134 }
135 status = stream_->Decrypt(offset, (char*)result->data(), result->size());
136 return status;
137 }
138
139 // Readahead the file starting from offset by n bytes for caching.
140 Status Prefetch(uint64_t offset, size_t n) override {
141 //return Status::OK();
142 return file_->Prefetch(offset + prefixLength_, n);
143 }
144
145 // Tries to get an unique ID for this file that will be the same each time
146 // the file is opened (and will stay the same while the file is open).
147 // Furthermore, it tries to make this ID at most "max_size" bytes. If such an
148 // ID can be created this function returns the length of the ID and places it
149 // in "id"; otherwise, this function returns 0, in which case "id"
150 // may not have been modified.
151 //
152 // This function guarantees, for IDs from a given environment, two unique ids
153 // cannot be made equal to each other by adding arbitrary bytes to one of
154 // them. That is, no unique ID is the prefix of another.
155 //
156 // This function guarantees that the returned ID will not be interpretable as
157 // a single varint.
158 //
159 // Note: these IDs are only valid for the duration of the process.
160 size_t GetUniqueId(char* id, size_t max_size) const override {
161 return file_->GetUniqueId(id, max_size);
162 };
163
164 void Hint(AccessPattern pattern) override { file_->Hint(pattern); }
165
166 // Indicates the upper layers if the current RandomAccessFile implementation
167 // uses direct IO.
168 bool use_direct_io() const override { return file_->use_direct_io(); }
169
170 // Use the returned alignment value to allocate
171 // aligned buffer for Direct I/O
172 size_t GetRequiredBufferAlignment() const override {
173 return file_->GetRequiredBufferAlignment();
174 }
175
176 // Remove any kind of caching of data from the offset to offset+length
177 // of this file. If the length is 0, then it refers to the end of file.
178 // If the system is not caching the file contents, then this is a noop.
179 Status InvalidateCache(size_t offset, size_t length) override {
180 return file_->InvalidateCache(offset + prefixLength_, length);
181 }
182 };
183
184 // A file abstraction for sequential writing. The implementation
185 // must provide buffering since callers may append small fragments
186 // at a time to the file.
187 class EncryptedWritableFile : public WritableFileWrapper {
188 private:
189 std::unique_ptr<WritableFile> file_;
190 std::unique_ptr<BlockAccessCipherStream> stream_;
191 size_t prefixLength_;
192
193 public:
194 // Default ctor. Prefix is assumed to be written already.
195 EncryptedWritableFile(WritableFile* f, BlockAccessCipherStream* s, size_t prefixLength)
196 : WritableFileWrapper(f), file_(f), stream_(s), prefixLength_(prefixLength) { }
197
198 Status Append(const Slice& data) override {
199 AlignedBuffer buf;
200 Status status;
201 Slice dataToAppend(data);
202 if (data.size() > 0) {
203 auto offset = file_->GetFileSize(); // size including prefix
204 // Encrypt in cloned buffer
205 buf.Alignment(GetRequiredBufferAlignment());
206 buf.AllocateNewBuffer(data.size());
207 memmove(buf.BufferStart(), data.data(), data.size());
208 status = stream_->Encrypt(offset, buf.BufferStart(), data.size());
209 if (!status.ok()) {
210 return status;
211 }
212 dataToAppend = Slice(buf.BufferStart(), data.size());
213 }
214 status = file_->Append(dataToAppend);
215 if (!status.ok()) {
216 return status;
217 }
218 return status;
219 }
220
221 Status PositionedAppend(const Slice& data, uint64_t offset) override {
222 AlignedBuffer buf;
223 Status status;
224 Slice dataToAppend(data);
225 offset += prefixLength_;
226 if (data.size() > 0) {
227 // Encrypt in cloned buffer
228 buf.Alignment(GetRequiredBufferAlignment());
229 buf.AllocateNewBuffer(data.size());
230 memmove(buf.BufferStart(), data.data(), data.size());
231 status = stream_->Encrypt(offset, buf.BufferStart(), data.size());
232 if (!status.ok()) {
233 return status;
234 }
235 dataToAppend = Slice(buf.BufferStart(), data.size());
236 }
237 status = file_->PositionedAppend(dataToAppend, offset);
238 if (!status.ok()) {
239 return status;
240 }
241 return status;
242 }
243
244 // Indicates the upper layers if the current WritableFile implementation
245 // uses direct IO.
246 bool use_direct_io() const override { return file_->use_direct_io(); }
247
248 // Use the returned alignment value to allocate
249 // aligned buffer for Direct I/O
250 size_t GetRequiredBufferAlignment() const override {
251 return file_->GetRequiredBufferAlignment();
252 }
253
254 /*
255 * Get the size of valid data in the file.
256 */
257 uint64_t GetFileSize() override {
258 return file_->GetFileSize() - prefixLength_;
259 }
260
261 // Truncate is necessary to trim the file to the correct size
262 // before closing. It is not always possible to keep track of the file
263 // size due to whole pages writes. The behavior is undefined if called
264 // with other writes to follow.
265 Status Truncate(uint64_t size) override {
266 return file_->Truncate(size + prefixLength_);
267 }
268
269 // Remove any kind of caching of data from the offset to offset+length
270 // of this file. If the length is 0, then it refers to the end of file.
271 // If the system is not caching the file contents, then this is a noop.
272 // This call has no effect on dirty pages in the cache.
273 Status InvalidateCache(size_t offset, size_t length) override {
274 return file_->InvalidateCache(offset + prefixLength_, length);
275 }
276
277 // Sync a file range with disk.
278 // offset is the starting byte of the file range to be synchronized.
279 // nbytes specifies the length of the range to be synchronized.
280 // This asks the OS to initiate flushing the cached data to disk,
281 // without waiting for completion.
282 // Default implementation does nothing.
283 Status RangeSync(uint64_t offset, uint64_t nbytes) override {
284 return file_->RangeSync(offset + prefixLength_, nbytes);
285 }
286
287 // PrepareWrite performs any necessary preparation for a write
288 // before the write actually occurs. This allows for pre-allocation
289 // of space on devices where it can result in less file
290 // fragmentation and/or less waste from over-zealous filesystem
291 // pre-allocation.
292 void PrepareWrite(size_t offset, size_t len) override {
293 file_->PrepareWrite(offset + prefixLength_, len);
294 }
295
296 // Pre-allocates space for a file.
297 Status Allocate(uint64_t offset, uint64_t len) override {
298 return file_->Allocate(offset + prefixLength_, len);
299 }
300 };
301
302 // A file abstraction for random reading and writing.
303 class EncryptedRandomRWFile : public RandomRWFile {
304 private:
305 std::unique_ptr<RandomRWFile> file_;
306 std::unique_ptr<BlockAccessCipherStream> stream_;
307 size_t prefixLength_;
308
309 public:
310 EncryptedRandomRWFile(RandomRWFile* f, BlockAccessCipherStream* s, size_t prefixLength)
311 : file_(f), stream_(s), prefixLength_(prefixLength) {}
312
313 // Indicates if the class makes use of direct I/O
314 // If false you must pass aligned buffer to Write()
315 bool use_direct_io() const override { return file_->use_direct_io(); }
316
317 // Use the returned alignment value to allocate
318 // aligned buffer for Direct I/O
319 size_t GetRequiredBufferAlignment() const override {
320 return file_->GetRequiredBufferAlignment();
321 }
322
323 // Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
324 // Pass aligned buffer when use_direct_io() returns true.
325 Status Write(uint64_t offset, const Slice& data) override {
326 AlignedBuffer buf;
327 Status status;
328 Slice dataToWrite(data);
329 offset += prefixLength_;
330 if (data.size() > 0) {
331 // Encrypt in cloned buffer
332 buf.Alignment(GetRequiredBufferAlignment());
333 buf.AllocateNewBuffer(data.size());
334 memmove(buf.BufferStart(), data.data(), data.size());
335 status = stream_->Encrypt(offset, buf.BufferStart(), data.size());
336 if (!status.ok()) {
337 return status;
338 }
339 dataToWrite = Slice(buf.BufferStart(), data.size());
340 }
341 status = file_->Write(offset, dataToWrite);
342 return status;
343 }
344
345 // Read up to `n` bytes starting from offset `offset` and store them in
346 // result, provided `scratch` size should be at least `n`.
347 // Returns Status::OK() on success.
348 Status Read(uint64_t offset, size_t n, Slice* result,
349 char* scratch) const override {
350 assert(scratch);
351 offset += prefixLength_;
352 auto status = file_->Read(offset, n, result, scratch);
353 if (!status.ok()) {
354 return status;
355 }
356 status = stream_->Decrypt(offset, (char*)result->data(), result->size());
357 return status;
358 }
359
360 Status Flush() override { return file_->Flush(); }
361
362 Status Sync() override { return file_->Sync(); }
363
364 Status Fsync() override { return file_->Fsync(); }
365
366 Status Close() override { return file_->Close(); }
367 };
368
369 // EncryptedEnv implements an Env wrapper that adds encryption to files stored on disk.
370 class EncryptedEnv : public EnvWrapper {
371 public:
372 EncryptedEnv(Env* base_env, EncryptionProvider *provider)
373 : EnvWrapper(base_env) {
374 provider_ = provider;
375 }
376
377 // NewSequentialFile opens a file for sequential reading.
378 Status NewSequentialFile(const std::string& fname,
379 std::unique_ptr<SequentialFile>* result,
380 const EnvOptions& options) override {
381 result->reset();
382 if (options.use_mmap_reads) {
383 return Status::InvalidArgument();
384 }
385 // Open file using underlying Env implementation
386 std::unique_ptr<SequentialFile> underlying;
387 auto status = EnvWrapper::NewSequentialFile(fname, &underlying, options);
388 if (!status.ok()) {
389 return status;
390 }
391 // Read prefix (if needed)
392 AlignedBuffer prefixBuf;
393 Slice prefixSlice;
394 size_t prefixLength = provider_->GetPrefixLength();
395 if (prefixLength > 0) {
396 // Read prefix
397 prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
398 prefixBuf.AllocateNewBuffer(prefixLength);
399 status = underlying->Read(prefixLength, &prefixSlice, prefixBuf.BufferStart());
400 if (!status.ok()) {
401 return status;
402 }
403 }
404 // Create cipher stream
405 std::unique_ptr<BlockAccessCipherStream> stream;
406 status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
407 if (!status.ok()) {
408 return status;
409 }
410 (*result) = std::unique_ptr<SequentialFile>(new EncryptedSequentialFile(underlying.release(), stream.release(), prefixLength));
411 return Status::OK();
412 }
413
414 // NewRandomAccessFile opens a file for random read access.
415 Status NewRandomAccessFile(const std::string& fname,
416 std::unique_ptr<RandomAccessFile>* result,
417 const EnvOptions& options) override {
418 result->reset();
419 if (options.use_mmap_reads) {
420 return Status::InvalidArgument();
421 }
422 // Open file using underlying Env implementation
423 std::unique_ptr<RandomAccessFile> underlying;
424 auto status = EnvWrapper::NewRandomAccessFile(fname, &underlying, options);
425 if (!status.ok()) {
426 return status;
427 }
428 // Read prefix (if needed)
429 AlignedBuffer prefixBuf;
430 Slice prefixSlice;
431 size_t prefixLength = provider_->GetPrefixLength();
432 if (prefixLength > 0) {
433 // Read prefix
434 prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
435 prefixBuf.AllocateNewBuffer(prefixLength);
436 status = underlying->Read(0, prefixLength, &prefixSlice, prefixBuf.BufferStart());
437 if (!status.ok()) {
438 return status;
439 }
440 }
441 // Create cipher stream
442 std::unique_ptr<BlockAccessCipherStream> stream;
443 status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
444 if (!status.ok()) {
445 return status;
446 }
447 (*result) = std::unique_ptr<RandomAccessFile>(new EncryptedRandomAccessFile(underlying.release(), stream.release(), prefixLength));
448 return Status::OK();
449 }
450
451 // NewWritableFile opens a file for sequential writing.
452 Status NewWritableFile(const std::string& fname,
453 std::unique_ptr<WritableFile>* result,
454 const EnvOptions& options) override {
455 result->reset();
456 if (options.use_mmap_writes) {
457 return Status::InvalidArgument();
458 }
459 // Open file using underlying Env implementation
460 std::unique_ptr<WritableFile> underlying;
461 Status status = EnvWrapper::NewWritableFile(fname, &underlying, options);
462 if (!status.ok()) {
463 return status;
464 }
465 // Initialize & write prefix (if needed)
466 AlignedBuffer prefixBuf;
467 Slice prefixSlice;
468 size_t prefixLength = provider_->GetPrefixLength();
469 if (prefixLength > 0) {
470 // Initialize prefix
471 prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
472 prefixBuf.AllocateNewBuffer(prefixLength);
473 provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
474 prefixSlice = Slice(prefixBuf.BufferStart(), prefixLength);
475 // Write prefix
476 status = underlying->Append(prefixSlice);
477 if (!status.ok()) {
478 return status;
479 }
480 }
481 // Create cipher stream
482 std::unique_ptr<BlockAccessCipherStream> stream;
483 status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
484 if (!status.ok()) {
485 return status;
486 }
487 (*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength));
488 return Status::OK();
489 }
490
491 // Create an object that writes to a new file with the specified
492 // name. Deletes any existing file with the same name and creates a
493 // new file. On success, stores a pointer to the new file in
494 // *result and returns OK. On failure stores nullptr in *result and
495 // returns non-OK.
496 //
497 // The returned file will only be accessed by one thread at a time.
498 Status ReopenWritableFile(const std::string& fname,
499 std::unique_ptr<WritableFile>* result,
500 const EnvOptions& options) override {
501 result->reset();
502 if (options.use_mmap_writes) {
503 return Status::InvalidArgument();
504 }
505 // Open file using underlying Env implementation
506 std::unique_ptr<WritableFile> underlying;
507 Status status = EnvWrapper::ReopenWritableFile(fname, &underlying, options);
508 if (!status.ok()) {
509 return status;
510 }
511 // Initialize & write prefix (if needed)
512 AlignedBuffer prefixBuf;
513 Slice prefixSlice;
514 size_t prefixLength = provider_->GetPrefixLength();
515 if (prefixLength > 0) {
516 // Initialize prefix
517 prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
518 prefixBuf.AllocateNewBuffer(prefixLength);
519 provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
520 prefixSlice = Slice(prefixBuf.BufferStart(), prefixLength);
521 // Write prefix
522 status = underlying->Append(prefixSlice);
523 if (!status.ok()) {
524 return status;
525 }
526 }
527 // Create cipher stream
528 std::unique_ptr<BlockAccessCipherStream> stream;
529 status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
530 if (!status.ok()) {
531 return status;
532 }
533 (*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength));
534 return Status::OK();
535 }
536
537 // Reuse an existing file by renaming it and opening it as writable.
538 Status ReuseWritableFile(const std::string& fname,
539 const std::string& old_fname,
540 std::unique_ptr<WritableFile>* result,
541 const EnvOptions& options) override {
542 result->reset();
543 if (options.use_mmap_writes) {
544 return Status::InvalidArgument();
545 }
546 // Open file using underlying Env implementation
547 std::unique_ptr<WritableFile> underlying;
548 Status status = EnvWrapper::ReuseWritableFile(fname, old_fname, &underlying, options);
549 if (!status.ok()) {
550 return status;
551 }
552 // Initialize & write prefix (if needed)
553 AlignedBuffer prefixBuf;
554 Slice prefixSlice;
555 size_t prefixLength = provider_->GetPrefixLength();
556 if (prefixLength > 0) {
557 // Initialize prefix
558 prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
559 prefixBuf.AllocateNewBuffer(prefixLength);
560 provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
561 prefixSlice = Slice(prefixBuf.BufferStart(), prefixLength);
562 // Write prefix
563 status = underlying->Append(prefixSlice);
564 if (!status.ok()) {
565 return status;
566 }
567 }
568 // Create cipher stream
569 std::unique_ptr<BlockAccessCipherStream> stream;
570 status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
571 if (!status.ok()) {
572 return status;
573 }
574 (*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength));
575 return Status::OK();
576 }
577
578 // Open `fname` for random read and write, if file doesn't exist the file
579 // will be created. On success, stores a pointer to the new file in
580 // *result and returns OK. On failure returns non-OK.
581 //
582 // The returned file will only be accessed by one thread at a time.
583 Status NewRandomRWFile(const std::string& fname,
584 std::unique_ptr<RandomRWFile>* result,
585 const EnvOptions& options) override {
586 result->reset();
587 if (options.use_mmap_reads || options.use_mmap_writes) {
588 return Status::InvalidArgument();
589 }
590 // Check file exists
591 bool isNewFile = !FileExists(fname).ok();
592
593 // Open file using underlying Env implementation
594 std::unique_ptr<RandomRWFile> underlying;
595 Status status = EnvWrapper::NewRandomRWFile(fname, &underlying, options);
596 if (!status.ok()) {
597 return status;
598 }
599 // Read or Initialize & write prefix (if needed)
600 AlignedBuffer prefixBuf;
601 Slice prefixSlice;
602 size_t prefixLength = provider_->GetPrefixLength();
603 if (prefixLength > 0) {
604 prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
605 prefixBuf.AllocateNewBuffer(prefixLength);
606 if (!isNewFile) {
607 // File already exists, read prefix
608 status = underlying->Read(0, prefixLength, &prefixSlice, prefixBuf.BufferStart());
609 if (!status.ok()) {
610 return status;
611 }
612 } else {
613 // File is new, initialize & write prefix
614 provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
615 prefixSlice = Slice(prefixBuf.BufferStart(), prefixLength);
616 // Write prefix
617 status = underlying->Write(0, prefixSlice);
618 if (!status.ok()) {
619 return status;
620 }
621 }
622 }
623 // Create cipher stream
624 std::unique_ptr<BlockAccessCipherStream> stream;
625 status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
626 if (!status.ok()) {
627 return status;
628 }
629 (*result) = std::unique_ptr<RandomRWFile>(new EncryptedRandomRWFile(underlying.release(), stream.release(), prefixLength));
630 return Status::OK();
631 }
632
633 // Store in *result the attributes of the children of the specified directory.
634 // In case the implementation lists the directory prior to iterating the files
635 // and files are concurrently deleted, the deleted files will be omitted from
636 // result.
637 // The name attributes are relative to "dir".
638 // Original contents of *results are dropped.
639 // Returns OK if "dir" exists and "*result" contains its children.
640 // NotFound if "dir" does not exist, the calling process does not have
641 // permission to access "dir", or if "dir" is invalid.
642 // IOError if an IO Error was encountered
643 Status GetChildrenFileAttributes(
644 const std::string& dir, std::vector<FileAttributes>* result) override {
645 auto status = EnvWrapper::GetChildrenFileAttributes(dir, result);
646 if (!status.ok()) {
647 return status;
648 }
649 size_t prefixLength = provider_->GetPrefixLength();
650 for (auto it = std::begin(*result); it!=std::end(*result); ++it) {
651 assert(it->size_bytes >= prefixLength);
652 it->size_bytes -= prefixLength;
653 }
654 return Status::OK();
655 }
656
657 // Store the size of fname in *file_size.
658 Status GetFileSize(const std::string& fname, uint64_t* file_size) override {
659 auto status = EnvWrapper::GetFileSize(fname, file_size);
660 if (!status.ok()) {
661 return status;
662 }
663 size_t prefixLength = provider_->GetPrefixLength();
664 assert(*file_size >= prefixLength);
665 *file_size -= prefixLength;
666 return Status::OK();
667 }
668
669 private:
670 EncryptionProvider *provider_;
671 };
672
673
674 // Returns an Env that encrypts data when stored on disk and decrypts data when
675 // read from disk.
676 Env* NewEncryptedEnv(Env* base_env, EncryptionProvider* provider) {
677 return new EncryptedEnv(base_env, provider);
678 }
679
680 // Encrypt one or more (partial) blocks of data at the file offset.
681 // Length of data is given in dataSize.
682 Status BlockAccessCipherStream::Encrypt(uint64_t fileOffset, char *data, size_t dataSize) {
683 // Calculate block index
684 auto blockSize = BlockSize();
685 uint64_t blockIndex = fileOffset / blockSize;
686 size_t blockOffset = fileOffset % blockSize;
687 std::unique_ptr<char[]> blockBuffer;
688
689 std::string scratch;
690 AllocateScratch(scratch);
691
692 // Encrypt individual blocks.
693 while (1) {
694 char *block = data;
695 size_t n = std::min(dataSize, blockSize - blockOffset);
696 if (n != blockSize) {
697 // We're not encrypting a full block.
698 // Copy data to blockBuffer
699 if (!blockBuffer.get()) {
700 // Allocate buffer
701 blockBuffer = std::unique_ptr<char[]>(new char[blockSize]);
702 }
703 block = blockBuffer.get();
704 // Copy plain data to block buffer
705 memmove(block + blockOffset, data, n);
706 }
707 auto status = EncryptBlock(blockIndex, block, (char*)scratch.data());
708 if (!status.ok()) {
709 return status;
710 }
711 if (block != data) {
712 // Copy encrypted data back to `data`.
713 memmove(data, block + blockOffset, n);
714 }
715 dataSize -= n;
716 if (dataSize == 0) {
717 return Status::OK();
718 }
719 data += n;
720 blockOffset = 0;
721 blockIndex++;
722 }
723 }
724
725 // Decrypt one or more (partial) blocks of data at the file offset.
726 // Length of data is given in dataSize.
727 Status BlockAccessCipherStream::Decrypt(uint64_t fileOffset, char *data, size_t dataSize) {
728 // Calculate block index
729 auto blockSize = BlockSize();
730 uint64_t blockIndex = fileOffset / blockSize;
731 size_t blockOffset = fileOffset % blockSize;
732 std::unique_ptr<char[]> blockBuffer;
733
734 std::string scratch;
735 AllocateScratch(scratch);
736
737 assert(fileOffset < dataSize);
738
739 // Decrypt individual blocks.
740 while (1) {
741 char *block = data;
742 size_t n = std::min(dataSize, blockSize - blockOffset);
743 if (n != blockSize) {
744 // We're not decrypting a full block.
745 // Copy data to blockBuffer
746 if (!blockBuffer.get()) {
747 // Allocate buffer
748 blockBuffer = std::unique_ptr<char[]>(new char[blockSize]);
749 }
750 block = blockBuffer.get();
751 // Copy encrypted data to block buffer
752 memmove(block + blockOffset, data, n);
753 }
754 auto status = DecryptBlock(blockIndex, block, (char*)scratch.data());
755 if (!status.ok()) {
756 return status;
757 }
758 if (block != data) {
759 // Copy decrypted data back to `data`.
760 memmove(data, block + blockOffset, n);
761 }
762
763 // Simply decrementing dataSize by n could cause it to underflow,
764 // which will very likely make it read over the original bounds later
765 assert(dataSize >= n);
766 if (dataSize < n) {
767 return Status::Corruption("Cannot decrypt data at given offset");
768 }
769
770 dataSize -= n;
771 if (dataSize == 0) {
772 return Status::OK();
773 }
774 data += n;
775 blockOffset = 0;
776 blockIndex++;
777 }
778 }
779
780 // Encrypt a block of data.
781 // Length of data is equal to BlockSize().
782 Status ROT13BlockCipher::Encrypt(char *data) {
783 for (size_t i = 0; i < blockSize_; ++i) {
784 data[i] += 13;
785 }
786 return Status::OK();
787 }
788
789 // Decrypt a block of data.
790 // Length of data is equal to BlockSize().
791 Status ROT13BlockCipher::Decrypt(char *data) {
792 return Encrypt(data);
793 }
794
795 // Allocate scratch space which is passed to EncryptBlock/DecryptBlock.
796 void CTRCipherStream::AllocateScratch(std::string& scratch) {
797 auto blockSize = cipher_.BlockSize();
798 scratch.reserve(blockSize);
799 }
800
801 // Encrypt a block of data at the given block index.
802 // Length of data is equal to BlockSize();
803 Status CTRCipherStream::EncryptBlock(uint64_t blockIndex, char *data, char* scratch) {
804
805 // Create nonce + counter
806 auto blockSize = cipher_.BlockSize();
807 memmove(scratch, iv_.data(), blockSize);
808 EncodeFixed64(scratch, blockIndex + initialCounter_);
809
810 // Encrypt nonce+counter
811 auto status = cipher_.Encrypt(scratch);
812 if (!status.ok()) {
813 return status;
814 }
815
816 // XOR data with ciphertext.
817 for (size_t i = 0; i < blockSize; i++) {
818 data[i] = data[i] ^ scratch[i];
819 }
820 return Status::OK();
821 }
822
823 // Decrypt a block of data at the given block index.
824 // Length of data is equal to BlockSize();
825 Status CTRCipherStream::DecryptBlock(uint64_t blockIndex, char *data, char* scratch) {
826 // For CTR decryption & encryption are the same
827 return EncryptBlock(blockIndex, data, scratch);
828 }
829
830 // GetPrefixLength returns the length of the prefix that is added to every file
831 // and used for storing encryption options.
832 // For optimal performance, the prefix length should be a multiple of
833 // the page size.
834 size_t CTREncryptionProvider::GetPrefixLength() {
835 return defaultPrefixLength;
836 }
837
838 // decodeCTRParameters decodes the initial counter & IV from the given
839 // (plain text) prefix.
840 static void decodeCTRParameters(const char *prefix, size_t blockSize, uint64_t &initialCounter, Slice &iv) {
841 // First block contains 64-bit initial counter
842 initialCounter = DecodeFixed64(prefix);
843 // Second block contains IV
844 iv = Slice(prefix + blockSize, blockSize);
845 }
846
847 // CreateNewPrefix initialized an allocated block of prefix memory
848 // for a new file.
849 Status CTREncryptionProvider::CreateNewPrefix(const std::string& /*fname*/,
850 char* prefix,
851 size_t prefixLength) {
852 // Create & seed rnd.
853 Random rnd((uint32_t)Env::Default()->NowMicros());
854 // Fill entire prefix block with random values.
855 for (size_t i = 0; i < prefixLength; i++) {
856 prefix[i] = rnd.Uniform(256) & 0xFF;
857 }
858 // Take random data to extract initial counter & IV
859 auto blockSize = cipher_.BlockSize();
860 uint64_t initialCounter;
861 Slice prefixIV;
862 decodeCTRParameters(prefix, blockSize, initialCounter, prefixIV);
863
864 // Now populate the rest of the prefix, starting from the third block.
865 PopulateSecretPrefixPart(prefix + (2 * blockSize), prefixLength - (2 * blockSize), blockSize);
866
867 // Encrypt the prefix, starting from block 2 (leave block 0, 1 with initial counter & IV unencrypted)
868 CTRCipherStream cipherStream(cipher_, prefixIV.data(), initialCounter);
869 auto status = cipherStream.Encrypt(0, prefix + (2 * blockSize), prefixLength - (2 * blockSize));
870 if (!status.ok()) {
871 return status;
872 }
873 return Status::OK();
874 }
875
876 // PopulateSecretPrefixPart initializes the data into a new prefix block
877 // in plain text.
878 // Returns the amount of space (starting from the start of the prefix)
879 // that has been initialized.
880 size_t CTREncryptionProvider::PopulateSecretPrefixPart(char* /*prefix*/,
881 size_t /*prefixLength*/,
882 size_t /*blockSize*/) {
883 // Nothing to do here, put in custom data in override when needed.
884 return 0;
885 }
886
887 Status CTREncryptionProvider::CreateCipherStream(
888 const std::string& fname, const EnvOptions& options, Slice& prefix,
889 std::unique_ptr<BlockAccessCipherStream>* result) {
890 // Read plain text part of prefix.
891 auto blockSize = cipher_.BlockSize();
892 uint64_t initialCounter;
893 Slice iv;
894 decodeCTRParameters(prefix.data(), blockSize, initialCounter, iv);
895
896 // If the prefix is smaller than twice the block size, we would below read a
897 // very large chunk of the file (and very likely read over the bounds)
898 assert(prefix.size() >= 2 * blockSize);
899 if (prefix.size() < 2 * blockSize) {
900 return Status::Corruption("Unable to read from file " + fname + ": read attempt would read beyond file bounds");
901 }
902
903 // Decrypt the encrypted part of the prefix, starting from block 2 (block 0, 1 with initial counter & IV are unencrypted)
904 CTRCipherStream cipherStream(cipher_, iv.data(), initialCounter);
905 auto status = cipherStream.Decrypt(0, (char*)prefix.data() + (2 * blockSize), prefix.size() - (2 * blockSize));
906 if (!status.ok()) {
907 return status;
908 }
909
910 // Create cipher stream
911 return CreateCipherStreamFromPrefix(fname, options, initialCounter, iv, prefix, result);
912 }
913
914 // CreateCipherStreamFromPrefix creates a block access cipher stream for a file given
915 // given name and options. The given prefix is already decrypted.
916 Status CTREncryptionProvider::CreateCipherStreamFromPrefix(
917 const std::string& /*fname*/, const EnvOptions& /*options*/,
918 uint64_t initialCounter, const Slice& iv, const Slice& /*prefix*/,
919 std::unique_ptr<BlockAccessCipherStream>* result) {
920 (*result) = std::unique_ptr<BlockAccessCipherStream>(
921 new CTRCipherStream(cipher_, iv.data(), initialCounter));
922 return Status::OK();
923 }
924
925 #endif // ROCKSDB_LITE
926
927 } // namespace rocksdb