]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/env/env_encryption.cc
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / rocksdb / env / env_encryption.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #ifndef ROCKSDB_LITE
7
8 #include <algorithm>
9 #include <cassert>
10 #include <cctype>
11 #include <iostream>
12
13 #include "rocksdb/env_encryption.h"
14 #include "util/aligned_buffer.h"
15 #include "util/coding.h"
16 #include "util/random.h"
17
18 #endif
19
20 namespace ROCKSDB_NAMESPACE {
21
22 #ifndef ROCKSDB_LITE
23
24 class EncryptedSequentialFile : public SequentialFile {
25 private:
26 std::unique_ptr<SequentialFile> file_;
27 std::unique_ptr<BlockAccessCipherStream> stream_;
28 uint64_t offset_;
29 size_t prefixLength_;
30
31 public:
32 // Default ctor. Given underlying sequential file is supposed to be at
33 // offset == prefixLength.
34 EncryptedSequentialFile(SequentialFile* f, BlockAccessCipherStream* s, size_t prefixLength)
35 : file_(f), stream_(s), offset_(prefixLength), prefixLength_(prefixLength) {
36 }
37
38 // Read up to "n" bytes from the file. "scratch[0..n-1]" may be
39 // written by this routine. Sets "*result" to the data that was
40 // read (including if fewer than "n" bytes were successfully read).
41 // May set "*result" to point at data in "scratch[0..n-1]", so
42 // "scratch[0..n-1]" must be live when "*result" is used.
43 // If an error was encountered, returns a non-OK status.
44 //
45 // REQUIRES: External synchronization
46 Status Read(size_t n, Slice* result, char* scratch) override {
47 assert(scratch);
48 Status status = file_->Read(n, result, scratch);
49 if (!status.ok()) {
50 return status;
51 }
52 status = stream_->Decrypt(offset_, (char*)result->data(), result->size());
53 offset_ += result->size(); // We've already ready data from disk, so update offset_ even if decryption fails.
54 return status;
55 }
56
57 // Skip "n" bytes from the file. This is guaranteed to be no
58 // slower that reading the same data, but may be faster.
59 //
60 // If end of file is reached, skipping will stop at the end of the
61 // file, and Skip will return OK.
62 //
63 // REQUIRES: External synchronization
64 Status Skip(uint64_t n) override {
65 auto status = file_->Skip(n);
66 if (!status.ok()) {
67 return status;
68 }
69 offset_ += n;
70 return status;
71 }
72
73 // Indicates the upper layers if the current SequentialFile implementation
74 // uses direct IO.
75 bool use_direct_io() const override { return file_->use_direct_io(); }
76
77 // Use the returned alignment value to allocate
78 // aligned buffer for Direct I/O
79 size_t GetRequiredBufferAlignment() const override {
80 return file_->GetRequiredBufferAlignment();
81 }
82
83 // Remove any kind of caching of data from the offset to offset+length
84 // of this file. If the length is 0, then it refers to the end of file.
85 // If the system is not caching the file contents, then this is a noop.
86 Status InvalidateCache(size_t offset, size_t length) override {
87 return file_->InvalidateCache(offset + prefixLength_, length);
88 }
89
90 // Positioned Read for direct I/O
91 // If Direct I/O enabled, offset, n, and scratch should be properly aligned
92 Status PositionedRead(uint64_t offset, size_t n, Slice* result,
93 char* scratch) override {
94 assert(scratch);
95 offset += prefixLength_; // Skip prefix
96 auto status = file_->PositionedRead(offset, n, result, scratch);
97 if (!status.ok()) {
98 return status;
99 }
100 offset_ = offset + result->size();
101 status = stream_->Decrypt(offset, (char*)result->data(), result->size());
102 return status;
103 }
104 };
105
106 // A file abstraction for randomly reading the contents of a file.
107 class EncryptedRandomAccessFile : public RandomAccessFile {
108 private:
109 std::unique_ptr<RandomAccessFile> file_;
110 std::unique_ptr<BlockAccessCipherStream> stream_;
111 size_t prefixLength_;
112
113 public:
114 EncryptedRandomAccessFile(RandomAccessFile* f, BlockAccessCipherStream* s, size_t prefixLength)
115 : file_(f), stream_(s), prefixLength_(prefixLength) { }
116
117 // Read up to "n" bytes from the file starting at "offset".
118 // "scratch[0..n-1]" may be written by this routine. Sets "*result"
119 // to the data that was read (including if fewer than "n" bytes were
120 // successfully read). May set "*result" to point at data in
121 // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
122 // "*result" is used. If an error was encountered, returns a non-OK
123 // status.
124 //
125 // Safe for concurrent use by multiple threads.
126 // If Direct I/O enabled, offset, n, and scratch should be aligned properly.
127 Status Read(uint64_t offset, size_t n, Slice* result,
128 char* scratch) const override {
129 assert(scratch);
130 offset += prefixLength_;
131 auto status = file_->Read(offset, n, result, scratch);
132 if (!status.ok()) {
133 return status;
134 }
135 status = stream_->Decrypt(offset, (char*)result->data(), result->size());
136 return status;
137 }
138
139 // Readahead the file starting from offset by n bytes for caching.
140 Status Prefetch(uint64_t offset, size_t n) override {
141 //return Status::OK();
142 return file_->Prefetch(offset + prefixLength_, n);
143 }
144
145 // Tries to get an unique ID for this file that will be the same each time
146 // the file is opened (and will stay the same while the file is open).
147 // Furthermore, it tries to make this ID at most "max_size" bytes. If such an
148 // ID can be created this function returns the length of the ID and places it
149 // in "id"; otherwise, this function returns 0, in which case "id"
150 // may not have been modified.
151 //
152 // This function guarantees, for IDs from a given environment, two unique ids
153 // cannot be made equal to each other by adding arbitrary bytes to one of
154 // them. That is, no unique ID is the prefix of another.
155 //
156 // This function guarantees that the returned ID will not be interpretable as
157 // a single varint.
158 //
159 // Note: these IDs are only valid for the duration of the process.
160 size_t GetUniqueId(char* id, size_t max_size) const override {
161 return file_->GetUniqueId(id, max_size);
162 };
163
164 void Hint(AccessPattern pattern) override { file_->Hint(pattern); }
165
166 // Indicates the upper layers if the current RandomAccessFile implementation
167 // uses direct IO.
168 bool use_direct_io() const override { return file_->use_direct_io(); }
169
170 // Use the returned alignment value to allocate
171 // aligned buffer for Direct I/O
172 size_t GetRequiredBufferAlignment() const override {
173 return file_->GetRequiredBufferAlignment();
174 }
175
176 // Remove any kind of caching of data from the offset to offset+length
177 // of this file. If the length is 0, then it refers to the end of file.
178 // If the system is not caching the file contents, then this is a noop.
179 Status InvalidateCache(size_t offset, size_t length) override {
180 return file_->InvalidateCache(offset + prefixLength_, length);
181 }
182 };
183
184 // A file abstraction for sequential writing. The implementation
185 // must provide buffering since callers may append small fragments
186 // at a time to the file.
187 class EncryptedWritableFile : public WritableFileWrapper {
188 private:
189 std::unique_ptr<WritableFile> file_;
190 std::unique_ptr<BlockAccessCipherStream> stream_;
191 size_t prefixLength_;
192
193 public:
194 // Default ctor. Prefix is assumed to be written already.
195 EncryptedWritableFile(WritableFile* f, BlockAccessCipherStream* s, size_t prefixLength)
196 : WritableFileWrapper(f), file_(f), stream_(s), prefixLength_(prefixLength) { }
197
198 Status Append(const Slice& data) override {
199 AlignedBuffer buf;
200 Status status;
201 Slice dataToAppend(data);
202 if (data.size() > 0) {
203 auto offset = file_->GetFileSize(); // size including prefix
204 // Encrypt in cloned buffer
205 buf.Alignment(GetRequiredBufferAlignment());
206 buf.AllocateNewBuffer(data.size());
207 // TODO (sagar0): Modify AlignedBuffer.Append to allow doing a memmove
208 // so that the next two lines can be replaced with buf.Append().
209 memmove(buf.BufferStart(), data.data(), data.size());
210 buf.Size(data.size());
211 status = stream_->Encrypt(offset, buf.BufferStart(), buf.CurrentSize());
212 if (!status.ok()) {
213 return status;
214 }
215 dataToAppend = Slice(buf.BufferStart(), buf.CurrentSize());
216 }
217 status = file_->Append(dataToAppend);
218 if (!status.ok()) {
219 return status;
220 }
221 return status;
222 }
223
224 Status PositionedAppend(const Slice& data, uint64_t offset) override {
225 AlignedBuffer buf;
226 Status status;
227 Slice dataToAppend(data);
228 offset += prefixLength_;
229 if (data.size() > 0) {
230 // Encrypt in cloned buffer
231 buf.Alignment(GetRequiredBufferAlignment());
232 buf.AllocateNewBuffer(data.size());
233 memmove(buf.BufferStart(), data.data(), data.size());
234 buf.Size(data.size());
235 status = stream_->Encrypt(offset, buf.BufferStart(), buf.CurrentSize());
236 if (!status.ok()) {
237 return status;
238 }
239 dataToAppend = Slice(buf.BufferStart(), buf.CurrentSize());
240 }
241 status = file_->PositionedAppend(dataToAppend, offset);
242 if (!status.ok()) {
243 return status;
244 }
245 return status;
246 }
247
248 // Indicates the upper layers if the current WritableFile implementation
249 // uses direct IO.
250 bool use_direct_io() const override { return file_->use_direct_io(); }
251
252 // Use the returned alignment value to allocate
253 // aligned buffer for Direct I/O
254 size_t GetRequiredBufferAlignment() const override {
255 return file_->GetRequiredBufferAlignment();
256 }
257
258 /*
259 * Get the size of valid data in the file.
260 */
261 uint64_t GetFileSize() override {
262 return file_->GetFileSize() - prefixLength_;
263 }
264
265 // Truncate is necessary to trim the file to the correct size
266 // before closing. It is not always possible to keep track of the file
267 // size due to whole pages writes. The behavior is undefined if called
268 // with other writes to follow.
269 Status Truncate(uint64_t size) override {
270 return file_->Truncate(size + prefixLength_);
271 }
272
273 // Remove any kind of caching of data from the offset to offset+length
274 // of this file. If the length is 0, then it refers to the end of file.
275 // If the system is not caching the file contents, then this is a noop.
276 // This call has no effect on dirty pages in the cache.
277 Status InvalidateCache(size_t offset, size_t length) override {
278 return file_->InvalidateCache(offset + prefixLength_, length);
279 }
280
281 // Sync a file range with disk.
282 // offset is the starting byte of the file range to be synchronized.
283 // nbytes specifies the length of the range to be synchronized.
284 // This asks the OS to initiate flushing the cached data to disk,
285 // without waiting for completion.
286 // Default implementation does nothing.
287 Status RangeSync(uint64_t offset, uint64_t nbytes) override {
288 return file_->RangeSync(offset + prefixLength_, nbytes);
289 }
290
291 // PrepareWrite performs any necessary preparation for a write
292 // before the write actually occurs. This allows for pre-allocation
293 // of space on devices where it can result in less file
294 // fragmentation and/or less waste from over-zealous filesystem
295 // pre-allocation.
296 void PrepareWrite(size_t offset, size_t len) override {
297 file_->PrepareWrite(offset + prefixLength_, len);
298 }
299
300 // Pre-allocates space for a file.
301 Status Allocate(uint64_t offset, uint64_t len) override {
302 return file_->Allocate(offset + prefixLength_, len);
303 }
304 };
305
306 // A file abstraction for random reading and writing.
307 class EncryptedRandomRWFile : public RandomRWFile {
308 private:
309 std::unique_ptr<RandomRWFile> file_;
310 std::unique_ptr<BlockAccessCipherStream> stream_;
311 size_t prefixLength_;
312
313 public:
314 EncryptedRandomRWFile(RandomRWFile* f, BlockAccessCipherStream* s, size_t prefixLength)
315 : file_(f), stream_(s), prefixLength_(prefixLength) {}
316
317 // Indicates if the class makes use of direct I/O
318 // If false you must pass aligned buffer to Write()
319 bool use_direct_io() const override { return file_->use_direct_io(); }
320
321 // Use the returned alignment value to allocate
322 // aligned buffer for Direct I/O
323 size_t GetRequiredBufferAlignment() const override {
324 return file_->GetRequiredBufferAlignment();
325 }
326
327 // Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
328 // Pass aligned buffer when use_direct_io() returns true.
329 Status Write(uint64_t offset, const Slice& data) override {
330 AlignedBuffer buf;
331 Status status;
332 Slice dataToWrite(data);
333 offset += prefixLength_;
334 if (data.size() > 0) {
335 // Encrypt in cloned buffer
336 buf.Alignment(GetRequiredBufferAlignment());
337 buf.AllocateNewBuffer(data.size());
338 memmove(buf.BufferStart(), data.data(), data.size());
339 buf.Size(data.size());
340 status = stream_->Encrypt(offset, buf.BufferStart(), buf.CurrentSize());
341 if (!status.ok()) {
342 return status;
343 }
344 dataToWrite = Slice(buf.BufferStart(), buf.CurrentSize());
345 }
346 status = file_->Write(offset, dataToWrite);
347 return status;
348 }
349
350 // Read up to `n` bytes starting from offset `offset` and store them in
351 // result, provided `scratch` size should be at least `n`.
352 // Returns Status::OK() on success.
353 Status Read(uint64_t offset, size_t n, Slice* result,
354 char* scratch) const override {
355 assert(scratch);
356 offset += prefixLength_;
357 auto status = file_->Read(offset, n, result, scratch);
358 if (!status.ok()) {
359 return status;
360 }
361 status = stream_->Decrypt(offset, (char*)result->data(), result->size());
362 return status;
363 }
364
365 Status Flush() override { return file_->Flush(); }
366
367 Status Sync() override { return file_->Sync(); }
368
369 Status Fsync() override { return file_->Fsync(); }
370
371 Status Close() override { return file_->Close(); }
372 };
373
374 // EncryptedEnv implements an Env wrapper that adds encryption to files stored on disk.
375 class EncryptedEnv : public EnvWrapper {
376 public:
377 EncryptedEnv(Env* base_env, EncryptionProvider *provider)
378 : EnvWrapper(base_env) {
379 provider_ = provider;
380 }
381
382 // NewSequentialFile opens a file for sequential reading.
383 Status NewSequentialFile(const std::string& fname,
384 std::unique_ptr<SequentialFile>* result,
385 const EnvOptions& options) override {
386 result->reset();
387 if (options.use_mmap_reads) {
388 return Status::InvalidArgument();
389 }
390 // Open file using underlying Env implementation
391 std::unique_ptr<SequentialFile> underlying;
392 auto status = EnvWrapper::NewSequentialFile(fname, &underlying, options);
393 if (!status.ok()) {
394 return status;
395 }
396 // Read prefix (if needed)
397 AlignedBuffer prefixBuf;
398 Slice prefixSlice;
399 size_t prefixLength = provider_->GetPrefixLength();
400 if (prefixLength > 0) {
401 // Read prefix
402 prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
403 prefixBuf.AllocateNewBuffer(prefixLength);
404 status = underlying->Read(prefixLength, &prefixSlice, prefixBuf.BufferStart());
405 if (!status.ok()) {
406 return status;
407 }
408 prefixBuf.Size(prefixLength);
409 }
410 // Create cipher stream
411 std::unique_ptr<BlockAccessCipherStream> stream;
412 status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
413 if (!status.ok()) {
414 return status;
415 }
416 (*result) = std::unique_ptr<SequentialFile>(new EncryptedSequentialFile(underlying.release(), stream.release(), prefixLength));
417 return Status::OK();
418 }
419
420 // NewRandomAccessFile opens a file for random read access.
421 Status NewRandomAccessFile(const std::string& fname,
422 std::unique_ptr<RandomAccessFile>* result,
423 const EnvOptions& options) override {
424 result->reset();
425 if (options.use_mmap_reads) {
426 return Status::InvalidArgument();
427 }
428 // Open file using underlying Env implementation
429 std::unique_ptr<RandomAccessFile> underlying;
430 auto status = EnvWrapper::NewRandomAccessFile(fname, &underlying, options);
431 if (!status.ok()) {
432 return status;
433 }
434 // Read prefix (if needed)
435 AlignedBuffer prefixBuf;
436 Slice prefixSlice;
437 size_t prefixLength = provider_->GetPrefixLength();
438 if (prefixLength > 0) {
439 // Read prefix
440 prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
441 prefixBuf.AllocateNewBuffer(prefixLength);
442 status = underlying->Read(0, prefixLength, &prefixSlice, prefixBuf.BufferStart());
443 if (!status.ok()) {
444 return status;
445 }
446 prefixBuf.Size(prefixLength);
447 }
448 // Create cipher stream
449 std::unique_ptr<BlockAccessCipherStream> stream;
450 status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
451 if (!status.ok()) {
452 return status;
453 }
454 (*result) = std::unique_ptr<RandomAccessFile>(new EncryptedRandomAccessFile(underlying.release(), stream.release(), prefixLength));
455 return Status::OK();
456 }
457
458 // NewWritableFile opens a file for sequential writing.
459 Status NewWritableFile(const std::string& fname,
460 std::unique_ptr<WritableFile>* result,
461 const EnvOptions& options) override {
462 result->reset();
463 if (options.use_mmap_writes) {
464 return Status::InvalidArgument();
465 }
466 // Open file using underlying Env implementation
467 std::unique_ptr<WritableFile> underlying;
468 Status status = EnvWrapper::NewWritableFile(fname, &underlying, options);
469 if (!status.ok()) {
470 return status;
471 }
472 // Initialize & write prefix (if needed)
473 AlignedBuffer prefixBuf;
474 Slice prefixSlice;
475 size_t prefixLength = provider_->GetPrefixLength();
476 if (prefixLength > 0) {
477 // Initialize prefix
478 prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
479 prefixBuf.AllocateNewBuffer(prefixLength);
480 provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
481 prefixBuf.Size(prefixLength);
482 prefixSlice = Slice(prefixBuf.BufferStart(), prefixBuf.CurrentSize());
483 // Write prefix
484 status = underlying->Append(prefixSlice);
485 if (!status.ok()) {
486 return status;
487 }
488 }
489 // Create cipher stream
490 std::unique_ptr<BlockAccessCipherStream> stream;
491 status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
492 if (!status.ok()) {
493 return status;
494 }
495 (*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength));
496 return Status::OK();
497 }
498
499 // Create an object that writes to a new file with the specified
500 // name. Deletes any existing file with the same name and creates a
501 // new file. On success, stores a pointer to the new file in
502 // *result and returns OK. On failure stores nullptr in *result and
503 // returns non-OK.
504 //
505 // The returned file will only be accessed by one thread at a time.
506 Status ReopenWritableFile(const std::string& fname,
507 std::unique_ptr<WritableFile>* result,
508 const EnvOptions& options) override {
509 result->reset();
510 if (options.use_mmap_writes) {
511 return Status::InvalidArgument();
512 }
513 // Open file using underlying Env implementation
514 std::unique_ptr<WritableFile> underlying;
515 Status status = EnvWrapper::ReopenWritableFile(fname, &underlying, options);
516 if (!status.ok()) {
517 return status;
518 }
519 // Initialize & write prefix (if needed)
520 AlignedBuffer prefixBuf;
521 Slice prefixSlice;
522 size_t prefixLength = provider_->GetPrefixLength();
523 if (prefixLength > 0) {
524 // Initialize prefix
525 prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
526 prefixBuf.AllocateNewBuffer(prefixLength);
527 provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
528 prefixBuf.Size(prefixLength);
529 prefixSlice = Slice(prefixBuf.BufferStart(), prefixBuf.CurrentSize());
530 // Write prefix
531 status = underlying->Append(prefixSlice);
532 if (!status.ok()) {
533 return status;
534 }
535 }
536 // Create cipher stream
537 std::unique_ptr<BlockAccessCipherStream> stream;
538 status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
539 if (!status.ok()) {
540 return status;
541 }
542 (*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength));
543 return Status::OK();
544 }
545
546 // Reuse an existing file by renaming it and opening it as writable.
547 Status ReuseWritableFile(const std::string& fname,
548 const std::string& old_fname,
549 std::unique_ptr<WritableFile>* result,
550 const EnvOptions& options) override {
551 result->reset();
552 if (options.use_mmap_writes) {
553 return Status::InvalidArgument();
554 }
555 // Open file using underlying Env implementation
556 std::unique_ptr<WritableFile> underlying;
557 Status status = EnvWrapper::ReuseWritableFile(fname, old_fname, &underlying, options);
558 if (!status.ok()) {
559 return status;
560 }
561 // Initialize & write prefix (if needed)
562 AlignedBuffer prefixBuf;
563 Slice prefixSlice;
564 size_t prefixLength = provider_->GetPrefixLength();
565 if (prefixLength > 0) {
566 // Initialize prefix
567 prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
568 prefixBuf.AllocateNewBuffer(prefixLength);
569 provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
570 prefixBuf.Size(prefixLength);
571 prefixSlice = Slice(prefixBuf.BufferStart(), prefixBuf.CurrentSize());
572 // Write prefix
573 status = underlying->Append(prefixSlice);
574 if (!status.ok()) {
575 return status;
576 }
577 }
578 // Create cipher stream
579 std::unique_ptr<BlockAccessCipherStream> stream;
580 status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
581 if (!status.ok()) {
582 return status;
583 }
584 (*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength));
585 return Status::OK();
586 }
587
588 // Open `fname` for random read and write, if file doesn't exist the file
589 // will be created. On success, stores a pointer to the new file in
590 // *result and returns OK. On failure returns non-OK.
591 //
592 // The returned file will only be accessed by one thread at a time.
593 Status NewRandomRWFile(const std::string& fname,
594 std::unique_ptr<RandomRWFile>* result,
595 const EnvOptions& options) override {
596 result->reset();
597 if (options.use_mmap_reads || options.use_mmap_writes) {
598 return Status::InvalidArgument();
599 }
600 // Check file exists
601 bool isNewFile = !FileExists(fname).ok();
602
603 // Open file using underlying Env implementation
604 std::unique_ptr<RandomRWFile> underlying;
605 Status status = EnvWrapper::NewRandomRWFile(fname, &underlying, options);
606 if (!status.ok()) {
607 return status;
608 }
609 // Read or Initialize & write prefix (if needed)
610 AlignedBuffer prefixBuf;
611 Slice prefixSlice;
612 size_t prefixLength = provider_->GetPrefixLength();
613 if (prefixLength > 0) {
614 prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
615 prefixBuf.AllocateNewBuffer(prefixLength);
616 if (!isNewFile) {
617 // File already exists, read prefix
618 status = underlying->Read(0, prefixLength, &prefixSlice, prefixBuf.BufferStart());
619 if (!status.ok()) {
620 return status;
621 }
622 prefixBuf.Size(prefixLength);
623 } else {
624 // File is new, initialize & write prefix
625 provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
626 prefixBuf.Size(prefixLength);
627 prefixSlice = Slice(prefixBuf.BufferStart(), prefixBuf.CurrentSize());
628 // Write prefix
629 status = underlying->Write(0, prefixSlice);
630 if (!status.ok()) {
631 return status;
632 }
633 }
634 }
635 // Create cipher stream
636 std::unique_ptr<BlockAccessCipherStream> stream;
637 status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
638 if (!status.ok()) {
639 return status;
640 }
641 (*result) = std::unique_ptr<RandomRWFile>(new EncryptedRandomRWFile(underlying.release(), stream.release(), prefixLength));
642 return Status::OK();
643 }
644
645 // Store in *result the attributes of the children of the specified directory.
646 // In case the implementation lists the directory prior to iterating the files
647 // and files are concurrently deleted, the deleted files will be omitted from
648 // result.
649 // The name attributes are relative to "dir".
650 // Original contents of *results are dropped.
651 // Returns OK if "dir" exists and "*result" contains its children.
652 // NotFound if "dir" does not exist, the calling process does not have
653 // permission to access "dir", or if "dir" is invalid.
654 // IOError if an IO Error was encountered
655 Status GetChildrenFileAttributes(
656 const std::string& dir, std::vector<FileAttributes>* result) override {
657 auto status = EnvWrapper::GetChildrenFileAttributes(dir, result);
658 if (!status.ok()) {
659 return status;
660 }
661 size_t prefixLength = provider_->GetPrefixLength();
662 for (auto it = std::begin(*result); it!=std::end(*result); ++it) {
663 assert(it->size_bytes >= prefixLength);
664 it->size_bytes -= prefixLength;
665 }
666 return Status::OK();
667 }
668
669 // Store the size of fname in *file_size.
670 Status GetFileSize(const std::string& fname, uint64_t* file_size) override {
671 auto status = EnvWrapper::GetFileSize(fname, file_size);
672 if (!status.ok()) {
673 return status;
674 }
675 size_t prefixLength = provider_->GetPrefixLength();
676 assert(*file_size >= prefixLength);
677 *file_size -= prefixLength;
678 return Status::OK();
679 }
680
681 private:
682 EncryptionProvider *provider_;
683 };
684
685 // Returns an Env that encrypts data when stored on disk and decrypts data when
686 // read from disk.
687 Env* NewEncryptedEnv(Env* base_env, EncryptionProvider* provider) {
688 return new EncryptedEnv(base_env, provider);
689 }
690
691 // Encrypt one or more (partial) blocks of data at the file offset.
692 // Length of data is given in dataSize.
693 Status BlockAccessCipherStream::Encrypt(uint64_t fileOffset, char *data, size_t dataSize) {
694 // Calculate block index
695 auto blockSize = BlockSize();
696 uint64_t blockIndex = fileOffset / blockSize;
697 size_t blockOffset = fileOffset % blockSize;
698 std::unique_ptr<char[]> blockBuffer;
699
700 std::string scratch;
701 AllocateScratch(scratch);
702
703 // Encrypt individual blocks.
704 while (1) {
705 char *block = data;
706 size_t n = std::min(dataSize, blockSize - blockOffset);
707 if (n != blockSize) {
708 // We're not encrypting a full block.
709 // Copy data to blockBuffer
710 if (!blockBuffer.get()) {
711 // Allocate buffer
712 blockBuffer = std::unique_ptr<char[]>(new char[blockSize]);
713 }
714 block = blockBuffer.get();
715 // Copy plain data to block buffer
716 memmove(block + blockOffset, data, n);
717 }
718 auto status = EncryptBlock(blockIndex, block, (char*)scratch.data());
719 if (!status.ok()) {
720 return status;
721 }
722 if (block != data) {
723 // Copy encrypted data back to `data`.
724 memmove(data, block + blockOffset, n);
725 }
726 dataSize -= n;
727 if (dataSize == 0) {
728 return Status::OK();
729 }
730 data += n;
731 blockOffset = 0;
732 blockIndex++;
733 }
734 }
735
736 // Decrypt one or more (partial) blocks of data at the file offset.
737 // Length of data is given in dataSize.
738 Status BlockAccessCipherStream::Decrypt(uint64_t fileOffset, char *data, size_t dataSize) {
739 // Calculate block index
740 auto blockSize = BlockSize();
741 uint64_t blockIndex = fileOffset / blockSize;
742 size_t blockOffset = fileOffset % blockSize;
743 std::unique_ptr<char[]> blockBuffer;
744
745 std::string scratch;
746 AllocateScratch(scratch);
747
748 // Decrypt individual blocks.
749 while (1) {
750 char *block = data;
751 size_t n = std::min(dataSize, blockSize - blockOffset);
752 if (n != blockSize) {
753 // We're not decrypting a full block.
754 // Copy data to blockBuffer
755 if (!blockBuffer.get()) {
756 // Allocate buffer
757 blockBuffer = std::unique_ptr<char[]>(new char[blockSize]);
758 }
759 block = blockBuffer.get();
760 // Copy encrypted data to block buffer
761 memmove(block + blockOffset, data, n);
762 }
763 auto status = DecryptBlock(blockIndex, block, (char*)scratch.data());
764 if (!status.ok()) {
765 return status;
766 }
767 if (block != data) {
768 // Copy decrypted data back to `data`.
769 memmove(data, block + blockOffset, n);
770 }
771
772 // Simply decrementing dataSize by n could cause it to underflow,
773 // which will very likely make it read over the original bounds later
774 assert(dataSize >= n);
775 if (dataSize < n) {
776 return Status::Corruption("Cannot decrypt data at given offset");
777 }
778
779 dataSize -= n;
780 if (dataSize == 0) {
781 return Status::OK();
782 }
783 data += n;
784 blockOffset = 0;
785 blockIndex++;
786 }
787 }
788
789 // Encrypt a block of data.
790 // Length of data is equal to BlockSize().
791 Status ROT13BlockCipher::Encrypt(char *data) {
792 for (size_t i = 0; i < blockSize_; ++i) {
793 data[i] += 13;
794 }
795 return Status::OK();
796 }
797
798 // Decrypt a block of data.
799 // Length of data is equal to BlockSize().
800 Status ROT13BlockCipher::Decrypt(char *data) {
801 return Encrypt(data);
802 }
803
804 // Allocate scratch space which is passed to EncryptBlock/DecryptBlock.
805 void CTRCipherStream::AllocateScratch(std::string& scratch) {
806 auto blockSize = cipher_.BlockSize();
807 scratch.reserve(blockSize);
808 }
809
810 // Encrypt a block of data at the given block index.
811 // Length of data is equal to BlockSize();
812 Status CTRCipherStream::EncryptBlock(uint64_t blockIndex, char *data, char* scratch) {
813
814 // Create nonce + counter
815 auto blockSize = cipher_.BlockSize();
816 memmove(scratch, iv_.data(), blockSize);
817 EncodeFixed64(scratch, blockIndex + initialCounter_);
818
819 // Encrypt nonce+counter
820 auto status = cipher_.Encrypt(scratch);
821 if (!status.ok()) {
822 return status;
823 }
824
825 // XOR data with ciphertext.
826 for (size_t i = 0; i < blockSize; i++) {
827 data[i] = data[i] ^ scratch[i];
828 }
829 return Status::OK();
830 }
831
832 // Decrypt a block of data at the given block index.
833 // Length of data is equal to BlockSize();
834 Status CTRCipherStream::DecryptBlock(uint64_t blockIndex, char *data, char* scratch) {
835 // For CTR decryption & encryption are the same
836 return EncryptBlock(blockIndex, data, scratch);
837 }
838
839 // GetPrefixLength returns the length of the prefix that is added to every file
840 // and used for storing encryption options.
841 // For optimal performance, the prefix length should be a multiple of
842 // the page size.
843 size_t CTREncryptionProvider::GetPrefixLength() {
844 return defaultPrefixLength;
845 }
846
847 // decodeCTRParameters decodes the initial counter & IV from the given
848 // (plain text) prefix.
849 static void decodeCTRParameters(const char *prefix, size_t blockSize, uint64_t &initialCounter, Slice &iv) {
850 // First block contains 64-bit initial counter
851 initialCounter = DecodeFixed64(prefix);
852 // Second block contains IV
853 iv = Slice(prefix + blockSize, blockSize);
854 }
855
856 // CreateNewPrefix initialized an allocated block of prefix memory
857 // for a new file.
858 Status CTREncryptionProvider::CreateNewPrefix(const std::string& /*fname*/,
859 char* prefix,
860 size_t prefixLength) {
861 // Create & seed rnd.
862 Random rnd((uint32_t)Env::Default()->NowMicros());
863 // Fill entire prefix block with random values.
864 for (size_t i = 0; i < prefixLength; i++) {
865 prefix[i] = rnd.Uniform(256) & 0xFF;
866 }
867 // Take random data to extract initial counter & IV
868 auto blockSize = cipher_.BlockSize();
869 uint64_t initialCounter;
870 Slice prefixIV;
871 decodeCTRParameters(prefix, blockSize, initialCounter, prefixIV);
872
873 // Now populate the rest of the prefix, starting from the third block.
874 PopulateSecretPrefixPart(prefix + (2 * blockSize), prefixLength - (2 * blockSize), blockSize);
875
876 // Encrypt the prefix, starting from block 2 (leave block 0, 1 with initial counter & IV unencrypted)
877 CTRCipherStream cipherStream(cipher_, prefixIV.data(), initialCounter);
878 auto status = cipherStream.Encrypt(0, prefix + (2 * blockSize), prefixLength - (2 * blockSize));
879 if (!status.ok()) {
880 return status;
881 }
882 return Status::OK();
883 }
884
885 // PopulateSecretPrefixPart initializes the data into a new prefix block
886 // in plain text.
887 // Returns the amount of space (starting from the start of the prefix)
888 // that has been initialized.
889 size_t CTREncryptionProvider::PopulateSecretPrefixPart(char* /*prefix*/,
890 size_t /*prefixLength*/,
891 size_t /*blockSize*/) {
892 // Nothing to do here, put in custom data in override when needed.
893 return 0;
894 }
895
896 Status CTREncryptionProvider::CreateCipherStream(
897 const std::string& fname, const EnvOptions& options, Slice& prefix,
898 std::unique_ptr<BlockAccessCipherStream>* result) {
899 // Read plain text part of prefix.
900 auto blockSize = cipher_.BlockSize();
901 uint64_t initialCounter;
902 Slice iv;
903 decodeCTRParameters(prefix.data(), blockSize, initialCounter, iv);
904
905 // If the prefix is smaller than twice the block size, we would below read a
906 // very large chunk of the file (and very likely read over the bounds)
907 assert(prefix.size() >= 2 * blockSize);
908 if (prefix.size() < 2 * blockSize) {
909 return Status::Corruption("Unable to read from file " + fname +
910 ": read attempt would read beyond file bounds");
911 }
912
913 // Decrypt the encrypted part of the prefix, starting from block 2 (block 0, 1 with initial counter & IV are unencrypted)
914 CTRCipherStream cipherStream(cipher_, iv.data(), initialCounter);
915 auto status = cipherStream.Decrypt(0, (char*)prefix.data() + (2 * blockSize), prefix.size() - (2 * blockSize));
916 if (!status.ok()) {
917 return status;
918 }
919
920 // Create cipher stream
921 return CreateCipherStreamFromPrefix(fname, options, initialCounter, iv, prefix, result);
922 }
923
924 // CreateCipherStreamFromPrefix creates a block access cipher stream for a file given
925 // given name and options. The given prefix is already decrypted.
926 Status CTREncryptionProvider::CreateCipherStreamFromPrefix(
927 const std::string& /*fname*/, const EnvOptions& /*options*/,
928 uint64_t initialCounter, const Slice& iv, const Slice& /*prefix*/,
929 std::unique_ptr<BlockAccessCipherStream>* result) {
930 (*result) = std::unique_ptr<BlockAccessCipherStream>(
931 new CTRCipherStream(cipher_, iv.data(), initialCounter));
932 return Status::OK();
933 }
934
935 #endif // ROCKSDB_LITE
936
937 } // namespace ROCKSDB_NAMESPACE