]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/trace_replay/trace_replay.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / trace_replay / trace_replay.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #pragma once
7
8 #include <memory>
9 #include <unordered_map>
10 #include <utility>
11
12 #include "rocksdb/env.h"
13 #include "rocksdb/options.h"
14 #include "rocksdb/trace_reader_writer.h"
15
16 namespace ROCKSDB_NAMESPACE {
17
18 // This file contains Tracer and Replayer classes that enable capturing and
19 // replaying RocksDB traces.
20
// Forward declarations. This header only names these types through pointers
// and references, so their full definitions are not required here.

// Database handles.
class DB;
class DBImpl;

// Column family types.
class ColumnFamilyData;
class ColumnFamilyHandle;

// Key / write payload types.
class Slice;
class WriteBatch;
27
// Declared here, defined out of line. Presumably the magic value written in
// the trace header -- confirm against the implementation file.
extern const std::string kTraceMagic;

// Widths of the fixed fields that accompany every trace payload. These look
// like byte counts of the encoded record (8 matches the uint64_t timestamp,
// 1 matches the char-backed TraceType) -- confirm against
// TracerHelper::EncodeTrace.
constexpr unsigned int kTraceTimestampSize = 8;
constexpr unsigned int kTraceTypeSize = 1;
constexpr unsigned int kTracePayloadLengthSize = 4;

// Combined width of the three metadata fields above.
constexpr unsigned int kTraceMetadataSize =
    kTraceTimestampSize + kTraceTypeSize + kTracePayloadLengthSize;
34
// The kinds of record that can appear in a trace. The underlying type is
// char, so each value occupies a single byte.
enum TraceType : char {
  // Begin / end records (presumably the trace header and footer).
  kTraceBegin = 1,
  kTraceEnd = 2,

  // Traced DB operations.
  kTraceWrite = 3,
  kTraceGet = 4,
  kTraceIteratorSeek = 5,
  kTraceIteratorSeekForPrev = 6,

  // Block cache related types.
  kBlockTraceIndexBlock = 7,
  kBlockTraceFilterBlock = 8,
  kBlockTraceDataBlock = 9,
  kBlockTraceUncompressionDictBlock = 10,
  kBlockTraceRangeDeletionBlock = 11,

  // IO Trace related types based on options that will be added in trace file.
  kIOGeneral = 12,
  kIOFileName = 13,
  kIOFileNameAndFileSize = 14,
  kIOLen = 15,
  kIOLenAndOffset = 16,

  // Sentinel: every new trace type must be inserted above this line so that
  // kTraceMax stays one past the last real type.
  kTraceMax,
};
58
59 // TODO: This should also be made part of public interface to help users build
60 // custom TracerReaders and TraceWriters.
61 //
62 // The data structure that defines a single trace.
63 struct Trace {
64 uint64_t ts; // timestamp
65 TraceType type;
66 std::string payload;
67
68 void reset() {
69 ts = 0;
70 type = kTraceMax;
71 payload.clear();
72 }
73 };
74
75 class TracerHelper {
76 public:
77 // Encode a trace object into the given string.
78 static void EncodeTrace(const Trace& trace, std::string* encoded_trace);
79
80 // Decode a string into the given trace object.
81 static Status DecodeTrace(const std::string& encoded_trace, Trace* trace);
82 };
83
84 // Tracer captures all RocksDB operations using a user-provided TraceWriter.
85 // Every RocksDB operation is written as a single trace. Each trace will have a
86 // timestamp and type, followed by the trace payload.
87 class Tracer {
88 public:
89 Tracer(Env* env, const TraceOptions& trace_options,
90 std::unique_ptr<TraceWriter>&& trace_writer);
91 ~Tracer();
92
93 // Trace all write operations -- Put, Merge, Delete, SingleDelete, Write
94 Status Write(WriteBatch* write_batch);
95
96 // Trace Get operations.
97 Status Get(ColumnFamilyHandle* cfname, const Slice& key);
98
99 // Trace Iterators.
100 Status IteratorSeek(const uint32_t& cf_id, const Slice& key);
101 Status IteratorSeekForPrev(const uint32_t& cf_id, const Slice& key);
102
103 // Returns true if the trace is over the configured max trace file limit.
104 // False otherwise.
105 bool IsTraceFileOverMax();
106
107 // Writes a trace footer at the end of the tracing
108 Status Close();
109
110 private:
111 // Write a trace header at the beginning, typically on initiating a trace,
112 // with some metadata like a magic number, trace version, RocksDB version, and
113 // trace format.
114 Status WriteHeader();
115
116 // Write a trace footer, typically on ending a trace, with some metadata.
117 Status WriteFooter();
118
119 // Write a single trace using the provided TraceWriter to the underlying
120 // system, say, a filesystem or a streaming service.
121 Status WriteTrace(const Trace& trace);
122
123 // Helps in filtering and sampling of traces.
124 // Returns true if a trace should be skipped, false otherwise.
125 bool ShouldSkipTrace(const TraceType& type);
126
127 Env* env_;
128 TraceOptions trace_options_;
129 std::unique_ptr<TraceWriter> trace_writer_;
130 uint64_t trace_request_count_;
131 };
132
133 // Replayer helps to replay the captured RocksDB operations, using a user
134 // provided TraceReader.
135 // The Replayer is instantiated via db_bench today, on using "replay" benchmark.
136 class Replayer {
137 public:
138 Replayer(DB* db, const std::vector<ColumnFamilyHandle*>& handles,
139 std::unique_ptr<TraceReader>&& reader);
140 ~Replayer();
141
142 // Replay all the traces from the provided trace stream, taking the delay
143 // between the traces into consideration.
144 Status Replay();
145
146 // Replay the provide trace stream, which is the same as Replay(), with
147 // multi-threads. Queries are scheduled in the thread pool job queue.
148 // User can set the number of threads in the thread pool.
149 Status MultiThreadReplay(uint32_t threads_num);
150
151 // Enables fast forwarding a replay by reducing the delay between the ingested
152 // traces.
153 // fast_forward : Rate of replay speedup.
154 // If 1, replay the operations at the same rate as in the trace stream.
155 // If > 1, speed up the replay by this amount.
156 Status SetFastForward(uint32_t fast_forward);
157
158 private:
159 Status ReadHeader(Trace* header);
160 Status ReadFooter(Trace* footer);
161 Status ReadTrace(Trace* trace);
162
163 // The background function for MultiThreadReplay to execute Get query
164 // based on the trace records.
165 static void BGWorkGet(void* arg);
166
167 // The background function for MultiThreadReplay to execute WriteBatch
168 // (Put, Delete, SingleDelete, DeleteRange) based on the trace records.
169 static void BGWorkWriteBatch(void* arg);
170
171 // The background function for MultiThreadReplay to execute Iterator (Seek)
172 // based on the trace records.
173 static void BGWorkIterSeek(void* arg);
174
175 // The background function for MultiThreadReplay to execute Iterator
176 // (SeekForPrev) based on the trace records.
177 static void BGWorkIterSeekForPrev(void* arg);
178
179 DBImpl* db_;
180 Env* env_;
181 std::unique_ptr<TraceReader> trace_reader_;
182 std::unordered_map<uint32_t, ColumnFamilyHandle*> cf_map_;
183 uint32_t fast_forward_;
184 };
185
186 // The passin arg of MultiThreadRepkay for each trace record.
187 struct ReplayerWorkerArg {
188 DB* db;
189 Trace trace_entry;
190 std::unordered_map<uint32_t, ColumnFamilyHandle*>* cf_map;
191 WriteOptions woptions;
192 ReadOptions roptions;
193 };
194
195 } // namespace ROCKSDB_NAMESPACE