1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
9 #include <unordered_map>
12 #include "rocksdb/env.h"
13 #include "rocksdb/options.h"
14 #include "rocksdb/trace_reader_writer.h"
16 namespace ROCKSDB_NAMESPACE
{
18 // This file contains Tracer and Replayer classes that enable capturing and
19 // replaying RocksDB traces.
// Forward declarations: these types are named here without their full
// definitions. In the declarations visible in this header,
// ColumnFamilyHandle is only ever used through a pointer.
21 class ColumnFamilyHandle
;
22 class ColumnFamilyData
;
// Declared here, defined in another translation unit.
// NOTE(review): presumably the magic marker written into the trace header
// (the header is documented as carrying a magic number) -- confirm.
extern const std::string kTraceMagic;

// Sizes of the fixed fields that accompany every trace record.
// NOTE(review): units are presumably bytes (a uint64_t timestamp and a
// char-backed trace type match 8 and 1) -- confirm against the encoder.
constexpr unsigned int kTraceTimestampSize = 8;
constexpr unsigned int kTraceTypeSize = 1;
constexpr unsigned int kTracePayloadLengthSize = 4;

// Combined size of the three fixed fields above.
constexpr unsigned int kTraceMetadataSize =
    kTraceTimestampSize + kTraceTypeSize + kTracePayloadLengthSize;
35 // Supported Trace types.
36 enum TraceType
: char {
41 kTraceIteratorSeek
= 5,
42 kTraceIteratorSeekForPrev
= 6,
43 // Block cache related types.
44 kBlockTraceIndexBlock
= 7,
45 kBlockTraceFilterBlock
= 8,
46 kBlockTraceDataBlock
= 9,
47 kBlockTraceUncompressionDictBlock
= 10,
48 kBlockTraceRangeDeletionBlock
= 11,
49 // IO Trace related types based on options that will be added in trace file.
52 kIOFileNameAndFileSize
= 14,
55 // All trace types should be added before kTraceMax
59 // TODO: This should also be made part of public interface to help users build
60 // custom TraceReaders and TraceWriters.
62 // The data structure that defines a single trace.
64 uint64_t ts
; // timestamp
77 // Encode a trace object into the given string.
//   trace         - the trace to encode; taken by const reference.
//   encoded_trace - output string that receives the encoded form.
// NOTE(review): whether encoded_trace is overwritten or appended to is not
// visible from this declaration -- confirm in the definition.
78 static void EncodeTrace(const Trace
& trace
, std::string
* encoded_trace
);
80 // Decode a string into the given trace object.
//   encoded_trace - the encoded input; taken by const reference.
//   trace         - output trace object filled in from the input.
// Returns a Status; the conditions under which decoding fails are defined
// with the implementation, not in this header.
81 static Status
DecodeTrace(const std::string
& encoded_trace
, Trace
* trace
);
84 // Tracer captures all RocksDB operations using a user-provided TraceWriter.
85 // Every RocksDB operation is written as a single trace. Each trace will have a
86 // timestamp and type, followed by the trace payload.
// Constructs a Tracer.
//   env           - environment handle.
//                   NOTE(review): presumably the time source for trace
//                   timestamps -- confirm in the definition.
//   trace_options - tracing configuration.
//   trace_writer  - the user-provided writer, taken by rvalue reference.
//                   NOTE(review): presumably moved into trace_writer_ --
//                   confirm in the definition.
89 Tracer(Env
* env
, const TraceOptions
& trace_options
,
90 std::unique_ptr
<TraceWriter
>&& trace_writer
);
93 // Trace all write operations -- Put, Merge, Delete, SingleDelete, Write
//   write_batch - the batch of write operations to record.
// Returns a Status; failure conditions are defined with the implementation.
94 Status
Write(WriteBatch
* write_batch
);
96 // Trace Get operations.
//   cfname - column family handle for the Get. Despite the parameter name,
//            this is a ColumnFamilyHandle*, not a name string.
//   key    - the key being looked up.
// Returns a Status; failure conditions are defined with the implementation.
97 Status
Get(ColumnFamilyHandle
* cfname
, const Slice
& key
);
// NOTE(review): presumably records an iterator Seek as a trace of type
// kTraceIteratorSeek -- confirm in the definition.
//   cf_id - numeric column family id (passed by const reference).
//   key   - the seek target key.
// Returns a Status; failure conditions are defined with the implementation.
100 Status
IteratorSeek(const uint32_t& cf_id
, const Slice
& key
);
// NOTE(review): presumably records an iterator SeekForPrev as a trace of type
// kTraceIteratorSeekForPrev -- confirm in the definition.
//   cf_id - numeric column family id (passed by const reference).
//   key   - the seek target key.
// Returns a Status; failure conditions are defined with the implementation.
101 Status
IteratorSeekForPrev(const uint32_t& cf_id
, const Slice
& key
);
103 // Returns true if the trace is over the configured max trace file limit.
// Takes no arguments.
// NOTE(review): presumably compares what the writer has produced against a
// limit held in trace_options_ -- confirm in the definition.
105 bool IsTraceFileOverMax();
107 // Writes a trace footer at the end of the tracing
111 // Write a trace header at the beginning, typically on initiating a trace,
112 // with some metadata like a magic number, trace version, RocksDB version, and
//
// Takes no arguments and returns a Status for the header write.
114 Status
WriteHeader();
116 // Write a trace footer, typically on ending a trace, with some metadata.
// Takes no arguments and returns a Status for the footer write.
117 Status
WriteFooter();
119 // Write a single trace using the provided TraceWriter to the underlying
120 // system, say, a filesystem or a streaming service.
//   trace - the trace record to write; taken by const reference.
// Returns a Status for the write.
121 Status
WriteTrace(const Trace
& trace
);
123 // Helps in filtering and sampling of traces.
124 // Returns true if a trace should be skipped, false otherwise.
//   type - the TraceType of the operation being considered.
// NOTE(review): the filtering and sampling rules are presumably derived from
// trace_options_ and trace_request_count_ -- confirm in the definition.
125 bool ShouldSkipTrace(const TraceType
& type
);
// Tracing configuration, held by value.
// NOTE(review): presumably a copy of the constructor's trace_options -- confirm.
128 TraceOptions trace_options_
;
// Owning pointer to the user-provided TraceWriter.
129 std::unique_ptr
<TraceWriter
> trace_writer_
;
// NOTE(review): presumably a running count of trace requests used when
// sampling -- confirm in the definition.
130 uint64_t trace_request_count_
;
133 // Replayer helps to replay the captured RocksDB operations, using a user
134 // provided TraceReader.
135 // The Replayer is instantiated via db_bench today, on using "replay" benchmark.
// Constructs a Replayer.
//   db      - the database the captured operations are replayed against.
//   handles - column family handles.
//             NOTE(review): presumably used to populate cf_map_ -- confirm.
//   reader  - the user-provided TraceReader, taken by rvalue reference.
//             NOTE(review): presumably moved into trace_reader_ -- confirm.
138 Replayer(DB
* db
, const std::vector
<ColumnFamilyHandle
*>& handles
,
139 std::unique_ptr
<TraceReader
>&& reader
);
142 // Replay all the traces from the provided trace stream, taking the delay
143 // between the traces into consideration.
146 // Replay the provided trace stream, the same as Replay(), but using
147 // multiple threads. Queries are scheduled in the thread pool job queue.
148 // The caller sets the number of threads in the thread pool.
//   threads_num - number of threads in the thread pool.
// Returns a Status; failure conditions are defined with the implementation.
149 Status
MultiThreadReplay(uint32_t threads_num
);
151 // Enables fast forwarding a replay by reducing the delay between the ingested
153 // fast_forward : Rate of replay speedup.
154 // If 1, replay the operations at the same rate as in the trace stream.
155 // If > 1, speed up the replay by this amount.
// NOTE(review): the behavior for fast_forward == 0 is not described here --
// confirm in the definition.
// Returns a Status.
156 Status
SetFastForward(uint32_t fast_forward
);
// NOTE(review): presumably reads the trace header from trace_reader_ into
// *header -- confirm in the definition.
//   header - output Trace object.
// Returns a Status.
159 Status
ReadHeader(Trace
* header
);
// NOTE(review): presumably reads the trace footer from trace_reader_ into
// *footer -- confirm in the definition.
//   footer - output Trace object.
// Returns a Status.
160 Status
ReadFooter(Trace
* footer
);
// NOTE(review): presumably reads the next trace record from trace_reader_
// into *trace -- confirm in the definition.
//   trace - output Trace object.
// Returns a Status.
161 Status
ReadTrace(Trace
* trace
);
163 // The background function for MultiThreadReplay to execute Get query
164 // based on the trace records.
//   arg - untyped pointer to the work item.
//         NOTE(review): presumably a ReplayerWorkerArg* -- confirm.
165 static void BGWorkGet(void* arg
);
167 // The background function for MultiThreadReplay to execute WriteBatch
168 // (Put, Delete, SingleDelete, DeleteRange) based on the trace records.
//   arg - untyped pointer to the work item.
//         NOTE(review): presumably a ReplayerWorkerArg* -- confirm.
169 static void BGWorkWriteBatch(void* arg
);
171 // The background function for MultiThreadReplay to execute Iterator (Seek)
172 // based on the trace records.
//   arg - untyped pointer to the work item.
//         NOTE(review): presumably a ReplayerWorkerArg* -- confirm.
173 static void BGWorkIterSeek(void* arg
);
175 // The background function for MultiThreadReplay to execute Iterator
176 // (SeekForPrev) based on the trace records.
//   arg - untyped pointer to the work item.
//         NOTE(review): presumably a ReplayerWorkerArg* -- confirm.
177 static void BGWorkIterSeekForPrev(void* arg
);
// Owning pointer to the user-provided TraceReader.
181 std::unique_ptr
<TraceReader
> trace_reader_
;
// Maps a uint32_t key to a column family handle.
// NOTE(review): the key is presumably the column family id -- confirm.
182 std::unordered_map
<uint32_t, ColumnFamilyHandle
*> cf_map_
;
// NOTE(review): presumably the replay speedup rate set via SetFastForward()
// -- confirm in the definition.
183 uint32_t fast_forward_
;
186 // The argument passed in to MultiThreadReplay for each trace record.
187 struct ReplayerWorkerArg
{
190 std::unordered_map
<uint32_t, ColumnFamilyHandle
*>* cf_map
;
191 WriteOptions woptions
;
192 ReadOptions roptions
;
195 } // namespace ROCKSDB_NAMESPACE