]>
Commit | Line | Data |
---|---|---|
20effc67 TL |
1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | // This source code is licensed under both the GPLv2 (found in the | |
3 | // COPYING file in the root directory) and Apache 2.0 License | |
4 | // (found in the LICENSE.Apache file in the root directory). | |
5 | ||
6 | #pragma once | |
7 | ||
8 | #include <atomic> | |
9 | #include <fstream> | |
10 | ||
11 | #include "monitoring/instrumented_mutex.h" | |
1e59de90 TL |
12 | #include "port/lang.h" |
13 | #include "rocksdb/file_system.h" | |
20effc67 | 14 | #include "rocksdb/options.h" |
1e59de90 | 15 | #include "rocksdb/trace_record.h" |
20effc67 TL |
16 | #include "trace_replay/trace_replay.h" |
17 | ||
18 | namespace ROCKSDB_NAMESPACE { | |
1e59de90 TL |
19 | class SystemClock; |
20 | class TraceReader; | |
21 | class TraceWriter; | |
22 | ||
23 | /* In order to log new data in the trace record for specified operations, do | |
24 | the following: | |
25 | 1. Add new data in IOTraceOp (say kIONewData = 3) | |
26 | 2. Log it in IOTraceWriter::WriteIOOp, and read that in | |
27 | IOTraceReader::ReadIOOp and | |
28 | IOTraceRecordParser::PrintHumanReadableIOTraceRecord in the switch case. | |
29 | 3. In the FileSystemTracer APIs where this data will be logged, update | |
30 | io_op_data |= (1 << IOTraceOp::kIONewData). | |
31 | */ | |
32 | enum IOTraceOp : char { | |
33 | // The value of each enum represents the bitwise position for | |
34 | // IOTraceRecord.io_op_data. | |
35 | kIOFileSize = 0, | |
36 | kIOLen = 1, | |
37 | kIOOffset = 2, | |
38 | }; | |
20effc67 TL |
39 | |
40 | struct IOTraceRecord { | |
41 | // Required fields for all accesses. | |
42 | uint64_t access_timestamp = 0; | |
43 | TraceType trace_type = TraceType::kTraceMax; | |
1e59de90 TL |
44 | // Each bit in io_op_data stores which corresponding info from IOTraceOp will |
45 | // be added in the trace. For example, if bit at position 1 is set then | |
46 | // IOTraceOp::kIOLen (length) will be logged in the record. | |
47 | uint64_t io_op_data = 0; | |
20effc67 TL |
48 | std::string file_operation; |
49 | uint64_t latency = 0; | |
50 | std::string io_status; | |
1e59de90 | 51 | // Stores file name instead of full path. |
20effc67 | 52 | std::string file_name; |
1e59de90 TL |
53 | |
54 | // Fields added to record based on IO operation. | |
20effc67 TL |
55 | uint64_t len = 0; |
56 | uint64_t offset = 0; | |
57 | uint64_t file_size = 0; | |
58 | ||
1e59de90 TL |
59 | // Additional information passed in IODebugContext. |
60 | uint64_t trace_data = 0; | |
61 | std::string request_id; | |
20effc67 | 62 | |
1e59de90 | 63 | IOTraceRecord() {} |
20effc67 TL |
64 | |
65 | IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type, | |
1e59de90 TL |
66 | const uint64_t& _io_op_data, const std::string& _file_operation, |
67 | const uint64_t& _latency, const std::string& _io_status, | |
68 | const std::string& _file_name, const uint64_t& _file_size = 0) | |
20effc67 TL |
69 | : access_timestamp(_access_timestamp), |
70 | trace_type(_trace_type), | |
1e59de90 | 71 | io_op_data(_io_op_data), |
20effc67 TL |
72 | file_operation(_file_operation), |
73 | latency(_latency), | |
74 | io_status(_io_status), | |
75 | file_name(_file_name), | |
76 | file_size(_file_size) {} | |
77 | ||
78 | IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type, | |
1e59de90 TL |
79 | const uint64_t& _io_op_data, const std::string& _file_operation, |
80 | const uint64_t& _latency, const std::string& _io_status, | |
81 | const std::string& _file_name, const uint64_t& _len, | |
82 | const uint64_t& _offset) | |
20effc67 TL |
83 | : access_timestamp(_access_timestamp), |
84 | trace_type(_trace_type), | |
1e59de90 | 85 | io_op_data(_io_op_data), |
20effc67 TL |
86 | file_operation(_file_operation), |
87 | latency(_latency), | |
88 | io_status(_io_status), | |
1e59de90 | 89 | file_name(_file_name), |
20effc67 TL |
90 | len(_len), |
91 | offset(_offset) {} | |
92 | }; | |
93 | ||
94 | struct IOTraceHeader { | |
95 | uint64_t start_time; | |
96 | uint32_t rocksdb_major_version; | |
97 | uint32_t rocksdb_minor_version; | |
98 | }; | |
99 | ||
100 | // IOTraceWriter writes IO operation as a single trace. Each trace will have a | |
101 | // timestamp and type, followed by the trace payload. | |
102 | class IOTraceWriter { | |
103 | public: | |
1e59de90 | 104 | IOTraceWriter(SystemClock* clock, const TraceOptions& trace_options, |
20effc67 TL |
105 | std::unique_ptr<TraceWriter>&& trace_writer); |
106 | ~IOTraceWriter() = default; | |
107 | // No copy and move. | |
108 | IOTraceWriter(const IOTraceWriter&) = delete; | |
109 | IOTraceWriter& operator=(const IOTraceWriter&) = delete; | |
110 | IOTraceWriter(IOTraceWriter&&) = delete; | |
111 | IOTraceWriter& operator=(IOTraceWriter&&) = delete; | |
112 | ||
1e59de90 | 113 | Status WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg); |
20effc67 TL |
114 | |
115 | // Write a trace header at the beginning, typically on initiating a trace, | |
116 | // with some metadata like a magic number and RocksDB version. | |
117 | Status WriteHeader(); | |
118 | ||
119 | private: | |
1e59de90 | 120 | SystemClock* clock_; |
20effc67 TL |
121 | TraceOptions trace_options_; |
122 | std::unique_ptr<TraceWriter> trace_writer_; | |
123 | }; | |
124 | ||
125 | // IOTraceReader helps read the trace file generated by IOTraceWriter. | |
126 | class IOTraceReader { | |
127 | public: | |
128 | explicit IOTraceReader(std::unique_ptr<TraceReader>&& reader); | |
129 | ~IOTraceReader() = default; | |
130 | // No copy and move. | |
131 | IOTraceReader(const IOTraceReader&) = delete; | |
132 | IOTraceReader& operator=(const IOTraceReader&) = delete; | |
133 | IOTraceReader(IOTraceReader&&) = delete; | |
134 | IOTraceReader& operator=(IOTraceReader&&) = delete; | |
135 | ||
136 | Status ReadHeader(IOTraceHeader* header); | |
137 | ||
138 | Status ReadIOOp(IOTraceRecord* record); | |
139 | ||
140 | private: | |
141 | std::unique_ptr<TraceReader> trace_reader_; | |
142 | }; | |
143 | ||
144 | // An IO tracer. It uses IOTraceWriter to write the access record to the | |
145 | // trace file. | |
146 | class IOTracer { | |
147 | public: | |
148 | IOTracer(); | |
149 | ~IOTracer(); | |
150 | // No copy and move. | |
151 | IOTracer(const IOTracer&) = delete; | |
152 | IOTracer& operator=(const IOTracer&) = delete; | |
153 | IOTracer(IOTracer&&) = delete; | |
154 | IOTracer& operator=(IOTracer&&) = delete; | |
155 | ||
156 | // no_sanitize is added for tracing_enabled. writer_ is protected under mutex | |
157 | // so even if the user calls Start/EndIOTrace and tracing_enabled is not updated | |
158 | // in the meanwhile, WriteIOOp will anyway check the writer_ protected under | |
159 | // mutex and ignore the operation if writer_ is null. So it's ok if | |
160 | // tracing_enabled shows a non-updated value. | |
161 | ||
20effc67 TL |
162 | // Start writing IO operations to the trace_writer. |
163 | TSAN_SUPPRESSION Status | |
1e59de90 | 164 | StartIOTrace(SystemClock* clock, const TraceOptions& trace_options, |
20effc67 TL |
165 | std::unique_ptr<TraceWriter>&& trace_writer); |
166 | ||
167 | // Stop writing IO operations to the trace_writer. | |
168 | TSAN_SUPPRESSION void EndIOTrace(); | |
169 | ||
170 | TSAN_SUPPRESSION bool is_tracing_enabled() const { return tracing_enabled; } | |
171 | ||
1e59de90 | 172 | void WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg); |
20effc67 TL |
173 | |
174 | private: | |
175 | TraceOptions trace_options_; | |
176 | // A mutex protects the writer_. | |
177 | InstrumentedMutex trace_writer_mutex_; | |
178 | std::atomic<IOTraceWriter*> writer_; | |
179 | // bool tracing_enabled is added to avoid the costly operation of checking | |
180 | // whether the atomic variable 'writer_' is nullptr in is_tracing_enabled(). | |
181 | // is_tracing_enabled() is invoked multiple times by FileSystem classes. | |
182 | bool tracing_enabled; | |
183 | }; | |
184 | ||
185 | } // namespace ROCKSDB_NAMESPACE |