]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/trace_replay/io_tracer.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / trace_replay / io_tracer.h
CommitLineData
20effc67
TL
1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
5
6#pragma once
7
8#include <atomic>
9#include <fstream>
10
11#include "monitoring/instrumented_mutex.h"
1e59de90
TL
12#include "port/lang.h"
13#include "rocksdb/file_system.h"
20effc67 14#include "rocksdb/options.h"
1e59de90 15#include "rocksdb/trace_record.h"
20effc67
TL
16#include "trace_replay/trace_replay.h"
17
18namespace ROCKSDB_NAMESPACE {
1e59de90
TL
// Forward declarations. Within this header these types appear only behind a
// raw pointer or a std::unique_ptr, so their full definitions are not needed
// at this point.
class SystemClock;
class TraceReader;
class TraceWriter;
22
/* To log a new piece of data in the trace record for specific operations, do
   the following:
   1. Add the new data to IOTraceOp (say kIONewData = 3).
   2. Log it in IOTraceWriter::WriteIOOp, and read it back in
      IOTraceReader::ReadIOOp and in
      IOTraceRecordParser::PrintHumanReadableIOTraceRecord, in the switch case.
   3. In the FileSystemTracer APIs where this data should be logged, update
      io_op_data |= (1 << IOTraceOp::kIONewData).
*/
// Identifies the optional pieces of information an IO trace record may carry.
// Each enumerator's numeric value is the bit position of that piece of
// information inside IOTraceRecord.io_op_data, so the values must stay stable
// and distinct.
enum IOTraceOp : char {
  kIOFileSize = 0,  // bit 0
  kIOLen = 1,       // bit 1
  kIOOffset = 2,    // bit 2
};
20effc67
TL
39
40struct IOTraceRecord {
41 // Required fields for all accesses.
42 uint64_t access_timestamp = 0;
43 TraceType trace_type = TraceType::kTraceMax;
1e59de90
TL
44 // Each bit in io_op_data stores which corresponding info from IOTraceOp will
45 // be added in the trace. Foreg, if bit at position 1 is set then
46 // IOTraceOp::kIOLen (length) will be logged in the record.
47 uint64_t io_op_data = 0;
20effc67
TL
48 std::string file_operation;
49 uint64_t latency = 0;
50 std::string io_status;
1e59de90 51 // Stores file name instead of full path.
20effc67 52 std::string file_name;
1e59de90
TL
53
54 // Fields added to record based on IO operation.
20effc67
TL
55 uint64_t len = 0;
56 uint64_t offset = 0;
57 uint64_t file_size = 0;
58
1e59de90
TL
59 // Additional information passed in IODebugContext.
60 uint64_t trace_data = 0;
61 std::string request_id;
20effc67 62
1e59de90 63 IOTraceRecord() {}
20effc67
TL
64
65 IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type,
1e59de90
TL
66 const uint64_t& _io_op_data, const std::string& _file_operation,
67 const uint64_t& _latency, const std::string& _io_status,
68 const std::string& _file_name, const uint64_t& _file_size = 0)
20effc67
TL
69 : access_timestamp(_access_timestamp),
70 trace_type(_trace_type),
1e59de90 71 io_op_data(_io_op_data),
20effc67
TL
72 file_operation(_file_operation),
73 latency(_latency),
74 io_status(_io_status),
75 file_name(_file_name),
76 file_size(_file_size) {}
77
78 IOTraceRecord(const uint64_t& _access_timestamp, const TraceType& _trace_type,
1e59de90
TL
79 const uint64_t& _io_op_data, const std::string& _file_operation,
80 const uint64_t& _latency, const std::string& _io_status,
81 const std::string& _file_name, const uint64_t& _len,
82 const uint64_t& _offset)
20effc67
TL
83 : access_timestamp(_access_timestamp),
84 trace_type(_trace_type),
1e59de90 85 io_op_data(_io_op_data),
20effc67
TL
86 file_operation(_file_operation),
87 latency(_latency),
88 io_status(_io_status),
1e59de90 89 file_name(_file_name),
20effc67
TL
90 len(_len),
91 offset(_offset) {}
92};
93
// Metadata describing an IO trace: its start time and the RocksDB version.
// IOTraceReader::ReadHeader receives a pointer to one of these as its output
// argument.
//
// Every field is zero-initialized so that a default-constructed header that
// was never filled in holds well-defined values instead of indeterminate
// ones. The struct remains an aggregate (C++14 and later), so brace
// initialization such as IOTraceHeader{start, major, minor} keeps working.
struct IOTraceHeader {
  uint64_t start_time = 0;
  uint32_t rocksdb_major_version = 0;
  uint32_t rocksdb_minor_version = 0;
};
99
100// IOTraceWriter writes IO operation as a single trace. Each trace will have a
101// timestamp and type, followed by the trace payload.
102class IOTraceWriter {
103 public:
1e59de90 104 IOTraceWriter(SystemClock* clock, const TraceOptions& trace_options,
20effc67
TL
105 std::unique_ptr<TraceWriter>&& trace_writer);
106 ~IOTraceWriter() = default;
107 // No copy and move.
108 IOTraceWriter(const IOTraceWriter&) = delete;
109 IOTraceWriter& operator=(const IOTraceWriter&) = delete;
110 IOTraceWriter(IOTraceWriter&&) = delete;
111 IOTraceWriter& operator=(IOTraceWriter&&) = delete;
112
1e59de90 113 Status WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg);
20effc67
TL
114
115 // Write a trace header at the beginning, typically on initiating a trace,
116 // with some metadata like a magic number and RocksDB version.
117 Status WriteHeader();
118
119 private:
1e59de90 120 SystemClock* clock_;
20effc67
TL
121 TraceOptions trace_options_;
122 std::unique_ptr<TraceWriter> trace_writer_;
123};
124
125// IOTraceReader helps read the trace file generated by IOTraceWriter.
126class IOTraceReader {
127 public:
128 explicit IOTraceReader(std::unique_ptr<TraceReader>&& reader);
129 ~IOTraceReader() = default;
130 // No copy and move.
131 IOTraceReader(const IOTraceReader&) = delete;
132 IOTraceReader& operator=(const IOTraceReader&) = delete;
133 IOTraceReader(IOTraceReader&&) = delete;
134 IOTraceReader& operator=(IOTraceReader&&) = delete;
135
136 Status ReadHeader(IOTraceHeader* header);
137
138 Status ReadIOOp(IOTraceRecord* record);
139
140 private:
141 std::unique_ptr<TraceReader> trace_reader_;
142};
143
144// An IO tracer. It uses IOTraceWriter to write the access record to the
145// trace file.
146class IOTracer {
147 public:
148 IOTracer();
149 ~IOTracer();
150 // No copy and move.
151 IOTracer(const IOTracer&) = delete;
152 IOTracer& operator=(const IOTracer&) = delete;
153 IOTracer(IOTracer&&) = delete;
154 IOTracer& operator=(IOTracer&&) = delete;
155
156 // no_sanitize is added for tracing_enabled. writer_ is protected under mutex
157 // so even if user call Start/EndIOTrace and tracing_enabled is not updated in
158 // the meanwhile, WriteIOOp will anyways check the writer_ protected under
159 // mutex and ignore the operation if writer_is null. So its ok if
160 // tracing_enabled shows non updated value.
161
20effc67
TL
162 // Start writing IO operations to the trace_writer.
163 TSAN_SUPPRESSION Status
1e59de90 164 StartIOTrace(SystemClock* clock, const TraceOptions& trace_options,
20effc67
TL
165 std::unique_ptr<TraceWriter>&& trace_writer);
166
167 // Stop writing IO operations to the trace_writer.
168 TSAN_SUPPRESSION void EndIOTrace();
169
170 TSAN_SUPPRESSION bool is_tracing_enabled() const { return tracing_enabled; }
171
1e59de90 172 void WriteIOOp(const IOTraceRecord& record, IODebugContext* dbg);
20effc67
TL
173
174 private:
175 TraceOptions trace_options_;
176 // A mutex protects the writer_.
177 InstrumentedMutex trace_writer_mutex_;
178 std::atomic<IOTraceWriter*> writer_;
179 // bool tracing_enabled is added to avoid costly operation of checking atomic
180 // variable 'writer_' is nullptr or not in is_tracing_enabled().
181 // is_tracing_enabled() is invoked multiple times by FileSystem classes.
182 bool tracing_enabled;
183};
184
185} // namespace ROCKSDB_NAMESPACE