1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
4 #ifndef CEPH_RGW_SYNC_TRACE_H
5 #define CEPH_RGW_SYNC_TRACE_H
9 #include "common/debug.h"
10 #include "common/ceph_json.h"
12 #include "rgw_sync_trace.h"
13 #include "rgw_rados.h"
14 #include "rgw_worker.h"
17 #define dout_context g_ceph_context
18 #define dout_subsys ceph_subsys_rgw_sync
// Constructs a sync trace node.
//
// Params:
//   _cct    - Ceph context; stored in `cct` and used to read configuration.
//   _handle - numeric handle for this node.
//   _parent - parent node reference; its get_prefix() seeds this node's prefix.
//   _type   - node type string.
//   _id     - node id string.
//
// `history` is constructed from the rgw_sync_trace_per_node_log_size config
// value (presumably its capacity — TODO confirm against the member's type).
// `prefix` starts from the parent's prefix and has "[<id>]" appended.
// NOTE(review): the conditions guarding those prefix steps (e.g. a null
// parent or an empty id) are not visible here — confirm before relying on
// the exact prefix format.
20 RGWSyncTraceNode::RGWSyncTraceNode(CephContext
*_cct
, uint64_t _handle
,
21 const RGWSyncTraceNodeRef
& _parent
,
22 const string
& _type
, const string
& _id
) : cct(_cct
),
27 history(cct
->_conf
->rgw_sync_trace_per_node_log_size
)
30 prefix
= parent
->get_prefix();
36 prefix
+= "[" + id
+ "]";
// Records a status entry in this node's history and writes it to the debug log.
//
// Params:
//   level - debug level; used both for the should_gather() check and as the
//           dynamic level of the emitted log line.
//   s     - status text (presumably assigned to `status` before it is pushed
//           to `history`; that assignment is not visible here — confirm).
//
// The line logged is "RGW-SYNC:" followed by to_str(). If the rgw_sync
// subsystem gathers at `level`, the line goes to rgw_sync; otherwise the
// second log statement is used (per the comment below, the rgw subsystem),
// so the message is emitted only once.
42 void RGWSyncTraceNode::log(int level
, const string
& s
)
45 history
.push_back(status
);
46 /* dump output on either rgw_sync, or rgw -- but only once */
47 if (cct
->_conf
->subsys
.should_gather(ceph_subsys_rgw_sync
, level
)) {
48 lsubdout(cct
, rgw_sync
,
49 ceph::dout::need_dynamic(level
)) << "RGW-SYNC:" << to_str() << dendl
;
52 ceph::dout::need_dynamic(level
)) << "RGW-SYNC:" << to_str() << dendl
;
// RGWRadosThread subclass, registered under the name "sync-trace", that
// holds a pointer to the owning RGWSyncTraceManager.
//
// - interval_msec() returns the rgw_sync_trace_servicemap_update_interval
//   config value multiplied by 1000 (presumably seconds -> milliseconds,
//   given the method name — confirm the option's unit).
// - The constructor forwards `_store` and the thread name to the base class
//   and keeps both `_store` and `_manager` as members.
// - process() is declared here and defined out of line.
57 class RGWSyncTraceServiceMapThread
: public RGWRadosThread
{
59 RGWSyncTraceManager
*manager
;
61 uint64_t interval_msec() override
{
62 return cct
->_conf
->rgw_sync_trace_servicemap_update_interval
* 1000;
65 RGWSyncTraceServiceMapThread(RGWRados
*_store
, RGWSyncTraceManager
*_manager
)
66 : RGWRadosThread(_store
, "sync-trace"), store(_store
), manager(_manager
) {}
68 int process() override
;
// Builds a one-entry status map whose "current_sync" key holds the string
// returned by manager->get_active_names(), and hands it to
// store->update_service_map().
//
// The return code of update_service_map() is reported at log level 0 with
// an "ERROR: ..." message.
// NOTE(review): the condition guarding that log line and this function's
// own return value are not visible here — confirm.
71 int RGWSyncTraceServiceMapThread::process()
73 map
<string
, string
> status
;
74 status
["current_sync"] = manager
->get_active_names();
75 int ret
= store
->update_service_map(std::move(status
));
77 ldout(store
->ctx(), 0) << "ERROR: update_service_map() returned ret=" << ret
<< dendl
;
// Creates a new trace node and registers it in `nodes`.
//
// Params:
//   parent - parent node reference passed through to the node constructor.
//   type   - node type string.
//   id     - node id string.
//
// Returns a RGWSyncTraceNodeRef to the new node. The returned pointer is NOT
// the one stored in `nodes`: it is built from the raw pointer with a custom
// deleter that calls finish_node() instead of deleting the node. The deleter
// captures `ref` by value, so it holds its own copy of the owning pointer
// for as long as the returned reference (or a copy of it) exists.
//
// `lock` is taken with ceph::acquire_unique before the handle is allocated
// and the `nodes` entry is created.
82 RGWSyncTraceNodeRef
RGWSyncTraceManager::add_node(const RGWSyncTraceNodeRef
& parent
,
83 const std::string
& type
,
84 const std::string
& id
)
86 shunique_lock
wl(lock
, ceph::acquire_unique
);
87 auto handle
= alloc_handle();
88 RGWSyncTraceNodeRef
& ref
= nodes
[handle
];
89 ref
.reset(new RGWSyncTraceNode(cct
, handle
, parent
, type
, id
));
90 // return a separate shared_ptr that calls finish() on the node instead of
91 // deleting it. the lambda capture holds a reference to the original 'ref'
92 auto deleter
= [ref
, this] (RGWSyncTraceNode
*node
) { finish_node(node
); };
93 return {ref
.get(), deleter
};
// Tests this node against a search expression.
//
// Params:
//   search_term    - pattern compiled into a std::regex.
//   search_history - when false, the history entries are not searched (the
//                    `!search_history` check precedes the history loop).
//
// The expression is tried with regex_search against `prefix`, then `status`,
// then (if requested) each entry of `history`.
// A std::regex_error is caught and reported at log level 5.
// NOTE(review): the return statements are not visible here — presumably
// true on the first hit and false otherwise, including on a bad regex;
// confirm.
// NOTE(review): `for (auto h : history)` copies each entry; binding by
// const reference would avoid the copy.
96 bool RGWSyncTraceNode::match(const string
& search_term
, bool search_history
)
99 std::regex
expr(search_term
);
102 if (regex_search(prefix
, m
, expr
)) {
105 if (regex_search(status
, m
,expr
)) {
108 if (!search_history
) {
112 for (auto h
: history
) {
113 if (regex_search(h
, m
, expr
)) {
117 } catch (const std::regex_error
& e
) {
118 ldout(cct
, 5) << "NOTICE: sync trace: bad expression: bad regex search term" << dendl
;
// Allocates the RGWSyncTraceServiceMapThread for this manager, passing it
// `store` and `this`, and starts it. The pointer is kept in
// `service_map_thread` and is stopped and deleted in the destructor.
124 void RGWSyncTraceManager::init(RGWRados
*store
)
126 service_map_thread
= new RGWSyncTraceServiceMapThread(store
, this);
127 service_map_thread
->start();
// Tears the manager down: unregisters this object's commands from the admin
// socket, then stops and deletes the service-map thread created by init().
// NOTE(review): `service_map_thread` is dereferenced without a visible null
// check — confirm that init() is always called before destruction.
130 RGWSyncTraceManager::~RGWSyncTraceManager()
132 cct
->get_admin_socket()->unregister_commands(this);
133 service_map_thread
->stop();
134 delete service_map_thread
;
// Registers the "sync trace ..." commands on the admin socket with this
// object as the hook.
//
// `admin_commands` is filled with four entries (show, history, active,
// active_short); each entry pairs a command descriptor with its help text,
// and every command takes an optional "search" string argument.
// Element [0] of each entry is passed to register_command().
// The result `r` of a registration is reported through lderr.
// NOTE(review): the remaining register_command() arguments, the condition
// guarding the error log, and the return value are not visible here —
// confirm.
// NOTE(review): `for (auto cmd : admin_commands)` copies each entry; a
// const reference would avoid the copy.
139 int RGWSyncTraceManager::hook_to_admin_command()
141 AdminSocket
*admin_socket
= cct
->get_admin_socket();
143 admin_commands
= { { "sync trace show name=search,type=CephString,req=false", "sync trace show [filter_str]: show current multisite tracing information" },
144 { "sync trace history name=search,type=CephString,req=false", "sync trace history [filter_str]: show history of multisite tracing information" },
145 { "sync trace active name=search,type=CephString,req=false", "show active multisite sync entities information" },
146 { "sync trace active_short name=search,type=CephString,req=false", "show active multisite sync entities entries" } };
147 for (auto cmd
: admin_commands
) {
148 int r
= admin_socket
->register_command(cmd
[0], this,
151 lderr(cct
) << "ERROR: fail to register admin socket command (r=" << r
<< ")" << dendl
;
// Writes one trace node to formatter `f` as an "entry" object.
//
// Params:
//   entry        - node to dump; its to_str() is emitted under "status".
//   show_history - presumably gates the "history" array (the guarding
//                  condition is not visible here — confirm).
//   f            - output formatter.
//
// The "history" array holds one "entry" element per item returned by
// entry->get_history().
// NOTE(review): `for (auto h : ...)` copies each history item; a const
// reference would avoid the copy.
158 static void dump_node(RGWSyncTraceNode
*entry
, bool show_history
, Formatter
*f
)
160 f
->open_object_section("entry");
161 ::encode_json("status", entry
->to_str(), f
);
163 f
->open_array_section("history");
164 for (auto h
: entry
->get_history()) {
165 ::encode_json("entry", h
, f
);
// Produces a string listing the resource names of active nodes.
//
// Takes `lock` with ceph::acquire_shared, opens a "result" array on the
// local formatter `f`, and walks `nodes`. Each node is tested for
// RGW_SNS_FLAG_ACTIVE, and get_resource_name() is encoded as an "entry"
// element.
// NOTE(review): the body of the flag check (presumably `continue`), any
// filtering on the name, and the way the formatter output becomes the
// returned string are not visible here — confirm.
// NOTE(review): `for (auto n : nodes)` copies each map element, including
// its node reference; a const reference would avoid the copy.
172 string
RGWSyncTraceManager::get_active_names()
174 shunique_lock
rl(lock
, ceph::acquire_shared
);
179 f
.open_array_section("result");
180 for (auto n
: nodes
) {
181 auto& entry
= n
.second
;
183 if (!entry
->test_flags(RGW_SNS_FLAG_ACTIVE
)) {
186 const string
& name
= entry
->get_resource_name();
188 ::encode_json("entry", name
, &f
);
// Admin-socket handler for the "sync trace ..." commands.
//
// Params (visible portion of the signature):
//   command - command string; compared against "sync trace history",
//             "sync trace active_short" and "sync trace active" to derive
//             the show_history / show_short / show_active modes
//             (show_short implies show_active).
//   cmdmap  - parsed command arguments; an optional "search" entry is read
//             as a string and used as a filter expression.
//
// Under a shared hold of `lock`, writes a "result" object to formatter `f`
// with two arrays:
//   "running"  - built from `nodes`
//   "complete" - built from `complete_nodes`
// In both loops a non-empty `search` is applied through
// match(search, show_history), and show_active adds a check for
// RGW_SNS_FLAG_ACTIVE. Nodes are written either as a bare "entry" name or
// through dump_node().
// NOTE(review): the bodies of the filter checks (presumably `continue`),
// the condition choosing between the bare name and dump_node() (presumably
// show_short), the section closes, and the return value are not visible
// here — confirm.
// NOTE(review): boost::get<string>() throws if the "search" value holds a
// different type; no handler is visible here.
// NOTE(review): `for (auto n : nodes)` copies each map element; a const
// reference would avoid the copy.
198 int RGWSyncTraceManager::call(std::string_view command
, const cmdmap_t
& cmdmap
,
203 bool show_history
= (command
== "sync trace history");
204 bool show_short
= (command
== "sync trace active_short");
205 bool show_active
= (command
== "sync trace active") || show_short
;
209 auto si
= cmdmap
.find("search");
210 if (si
!= cmdmap
.end()) {
211 search
= boost::get
<string
>(si
->second
);
214 shunique_lock
rl(lock
, ceph::acquire_shared
);
216 f
->open_object_section("result");
217 f
->open_array_section("running");
218 for (auto n
: nodes
) {
219 auto& entry
= n
.second
;
221 if (!search
.empty() && !entry
->match(search
, show_history
)) {
224 if (show_active
&& !entry
->test_flags(RGW_SNS_FLAG_ACTIVE
)) {
228 const string
& name
= entry
->get_resource_name();
230 ::encode_json("entry", name
, f
);
233 dump_node(entry
.get(), show_history
, f
);
239 f
->open_array_section("complete");
240 for (auto& entry
: complete_nodes
) {
241 if (!search
.empty() && !entry
->match(search
, show_history
)) {
244 if (show_active
&& !entry
->test_flags(RGW_SNS_FLAG_ACTIVE
)) {
247 dump_node(entry
.get(), show_history
, f
);
257 void RGWSyncTraceManager::finish_node(RGWSyncTraceNode
*node
)
259 RGWSyncTraceNodeRef old_node
;
262 shunique_lock
wl(lock
, ceph::acquire_unique
);
266 auto iter
= nodes
.find(node
->handle
);
267 if (iter
== nodes
.end()) {
268 /* not found, already finished */
272 if (complete_nodes
.full()) {
273 /* take a reference to the entry that is going to be evicted,
274 * can't let it get evicted under lock held, otherwise
275 * it's a deadlock as it will call finish_node()
277 old_node
= complete_nodes
.front();
280 complete_nodes
.push_back(iter
->second
);