1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
4 #include "cls/log/cls_log_client.h"
5 #include "cls/version/cls_version_client.h"
7 #include "rgw_log_backing.h"
9 #include "cls_fifo_legacy.h"
11 using namespace std::chrono_literals
;
12 namespace cb
= ceph::buffer
;
14 static constexpr auto dout_subsys
= ceph_subsys_rgw
;
/// Classification produced when probing a single log shard object.
enum class shard_check { dne, omap, fifo, corrupt };

/// Write the qualified enumerator name of `t` to `m`.
///
/// Values outside the declared enumerators are printed as
/// "shard_check::UNKNOWN=<numeric value>".
inline std::ostream& operator <<(std::ostream& m, const shard_check& t) {
  switch (t) {
  case shard_check::dne:
    return m << "shard_check::dne";
  case shard_check::omap:
    return m << "shard_check::omap";
  case shard_check::fifo:
    return m << "shard_check::fifo";
  case shard_check::corrupt:
    return m << "shard_check::corrupt";
  }

  // Only reached for a value that is not one of the enumerators above.
  return m << "shard_check::UNKNOWN=" << static_cast<uint32_t>(t);
}
33 /// Return the shard type, and a bool to see whether it has entries.
35 probe_shard(const DoutPrefixProvider
*dpp
, librados::IoCtx
& ioctx
, const std::string
& oid
,
36 bool& fifo_unsupported
, optional_yield y
)
38 ldpp_dout(dpp
, 20) << __PRETTY_FUNCTION__
<< ":" << __LINE__
39 << " probing oid=" << oid
41 if (!fifo_unsupported
) {
42 std::unique_ptr
<rgw::cls::fifo::FIFO
> fifo
;
43 auto r
= rgw::cls::fifo::FIFO::open(dpp
, ioctx
, oid
,
48 ldpp_dout(dpp
, 20) << __PRETTY_FUNCTION__
<< ":" << __LINE__
49 << ": oid=" << oid
<< " is FIFO"
51 return shard_check::fifo
;
54 ldpp_dout(dpp
, 20) << __PRETTY_FUNCTION__
<< ":" << __LINE__
55 << ": oid=" << oid
<< " is empty and therefore OMAP"
57 return shard_check::omap
;
60 ldpp_dout(dpp
, 20) << __PRETTY_FUNCTION__
<< ":" << __LINE__
61 << ": oid=" << oid
<< " does not exist"
63 return shard_check::dne
;
66 ldpp_dout(dpp
, 20) << __PRETTY_FUNCTION__
<< ":" << __LINE__
67 << ": FIFO is unsupported, marking."
69 fifo_unsupported
= true;
70 return shard_check::omap
;
73 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
74 << ": error probing: r=" << r
75 << ", oid=" << oid
<< dendl
;
76 return shard_check::corrupt
;
79 // Since FIFO is unsupported, OMAP is the only alternative
80 return shard_check::omap
;
84 tl::expected
<log_type
, bs::error_code
>
85 handle_dne(const DoutPrefixProvider
*dpp
, librados::IoCtx
& ioctx
,
88 bool fifo_unsupported
,
91 if (def
== log_type::fifo
) {
92 if (fifo_unsupported
) {
93 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
94 << " WARNING: FIFO set as default but not supported by OSD. "
95 << "Falling back to OMAP." << dendl
;
96 return log_type::omap
;
98 std::unique_ptr
<rgw::cls::fifo::FIFO
> fifo
;
99 auto r
= rgw::cls::fifo::FIFO::create(dpp
, ioctx
, oid
,
103 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
104 << " error creating FIFO: r=" << r
105 << ", oid=" << oid
<< dendl
;
106 return tl::unexpected(bs::error_code(-r
, bs::system_category()));
113 tl::expected
<log_type
, bs::error_code
>
114 log_backing_type(const DoutPrefixProvider
*dpp
,
115 librados::IoCtx
& ioctx
,
118 const fu2::unique_function
<std::string(int) const>& get_oid
,
121 auto check
= shard_check::dne
;
122 bool fifo_unsupported
= false;
123 for (int i
= 0; i
< shards
; ++i
) {
124 auto c
= probe_shard(dpp
, ioctx
, get_oid(i
), fifo_unsupported
, y
);
125 if (c
== shard_check::corrupt
)
126 return tl::unexpected(bs::error_code(EIO
, bs::system_category()));
127 if (c
== shard_check::dne
) continue;
128 if (check
== shard_check::dne
) {
134 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
135 << " clashing types: check=" << check
136 << ", c=" << c
<< dendl
;
137 return tl::unexpected(bs::error_code(EIO
, bs::system_category()));
140 if (check
== shard_check::corrupt
) {
141 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
142 << " should be unreachable!" << dendl
;
143 return tl::unexpected(bs::error_code(EIO
, bs::system_category()));
146 if (check
== shard_check::dne
)
147 return handle_dne(dpp
, ioctx
,
153 return (check
== shard_check::fifo
? log_type::fifo
: log_type::omap
);
156 bs::error_code
log_remove(const DoutPrefixProvider
*dpp
,
157 librados::IoCtx
& ioctx
,
159 const fu2::unique_function
<std::string(int) const>& get_oid
,
164 for (int i
= 0; i
< shards
; ++i
) {
165 auto oid
= get_oid(i
);
166 rados::cls::fifo::info info
;
167 uint32_t part_header_size
= 0, part_entry_overhead
= 0;
169 auto r
= rgw::cls::fifo::get_meta(dpp
, ioctx
, oid
, std::nullopt
, &info
,
170 &part_header_size
, &part_entry_overhead
,
172 if (r
== -ENOENT
) continue;
173 if (r
== 0 && info
.head_part_num
> -1) {
174 for (auto j
= info
.tail_part_num
; j
<= info
.head_part_num
; ++j
) {
175 librados::ObjectWriteOperation op
;
177 auto part_oid
= info
.part_oid(j
);
178 auto subr
= rgw_rados_operate(dpp
, ioctx
, part_oid
, &op
, null_yield
);
179 if (subr
< 0 && subr
!= -ENOENT
) {
181 ec
= bs::error_code(-subr
, bs::system_category());
182 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
183 << ": failed removing FIFO part: part_oid=" << part_oid
184 << ", subr=" << subr
<< dendl
;
188 if (r
< 0 && r
!= -ENODATA
) {
190 ec
= bs::error_code(-r
, bs::system_category());
191 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
192 << ": failed checking FIFO part: oid=" << oid
193 << ", r=" << r
<< dendl
;
195 librados::ObjectWriteOperation op
;
196 if (i
== 0 && leave_zero
) {
197 // Leave shard 0 in existence, but remove contents and
198 // omap. cls_lock stores things in the xattrs. And sync needs to
199 // rendezvous with locks on generation 0 shard 0.
200 op
.omap_set_header({});
206 r
= rgw_rados_operate(dpp
, ioctx
, oid
, &op
, null_yield
);
207 if (r
< 0 && r
!= -ENOENT
) {
209 ec
= bs::error_code(-r
, bs::system_category());
210 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
211 << ": failed removing shard: oid=" << oid
212 << ", r=" << r
<< dendl
;
218 logback_generations::~logback_generations() {
219 if (watchcookie
> 0) {
220 auto cct
= static_cast<CephContext
*>(ioctx
.cct());
221 auto r
= ioctx
.unwatch2(watchcookie
);
223 lderr(cct
) << __PRETTY_FUNCTION__
<< ":" << __LINE__
224 << ": failed unwatching oid=" << oid
225 << ", r=" << r
<< dendl
;
230 bs::error_code
logback_generations::setup(const DoutPrefixProvider
*dpp
,
232 optional_yield y
) noexcept
236 auto cct
= static_cast<CephContext
*>(ioctx
.cct());
237 auto res
= read(dpp
, y
);
238 if (!res
&& res
.error() != bs::errc::no_such_file_or_directory
) {
242 std::unique_lock
lock(m
);
243 std::tie(entries_
, version
) = std::move(*res
);
245 // Are we the first? Then create generation 0 and the generations
247 librados::ObjectWriteOperation op
;
248 auto type
= log_backing_type(dpp
, ioctx
, def
, shards
,
250 return this->get_oid(0, shard
);
255 logback_generation l
;
258 std::unique_lock
lock(m
);
260 static constexpr auto TAG_LEN
= 24;
262 append_rand_alpha(cct
, version
.tag
, version
.tag
, TAG_LEN
);
264 cls_version_set(op
, version
);
266 entries_
.emplace(0, std::move(l
));
267 encode(entries_
, bl
);
271 auto r
= rgw_rados_operate(dpp
, ioctx
, oid
, &op
, y
);
272 if (r
< 0 && r
!= -EEXIST
) {
273 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
274 << ": failed writing oid=" << oid
275 << ", r=" << r
<< dendl
;
276 bs::system_error(-r
, bs::system_category());
278 // Did someone race us? Then re-read.
283 if (res
->first
.empty())
284 return bs::error_code(EIO
, bs::system_category());
285 auto l
= res
->first
.begin()->second
;
286 // In the unlikely event that someone raced us, created
287 // generation zero, incremented, then erased generation zero,
288 // don't leave generation zero lying around.
290 auto ec
= log_remove(dpp
, ioctx
, shards
,
292 return this->get_oid(0, shard
);
296 std::unique_lock
lock(m
);
297 std::tie(entries_
, version
) = std::move(*res
);
300 // Pass all non-empty generations to the handler
301 std::unique_lock
lock(m
);
302 auto i
= lowest_nomempty(entries_
);
304 std::copy(i
, entries_
.cend(),
305 std::inserter(e
, e
.end()));
309 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
310 << ": failed to re-establish watch, unsafe to continue: oid="
311 << oid
<< ", ec=" << ec
.message() << dendl
;
313 return handle_init(std::move(e
));
314 } catch (const std::bad_alloc
&) {
315 return bs::error_code(ENOMEM
, bs::system_category());
319 bs::error_code
logback_generations::update(const DoutPrefixProvider
*dpp
, optional_yield y
) noexcept
322 auto res
= read(dpp
, y
);
327 std::unique_lock
l(m
);
328 auto& [es
, v
] = *res
;
334 // Check consistency and prepare update
336 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
337 << ": INCONSISTENCY! Read empty update." << dendl
;
338 return bs::error_code(EFAULT
, bs::system_category());
340 auto cur_lowest
= lowest_nomempty(entries_
);
341 // Straight up can't happen
342 assert(cur_lowest
!= entries_
.cend());
343 auto new_lowest
= lowest_nomempty(es
);
344 if (new_lowest
== es
.cend()) {
345 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
346 << ": INCONSISTENCY! Read update with no active head." << dendl
;
347 return bs::error_code(EFAULT
, bs::system_category());
349 if (new_lowest
->first
< cur_lowest
->first
) {
350 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
351 << ": INCONSISTENCY! Tail moved wrong way." << dendl
;
352 return bs::error_code(EFAULT
, bs::system_category());
355 std::optional
<uint64_t> highest_empty
;
356 if (new_lowest
->first
> cur_lowest
->first
&& new_lowest
!= es
.begin()) {
358 highest_empty
= new_lowest
->first
;
361 entries_t new_entries
;
363 if ((es
.end() - 1)->first
< (entries_
.end() - 1)->first
) {
364 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
365 << ": INCONSISTENCY! Head moved wrong way." << dendl
;
366 return bs::error_code(EFAULT
, bs::system_category());
369 if ((es
.end() - 1)->first
> (entries_
.end() - 1)->first
) {
370 auto ei
= es
.lower_bound((entries_
.end() - 1)->first
+ 1);
371 std::copy(ei
, es
.end(), std::inserter(new_entries
, new_entries
.end()));
374 // Everything checks out!
381 auto ec
= handle_empty_to(*highest_empty
);
385 if (!new_entries
.empty()) {
386 auto ec
= handle_new_gens(std::move(new_entries
));
389 } catch (const std::bad_alloc
&) {
390 return bs::error_code(ENOMEM
, bs::system_category());
395 auto logback_generations::read(const DoutPrefixProvider
*dpp
, optional_yield y
) noexcept
->
396 tl::expected
<std::pair
<entries_t
, obj_version
>, bs::error_code
>
399 librados::ObjectReadOperation op
;
400 std::unique_lock
l(m
);
401 cls_version_check(op
, version
, VER_COND_GE
);
404 cls_version_read(op
, &v2
);
406 op
.read(0, 0, &bl
, nullptr);
407 auto r
= rgw_rados_operate(dpp
, ioctx
, oid
, &op
, nullptr, y
);
410 ldpp_dout(dpp
, 5) << __PRETTY_FUNCTION__
<< ":" << __LINE__
412 << " not found" << dendl
;
414 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
415 << ": failed reading oid=" << oid
416 << ", r=" << r
<< dendl
;
418 return tl::unexpected(bs::error_code(-r
, bs::system_category()));
420 auto bi
= bl
.cbegin();
424 } catch (const cb::error
& err
) {
425 return tl::unexpected(err
.code());
427 return std::pair
{ std::move(e
), std::move(v2
) };
428 } catch (const std::bad_alloc
&) {
429 return tl::unexpected(bs::error_code(ENOMEM
, bs::system_category()));
433 bs::error_code
logback_generations::write(const DoutPrefixProvider
*dpp
, entries_t
&& e
,
434 std::unique_lock
<std::mutex
>&& l_
,
435 optional_yield y
) noexcept
437 auto l
= std::move(l_
);
438 ceph_assert(l
.mutex() == &m
&&
441 librados::ObjectWriteOperation op
;
442 cls_version_check(op
, version
, VER_COND_GE
);
447 auto r
= rgw_rados_operate(dpp
, ioctx
, oid
, &op
, y
);
449 entries_
= std::move(e
);
454 if (r
< 0 && r
!= -ECANCELED
) {
455 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
456 << ": failed reading oid=" << oid
457 << ", r=" << r
<< dendl
;
458 return { -r
, bs::system_category() };
460 if (r
== -ECANCELED
) {
461 auto ec
= update(dpp
, y
);
465 return { ECANCELED
, bs::system_category() };
468 } catch (const std::bad_alloc
&) {
469 return { ENOMEM
, bs::system_category() };
475 bs::error_code
logback_generations::watch() noexcept
{
477 auto cct
= static_cast<CephContext
*>(ioctx
.cct());
478 auto r
= ioctx
.watch2(oid
, &watchcookie
, this);
480 lderr(cct
) << __PRETTY_FUNCTION__
<< ":" << __LINE__
481 << ": failed to set watch oid=" << oid
482 << ", r=" << r
<< dendl
;
483 return { -r
, bs::system_category() };
485 } catch (const std::bad_alloc
&) {
486 return bs::error_code(ENOMEM
, bs::system_category());
491 bs::error_code
logback_generations::new_backing(const DoutPrefixProvider
*dpp
,
493 optional_yield y
) noexcept
{
494 static constexpr auto max_tries
= 10;
496 auto ec
= update(dpp
, y
);
499 entries_t new_entries
;
501 std::unique_lock
l(m
);
502 auto last
= entries_
.end() - 1;
503 if (last
->second
.type
== type
) {
504 // Nothing to be done
507 auto newgenid
= last
->first
+ 1;
508 logback_generation newgen
;
509 newgen
.gen_id
= newgenid
;
511 new_entries
.emplace(newgenid
, newgen
);
513 es
.emplace(newgenid
, std::move(newgen
));
514 ec
= write(dpp
, std::move(es
), std::move(l
), y
);
516 } while (ec
== bs::errc::operation_canceled
&&
518 if (tries
>= max_tries
) {
519 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
520 << ": exhausted retry attempts." << dendl
;
525 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
526 << ": write failed with ec=" << ec
.message() << dendl
;
532 auto r
= rgw_rados_notify(dpp
, ioctx
, oid
, bl
, 10'000, &rbl
, y
);
534 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
535 << ": notify failed with r=" << r
<< dendl
;
536 return { -r
, bs::system_category() };
538 ec
= handle_new_gens(new_entries
);
539 } catch (const std::bad_alloc
&) {
540 return bs::error_code(ENOMEM
, bs::system_category());
545 bs::error_code
logback_generations::empty_to(const DoutPrefixProvider
*dpp
,
547 optional_yield y
) noexcept
{
548 static constexpr auto max_tries
= 10;
550 auto ec
= update(dpp
, y
);
553 uint64_t newtail
= 0;
555 std::unique_lock
l(m
);
557 auto last
= entries_
.end() - 1;
558 if (gen_id
>= last
->first
) {
559 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
560 << ": Attempt to trim beyond the possible." << dendl
;
561 return bs::error_code(EINVAL
, bs::system_category());
565 auto ei
= es
.upper_bound(gen_id
);
566 if (ei
== es
.begin()) {
567 // Nothing to be done.
570 for (auto i
= es
.begin(); i
< ei
; ++i
) {
572 i
->second
.pruned
= ceph::real_clock::now();
574 ec
= write(dpp
, std::move(es
), std::move(l
), y
);
576 } while (ec
== bs::errc::operation_canceled
&&
578 if (tries
>= max_tries
) {
579 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
580 << ": exhausted retry attempts." << dendl
;
585 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
586 << ": write failed with ec=" << ec
.message() << dendl
;
592 auto r
= rgw_rados_notify(dpp
, ioctx
, oid
, bl
, 10'000, &rbl
, y
);
594 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
595 << ": notify failed with r=" << r
<< dendl
;
596 return { -r
, bs::system_category() };
598 ec
= handle_empty_to(newtail
);
599 } catch (const std::bad_alloc
&) {
600 return bs::error_code(ENOMEM
, bs::system_category());
605 bs::error_code
logback_generations::remove_empty(const DoutPrefixProvider
*dpp
, optional_yield y
) noexcept
{
606 static constexpr auto max_tries
= 10;
608 auto ec
= update(dpp
, y
);
611 entries_t new_entries
;
612 std::unique_lock
l(m
);
613 ceph_assert(!entries_
.empty());
615 auto i
= lowest_nomempty(entries_
);
616 if (i
== entries_
.begin()) {
621 auto now
= ceph::real_clock::now();
624 std::copy_if(entries_
.cbegin(), entries_
.cend(),
625 std::inserter(es
, es
.end()),
626 [now
](const auto& e
) {
627 if (!e
.second
.pruned
)
630 auto pruned
= *e
.second
.pruned
;
631 return (now
- pruned
) >= 1h
;
634 for (const auto& [gen_id
, e
] : es
) {
635 ceph_assert(e
.pruned
);
636 auto ec
= log_remove(dpp
, ioctx
, shards
,
637 [this, gen_id
= gen_id
](int shard
) {
638 return this->get_oid(gen_id
, shard
);
639 }, (gen_id
== 0), y
);
641 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
642 << ": Error pruning: gen_id=" << gen_id
643 << " ec=" << ec
.message() << dendl
;
645 if (auto i
= es2
.find(gen_id
); i
!= es2
.end()) {
651 ec
= write(dpp
, std::move(es2
), std::move(l
), y
);
653 } while (ec
== bs::errc::operation_canceled
&&
655 if (tries
>= max_tries
) {
656 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
657 << ": exhausted retry attempts." << dendl
;
662 ldpp_dout(dpp
, -1) << __PRETTY_FUNCTION__
<< ":" << __LINE__
663 << ": write failed with ec=" << ec
.message() << dendl
;
666 } catch (const std::bad_alloc
&) {
667 return bs::error_code(ENOMEM
, bs::system_category());
672 void logback_generations::handle_notify(uint64_t notify_id
,
674 uint64_t notifier_id
,
677 auto cct
= static_cast<CephContext
*>(ioctx
.cct());
678 const DoutPrefix
dp(cct
, dout_subsys
, "logback generations handle_notify: ");
679 if (notifier_id
!= my_id
) {
680 auto ec
= update(&dp
, null_yield
);
683 << __PRETTY_FUNCTION__
<< ":" << __LINE__
684 << ": update failed, no one to report to and no safe way to continue."
690 ioctx
.notify_ack(oid
, notify_id
, watchcookie
, rbl
);
693 void logback_generations::handle_error(uint64_t cookie
, int err
) {
694 auto cct
= static_cast<CephContext
*>(ioctx
.cct());
695 auto r
= ioctx
.unwatch2(watchcookie
);
697 lderr(cct
) << __PRETTY_FUNCTION__
<< ":" << __LINE__
698 << ": failed to set unwatch oid=" << oid
699 << ", r=" << r
<< dendl
;
704 lderr(cct
) << __PRETTY_FUNCTION__
<< ":" << __LINE__
705 << ": failed to re-establish watch, unsafe to continue: oid="
706 << oid
<< ", ec=" << ec
.message() << dendl
;