1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 /*
3 * Ceph - scalable distributed file system
4 *
5 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
6 * Copyright (C) 2013 Cloudwatt <libre.licensing@cloudwatt.com>
7 *
8 * Author: Loic Dachary <loic@dachary.org>
9 *
10 * This is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License version 2.1, as published by the Free Software
13 * Foundation. See file COPYING.
14 *
15 */
16
17 #ifndef CEPH_REPLICATEDPG_H
18 #define CEPH_REPLICATEDPG_H
19
20 #include <boost/tuple/tuple.hpp>
21 #include "include/ceph_assert.h"
22 #include "DynamicPerfStats.h"
23 #include "OSD.h"
24 #include "PG.h"
25 #include "Watch.h"
26 #include "TierAgentState.h"
27 #include "messages/MOSDOpReply.h"
28 #include "common/Checksummer.h"
29 #include "common/sharedptr_registry.hpp"
30 #include "common/shared_cache.hpp"
31 #include "ReplicatedBackend.h"
32 #include "PGTransaction.h"
33 #include "cls/cas/cls_cas_ops.h"
34
35 class CopyFromCallback;
36 class PromoteCallback;
37
38 class PrimaryLogPG;
39 class PGLSFilter;
40 class HitSet;
41 struct TierAgentState;
42 class MOSDOp;
43 class MOSDOpReply;
44 class OSDService;
45
46 void intrusive_ptr_add_ref(PrimaryLogPG *pg);
47 void intrusive_ptr_release(PrimaryLogPG *pg);
48 uint64_t get_with_id(PrimaryLogPG *pg);
49 void put_with_id(PrimaryLogPG *pg, uint64_t id);
50
51 #ifdef PG_DEBUG_REFS
52 typedef TrackedIntPtr<PrimaryLogPG> PrimaryLogPGRef;
53 #else
54 typedef boost::intrusive_ptr<PrimaryLogPG> PrimaryLogPGRef;
55 #endif
56
57 struct inconsistent_snapset_wrapper;
58
59 class PrimaryLogPG : public PG, public PGBackend::Listener {
60 friend class OSD;
61 friend class Watch;
62
63 public:
64 MEMPOOL_CLASS_HELPERS();
65
66 /*
67 * state associated with a copy operation
68 */
69 struct OpContext;
70 class CopyCallback;
71
72 /**
73 * CopyResults stores the object metadata of interest to a copy initiator.
74 */
75 struct CopyResults {
76 ceph::real_time mtime; ///< the copy source's mtime
77 uint64_t object_size; ///< the copied object's size
78 bool started_temp_obj; ///< true if the callback needs to delete temp object
79 hobject_t temp_oid; ///< temp object (if any)
80
81 /**
82 * Function to fill in transaction; if non-empty the callback
83 * must execute it before any other accesses to the object
84 * (in order to complete the copy).
85 */
86 std::function<void(PGTransaction *)> fill_in_final_tx;
87
88 version_t user_version; ///< The copy source's user version
89 bool should_requeue; ///< op should be requeued on cancel
90 vector<snapid_t> snaps; ///< src's snaps (if clone)
91 snapid_t snap_seq; ///< src's snap_seq (if head)
92 librados::snap_set_t snapset; ///< src snapset (if head)
93 bool mirror_snapset;
94 bool has_omap;
95 uint32_t flags; // object_copy_data_t::FLAG_*
96 uint32_t source_data_digest, source_omap_digest;
97 uint32_t data_digest, omap_digest;
98 mempool::osd_pglog::vector<pair<osd_reqid_t, version_t> > reqids; // [(reqid, user_version)]
99 mempool::osd_pglog::map<uint32_t, int> reqid_return_codes; // map reqids by index to error code
100 map<string, bufferlist> attrs; // xattrs
101 uint64_t truncate_seq;
102 uint64_t truncate_size;
103 bool is_data_digest() {
104 return flags & object_copy_data_t::FLAG_DATA_DIGEST;
105 }
106 bool is_omap_digest() {
107 return flags & object_copy_data_t::FLAG_OMAP_DIGEST;
108 }
109 CopyResults()
110 : object_size(0), started_temp_obj(false),
111 user_version(0),
112 should_requeue(false), mirror_snapset(false),
113 has_omap(false),
114 flags(0),
115 source_data_digest(-1), source_omap_digest(-1),
116 data_digest(-1), omap_digest(-1),
117 truncate_seq(0), truncate_size(0)
118 {}
119 };
120
121 struct CopyOp;
122 typedef std::shared_ptr<CopyOp> CopyOpRef;
123
124 struct CopyOp {
125 CopyCallback *cb;
126 ObjectContextRef obc;
127 hobject_t src;
128 object_locator_t oloc;
129 unsigned flags;
130 bool mirror_snapset;
131
132 CopyResults results;
133
134 ceph_tid_t objecter_tid;
135 ceph_tid_t objecter_tid2;
136
137 object_copy_cursor_t cursor;
138 map<string,bufferlist> attrs;
139 bufferlist data;
140 bufferlist omap_header;
141 bufferlist omap_data;
142 int rval;
143
144 object_copy_cursor_t temp_cursor;
145
   146     /*
   147      * For a CopyOp the process is:
   148      * step 1: read the data (attrs, omap, data payload) from the source object
   149      * step 2: write that data out, creating the new object
   150      * src_obj_fadvise_flags is used in step 1;
   151      * dest_obj_fadvise_flags is used in step 2.
   152      */
153 unsigned src_obj_fadvise_flags;
154 unsigned dest_obj_fadvise_flags;
155
156 map<uint64_t, CopyOpRef> chunk_cops;
157 int num_chunk;
158 bool failed;
159 uint64_t start_offset = 0;
160 uint64_t last_offset = 0;
161 vector<OSDOp> chunk_ops;
162
163 CopyOp(CopyCallback *cb_, ObjectContextRef _obc, hobject_t s,
164 object_locator_t l,
165 version_t v,
166 unsigned f,
167 bool ms,
168 unsigned src_obj_fadvise_flags,
169 unsigned dest_obj_fadvise_flags)
170 : cb(cb_), obc(_obc), src(s), oloc(l), flags(f),
171 mirror_snapset(ms),
172 objecter_tid(0),
173 objecter_tid2(0),
174 rval(-1),
175 src_obj_fadvise_flags(src_obj_fadvise_flags),
176 dest_obj_fadvise_flags(dest_obj_fadvise_flags),
177 num_chunk(0),
178 failed(false)
179 {
180 results.user_version = v;
181 results.mirror_snapset = mirror_snapset;
182 }
183 };
184
185 /**
186 * The CopyCallback class defines an interface for completions to the
187 * copy_start code. Users of the copy infrastructure must implement
188 * one and give an instance of the class to start_copy.
189 *
190 * The implementer is responsible for making sure that the CopyCallback
191 * can associate itself with the correct copy operation.
192 */
193 typedef boost::tuple<int, CopyResults*> CopyCallbackResults;
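  // Illustrative sketch only (the concrete CopyCallback base class is declared
  // elsewhere, so the completion shown below is hypothetical): a copy initiator
  // unpacks CopyCallbackResults and honours fill_in_final_tx before touching the
  // destination object.
  //
  //   void handle_copy_done(CopyCallbackResults results) {
  //     int r = results.get<0>();                 // copy return code
  //     CopyResults *cr = results.get<1>();       // metadata described above
  //     if (r == 0 && cr->fill_in_final_tx) {
  //       // must be applied to a PGTransaction before any other access
  //     }
  //   }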
194
195 friend class CopyFromCallback;
196 friend class CopyFromFinisher;
197 friend class PromoteCallback;
198 friend class PromoteFinisher;
199
200 struct ProxyReadOp {
201 OpRequestRef op;
202 hobject_t soid;
203 ceph_tid_t objecter_tid;
204 vector<OSDOp> &ops;
205 version_t user_version;
206 int data_offset;
207 bool canceled; ///< true if canceled
208
209 ProxyReadOp(OpRequestRef _op, hobject_t oid, vector<OSDOp>& _ops)
210 : op(_op), soid(oid),
211 objecter_tid(0), ops(_ops),
212 user_version(0), data_offset(0),
213 canceled(false) { }
214 };
215 typedef std::shared_ptr<ProxyReadOp> ProxyReadOpRef;
216
217 struct ProxyWriteOp {
218 OpContext *ctx;
219 OpRequestRef op;
220 hobject_t soid;
221 ceph_tid_t objecter_tid;
222 vector<OSDOp> &ops;
223 version_t user_version;
224 bool sent_reply;
225 utime_t mtime;
226 bool canceled;
227 osd_reqid_t reqid;
228
229 ProxyWriteOp(OpRequestRef _op, hobject_t oid, vector<OSDOp>& _ops, osd_reqid_t _reqid)
230 : ctx(NULL), op(_op), soid(oid),
231 objecter_tid(0), ops(_ops),
232 user_version(0), sent_reply(false),
233 canceled(false),
234 reqid(_reqid) { }
235 };
236 typedef std::shared_ptr<ProxyWriteOp> ProxyWriteOpRef;
237
238 struct FlushOp {
239 ObjectContextRef obc; ///< obc we are flushing
240 OpRequestRef op; ///< initiating op
241 list<OpRequestRef> dup_ops; ///< bandwagon jumpers
242 version_t flushed_version; ///< user version we are flushing
243 ceph_tid_t objecter_tid; ///< copy-from request tid
244 int rval; ///< copy-from result
245 bool blocking; ///< whether we are blocking updates
246 bool removal; ///< we are removing the backend object
247 boost::optional<std::function<void()>> on_flush; ///< callback, may be null
248 // for chunked object
249 map<uint64_t, int> io_results;
250 map<uint64_t, ceph_tid_t> io_tids;
251 uint64_t chunks;
252
253 FlushOp()
254 : flushed_version(0), objecter_tid(0), rval(0),
255 blocking(false), removal(false), chunks(0) {}
256 ~FlushOp() { ceph_assert(!on_flush); }
257 };
258 typedef std::shared_ptr<FlushOp> FlushOpRef;
259
260 boost::scoped_ptr<PGBackend> pgbackend;
261 PGBackend *get_pgbackend() override {
262 return pgbackend.get();
263 }
264
265 const PGBackend *get_pgbackend() const override {
266 return pgbackend.get();
267 }
268
269 /// Listener methods
270 DoutPrefixProvider *get_dpp() override {
271 return this;
272 }
273
274 void on_local_recover(
275 const hobject_t &oid,
276 const ObjectRecoveryInfo &recovery_info,
277 ObjectContextRef obc,
278 bool is_delete,
279 ObjectStore::Transaction *t
280 ) override;
281 void on_peer_recover(
282 pg_shard_t peer,
283 const hobject_t &oid,
284 const ObjectRecoveryInfo &recovery_info
285 ) override;
286 void begin_peer_recover(
287 pg_shard_t peer,
288 const hobject_t oid) override;
289 void on_global_recover(
290 const hobject_t &oid,
291 const object_stat_sum_t &stat_diff,
292 bool is_delete) override;
293 void failed_push(const list<pg_shard_t> &from,
294 const hobject_t &soid,
295 const eversion_t &need = eversion_t()) override;
296 void primary_failed(const hobject_t &soid) override;
297 bool primary_error(const hobject_t& soid, eversion_t v) override;
298 void cancel_pull(const hobject_t &soid) override;
299 void apply_stats(
300 const hobject_t &soid,
301 const object_stat_sum_t &delta_stats) override;
302 void on_primary_error(const hobject_t &oid, eversion_t v) override;
303 void backfill_add_missing(const hobject_t &oid, eversion_t v) override;
304 void remove_missing_object(const hobject_t &oid,
305 eversion_t v,
306 Context *on_complete) override;
307
308 template<class T> class BlessedGenContext;
309 template<class T> class UnlockedBlessedGenContext;
310 class BlessedContext;
311 Context *bless_context(Context *c) override;
312
313 GenContext<ThreadPool::TPHandle&> *bless_gencontext(
314 GenContext<ThreadPool::TPHandle&> *c) override;
315 GenContext<ThreadPool::TPHandle&> *bless_unlocked_gencontext(
316 GenContext<ThreadPool::TPHandle&> *c) override;
317
318 void send_message(int to_osd, Message *m) override {
319 osd->send_message_osd_cluster(to_osd, m, get_osdmap_epoch());
320 }
321 void queue_transaction(ObjectStore::Transaction&& t,
322 OpRequestRef op) override {
323 osd->store->queue_transaction(ch, std::move(t), op);
324 }
325 void queue_transactions(vector<ObjectStore::Transaction>& tls,
326 OpRequestRef op) override {
327 osd->store->queue_transactions(ch, tls, op, NULL);
328 }
329 epoch_t get_interval_start_epoch() const override {
330 return info.history.same_interval_since;
331 }
332 epoch_t get_last_peering_reset_epoch() const override {
333 return get_last_peering_reset();
334 }
335 const set<pg_shard_t> &get_acting_recovery_backfill_shards() const override {
336 return acting_recovery_backfill;
337 }
338 const set<pg_shard_t> &get_acting_shards() const override {
339 return actingset;
340 }
341 const set<pg_shard_t> &get_backfill_shards() const override {
342 return backfill_targets;
343 }
344
345 std::ostream& gen_dbg_prefix(std::ostream& out) const override {
346 return gen_prefix(out);
347 }
348
349 const map<hobject_t, set<pg_shard_t>>
350 &get_missing_loc_shards() const override {
351 return missing_loc.get_missing_locs();
352 }
353 const map<pg_shard_t, pg_missing_t> &get_shard_missing() const override {
354 return peer_missing;
355 }
356 using PGBackend::Listener::get_shard_missing;
357 const map<pg_shard_t, pg_info_t> &get_shard_info() const override {
358 return peer_info;
359 }
360 using PGBackend::Listener::get_shard_info;
361 const pg_missing_tracker_t &get_local_missing() const override {
362 return pg_log.get_missing();
363 }
364 const PGLog &get_log() const override {
365 return pg_log;
366 }
367 void add_local_next_event(const pg_log_entry_t& e) override {
368 pg_log.missing_add_next_entry(e);
369 }
370 bool pgb_is_primary() const override {
371 return is_primary();
372 }
373 const OSDMapRef& pgb_get_osdmap() const override final {
374 return get_osdmap();
375 }
376 epoch_t pgb_get_osdmap_epoch() const override final {
377 return get_osdmap_epoch();
378 }
379 const pg_info_t &get_info() const override {
380 return info;
381 }
382 const pg_pool_t &get_pool() const override {
383 return pool.info;
384 }
385
386 ObjectContextRef get_obc(
387 const hobject_t &hoid,
388 const map<string, bufferlist> &attrs) override {
389 return get_object_context(hoid, true, &attrs);
390 }
391
392 bool try_lock_for_read(
393 const hobject_t &hoid,
394 ObcLockManager &manager) override {
395 if (is_missing_object(hoid))
396 return false;
397 auto obc = get_object_context(hoid, false, nullptr);
398 if (!obc)
399 return false;
400 return manager.try_get_read_lock(hoid, obc);
401 }
402
403 void release_locks(ObcLockManager &manager) override {
404 release_object_locks(manager);
405 }
406
407 bool pg_is_repair() override {
408 return is_repair();
409 }
410 void inc_osd_stat_repaired() override {
411 osd->inc_osd_stat_repaired();
412 }
413 bool pg_is_remote_backfilling() override {
414 return is_remote_backfilling();
415 }
416 void pg_add_local_num_bytes(int64_t num_bytes) override {
417 add_local_num_bytes(num_bytes);
418 }
419 void pg_sub_local_num_bytes(int64_t num_bytes) override {
420 sub_local_num_bytes(num_bytes);
421 }
422 void pg_add_num_bytes(int64_t num_bytes) override {
423 add_num_bytes(num_bytes);
424 }
425 void pg_sub_num_bytes(int64_t num_bytes) override {
426 sub_num_bytes(num_bytes);
427 }
428
429 void pgb_set_object_snap_mapping(
430 const hobject_t &soid,
431 const set<snapid_t> &snaps,
432 ObjectStore::Transaction *t) override {
433 return update_object_snap_mapping(t, soid, snaps);
434 }
435 void pgb_clear_object_snap_mapping(
436 const hobject_t &soid,
437 ObjectStore::Transaction *t) override {
438 return clear_object_snap_mapping(t, soid);
439 }
440
441 void log_operation(
442 const vector<pg_log_entry_t> &logv,
443 const boost::optional<pg_hit_set_history_t> &hset_history,
444 const eversion_t &trim_to,
445 const eversion_t &roll_forward_to,
446 bool transaction_applied,
447 ObjectStore::Transaction &t,
448 bool async = false) override {
449 if (hset_history) {
450 info.hit_set = *hset_history;
451 }
452 append_log(logv, trim_to, roll_forward_to, t, transaction_applied, async);
453 }
454
455 void op_applied(const eversion_t &applied_version) override;
456
457 bool should_send_op(
458 pg_shard_t peer,
459 const hobject_t &hoid) override;
460
461 bool pg_is_undersized() const override {
462 return is_undersized();
463 }
464
465 bool pg_is_repair() const override {
466 return is_repair();
467 }
468
469 void update_peer_last_complete_ondisk(
470 pg_shard_t fromosd,
471 eversion_t lcod) override {
472 peer_last_complete_ondisk[fromosd] = lcod;
473 }
474
475 void update_last_complete_ondisk(
476 eversion_t lcod) override {
477 last_complete_ondisk = lcod;
478 }
479
480 void update_stats(
481 const pg_stat_t &stat) override {
482 info.stats = stat;
483 }
484
485 void schedule_recovery_work(
486 GenContext<ThreadPool::TPHandle&> *c) override;
487
488 pg_shard_t whoami_shard() const override {
489 return pg_whoami;
490 }
491 spg_t primary_spg_t() const override {
492 return spg_t(info.pgid.pgid, primary.shard);
493 }
494 pg_shard_t primary_shard() const override {
495 return primary;
496 }
497
498 void send_message_osd_cluster(
499 int peer, Message *m, epoch_t from_epoch) override;
500 void send_message_osd_cluster(
501 Message *m, Connection *con) override;
502 void send_message_osd_cluster(
503 Message *m, const ConnectionRef& con) override;
504 ConnectionRef get_con_osd_cluster(int peer, epoch_t from_epoch) override;
505 entity_name_t get_cluster_msgr_name() override {
506 return osd->get_cluster_msgr_name();
507 }
508
509 PerfCounters *get_logger() override;
510
511 ceph_tid_t get_tid() override { return osd->get_tid(); }
512
513 LogClientTemp clog_error() override { return osd->clog->error(); }
514 LogClientTemp clog_warn() override { return osd->clog->warn(); }
515
516 struct watch_disconnect_t {
517 uint64_t cookie;
518 entity_name_t name;
519 bool send_disconnect;
520 watch_disconnect_t(uint64_t c, entity_name_t n, bool sd)
521 : cookie(c), name(n), send_disconnect(sd) {}
522 };
523 void complete_disconnect_watches(
524 ObjectContextRef obc,
525 const list<watch_disconnect_t> &to_disconnect);
526
527 struct OpFinisher {
528 virtual ~OpFinisher() {
529 }
530
531 virtual int execute() = 0;
532 };
533
534 /*
535 * Capture all object state associated with an in-progress read or write.
536 */
537 struct OpContext {
538 OpRequestRef op;
539 osd_reqid_t reqid;
540 vector<OSDOp> *ops;
541
542 const ObjectState *obs; // Old objectstate
543 const SnapSet *snapset; // Old snapset
544
545 ObjectState new_obs; // resulting ObjectState
546 SnapSet new_snapset; // resulting SnapSet (in case of a write)
547 //pg_stat_t new_stats; // resulting Stats
548 object_stat_sum_t delta_stats;
549
550 bool modify; // (force) modification (even if op_t is empty)
551 bool user_modify; // user-visible modification
552 bool undirty; // user explicitly un-dirtying this object
553 bool cache_evict; ///< true if this is a cache eviction
554 bool ignore_cache; ///< true if IGNORE_CACHE flag is set
555 bool ignore_log_op_stats; // don't log op stats
556 bool update_log_only; ///< this is a write that returned an error - just record in pg log for dup detection
557
558 // side effects
559 list<pair<watch_info_t,bool> > watch_connects; ///< new watch + will_ping flag
560 list<watch_disconnect_t> watch_disconnects; ///< old watch + send_discon
561 list<notify_info_t> notifies;
562 struct NotifyAck {
563 boost::optional<uint64_t> watch_cookie;
564 uint64_t notify_id;
565 bufferlist reply_bl;
566 explicit NotifyAck(uint64_t notify_id) : notify_id(notify_id) {}
567 NotifyAck(uint64_t notify_id, uint64_t cookie, bufferlist& rbl)
568 : watch_cookie(cookie), notify_id(notify_id) {
569 reply_bl.claim(rbl);
570 }
571 };
572 list<NotifyAck> notify_acks;
573
574 uint64_t bytes_written, bytes_read;
575
576 utime_t mtime;
577 SnapContext snapc; // writer snap context
578 eversion_t at_version; // pg's current version pointer
579 version_t user_at_version; // pg's current user version pointer
580
581 /// index of the current subop - only valid inside of do_osd_ops()
582 int current_osd_subop_num;
583 /// total number of subops processed in this context for cls_cxx_subop_version()
584 int processed_subop_count = 0;
585
586 PGTransactionUPtr op_t;
587 vector<pg_log_entry_t> log;
588 boost::optional<pg_hit_set_history_t> updated_hset_history;
589
590 interval_set<uint64_t> modified_ranges;
591 ObjectContextRef obc;
592 ObjectContextRef clone_obc; // if we created a clone
593 ObjectContextRef head_obc; // if we also update snapset (see trim_object)
594
595 // FIXME: we may want to kill this msgr hint off at some point!
596 boost::optional<int> data_off = boost::none;
597
598 MOSDOpReply *reply;
599
600 PrimaryLogPG *pg;
601
602 int num_read; ///< count read ops
603 int num_write; ///< count update ops
604
605 mempool::osd_pglog::vector<pair<osd_reqid_t, version_t> > extra_reqids;
606 mempool::osd_pglog::map<uint32_t, int> extra_reqid_return_codes;
607
608 hobject_t new_temp_oid, discard_temp_oid; ///< temp objects we should start/stop tracking
609
610 list<std::function<void()>> on_applied;
611 list<std::function<void()>> on_committed;
612 list<std::function<void()>> on_finish;
613 list<std::function<void()>> on_success;
614 template <typename F>
615 void register_on_finish(F &&f) {
616 on_finish.emplace_back(std::forward<F>(f));
617 }
618 template <typename F>
619 void register_on_success(F &&f) {
620 on_success.emplace_back(std::forward<F>(f));
621 }
622 template <typename F>
623 void register_on_applied(F &&f) {
624 on_applied.emplace_back(std::forward<F>(f));
625 }
626 template <typename F>
627 void register_on_commit(F &&f) {
628 on_committed.emplace_back(std::forward<F>(f));
629 }
630
631 bool sent_reply = false;
632
633 // pending async reads <off, len, op_flags> -> <outbl, outr>
634 list<pair<boost::tuple<uint64_t, uint64_t, unsigned>,
635 pair<bufferlist*, Context*> > > pending_async_reads;
636 int inflightreads;
637 friend struct OnReadComplete;
638 void start_async_reads(PrimaryLogPG *pg);
639 void finish_read(PrimaryLogPG *pg);
640 bool async_reads_complete() {
641 return inflightreads == 0;
642 }
643
644 ObjectContext::RWState::State lock_type;
645 ObcLockManager lock_manager;
646
647 std::map<int, std::unique_ptr<OpFinisher>> op_finishers;
648
649 OpContext(const OpContext& other);
650 const OpContext& operator=(const OpContext& other);
651
652 OpContext(OpRequestRef _op, osd_reqid_t _reqid, vector<OSDOp>* _ops,
653 ObjectContextRef& obc,
654 PrimaryLogPG *_pg) :
655 op(_op), reqid(_reqid), ops(_ops),
656 obs(&obc->obs),
657 snapset(0),
658 new_obs(obs->oi, obs->exists),
659 modify(false), user_modify(false), undirty(false), cache_evict(false),
660 ignore_cache(false), ignore_log_op_stats(false), update_log_only(false),
661 bytes_written(0), bytes_read(0), user_at_version(0),
662 current_osd_subop_num(0),
663 obc(obc),
664 reply(NULL), pg(_pg),
665 num_read(0),
666 num_write(0),
667 sent_reply(false),
668 inflightreads(0),
669 lock_type(ObjectContext::RWState::RWNONE) {
670 if (obc->ssc) {
671 new_snapset = obc->ssc->snapset;
672 snapset = &obc->ssc->snapset;
673 }
674 }
675 OpContext(OpRequestRef _op, osd_reqid_t _reqid,
676 vector<OSDOp>* _ops, PrimaryLogPG *_pg) :
677 op(_op), reqid(_reqid), ops(_ops), obs(NULL), snapset(0),
678 modify(false), user_modify(false), undirty(false), cache_evict(false),
679 ignore_cache(false), ignore_log_op_stats(false), update_log_only(false),
680 bytes_written(0), bytes_read(0), user_at_version(0),
681 current_osd_subop_num(0),
682 reply(NULL), pg(_pg),
683 num_read(0),
684 num_write(0),
685 inflightreads(0),
686 lock_type(ObjectContext::RWState::RWNONE) {}
687 void reset_obs(ObjectContextRef obc) {
688 new_obs = ObjectState(obc->obs.oi, obc->obs.exists);
689 if (obc->ssc) {
690 new_snapset = obc->ssc->snapset;
691 snapset = &obc->ssc->snapset;
692 }
693 }
694 ~OpContext() {
695 ceph_assert(!op_t);
696 if (reply)
697 reply->put();
698 for (list<pair<boost::tuple<uint64_t, uint64_t, unsigned>,
699 pair<bufferlist*, Context*> > >::iterator i =
700 pending_async_reads.begin();
701 i != pending_async_reads.end();
702 pending_async_reads.erase(i++)) {
703 delete i->second.second;
704 }
705 }
706 uint64_t get_features() {
707 if (op && op->get_req()) {
708 return op->get_req()->get_connection()->get_features();
709 }
710 return -1ull;
711 }
712 };
713 using OpContextUPtr = std::unique_ptr<OpContext>;
714 friend struct OpContext;
715
716 /*
717 * State on the PG primary associated with the replicated mutation
718 */
719 class RepGather {
720 public:
721 hobject_t hoid;
722 OpRequestRef op;
723 xlist<RepGather*>::item queue_item;
724 int nref;
725
726 eversion_t v;
727 int r = 0;
728
729 ceph_tid_t rep_tid;
730
731 bool rep_aborted;
732 bool all_committed;
733
734 utime_t start;
735
736 eversion_t pg_local_last_complete;
737
738 ObcLockManager lock_manager;
739
740 list<std::function<void()>> on_committed;
741 list<std::function<void()>> on_success;
742 list<std::function<void()>> on_finish;
743
744 RepGather(
745 OpContext *c, ceph_tid_t rt,
746 eversion_t lc) :
747 hoid(c->obc->obs.oi.soid),
748 op(c->op),
749 queue_item(this),
750 nref(1),
751 rep_tid(rt),
752 rep_aborted(false),
753 all_committed(false),
754 pg_local_last_complete(lc),
755 lock_manager(std::move(c->lock_manager)),
756 on_committed(std::move(c->on_committed)),
757 on_success(std::move(c->on_success)),
758 on_finish(std::move(c->on_finish)) {}
759
760 RepGather(
761 ObcLockManager &&manager,
762 OpRequestRef &&o,
763 boost::optional<std::function<void(void)> > &&on_complete,
764 ceph_tid_t rt,
765 eversion_t lc,
766 int r) :
767 op(o),
768 queue_item(this),
769 nref(1),
770 r(r),
771 rep_tid(rt),
772 rep_aborted(false),
773 all_committed(false),
774 pg_local_last_complete(lc),
775 lock_manager(std::move(manager)) {
776 if (on_complete) {
777 on_success.push_back(std::move(*on_complete));
778 }
779 }
780
781 RepGather *get() {
782 nref++;
783 return this;
784 }
785 void put() {
786 ceph_assert(nref > 0);
787 if (--nref == 0) {
788 delete this;
789 //generic_dout(0) << "deleting " << this << dendl;
790 }
791 }
792 };
793
794
795 protected:
796
797 /**
798 * Grabs locks for OpContext, should be cleaned up in close_op_ctx
799 *
800 * @param ctx [in,out] ctx to get locks for
801 * @return true on success, false if we are queued
802 */
803 bool get_rw_locks(bool write_ordered, OpContext *ctx) {
   804     /* If head_obc is set, !obc->obs.exists and we always take the
   805      * head_obc lock *before* the obc lock. Since every caller (read or
   806      * write) follows this order, acquiring the first lock guarantees we
   807      * can acquire the second.
   808      */
809 if (write_ordered && ctx->op->may_read()) {
810 ctx->lock_type = ObjectContext::RWState::RWEXCL;
811 } else if (write_ordered) {
812 ctx->lock_type = ObjectContext::RWState::RWWRITE;
813 } else {
814 ceph_assert(ctx->op->may_read());
815 ctx->lock_type = ObjectContext::RWState::RWREAD;
816 }
817
818 if (ctx->head_obc) {
819 ceph_assert(!ctx->obc->obs.exists);
820 if (!ctx->lock_manager.get_lock_type(
821 ctx->lock_type,
822 ctx->head_obc->obs.oi.soid,
823 ctx->head_obc,
824 ctx->op)) {
825 ctx->lock_type = ObjectContext::RWState::RWNONE;
826 return false;
827 }
828 }
829 if (ctx->lock_manager.get_lock_type(
830 ctx->lock_type,
831 ctx->obc->obs.oi.soid,
832 ctx->obc,
833 ctx->op)) {
834 return true;
835 } else {
836 ceph_assert(!ctx->head_obc);
837 ctx->lock_type = ObjectContext::RWState::RWNONE;
838 return false;
839 }
840 }
841
842 /**
843 * Cleans up OpContext
844 *
845 * @param ctx [in] ctx to clean up
846 */
847 void close_op_ctx(OpContext *ctx);
848
849 /**
850 * Releases locks
851 *
852 * @param manager [in] manager with locks to release
853 */
854 void release_object_locks(
855 ObcLockManager &lock_manager) {
856 list<pair<ObjectContextRef, list<OpRequestRef> > > to_req;
857 bool requeue_recovery = false;
858 bool requeue_snaptrim = false;
859 lock_manager.put_locks(
860 &to_req,
861 &requeue_recovery,
862 &requeue_snaptrim);
863 if (requeue_recovery)
864 queue_recovery();
865 if (requeue_snaptrim)
866 snap_trimmer_machine.process_event(TrimWriteUnblocked());
867
868 if (!to_req.empty()) {
869 // requeue at front of scrub blocking queue if we are blocked by scrub
870 for (auto &&p: to_req) {
871 if (write_blocked_by_scrub(p.first->obs.oi.soid.get_head())) {
872 for (auto& op : p.second) {
873 op->mark_delayed("waiting for scrub");
874 }
875
876 waiting_for_scrub.splice(
877 waiting_for_scrub.begin(),
878 p.second,
879 p.second.begin(),
880 p.second.end());
881 } else {
882 requeue_ops(p.second);
883 }
884 }
885 }
886 }
887
888 // replica ops
889 // [primary|tail]
890 xlist<RepGather*> repop_queue;
891
892 friend class C_OSD_RepopCommit;
893 void repop_all_committed(RepGather *repop);
894 void eval_repop(RepGather*);
895 void issue_repop(RepGather *repop, OpContext *ctx);
896 RepGather *new_repop(
897 OpContext *ctx,
898 ObjectContextRef obc,
899 ceph_tid_t rep_tid);
900 boost::intrusive_ptr<RepGather> new_repop(
901 eversion_t version,
902 int r,
903 ObcLockManager &&manager,
904 OpRequestRef &&op,
905 boost::optional<std::function<void(void)> > &&on_complete);
906 void remove_repop(RepGather *repop);
907
908 OpContextUPtr simple_opc_create(ObjectContextRef obc);
909 void simple_opc_submit(OpContextUPtr ctx);
910
911 /**
   912    * Atomically merge entries into the logs of all acting_recovery_backfill
   913    * OSDs, adjusting missing and recovery state as necessary.
914 *
915 * Also used to store error log entries for dup detection.
916 */
917 void submit_log_entries(
918 const mempool::osd_pglog::list<pg_log_entry_t> &entries,
919 ObcLockManager &&manager,
920 boost::optional<std::function<void(void)> > &&on_complete,
921 OpRequestRef op = OpRequestRef(),
922 int r = 0);
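  // Illustrative sketch (hypothetical call site): storing an error log entry
  // for dup detection, per the doc comment above. Entry construction is elided;
  // the lock manager and completion arguments are left empty.
  //
  //   mempool::osd_pglog::list<pg_log_entry_t> entries;
  //   entries.push_back(/* ERROR-type pg_log_entry_t for the failed write */);
  //   submit_log_entries(entries, ObcLockManager(), boost::none, op, r);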
923 struct LogUpdateCtx {
924 boost::intrusive_ptr<RepGather> repop;
925 set<pg_shard_t> waiting_on;
926 };
927 void cancel_log_updates();
928 map<ceph_tid_t, LogUpdateCtx> log_entry_update_waiting_on;
929
930
931 // hot/cold tracking
932 HitSetRef hit_set; ///< currently accumulating HitSet
933 utime_t hit_set_start_stamp; ///< time the current HitSet started recording
934
935
936 void hit_set_clear(); ///< discard any HitSet state
937 void hit_set_setup(); ///< initialize HitSet state
938 void hit_set_create(); ///< create a new HitSet
939 void hit_set_persist(); ///< persist hit info
940 bool hit_set_apply_log(); ///< apply log entries to update in-memory HitSet
941 void hit_set_trim(OpContextUPtr &ctx, unsigned max); ///< discard old HitSets
942 void hit_set_in_memory_trim(uint32_t max_in_memory); ///< discard old in memory HitSets
943 void hit_set_remove_all();
944
945 hobject_t get_hit_set_current_object(utime_t stamp);
946 hobject_t get_hit_set_archive_object(utime_t start,
947 utime_t end,
948 bool using_gmt);
949
950 // agent
951 boost::scoped_ptr<TierAgentState> agent_state;
952
953 void agent_setup(); ///< initialize agent state
954 bool agent_work(int max) override ///< entry point to do some agent work
955 {
956 return agent_work(max, max);
957 }
958 bool agent_work(int max, int agent_flush_quota) override;
959 bool agent_maybe_flush(ObjectContextRef& obc); ///< maybe flush
960 bool agent_maybe_evict(ObjectContextRef& obc, bool after_flush); ///< maybe evict
961
962 void agent_load_hit_sets(); ///< load HitSets, if needed
963
964 /// estimate object atime and temperature
965 ///
966 /// @param oid [in] object name
   967    /// @param temperature [out] relative temperature (considers both access time and frequency)
968 void agent_estimate_temp(const hobject_t& oid, int *temperature);
969
970 /// stop the agent
971 void agent_stop() override;
972 void agent_delay() override;
973
974 /// clear agent state
975 void agent_clear() override;
976
977 /// choose (new) agent mode(s), returns true if op is requeued
978 bool agent_choose_mode(bool restart = false, OpRequestRef op = OpRequestRef());
979 void agent_choose_mode_restart() override;
980
981 /// true if we can send an ondisk/commit for v
982 bool already_complete(eversion_t v);
983 /// true if we can send an ack for v
984 bool already_ack(eversion_t v);
985
986 // projected object info
987 SharedLRU<hobject_t, ObjectContext> object_contexts;
988 // map from oid.snapdir() to SnapSetContext *
989 map<hobject_t, SnapSetContext*> snapset_contexts;
990 Mutex snapset_contexts_lock;
991
992 // debug order that client ops are applied
993 map<hobject_t, map<client_t, ceph_tid_t>> debug_op_order;
994
995 void populate_obc_watchers(ObjectContextRef obc);
996 void check_blacklisted_obc_watchers(ObjectContextRef obc);
997 void check_blacklisted_watchers() override;
998 void get_watchers(list<obj_watch_item_t> *ls) override;
999 void get_obc_watchers(ObjectContextRef obc, list<obj_watch_item_t> &pg_watchers);
1000 public:
1001 void handle_watch_timeout(WatchRef watch);
1002 protected:
1003
1004 ObjectContextRef create_object_context(const object_info_t& oi, SnapSetContext *ssc);
1005 ObjectContextRef get_object_context(
1006 const hobject_t& soid,
1007 bool can_create,
1008 const map<string, bufferlist> *attrs = 0
1009 );
1010
1011 void context_registry_on_change();
1012 void object_context_destructor_callback(ObjectContext *obc);
1013 class C_PG_ObjectContext;
1014
1015 int find_object_context(const hobject_t& oid,
1016 ObjectContextRef *pobc,
1017 bool can_create,
1018 bool map_snapid_to_clone=false,
1019 hobject_t *missing_oid=NULL);
1020
1021 void add_object_context_to_pg_stat(ObjectContextRef obc, pg_stat_t *stat);
1022
1023 void get_src_oloc(const object_t& oid, const object_locator_t& oloc, object_locator_t& src_oloc);
1024
1025 SnapSetContext *get_snapset_context(
1026 const hobject_t& oid,
1027 bool can_create,
1028 const map<string, bufferlist> *attrs = 0,
  1029     bool oid_existed = true //whether this oid already exists in the backend
1030 );
1031 void register_snapset_context(SnapSetContext *ssc) {
1032 std::lock_guard l(snapset_contexts_lock);
1033 _register_snapset_context(ssc);
1034 }
1035 void _register_snapset_context(SnapSetContext *ssc) {
1036 ceph_assert(snapset_contexts_lock.is_locked());
1037 if (!ssc->registered) {
1038 ceph_assert(snapset_contexts.count(ssc->oid) == 0);
1039 ssc->registered = true;
1040 snapset_contexts[ssc->oid] = ssc;
1041 }
1042 }
1043 void put_snapset_context(SnapSetContext *ssc);
1044
1045 map<hobject_t, ObjectContextRef> recovering;
1046
1047 /*
1048 * Backfill
1049 *
1050 * peer_info[backfill_target].last_backfill == info.last_backfill on the peer.
1051 *
1052 * objects prior to peer_info[backfill_target].last_backfill
1053 * - are on the peer
1054 * - are included in the peer stats
1055 *
1056 * objects \in (last_backfill, last_backfill_started]
1057 * - are on the peer or are in backfills_in_flight
1058 * - are not included in pg stats (yet)
1059 * - have their stats in pending_backfill_updates on the primary
1060 */
1061 set<hobject_t> backfills_in_flight;
1062 map<hobject_t, pg_stat_t> pending_backfill_updates;
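  // Concrete illustration of the invariant above (hash positions are made up):
  // with peer last_backfill == 0x80 and last_backfill_started == 0xc0, an object
  // hashing to 0x70 is already on the peer and counted in its stats, while an
  // object hashing to 0xa0 is either on the peer or listed in
  // backfills_in_flight, with its stat delta held in pending_backfill_updates
  // until backfill confirms it.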
1063
1064 void dump_recovery_info(Formatter *f) const override {
1065 f->open_array_section("backfill_targets");
1066 for (set<pg_shard_t>::const_iterator p = backfill_targets.begin();
1067 p != backfill_targets.end(); ++p)
1068 f->dump_stream("replica") << *p;
1069 f->close_section();
1070 f->open_array_section("waiting_on_backfill");
1071 for (set<pg_shard_t>::const_iterator p = waiting_on_backfill.begin();
1072 p != waiting_on_backfill.end(); ++p)
1073 f->dump_stream("osd") << *p;
1074 f->close_section();
1075 f->dump_stream("last_backfill_started") << last_backfill_started;
1076 {
1077 f->open_object_section("backfill_info");
1078 backfill_info.dump(f);
1079 f->close_section();
1080 }
1081 {
1082 f->open_array_section("peer_backfill_info");
1083 for (map<pg_shard_t, BackfillInterval>::const_iterator pbi =
1084 peer_backfill_info.begin();
1085 pbi != peer_backfill_info.end(); ++pbi) {
1086 f->dump_stream("osd") << pbi->first;
1087 f->open_object_section("BackfillInterval");
1088 pbi->second.dump(f);
1089 f->close_section();
1090 }
1091 f->close_section();
1092 }
1093 {
1094 f->open_array_section("backfills_in_flight");
1095 for (set<hobject_t>::const_iterator i = backfills_in_flight.begin();
1096 i != backfills_in_flight.end();
1097 ++i) {
1098 f->dump_stream("object") << *i;
1099 }
1100 f->close_section();
1101 }
1102 {
1103 f->open_array_section("recovering");
1104 for (map<hobject_t, ObjectContextRef>::const_iterator i = recovering.begin();
1105 i != recovering.end();
1106 ++i) {
1107 f->dump_stream("object") << i->first;
1108 }
1109 f->close_section();
1110 }
1111 {
1112 f->open_object_section("pg_backend");
1113 pgbackend->dump_recovery_info(f);
1114 f->close_section();
1115 }
1116 }
1117
1118 /// last backfill operation started
1119 hobject_t last_backfill_started;
1120 bool new_backfill;
1121
1122 int prep_object_replica_pushes(const hobject_t& soid, eversion_t v,
1123 PGBackend::RecoveryHandle *h,
1124 bool *work_started);
1125 int prep_object_replica_deletes(const hobject_t& soid, eversion_t v,
1126 PGBackend::RecoveryHandle *h,
1127 bool *work_started);
1128
1129 void finish_degraded_object(const hobject_t& oid) override;
1130
1131 // Cancels/resets pulls from peer
  1132   void check_recovery_sources(const OSDMapRef& map) override;
1133
1134 int recover_missing(
1135 const hobject_t& oid,
1136 eversion_t v,
1137 int priority,
1138 PGBackend::RecoveryHandle *h);
1139
1140 // low level ops
1141
1142 void _make_clone(
1143 OpContext *ctx,
1144 PGTransaction* t,
1145 ObjectContextRef obc,
1146 const hobject_t& head, const hobject_t& coid,
1147 object_info_t *poi);
1148 void execute_ctx(OpContext *ctx);
1149 void finish_ctx(OpContext *ctx, int log_op_type);
1150 void reply_ctx(OpContext *ctx, int err);
1151 void reply_ctx(OpContext *ctx, int err, eversion_t v, version_t uv);
1152 void make_writeable(OpContext *ctx);
1153 void log_op_stats(const OpRequest& op, uint64_t inb, uint64_t outb);
1154
1155 void write_update_size_and_usage(object_stat_sum_t& stats, object_info_t& oi,
1156 interval_set<uint64_t>& modified, uint64_t offset,
1157 uint64_t length, bool write_full=false);
1158 inline void truncate_update_size_and_usage(
1159 object_stat_sum_t& delta_stats,
1160 object_info_t& oi,
1161 uint64_t truncate_size);
1162
1163 enum class cache_result_t {
1164 NOOP,
1165 BLOCKED_FULL,
1166 BLOCKED_PROMOTE,
1167 HANDLED_PROXY,
1168 HANDLED_REDIRECT,
1169 REPLIED_WITH_EAGAIN,
1170 BLOCKED_RECOVERY,
1171 };
1172 cache_result_t maybe_handle_cache_detail(OpRequestRef op,
1173 bool write_ordered,
1174 ObjectContextRef obc, int r,
1175 hobject_t missing_oid,
1176 bool must_promote,
1177 bool in_hit_set,
1178 ObjectContextRef *promote_obc);
1179 cache_result_t maybe_handle_manifest_detail(OpRequestRef op,
1180 bool write_ordered,
1181 ObjectContextRef obc);
1182 bool maybe_handle_manifest(OpRequestRef op,
1183 bool write_ordered,
1184 ObjectContextRef obc) {
1185 return cache_result_t::NOOP != maybe_handle_manifest_detail(
1186 op,
1187 write_ordered,
1188 obc);
1189 }
1190
1191 /**
1192 * This helper function is called from do_op if the ObjectContext lookup fails.
1193 * @returns true if the caching code is handling the Op, false otherwise.
1194 */
1195 bool maybe_handle_cache(OpRequestRef op,
1196 bool write_ordered,
1197 ObjectContextRef obc, int r,
1198 const hobject_t& missing_oid,
1199 bool must_promote,
1200 bool in_hit_set = false) {
1201 return cache_result_t::NOOP != maybe_handle_cache_detail(
1202 op,
1203 write_ordered,
1204 obc,
1205 r,
1206 missing_oid,
1207 must_promote,
1208 in_hit_set,
1209 nullptr);
1210 }
1211
1212 /**
1213 * This helper function checks if a promotion is needed.
1214 */
1215 bool maybe_promote(ObjectContextRef obc,
1216 const hobject_t& missing_oid,
1217 const object_locator_t& oloc,
1218 bool in_hit_set,
1219 uint32_t recency,
1220 OpRequestRef promote_op,
1221 ObjectContextRef *promote_obc = nullptr);
1222 /**
1223 * This helper function tells the client to redirect their request elsewhere.
1224 */
1225 void do_cache_redirect(OpRequestRef op);
1226 /**
1227 * This function attempts to start a promote. Either it succeeds,
1228 * or places op on a wait list. If op is null, failure means that
1229 * this is a noop. If a future user wants to be able to distinguish
1230 * these cases, a return value should be added.
1231 */
1232 void promote_object(
1233 ObjectContextRef obc, ///< [optional] obc
1234 const hobject_t& missing_object, ///< oid (if !obc)
1235 const object_locator_t& oloc, ///< locator for obc|oid
1236 OpRequestRef op, ///< [optional] client op
1237 ObjectContextRef *promote_obc = nullptr ///< [optional] new obc for object
1238 );
1239
1240 int prepare_transaction(OpContext *ctx);
1241 list<pair<OpRequestRef, OpContext*> > in_progress_async_reads;
1242 void complete_read_ctx(int result, OpContext *ctx);
1243
1244 // pg on-disk content
1245 void check_local() override;
1246
1247 void _clear_recovery_state() override;
1248
1249 bool start_recovery_ops(
1250 uint64_t max,
1251 ThreadPool::TPHandle &handle, uint64_t *started) override;
1252
1253 uint64_t recover_primary(uint64_t max, ThreadPool::TPHandle &handle);
1254 uint64_t recover_replicas(uint64_t max, ThreadPool::TPHandle &handle,
1255 bool *recovery_started);
1256 hobject_t earliest_peer_backfill() const;
1257 bool all_peer_done() const;
1258 /**
1259 * @param work_started will be set to true if recover_backfill got anywhere
1260 * @returns the number of operations started
1261 */
1262 uint64_t recover_backfill(uint64_t max, ThreadPool::TPHandle &handle,
1263 bool *work_started);
1264
1265 /**
1266 * scan a (hash) range of objects in the current pg
1267 *
1268 * @begin first item should be >= this value
1269 * @min return at least this many items, unless we are done
1270 * @max return no more than this many items
1271 * @bi [out] resulting map of objects to eversion_t's
1272 */
1273 void scan_range(
1274 int min, int max, BackfillInterval *bi,
1275 ThreadPool::TPHandle &handle
1276 );
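  // Illustrative sketch (hypothetical call site; the scan_min/scan_max config
  // option names are assumptions, not taken from this header):
  //
  //   BackfillInterval bi;
  //   bi.begin = last_backfill_started;   // first returned item will be >= begin
  //   scan_range(cct->_conf->osd_backfill_scan_min,
  //              cct->_conf->osd_backfill_scan_max, &bi, handle);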
1277
1278 /// Update a hash range to reflect changes since the last scan
1279 void update_range(
1280 BackfillInterval *bi, ///< [in,out] interval to update
1281 ThreadPool::TPHandle &handle ///< [in] tp handle
1282 );
1283
1284 int prep_backfill_object_push(
1285 hobject_t oid, eversion_t v, ObjectContextRef obc,
1286 vector<pg_shard_t> peers,
1287 PGBackend::RecoveryHandle *h);
1288 void send_remove_op(const hobject_t& oid, eversion_t v, pg_shard_t peer);
1289
1290
1291 class C_OSD_AppliedRecoveredObject;
1292 class C_OSD_CommittedPushedObject;
1293 class C_OSD_AppliedRecoveredObjectReplica;
1294
1295 void _applied_recovered_object(ObjectContextRef obc);
1296 void _applied_recovered_object_replica();
1297 void _committed_pushed_object(epoch_t epoch, eversion_t lc);
1298 void recover_got(hobject_t oid, eversion_t v);
1299
1300 // -- copyfrom --
1301 map<hobject_t, CopyOpRef> copy_ops;
1302
1303 int do_copy_get(OpContext *ctx, bufferlist::const_iterator& bp, OSDOp& op,
1304 ObjectContextRef& obc);
1305 int finish_copy_get();
1306
1307 void fill_in_copy_get_noent(OpRequestRef& op, hobject_t oid,
1308 OSDOp& osd_op);
1309
1310 /**
1311 * To copy an object, call start_copy.
1312 *
1313 * @param cb: The CopyCallback to be activated when the copy is complete
1314 * @param obc: The ObjectContext we are copying into
1315 * @param src: The source object
1316 * @param oloc: the source object locator
1317 * @param version: the version of the source object to copy (0 for any)
1318 */
1319 void start_copy(CopyCallback *cb, ObjectContextRef obc, hobject_t src,
1320 object_locator_t oloc, version_t version, unsigned flags,
1321 bool mirror_snapset, unsigned src_obj_fadvise_flags,
1322 unsigned dest_obj_fadvise_flags);
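  // Illustrative sketch (hypothetical call site, argument values made up):
  //
  //   start_copy(cb, obc, src_oid, src_oloc,
  //              0 /* any version */, flags,
  //              false /* mirror_snapset */,
  //              0 /* src fadvise */, 0 /* dest fadvise */);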
1323 void process_copy_chunk(hobject_t oid, ceph_tid_t tid, int r);
1324 void _write_copy_chunk(CopyOpRef cop, PGTransaction *t);
1325 uint64_t get_copy_chunk_size() const {
1326 uint64_t size = cct->_conf->osd_copyfrom_max_chunk;
1327 if (pool.info.required_alignment()) {
1328 uint64_t alignment = pool.info.required_alignment();
1329 if (size % alignment) {
1330 size += alignment - (size % alignment);
1331 }
1332 }
1333 return size;
1334 }
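  // Worked example (values hypothetical): with osd_copyfrom_max_chunk set to
  // 8388609 bytes and a required pool alignment of 65536, 8388609 % 65536 == 1,
  // so the chunk size is rounded up by 65535 to 8454144, the next multiple of
  // the alignment.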
1335 void _copy_some(ObjectContextRef obc, CopyOpRef cop);
1336 void finish_copyfrom(CopyFromCallback *cb);
1337 void finish_promote(int r, CopyResults *results, ObjectContextRef obc);
1338 void cancel_copy(CopyOpRef cop, bool requeue, vector<ceph_tid_t> *tids);
1339 void cancel_copy_ops(bool requeue, vector<ceph_tid_t> *tids);
1340
1341 friend struct C_Copyfrom;
1342
1343 // -- flush --
1344 map<hobject_t, FlushOpRef> flush_ops;
1345
1346 /// start_flush takes ownership of on_flush iff ret == -EINPROGRESS
1347 int start_flush(
1348 OpRequestRef op, ObjectContextRef obc,
1349 bool blocking, hobject_t *pmissing,
1350 boost::optional<std::function<void()>> &&on_flush);
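  // Illustrative caller sketch (hypothetical call site): ownership of on_flush
  // transfers only on -EINPROGRESS, so on any other return the caller still
  // holds the callback and must run or drop it itself.
  //
  //   std::function<void()> resume = [/* captures */]() { /* requeue op */ };
  //   boost::optional<std::function<void()>> on_flush(std::move(resume));
  //   int r = start_flush(op, obc, true /* blocking */, nullptr, std::move(on_flush));
  //   if (r != -EINPROGRESS) {
  //     // on_flush was not consumed; handle r here
  //   }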
1351 void finish_flush(hobject_t oid, ceph_tid_t tid, int r);
1352 int try_flush_mark_clean(FlushOpRef fop);
1353 void cancel_flush(FlushOpRef fop, bool requeue, vector<ceph_tid_t> *tids);
1354 void cancel_flush_ops(bool requeue, vector<ceph_tid_t> *tids);
1355
  1356   /// @return false if the clone has been evicted
1357 bool is_present_clone(hobject_t coid);
1358
1359 friend struct C_Flush;
1360
1361 // -- scrub --
1362 bool _range_available_for_scrub(
1363 const hobject_t &begin, const hobject_t &end) override;
1364 void scrub_snapshot_metadata(
1365 ScrubMap &map,
1366 const std::map<hobject_t,
1367 pair<boost::optional<uint32_t>,
1368 boost::optional<uint32_t>>> &missing_digest) override;
1369 void _scrub_clear_state() override;
1370 void _scrub_finish() override;
1371 object_stat_collection_t scrub_cstat;
1372
1373 void _split_into(pg_t child_pgid, PG *child,
1374 unsigned split_bits) override;
1375 void apply_and_flush_repops(bool requeue);
1376
1377 void calc_trim_to() override;
1378 void calc_trim_to_aggressive() override;
1379 int do_xattr_cmp_u64(int op, __u64 v1, bufferlist& xattr);
1380 int do_xattr_cmp_str(int op, string& v1s, bufferlist& xattr);
1381
1382 // -- checksum --
1383 int do_checksum(OpContext *ctx, OSDOp& osd_op, bufferlist::const_iterator *bl_it);
1384 int finish_checksum(OSDOp& osd_op, Checksummer::CSumType csum_type,
1385 bufferlist::const_iterator *init_value_bl_it,
1386 const bufferlist &read_bl);
1387
1388 friend class C_ChecksumRead;
1389
1390 int do_extent_cmp(OpContext *ctx, OSDOp& osd_op);
1391 int finish_extent_cmp(OSDOp& osd_op, const bufferlist &read_bl);
1392
1393 friend class C_ExtentCmpRead;
1394
1395 int do_read(OpContext *ctx, OSDOp& osd_op);
1396 int do_sparse_read(OpContext *ctx, OSDOp& osd_op);
1397 int do_writesame(OpContext *ctx, OSDOp& osd_op);
1398
1399 bool pgls_filter(PGLSFilter *filter, hobject_t& sobj, bufferlist& outdata);
1400 int get_pgls_filter(bufferlist::const_iterator& iter, PGLSFilter **pfilter);
1401
1402 map<hobject_t, list<OpRequestRef>> in_progress_proxy_ops;
1403 void kick_proxy_ops_blocked(hobject_t& soid);
1404 void cancel_proxy_ops(bool requeue, vector<ceph_tid_t> *tids);
1405
1406 // -- proxyread --
1407 map<ceph_tid_t, ProxyReadOpRef> proxyread_ops;
1408
1409 void do_proxy_read(OpRequestRef op, ObjectContextRef obc = NULL);
1410 void finish_proxy_read(hobject_t oid, ceph_tid_t tid, int r);
1411 void cancel_proxy_read(ProxyReadOpRef prdop, vector<ceph_tid_t> *tids);
1412
1413 friend struct C_ProxyRead;
1414
1415 // -- proxywrite --
1416 map<ceph_tid_t, ProxyWriteOpRef> proxywrite_ops;
1417
1418 void do_proxy_write(OpRequestRef op, ObjectContextRef obc = NULL);
1419 void finish_proxy_write(hobject_t oid, ceph_tid_t tid, int r);
1420 void cancel_proxy_write(ProxyWriteOpRef pwop, vector<ceph_tid_t> *tids);
1421
1422 friend struct C_ProxyWrite_Commit;
1423
1424 // -- chunkop --
1425 void do_proxy_chunked_op(OpRequestRef op, const hobject_t& missing_oid,
1426 ObjectContextRef obc, bool write_ordered);
1427 void do_proxy_chunked_read(OpRequestRef op, ObjectContextRef obc, int op_index,
1428 uint64_t chunk_index, uint64_t req_offset, uint64_t req_length,
1429 uint64_t req_total_len, bool write_ordered);
1430 bool can_proxy_chunked_read(OpRequestRef op, ObjectContextRef obc);
1431 void _copy_some_manifest(ObjectContextRef obc, CopyOpRef cop, uint64_t start_offset);
1432 void process_copy_chunk_manifest(hobject_t oid, ceph_tid_t tid, int r, uint64_t offset);
1433 void finish_promote_manifest(int r, CopyResults *results, ObjectContextRef obc);
1434 void cancel_and_requeue_proxy_ops(hobject_t oid);
1435 int do_manifest_flush(OpRequestRef op, ObjectContextRef obc, FlushOpRef manifest_fop,
1436 uint64_t start_offset, bool block);
1437 int start_manifest_flush(OpRequestRef op, ObjectContextRef obc, bool blocking,
1438 boost::optional<std::function<void()>> &&on_flush);
1439 void finish_manifest_flush(hobject_t oid, ceph_tid_t tid, int r, ObjectContextRef obc,
1440 uint64_t last_offset);
1441 void handle_manifest_flush(hobject_t oid, ceph_tid_t tid, int r,
1442 uint64_t offset, uint64_t last_offset, epoch_t lpr);
1443 void refcount_manifest(ObjectContextRef obc, object_locator_t oloc, hobject_t soid,
1444 SnapContext snapc, bool get, Context *cb, uint64_t offset);
1445
1446 friend struct C_ProxyChunkRead;
1447 friend class PromoteManifestCallback;
1448 friend class C_CopyChunk;
1449 friend struct C_ManifestFlush;
1450 friend struct RefCountCallback;
1451
1452 public:
1453 PrimaryLogPG(OSDService *o, OSDMapRef curmap,
1454 const PGPool &_pool,
1455 const map<string,string>& ec_profile,
1456 spg_t p);
1457 ~PrimaryLogPG() override {}
1458
1459 int do_command(
1460 cmdmap_t cmdmap,
1461 ostream& ss,
1462 bufferlist& idata,
1463 bufferlist& odata,
1464 ConnectionRef conn,
1465 ceph_tid_t tid) override;
1466
1467 void clear_cache();
1468 int get_cache_obj_count() {
1469 return object_contexts.get_count();
1470 }
1471 void do_request(
1472 OpRequestRef& op,
1473 ThreadPool::TPHandle &handle) override;
1474 void do_op(OpRequestRef& op);
1475 void record_write_error(OpRequestRef op, const hobject_t &soid,
1476 MOSDOpReply *orig_reply, int r);
1477 void do_pg_op(OpRequestRef op);
1478 void do_scan(
1479 OpRequestRef op,
1480 ThreadPool::TPHandle &handle);
1481 void do_backfill(OpRequestRef op);
1482 void do_backfill_remove(OpRequestRef op);
1483
1484 void handle_backoff(OpRequestRef& op);
1485
1486 int trim_object(bool first, const hobject_t &coid, OpContextUPtr *ctxp);
1487 void snap_trimmer(epoch_t e) override;
1488 void kick_snap_trim() override;
1489 void snap_trimmer_scrub_complete() override;
1490 int do_osd_ops(OpContext *ctx, vector<OSDOp>& ops);
1491
1492 int _get_tmap(OpContext *ctx, bufferlist *header, bufferlist *vals);
1493 int do_tmap2omap(OpContext *ctx, unsigned flags);
1494 int do_tmapup(OpContext *ctx, bufferlist::const_iterator& bp, OSDOp& osd_op);
1495 int do_tmapup_slow(OpContext *ctx, bufferlist::const_iterator& bp, OSDOp& osd_op, bufferlist& bl);
1496
1497 void do_osd_op_effects(OpContext *ctx, const ConnectionRef& conn);
1498 private:
1499 int do_scrub_ls(MOSDOp *op, OSDOp *osd_op);
1500 hobject_t earliest_backfill() const;
1501 bool check_src_targ(const hobject_t& soid, const hobject_t& toid) const;
1502
1503 uint64_t temp_seq; ///< last id for naming temp objects
1504 /// generate a new temp object name
1505 hobject_t generate_temp_object(const hobject_t& target);
1506 /// generate a new temp object name (for recovery)
1507 hobject_t get_temp_recovery_object(const hobject_t& target,
1508 eversion_t version) override;
1509 int get_recovery_op_priority() const {
1510 int64_t pri = 0;
1511 pool.info.opts.get(pool_opts_t::RECOVERY_OP_PRIORITY, &pri);
1512 return pri > 0 ? pri : cct->_conf->osd_recovery_op_priority;
1513 }
1514 void log_missing(unsigned missing,
1515 const boost::optional<hobject_t> &head,
1516 LogChannelRef clog,
1517 const spg_t &pgid,
1518 const char *func,
1519 const char *mode,
1520 bool allow_incomplete_clones);
1521 unsigned process_clones_to(const boost::optional<hobject_t> &head,
1522 const boost::optional<SnapSet> &snapset,
1523 LogChannelRef clog,
1524 const spg_t &pgid,
1525 const char *mode,
1526 bool allow_incomplete_clones,
1527 boost::optional<snapid_t> target,
1528 vector<snapid_t>::reverse_iterator *curclone,
1529 inconsistent_snapset_wrapper &snap_error);
1530
1531 public:
1532 coll_t get_coll() {
1533 return coll;
1534 }
1535 void split_colls(
1536 spg_t child,
1537 int split_bits,
1538 int seed,
1539 const pg_pool_t *pool,
1540 ObjectStore::Transaction *t) override {
1541 coll_t target = coll_t(child);
1542 PG::_create(*t, child, split_bits);
1543 t->split_collection(
1544 coll,
1545 split_bits,
1546 seed,
1547 target);
1548 PG::_init(*t, child, pool);
1549 }
1550 private:
1551
1552 struct DoSnapWork : boost::statechart::event< DoSnapWork > {
1553 DoSnapWork() : boost::statechart::event < DoSnapWork >() {}
1554 };
1555 struct KickTrim : boost::statechart::event< KickTrim > {
1556 KickTrim() : boost::statechart::event < KickTrim >() {}
1557 };
1558 struct RepopsComplete : boost::statechart::event< RepopsComplete > {
1559 RepopsComplete() : boost::statechart::event < RepopsComplete >() {}
1560 };
1561 struct ScrubComplete : boost::statechart::event< ScrubComplete > {
1562 ScrubComplete() : boost::statechart::event < ScrubComplete >() {}
1563 };
1564 struct TrimWriteUnblocked : boost::statechart::event< TrimWriteUnblocked > {
1565 TrimWriteUnblocked() : boost::statechart::event < TrimWriteUnblocked >() {}
1566 };
1567 struct Reset : boost::statechart::event< Reset > {
1568 Reset() : boost::statechart::event< Reset >() {}
1569 };
1570 struct SnapTrimReserved : boost::statechart::event< SnapTrimReserved > {
1571 SnapTrimReserved() : boost::statechart::event< SnapTrimReserved >() {}
1572 };
1573 struct SnapTrimTimerReady : boost::statechart::event< SnapTrimTimerReady > {
1574 SnapTrimTimerReady() : boost::statechart::event< SnapTrimTimerReady >() {}
1575 };
1576
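  // Rough shape of the snap-trim state machine declared below, summarized from
  // the reactions visible in this header (NotTrimming's KickTrim handler,
  // WaitReservation's SnapTrimReserved handler and AwaitAsyncWork's DoSnapWork
  // handler live in the .cc): NotTrimming is the start state; WaitScrub parks
  // trimming until ScrubComplete re-posts KickTrim. Trimming starts in
  // WaitReservation, which asks the OSD's snap_reserver for a slot; once
  // SnapTrimReserved fires, AwaitAsyncWork's DoSnapWork reaction performs the
  // trim work. WaitRepops waits for the resulting repops, WaitTrimTimer applies
  // the optional osd_snap_trim_sleep delay before the next pass, and WaitRWLock
  // waits for a blocking writer (TrimWriteUnblocked). Reset, or a failed
  // can_trim() check, returns to NotTrimming.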
1577 struct NotTrimming;
1578 struct SnapTrimmer : public boost::statechart::state_machine< SnapTrimmer, NotTrimming > {
1579 PrimaryLogPG *pg;
1580 explicit SnapTrimmer(PrimaryLogPG *pg) : pg(pg) {}
1581 void log_enter(const char *state_name);
1582 void log_exit(const char *state_name, utime_t duration);
1583 bool permit_trim() {
1584 return
1585 pg->is_clean() &&
1586 !pg->scrubber.active &&
1587 !pg->snap_trimq.empty();
1588 }
1589 bool can_trim() {
1590 return
1591 permit_trim() &&
1592 !pg->get_osdmap()->test_flag(CEPH_OSDMAP_NOSNAPTRIM);
1593 }
1594 } snap_trimmer_machine;
1595
1596 struct WaitReservation;
1597 struct Trimming : boost::statechart::state< Trimming, SnapTrimmer, WaitReservation >, NamedState {
1598 typedef boost::mpl::list <
1599 boost::statechart::custom_reaction< KickTrim >,
1600 boost::statechart::transition< Reset, NotTrimming >
1601 > reactions;
1602
1603 set<hobject_t> in_flight;
1604 snapid_t snap_to_trim;
1605
1606 explicit Trimming(my_context ctx)
1607 : my_base(ctx),
1608 NamedState(context< SnapTrimmer >().pg, "Trimming") {
1609 context< SnapTrimmer >().log_enter(state_name);
1610 ceph_assert(context< SnapTrimmer >().permit_trim());
1611 ceph_assert(in_flight.empty());
1612 }
1613 void exit() {
1614 context< SnapTrimmer >().log_exit(state_name, enter_time);
1615 auto *pg = context< SnapTrimmer >().pg;
1616 pg->osd->snap_reserver.cancel_reservation(pg->get_pgid());
1617 pg->state_clear(PG_STATE_SNAPTRIM);
1618 pg->publish_stats_to_osd();
1619 }
1620 boost::statechart::result react(const KickTrim&) {
1621 return discard_event();
1622 }
1623 };
1624
1625 /* SnapTrimmerStates */
1626 struct WaitTrimTimer : boost::statechart::state< WaitTrimTimer, Trimming >, NamedState {
1627 typedef boost::mpl::list <
1628 boost::statechart::custom_reaction< SnapTrimTimerReady >
1629 > reactions;
1630 Context *wakeup = nullptr;
1631 explicit WaitTrimTimer(my_context ctx)
1632 : my_base(ctx),
1633 NamedState(context< SnapTrimmer >().pg, "Trimming/WaitTrimTimer") {
1634 context< SnapTrimmer >().log_enter(state_name);
1635 ceph_assert(context<Trimming>().in_flight.empty());
1636 struct OnTimer : Context {
1637 PrimaryLogPGRef pg;
1638 epoch_t epoch;
1639 OnTimer(PrimaryLogPGRef pg, epoch_t epoch) : pg(pg), epoch(epoch) {}
1640 void finish(int) override {
1641 pg->lock();
1642 if (!pg->pg_has_reset_since(epoch))
1643 pg->snap_trimmer_machine.process_event(SnapTrimTimerReady());
1644 pg->unlock();
1645 }
1646 };
1647 auto *pg = context< SnapTrimmer >().pg;
1648 if (pg->cct->_conf->osd_snap_trim_sleep > 0) {
1649 std::lock_guard l(pg->osd->sleep_lock);
1650 wakeup = pg->osd->sleep_timer.add_event_after(
1651 pg->cct->_conf->osd_snap_trim_sleep,
1652 new OnTimer{pg, pg->get_osdmap_epoch()});
1653 } else {
1654 post_event(SnapTrimTimerReady());
1655 }
1656 }
1657 void exit() {
1658 context< SnapTrimmer >().log_exit(state_name, enter_time);
1659 auto *pg = context< SnapTrimmer >().pg;
1660 if (wakeup) {
1661 std::lock_guard l(pg->osd->sleep_lock);
1662 pg->osd->sleep_timer.cancel_event(wakeup);
1663 wakeup = nullptr;
1664 }
1665 }
1666 boost::statechart::result react(const SnapTrimTimerReady &) {
1667 wakeup = nullptr;
1668 if (!context< SnapTrimmer >().can_trim()) {
1669 post_event(KickTrim());
1670 return transit< NotTrimming >();
1671 } else {
1672 return transit< AwaitAsyncWork >();
1673 }
1674 }
1675 };
1676
1677 struct WaitRWLock : boost::statechart::state< WaitRWLock, Trimming >, NamedState {
1678 typedef boost::mpl::list <
1679 boost::statechart::custom_reaction< TrimWriteUnblocked >
1680 > reactions;
1681 explicit WaitRWLock(my_context ctx)
1682 : my_base(ctx),
1683 NamedState(context< SnapTrimmer >().pg, "Trimming/WaitRWLock") {
1684 context< SnapTrimmer >().log_enter(state_name);
1685 ceph_assert(context<Trimming>().in_flight.empty());
1686 }
1687 void exit() {
1688 context< SnapTrimmer >().log_exit(state_name, enter_time);
1689 }
1690 boost::statechart::result react(const TrimWriteUnblocked&) {
1691 if (!context< SnapTrimmer >().can_trim()) {
1692 post_event(KickTrim());
1693 return transit< NotTrimming >();
1694 } else {
1695 return transit< AwaitAsyncWork >();
1696 }
1697 }
1698 };
1699
1700 struct WaitRepops : boost::statechart::state< WaitRepops, Trimming >, NamedState {
1701 typedef boost::mpl::list <
1702 boost::statechart::custom_reaction< RepopsComplete >
1703 > reactions;
1704 explicit WaitRepops(my_context ctx)
1705 : my_base(ctx),
1706 NamedState(context< SnapTrimmer >().pg, "Trimming/WaitRepops") {
1707 context< SnapTrimmer >().log_enter(state_name);
1708 ceph_assert(!context<Trimming>().in_flight.empty());
1709 }
1710 void exit() {
1711 context< SnapTrimmer >().log_exit(state_name, enter_time);
1712 }
1713 boost::statechart::result react(const RepopsComplete&) {
1714 if (!context< SnapTrimmer >().can_trim()) {
1715 post_event(KickTrim());
1716 return transit< NotTrimming >();
1717 } else {
1718 return transit< WaitTrimTimer >();
1719 }
1720 }
1721 };
1722
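// Queues one batch of trim work to the OSD; the DoSnapWork handler (defined
// in PrimaryLogPG.cc) trims clones for the snap being removed and records the
// resulting repops in Trimming::in_flight.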
1723 struct AwaitAsyncWork : boost::statechart::state< AwaitAsyncWork, Trimming >, NamedState {
1724 typedef boost::mpl::list <
1725 boost::statechart::custom_reaction< DoSnapWork >
1726 > reactions;
1727 explicit AwaitAsyncWork(my_context ctx);
1728 void exit() {
1729 context< SnapTrimmer >().log_exit(state_name, enter_time);
1730 }
1731 boost::statechart::result react(const DoSnapWork&);
1732 };
1733
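// The reservation requested below comes from the OSD-wide snap_reserver,
// which bounds how many PGs may trim at once (the osd_max_trimming_pgs
// option).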
1734 struct WaitReservation : boost::statechart::state< WaitReservation, Trimming >, NamedState {
1735 /* WaitReservation is a sub-state of Trimming simply so that exiting Trimming
1736 * always cancels the reservation */
1737 typedef boost::mpl::list <
1738 boost::statechart::custom_reaction< SnapTrimReserved >
1739 > reactions;
1740 struct ReservationCB : public Context {
1741 PrimaryLogPGRef pg;
1742 bool canceled;
1743 explicit ReservationCB(PrimaryLogPG *pg) : pg(pg), canceled(false) {}
1744 void finish(int) override {
1745 pg->lock();
1746 if (!canceled)
1747 pg->snap_trimmer_machine.process_event(SnapTrimReserved());
1748 pg->unlock();
1749 }
1750 void cancel() {
1751 ceph_assert(pg->is_locked());
1752 ceph_assert(!canceled);
1753 canceled = true;
1754 }
1755 };
1756 ReservationCB *pending = nullptr;
1757
1758 explicit WaitReservation(my_context ctx)
1759 : my_base(ctx),
1760 NamedState(context< SnapTrimmer >().pg, "Trimming/WaitReservation") {
1761 context< SnapTrimmer >().log_enter(state_name);
1762 ceph_assert(context<Trimming>().in_flight.empty());
1763 auto *pg = context< SnapTrimmer >().pg;
1764 pending = new ReservationCB(pg);
1765 pg->osd->snap_reserver.request_reservation(
1766 pg->get_pgid(),
1767 pending,
1768 0);
1769 pg->state_set(PG_STATE_SNAPTRIM_WAIT);
1770 pg->publish_stats_to_osd();
1771 }
1772 boost::statechart::result react(const SnapTrimReserved&);
1773 void exit() {
1774 context< SnapTrimmer >().log_exit(state_name, enter_time);
1775 if (pending)
1776 pending->cancel();
1777 pending = nullptr;
1778 auto *pg = context< SnapTrimmer >().pg;
1779 pg->state_clear(PG_STATE_SNAPTRIM_WAIT);
1780 pg->state_clear(PG_STATE_SNAPTRIM_ERROR);
1781 pg->publish_stats_to_osd();
1782 }
1783 };
1784
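// Trimming is deferred while this PG is being scrubbed; ScrubComplete
// re-kicks the trimmer through NotTrimming.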
1785 struct WaitScrub : boost::statechart::state< WaitScrub, SnapTrimmer >, NamedState {
1786 typedef boost::mpl::list <
1787 boost::statechart::custom_reaction< ScrubComplete >,
1788 boost::statechart::custom_reaction< KickTrim >,
1789 boost::statechart::transition< Reset, NotTrimming >
1790 > reactions;
1791 explicit WaitScrub(my_context ctx)
1792 : my_base(ctx),
1793 NamedState(context< SnapTrimmer >().pg, "Trimming/WaitScrub") {
1794 context< SnapTrimmer >().log_enter(state_name);
1795 }
1796 void exit() {
1797 context< SnapTrimmer >().log_exit(state_name, enter_time);
1798 }
1799 boost::statechart::result react(const ScrubComplete&) {
1800 post_event(KickTrim());
1801 return transit< NotTrimming >();
1802 }
1803 boost::statechart::result react(const KickTrim&) {
1804 return discard_event();
1805 }
1806 };
1807
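// Idle state; the KickTrim reaction (implemented in PrimaryLogPG.cc) decides
// whether a new trimming pass should start.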
1808 struct NotTrimming : boost::statechart::state< NotTrimming, SnapTrimmer >, NamedState {
1809 typedef boost::mpl::list <
1810 boost::statechart::custom_reaction< KickTrim >,
1811 boost::statechart::transition< Reset, NotTrimming >
1812 > reactions;
1813 explicit NotTrimming(my_context ctx);
1814 void exit();
1815 boost::statechart::result react(const KickTrim&);
1816 };
1817
1818 int _verify_no_head_clones(const hobject_t& soid,
1819 const SnapSet& ss);
1820 // create the local object if one is needed; for a whiteout or when nothing
1821 // changes, no new object is created.
1822 void maybe_create_new_object(OpContext *ctx, bool ignore_transaction=false);
1823 int _delete_oid(OpContext *ctx, bool no_whiteout, bool try_no_whiteout);
1824 int _rollback_to(OpContext *ctx, ceph_osd_op& op);
1825 public:
1826 bool is_missing_object(const hobject_t& oid) const;
1827 bool is_unreadable_object(const hobject_t &oid) const {
1828 return is_missing_object(oid) ||
1829 !missing_loc.readable_with_acting(oid, actingset);
1830 }
1831 void maybe_kick_recovery(const hobject_t &soid);
1832 void wait_for_unreadable_object(const hobject_t& oid, OpRequestRef op);
1833 void wait_for_all_missing(OpRequestRef op);
1834
1835 bool is_degraded_or_backfilling_object(const hobject_t& oid);
1836 bool is_degraded_on_async_recovery_target(const hobject_t& soid);
1837 void wait_for_degraded_object(const hobject_t& oid, OpRequestRef op);
1838
1839 void block_write_on_full_cache(
1840 const hobject_t& oid, OpRequestRef op);
1841 void block_for_clean(
1842 const hobject_t& oid, OpRequestRef op);
1843 void block_write_on_snap_rollback(
1844 const hobject_t& oid, ObjectContextRef obc, OpRequestRef op);
1845 void block_write_on_degraded_snap(const hobject_t& oid, OpRequestRef op);
1846
1847 bool maybe_await_blocked_head(const hobject_t &soid, OpRequestRef op);
1848 void wait_for_blocked_object(const hobject_t& soid, OpRequestRef op);
1849 void kick_object_context_blocked(ObjectContextRef obc);
1850
1851 void maybe_force_recovery();
1852
1853 void mark_all_unfound_lost(
1854 int what,
1855 ConnectionRef con,
1856 ceph_tid_t tid);
1857 eversion_t pick_newest_available(const hobject_t& oid);
1858
1859 void do_update_log_missing(
1860 OpRequestRef &op);
1861
1862 void do_update_log_missing_reply(
1863 OpRequestRef &op);
1864
1865 void on_role_change() override;
1866 void on_pool_change() override;
1867 void _on_new_interval() override;
1868 void clear_async_reads();
1869 void on_change(ObjectStore::Transaction *t) override;
1870 void on_activate() override;
1871 void on_flushed() override;
1872 void on_removal(ObjectStore::Transaction *t) override;
1873 void on_shutdown() override;
1874 bool check_failsafe_full() override;
1875 bool check_osdmap_full(const set<pg_shard_t> &missing_on) override;
1876 bool maybe_preempt_replica_scrub(const hobject_t& oid) override {
1877 return write_blocked_by_scrub(oid);
1878 }
1879 int rep_repair_primary_object(const hobject_t& soid, OpContext *ctx);
1880
1881 // attr cache handling
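// These wrappers read and write object xattrs through the attr cache kept on
// the ObjectContext when the pool maintains one (notably erasure-coded
// pools), and fall back to the object store otherwise.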
1882 void setattr_maybe_cache(
1883 ObjectContextRef obc,
1884 PGTransaction *t,
1885 const string &key,
1886 bufferlist &val);
1887 void setattrs_maybe_cache(
1888 ObjectContextRef obc,
1889 PGTransaction *t,
1890 map<string, bufferlist> &attrs);
1891 void rmattr_maybe_cache(
1892 ObjectContextRef obc,
1893 PGTransaction *t,
1894 const string &key);
1895 int getattr_maybe_cache(
1896 ObjectContextRef obc,
1897 const string &key,
1898 bufferlist *val);
1899 int getattrs_maybe_cache(
1900 ObjectContextRef obc,
1901 map<string, bufferlist> *out);
1902
1903 public:
1904 void set_dynamic_perf_stats_queries(
1905 const std::list<OSDPerfMetricQuery> &queries) override;
1906 void get_dynamic_perf_stats(DynamicPerfStats *stats) override;
1907
1908 private:
1909 DynamicPerfStats m_dynamic_perf_stats;
1910 };
1911
1912 inline ostream& operator<<(ostream& out, const PrimaryLogPG::RepGather& repop)
1913 {
1914 out << "repgather(" << &repop
1915 << " " << repop.v
1916 << " rep_tid=" << repop.rep_tid
1917 << " committed?=" << repop.all_committed
1918 << " r=" << repop.r
1919 << ")";
1920 return out;
1921 }
1922
1923 inline ostream& operator<<(ostream& out,
1924 const PrimaryLogPG::ProxyWriteOpRef& pwop)
1925 {
1926 out << "proxywrite(" << &pwop
1927 << " " << pwop->user_version
1928 << " pwop_tid=" << pwop->objecter_tid;
1929 if (pwop->ctx->op)
1930 out << " op=" << *(pwop->ctx->op->get_req());
1931 out << ")";
1932 return out;
1933 }
1934
1935 void intrusive_ptr_add_ref(PrimaryLogPG::RepGather *repop);
1936 void intrusive_ptr_release(PrimaryLogPG::RepGather *repop);
1937
1938
1939 #endif