// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2015 Red Hat
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation. See file COPYING.
 *
 */

#ifndef PURGE_QUEUE_H_
#define PURGE_QUEUE_H_

#include "include/compact_set.h"
#include "mds/MDSMap.h"
#include "osdc/Journaler.h"

/**
 * Descriptor of the work associated with purging a file. We record
 * the minimal amount of information from the inode, such as the size
 * and layout: all other unneeded inode metadata (times, permissions, etc.)
 * has been discarded.
 */
class PurgeItem
{
public:
  enum Action : uint8_t {
    NONE = 0,
    PURGE_FILE = 1,
    TRUNCATE_FILE,
    PURGE_DIR
  };

  PurgeItem() {}

  void encode(bufferlist &bl) const;
  void decode(bufferlist::const_iterator &p);

  static Action str_to_type(std::string_view str) {
    return PurgeItem::actions.at(std::string(str));
  }

  void dump(Formatter *f) const
  {
    f->dump_int("action", action);
    f->dump_int("ino", ino);
    f->dump_int("size", size);
    f->open_object_section("layout");
    layout.dump(f);
    f->close_section();
    f->open_object_section("SnapContext");
    snapc.dump(f);
    f->close_section();
    f->open_object_section("fragtree");
    fragtree.dump(f);
    f->close_section();
  }

  std::string_view get_type_str() const;

  utime_t stamp;
  // A PurgeItem with Action NONE serves as a no-op for splicing out journal
  // entries, so pad_size records how much journal space the spliced-out
  // region occupies.
  uint32_t pad_size = 0;
  Action action = NONE;
  inodeno_t ino = 0;
  uint64_t size = 0;
  file_layout_t layout;
  compact_set<int64_t> old_pools;
  SnapContext snapc;
  fragtree_t fragtree;
private:
  static const std::map<std::string, PurgeItem::Action> actions;
};
WRITE_CLASS_ENCODER(PurgeItem)

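// A minimal encode/decode round-trip sketch for PurgeItem (illustrative only;
// the free encode()/decode() overloads come from WRITE_CLASS_ENCODER above,
// and my_ino is a hypothetical inode number):
//
//   PurgeItem pi;
//   pi.action = PurgeItem::PURGE_FILE;
//   pi.ino = my_ino;
//   pi.size = 4096;
//   bufferlist bl;
//   encode(pi, bl);
//   auto p = bl.cbegin();
//   PurgeItem decoded;
//   decode(decoded, p);
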
enum {
  l_pq_first = 3500,

  // How many items have been finished by PurgeQueue
  l_pq_executing_ops,
  l_pq_executing_ops_high_water,
  l_pq_executing,
  l_pq_executing_high_water,
  l_pq_executed,
  l_pq_item_in_journal,
  l_pq_last
};

/**
 * A persistent queue of PurgeItems. This class both writes to and reads
 * from the queue. There is one of these per MDS rank.
 *
 * Note that this class does not take a reference to MDSRank: we are
 * independent of all the metadata structures and do not need to
 * take mds_lock for anything.
 */
class PurgeQueue
{
public:
  PurgeQueue(
      CephContext *cct_,
      mds_rank_t rank_,
      const int64_t metadata_pool_,
      Objecter *objecter_,
      Context *on_error);
  ~PurgeQueue();

  void init();
  void activate();
  void shutdown();

  void create_logger();

  // Write an empty queue; use this during MDS rank creation.
  void create(Context *completion);

  // Read the Journaler header for an existing queue and start consuming.
  void open(Context *completion);

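// A minimal lifecycle sketch (illustrative only; pq, rank_is_new, and the
// completion contexts are hypothetical, and error handling is omitted):
//
//   pq.init();
//   if (rank_is_new)
//     pq.create(new C_OnCreated());  // write an empty queue for a new rank
//   else
//     pq.open(new C_OnOpened());     // load the existing queue and start consuming
//   ...
//   pq.shutdown();
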
  void wait_for_recovery(Context *c);

  // Submit one entry to the work queue. Call back when it is persisted
  // to the queue (there is no callback for when it is executed).
  void push(const PurgeItem &pi, Context *completion);

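// Illustrative call pattern (sketch only; pq, stray_ino, file_size,
// file_layout, and C_OnJournaled are hypothetical):
//
//   PurgeItem pi;
//   pi.action = PurgeItem::PURGE_FILE;
//   pi.ino = stray_ino;
//   pi.size = file_size;
//   pi.layout = file_layout;
//   pq.push(pi, new C_OnJournaled());  // fires once the item is journaled,
//                                      // not when the purge has executed
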
  // Returns true if the on-disk queue is empty and we are not currently
  // processing anything.
  bool is_idle() const;

  /**
   * Signal to the PurgeQueue that you would like it to hurry up and
   * finish consuming everything in the queue. Provides progress
   * feedback.
   *
   * @param progress: bytes consumed since we started draining
   * @param progress_total: max bytes that were outstanding during purge
   * @param in_flight_count: number of file purges currently in flight
   *
   * @returns true if drain is complete
   */
  bool drain(
      uint64_t *progress,
      uint64_t *progress_total,
      size_t *in_flight_count);

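// Example drain loop, e.g. while stopping an MDS rank (sketch only; pq and
// the reporting step are hypothetical):
//
//   uint64_t progress = 0, progress_total = 0;
//   size_t in_flight = 0;
//   while (!pq.drain(&progress, &progress_total, &in_flight)) {
//     // report progress/progress_total and in_flight, then retry later
//   }
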
  void update_op_limit(const MDSMap &mds_map);

  void handle_conf_change(const std::set<std::string>& changed, const MDSMap& mds_map);

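// Illustrative config-change hookup (sketch only; the changed-keys set and
// the specific option name are assumptions):
//
//   std::set<std::string> changed = {"mds_max_purge_ops"};
//   pq.handle_conf_change(changed, mds_map);
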
private:
  uint32_t _calculate_ops(const PurgeItem &item) const;

  bool _can_consume();

  // Recover the journal write_pos (drop any partially written entry).
  void _recover();

  /**
   * @return true if we were in a position to try and consume something:
   *         does not mean we necessarily did.
   */
  bool _consume();

  void _execute_item(const PurgeItem &item, uint64_t expire_to);
  void _execute_item_complete(uint64_t expire_to);

  void _go_readonly(int r);

  CephContext *cct;
  const mds_rank_t rank;
  ceph::mutex lock = ceph::make_mutex("PurgeQueue");
  bool readonly = false;

  int64_t metadata_pool;

  // Don't use the MDSDaemon's Finisher and Timer, because this class
  // operates outside of MDSDaemon::mds_lock
  Finisher finisher;
  SafeTimer timer;
  Filer filer;
  Objecter *objecter;
  std::unique_ptr<PerfCounters> logger;

  Journaler journaler;

  Context *on_error;

  // Map of Journaler offset to PurgeItem
  std::map<uint64_t, PurgeItem> in_flight;

  std::set<uint64_t> pending_expire;

  // Throttled allowances
  uint64_t ops_in_flight = 0;

  // Dynamic op limit per MDS based on PG count
  uint64_t max_purge_ops = 0;

  // How many bytes were remaining when drain() was first called,
  // used for indicating progress.
  uint64_t drain_initial = 0;

  // Has drain() ever been called on this instance?
  bool draining = false;

  // Do we currently have a flush timer event waiting?
  Context *delayed_flush = nullptr;

  bool recovered = false;
  std::vector<Context*> waiting_for_recovery;

  size_t purge_item_journal_size;

  uint64_t ops_high_water = 0;
  uint64_t files_high_water = 0;
};
#endif