]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "common/config.h" | |
16 | #include "osdc/Journaler.h" | |
17 | #include "events/ESubtreeMap.h" | |
18 | #include "events/ESession.h" | |
19 | #include "events/ESessions.h" | |
20 | ||
21 | #include "events/EMetaBlob.h" | |
22 | #include "events/EResetJournal.h" | |
23 | #include "events/ENoOp.h" | |
24 | ||
25 | #include "events/EUpdate.h" | |
26 | #include "events/ESlaveUpdate.h" | |
27 | #include "events/EOpen.h" | |
28 | #include "events/ECommitted.h" | |
9f95a23c | 29 | #include "events/EPurged.h" |
7c673cae FG |
30 | |
31 | #include "events/EExport.h" | |
32 | #include "events/EImportStart.h" | |
33 | #include "events/EImportFinish.h" | |
34 | #include "events/EFragment.h" | |
35 | ||
36 | #include "events/ETableClient.h" | |
37 | #include "events/ETableServer.h" | |
38 | ||
39 | #include "include/stringify.h" | |
40 | ||
41 | #include "LogSegment.h" | |
42 | ||
43 | #include "MDSRank.h" | |
44 | #include "MDLog.h" | |
45 | #include "MDCache.h" | |
46 | #include "Server.h" | |
47 | #include "Migrator.h" | |
48 | #include "Mutation.h" | |
49 | ||
50 | #include "InoTable.h" | |
51 | #include "MDSTableClient.h" | |
52 | #include "MDSTableServer.h" | |
53 | ||
54 | #include "Locker.h" | |
55 | ||
56 | #define dout_context g_ceph_context | |
57 | #define dout_subsys ceph_subsys_mds | |
58 | #undef dout_prefix | |
59 | #define dout_prefix *_dout << "mds." << mds->get_nodeid() << ".journal " | |
60 | ||
61 | ||
62 | // ----------------------- | |
63 | // LogSegment | |
64 | ||
65 | void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int op_prio) | |
66 | { | |
67 | set<CDir*> commit; | |
68 | ||
69 | dout(6) << "LogSegment(" << seq << "/" << offset << ").try_to_expire" << dendl; | |
70 | ||
11fdf7f2 | 71 | ceph_assert(g_conf()->mds_kill_journal_expire_at != 1); |
7c673cae FG |
72 | |
73 | // commit dirs | |
74 | for (elist<CDir*>::iterator p = new_dirfrags.begin(); !p.end(); ++p) { | |
75 | dout(20) << " new_dirfrag " << **p << dendl; | |
11fdf7f2 | 76 | ceph_assert((*p)->is_auth()); |
7c673cae FG |
77 | commit.insert(*p); |
78 | } | |
79 | for (elist<CDir*>::iterator p = dirty_dirfrags.begin(); !p.end(); ++p) { | |
80 | dout(20) << " dirty_dirfrag " << **p << dendl; | |
11fdf7f2 | 81 | ceph_assert((*p)->is_auth()); |
7c673cae FG |
82 | commit.insert(*p); |
83 | } | |
84 | for (elist<CDentry*>::iterator p = dirty_dentries.begin(); !p.end(); ++p) { | |
85 | dout(20) << " dirty_dentry " << **p << dendl; | |
11fdf7f2 | 86 | ceph_assert((*p)->is_auth()); |
7c673cae FG |
87 | commit.insert((*p)->get_dir()); |
88 | } | |
89 | for (elist<CInode*>::iterator p = dirty_inodes.begin(); !p.end(); ++p) { | |
90 | dout(20) << " dirty_inode " << **p << dendl; | |
11fdf7f2 | 91 | ceph_assert((*p)->is_auth()); |
7c673cae FG |
92 | if ((*p)->is_base()) { |
93 | (*p)->store(gather_bld.new_sub()); | |
94 | } else | |
95 | commit.insert((*p)->get_parent_dn()->get_dir()); | |
96 | } | |
97 | ||
98 | if (!commit.empty()) { | |
99 | for (set<CDir*>::iterator p = commit.begin(); | |
100 | p != commit.end(); | |
101 | ++p) { | |
102 | CDir *dir = *p; | |
11fdf7f2 | 103 | ceph_assert(dir->is_auth()); |
7c673cae FG |
104 | if (dir->can_auth_pin()) { |
105 | dout(15) << "try_to_expire committing " << *dir << dendl; | |
106 | dir->commit(0, gather_bld.new_sub(), false, op_prio); | |
107 | } else { | |
108 | dout(15) << "try_to_expire waiting for unfreeze on " << *dir << dendl; | |
109 | dir->add_waiter(CDir::WAIT_UNFREEZE, gather_bld.new_sub()); | |
110 | } | |
111 | } | |
112 | } | |
113 | ||
114 | // master ops with possibly uncommitted slaves | |
115 | for (set<metareqid_t>::iterator p = uncommitted_masters.begin(); | |
116 | p != uncommitted_masters.end(); | |
117 | ++p) { | |
118 | dout(10) << "try_to_expire waiting for slaves to ack commit on " << *p << dendl; | |
119 | mds->mdcache->wait_for_uncommitted_master(*p, gather_bld.new_sub()); | |
120 | } | |
121 | ||
e306af50 TL |
122 | // slave ops that haven't been committed |
123 | for (set<metareqid_t>::iterator p = uncommitted_slaves.begin(); | |
124 | p != uncommitted_slaves.end(); | |
125 | ++p) { | |
126 | dout(10) << "try_to_expire waiting for master to ack OP_FINISH on " << *p << dendl; | |
127 | mds->mdcache->wait_for_uncommitted_slave(*p, gather_bld.new_sub()); | |
128 | } | |
129 | ||
7c673cae FG |
130 | // uncommitted fragments |
131 | for (set<dirfrag_t>::iterator p = uncommitted_fragments.begin(); | |
132 | p != uncommitted_fragments.end(); | |
133 | ++p) { | |
134 | dout(10) << "try_to_expire waiting for uncommitted fragment " << *p << dendl; | |
135 | mds->mdcache->wait_for_uncommitted_fragment(*p, gather_bld.new_sub()); | |
136 | } | |
137 | ||
138 | // nudge scatterlocks | |
139 | for (elist<CInode*>::iterator p = dirty_dirfrag_dir.begin(); !p.end(); ++p) { | |
140 | CInode *in = *p; | |
141 | dout(10) << "try_to_expire waiting for dirlock flush on " << *in << dendl; | |
142 | mds->locker->scatter_nudge(&in->filelock, gather_bld.new_sub()); | |
143 | } | |
144 | for (elist<CInode*>::iterator p = dirty_dirfrag_dirfragtree.begin(); !p.end(); ++p) { | |
145 | CInode *in = *p; | |
146 | dout(10) << "try_to_expire waiting for dirfragtreelock flush on " << *in << dendl; | |
147 | mds->locker->scatter_nudge(&in->dirfragtreelock, gather_bld.new_sub()); | |
148 | } | |
149 | for (elist<CInode*>::iterator p = dirty_dirfrag_nest.begin(); !p.end(); ++p) { | |
150 | CInode *in = *p; | |
151 | dout(10) << "try_to_expire waiting for nest flush on " << *in << dendl; | |
152 | mds->locker->scatter_nudge(&in->nestlock, gather_bld.new_sub()); | |
153 | } | |
154 | ||
11fdf7f2 | 155 | ceph_assert(g_conf()->mds_kill_journal_expire_at != 2); |
7c673cae FG |
156 | |
157 | // open files and snap inodes | |
158 | if (!open_files.empty()) { | |
11fdf7f2 | 159 | ceph_assert(!mds->mdlog->is_capped()); // hmm FIXME |
7c673cae FG |
160 | EOpen *le = 0; |
161 | LogSegment *ls = mds->mdlog->get_current_segment(); | |
11fdf7f2 | 162 | ceph_assert(ls != this); |
7c673cae FG |
163 | elist<CInode*>::iterator p = open_files.begin(member_offset(CInode, item_open_file)); |
164 | while (!p.end()) { | |
165 | CInode *in = *p; | |
166 | ++p; | |
11fdf7f2 | 167 | if (in->last != CEPH_NOSNAP && in->is_auth() && !in->client_snap_caps.empty()) { |
7c673cae FG |
168 | // journal snap inodes that need flush. This simplify the mds failover hanlding |
169 | dout(20) << "try_to_expire requeueing snap needflush inode " << *in << dendl; | |
170 | if (!le) { | |
171 | le = new EOpen(mds->mdlog); | |
172 | mds->mdlog->start_entry(le); | |
173 | } | |
174 | le->add_clean_inode(in); | |
175 | ls->open_files.push_back(&in->item_open_file); | |
176 | } else { | |
11fdf7f2 | 177 | // open files are tracked by open file table, no need to journal them again |
7c673cae FG |
178 | in->item_open_file.remove_myself(); |
179 | } | |
180 | } | |
181 | if (le) { | |
182 | mds->mdlog->submit_entry(le); | |
183 | mds->mdlog->wait_for_safe(gather_bld.new_sub()); | |
184 | dout(10) << "try_to_expire waiting for open files to rejournal" << dendl; | |
185 | } | |
186 | } | |
187 | ||
11fdf7f2 | 188 | ceph_assert(g_conf()->mds_kill_journal_expire_at != 3); |
7c673cae FG |
189 | |
190 | // backtraces to be stored/updated | |
191 | for (elist<CInode*>::iterator p = dirty_parent_inodes.begin(); !p.end(); ++p) { | |
192 | CInode *in = *p; | |
11fdf7f2 | 193 | ceph_assert(in->is_auth()); |
7c673cae FG |
194 | if (in->can_auth_pin()) { |
195 | dout(15) << "try_to_expire waiting for storing backtrace on " << *in << dendl; | |
196 | in->store_backtrace(gather_bld.new_sub(), op_prio); | |
197 | } else { | |
198 | dout(15) << "try_to_expire waiting for unfreeze on " << *in << dendl; | |
199 | in->add_waiter(CInode::WAIT_UNFREEZE, gather_bld.new_sub()); | |
200 | } | |
201 | } | |
202 | ||
11fdf7f2 | 203 | ceph_assert(g_conf()->mds_kill_journal_expire_at != 4); |
7c673cae | 204 | |
7c673cae FG |
205 | // idalloc |
206 | if (inotablev > mds->inotable->get_committed_version()) { | |
207 | dout(10) << "try_to_expire saving inotable table, need " << inotablev | |
208 | << ", committed is " << mds->inotable->get_committed_version() | |
209 | << " (" << mds->inotable->get_committing_version() << ")" | |
210 | << dendl; | |
211 | mds->inotable->save(gather_bld.new_sub(), inotablev); | |
212 | } | |
213 | ||
214 | // sessionmap | |
215 | if (sessionmapv > mds->sessionmap.get_committed()) { | |
216 | dout(10) << "try_to_expire saving sessionmap, need " << sessionmapv | |
217 | << ", committed is " << mds->sessionmap.get_committed() | |
218 | << " (" << mds->sessionmap.get_committing() << ")" | |
219 | << dendl; | |
220 | mds->sessionmap.save(gather_bld.new_sub(), sessionmapv); | |
221 | } | |
222 | ||
223 | // updates to sessions for completed_requests | |
224 | mds->sessionmap.save_if_dirty(touched_sessions, &gather_bld); | |
225 | touched_sessions.clear(); | |
226 | ||
227 | // pending commit atids | |
228 | for (map<int, ceph::unordered_set<version_t> >::iterator p = pending_commit_tids.begin(); | |
229 | p != pending_commit_tids.end(); | |
230 | ++p) { | |
231 | MDSTableClient *client = mds->get_table_client(p->first); | |
11fdf7f2 | 232 | ceph_assert(client); |
7c673cae FG |
233 | for (ceph::unordered_set<version_t>::iterator q = p->second.begin(); |
234 | q != p->second.end(); | |
235 | ++q) { | |
236 | dout(10) << "try_to_expire " << get_mdstable_name(p->first) << " transaction " << *q | |
237 | << " pending commit (not yet acked), waiting" << dendl; | |
11fdf7f2 | 238 | ceph_assert(!client->has_committed(*q)); |
7c673cae FG |
239 | client->wait_for_ack(*q, gather_bld.new_sub()); |
240 | } | |
241 | } | |
242 | ||
243 | // table servers | |
244 | for (map<int, version_t>::iterator p = tablev.begin(); | |
245 | p != tablev.end(); | |
246 | ++p) { | |
247 | MDSTableServer *server = mds->get_table_server(p->first); | |
11fdf7f2 | 248 | ceph_assert(server); |
7c673cae FG |
249 | if (p->second > server->get_committed_version()) { |
250 | dout(10) << "try_to_expire waiting for " << get_mdstable_name(p->first) | |
251 | << " to save, need " << p->second << dendl; | |
252 | server->save(gather_bld.new_sub()); | |
253 | } | |
254 | } | |
255 | ||
256 | // truncating | |
257 | for (set<CInode*>::iterator p = truncating_inodes.begin(); | |
258 | p != truncating_inodes.end(); | |
259 | ++p) { | |
260 | dout(10) << "try_to_expire waiting for truncate of " << **p << dendl; | |
261 | (*p)->add_waiter(CInode::WAIT_TRUNC, gather_bld.new_sub()); | |
262 | } | |
9f95a23c TL |
263 | // purge inodes |
264 | dout(10) << "try_to_expire waiting for purge of " << purge_inodes << dendl; | |
265 | if (purge_inodes.size()) | |
266 | set_purged_cb(gather_bld.new_sub()); | |
7c673cae FG |
267 | |
268 | if (gather_bld.has_subs()) { | |
269 | dout(6) << "LogSegment(" << seq << "/" << offset << ").try_to_expire waiting" << dendl; | |
270 | mds->mdlog->flush(); | |
271 | } else { | |
11fdf7f2 | 272 | ceph_assert(g_conf()->mds_kill_journal_expire_at != 5); |
7c673cae FG |
273 | dout(6) << "LogSegment(" << seq << "/" << offset << ").try_to_expire success" << dendl; |
274 | } | |
275 | } | |
276 | ||
7c673cae FG |
277 | // ----------------------- |
278 | // EMetaBlob | |
279 | ||
7c673cae FG |
280 | void EMetaBlob::add_dir_context(CDir *dir, int mode) |
281 | { | |
282 | MDSRank *mds = dir->cache->mds; | |
283 | ||
284 | list<CDentry*> parents; | |
285 | ||
286 | // it may be okay not to include the maybe items, if | |
287 | // - we journaled the maybe child inode in this segment | |
288 | // - that subtree turns out to be unambiguously auth | |
289 | list<CDentry*> maybe; | |
290 | bool maybenot = false; | |
291 | ||
292 | while (true) { | |
293 | // already have this dir? (we must always add in order) | |
294 | if (lump_map.count(dir->dirfrag())) { | |
295 | dout(20) << "EMetaBlob::add_dir_context(" << dir << ") have lump " << dir->dirfrag() << dendl; | |
296 | break; | |
297 | } | |
298 | ||
299 | // stop at root/stray | |
300 | CInode *diri = dir->get_inode(); | |
301 | CDentry *parent = diri->get_projected_parent_dn(); | |
302 | ||
303 | if (mode == TO_AUTH_SUBTREE_ROOT) { | |
304 | // subtree root? | |
31f18b77 FG |
305 | if (dir->is_subtree_root()) { |
306 | // match logic in MDCache::create_subtree_map() | |
307 | if (dir->get_dir_auth().first == mds->get_nodeid()) { | |
308 | mds_authority_t parent_auth = parent ? parent->authority() : CDIR_AUTH_UNDEF; | |
309 | if (parent_auth.first == dir->get_dir_auth().first) { | |
310 | if (parent_auth.second == CDIR_AUTH_UNKNOWN && | |
311 | !dir->is_ambiguous_dir_auth() && | |
312 | !dir->state_test(CDir::STATE_EXPORTBOUND) && | |
313 | !dir->state_test(CDir::STATE_AUXSUBTREE) && | |
314 | !diri->state_test(CInode::STATE_AMBIGUOUSAUTH)) { | |
315 | dout(0) << "EMetaBlob::add_dir_context unexpected subtree " << *dir << dendl; | |
11fdf7f2 | 316 | ceph_abort(); |
31f18b77 FG |
317 | } |
318 | dout(20) << "EMetaBlob::add_dir_context(" << dir << ") ambiguous or transient subtree " << dendl; | |
7c673cae FG |
319 | } else { |
320 | // it's an auth subtree, we don't need maybe (if any), and we're done. | |
321 | dout(20) << "EMetaBlob::add_dir_context(" << dir << ") reached unambig auth subtree, don't need " << maybe | |
322 | << " at " << *dir << dendl; | |
323 | maybe.clear(); | |
324 | break; | |
325 | } | |
326 | } else { | |
327 | dout(20) << "EMetaBlob::add_dir_context(" << dir << ") reached ambig or !auth subtree, need " << maybe | |
328 | << " at " << *dir << dendl; | |
329 | // we need the maybe list after all! | |
330 | parents.splice(parents.begin(), maybe); | |
331 | maybenot = false; | |
332 | } | |
333 | } | |
31f18b77 | 334 | |
7c673cae FG |
335 | // was the inode journaled in this blob? |
336 | if (event_seq && diri->last_journaled == event_seq) { | |
337 | dout(20) << "EMetaBlob::add_dir_context(" << dir << ") already have diri this blob " << *diri << dendl; | |
338 | break; | |
339 | } | |
340 | ||
341 | // have we journaled this inode since the last subtree map? | |
342 | if (!maybenot && last_subtree_map && diri->last_journaled >= last_subtree_map) { | |
343 | dout(20) << "EMetaBlob::add_dir_context(" << dir << ") already have diri in this segment (" | |
344 | << diri->last_journaled << " >= " << last_subtree_map << "), setting maybenot flag " | |
345 | << *diri << dendl; | |
346 | maybenot = true; | |
347 | } | |
348 | } | |
349 | ||
350 | if (!parent) | |
351 | break; | |
352 | ||
353 | if (maybenot) { | |
354 | dout(25) << "EMetaBlob::add_dir_context(" << dir << ") maybe " << *parent << dendl; | |
355 | maybe.push_front(parent); | |
356 | } else { | |
357 | dout(25) << "EMetaBlob::add_dir_context(" << dir << ") definitely " << *parent << dendl; | |
358 | parents.push_front(parent); | |
359 | } | |
360 | ||
361 | dir = parent->get_dir(); | |
362 | } | |
363 | ||
364 | parents.splice(parents.begin(), maybe); | |
365 | ||
366 | dout(20) << "EMetaBlob::add_dir_context final: " << parents << dendl; | |
9f95a23c TL |
367 | for (const auto& dentry : parents) { |
368 | ceph_assert(dentry->get_projected_linkage()->is_primary()); | |
369 | add_dentry(dentry, false); | |
7c673cae FG |
370 | } |
371 | } | |
372 | ||
373 | void EMetaBlob::update_segment(LogSegment *ls) | |
374 | { | |
375 | // dirty inode mtimes | |
376 | // -> handled directly by Server.cc, replay() | |
377 | ||
378 | // alloc table update? | |
379 | if (inotablev) | |
380 | ls->inotablev = inotablev; | |
381 | if (sessionmapv) | |
382 | ls->sessionmapv = sessionmapv; | |
383 | ||
384 | // truncated inodes | |
385 | // -> handled directly by Server.cc | |
386 | ||
387 | // client requests | |
388 | // note the newest request per client | |
389 | //if (!client_reqs.empty()) | |
390 | // ls->last_client_tid[client_reqs.rbegin()->client] = client_reqs.rbegin()->tid); | |
391 | } | |
392 | ||
393 | // EMetaBlob::fullbit | |
394 | ||
395 | void EMetaBlob::fullbit::encode(bufferlist& bl, uint64_t features) const { | |
396 | ENCODE_START(8, 5, bl); | |
11fdf7f2 TL |
397 | encode(dn, bl); |
398 | encode(dnfirst, bl); | |
399 | encode(dnlast, bl); | |
400 | encode(dnv, bl); | |
401 | encode(inode, bl, features); | |
402 | encode(xattrs, bl); | |
7c673cae | 403 | if (inode.is_symlink()) |
11fdf7f2 | 404 | encode(symlink, bl); |
7c673cae | 405 | if (inode.is_dir()) { |
11fdf7f2 TL |
406 | encode(dirfragtree, bl); |
407 | encode(snapbl, bl); | |
7c673cae | 408 | } |
11fdf7f2 | 409 | encode(state, bl); |
7c673cae | 410 | if (old_inodes.empty()) { |
11fdf7f2 | 411 | encode(false, bl); |
7c673cae | 412 | } else { |
11fdf7f2 TL |
413 | encode(true, bl); |
414 | encode(old_inodes, bl, features); | |
7c673cae FG |
415 | } |
416 | if (!inode.is_dir()) | |
11fdf7f2 TL |
417 | encode(snapbl, bl); |
418 | encode(oldest_snap, bl); | |
7c673cae FG |
419 | ENCODE_FINISH(bl); |
420 | } | |
421 | ||
11fdf7f2 | 422 | void EMetaBlob::fullbit::decode(bufferlist::const_iterator &bl) { |
7c673cae | 423 | DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl); |
11fdf7f2 TL |
424 | decode(dn, bl); |
425 | decode(dnfirst, bl); | |
426 | decode(dnlast, bl); | |
427 | decode(dnv, bl); | |
428 | decode(inode, bl); | |
e306af50 | 429 | decode_noshare(xattrs, bl); |
7c673cae | 430 | if (inode.is_symlink()) |
11fdf7f2 | 431 | decode(symlink, bl); |
7c673cae | 432 | if (inode.is_dir()) { |
11fdf7f2 TL |
433 | decode(dirfragtree, bl); |
434 | decode(snapbl, bl); | |
7c673cae FG |
435 | if ((struct_v == 2) || (struct_v == 3)) { |
436 | bool dir_layout_exists; | |
11fdf7f2 | 437 | decode(dir_layout_exists, bl); |
7c673cae FG |
438 | if (dir_layout_exists) { |
439 | __u8 dir_struct_v; | |
11fdf7f2 TL |
440 | decode(dir_struct_v, bl); // default_file_layout version |
441 | decode(inode.layout, bl); // and actual layout, that we care about | |
7c673cae FG |
442 | } |
443 | } | |
444 | } | |
445 | if (struct_v >= 6) { | |
11fdf7f2 | 446 | decode(state, bl); |
7c673cae FG |
447 | } else { |
448 | bool dirty; | |
11fdf7f2 | 449 | decode(dirty, bl); |
7c673cae FG |
450 | state = dirty ? EMetaBlob::fullbit::STATE_DIRTY : 0; |
451 | } | |
452 | ||
453 | if (struct_v >= 3) { | |
454 | bool old_inodes_present; | |
11fdf7f2 | 455 | decode(old_inodes_present, bl); |
7c673cae | 456 | if (old_inodes_present) { |
11fdf7f2 | 457 | decode(old_inodes, bl); |
7c673cae FG |
458 | } |
459 | } | |
460 | if (!inode.is_dir()) { | |
461 | if (struct_v >= 7) | |
11fdf7f2 | 462 | decode(snapbl, bl); |
7c673cae FG |
463 | } |
464 | if (struct_v >= 8) | |
11fdf7f2 | 465 | decode(oldest_snap, bl); |
7c673cae FG |
466 | else |
467 | oldest_snap = CEPH_NOSNAP; | |
468 | ||
469 | DECODE_FINISH(bl); | |
470 | } | |
471 | ||
472 | void EMetaBlob::fullbit::dump(Formatter *f) const | |
473 | { | |
474 | f->dump_string("dentry", dn); | |
475 | f->dump_stream("snapid.first") << dnfirst; | |
476 | f->dump_stream("snapid.last") << dnlast; | |
477 | f->dump_int("dentry version", dnv); | |
478 | f->open_object_section("inode"); | |
479 | inode.dump(f); | |
480 | f->close_section(); // inode | |
481 | f->open_object_section("xattrs"); | |
94b18763 FG |
482 | for (const auto &p : xattrs) { |
483 | std::string s(p.second.c_str(), p.second.length()); | |
484 | f->dump_string(p.first.c_str(), s); | |
7c673cae FG |
485 | } |
486 | f->close_section(); // xattrs | |
487 | if (inode.is_symlink()) { | |
488 | f->dump_string("symlink", symlink); | |
489 | } | |
490 | if (inode.is_dir()) { | |
491 | f->dump_stream("frag tree") << dirfragtree; | |
492 | f->dump_string("has_snapbl", snapbl.length() ? "true" : "false"); | |
493 | if (inode.has_layout()) { | |
494 | f->open_object_section("file layout policy"); | |
495 | // FIXME | |
496 | f->dump_string("layout", "the layout exists"); | |
497 | f->close_section(); // file layout policy | |
498 | } | |
499 | } | |
500 | f->dump_string("state", state_string()); | |
501 | if (!old_inodes.empty()) { | |
502 | f->open_array_section("old inodes"); | |
94b18763 | 503 | for (const auto &p : old_inodes) { |
7c673cae | 504 | f->open_object_section("inode"); |
94b18763 FG |
505 | f->dump_int("snapid", p.first); |
506 | p.second.dump(f); | |
7c673cae FG |
507 | f->close_section(); // inode |
508 | } | |
509 | f->close_section(); // old inodes | |
510 | } | |
511 | } | |
512 | ||
9f95a23c | 513 | void EMetaBlob::fullbit::generate_test_instances(std::list<EMetaBlob::fullbit*>& ls) |
7c673cae | 514 | { |
94b18763 | 515 | CInode::mempool_inode inode; |
7c673cae | 516 | fragtree_t fragtree; |
94b18763 | 517 | CInode::mempool_xattr_map empty_xattrs; |
7c673cae FG |
518 | bufferlist empty_snapbl; |
519 | fullbit *sample = new fullbit("/testdn", 0, 0, 0, | |
520 | inode, fragtree, empty_xattrs, "", 0, empty_snapbl, | |
521 | false, NULL); | |
522 | ls.push_back(sample); | |
523 | } | |
524 | ||
525 | void EMetaBlob::fullbit::update_inode(MDSRank *mds, CInode *in) | |
526 | { | |
527 | in->inode = inode; | |
528 | in->xattrs = xattrs; | |
529 | if (in->inode.is_dir()) { | |
f6b5b4d7 TL |
530 | if (is_export_ephemeral_random()) { |
531 | dout(15) << "random ephemeral pin on " << *in << dendl; | |
532 | in->set_ephemeral_rand(true); | |
533 | in->maybe_ephemeral_rand(true); | |
534 | } | |
535 | in->maybe_ephemeral_dist(); | |
536 | in->maybe_export_pin(); | |
7c673cae FG |
537 | if (!(in->dirfragtree == dirfragtree)) { |
538 | dout(10) << "EMetaBlob::fullbit::update_inode dft " << in->dirfragtree << " -> " | |
539 | << dirfragtree << " on " << *in << dendl; | |
540 | in->dirfragtree = dirfragtree; | |
541 | in->force_dirfrags(); | |
9f95a23c TL |
542 | if (in->get_num_dirfrags() && in->authority() == CDIR_AUTH_UNDEF) { |
543 | auto&& ls = in->get_nested_dirfrags(); | |
544 | for (const auto& dir : ls) { | |
7c673cae FG |
545 | if (dir->get_num_any() == 0 && |
546 | mds->mdcache->can_trim_non_auth_dirfrag(dir)) { | |
547 | dout(10) << " closing empty non-auth dirfrag " << *dir << dendl; | |
548 | in->close_dirfrag(dir->get_frag()); | |
549 | } | |
550 | } | |
551 | } | |
552 | } | |
553 | } else if (in->inode.is_symlink()) { | |
11fdf7f2 | 554 | in->symlink = symlink; |
7c673cae FG |
555 | } |
556 | in->old_inodes = old_inodes; | |
557 | if (!in->old_inodes.empty()) { | |
558 | snapid_t min_first = in->old_inodes.rbegin()->first + 1; | |
559 | if (min_first > in->first) | |
560 | in->first = min_first; | |
561 | } | |
562 | ||
563 | /* | |
564 | * we can do this before linking hte inode bc the split_at would | |
565 | * be a no-op.. we have no children (namely open snaprealms) to | |
566 | * divy up | |
567 | */ | |
568 | in->oldest_snap = oldest_snap; | |
569 | in->decode_snap_blob(snapbl); | |
570 | ||
571 | /* | |
572 | * In case there was anything malformed in the journal that we are | |
573 | * replaying, do sanity checks on the inodes we're replaying and | |
574 | * go damaged instead of letting any trash into a live cache | |
575 | */ | |
576 | if (in->is_file()) { | |
577 | // Files must have valid layouts with a pool set | |
578 | if (in->inode.layout.pool_id == -1 || !in->inode.layout.is_valid()) { | |
579 | dout(0) << "EMetaBlob.replay invalid layout on ino " << *in | |
580 | << ": " << in->inode.layout << dendl; | |
581 | std::ostringstream oss; | |
11fdf7f2 | 582 | oss << "Invalid layout for inode " << in->ino() << " in journal"; |
7c673cae FG |
583 | mds->clog->error() << oss.str(); |
584 | mds->damaged(); | |
585 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
586 | } | |
587 | } | |
588 | } | |
589 | ||
590 | // EMetaBlob::remotebit | |
591 | ||
592 | void EMetaBlob::remotebit::encode(bufferlist& bl) const | |
593 | { | |
594 | ENCODE_START(2, 2, bl); | |
11fdf7f2 TL |
595 | encode(dn, bl); |
596 | encode(dnfirst, bl); | |
597 | encode(dnlast, bl); | |
598 | encode(dnv, bl); | |
599 | encode(ino, bl); | |
600 | encode(d_type, bl); | |
601 | encode(dirty, bl); | |
7c673cae FG |
602 | ENCODE_FINISH(bl); |
603 | } | |
604 | ||
11fdf7f2 | 605 | void EMetaBlob::remotebit::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
606 | { |
607 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
11fdf7f2 TL |
608 | decode(dn, bl); |
609 | decode(dnfirst, bl); | |
610 | decode(dnlast, bl); | |
611 | decode(dnv, bl); | |
612 | decode(ino, bl); | |
613 | decode(d_type, bl); | |
614 | decode(dirty, bl); | |
7c673cae FG |
615 | DECODE_FINISH(bl); |
616 | } | |
617 | ||
618 | void EMetaBlob::remotebit::dump(Formatter *f) const | |
619 | { | |
620 | f->dump_string("dentry", dn); | |
621 | f->dump_int("snapid.first", dnfirst); | |
622 | f->dump_int("snapid.last", dnlast); | |
623 | f->dump_int("dentry version", dnv); | |
624 | f->dump_int("inodeno", ino); | |
625 | uint32_t type = DTTOIF(d_type) & S_IFMT; // convert to type entries | |
626 | string type_string; | |
627 | switch(type) { | |
628 | case S_IFREG: | |
629 | type_string = "file"; break; | |
630 | case S_IFLNK: | |
631 | type_string = "symlink"; break; | |
632 | case S_IFDIR: | |
633 | type_string = "directory"; break; | |
634 | case S_IFIFO: | |
635 | type_string = "fifo"; break; | |
636 | case S_IFCHR: | |
637 | type_string = "chr"; break; | |
638 | case S_IFBLK: | |
639 | type_string = "blk"; break; | |
640 | case S_IFSOCK: | |
641 | type_string = "sock"; break; | |
642 | default: | |
643 | assert (0 == "unknown d_type!"); | |
644 | } | |
645 | f->dump_string("d_type", type_string); | |
646 | f->dump_string("dirty", dirty ? "true" : "false"); | |
647 | } | |
648 | ||
649 | void EMetaBlob::remotebit:: | |
9f95a23c | 650 | generate_test_instances(std::list<EMetaBlob::remotebit*>& ls) |
7c673cae FG |
651 | { |
652 | remotebit *remote = new remotebit("/test/dn", 0, 10, 15, 1, IFTODT(S_IFREG), false); | |
653 | ls.push_back(remote); | |
654 | } | |
655 | ||
656 | // EMetaBlob::nullbit | |
657 | ||
658 | void EMetaBlob::nullbit::encode(bufferlist& bl) const | |
659 | { | |
660 | ENCODE_START(2, 2, bl); | |
11fdf7f2 TL |
661 | encode(dn, bl); |
662 | encode(dnfirst, bl); | |
663 | encode(dnlast, bl); | |
664 | encode(dnv, bl); | |
665 | encode(dirty, bl); | |
7c673cae FG |
666 | ENCODE_FINISH(bl); |
667 | } | |
668 | ||
11fdf7f2 | 669 | void EMetaBlob::nullbit::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
670 | { |
671 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
11fdf7f2 TL |
672 | decode(dn, bl); |
673 | decode(dnfirst, bl); | |
674 | decode(dnlast, bl); | |
675 | decode(dnv, bl); | |
676 | decode(dirty, bl); | |
7c673cae FG |
677 | DECODE_FINISH(bl); |
678 | } | |
679 | ||
680 | void EMetaBlob::nullbit::dump(Formatter *f) const | |
681 | { | |
682 | f->dump_string("dentry", dn); | |
683 | f->dump_int("snapid.first", dnfirst); | |
684 | f->dump_int("snapid.last", dnlast); | |
685 | f->dump_int("dentry version", dnv); | |
686 | f->dump_string("dirty", dirty ? "true" : "false"); | |
687 | } | |
688 | ||
9f95a23c | 689 | void EMetaBlob::nullbit::generate_test_instances(std::list<nullbit*>& ls) |
7c673cae FG |
690 | { |
691 | nullbit *sample = new nullbit("/test/dentry", 0, 10, 15, false); | |
692 | nullbit *sample2 = new nullbit("/test/dirty", 10, 20, 25, true); | |
693 | ls.push_back(sample); | |
694 | ls.push_back(sample2); | |
695 | } | |
696 | ||
697 | // EMetaBlob::dirlump | |
698 | ||
699 | void EMetaBlob::dirlump::encode(bufferlist& bl, uint64_t features) const | |
700 | { | |
701 | ENCODE_START(2, 2, bl); | |
11fdf7f2 TL |
702 | encode(fnode, bl); |
703 | encode(state, bl); | |
704 | encode(nfull, bl); | |
705 | encode(nremote, bl); | |
706 | encode(nnull, bl); | |
7c673cae | 707 | _encode_bits(features); |
11fdf7f2 | 708 | encode(dnbl, bl); |
7c673cae FG |
709 | ENCODE_FINISH(bl); |
710 | } | |
711 | ||
11fdf7f2 | 712 | void EMetaBlob::dirlump::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
713 | { |
714 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl) | |
11fdf7f2 TL |
715 | decode(fnode, bl); |
716 | decode(state, bl); | |
717 | decode(nfull, bl); | |
718 | decode(nremote, bl); | |
719 | decode(nnull, bl); | |
720 | decode(dnbl, bl); | |
7c673cae FG |
721 | dn_decoded = false; // don't decode bits unless we need them. |
722 | DECODE_FINISH(bl); | |
723 | } | |
724 | ||
725 | void EMetaBlob::dirlump::dump(Formatter *f) const | |
726 | { | |
727 | if (!dn_decoded) { | |
728 | dirlump *me = const_cast<dirlump*>(this); | |
729 | me->_decode_bits(); | |
730 | } | |
731 | f->open_object_section("fnode"); | |
732 | fnode.dump(f); | |
733 | f->close_section(); // fnode | |
734 | f->dump_string("state", state_string()); | |
735 | f->dump_int("nfull", nfull); | |
736 | f->dump_int("nremote", nremote); | |
737 | f->dump_int("nnull", nnull); | |
738 | ||
739 | f->open_array_section("full bits"); | |
11fdf7f2 | 740 | for (const auto& iter : dfull) { |
7c673cae | 741 | f->open_object_section("fullbit"); |
11fdf7f2 | 742 | iter.dump(f); |
7c673cae FG |
743 | f->close_section(); // fullbit |
744 | } | |
745 | f->close_section(); // full bits | |
746 | f->open_array_section("remote bits"); | |
11fdf7f2 | 747 | for (const auto& iter : dremote) { |
7c673cae | 748 | f->open_object_section("remotebit"); |
11fdf7f2 | 749 | iter.dump(f); |
7c673cae FG |
750 | f->close_section(); // remotebit |
751 | } | |
752 | f->close_section(); // remote bits | |
753 | f->open_array_section("null bits"); | |
11fdf7f2 | 754 | for (const auto& iter : dnull) { |
7c673cae | 755 | f->open_object_section("null bit"); |
11fdf7f2 | 756 | iter.dump(f); |
7c673cae FG |
757 | f->close_section(); // null bit |
758 | } | |
759 | f->close_section(); // null bits | |
760 | } | |
761 | ||
9f95a23c | 762 | void EMetaBlob::dirlump::generate_test_instances(std::list<dirlump*>& ls) |
7c673cae FG |
763 | { |
764 | ls.push_back(new dirlump()); | |
765 | } | |
766 | ||
767 | /** | |
768 | * EMetaBlob proper | |
769 | */ | |
770 | void EMetaBlob::encode(bufferlist& bl, uint64_t features) const | |
771 | { | |
772 | ENCODE_START(8, 5, bl); | |
11fdf7f2 TL |
773 | encode(lump_order, bl); |
774 | encode(lump_map, bl, features); | |
775 | encode(roots, bl, features); | |
776 | encode(table_tids, bl); | |
777 | encode(opened_ino, bl); | |
778 | encode(allocated_ino, bl); | |
779 | encode(used_preallocated_ino, bl); | |
780 | encode(preallocated_inos, bl); | |
781 | encode(client_name, bl); | |
782 | encode(inotablev, bl); | |
783 | encode(sessionmapv, bl); | |
784 | encode(truncate_start, bl); | |
785 | encode(truncate_finish, bl); | |
786 | encode(destroyed_inodes, bl); | |
787 | encode(client_reqs, bl); | |
788 | encode(renamed_dirino, bl); | |
789 | encode(renamed_dir_frags, bl); | |
7c673cae FG |
790 | { |
791 | // make MDSRank use v6 format happy | |
792 | int64_t i = -1; | |
793 | bool b = false; | |
11fdf7f2 TL |
794 | encode(i, bl); |
795 | encode(b, bl); | |
7c673cae | 796 | } |
11fdf7f2 | 797 | encode(client_flushes, bl); |
7c673cae FG |
798 | ENCODE_FINISH(bl); |
799 | } | |
11fdf7f2 | 800 | void EMetaBlob::decode(bufferlist::const_iterator &bl) |
7c673cae | 801 | { |
9f95a23c | 802 | DECODE_START_LEGACY_COMPAT_LEN(8, 5, 5, bl); |
11fdf7f2 TL |
803 | decode(lump_order, bl); |
804 | decode(lump_map, bl); | |
7c673cae | 805 | if (struct_v >= 4) { |
11fdf7f2 | 806 | decode(roots, bl); |
7c673cae FG |
807 | } else { |
808 | bufferlist rootbl; | |
11fdf7f2 | 809 | decode(rootbl, bl); |
7c673cae | 810 | if (rootbl.length()) { |
11fdf7f2 TL |
811 | auto p = rootbl.cbegin(); |
812 | roots.emplace_back(p); | |
7c673cae FG |
813 | } |
814 | } | |
11fdf7f2 TL |
815 | decode(table_tids, bl); |
816 | decode(opened_ino, bl); | |
817 | decode(allocated_ino, bl); | |
818 | decode(used_preallocated_ino, bl); | |
819 | decode(preallocated_inos, bl); | |
820 | decode(client_name, bl); | |
821 | decode(inotablev, bl); | |
822 | decode(sessionmapv, bl); | |
823 | decode(truncate_start, bl); | |
824 | decode(truncate_finish, bl); | |
825 | decode(destroyed_inodes, bl); | |
7c673cae | 826 | if (struct_v >= 2) { |
11fdf7f2 | 827 | decode(client_reqs, bl); |
7c673cae FG |
828 | } else { |
829 | list<metareqid_t> r; | |
11fdf7f2 | 830 | decode(r, bl); |
7c673cae FG |
831 | while (!r.empty()) { |
832 | client_reqs.push_back(pair<metareqid_t,uint64_t>(r.front(), 0)); | |
833 | r.pop_front(); | |
834 | } | |
835 | } | |
836 | if (struct_v >= 3) { | |
11fdf7f2 TL |
837 | decode(renamed_dirino, bl); |
838 | decode(renamed_dir_frags, bl); | |
7c673cae FG |
839 | } |
840 | if (struct_v >= 6) { | |
841 | // ignore | |
842 | int64_t i; | |
843 | bool b; | |
11fdf7f2 TL |
844 | decode(i, bl); |
845 | decode(b, bl); | |
7c673cae FG |
846 | } |
847 | if (struct_v >= 8) { | |
11fdf7f2 | 848 | decode(client_flushes, bl); |
7c673cae FG |
849 | } |
850 | DECODE_FINISH(bl); | |
851 | } | |
852 | ||
853 | ||
854 | /** | |
855 | * Get all inodes touched by this metablob. Includes the 'bits' within | |
856 | * dirlumps, and the inodes of the dirs themselves. | |
857 | */ | |
858 | void EMetaBlob::get_inodes( | |
859 | std::set<inodeno_t> &inodes) const | |
860 | { | |
861 | // For all dirlumps in this metablob | |
862 | for (std::map<dirfrag_t, dirlump>::const_iterator i = lump_map.begin(); i != lump_map.end(); ++i) { | |
863 | // Record inode of dirlump | |
864 | inodeno_t const dir_ino = i->first.ino; | |
865 | inodes.insert(dir_ino); | |
866 | ||
867 | // Decode dirlump bits | |
868 | dirlump const &dl = i->second; | |
869 | dl._decode_bits(); | |
870 | ||
871 | // Record inodes of fullbits | |
11fdf7f2 TL |
872 | for (const auto& iter : dl.get_dfull()) { |
873 | inodes.insert(iter.inode.ino); | |
7c673cae FG |
874 | } |
875 | ||
876 | // Record inodes of remotebits | |
11fdf7f2 TL |
877 | for (const auto& iter : dl.get_dremote()) { |
878 | inodes.insert(iter.ino); | |
7c673cae FG |
879 | } |
880 | } | |
881 | } | |
882 | ||
883 | ||
884 | /** | |
885 | * Get a map of dirfrag to set of dentries in that dirfrag which are | |
886 | * touched in this operation. | |
887 | */ | |
888 | void EMetaBlob::get_dentries(std::map<dirfrag_t, std::set<std::string> > &dentries) const | |
889 | { | |
890 | for (std::map<dirfrag_t, dirlump>::const_iterator i = lump_map.begin(); i != lump_map.end(); ++i) { | |
891 | dirlump const &dl = i->second; | |
892 | dirfrag_t const &df = i->first; | |
893 | ||
894 | // Get all bits | |
895 | dl._decode_bits(); | |
7c673cae FG |
896 | |
897 | // For all bits, store dentry | |
11fdf7f2 TL |
898 | for (const auto& iter : dl.get_dfull()) { |
899 | dentries[df].insert(iter.dn); | |
7c673cae | 900 | } |
11fdf7f2 TL |
901 | for (const auto& iter : dl.get_dremote()) { |
902 | dentries[df].insert(iter.dn); | |
7c673cae | 903 | } |
11fdf7f2 TL |
904 | for (const auto& iter : dl.get_dnull()) { |
905 | dentries[df].insert(iter.dn); | |
7c673cae FG |
906 | } |
907 | } | |
908 | } | |
909 | ||
910 | ||
911 | ||
912 | /** | |
913 | * Calculate all paths that we can infer are touched by this metablob. Only uses | |
914 | * information local to this metablob so it may only be the path within the | |
915 | * subtree. | |
916 | */ | |
917 | void EMetaBlob::get_paths( | |
918 | std::vector<std::string> &paths) const | |
919 | { | |
920 | // Each dentry has a 'location' which is a 2-tuple of parent inode and dentry name | |
921 | typedef std::pair<inodeno_t, std::string> Location; | |
922 | ||
923 | // Whenever we see a dentry within a dirlump, we remember it as a child of | |
924 | // the dirlump's inode | |
9f95a23c | 925 | std::map<inodeno_t, std::vector<std::string> > children; |
7c673cae FG |
926 | |
927 | // Whenever we see a location for an inode, remember it: this allows us to | |
928 | // build a path given an inode | |
929 | std::map<inodeno_t, Location> ino_locations; | |
930 | ||
931 | // Special case: operations on root inode populate roots but not dirlumps | |
932 | if (lump_map.empty() && !roots.empty()) { | |
933 | paths.push_back("/"); | |
934 | return; | |
935 | } | |
936 | ||
937 | // First pass | |
938 | // ========== | |
939 | // Build a tiny local metadata cache for the path structure in this metablob | |
940 | for (std::map<dirfrag_t, dirlump>::const_iterator i = lump_map.begin(); i != lump_map.end(); ++i) { | |
941 | inodeno_t const dir_ino = i->first.ino; | |
942 | dirlump const &dl = i->second; | |
943 | dl._decode_bits(); | |
944 | ||
11fdf7f2 TL |
945 | for (const auto& iter : dl.get_dfull()) { |
946 | std::string_view dentry = iter.dn; | |
94b18763 | 947 | children[dir_ino].emplace_back(dentry); |
11fdf7f2 | 948 | ino_locations[iter.inode.ino] = Location(dir_ino, dentry); |
7c673cae FG |
949 | } |
950 | ||
11fdf7f2 TL |
951 | for (const auto& iter : dl.get_dremote()) { |
952 | std::string_view dentry = iter.dn; | |
94b18763 | 953 | children[dir_ino].emplace_back(dentry); |
7c673cae FG |
954 | } |
955 | ||
11fdf7f2 TL |
956 | for (const auto& iter : dl.get_dnull()) { |
957 | std::string_view dentry = iter.dn; | |
94b18763 | 958 | children[dir_ino].emplace_back(dentry); |
7c673cae FG |
959 | } |
960 | } | |
961 | ||
962 | std::vector<Location> leaf_locations; | |
963 | ||
964 | // Second pass | |
965 | // =========== | |
966 | // Output paths for all childless nodes in the metablob | |
967 | for (std::map<dirfrag_t, dirlump>::const_iterator i = lump_map.begin(); i != lump_map.end(); ++i) { | |
968 | inodeno_t const dir_ino = i->first.ino; | |
969 | dirlump const &dl = i->second; | |
970 | dl._decode_bits(); | |
971 | ||
11fdf7f2 TL |
972 | for (const auto& iter : dl.get_dfull()) { |
973 | std::string_view dentry = iter.dn; | |
974 | if (children.find(iter.inode.ino) == children.end()) { | |
975 | leaf_locations.push_back(Location(dir_ino, dentry)); | |
7c673cae FG |
976 | } |
977 | } | |
978 | ||
11fdf7f2 TL |
979 | for (const auto& iter : dl.get_dremote()) { |
980 | std::string_view dentry = iter.dn; | |
981 | leaf_locations.push_back(Location(dir_ino, dentry)); | |
7c673cae FG |
982 | } |
983 | ||
11fdf7f2 TL |
984 | for (const auto& iter : dl.get_dnull()) { |
985 | std::string_view dentry = iter.dn; | |
986 | leaf_locations.push_back(Location(dir_ino, dentry)); | |
7c673cae FG |
987 | } |
988 | } | |
989 | ||
990 | // For all the leaf locations identified, generate paths | |
991 | for (std::vector<Location>::iterator i = leaf_locations.begin(); i != leaf_locations.end(); ++i) { | |
992 | Location const &loc = *i; | |
993 | std::string path = loc.second; | |
994 | inodeno_t ino = loc.first; | |
11fdf7f2 TL |
995 | std::map<inodeno_t, Location>::iterator iter = ino_locations.find(ino); |
996 | while(iter != ino_locations.end()) { | |
997 | Location const &loc = iter->second; | |
7c673cae FG |
998 | if (!path.empty()) { |
999 | path = loc.second + "/" + path; | |
1000 | } else { | |
1001 | path = loc.second + path; | |
1002 | } | |
11fdf7f2 | 1003 | iter = ino_locations.find(loc.first); |
7c673cae FG |
1004 | } |
1005 | ||
1006 | paths.push_back(path); | |
1007 | } | |
1008 | } | |
1009 | ||
1010 | ||
1011 | void EMetaBlob::dump(Formatter *f) const | |
1012 | { | |
1013 | f->open_array_section("lumps"); | |
11fdf7f2 | 1014 | for (const auto& d : lump_order) { |
7c673cae FG |
1015 | f->open_object_section("lump"); |
1016 | f->open_object_section("dirfrag"); | |
11fdf7f2 | 1017 | f->dump_stream("dirfrag") << d; |
7c673cae FG |
1018 | f->close_section(); // dirfrag |
1019 | f->open_object_section("dirlump"); | |
11fdf7f2 | 1020 | lump_map.at(d).dump(f); |
7c673cae FG |
1021 | f->close_section(); // dirlump |
1022 | f->close_section(); // lump | |
1023 | } | |
1024 | f->close_section(); // lumps | |
1025 | ||
1026 | f->open_array_section("roots"); | |
11fdf7f2 | 1027 | for (const auto& iter : roots) { |
7c673cae | 1028 | f->open_object_section("root"); |
11fdf7f2 | 1029 | iter.dump(f); |
7c673cae FG |
1030 | f->close_section(); // root |
1031 | } | |
1032 | f->close_section(); // roots | |
1033 | ||
1034 | f->open_array_section("tableclient tranactions"); | |
11fdf7f2 | 1035 | for (const auto& p : table_tids) { |
7c673cae | 1036 | f->open_object_section("transaction"); |
11fdf7f2 TL |
1037 | f->dump_int("tid", p.first); |
1038 | f->dump_int("version", p.second); | |
7c673cae FG |
1039 | f->close_section(); // transaction |
1040 | } | |
1041 | f->close_section(); // tableclient transactions | |
1042 | ||
1043 | f->dump_int("renamed directory inodeno", renamed_dirino); | |
1044 | ||
1045 | f->open_array_section("renamed directory fragments"); | |
11fdf7f2 TL |
1046 | for (const auto& p : renamed_dir_frags) { |
1047 | f->dump_int("frag", p); | |
7c673cae FG |
1048 | } |
1049 | f->close_section(); // renamed directory fragments | |
1050 | ||
1051 | f->dump_int("inotable version", inotablev); | |
1052 | f->dump_int("SessionMap version", sessionmapv); | |
1053 | f->dump_int("allocated ino", allocated_ino); | |
1054 | ||
1055 | f->dump_stream("preallocated inos") << preallocated_inos; | |
1056 | f->dump_int("used preallocated ino", used_preallocated_ino); | |
1057 | ||
1058 | f->open_object_section("client name"); | |
1059 | client_name.dump(f); | |
1060 | f->close_section(); // client name | |
1061 | ||
1062 | f->open_array_section("inodes starting a truncate"); | |
11fdf7f2 TL |
1063 | for(const auto& ino : truncate_start) { |
1064 | f->dump_int("inodeno", ino); | |
7c673cae FG |
1065 | } |
1066 | f->close_section(); // truncate inodes | |
1067 | f->open_array_section("inodes finishing a truncated"); | |
11fdf7f2 | 1068 | for(const auto& p : truncate_finish) { |
7c673cae | 1069 | f->open_object_section("inode+segment"); |
11fdf7f2 TL |
1070 | f->dump_int("inodeno", p.first); |
1071 | f->dump_int("truncate starting segment", p.second); | |
7c673cae FG |
1072 | f->close_section(); // truncated inode |
1073 | } | |
1074 | f->close_section(); // truncate finish inodes | |
1075 | ||
1076 | f->open_array_section("destroyed inodes"); | |
1077 | for(vector<inodeno_t>::const_iterator i = destroyed_inodes.begin(); | |
1078 | i != destroyed_inodes.end(); ++i) { | |
1079 | f->dump_int("inodeno", *i); | |
1080 | } | |
1081 | f->close_section(); // destroyed inodes | |
1082 | ||
1083 | f->open_array_section("client requests"); | |
11fdf7f2 | 1084 | for(const auto& p : client_reqs) { |
7c673cae | 1085 | f->open_object_section("Client request"); |
11fdf7f2 TL |
1086 | f->dump_stream("request ID") << p.first; |
1087 | f->dump_int("oldest request on client", p.second); | |
7c673cae FG |
1088 | f->close_section(); // request |
1089 | } | |
1090 | f->close_section(); // client requests | |
1091 | } | |
1092 | ||
9f95a23c | 1093 | void EMetaBlob::generate_test_instances(std::list<EMetaBlob*>& ls) |
7c673cae FG |
1094 | { |
1095 | ls.push_back(new EMetaBlob()); | |
1096 | } | |
1097 | ||
1098 | void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) | |
1099 | { | |
1100 | dout(10) << "EMetaBlob.replay " << lump_map.size() << " dirlumps by " << client_name << dendl; | |
1101 | ||
11fdf7f2 | 1102 | ceph_assert(logseg); |
7c673cae | 1103 | |
11fdf7f2 | 1104 | ceph_assert(g_conf()->mds_kill_journal_replay_at != 1); |
7c673cae | 1105 | |
11fdf7f2 TL |
1106 | for (auto& p : roots) { |
1107 | CInode *in = mds->mdcache->get_inode(p.inode.ino); | |
7c673cae FG |
1108 | bool isnew = in ? false:true; |
1109 | if (!in) | |
11fdf7f2 TL |
1110 | in = new CInode(mds->mdcache, false, 2, CEPH_NOSNAP); |
1111 | p.update_inode(mds, in); | |
7c673cae FG |
1112 | |
1113 | if (isnew) | |
1114 | mds->mdcache->add_inode(in); | |
11fdf7f2 | 1115 | if (p.is_dirty()) in->_mark_dirty(logseg); |
7c673cae FG |
1116 | dout(10) << "EMetaBlob.replay " << (isnew ? " added root ":" updated root ") << *in << dendl; |
1117 | } | |
1118 | ||
1119 | CInode *renamed_diri = 0; | |
1120 | CDir *olddir = 0; | |
1121 | if (renamed_dirino) { | |
1122 | renamed_diri = mds->mdcache->get_inode(renamed_dirino); | |
1123 | if (renamed_diri) | |
1124 | dout(10) << "EMetaBlob.replay renamed inode is " << *renamed_diri << dendl; | |
1125 | else | |
1126 | dout(10) << "EMetaBlob.replay don't have renamed ino " << renamed_dirino << dendl; | |
1127 | ||
1128 | int nnull = 0; | |
11fdf7f2 TL |
1129 | for (const auto& lp : lump_order) { |
1130 | dirlump &lump = lump_map[lp]; | |
7c673cae | 1131 | if (lump.nnull) { |
11fdf7f2 | 1132 | dout(10) << "EMetaBlob.replay found null dentry in dir " << lp << dendl; |
7c673cae FG |
1133 | nnull += lump.nnull; |
1134 | } | |
1135 | } | |
11fdf7f2 | 1136 | ceph_assert(nnull <= 1); |
7c673cae FG |
1137 | } |
1138 | ||
1139 | // keep track of any inodes we unlink and don't relink elsewhere | |
1140 | map<CInode*, CDir*> unlinked; | |
1141 | set<CInode*> linked; | |
1142 | ||
1143 | // walk through my dirs (in order!) | |
f6b5b4d7 | 1144 | int count = 0; |
11fdf7f2 TL |
1145 | for (const auto& lp : lump_order) { |
1146 | dout(10) << "EMetaBlob.replay dir " << lp << dendl; | |
1147 | dirlump &lump = lump_map[lp]; | |
7c673cae FG |
1148 | |
1149 | // the dir | |
11fdf7f2 | 1150 | CDir *dir = mds->mdcache->get_force_dirfrag(lp, true); |
7c673cae FG |
1151 | if (!dir) { |
1152 | // hmm. do i have the inode? | |
11fdf7f2 | 1153 | CInode *diri = mds->mdcache->get_inode((lp).ino); |
7c673cae | 1154 | if (!diri) { |
11fdf7f2 TL |
1155 | if (MDS_INO_IS_MDSDIR(lp.ino)) { |
1156 | ceph_assert(MDS_INO_MDSDIR(mds->get_nodeid()) != lp.ino); | |
1157 | diri = mds->mdcache->create_system_inode(lp.ino, S_IFDIR|0755); | |
7c673cae FG |
1158 | diri->state_clear(CInode::STATE_AUTH); |
1159 | dout(10) << "EMetaBlob.replay created base " << *diri << dendl; | |
1160 | } else { | |
11fdf7f2 | 1161 | dout(0) << "EMetaBlob.replay missing dir ino " << lp.ino << dendl; |
7c673cae FG |
1162 | mds->clog->error() << "failure replaying journal (EMetaBlob)"; |
1163 | mds->damaged(); | |
1164 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
1165 | } | |
1166 | } | |
1167 | ||
1168 | // create the dirfrag | |
11fdf7f2 | 1169 | dir = diri->get_or_open_dirfrag(mds->mdcache, lp.frag); |
7c673cae | 1170 | |
11fdf7f2 | 1171 | if (MDS_INO_IS_BASE(lp.ino)) |
7c673cae FG |
1172 | mds->mdcache->adjust_subtree_auth(dir, CDIR_AUTH_UNDEF); |
1173 | ||
1174 | dout(10) << "EMetaBlob.replay added dir " << *dir << dendl; | |
1175 | } | |
1176 | dir->set_version( lump.fnode.version ); | |
1177 | dir->fnode = lump.fnode; | |
1178 | ||
1179 | if (lump.is_importing()) { | |
1180 | dir->state_set(CDir::STATE_AUTH); | |
1181 | dir->state_clear(CDir::STATE_COMPLETE); | |
1182 | } | |
1183 | if (lump.is_dirty()) { | |
1184 | dir->_mark_dirty(logseg); | |
1185 | ||
1186 | if (!(dir->fnode.rstat == dir->fnode.accounted_rstat)) { | |
1187 | dout(10) << "EMetaBlob.replay dirty nestinfo on " << *dir << dendl; | |
1188 | mds->locker->mark_updated_scatterlock(&dir->inode->nestlock); | |
1189 | logseg->dirty_dirfrag_nest.push_back(&dir->inode->item_dirty_dirfrag_nest); | |
1190 | } else { | |
1191 | dout(10) << "EMetaBlob.replay clean nestinfo on " << *dir << dendl; | |
1192 | } | |
1193 | if (!(dir->fnode.fragstat == dir->fnode.accounted_fragstat)) { | |
1194 | dout(10) << "EMetaBlob.replay dirty fragstat on " << *dir << dendl; | |
1195 | mds->locker->mark_updated_scatterlock(&dir->inode->filelock); | |
1196 | logseg->dirty_dirfrag_dir.push_back(&dir->inode->item_dirty_dirfrag_dir); | |
1197 | } else { | |
1198 | dout(10) << "EMetaBlob.replay clean fragstat on " << *dir << dendl; | |
1199 | } | |
1200 | } | |
1201 | if (lump.is_dirty_dft()) { | |
1202 | dout(10) << "EMetaBlob.replay dirty dirfragtree on " << *dir << dendl; | |
1203 | dir->state_set(CDir::STATE_DIRTYDFT); | |
1204 | mds->locker->mark_updated_scatterlock(&dir->inode->dirfragtreelock); | |
1205 | logseg->dirty_dirfrag_dirfragtree.push_back(&dir->inode->item_dirty_dirfrag_dirfragtree); | |
1206 | } | |
1207 | if (lump.is_new()) | |
1208 | dir->mark_new(logseg); | |
1209 | if (lump.is_complete()) | |
1210 | dir->mark_complete(); | |
1211 | ||
1212 | dout(10) << "EMetaBlob.replay updated dir " << *dir << dendl; | |
1213 | ||
1214 | // decode bits | |
1215 | lump._decode_bits(); | |
1216 | ||
1217 | // full dentry+inode pairs | |
11fdf7f2 TL |
1218 | for (auto& fb : lump._get_dfull()) { |
1219 | CDentry *dn = dir->lookup_exact_snap(fb.dn, fb.dnlast); | |
7c673cae | 1220 | if (!dn) { |
11fdf7f2 TL |
1221 | dn = dir->add_null_dentry(fb.dn, fb.dnfirst, fb.dnlast); |
1222 | dn->set_version(fb.dnv); | |
1223 | if (fb.is_dirty()) dn->_mark_dirty(logseg); | |
7c673cae FG |
1224 | dout(10) << "EMetaBlob.replay added (full) " << *dn << dendl; |
1225 | } else { | |
11fdf7f2 TL |
1226 | dn->set_version(fb.dnv); |
1227 | if (fb.is_dirty()) dn->_mark_dirty(logseg); | |
1228 | dout(10) << "EMetaBlob.replay for [" << fb.dnfirst << "," << fb.dnlast << "] had " << *dn << dendl; | |
1229 | dn->first = fb.dnfirst; | |
1230 | ceph_assert(dn->last == fb.dnlast); | |
7c673cae FG |
1231 | } |
1232 | if (lump.is_importing()) | |
1233 | dn->state_set(CDentry::STATE_AUTH); | |
1234 | ||
11fdf7f2 | 1235 | CInode *in = mds->mdcache->get_inode(fb.inode.ino, fb.dnlast); |
7c673cae | 1236 | if (!in) { |
11fdf7f2 TL |
1237 | in = new CInode(mds->mdcache, dn->is_auth(), fb.dnfirst, fb.dnlast); |
1238 | fb.update_inode(mds, in); | |
7c673cae FG |
1239 | mds->mdcache->add_inode(in); |
1240 | if (!dn->get_linkage()->is_null()) { | |
1241 | if (dn->get_linkage()->is_primary()) { | |
1242 | unlinked[dn->get_linkage()->get_inode()] = dir; | |
1243 | stringstream ss; | |
1244 | ss << "EMetaBlob.replay FIXME had dentry linked to wrong inode " << *dn | |
11fdf7f2 | 1245 | << " " << *dn->get_linkage()->get_inode() << " should be " << fb.inode.ino; |
7c673cae FG |
1246 | dout(0) << ss.str() << dendl; |
1247 | mds->clog->warn(ss); | |
1248 | } | |
31f18b77 | 1249 | dir->unlink_inode(dn, false); |
7c673cae FG |
1250 | } |
1251 | if (unlinked.count(in)) | |
1252 | linked.insert(in); | |
1253 | dir->link_primary_inode(dn, in); | |
1254 | dout(10) << "EMetaBlob.replay added " << *in << dendl; | |
1255 | } else { | |
11fdf7f2 TL |
1256 | in->first = fb.dnfirst; |
1257 | fb.update_inode(mds, in); | |
7c673cae FG |
1258 | if (dn->get_linkage()->get_inode() != in && in->get_parent_dn()) { |
1259 | dout(10) << "EMetaBlob.replay unlinking " << *in << dendl; | |
1260 | unlinked[in] = in->get_parent_dir(); | |
7c673cae | 1261 | in->get_parent_dir()->unlink_inode(in->get_parent_dn()); |
7c673cae FG |
1262 | } |
1263 | if (dn->get_linkage()->get_inode() != in) { | |
1264 | if (!dn->get_linkage()->is_null()) { // note: might be remote. as with stray reintegration. | |
1265 | if (dn->get_linkage()->is_primary()) { | |
1266 | unlinked[dn->get_linkage()->get_inode()] = dir; | |
1267 | stringstream ss; | |
1268 | ss << "EMetaBlob.replay FIXME had dentry linked to wrong inode " << *dn | |
11fdf7f2 | 1269 | << " " << *dn->get_linkage()->get_inode() << " should be " << fb.inode.ino; |
7c673cae FG |
1270 | dout(0) << ss.str() << dendl; |
1271 | mds->clog->warn(ss); | |
1272 | } | |
31f18b77 | 1273 | dir->unlink_inode(dn, false); |
7c673cae FG |
1274 | } |
1275 | if (unlinked.count(in)) | |
1276 | linked.insert(in); | |
1277 | dir->link_primary_inode(dn, in); | |
1278 | dout(10) << "EMetaBlob.replay linked " << *in << dendl; | |
1279 | } else { | |
11fdf7f2 | 1280 | dout(10) << "EMetaBlob.replay for [" << fb.dnfirst << "," << fb.dnlast << "] had " << *in << dendl; |
7c673cae | 1281 | } |
11fdf7f2 TL |
1282 | ceph_assert(in->first == fb.dnfirst || |
1283 | (in->is_multiversion() && in->first > fb.dnfirst)); | |
7c673cae | 1284 | } |
11fdf7f2 | 1285 | if (fb.is_dirty()) |
7c673cae | 1286 | in->_mark_dirty(logseg); |
11fdf7f2 TL |
1287 | if (fb.is_dirty_parent()) |
1288 | in->mark_dirty_parent(logseg, fb.is_dirty_pool()); | |
1289 | if (fb.need_snapflush()) | |
7c673cae FG |
1290 | logseg->open_files.push_back(&in->item_open_file); |
1291 | if (dn->is_auth()) | |
1292 | in->state_set(CInode::STATE_AUTH); | |
1293 | else | |
1294 | in->state_clear(CInode::STATE_AUTH); | |
11fdf7f2 | 1295 | ceph_assert(g_conf()->mds_kill_journal_replay_at != 2); |
f6b5b4d7 TL |
1296 | |
1297 | if (!(++count % 1000)) | |
1298 | mds->heartbeat_reset(); | |
7c673cae FG |
1299 | } |
1300 | ||
1301 | // remote dentries | |
11fdf7f2 TL |
1302 | for (const auto& rb : lump.get_dremote()) { |
1303 | CDentry *dn = dir->lookup_exact_snap(rb.dn, rb.dnlast); | |
7c673cae | 1304 | if (!dn) { |
11fdf7f2 TL |
1305 | dn = dir->add_remote_dentry(rb.dn, rb.ino, rb.d_type, rb.dnfirst, rb.dnlast); |
1306 | dn->set_version(rb.dnv); | |
1307 | if (rb.dirty) dn->_mark_dirty(logseg); | |
7c673cae FG |
1308 | dout(10) << "EMetaBlob.replay added " << *dn << dendl; |
1309 | } else { | |
1310 | if (!dn->get_linkage()->is_null()) { | |
1311 | dout(10) << "EMetaBlob.replay unlinking " << *dn << dendl; | |
1312 | if (dn->get_linkage()->is_primary()) { | |
1313 | unlinked[dn->get_linkage()->get_inode()] = dir; | |
1314 | stringstream ss; | |
1315 | ss << "EMetaBlob.replay FIXME had dentry linked to wrong inode " << *dn | |
11fdf7f2 | 1316 | << " " << *dn->get_linkage()->get_inode() << " should be remote " << rb.ino; |
7c673cae FG |
1317 | dout(0) << ss.str() << dendl; |
1318 | } | |
31f18b77 | 1319 | dir->unlink_inode(dn, false); |
7c673cae | 1320 | } |
11fdf7f2 TL |
1321 | dir->link_remote_inode(dn, rb.ino, rb.d_type); |
1322 | dn->set_version(rb.dnv); | |
1323 | if (rb.dirty) dn->_mark_dirty(logseg); | |
1324 | dout(10) << "EMetaBlob.replay for [" << rb.dnfirst << "," << rb.dnlast << "] had " << *dn << dendl; | |
1325 | dn->first = rb.dnfirst; | |
1326 | ceph_assert(dn->last == rb.dnlast); | |
7c673cae FG |
1327 | } |
1328 | if (lump.is_importing()) | |
1329 | dn->state_set(CDentry::STATE_AUTH); | |
f6b5b4d7 TL |
1330 | |
1331 | if (!(++count % 1000)) | |
1332 | mds->heartbeat_reset(); | |
7c673cae FG |
1333 | } |
1334 | ||
1335 | // null dentries | |
11fdf7f2 TL |
1336 | for (const auto& nb : lump.get_dnull()) { |
1337 | CDentry *dn = dir->lookup_exact_snap(nb.dn, nb.dnlast); | |
7c673cae | 1338 | if (!dn) { |
11fdf7f2 TL |
1339 | dn = dir->add_null_dentry(nb.dn, nb.dnfirst, nb.dnlast); |
1340 | dn->set_version(nb.dnv); | |
1341 | if (nb.dirty) dn->_mark_dirty(logseg); | |
7c673cae FG |
1342 | dout(10) << "EMetaBlob.replay added (nullbit) " << *dn << dendl; |
1343 | } else { | |
11fdf7f2 | 1344 | dn->first = nb.dnfirst; |
7c673cae FG |
1345 | if (!dn->get_linkage()->is_null()) { |
1346 | dout(10) << "EMetaBlob.replay unlinking " << *dn << dendl; | |
1347 | CInode *in = dn->get_linkage()->get_inode(); | |
1348 | // For renamed inode, We may call CInode::force_dirfrag() later. | |
1349 | // CInode::force_dirfrag() doesn't work well when inode is detached | |
1350 | // from the hierarchy. | |
1351 | if (!renamed_diri || renamed_diri != in) { | |
1352 | if (dn->get_linkage()->is_primary()) | |
1353 | unlinked[in] = dir; | |
1354 | dir->unlink_inode(dn); | |
7c673cae FG |
1355 | } |
1356 | } | |
11fdf7f2 TL |
1357 | dn->set_version(nb.dnv); |
1358 | if (nb.dirty) dn->_mark_dirty(logseg); | |
7c673cae | 1359 | dout(10) << "EMetaBlob.replay had " << *dn << dendl; |
11fdf7f2 | 1360 | ceph_assert(dn->last == nb.dnlast); |
7c673cae FG |
1361 | } |
1362 | olddir = dir; | |
1363 | if (lump.is_importing()) | |
1364 | dn->state_set(CDentry::STATE_AUTH); | |
1365 | ||
1366 | // Make null dentries the first things we trim | |
1367 | dout(10) << "EMetaBlob.replay pushing to bottom of lru " << *dn << dendl; | |
f6b5b4d7 TL |
1368 | |
1369 | if (!(++count % 1000)) | |
1370 | mds->heartbeat_reset(); | |
7c673cae FG |
1371 | } |
1372 | } | |
1373 | ||
11fdf7f2 | 1374 | ceph_assert(g_conf()->mds_kill_journal_replay_at != 3); |
7c673cae FG |
1375 | |
1376 | if (renamed_dirino) { | |
1377 | if (renamed_diri) { | |
11fdf7f2 TL |
1378 | ceph_assert(unlinked.count(renamed_diri)); |
1379 | ceph_assert(linked.count(renamed_diri)); | |
7c673cae FG |
1380 | olddir = unlinked[renamed_diri]; |
1381 | } else { | |
1382 | // we imported a diri we haven't seen before | |
1383 | renamed_diri = mds->mdcache->get_inode(renamed_dirino); | |
11fdf7f2 | 1384 | ceph_assert(renamed_diri); // it was in the metablob |
7c673cae FG |
1385 | } |
1386 | ||
1387 | if (olddir) { | |
1388 | if (olddir->authority() != CDIR_AUTH_UNDEF && | |
1389 | renamed_diri->authority() == CDIR_AUTH_UNDEF) { | |
11fdf7f2 TL |
1390 | ceph_assert(slaveup); // auth to non-auth, must be slave prepare |
1391 | frag_vec_t leaves; | |
7c673cae | 1392 | renamed_diri->dirfragtree.get_leaves(leaves); |
11fdf7f2 TL |
1393 | for (const auto& leaf : leaves) { |
1394 | CDir *dir = renamed_diri->get_dirfrag(leaf); | |
1395 | ceph_assert(dir); | |
7c673cae FG |
1396 | if (dir->get_dir_auth() == CDIR_AUTH_UNDEF) |
1397 | // preserve subtree bound until slave commit | |
1398 | slaveup->olddirs.insert(dir->inode); | |
1399 | else | |
1400 | dir->state_set(CDir::STATE_AUTH); | |
f6b5b4d7 TL |
1401 | |
1402 | if (!(++count % 1000)) | |
1403 | mds->heartbeat_reset(); | |
7c673cae FG |
1404 | } |
1405 | } | |
1406 | ||
1407 | mds->mdcache->adjust_subtree_after_rename(renamed_diri, olddir, false); | |
1408 | ||
1409 | // see if we can discard the subtree we renamed out of | |
1410 | CDir *root = mds->mdcache->get_subtree_root(olddir); | |
1411 | if (root->get_dir_auth() == CDIR_AUTH_UNDEF) { | |
1412 | if (slaveup) // preserve the old dir until slave commit | |
1413 | slaveup->olddirs.insert(olddir->inode); | |
1414 | else | |
1415 | mds->mdcache->try_trim_non_auth_subtree(root); | |
1416 | } | |
1417 | } | |
1418 | ||
1419 | // if we are the srci importer, we'll also have some dirfrags we have to open up... | |
1420 | if (renamed_diri->authority() != CDIR_AUTH_UNDEF) { | |
11fdf7f2 TL |
1421 | for (const auto& p : renamed_dir_frags) { |
1422 | CDir *dir = renamed_diri->get_dirfrag(p); | |
7c673cae FG |
1423 | if (dir) { |
1424 | // we already had the inode before, and we already adjusted this subtree accordingly. | |
1425 | dout(10) << " already had+adjusted rename import bound " << *dir << dendl; | |
11fdf7f2 | 1426 | ceph_assert(olddir); |
7c673cae FG |
1427 | continue; |
1428 | } | |
11fdf7f2 | 1429 | dir = renamed_diri->get_or_open_dirfrag(mds->mdcache, p); |
7c673cae FG |
1430 | dout(10) << " creating new rename import bound " << *dir << dendl; |
1431 | dir->state_clear(CDir::STATE_AUTH); | |
224ce89b | 1432 | mds->mdcache->adjust_subtree_auth(dir, CDIR_AUTH_UNDEF); |
f6b5b4d7 TL |
1433 | |
1434 | if (!(++count % 1000)) | |
1435 | mds->heartbeat_reset(); | |
7c673cae FG |
1436 | } |
1437 | } | |
1438 | ||
1439 | // rename may overwrite an empty directory and move it into stray dir. | |
1440 | unlinked.erase(renamed_diri); | |
1441 | for (map<CInode*, CDir*>::iterator p = unlinked.begin(); p != unlinked.end(); ++p) { | |
1442 | if (!linked.count(p->first)) | |
1443 | continue; | |
11fdf7f2 | 1444 | ceph_assert(p->first->is_dir()); |
7c673cae | 1445 | mds->mdcache->adjust_subtree_after_rename(p->first, p->second, false); |
f6b5b4d7 TL |
1446 | |
1447 | if (!(++count % 1000)) | |
1448 | mds->heartbeat_reset(); | |
7c673cae FG |
1449 | } |
1450 | } | |
1451 | ||
1452 | if (!unlinked.empty()) { | |
1453 | for (set<CInode*>::iterator p = linked.begin(); p != linked.end(); ++p) | |
1454 | unlinked.erase(*p); | |
1455 | dout(10) << " unlinked set contains " << unlinked << dendl; | |
1456 | for (map<CInode*, CDir*>::iterator p = unlinked.begin(); p != unlinked.end(); ++p) { | |
11fdf7f2 TL |
1457 | CInode *in = p->first; |
1458 | if (slaveup) { // preserve unlinked inodes until slave commit | |
1459 | slaveup->unlinked.insert(in); | |
1460 | if (in->snaprealm) | |
1461 | in->snaprealm->adjust_parent(); | |
1462 | } else | |
1463 | mds->mdcache->remove_inode_recursive(in); | |
f6b5b4d7 TL |
1464 | |
1465 | if (!(++count % 1000)) | |
1466 | mds->heartbeat_reset(); | |
7c673cae FG |
1467 | } |
1468 | } | |
1469 | ||
1470 | // table client transactions | |
11fdf7f2 TL |
1471 | for (const auto& p : table_tids) { |
1472 | dout(10) << "EMetaBlob.replay noting " << get_mdstable_name(p.first) | |
1473 | << " transaction " << p.second << dendl; | |
1474 | MDSTableClient *client = mds->get_table_client(p.first); | |
7c673cae | 1475 | if (client) |
11fdf7f2 | 1476 | client->got_journaled_agree(p.second, logseg); |
f6b5b4d7 TL |
1477 | |
1478 | if (!(++count % 1000)) | |
1479 | mds->heartbeat_reset(); | |
7c673cae FG |
1480 | } |
1481 | ||
1482 | // opened ino? | |
1483 | if (opened_ino) { | |
1484 | CInode *in = mds->mdcache->get_inode(opened_ino); | |
11fdf7f2 | 1485 | ceph_assert(in); |
7c673cae FG |
1486 | dout(10) << "EMetaBlob.replay noting opened inode " << *in << dendl; |
1487 | logseg->open_files.push_back(&in->item_open_file); | |
1488 | } | |
1489 | ||
1490 | // allocated_inos | |
1491 | if (inotablev) { | |
1492 | if (mds->inotable->get_version() >= inotablev) { | |
1493 | dout(10) << "EMetaBlob.replay inotable tablev " << inotablev | |
1494 | << " <= table " << mds->inotable->get_version() << dendl; | |
1495 | } else { | |
1496 | dout(10) << "EMetaBlob.replay inotable v " << inotablev | |
1497 | << " - 1 == table " << mds->inotable->get_version() | |
1498 | << " allocated+used " << allocated_ino | |
1499 | << " prealloc " << preallocated_inos | |
1500 | << dendl; | |
1501 | if (allocated_ino) | |
1502 | mds->inotable->replay_alloc_id(allocated_ino); | |
1503 | if (preallocated_inos.size()) | |
1504 | mds->inotable->replay_alloc_ids(preallocated_inos); | |
1505 | ||
1506 | // [repair bad inotable updates] | |
1507 | if (inotablev > mds->inotable->get_version()) { | |
1508 | mds->clog->error() << "journal replay inotablev mismatch " | |
1509 | << mds->inotable->get_version() << " -> " << inotablev; | |
1510 | mds->inotable->force_replay_version(inotablev); | |
1511 | } | |
1512 | ||
11fdf7f2 | 1513 | ceph_assert(inotablev == mds->inotable->get_version()); |
7c673cae FG |
1514 | } |
1515 | } | |
1516 | if (sessionmapv) { | |
81eedcae | 1517 | unsigned diff = (used_preallocated_ino && !preallocated_inos.empty()) ? 2 : 1; |
7c673cae FG |
1518 | if (mds->sessionmap.get_version() >= sessionmapv) { |
1519 | dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv | |
1520 | << " <= table " << mds->sessionmap.get_version() << dendl; | |
81eedcae | 1521 | } else if (mds->sessionmap.get_version() + diff == sessionmapv) { |
7c673cae | 1522 | dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv |
81eedcae | 1523 | << " - " << diff << " == table " << mds->sessionmap.get_version() |
7c673cae FG |
1524 | << " prealloc " << preallocated_inos |
1525 | << " used " << used_preallocated_ino | |
1526 | << dendl; | |
1527 | Session *session = mds->sessionmap.get_session(client_name); | |
1528 | if (session) { | |
1529 | dout(20) << " (session prealloc " << session->info.prealloc_inos << ")" << dendl; | |
1530 | if (used_preallocated_ino) { | |
1531 | if (!session->info.prealloc_inos.empty()) { | |
7c673cae | 1532 | inodeno_t i = session->take_ino(used_preallocated_ino); |
11fdf7f2 | 1533 | ceph_assert(i == used_preallocated_ino); |
7c673cae FG |
1534 | session->info.used_inos.clear(); |
1535 | } | |
1536 | mds->sessionmap.replay_dirty_session(session); | |
1537 | } | |
1538 | if (!preallocated_inos.empty()) { | |
1539 | session->info.prealloc_inos.insert(preallocated_inos); | |
1540 | mds->sessionmap.replay_dirty_session(session); | |
1541 | } | |
1542 | ||
1543 | } else { | |
1544 | dout(10) << "EMetaBlob.replay no session for " << client_name << dendl; | |
81eedcae | 1545 | if (used_preallocated_ino) |
7c673cae | 1546 | mds->sessionmap.replay_advance_version(); |
81eedcae | 1547 | |
7c673cae FG |
1548 | if (!preallocated_inos.empty()) |
1549 | mds->sessionmap.replay_advance_version(); | |
1550 | } | |
11fdf7f2 | 1551 | ceph_assert(sessionmapv == mds->sessionmap.get_version()); |
7c673cae | 1552 | } else { |
81eedcae TL |
1553 | mds->clog->error() << "EMetaBlob.replay sessionmap v " << sessionmapv |
1554 | << " - " << diff << " > table " << mds->sessionmap.get_version(); | |
11fdf7f2 | 1555 | ceph_assert(g_conf()->mds_wipe_sessions); |
7c673cae FG |
1556 | mds->sessionmap.wipe(); |
1557 | mds->sessionmap.set_version(sessionmapv); | |
1558 | } | |
1559 | } | |
1560 | ||
1561 | // truncating inodes | |
11fdf7f2 TL |
1562 | for (const auto& ino : truncate_start) { |
1563 | CInode *in = mds->mdcache->get_inode(ino); | |
1564 | ceph_assert(in); | |
7c673cae | 1565 | mds->mdcache->add_recovered_truncate(in, logseg); |
f6b5b4d7 TL |
1566 | |
1567 | if (!(++count % 1000)) | |
1568 | mds->heartbeat_reset(); | |
7c673cae | 1569 | } |
11fdf7f2 TL |
1570 | for (const auto& p : truncate_finish) { |
1571 | LogSegment *ls = mds->mdlog->get_segment(p.second); | |
7c673cae | 1572 | if (ls) { |
11fdf7f2 TL |
1573 | CInode *in = mds->mdcache->get_inode(p.first); |
1574 | ceph_assert(in); | |
7c673cae FG |
1575 | mds->mdcache->remove_recovered_truncate(in, ls); |
1576 | } | |
f6b5b4d7 TL |
1577 | |
1578 | if (!(++count % 1000)) | |
1579 | mds->heartbeat_reset(); | |
7c673cae FG |
1580 | } |
1581 | ||
1582 | // destroyed inodes | |
11fdf7f2 TL |
1583 | if (!destroyed_inodes.empty()) { |
1584 | for (vector<inodeno_t>::iterator p = destroyed_inodes.begin(); | |
1585 | p != destroyed_inodes.end(); | |
1586 | ++p) { | |
1587 | CInode *in = mds->mdcache->get_inode(*p); | |
1588 | if (in) { | |
1589 | dout(10) << "EMetaBlob.replay destroyed " << *p << ", dropping " << *in << dendl; | |
1590 | CDentry *parent = in->get_parent_dn(); | |
1591 | mds->mdcache->remove_inode(in); | |
1592 | if (parent) { | |
1593 | dout(10) << "EMetaBlob.replay unlinked from dentry " << *parent << dendl; | |
1594 | ceph_assert(parent->get_linkage()->is_null()); | |
1595 | } | |
1596 | } else { | |
1597 | dout(10) << "EMetaBlob.replay destroyed " << *p << ", not in cache" << dendl; | |
7c673cae | 1598 | } |
f6b5b4d7 TL |
1599 | |
1600 | if (!(++count % 1000)) | |
1601 | mds->heartbeat_reset(); | |
7c673cae | 1602 | } |
11fdf7f2 | 1603 | mds->mdcache->open_file_table.note_destroyed_inos(logseg->seq, destroyed_inodes); |
7c673cae FG |
1604 | } |
1605 | ||
1606 | // client requests | |
11fdf7f2 TL |
1607 | for (const auto& p : client_reqs) { |
1608 | if (p.first.name.is_client()) { | |
1609 | dout(10) << "EMetaBlob.replay request " << p.first << " trim_to " << p.second << dendl; | |
7c673cae FG |
1610 | inodeno_t created = allocated_ino ? allocated_ino : used_preallocated_ino; |
1611 | // if we allocated an inode, there should be exactly one client request id. | |
11fdf7f2 | 1612 | ceph_assert(created == inodeno_t() || client_reqs.size() == 1); |
7c673cae | 1613 | |
11fdf7f2 | 1614 | Session *session = mds->sessionmap.get_session(p.first.name); |
7c673cae | 1615 | if (session) { |
11fdf7f2 TL |
1616 | session->add_completed_request(p.first.tid, created); |
1617 | if (p.second) | |
1618 | session->trim_completed_requests(p.second); | |
7c673cae FG |
1619 | } |
1620 | } | |
f6b5b4d7 TL |
1621 | |
1622 | if (!(++count % 1000)) | |
1623 | mds->heartbeat_reset(); | |
7c673cae FG |
1624 | } |
1625 | ||
1626 | // client flushes | |
11fdf7f2 TL |
1627 | for (const auto& p : client_flushes) { |
1628 | if (p.first.name.is_client()) { | |
1629 | dout(10) << "EMetaBlob.replay flush " << p.first << " trim_to " << p.second << dendl; | |
1630 | Session *session = mds->sessionmap.get_session(p.first.name); | |
7c673cae | 1631 | if (session) { |
11fdf7f2 TL |
1632 | session->add_completed_flush(p.first.tid); |
1633 | if (p.second) | |
1634 | session->trim_completed_flushes(p.second); | |
7c673cae FG |
1635 | } |
1636 | } | |
f6b5b4d7 TL |
1637 | |
1638 | if (!(++count % 1000)) | |
1639 | mds->heartbeat_reset(); | |
7c673cae FG |
1640 | } |
1641 | ||
1642 | // update segment | |
1643 | update_segment(logseg); | |
1644 | ||
11fdf7f2 | 1645 | ceph_assert(g_conf()->mds_kill_journal_replay_at != 4); |
7c673cae FG |
1646 | } |
1647 | ||
9f95a23c TL |
1648 | // ----------------------- |
1649 | // EPurged | |
1650 | void EPurged::update_segment() | |
1651 | { | |
1652 | if (inos.size() && inotablev) | |
1653 | get_segment()->inotablev = inotablev; | |
1654 | return; | |
1655 | } | |
1656 | ||
1657 | void EPurged::replay(MDSRank *mds) | |
1658 | { | |
1659 | if (inos.size()) { | |
1660 | LogSegment *ls = mds->mdlog->get_segment(seq); | |
1661 | if (ls) { | |
1662 | ls->purge_inodes.subtract(inos); | |
1663 | } | |
1664 | if (mds->inotable->get_version() >= inotablev) { | |
1665 | dout(10) << "EPurged.replay inotable " << mds->inotable->get_version() | |
1666 | << " >= " << inotablev << ", noop" << dendl; | |
1667 | } else { | |
1668 | dout(10) << "EPurged.replay inotable " << mds->inotable->get_version() | |
1669 | << " < " << inotablev << " " << dendl; | |
1670 | mds->inotable->replay_release_ids(inos); | |
1671 | assert(mds->inotable->get_version() == inotablev); | |
1672 | } | |
1673 | } | |
1674 | update_segment(); | |
1675 | } | |
1676 | ||
1677 | void EPurged::encode(bufferlist& bl, uint64_t features) const | |
1678 | { | |
1679 | ENCODE_START(1, 1, bl); | |
1680 | encode(inos, bl); | |
1681 | encode(inotablev, bl); | |
1682 | encode(seq, bl); | |
1683 | ENCODE_FINISH(bl); | |
1684 | } | |
1685 | ||
1686 | void EPurged::decode(bufferlist::const_iterator& bl) | |
1687 | { | |
1688 | DECODE_START(1, bl); | |
1689 | decode(inos, bl); | |
1690 | decode(inotablev, bl); | |
1691 | decode(seq, bl); | |
1692 | DECODE_FINISH(bl); | |
1693 | } | |
1694 | ||
1695 | void EPurged::dump(Formatter *f) const | |
1696 | { | |
1697 | f->dump_stream("inos") << inos; | |
1698 | f->dump_int("inotable version", inotablev); | |
1699 | f->dump_int("segment seq", seq); | |
1700 | } | |
1701 | ||
7c673cae FG |
1702 | // ----------------------- |
1703 | // ESession | |
1704 | ||
1705 | void ESession::update_segment() | |
1706 | { | |
11fdf7f2 | 1707 | get_segment()->sessionmapv = cmapv; |
7c673cae | 1708 | if (inos.size() && inotablev) |
11fdf7f2 | 1709 | get_segment()->inotablev = inotablev; |
7c673cae FG |
1710 | } |
1711 | ||
1712 | void ESession::replay(MDSRank *mds) | |
1713 | { | |
9f95a23c TL |
1714 | if (purge_inos.size()) |
1715 | get_segment()->purge_inodes.insert(purge_inos); | |
1716 | ||
7c673cae FG |
1717 | if (mds->sessionmap.get_version() >= cmapv) { |
1718 | dout(10) << "ESession.replay sessionmap " << mds->sessionmap.get_version() | |
1719 | << " >= " << cmapv << ", noop" << dendl; | |
81eedcae | 1720 | } else if (mds->sessionmap.get_version() + 1 == cmapv) { |
7c673cae FG |
1721 | dout(10) << "ESession.replay sessionmap " << mds->sessionmap.get_version() |
1722 | << " < " << cmapv << " " << (open ? "open":"close") << " " << client_inst << dendl; | |
1723 | Session *session; | |
1724 | if (open) { | |
1725 | session = mds->sessionmap.get_or_add_session(client_inst); | |
1726 | mds->sessionmap.set_state(session, Session::STATE_OPEN); | |
1727 | session->set_client_metadata(client_metadata); | |
1728 | dout(10) << " opened session " << session->info.inst << dendl; | |
1729 | } else { | |
1730 | session = mds->sessionmap.get_session(client_inst.name); | |
1731 | if (session) { // there always should be a session, but there's a bug | |
11fdf7f2 | 1732 | if (session->get_connection() == NULL) { |
7c673cae FG |
1733 | dout(10) << " removed session " << session->info.inst << dendl; |
1734 | mds->sessionmap.remove_session(session); | |
1735 | session = NULL; | |
1736 | } else { | |
1737 | session->clear(); // the client has reconnected; keep the Session, but reset | |
1738 | dout(10) << " reset session " << session->info.inst << " (they reconnected)" << dendl; | |
1739 | } | |
1740 | } else { | |
1741 | mds->clog->error() << "replayed stray Session close event for " << client_inst | |
1742 | << " from time " << stamp << ", ignoring"; | |
1743 | } | |
1744 | } | |
1745 | if (session) { | |
1746 | mds->sessionmap.replay_dirty_session(session); | |
1747 | } else { | |
1748 | mds->sessionmap.replay_advance_version(); | |
1749 | } | |
11fdf7f2 | 1750 | ceph_assert(mds->sessionmap.get_version() == cmapv); |
81eedcae TL |
1751 | } else { |
1752 | mds->clog->error() << "ESession.replay sessionmap v " << cmapv | |
1753 | << " - 1 > table " << mds->sessionmap.get_version(); | |
1754 | ceph_assert(g_conf()->mds_wipe_sessions); | |
1755 | mds->sessionmap.wipe(); | |
1756 | mds->sessionmap.set_version(cmapv); | |
7c673cae FG |
1757 | } |
1758 | ||
1759 | if (inos.size() && inotablev) { | |
1760 | if (mds->inotable->get_version() >= inotablev) { | |
1761 | dout(10) << "ESession.replay inotable " << mds->inotable->get_version() | |
1762 | << " >= " << inotablev << ", noop" << dendl; | |
1763 | } else { | |
1764 | dout(10) << "ESession.replay inotable " << mds->inotable->get_version() | |
1765 | << " < " << inotablev << " " << (open ? "add":"remove") << dendl; | |
11fdf7f2 | 1766 | ceph_assert(!open); // for now |
7c673cae | 1767 | mds->inotable->replay_release_ids(inos); |
11fdf7f2 | 1768 | ceph_assert(mds->inotable->get_version() == inotablev); |
7c673cae FG |
1769 | } |
1770 | } | |
1771 | ||
1772 | update_segment(); | |
1773 | } | |
1774 | ||
1775 | void ESession::encode(bufferlist &bl, uint64_t features) const | |
1776 | { | |
9f95a23c | 1777 | ENCODE_START(6, 5, bl); |
11fdf7f2 TL |
1778 | encode(stamp, bl); |
1779 | encode(client_inst, bl, features); | |
1780 | encode(open, bl); | |
1781 | encode(cmapv, bl); | |
1782 | encode(inos, bl); | |
1783 | encode(inotablev, bl); | |
1784 | encode(client_metadata, bl); | |
9f95a23c | 1785 | encode(purge_inos, bl); |
7c673cae FG |
1786 | ENCODE_FINISH(bl); |
1787 | } | |
1788 | ||
11fdf7f2 | 1789 | void ESession::decode(bufferlist::const_iterator &bl) |
7c673cae | 1790 | { |
9f95a23c | 1791 | DECODE_START_LEGACY_COMPAT_LEN(6, 3, 3, bl); |
7c673cae | 1792 | if (struct_v >= 2) |
11fdf7f2 TL |
1793 | decode(stamp, bl); |
1794 | decode(client_inst, bl); | |
1795 | decode(open, bl); | |
1796 | decode(cmapv, bl); | |
1797 | decode(inos, bl); | |
1798 | decode(inotablev, bl); | |
1799 | if (struct_v == 4) { | |
1800 | decode(client_metadata.kv_map, bl); | |
1801 | } else if (struct_v >= 5) { | |
1802 | decode(client_metadata, bl); | |
7c673cae | 1803 | } |
9f95a23c TL |
1804 | if (struct_v >= 6){ |
1805 | decode(purge_inos, bl); | |
1806 | } | |
1807 | ||
7c673cae FG |
1808 | DECODE_FINISH(bl); |
1809 | } | |
1810 | ||
1811 | void ESession::dump(Formatter *f) const | |
1812 | { | |
1813 | f->dump_stream("client instance") << client_inst; | |
1814 | f->dump_string("open", open ? "true" : "false"); | |
1815 | f->dump_int("client map version", cmapv); | |
1816 | f->dump_stream("inos") << inos; | |
1817 | f->dump_int("inotable version", inotablev); | |
1818 | f->open_object_section("client_metadata"); | |
11fdf7f2 | 1819 | client_metadata.dump(f); |
7c673cae FG |
1820 | f->close_section(); // client_metadata |
1821 | } | |
1822 | ||
9f95a23c | 1823 | void ESession::generate_test_instances(std::list<ESession*>& ls) |
7c673cae FG |
1824 | { |
1825 | ls.push_back(new ESession); | |
1826 | } | |
1827 | ||
1828 | // ----------------------- | |
1829 | // ESessions | |
1830 | ||
1831 | void ESessions::encode(bufferlist &bl, uint64_t features) const | |
1832 | { | |
11fdf7f2 TL |
1833 | ENCODE_START(2, 1, bl); |
1834 | encode(client_map, bl, features); | |
1835 | encode(cmapv, bl); | |
1836 | encode(stamp, bl); | |
1837 | encode(client_metadata_map, bl); | |
7c673cae FG |
1838 | ENCODE_FINISH(bl); |
1839 | } | |
1840 | ||
11fdf7f2 | 1841 | void ESessions::decode_old(bufferlist::const_iterator &bl) |
7c673cae | 1842 | { |
11fdf7f2 TL |
1843 | using ceph::decode; |
1844 | decode(client_map, bl); | |
1845 | decode(cmapv, bl); | |
7c673cae | 1846 | if (!bl.end()) |
11fdf7f2 | 1847 | decode(stamp, bl); |
7c673cae FG |
1848 | } |
1849 | ||
11fdf7f2 | 1850 | void ESessions::decode_new(bufferlist::const_iterator &bl) |
7c673cae | 1851 | { |
11fdf7f2 TL |
1852 | DECODE_START(2, bl); |
1853 | decode(client_map, bl); | |
1854 | decode(cmapv, bl); | |
1855 | decode(stamp, bl); | |
1856 | if (struct_v >= 2) | |
1857 | decode(client_metadata_map, bl); | |
7c673cae FG |
1858 | DECODE_FINISH(bl); |
1859 | } | |
1860 | ||
1861 | void ESessions::dump(Formatter *f) const | |
1862 | { | |
1863 | f->dump_int("client map version", cmapv); | |
1864 | ||
1865 | f->open_array_section("client map"); | |
1866 | for (map<client_t,entity_inst_t>::const_iterator i = client_map.begin(); | |
1867 | i != client_map.end(); ++i) { | |
1868 | f->open_object_section("client"); | |
1869 | f->dump_int("client id", i->first.v); | |
1870 | f->dump_stream("client entity") << i->second; | |
1871 | f->close_section(); // client | |
1872 | } | |
1873 | f->close_section(); // client map | |
1874 | } | |
1875 | ||
9f95a23c | 1876 | void ESessions::generate_test_instances(std::list<ESessions*>& ls) |
7c673cae FG |
1877 | { |
1878 | ls.push_back(new ESessions()); | |
1879 | } | |
1880 | ||
1881 | void ESessions::update_segment() | |
1882 | { | |
11fdf7f2 | 1883 | get_segment()->sessionmapv = cmapv; |
7c673cae FG |
1884 | } |
1885 | ||
1886 | void ESessions::replay(MDSRank *mds) | |
1887 | { | |
1888 | if (mds->sessionmap.get_version() >= cmapv) { | |
1889 | dout(10) << "ESessions.replay sessionmap " << mds->sessionmap.get_version() | |
1890 | << " >= " << cmapv << ", noop" << dendl; | |
1891 | } else { | |
1892 | dout(10) << "ESessions.replay sessionmap " << mds->sessionmap.get_version() | |
1893 | << " < " << cmapv << dendl; | |
81eedcae | 1894 | mds->sessionmap.replay_open_sessions(cmapv, client_map, client_metadata_map); |
7c673cae FG |
1895 | } |
1896 | update_segment(); | |
1897 | } | |
1898 | ||
1899 | ||
1900 | // ----------------------- | |
1901 | // ETableServer | |
1902 | ||
1903 | void ETableServer::encode(bufferlist& bl, uint64_t features) const | |
1904 | { | |
1905 | ENCODE_START(3, 3, bl); | |
11fdf7f2 TL |
1906 | encode(stamp, bl); |
1907 | encode(table, bl); | |
1908 | encode(op, bl); | |
1909 | encode(reqid, bl); | |
1910 | encode(bymds, bl); | |
1911 | encode(mutation, bl); | |
1912 | encode(tid, bl); | |
1913 | encode(version, bl); | |
7c673cae FG |
1914 | ENCODE_FINISH(bl); |
1915 | } | |
1916 | ||
11fdf7f2 | 1917 | void ETableServer::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
1918 | { |
1919 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
1920 | if (struct_v >= 2) | |
11fdf7f2 TL |
1921 | decode(stamp, bl); |
1922 | decode(table, bl); | |
1923 | decode(op, bl); | |
1924 | decode(reqid, bl); | |
1925 | decode(bymds, bl); | |
1926 | decode(mutation, bl); | |
1927 | decode(tid, bl); | |
1928 | decode(version, bl); | |
7c673cae FG |
1929 | DECODE_FINISH(bl); |
1930 | } | |
1931 | ||
1932 | void ETableServer::dump(Formatter *f) const | |
1933 | { | |
1934 | f->dump_int("table id", table); | |
1935 | f->dump_int("op", op); | |
1936 | f->dump_int("request id", reqid); | |
1937 | f->dump_int("by mds", bymds); | |
1938 | f->dump_int("tid", tid); | |
1939 | f->dump_int("version", version); | |
1940 | } | |
1941 | ||
9f95a23c | 1942 | void ETableServer::generate_test_instances(std::list<ETableServer*>& ls) |
7c673cae FG |
1943 | { |
1944 | ls.push_back(new ETableServer()); | |
1945 | } | |
1946 | ||
1947 | ||
1948 | void ETableServer::update_segment() | |
1949 | { | |
11fdf7f2 | 1950 | get_segment()->tablev[table] = version; |
7c673cae FG |
1951 | } |
1952 | ||
1953 | void ETableServer::replay(MDSRank *mds) | |
1954 | { | |
1955 | MDSTableServer *server = mds->get_table_server(table); | |
1956 | if (!server) | |
1957 | return; | |
1958 | ||
1959 | if (server->get_version() >= version) { | |
1960 | dout(10) << "ETableServer.replay " << get_mdstable_name(table) | |
1961 | << " " << get_mdstableserver_opname(op) | |
1962 | << " event " << version | |
1963 | << " <= table " << server->get_version() << dendl; | |
1964 | return; | |
1965 | } | |
1966 | ||
1967 | dout(10) << " ETableServer.replay " << get_mdstable_name(table) | |
1968 | << " " << get_mdstableserver_opname(op) | |
1969 | << " event " << version << " - 1 == table " << server->get_version() << dendl; | |
11fdf7f2 | 1970 | ceph_assert(version-1 == server->get_version()); |
7c673cae FG |
1971 | |
1972 | switch (op) { | |
11fdf7f2 TL |
1973 | case TABLESERVER_OP_PREPARE: { |
1974 | server->_note_prepare(bymds, reqid, true); | |
1975 | bufferlist out; | |
1976 | server->_prepare(mutation, reqid, bymds, out); | |
1977 | mutation = std::move(out); | |
7c673cae | 1978 | break; |
11fdf7f2 | 1979 | } |
7c673cae | 1980 | case TABLESERVER_OP_COMMIT: |
9f95a23c | 1981 | server->_commit(tid, ref_t<MMDSTableRequest>()); |
11fdf7f2 | 1982 | server->_note_commit(tid, true); |
7c673cae FG |
1983 | break; |
1984 | case TABLESERVER_OP_ROLLBACK: | |
1985 | server->_rollback(tid); | |
11fdf7f2 | 1986 | server->_note_rollback(tid, true); |
7c673cae FG |
1987 | break; |
1988 | case TABLESERVER_OP_SERVER_UPDATE: | |
1989 | server->_server_update(mutation); | |
11fdf7f2 | 1990 | server->_note_server_update(mutation, true); |
7c673cae FG |
1991 | break; |
1992 | default: | |
1993 | mds->clog->error() << "invalid tableserver op in ETableServer"; | |
1994 | mds->damaged(); | |
1995 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
1996 | } | |
1997 | ||
11fdf7f2 | 1998 | ceph_assert(version == server->get_version()); |
7c673cae FG |
1999 | update_segment(); |
2000 | } | |
2001 | ||
2002 | ||
2003 | // --------------------- | |
2004 | // ETableClient | |
2005 | ||
2006 | void ETableClient::encode(bufferlist& bl, uint64_t features) const | |
2007 | { | |
2008 | ENCODE_START(3, 3, bl); | |
11fdf7f2 TL |
2009 | encode(stamp, bl); |
2010 | encode(table, bl); | |
2011 | encode(op, bl); | |
2012 | encode(tid, bl); | |
7c673cae FG |
2013 | ENCODE_FINISH(bl); |
2014 | } | |
2015 | ||
11fdf7f2 | 2016 | void ETableClient::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
2017 | { |
2018 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
2019 | if (struct_v >= 2) | |
11fdf7f2 TL |
2020 | decode(stamp, bl); |
2021 | decode(table, bl); | |
2022 | decode(op, bl); | |
2023 | decode(tid, bl); | |
7c673cae FG |
2024 | DECODE_FINISH(bl); |
2025 | } | |
2026 | ||
2027 | void ETableClient::dump(Formatter *f) const | |
2028 | { | |
2029 | f->dump_int("table", table); | |
2030 | f->dump_int("op", op); | |
2031 | f->dump_int("tid", tid); | |
2032 | } | |
2033 | ||
9f95a23c | 2034 | void ETableClient::generate_test_instances(std::list<ETableClient*>& ls) |
7c673cae FG |
2035 | { |
2036 | ls.push_back(new ETableClient()); | |
2037 | } | |
2038 | ||
2039 | void ETableClient::replay(MDSRank *mds) | |
2040 | { | |
2041 | dout(10) << " ETableClient.replay " << get_mdstable_name(table) | |
2042 | << " op " << get_mdstableserver_opname(op) | |
2043 | << " tid " << tid << dendl; | |
2044 | ||
2045 | MDSTableClient *client = mds->get_table_client(table); | |
2046 | if (!client) | |
2047 | return; | |
2048 | ||
11fdf7f2 | 2049 | ceph_assert(op == TABLESERVER_OP_ACK); |
7c673cae FG |
2050 | client->got_journaled_ack(tid); |
2051 | } | |
2052 | ||
2053 | ||
2054 | // ----------------------- | |
2055 | // ESnap | |
2056 | /* | |
2057 | void ESnap::update_segment() | |
2058 | { | |
11fdf7f2 | 2059 | get_segment()->tablev[TABLE_SNAP] = version; |
7c673cae FG |
2060 | } |
2061 | ||
2062 | void ESnap::replay(MDSRank *mds) | |
2063 | { | |
2064 | if (mds->snaptable->get_version() >= version) { | |
2065 | dout(10) << "ESnap.replay event " << version | |
2066 | << " <= table " << mds->snaptable->get_version() << dendl; | |
2067 | return; | |
2068 | } | |
2069 | ||
2070 | dout(10) << " ESnap.replay event " << version | |
2071 | << " - 1 == table " << mds->snaptable->get_version() << dendl; | |
11fdf7f2 | 2072 | ceph_assert(version-1 == mds->snaptable->get_version()); |
7c673cae FG |
2073 | |
2074 | if (create) { | |
2075 | version_t v; | |
2076 | snapid_t s = mds->snaptable->create(snap.dirino, snap.name, snap.stamp, &v); | |
11fdf7f2 | 2077 | ceph_assert(s == snap.snapid); |
7c673cae FG |
2078 | } else { |
2079 | mds->snaptable->remove(snap.snapid); | |
2080 | } | |
2081 | ||
11fdf7f2 | 2082 | ceph_assert(version == mds->snaptable->get_version()); |
7c673cae FG |
2083 | } |
2084 | */ | |
2085 | ||
2086 | ||
2087 | ||
2088 | // ----------------------- | |
2089 | // EUpdate | |
2090 | ||
2091 | void EUpdate::encode(bufferlist &bl, uint64_t features) const | |
2092 | { | |
2093 | ENCODE_START(4, 4, bl); | |
11fdf7f2 TL |
2094 | encode(stamp, bl); |
2095 | encode(type, bl); | |
2096 | encode(metablob, bl, features); | |
2097 | encode(client_map, bl); | |
2098 | encode(cmapv, bl); | |
2099 | encode(reqid, bl); | |
2100 | encode(had_slaves, bl); | |
7c673cae FG |
2101 | ENCODE_FINISH(bl); |
2102 | } | |
2103 | ||
11fdf7f2 | 2104 | void EUpdate::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
2105 | { |
2106 | DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, bl); | |
2107 | if (struct_v >= 2) | |
11fdf7f2 TL |
2108 | decode(stamp, bl); |
2109 | decode(type, bl); | |
2110 | decode(metablob, bl); | |
2111 | decode(client_map, bl); | |
7c673cae | 2112 | if (struct_v >= 3) |
11fdf7f2 TL |
2113 | decode(cmapv, bl); |
2114 | decode(reqid, bl); | |
2115 | decode(had_slaves, bl); | |
7c673cae FG |
2116 | DECODE_FINISH(bl); |
2117 | } | |
2118 | ||
2119 | void EUpdate::dump(Formatter *f) const | |
2120 | { | |
2121 | f->open_object_section("metablob"); | |
2122 | metablob.dump(f); | |
2123 | f->close_section(); // metablob | |
2124 | ||
2125 | f->dump_string("type", type); | |
2126 | f->dump_int("client map length", client_map.length()); | |
2127 | f->dump_int("client map version", cmapv); | |
2128 | f->dump_stream("reqid") << reqid; | |
2129 | f->dump_string("had slaves", had_slaves ? "true" : "false"); | |
2130 | } | |
2131 | ||
9f95a23c | 2132 | void EUpdate::generate_test_instances(std::list<EUpdate*>& ls) |
7c673cae FG |
2133 | { |
2134 | ls.push_back(new EUpdate()); | |
2135 | } | |
2136 | ||
2137 | ||
2138 | void EUpdate::update_segment() | |
2139 | { | |
11fdf7f2 TL |
2140 | auto&& segment = get_segment(); |
2141 | metablob.update_segment(segment); | |
7c673cae FG |
2142 | |
2143 | if (client_map.length()) | |
11fdf7f2 | 2144 | segment->sessionmapv = cmapv; |
7c673cae FG |
2145 | |
2146 | if (had_slaves) | |
11fdf7f2 | 2147 | segment->uncommitted_masters.insert(reqid); |
7c673cae FG |
2148 | } |
2149 | ||
2150 | void EUpdate::replay(MDSRank *mds) | |
2151 | { | |
11fdf7f2 TL |
2152 | auto&& segment = get_segment(); |
2153 | metablob.replay(mds, segment); | |
7c673cae FG |
2154 | |
2155 | if (had_slaves) { | |
2156 | dout(10) << "EUpdate.replay " << reqid << " had slaves, expecting a matching ECommitted" << dendl; | |
11fdf7f2 | 2157 | segment->uncommitted_masters.insert(reqid); |
7c673cae | 2158 | set<mds_rank_t> slaves; |
11fdf7f2 | 2159 | mds->mdcache->add_uncommitted_master(reqid, segment, slaves, true); |
7c673cae FG |
2160 | } |
2161 | ||
2162 | if (client_map.length()) { | |
2163 | if (mds->sessionmap.get_version() >= cmapv) { | |
2164 | dout(10) << "EUpdate.replay sessionmap v " << cmapv | |
2165 | << " <= table " << mds->sessionmap.get_version() << dendl; | |
2166 | } else { | |
2167 | dout(10) << "EUpdate.replay sessionmap " << mds->sessionmap.get_version() | |
2168 | << " < " << cmapv << dendl; | |
2169 | // open client sessions? | |
2170 | map<client_t,entity_inst_t> cm; | |
11fdf7f2 TL |
2171 | map<client_t,client_metadata_t> cmm; |
2172 | auto blp = client_map.cbegin(); | |
2173 | using ceph::decode; | |
2174 | decode(cm, blp); | |
2175 | if (!blp.end()) | |
2176 | decode(cmm, blp); | |
81eedcae | 2177 | mds->sessionmap.replay_open_sessions(cmapv, cm, cmm); |
7c673cae FG |
2178 | } |
2179 | } | |
2180 | update_segment(); | |
2181 | } | |
2182 | ||
2183 | ||
2184 | // ------------------------ | |
2185 | // EOpen | |
2186 | ||
2187 | void EOpen::encode(bufferlist &bl, uint64_t features) const { | |
2188 | ENCODE_START(4, 3, bl); | |
11fdf7f2 TL |
2189 | encode(stamp, bl); |
2190 | encode(metablob, bl, features); | |
2191 | encode(inos, bl); | |
2192 | encode(snap_inos, bl); | |
7c673cae FG |
2193 | ENCODE_FINISH(bl); |
2194 | } | |
2195 | ||
11fdf7f2 | 2196 | void EOpen::decode(bufferlist::const_iterator &bl) { |
7c673cae FG |
2197 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); |
2198 | if (struct_v >= 2) | |
11fdf7f2 TL |
2199 | decode(stamp, bl); |
2200 | decode(metablob, bl); | |
2201 | decode(inos, bl); | |
7c673cae | 2202 | if (struct_v >= 4) |
11fdf7f2 | 2203 | decode(snap_inos, bl); |
7c673cae FG |
2204 | DECODE_FINISH(bl); |
2205 | } | |
2206 | ||
2207 | void EOpen::dump(Formatter *f) const | |
2208 | { | |
2209 | f->open_object_section("metablob"); | |
2210 | metablob.dump(f); | |
2211 | f->close_section(); // metablob | |
2212 | f->open_array_section("inos involved"); | |
2213 | for (vector<inodeno_t>::const_iterator i = inos.begin(); | |
2214 | i != inos.end(); ++i) { | |
2215 | f->dump_int("ino", *i); | |
2216 | } | |
2217 | f->close_section(); // inos | |
2218 | } | |
2219 | ||
9f95a23c | 2220 | void EOpen::generate_test_instances(std::list<EOpen*>& ls) |
7c673cae FG |
2221 | { |
2222 | ls.push_back(new EOpen()); | |
2223 | ls.push_back(new EOpen()); | |
2224 | ls.back()->add_ino(0); | |
2225 | } | |
2226 | ||
2227 | void EOpen::update_segment() | |
2228 | { | |
2229 | // ?? | |
2230 | } | |
2231 | ||
2232 | void EOpen::replay(MDSRank *mds) | |
2233 | { | |
2234 | dout(10) << "EOpen.replay " << dendl; | |
11fdf7f2 TL |
2235 | auto&& segment = get_segment(); |
2236 | metablob.replay(mds, segment); | |
7c673cae FG |
2237 | |
2238 | // note which segments inodes belong to, so we don't have to start rejournaling them | |
2239 | for (const auto &ino : inos) { | |
2240 | CInode *in = mds->mdcache->get_inode(ino); | |
2241 | if (!in) { | |
2242 | dout(0) << "EOpen.replay ino " << ino << " not in metablob" << dendl; | |
11fdf7f2 | 2243 | ceph_assert(in); |
7c673cae | 2244 | } |
11fdf7f2 | 2245 | segment->open_files.push_back(&in->item_open_file); |
7c673cae FG |
2246 | } |
2247 | for (const auto &vino : snap_inos) { | |
2248 | CInode *in = mds->mdcache->get_inode(vino); | |
2249 | if (!in) { | |
2250 | dout(0) << "EOpen.replay ino " << vino << " not in metablob" << dendl; | |
11fdf7f2 | 2251 | ceph_assert(in); |
7c673cae | 2252 | } |
11fdf7f2 | 2253 | segment->open_files.push_back(&in->item_open_file); |
7c673cae FG |
2254 | } |
2255 | } | |
2256 | ||
2257 | ||
2258 | // ----------------------- | |
2259 | // ECommitted | |
2260 | ||
2261 | void ECommitted::replay(MDSRank *mds) | |
2262 | { | |
2263 | if (mds->mdcache->uncommitted_masters.count(reqid)) { | |
2264 | dout(10) << "ECommitted.replay " << reqid << dendl; | |
2265 | mds->mdcache->uncommitted_masters[reqid].ls->uncommitted_masters.erase(reqid); | |
2266 | mds->mdcache->uncommitted_masters.erase(reqid); | |
2267 | } else { | |
2268 | dout(10) << "ECommitted.replay " << reqid << " -- didn't see original op" << dendl; | |
2269 | } | |
2270 | } | |
2271 | ||
2272 | void ECommitted::encode(bufferlist& bl, uint64_t features) const | |
2273 | { | |
2274 | ENCODE_START(3, 3, bl); | |
11fdf7f2 TL |
2275 | encode(stamp, bl); |
2276 | encode(reqid, bl); | |
7c673cae FG |
2277 | ENCODE_FINISH(bl); |
2278 | } | |
2279 | ||
11fdf7f2 | 2280 | void ECommitted::decode(bufferlist::const_iterator& bl) |
7c673cae FG |
2281 | { |
2282 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
2283 | if (struct_v >= 2) | |
11fdf7f2 TL |
2284 | decode(stamp, bl); |
2285 | decode(reqid, bl); | |
7c673cae FG |
2286 | DECODE_FINISH(bl); |
2287 | } | |
2288 | ||
2289 | void ECommitted::dump(Formatter *f) const { | |
2290 | f->dump_stream("stamp") << stamp; | |
2291 | f->dump_stream("reqid") << reqid; | |
2292 | } | |
2293 | ||
9f95a23c | 2294 | void ECommitted::generate_test_instances(std::list<ECommitted*>& ls) |
7c673cae FG |
2295 | { |
2296 | ls.push_back(new ECommitted); | |
2297 | ls.push_back(new ECommitted); | |
2298 | ls.back()->stamp = utime_t(1, 2); | |
2299 | ls.back()->reqid = metareqid_t(entity_name_t::CLIENT(123), 456); | |
2300 | } | |
2301 | ||
2302 | // ----------------------- | |
2303 | // ESlaveUpdate | |
2304 | ||
2305 | void link_rollback::encode(bufferlist &bl) const | |
2306 | { | |
11fdf7f2 TL |
2307 | ENCODE_START(3, 2, bl); |
2308 | encode(reqid, bl); | |
2309 | encode(ino, bl); | |
2310 | encode(was_inc, bl); | |
2311 | encode(old_ctime, bl); | |
2312 | encode(old_dir_mtime, bl); | |
2313 | encode(old_dir_rctime, bl); | |
2314 | encode(snapbl, bl); | |
7c673cae FG |
2315 | ENCODE_FINISH(bl); |
2316 | } | |
2317 | ||
11fdf7f2 | 2318 | void link_rollback::decode(bufferlist::const_iterator &bl) |
7c673cae | 2319 | { |
11fdf7f2 TL |
2320 | DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl); |
2321 | decode(reqid, bl); | |
2322 | decode(ino, bl); | |
2323 | decode(was_inc, bl); | |
2324 | decode(old_ctime, bl); | |
2325 | decode(old_dir_mtime, bl); | |
2326 | decode(old_dir_rctime, bl); | |
2327 | if (struct_v >= 3) | |
2328 | decode(snapbl, bl); | |
7c673cae FG |
2329 | DECODE_FINISH(bl); |
2330 | } | |
2331 | ||
2332 | void link_rollback::dump(Formatter *f) const | |
2333 | { | |
2334 | f->dump_stream("metareqid") << reqid; | |
2335 | f->dump_int("ino", ino); | |
2336 | f->dump_string("was incremented", was_inc ? "true" : "false"); | |
2337 | f->dump_stream("old_ctime") << old_ctime; | |
2338 | f->dump_stream("old_dir_mtime") << old_dir_mtime; | |
2339 | f->dump_stream("old_dir_rctime") << old_dir_rctime; | |
2340 | } | |
2341 | ||
9f95a23c | 2342 | void link_rollback::generate_test_instances(std::list<link_rollback*>& ls) |
7c673cae FG |
2343 | { |
2344 | ls.push_back(new link_rollback()); | |
2345 | } | |
2346 | ||
2347 | void rmdir_rollback::encode(bufferlist& bl) const | |
2348 | { | |
11fdf7f2 TL |
2349 | ENCODE_START(3, 2, bl); |
2350 | encode(reqid, bl); | |
2351 | encode(src_dir, bl); | |
2352 | encode(src_dname, bl); | |
2353 | encode(dest_dir, bl); | |
2354 | encode(dest_dname, bl); | |
2355 | encode(snapbl, bl); | |
7c673cae FG |
2356 | ENCODE_FINISH(bl); |
2357 | } | |
2358 | ||
11fdf7f2 | 2359 | void rmdir_rollback::decode(bufferlist::const_iterator& bl) |
7c673cae | 2360 | { |
11fdf7f2 TL |
2361 | DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl); |
2362 | decode(reqid, bl); | |
2363 | decode(src_dir, bl); | |
2364 | decode(src_dname, bl); | |
2365 | decode(dest_dir, bl); | |
2366 | decode(dest_dname, bl); | |
2367 | if (struct_v >= 3) | |
2368 | decode(snapbl, bl); | |
7c673cae FG |
2369 | DECODE_FINISH(bl); |
2370 | } | |
2371 | ||
2372 | void rmdir_rollback::dump(Formatter *f) const | |
2373 | { | |
2374 | f->dump_stream("metareqid") << reqid; | |
2375 | f->dump_stream("source directory") << src_dir; | |
2376 | f->dump_string("source dname", src_dname); | |
2377 | f->dump_stream("destination directory") << dest_dir; | |
2378 | f->dump_string("destination dname", dest_dname); | |
2379 | } | |
2380 | ||
9f95a23c | 2381 | void rmdir_rollback::generate_test_instances(std::list<rmdir_rollback*>& ls) |
7c673cae FG |
2382 | { |
2383 | ls.push_back(new rmdir_rollback()); | |
2384 | } | |
2385 | ||
2386 | void rename_rollback::drec::encode(bufferlist &bl) const | |
2387 | { | |
2388 | ENCODE_START(2, 2, bl); | |
11fdf7f2 TL |
2389 | encode(dirfrag, bl); |
2390 | encode(dirfrag_old_mtime, bl); | |
2391 | encode(dirfrag_old_rctime, bl); | |
2392 | encode(ino, bl); | |
2393 | encode(remote_ino, bl); | |
2394 | encode(dname, bl); | |
2395 | encode(remote_d_type, bl); | |
2396 | encode(old_ctime, bl); | |
7c673cae FG |
2397 | ENCODE_FINISH(bl); |
2398 | } | |
2399 | ||
11fdf7f2 | 2400 | void rename_rollback::drec::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
2401 | { |
2402 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
11fdf7f2 TL |
2403 | decode(dirfrag, bl); |
2404 | decode(dirfrag_old_mtime, bl); | |
2405 | decode(dirfrag_old_rctime, bl); | |
2406 | decode(ino, bl); | |
2407 | decode(remote_ino, bl); | |
2408 | decode(dname, bl); | |
2409 | decode(remote_d_type, bl); | |
2410 | decode(old_ctime, bl); | |
7c673cae FG |
2411 | DECODE_FINISH(bl); |
2412 | } | |
2413 | ||
2414 | void rename_rollback::drec::dump(Formatter *f) const | |
2415 | { | |
2416 | f->dump_stream("directory fragment") << dirfrag; | |
2417 | f->dump_stream("directory old mtime") << dirfrag_old_mtime; | |
2418 | f->dump_stream("directory old rctime") << dirfrag_old_rctime; | |
2419 | f->dump_int("ino", ino); | |
2420 | f->dump_int("remote ino", remote_ino); | |
2421 | f->dump_string("dname", dname); | |
2422 | uint32_t type = DTTOIF(remote_d_type) & S_IFMT; // convert to type entries | |
2423 | string type_string; | |
2424 | switch(type) { | |
2425 | case S_IFREG: | |
2426 | type_string = "file"; break; | |
2427 | case S_IFLNK: | |
2428 | type_string = "symlink"; break; | |
2429 | case S_IFDIR: | |
2430 | type_string = "directory"; break; | |
2431 | default: | |
2432 | type_string = "UNKNOWN-" + stringify((int)type); break; | |
2433 | } | |
2434 | f->dump_string("remote dtype", type_string); | |
2435 | f->dump_stream("old ctime") << old_ctime; | |
2436 | } | |
2437 | ||
9f95a23c | 2438 | void rename_rollback::drec::generate_test_instances(std::list<drec*>& ls) |
7c673cae FG |
2439 | { |
2440 | ls.push_back(new drec()); | |
2441 | ls.back()->remote_d_type = IFTODT(S_IFREG); | |
2442 | } | |
2443 | ||
2444 | void rename_rollback::encode(bufferlist &bl) const | |
2445 | { | |
11fdf7f2 TL |
2446 | ENCODE_START(3, 2, bl); |
2447 | encode(reqid, bl); | |
7c673cae FG |
2448 | encode(orig_src, bl); |
2449 | encode(orig_dest, bl); | |
2450 | encode(stray, bl); | |
11fdf7f2 TL |
2451 | encode(ctime, bl); |
2452 | encode(srci_snapbl, bl); | |
2453 | encode(desti_snapbl, bl); | |
7c673cae FG |
2454 | ENCODE_FINISH(bl); |
2455 | } | |
2456 | ||
11fdf7f2 | 2457 | void rename_rollback::decode(bufferlist::const_iterator &bl) |
7c673cae | 2458 | { |
11fdf7f2 TL |
2459 | DECODE_START_LEGACY_COMPAT_LEN(3, 2, 2, bl); |
2460 | decode(reqid, bl); | |
7c673cae FG |
2461 | decode(orig_src, bl); |
2462 | decode(orig_dest, bl); | |
2463 | decode(stray, bl); | |
11fdf7f2 TL |
2464 | decode(ctime, bl); |
2465 | if (struct_v >= 3) { | |
2466 | decode(srci_snapbl, bl); | |
2467 | decode(desti_snapbl, bl); | |
2468 | } | |
7c673cae FG |
2469 | DECODE_FINISH(bl); |
2470 | } | |
2471 | ||
2472 | void rename_rollback::dump(Formatter *f) const | |
2473 | { | |
2474 | f->dump_stream("request id") << reqid; | |
2475 | f->open_object_section("original src drec"); | |
2476 | orig_src.dump(f); | |
2477 | f->close_section(); // original src drec | |
2478 | f->open_object_section("original dest drec"); | |
2479 | orig_dest.dump(f); | |
2480 | f->close_section(); // original dest drec | |
2481 | f->open_object_section("stray drec"); | |
2482 | stray.dump(f); | |
2483 | f->close_section(); // stray drec | |
2484 | f->dump_stream("ctime") << ctime; | |
2485 | } | |
2486 | ||
9f95a23c | 2487 | void rename_rollback::generate_test_instances(std::list<rename_rollback*>& ls) |
7c673cae FG |
2488 | { |
2489 | ls.push_back(new rename_rollback()); | |
2490 | ls.back()->orig_src.remote_d_type = IFTODT(S_IFREG); | |
2491 | ls.back()->orig_dest.remote_d_type = IFTODT(S_IFREG); | |
2492 | ls.back()->stray.remote_d_type = IFTODT(S_IFREG); | |
2493 | } | |
2494 | ||
2495 | void ESlaveUpdate::encode(bufferlist &bl, uint64_t features) const | |
2496 | { | |
2497 | ENCODE_START(3, 3, bl); | |
11fdf7f2 TL |
2498 | encode(stamp, bl); |
2499 | encode(type, bl); | |
2500 | encode(reqid, bl); | |
2501 | encode(master, bl); | |
2502 | encode(op, bl); | |
2503 | encode(origop, bl); | |
2504 | encode(commit, bl, features); | |
2505 | encode(rollback, bl); | |
7c673cae FG |
2506 | ENCODE_FINISH(bl); |
2507 | } | |
2508 | ||
11fdf7f2 | 2509 | void ESlaveUpdate::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
2510 | { |
2511 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
2512 | if (struct_v >= 2) | |
11fdf7f2 TL |
2513 | decode(stamp, bl); |
2514 | decode(type, bl); | |
2515 | decode(reqid, bl); | |
2516 | decode(master, bl); | |
2517 | decode(op, bl); | |
2518 | decode(origop, bl); | |
2519 | decode(commit, bl); | |
2520 | decode(rollback, bl); | |
7c673cae FG |
2521 | DECODE_FINISH(bl); |
2522 | } | |
2523 | ||
2524 | void ESlaveUpdate::dump(Formatter *f) const | |
2525 | { | |
2526 | f->open_object_section("metablob"); | |
2527 | commit.dump(f); | |
2528 | f->close_section(); // metablob | |
2529 | ||
2530 | f->dump_int("rollback length", rollback.length()); | |
2531 | f->dump_string("type", type); | |
2532 | f->dump_stream("metareqid") << reqid; | |
2533 | f->dump_int("master", master); | |
2534 | f->dump_int("op", op); | |
2535 | f->dump_int("original op", origop); | |
2536 | } | |
2537 | ||
9f95a23c | 2538 | void ESlaveUpdate::generate_test_instances(std::list<ESlaveUpdate*>& ls) |
7c673cae FG |
2539 | { |
2540 | ls.push_back(new ESlaveUpdate()); | |
2541 | } | |
2542 | ||
7c673cae FG |
2543 | void ESlaveUpdate::replay(MDSRank *mds) |
2544 | { | |
2545 | MDSlaveUpdate *su; | |
11fdf7f2 | 2546 | auto&& segment = get_segment(); |
7c673cae FG |
2547 | switch (op) { |
2548 | case ESlaveUpdate::OP_PREPARE: | |
2549 | dout(10) << "ESlaveUpdate.replay prepare " << reqid << " for mds." << master | |
2550 | << ": applying commit, saving rollback info" << dendl; | |
e306af50 | 2551 | su = new MDSlaveUpdate(origop, rollback); |
11fdf7f2 | 2552 | commit.replay(mds, segment, su); |
e306af50 | 2553 | mds->mdcache->add_uncommitted_slave(reqid, segment, master, su); |
7c673cae FG |
2554 | break; |
2555 | ||
2556 | case ESlaveUpdate::OP_COMMIT: | |
e306af50 TL |
2557 | dout(10) << "ESlaveUpdate.replay commit " << reqid << " for mds." << master << dendl; |
2558 | mds->mdcache->finish_uncommitted_slave(reqid, false); | |
7c673cae FG |
2559 | break; |
2560 | ||
2561 | case ESlaveUpdate::OP_ROLLBACK: | |
2562 | dout(10) << "ESlaveUpdate.replay abort " << reqid << " for mds." << master | |
2563 | << ": applying rollback commit blob" << dendl; | |
11fdf7f2 | 2564 | commit.replay(mds, segment); |
e306af50 | 2565 | mds->mdcache->finish_uncommitted_slave(reqid, false); |
7c673cae FG |
2566 | break; |
2567 | ||
2568 | default: | |
2569 | mds->clog->error() << "invalid op in ESlaveUpdate"; | |
2570 | mds->damaged(); | |
2571 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
2572 | } | |
2573 | } | |
2574 | ||
2575 | ||
2576 | // ----------------------- | |
2577 | // ESubtreeMap | |
2578 | ||
2579 | void ESubtreeMap::encode(bufferlist& bl, uint64_t features) const | |
2580 | { | |
2581 | ENCODE_START(6, 5, bl); | |
11fdf7f2 TL |
2582 | encode(stamp, bl); |
2583 | encode(metablob, bl, features); | |
2584 | encode(subtrees, bl); | |
2585 | encode(ambiguous_subtrees, bl); | |
2586 | encode(expire_pos, bl); | |
2587 | encode(event_seq, bl); | |
7c673cae FG |
2588 | ENCODE_FINISH(bl); |
2589 | } | |
2590 | ||
11fdf7f2 | 2591 | void ESubtreeMap::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
2592 | { |
2593 | DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl); | |
2594 | if (struct_v >= 2) | |
11fdf7f2 TL |
2595 | decode(stamp, bl); |
2596 | decode(metablob, bl); | |
2597 | decode(subtrees, bl); | |
7c673cae | 2598 | if (struct_v >= 4) |
11fdf7f2 | 2599 | decode(ambiguous_subtrees, bl); |
7c673cae | 2600 | if (struct_v >= 3) |
11fdf7f2 | 2601 | decode(expire_pos, bl); |
7c673cae | 2602 | if (struct_v >= 6) |
11fdf7f2 | 2603 | decode(event_seq, bl); |
7c673cae FG |
2604 | DECODE_FINISH(bl); |
2605 | } | |
2606 | ||
2607 | void ESubtreeMap::dump(Formatter *f) const | |
2608 | { | |
2609 | f->open_object_section("metablob"); | |
2610 | metablob.dump(f); | |
2611 | f->close_section(); // metablob | |
2612 | ||
2613 | f->open_array_section("subtrees"); | |
2614 | for(map<dirfrag_t,vector<dirfrag_t> >::const_iterator i = subtrees.begin(); | |
2615 | i != subtrees.end(); ++i) { | |
2616 | f->open_object_section("tree"); | |
2617 | f->dump_stream("root dirfrag") << i->first; | |
2618 | for (vector<dirfrag_t>::const_iterator j = i->second.begin(); | |
2619 | j != i->second.end(); ++j) { | |
2620 | f->dump_stream("bound dirfrag") << *j; | |
2621 | } | |
2622 | f->close_section(); // tree | |
2623 | } | |
2624 | f->close_section(); // subtrees | |
2625 | ||
2626 | f->open_array_section("ambiguous subtrees"); | |
2627 | for(set<dirfrag_t>::const_iterator i = ambiguous_subtrees.begin(); | |
2628 | i != ambiguous_subtrees.end(); ++i) { | |
2629 | f->dump_stream("dirfrag") << *i; | |
2630 | } | |
2631 | f->close_section(); // ambiguous subtrees | |
2632 | ||
2633 | f->dump_int("expire position", expire_pos); | |
2634 | } | |
2635 | ||
9f95a23c | 2636 | void ESubtreeMap::generate_test_instances(std::list<ESubtreeMap*>& ls) |
7c673cae FG |
2637 | { |
2638 | ls.push_back(new ESubtreeMap()); | |
2639 | } | |
2640 | ||
2641 | void ESubtreeMap::replay(MDSRank *mds) | |
2642 | { | |
2643 | if (expire_pos && expire_pos > mds->mdlog->journaler->get_expire_pos()) | |
2644 | mds->mdlog->journaler->set_expire_pos(expire_pos); | |
2645 | ||
2646 | // suck up the subtree map? | |
2647 | if (mds->mdcache->is_subtrees()) { | |
2648 | dout(10) << "ESubtreeMap.replay -- i already have import map; verifying" << dendl; | |
2649 | int errors = 0; | |
2650 | ||
2651 | for (map<dirfrag_t, vector<dirfrag_t> >::iterator p = subtrees.begin(); | |
2652 | p != subtrees.end(); | |
2653 | ++p) { | |
2654 | CDir *dir = mds->mdcache->get_dirfrag(p->first); | |
2655 | if (!dir) { | |
2656 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2657 | << " subtree root " << p->first << " not in cache"; | |
2658 | ++errors; | |
2659 | continue; | |
2660 | } | |
2661 | ||
2662 | if (!mds->mdcache->is_subtree(dir)) { | |
2663 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2664 | << " subtree root " << p->first << " not a subtree in cache"; | |
2665 | ++errors; | |
2666 | continue; | |
2667 | } | |
2668 | if (dir->get_dir_auth().first != mds->get_nodeid()) { | |
2669 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2670 | << " subtree root " << p->first | |
2671 | << " is not mine in cache (it's " << dir->get_dir_auth() << ")"; | |
2672 | ++errors; | |
2673 | continue; | |
2674 | } | |
2675 | ||
2676 | for (vector<dirfrag_t>::iterator q = p->second.begin(); q != p->second.end(); ++q) | |
2677 | mds->mdcache->get_force_dirfrag(*q, true); | |
2678 | ||
2679 | set<CDir*> bounds; | |
2680 | mds->mdcache->get_subtree_bounds(dir, bounds); | |
2681 | for (vector<dirfrag_t>::iterator q = p->second.begin(); q != p->second.end(); ++q) { | |
2682 | CDir *b = mds->mdcache->get_dirfrag(*q); | |
2683 | if (!b) { | |
2684 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2685 | << " subtree " << p->first << " bound " << *q << " not in cache"; | |
2686 | ++errors; | |
2687 | continue; | |
2688 | } | |
2689 | if (bounds.count(b) == 0) { | |
2690 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2691 | << " subtree " << p->first << " bound " << *q << " not a bound in cache"; | |
2692 | ++errors; | |
2693 | continue; | |
2694 | } | |
2695 | bounds.erase(b); | |
2696 | } | |
2697 | for (set<CDir*>::iterator q = bounds.begin(); q != bounds.end(); ++q) { | |
2698 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2699 | << " subtree " << p->first << " has extra bound in cache " << (*q)->dirfrag(); | |
2700 | ++errors; | |
2701 | } | |
2702 | ||
2703 | if (ambiguous_subtrees.count(p->first)) { | |
2704 | if (!mds->mdcache->have_ambiguous_import(p->first)) { | |
2705 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2706 | << " subtree " << p->first << " is ambiguous but is not in our cache"; | |
2707 | ++errors; | |
2708 | } | |
2709 | } else { | |
2710 | if (mds->mdcache->have_ambiguous_import(p->first)) { | |
2711 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2712 | << " subtree " << p->first << " is not ambiguous but is in our cache"; | |
2713 | ++errors; | |
2714 | } | |
2715 | } | |
2716 | } | |
2717 | ||
11fdf7f2 TL |
2718 | std::vector<CDir*> dirs; |
2719 | mds->mdcache->get_subtrees(dirs); | |
2720 | for (const auto& dir : dirs) { | |
7c673cae FG |
2721 | if (dir->get_dir_auth().first != mds->get_nodeid()) |
2722 | continue; | |
2723 | if (subtrees.count(dir->dirfrag()) == 0) { | |
2724 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2725 | << " does not include cache subtree " << dir->dirfrag(); | |
2726 | ++errors; | |
2727 | } | |
2728 | } | |
2729 | ||
2730 | if (errors) { | |
2731 | dout(0) << "journal subtrees: " << subtrees << dendl; | |
2732 | dout(0) << "journal ambig_subtrees: " << ambiguous_subtrees << dendl; | |
2733 | mds->mdcache->show_subtrees(); | |
11fdf7f2 | 2734 | ceph_assert(!g_conf()->mds_debug_subtrees || errors == 0); |
7c673cae FG |
2735 | } |
2736 | return; | |
2737 | } | |
2738 | ||
2739 | dout(10) << "ESubtreeMap.replay -- reconstructing (auth) subtree spanning tree" << dendl; | |
2740 | ||
2741 | // first, stick the spanning tree in my cache | |
2742 | //metablob.print(*_dout); | |
11fdf7f2 | 2743 | metablob.replay(mds, get_segment()); |
7c673cae FG |
2744 | |
2745 | // restore import/export maps | |
2746 | for (map<dirfrag_t, vector<dirfrag_t> >::iterator p = subtrees.begin(); | |
2747 | p != subtrees.end(); | |
2748 | ++p) { | |
2749 | CDir *dir = mds->mdcache->get_dirfrag(p->first); | |
11fdf7f2 | 2750 | ceph_assert(dir); |
7c673cae FG |
2751 | if (ambiguous_subtrees.count(p->first)) { |
2752 | // ambiguous! | |
2753 | mds->mdcache->add_ambiguous_import(p->first, p->second); | |
2754 | mds->mdcache->adjust_bounded_subtree_auth(dir, p->second, | |
2755 | mds_authority_t(mds->get_nodeid(), mds->get_nodeid())); | |
2756 | } else { | |
2757 | // not ambiguous | |
2758 | mds->mdcache->adjust_bounded_subtree_auth(dir, p->second, mds->get_nodeid()); | |
2759 | } | |
2760 | } | |
2761 | ||
2762 | mds->mdcache->recalc_auth_bits(true); | |
2763 | ||
2764 | mds->mdcache->show_subtrees(); | |
2765 | } | |
2766 | ||
2767 | ||
2768 | ||
2769 | // ----------------------- | |
2770 | // EFragment | |
2771 | ||
2772 | void EFragment::replay(MDSRank *mds) | |
2773 | { | |
2774 | dout(10) << "EFragment.replay " << op_name(op) << " " << ino << " " << basefrag << " by " << bits << dendl; | |
2775 | ||
9f95a23c | 2776 | std::vector<CDir*> resultfrags; |
11fdf7f2 | 2777 | MDSContext::vec waiters; |
7c673cae FG |
2778 | |
2779 | // in may be NULL if it wasn't in our cache yet. if it's a prepare | |
2780 | // it will be once we replay the metablob , but first we need to | |
2781 | // refragment anything we already have in the cache. | |
2782 | CInode *in = mds->mdcache->get_inode(ino); | |
2783 | ||
11fdf7f2 | 2784 | auto&& segment = get_segment(); |
7c673cae FG |
2785 | switch (op) { |
2786 | case OP_PREPARE: | |
11fdf7f2 | 2787 | mds->mdcache->add_uncommitted_fragment(dirfrag_t(ino, basefrag), bits, orig_frags, segment, &rollback); |
7c673cae FG |
2788 | |
2789 | if (in) | |
9f95a23c | 2790 | mds->mdcache->adjust_dir_fragments(in, basefrag, bits, &resultfrags, waiters, true); |
7c673cae FG |
2791 | break; |
2792 | ||
11fdf7f2 TL |
2793 | case OP_ROLLBACK: { |
2794 | frag_vec_t old_frags; | |
7c673cae FG |
2795 | if (in) { |
2796 | in->dirfragtree.get_leaves_under(basefrag, old_frags); | |
2797 | if (orig_frags.empty()) { | |
2798 | // old format EFragment | |
9f95a23c | 2799 | mds->mdcache->adjust_dir_fragments(in, basefrag, -bits, &resultfrags, waiters, true); |
7c673cae | 2800 | } else { |
11fdf7f2 TL |
2801 | for (const auto& fg : orig_frags) |
2802 | mds->mdcache->force_dir_fragment(in, fg); | |
7c673cae FG |
2803 | } |
2804 | } | |
11fdf7f2 | 2805 | mds->mdcache->rollback_uncommitted_fragment(dirfrag_t(ino, basefrag), std::move(old_frags)); |
7c673cae | 2806 | break; |
11fdf7f2 | 2807 | } |
7c673cae FG |
2808 | |
2809 | case OP_COMMIT: | |
2810 | case OP_FINISH: | |
2811 | mds->mdcache->finish_uncommitted_fragment(dirfrag_t(ino, basefrag), op); | |
2812 | break; | |
2813 | ||
2814 | default: | |
2815 | ceph_abort(); | |
2816 | } | |
2817 | ||
11fdf7f2 TL |
2818 | metablob.replay(mds, segment); |
2819 | if (in && g_conf()->mds_debug_frag) | |
7c673cae FG |
2820 | in->verify_dirfrags(); |
2821 | } | |
2822 | ||
2823 | void EFragment::encode(bufferlist &bl, uint64_t features) const { | |
2824 | ENCODE_START(5, 4, bl); | |
11fdf7f2 TL |
2825 | encode(stamp, bl); |
2826 | encode(op, bl); | |
2827 | encode(ino, bl); | |
2828 | encode(basefrag, bl); | |
2829 | encode(bits, bl); | |
2830 | encode(metablob, bl, features); | |
2831 | encode(orig_frags, bl); | |
2832 | encode(rollback, bl); | |
7c673cae FG |
2833 | ENCODE_FINISH(bl); |
2834 | } | |
2835 | ||
11fdf7f2 | 2836 | void EFragment::decode(bufferlist::const_iterator &bl) { |
7c673cae FG |
2837 | DECODE_START_LEGACY_COMPAT_LEN(5, 4, 4, bl); |
2838 | if (struct_v >= 2) | |
11fdf7f2 | 2839 | decode(stamp, bl); |
7c673cae | 2840 | if (struct_v >= 3) |
11fdf7f2 TL |
2841 | decode(op, bl); |
2842 | decode(ino, bl); | |
2843 | decode(basefrag, bl); | |
2844 | decode(bits, bl); | |
2845 | decode(metablob, bl); | |
7c673cae | 2846 | if (struct_v >= 5) { |
11fdf7f2 TL |
2847 | decode(orig_frags, bl); |
2848 | decode(rollback, bl); | |
7c673cae FG |
2849 | } |
2850 | DECODE_FINISH(bl); | |
2851 | } | |
2852 | ||
2853 | void EFragment::dump(Formatter *f) const | |
2854 | { | |
2855 | /*f->open_object_section("Metablob"); | |
2856 | metablob.dump(f); // sadly we don't have this; dunno if we'll get it | |
2857 | f->close_section();*/ | |
2858 | f->dump_string("op", op_name(op)); | |
2859 | f->dump_stream("ino") << ino; | |
2860 | f->dump_stream("base frag") << basefrag; | |
2861 | f->dump_int("bits", bits); | |
2862 | } | |
2863 | ||
9f95a23c | 2864 | void EFragment::generate_test_instances(std::list<EFragment*>& ls) |
7c673cae FG |
2865 | { |
2866 | ls.push_back(new EFragment); | |
2867 | ls.push_back(new EFragment); | |
2868 | ls.back()->op = OP_PREPARE; | |
2869 | ls.back()->ino = 1; | |
2870 | ls.back()->bits = 5; | |
2871 | } | |
2872 | ||
2873 | void dirfrag_rollback::encode(bufferlist &bl) const | |
2874 | { | |
2875 | ENCODE_START(1, 1, bl); | |
11fdf7f2 | 2876 | encode(fnode, bl); |
7c673cae FG |
2877 | ENCODE_FINISH(bl); |
2878 | } | |
2879 | ||
11fdf7f2 | 2880 | void dirfrag_rollback::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
2881 | { |
2882 | DECODE_START(1, bl); | |
11fdf7f2 | 2883 | decode(fnode, bl); |
7c673cae FG |
2884 | DECODE_FINISH(bl); |
2885 | } | |
2886 | ||
2887 | ||
2888 | ||
2889 | // ========================================================================= | |
2890 | ||
2891 | // ----------------------- | |
2892 | // EExport | |
2893 | ||
2894 | void EExport::replay(MDSRank *mds) | |
2895 | { | |
2896 | dout(10) << "EExport.replay " << base << dendl; | |
11fdf7f2 TL |
2897 | auto&& segment = get_segment(); |
2898 | metablob.replay(mds, segment); | |
7c673cae FG |
2899 | |
2900 | CDir *dir = mds->mdcache->get_dirfrag(base); | |
11fdf7f2 | 2901 | ceph_assert(dir); |
7c673cae FG |
2902 | |
2903 | set<CDir*> realbounds; | |
2904 | for (set<dirfrag_t>::iterator p = bounds.begin(); | |
2905 | p != bounds.end(); | |
2906 | ++p) { | |
2907 | CDir *bd = mds->mdcache->get_dirfrag(*p); | |
11fdf7f2 | 2908 | ceph_assert(bd); |
7c673cae FG |
2909 | realbounds.insert(bd); |
2910 | } | |
2911 | ||
2912 | // adjust auth away | |
2913 | mds->mdcache->adjust_bounded_subtree_auth(dir, realbounds, CDIR_AUTH_UNDEF); | |
2914 | ||
2915 | mds->mdcache->try_trim_non_auth_subtree(dir); | |
2916 | } | |
2917 | ||
2918 | void EExport::encode(bufferlist& bl, uint64_t features) const | |
2919 | { | |
31f18b77 | 2920 | ENCODE_START(4, 3, bl); |
11fdf7f2 TL |
2921 | encode(stamp, bl); |
2922 | encode(metablob, bl, features); | |
2923 | encode(base, bl); | |
2924 | encode(bounds, bl); | |
2925 | encode(target, bl); | |
7c673cae FG |
2926 | ENCODE_FINISH(bl); |
2927 | } | |
2928 | ||
11fdf7f2 | 2929 | void EExport::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
2930 | { |
2931 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
2932 | if (struct_v >= 2) | |
11fdf7f2 TL |
2933 | decode(stamp, bl); |
2934 | decode(metablob, bl); | |
2935 | decode(base, bl); | |
2936 | decode(bounds, bl); | |
31f18b77 | 2937 | if (struct_v >= 4) |
11fdf7f2 | 2938 | decode(target, bl); |
7c673cae FG |
2939 | DECODE_FINISH(bl); |
2940 | } | |
2941 | ||
2942 | void EExport::dump(Formatter *f) const | |
2943 | { | |
2944 | f->dump_float("stamp", (double)stamp); | |
2945 | /*f->open_object_section("Metablob"); | |
2946 | metablob.dump(f); // sadly we don't have this; dunno if we'll get it | |
2947 | f->close_section();*/ | |
2948 | f->dump_stream("base dirfrag") << base; | |
2949 | f->open_array_section("bounds dirfrags"); | |
2950 | for (set<dirfrag_t>::const_iterator i = bounds.begin(); | |
2951 | i != bounds.end(); ++i) { | |
2952 | f->dump_stream("dirfrag") << *i; | |
2953 | } | |
2954 | f->close_section(); // bounds dirfrags | |
2955 | } | |
2956 | ||
9f95a23c | 2957 | void EExport::generate_test_instances(std::list<EExport*>& ls) |
7c673cae FG |
2958 | { |
2959 | EExport *sample = new EExport(); | |
2960 | ls.push_back(sample); | |
2961 | } | |
2962 | ||
2963 | ||
2964 | // ----------------------- | |
2965 | // EImportStart | |
2966 | ||
2967 | void EImportStart::update_segment() | |
2968 | { | |
11fdf7f2 | 2969 | get_segment()->sessionmapv = cmapv; |
7c673cae FG |
2970 | } |
2971 | ||
2972 | void EImportStart::replay(MDSRank *mds) | |
2973 | { | |
2974 | dout(10) << "EImportStart.replay " << base << " bounds " << bounds << dendl; | |
2975 | //metablob.print(*_dout); | |
11fdf7f2 TL |
2976 | auto&& segment = get_segment(); |
2977 | metablob.replay(mds, segment); | |
7c673cae FG |
2978 | |
2979 | // put in ambiguous import list | |
2980 | mds->mdcache->add_ambiguous_import(base, bounds); | |
2981 | ||
2982 | // set auth partially to us so we don't trim it | |
2983 | CDir *dir = mds->mdcache->get_dirfrag(base); | |
11fdf7f2 | 2984 | ceph_assert(dir); |
7c673cae FG |
2985 | |
2986 | set<CDir*> realbounds; | |
2987 | for (vector<dirfrag_t>::iterator p = bounds.begin(); | |
2988 | p != bounds.end(); | |
2989 | ++p) { | |
2990 | CDir *bd = mds->mdcache->get_dirfrag(*p); | |
11fdf7f2 | 2991 | ceph_assert(bd); |
7c673cae FG |
2992 | if (!bd->is_subtree_root()) |
2993 | bd->state_clear(CDir::STATE_AUTH); | |
2994 | realbounds.insert(bd); | |
2995 | } | |
2996 | ||
2997 | mds->mdcache->adjust_bounded_subtree_auth(dir, realbounds, | |
2998 | mds_authority_t(mds->get_nodeid(), mds->get_nodeid())); | |
2999 | ||
3000 | // open client sessions? | |
3001 | if (mds->sessionmap.get_version() >= cmapv) { | |
3002 | dout(10) << "EImportStart.replay sessionmap " << mds->sessionmap.get_version() | |
3003 | << " >= " << cmapv << ", noop" << dendl; | |
3004 | } else { | |
3005 | dout(10) << "EImportStart.replay sessionmap " << mds->sessionmap.get_version() | |
3006 | << " < " << cmapv << dendl; | |
3007 | map<client_t,entity_inst_t> cm; | |
11fdf7f2 TL |
3008 | map<client_t,client_metadata_t> cmm; |
3009 | auto blp = client_map.cbegin(); | |
3010 | using ceph::decode; | |
3011 | decode(cm, blp); | |
3012 | if (!blp.end()) | |
3013 | decode(cmm, blp); | |
81eedcae | 3014 | mds->sessionmap.replay_open_sessions(cmapv, cm, cmm); |
7c673cae FG |
3015 | } |
3016 | update_segment(); | |
3017 | } | |
3018 | ||
3019 | void EImportStart::encode(bufferlist &bl, uint64_t features) const { | |
31f18b77 | 3020 | ENCODE_START(4, 3, bl); |
11fdf7f2 TL |
3021 | encode(stamp, bl); |
3022 | encode(base, bl); | |
3023 | encode(metablob, bl, features); | |
3024 | encode(bounds, bl); | |
3025 | encode(cmapv, bl); | |
3026 | encode(client_map, bl); | |
3027 | encode(from, bl); | |
7c673cae FG |
3028 | ENCODE_FINISH(bl); |
3029 | } | |
3030 | ||
11fdf7f2 | 3031 | void EImportStart::decode(bufferlist::const_iterator &bl) { |
7c673cae FG |
3032 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); |
3033 | if (struct_v >= 2) | |
11fdf7f2 TL |
3034 | decode(stamp, bl); |
3035 | decode(base, bl); | |
3036 | decode(metablob, bl); | |
3037 | decode(bounds, bl); | |
3038 | decode(cmapv, bl); | |
3039 | decode(client_map, bl); | |
31f18b77 | 3040 | if (struct_v >= 4) |
11fdf7f2 | 3041 | decode(from, bl); |
7c673cae FG |
3042 | DECODE_FINISH(bl); |
3043 | } | |
3044 | ||
3045 | void EImportStart::dump(Formatter *f) const | |
3046 | { | |
3047 | f->dump_stream("base dirfrag") << base; | |
3048 | f->open_array_section("boundary dirfrags"); | |
3049 | for (vector<dirfrag_t>::const_iterator iter = bounds.begin(); | |
3050 | iter != bounds.end(); ++iter) { | |
3051 | f->dump_stream("frag") << *iter; | |
3052 | } | |
3053 | f->close_section(); | |
3054 | } | |
3055 | ||
9f95a23c | 3056 | void EImportStart::generate_test_instances(std::list<EImportStart*>& ls) |
7c673cae FG |
3057 | { |
3058 | ls.push_back(new EImportStart); | |
3059 | } | |
3060 | ||
3061 | // ----------------------- | |
3062 | // EImportFinish | |
3063 | ||
3064 | void EImportFinish::replay(MDSRank *mds) | |
3065 | { | |
3066 | if (mds->mdcache->have_ambiguous_import(base)) { | |
3067 | dout(10) << "EImportFinish.replay " << base << " success=" << success << dendl; | |
3068 | if (success) { | |
3069 | mds->mdcache->finish_ambiguous_import(base); | |
3070 | } else { | |
3071 | CDir *dir = mds->mdcache->get_dirfrag(base); | |
11fdf7f2 | 3072 | ceph_assert(dir); |
7c673cae FG |
3073 | vector<dirfrag_t> bounds; |
3074 | mds->mdcache->get_ambiguous_import_bounds(base, bounds); | |
3075 | mds->mdcache->adjust_bounded_subtree_auth(dir, bounds, CDIR_AUTH_UNDEF); | |
3076 | mds->mdcache->cancel_ambiguous_import(dir); | |
3077 | mds->mdcache->try_trim_non_auth_subtree(dir); | |
3078 | } | |
3079 | } else { | |
3080 | // this shouldn't happen unless this is an old journal | |
3081 | dout(10) << "EImportFinish.replay " << base << " success=" << success | |
3082 | << " on subtree not marked as ambiguous" | |
3083 | << dendl; | |
3084 | mds->clog->error() << "failure replaying journal (EImportFinish)"; | |
3085 | mds->damaged(); | |
3086 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
3087 | } | |
3088 | } | |
3089 | ||
3090 | void EImportFinish::encode(bufferlist& bl, uint64_t features) const | |
3091 | { | |
3092 | ENCODE_START(3, 3, bl); | |
11fdf7f2 TL |
3093 | encode(stamp, bl); |
3094 | encode(base, bl); | |
3095 | encode(success, bl); | |
7c673cae FG |
3096 | ENCODE_FINISH(bl); |
3097 | } | |
3098 | ||
11fdf7f2 | 3099 | void EImportFinish::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
3100 | { |
3101 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
3102 | if (struct_v >= 2) | |
11fdf7f2 TL |
3103 | decode(stamp, bl); |
3104 | decode(base, bl); | |
3105 | decode(success, bl); | |
7c673cae FG |
3106 | DECODE_FINISH(bl); |
3107 | } | |
3108 | ||
3109 | void EImportFinish::dump(Formatter *f) const | |
3110 | { | |
3111 | f->dump_stream("base dirfrag") << base; | |
3112 | f->dump_string("success", success ? "true" : "false"); | |
3113 | } | |
9f95a23c | 3114 | void EImportFinish::generate_test_instances(std::list<EImportFinish*>& ls) |
7c673cae FG |
3115 | { |
3116 | ls.push_back(new EImportFinish); | |
3117 | ls.push_back(new EImportFinish); | |
3118 | ls.back()->success = true; | |
3119 | } | |
3120 | ||
3121 | ||
3122 | // ------------------------ | |
3123 | // EResetJournal | |
3124 | ||
3125 | void EResetJournal::encode(bufferlist& bl, uint64_t features) const | |
3126 | { | |
3127 | ENCODE_START(2, 2, bl); | |
11fdf7f2 | 3128 | encode(stamp, bl); |
7c673cae FG |
3129 | ENCODE_FINISH(bl); |
3130 | } | |
3131 | ||
11fdf7f2 | 3132 | void EResetJournal::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
3133 | { |
3134 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
11fdf7f2 | 3135 | decode(stamp, bl); |
7c673cae FG |
3136 | DECODE_FINISH(bl); |
3137 | } | |
3138 | ||
3139 | void EResetJournal::dump(Formatter *f) const | |
3140 | { | |
3141 | f->dump_stream("timestamp") << stamp; | |
3142 | } | |
3143 | ||
9f95a23c | 3144 | void EResetJournal::generate_test_instances(std::list<EResetJournal*>& ls) |
7c673cae FG |
3145 | { |
3146 | ls.push_back(new EResetJournal()); | |
3147 | } | |
3148 | ||
3149 | void EResetJournal::replay(MDSRank *mds) | |
3150 | { | |
3151 | dout(1) << "EResetJournal" << dendl; | |
3152 | ||
3153 | mds->sessionmap.wipe(); | |
3154 | mds->inotable->replay_reset(); | |
3155 | ||
3156 | if (mds->mdsmap->get_root() == mds->get_nodeid()) { | |
3157 | CDir *rootdir = mds->mdcache->get_root()->get_or_open_dirfrag(mds->mdcache, frag_t()); | |
3158 | mds->mdcache->adjust_subtree_auth(rootdir, mds->get_nodeid()); | |
3159 | } | |
3160 | ||
3161 | CDir *mydir = mds->mdcache->get_myin()->get_or_open_dirfrag(mds->mdcache, frag_t()); | |
3162 | mds->mdcache->adjust_subtree_auth(mydir, mds->get_nodeid()); | |
3163 | ||
3164 | mds->mdcache->recalc_auth_bits(true); | |
3165 | ||
3166 | mds->mdcache->show_subtrees(); | |
3167 | } | |
3168 | ||
3169 | ||
3170 | void ENoOp::encode(bufferlist &bl, uint64_t features) const | |
3171 | { | |
3172 | ENCODE_START(2, 2, bl); | |
11fdf7f2 | 3173 | encode(pad_size, bl); |
7c673cae FG |
3174 | uint8_t const pad = 0xff; |
3175 | for (unsigned int i = 0; i < pad_size; ++i) { | |
11fdf7f2 | 3176 | encode(pad, bl); |
7c673cae FG |
3177 | } |
3178 | ENCODE_FINISH(bl); | |
3179 | } | |
3180 | ||
3181 | ||
11fdf7f2 | 3182 | void ENoOp::decode(bufferlist::const_iterator &bl) |
7c673cae FG |
3183 | { |
3184 | DECODE_START(2, bl); | |
11fdf7f2 | 3185 | decode(pad_size, bl); |
7c673cae FG |
3186 | if (bl.get_remaining() != pad_size) { |
3187 | // This is spiritually an assertion, but expressing in a way that will let | |
3188 | // journal debug tools catch it and recognise a malformed entry. | |
3189 | throw buffer::end_of_buffer(); | |
3190 | } else { | |
9f95a23c | 3191 | bl += pad_size; |
7c673cae FG |
3192 | } |
3193 | DECODE_FINISH(bl); | |
3194 | } | |
3195 | ||
3196 | ||
3197 | void ENoOp::replay(MDSRank *mds) | |
3198 | { | |
3199 | dout(4) << "ENoOp::replay, " << pad_size << " bytes skipped in journal" << dendl; | |
3200 | } | |
3201 | ||
3202 | /** | |
3203 | * If re-formatting an old journal that used absolute log position | |
3204 | * references as segment sequence numbers, use this function to update | |
3205 | * it. | |
3206 | * | |
3207 | * @param mds | |
3208 | * MDSRank instance, just used for logging | |
3209 | * @param old_to_new | |
3210 | * Map of old journal segment sequence numbers to new journal segment sequence numbers | |
3211 | * | |
3212 | * @return | |
3213 | * True if the event was modified. | |
3214 | */ | |
3215 | bool EMetaBlob::rewrite_truncate_finish(MDSRank const *mds, | |
9f95a23c | 3216 | std::map<LogSegment::seq_t, LogSegment::seq_t> const &old_to_new) |
7c673cae FG |
3217 | { |
3218 | bool modified = false; | |
9f95a23c | 3219 | map<inodeno_t, LogSegment::seq_t> new_trunc_finish; |
11fdf7f2 TL |
3220 | for (const auto& p : truncate_finish) { |
3221 | auto q = old_to_new.find(p.second); | |
3222 | if (q != old_to_new.end()) { | |
7c673cae | 3223 | dout(20) << __func__ << " applying segment seq mapping " |
11fdf7f2 TL |
3224 | << p.second << " -> " << q->second << dendl; |
3225 | new_trunc_finish.emplace(p.first, q->second); | |
7c673cae FG |
3226 | modified = true; |
3227 | } else { | |
3228 | dout(20) << __func__ << " no segment seq mapping found for " | |
11fdf7f2 TL |
3229 | << p.second << dendl; |
3230 | new_trunc_finish.insert(p); | |
7c673cae FG |
3231 | } |
3232 | } | |
11fdf7f2 | 3233 | truncate_finish.swap(new_trunc_finish); |
7c673cae FG |
3234 | |
3235 | return modified; | |
3236 | } |