]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "common/config.h" | |
16 | #include "osdc/Journaler.h" | |
17 | #include "events/ESubtreeMap.h" | |
18 | #include "events/ESession.h" | |
19 | #include "events/ESessions.h" | |
20 | ||
21 | #include "events/EMetaBlob.h" | |
22 | #include "events/EResetJournal.h" | |
23 | #include "events/ENoOp.h" | |
24 | ||
25 | #include "events/EUpdate.h" | |
26 | #include "events/ESlaveUpdate.h" | |
27 | #include "events/EOpen.h" | |
28 | #include "events/ECommitted.h" | |
29 | ||
30 | #include "events/EExport.h" | |
31 | #include "events/EImportStart.h" | |
32 | #include "events/EImportFinish.h" | |
33 | #include "events/EFragment.h" | |
34 | ||
35 | #include "events/ETableClient.h" | |
36 | #include "events/ETableServer.h" | |
37 | ||
38 | #include "include/stringify.h" | |
39 | ||
40 | #include "LogSegment.h" | |
41 | ||
42 | #include "MDSRank.h" | |
43 | #include "MDLog.h" | |
44 | #include "MDCache.h" | |
45 | #include "Server.h" | |
46 | #include "Migrator.h" | |
47 | #include "Mutation.h" | |
48 | ||
49 | #include "InoTable.h" | |
50 | #include "MDSTableClient.h" | |
51 | #include "MDSTableServer.h" | |
52 | ||
53 | #include "Locker.h" | |
54 | ||
55 | #define dout_context g_ceph_context | |
56 | #define dout_subsys ceph_subsys_mds | |
57 | #undef dout_prefix | |
58 | #define dout_prefix *_dout << "mds." << mds->get_nodeid() << ".journal " | |
59 | ||
60 | ||
61 | // ----------------------- | |
62 | // LogSegment | |
63 | ||
64 | void LogSegment::try_to_expire(MDSRank *mds, MDSGatherBuilder &gather_bld, int op_prio) | |
65 | { | |
66 | set<CDir*> commit; | |
67 | ||
68 | dout(6) << "LogSegment(" << seq << "/" << offset << ").try_to_expire" << dendl; | |
69 | ||
70 | assert(g_conf->mds_kill_journal_expire_at != 1); | |
71 | ||
72 | // commit dirs | |
73 | for (elist<CDir*>::iterator p = new_dirfrags.begin(); !p.end(); ++p) { | |
74 | dout(20) << " new_dirfrag " << **p << dendl; | |
75 | assert((*p)->is_auth()); | |
76 | commit.insert(*p); | |
77 | } | |
78 | for (elist<CDir*>::iterator p = dirty_dirfrags.begin(); !p.end(); ++p) { | |
79 | dout(20) << " dirty_dirfrag " << **p << dendl; | |
80 | assert((*p)->is_auth()); | |
81 | commit.insert(*p); | |
82 | } | |
83 | for (elist<CDentry*>::iterator p = dirty_dentries.begin(); !p.end(); ++p) { | |
84 | dout(20) << " dirty_dentry " << **p << dendl; | |
85 | assert((*p)->is_auth()); | |
86 | commit.insert((*p)->get_dir()); | |
87 | } | |
88 | for (elist<CInode*>::iterator p = dirty_inodes.begin(); !p.end(); ++p) { | |
89 | dout(20) << " dirty_inode " << **p << dendl; | |
90 | assert((*p)->is_auth()); | |
91 | if ((*p)->is_base()) { | |
92 | (*p)->store(gather_bld.new_sub()); | |
93 | } else | |
94 | commit.insert((*p)->get_parent_dn()->get_dir()); | |
95 | } | |
96 | ||
97 | if (!commit.empty()) { | |
98 | for (set<CDir*>::iterator p = commit.begin(); | |
99 | p != commit.end(); | |
100 | ++p) { | |
101 | CDir *dir = *p; | |
102 | assert(dir->is_auth()); | |
103 | if (dir->can_auth_pin()) { | |
104 | dout(15) << "try_to_expire committing " << *dir << dendl; | |
105 | dir->commit(0, gather_bld.new_sub(), false, op_prio); | |
106 | } else { | |
107 | dout(15) << "try_to_expire waiting for unfreeze on " << *dir << dendl; | |
108 | dir->add_waiter(CDir::WAIT_UNFREEZE, gather_bld.new_sub()); | |
109 | } | |
110 | } | |
111 | } | |
112 | ||
113 | // master ops with possibly uncommitted slaves | |
114 | for (set<metareqid_t>::iterator p = uncommitted_masters.begin(); | |
115 | p != uncommitted_masters.end(); | |
116 | ++p) { | |
117 | dout(10) << "try_to_expire waiting for slaves to ack commit on " << *p << dendl; | |
118 | mds->mdcache->wait_for_uncommitted_master(*p, gather_bld.new_sub()); | |
119 | } | |
120 | ||
121 | // uncommitted fragments | |
122 | for (set<dirfrag_t>::iterator p = uncommitted_fragments.begin(); | |
123 | p != uncommitted_fragments.end(); | |
124 | ++p) { | |
125 | dout(10) << "try_to_expire waiting for uncommitted fragment " << *p << dendl; | |
126 | mds->mdcache->wait_for_uncommitted_fragment(*p, gather_bld.new_sub()); | |
127 | } | |
128 | ||
129 | // nudge scatterlocks | |
130 | for (elist<CInode*>::iterator p = dirty_dirfrag_dir.begin(); !p.end(); ++p) { | |
131 | CInode *in = *p; | |
132 | dout(10) << "try_to_expire waiting for dirlock flush on " << *in << dendl; | |
133 | mds->locker->scatter_nudge(&in->filelock, gather_bld.new_sub()); | |
134 | } | |
135 | for (elist<CInode*>::iterator p = dirty_dirfrag_dirfragtree.begin(); !p.end(); ++p) { | |
136 | CInode *in = *p; | |
137 | dout(10) << "try_to_expire waiting for dirfragtreelock flush on " << *in << dendl; | |
138 | mds->locker->scatter_nudge(&in->dirfragtreelock, gather_bld.new_sub()); | |
139 | } | |
140 | for (elist<CInode*>::iterator p = dirty_dirfrag_nest.begin(); !p.end(); ++p) { | |
141 | CInode *in = *p; | |
142 | dout(10) << "try_to_expire waiting for nest flush on " << *in << dendl; | |
143 | mds->locker->scatter_nudge(&in->nestlock, gather_bld.new_sub()); | |
144 | } | |
145 | ||
146 | assert(g_conf->mds_kill_journal_expire_at != 2); | |
147 | ||
148 | // open files and snap inodes | |
149 | if (!open_files.empty()) { | |
150 | assert(!mds->mdlog->is_capped()); // hmm FIXME | |
151 | EOpen *le = 0; | |
152 | LogSegment *ls = mds->mdlog->get_current_segment(); | |
153 | assert(ls != this); | |
154 | elist<CInode*>::iterator p = open_files.begin(member_offset(CInode, item_open_file)); | |
155 | while (!p.end()) { | |
156 | CInode *in = *p; | |
157 | ++p; | |
158 | if (in->last == CEPH_NOSNAP && in->is_auth() && | |
159 | !in->is_ambiguous_auth() && in->is_any_caps()) { | |
160 | if (in->is_any_caps_wanted()) { | |
161 | dout(20) << "try_to_expire requeueing open file " << *in << dendl; | |
162 | if (!le) { | |
163 | le = new EOpen(mds->mdlog); | |
164 | mds->mdlog->start_entry(le); | |
165 | } | |
166 | le->add_clean_inode(in); | |
167 | ls->open_files.push_back(&in->item_open_file); | |
168 | } else { | |
169 | // drop inodes that aren't wanted | |
170 | dout(20) << "try_to_expire not requeueing and delisting unwanted file " << *in << dendl; | |
171 | in->item_open_file.remove_myself(); | |
172 | } | |
173 | } else if (in->last != CEPH_NOSNAP && !in->client_snap_caps.empty()) { | |
174 | // journal snap inodes that need flush. This simplify the mds failover hanlding | |
175 | dout(20) << "try_to_expire requeueing snap needflush inode " << *in << dendl; | |
176 | if (!le) { | |
177 | le = new EOpen(mds->mdlog); | |
178 | mds->mdlog->start_entry(le); | |
179 | } | |
180 | le->add_clean_inode(in); | |
181 | ls->open_files.push_back(&in->item_open_file); | |
182 | } else { | |
183 | /* | |
184 | * we can get a capless inode here if we replay an open file, the client fails to | |
185 | * reconnect it, but does REPLAY an open request (that adds it to the logseg). AFAICS | |
186 | * it's ok for the client to replay an open on a file it doesn't have in it's cache | |
187 | * anymore. | |
188 | * | |
189 | * this makes the mds less sensitive to strict open_file consistency, although it does | |
190 | * make it easier to miss subtle problems. | |
191 | */ | |
192 | dout(20) << "try_to_expire not requeueing and delisting capless file " << *in << dendl; | |
193 | in->item_open_file.remove_myself(); | |
194 | } | |
195 | } | |
196 | if (le) { | |
197 | mds->mdlog->submit_entry(le); | |
198 | mds->mdlog->wait_for_safe(gather_bld.new_sub()); | |
199 | dout(10) << "try_to_expire waiting for open files to rejournal" << dendl; | |
200 | } | |
201 | } | |
202 | ||
203 | assert(g_conf->mds_kill_journal_expire_at != 3); | |
204 | ||
205 | // backtraces to be stored/updated | |
206 | for (elist<CInode*>::iterator p = dirty_parent_inodes.begin(); !p.end(); ++p) { | |
207 | CInode *in = *p; | |
208 | assert(in->is_auth()); | |
209 | if (in->can_auth_pin()) { | |
210 | dout(15) << "try_to_expire waiting for storing backtrace on " << *in << dendl; | |
211 | in->store_backtrace(gather_bld.new_sub(), op_prio); | |
212 | } else { | |
213 | dout(15) << "try_to_expire waiting for unfreeze on " << *in << dendl; | |
214 | in->add_waiter(CInode::WAIT_UNFREEZE, gather_bld.new_sub()); | |
215 | } | |
216 | } | |
217 | ||
218 | assert(g_conf->mds_kill_journal_expire_at != 4); | |
219 | ||
220 | // slave updates | |
221 | for (elist<MDSlaveUpdate*>::iterator p = slave_updates.begin(member_offset(MDSlaveUpdate, | |
222 | item)); | |
223 | !p.end(); ++p) { | |
224 | MDSlaveUpdate *su = *p; | |
225 | dout(10) << "try_to_expire waiting on slave update " << su << dendl; | |
226 | assert(su->waiter == 0); | |
227 | su->waiter = gather_bld.new_sub(); | |
228 | } | |
229 | ||
230 | // idalloc | |
231 | if (inotablev > mds->inotable->get_committed_version()) { | |
232 | dout(10) << "try_to_expire saving inotable table, need " << inotablev | |
233 | << ", committed is " << mds->inotable->get_committed_version() | |
234 | << " (" << mds->inotable->get_committing_version() << ")" | |
235 | << dendl; | |
236 | mds->inotable->save(gather_bld.new_sub(), inotablev); | |
237 | } | |
238 | ||
239 | // sessionmap | |
240 | if (sessionmapv > mds->sessionmap.get_committed()) { | |
241 | dout(10) << "try_to_expire saving sessionmap, need " << sessionmapv | |
242 | << ", committed is " << mds->sessionmap.get_committed() | |
243 | << " (" << mds->sessionmap.get_committing() << ")" | |
244 | << dendl; | |
245 | mds->sessionmap.save(gather_bld.new_sub(), sessionmapv); | |
246 | } | |
247 | ||
248 | // updates to sessions for completed_requests | |
249 | mds->sessionmap.save_if_dirty(touched_sessions, &gather_bld); | |
250 | touched_sessions.clear(); | |
251 | ||
252 | // pending commit atids | |
253 | for (map<int, ceph::unordered_set<version_t> >::iterator p = pending_commit_tids.begin(); | |
254 | p != pending_commit_tids.end(); | |
255 | ++p) { | |
256 | MDSTableClient *client = mds->get_table_client(p->first); | |
257 | assert(client); | |
258 | for (ceph::unordered_set<version_t>::iterator q = p->second.begin(); | |
259 | q != p->second.end(); | |
260 | ++q) { | |
261 | dout(10) << "try_to_expire " << get_mdstable_name(p->first) << " transaction " << *q | |
262 | << " pending commit (not yet acked), waiting" << dendl; | |
263 | assert(!client->has_committed(*q)); | |
264 | client->wait_for_ack(*q, gather_bld.new_sub()); | |
265 | } | |
266 | } | |
267 | ||
268 | // table servers | |
269 | for (map<int, version_t>::iterator p = tablev.begin(); | |
270 | p != tablev.end(); | |
271 | ++p) { | |
272 | MDSTableServer *server = mds->get_table_server(p->first); | |
273 | assert(server); | |
274 | if (p->second > server->get_committed_version()) { | |
275 | dout(10) << "try_to_expire waiting for " << get_mdstable_name(p->first) | |
276 | << " to save, need " << p->second << dendl; | |
277 | server->save(gather_bld.new_sub()); | |
278 | } | |
279 | } | |
280 | ||
281 | // truncating | |
282 | for (set<CInode*>::iterator p = truncating_inodes.begin(); | |
283 | p != truncating_inodes.end(); | |
284 | ++p) { | |
285 | dout(10) << "try_to_expire waiting for truncate of " << **p << dendl; | |
286 | (*p)->add_waiter(CInode::WAIT_TRUNC, gather_bld.new_sub()); | |
287 | } | |
288 | ||
289 | if (gather_bld.has_subs()) { | |
290 | dout(6) << "LogSegment(" << seq << "/" << offset << ").try_to_expire waiting" << dendl; | |
291 | mds->mdlog->flush(); | |
292 | } else { | |
293 | assert(g_conf->mds_kill_journal_expire_at != 5); | |
294 | dout(6) << "LogSegment(" << seq << "/" << offset << ").try_to_expire success" << dendl; | |
295 | } | |
296 | } | |
297 | ||
298 | ||
299 | // ----------------------- | |
300 | // EMetaBlob | |
301 | ||
302 | EMetaBlob::EMetaBlob(MDLog *mdlog) : opened_ino(0), renamed_dirino(0), | |
303 | inotablev(0), sessionmapv(0), allocated_ino(0), | |
304 | last_subtree_map(0), event_seq(0) | |
305 | { } | |
306 | ||
307 | void EMetaBlob::add_dir_context(CDir *dir, int mode) | |
308 | { | |
309 | MDSRank *mds = dir->cache->mds; | |
310 | ||
311 | list<CDentry*> parents; | |
312 | ||
313 | // it may be okay not to include the maybe items, if | |
314 | // - we journaled the maybe child inode in this segment | |
315 | // - that subtree turns out to be unambiguously auth | |
316 | list<CDentry*> maybe; | |
317 | bool maybenot = false; | |
318 | ||
319 | while (true) { | |
320 | // already have this dir? (we must always add in order) | |
321 | if (lump_map.count(dir->dirfrag())) { | |
322 | dout(20) << "EMetaBlob::add_dir_context(" << dir << ") have lump " << dir->dirfrag() << dendl; | |
323 | break; | |
324 | } | |
325 | ||
326 | // stop at root/stray | |
327 | CInode *diri = dir->get_inode(); | |
328 | CDentry *parent = diri->get_projected_parent_dn(); | |
329 | ||
330 | if (mode == TO_AUTH_SUBTREE_ROOT) { | |
331 | // subtree root? | |
31f18b77 FG |
332 | if (dir->is_subtree_root()) { |
333 | // match logic in MDCache::create_subtree_map() | |
334 | if (dir->get_dir_auth().first == mds->get_nodeid()) { | |
335 | mds_authority_t parent_auth = parent ? parent->authority() : CDIR_AUTH_UNDEF; | |
336 | if (parent_auth.first == dir->get_dir_auth().first) { | |
337 | if (parent_auth.second == CDIR_AUTH_UNKNOWN && | |
338 | !dir->is_ambiguous_dir_auth() && | |
339 | !dir->state_test(CDir::STATE_EXPORTBOUND) && | |
340 | !dir->state_test(CDir::STATE_AUXSUBTREE) && | |
341 | !diri->state_test(CInode::STATE_AMBIGUOUSAUTH)) { | |
342 | dout(0) << "EMetaBlob::add_dir_context unexpected subtree " << *dir << dendl; | |
343 | assert(0); | |
344 | } | |
345 | dout(20) << "EMetaBlob::add_dir_context(" << dir << ") ambiguous or transient subtree " << dendl; | |
7c673cae FG |
346 | } else { |
347 | // it's an auth subtree, we don't need maybe (if any), and we're done. | |
348 | dout(20) << "EMetaBlob::add_dir_context(" << dir << ") reached unambig auth subtree, don't need " << maybe | |
349 | << " at " << *dir << dendl; | |
350 | maybe.clear(); | |
351 | break; | |
352 | } | |
353 | } else { | |
354 | dout(20) << "EMetaBlob::add_dir_context(" << dir << ") reached ambig or !auth subtree, need " << maybe | |
355 | << " at " << *dir << dendl; | |
356 | // we need the maybe list after all! | |
357 | parents.splice(parents.begin(), maybe); | |
358 | maybenot = false; | |
359 | } | |
360 | } | |
31f18b77 | 361 | |
7c673cae FG |
362 | // was the inode journaled in this blob? |
363 | if (event_seq && diri->last_journaled == event_seq) { | |
364 | dout(20) << "EMetaBlob::add_dir_context(" << dir << ") already have diri this blob " << *diri << dendl; | |
365 | break; | |
366 | } | |
367 | ||
368 | // have we journaled this inode since the last subtree map? | |
369 | if (!maybenot && last_subtree_map && diri->last_journaled >= last_subtree_map) { | |
370 | dout(20) << "EMetaBlob::add_dir_context(" << dir << ") already have diri in this segment (" | |
371 | << diri->last_journaled << " >= " << last_subtree_map << "), setting maybenot flag " | |
372 | << *diri << dendl; | |
373 | maybenot = true; | |
374 | } | |
375 | } | |
376 | ||
377 | if (!parent) | |
378 | break; | |
379 | ||
380 | if (maybenot) { | |
381 | dout(25) << "EMetaBlob::add_dir_context(" << dir << ") maybe " << *parent << dendl; | |
382 | maybe.push_front(parent); | |
383 | } else { | |
384 | dout(25) << "EMetaBlob::add_dir_context(" << dir << ") definitely " << *parent << dendl; | |
385 | parents.push_front(parent); | |
386 | } | |
387 | ||
388 | dir = parent->get_dir(); | |
389 | } | |
390 | ||
391 | parents.splice(parents.begin(), maybe); | |
392 | ||
393 | dout(20) << "EMetaBlob::add_dir_context final: " << parents << dendl; | |
394 | for (list<CDentry*>::iterator p = parents.begin(); p != parents.end(); ++p) { | |
395 | assert((*p)->get_projected_linkage()->is_primary()); | |
396 | add_dentry(*p, false); | |
397 | } | |
398 | } | |
399 | ||
400 | void EMetaBlob::update_segment(LogSegment *ls) | |
401 | { | |
402 | // dirty inode mtimes | |
403 | // -> handled directly by Server.cc, replay() | |
404 | ||
405 | // alloc table update? | |
406 | if (inotablev) | |
407 | ls->inotablev = inotablev; | |
408 | if (sessionmapv) | |
409 | ls->sessionmapv = sessionmapv; | |
410 | ||
411 | // truncated inodes | |
412 | // -> handled directly by Server.cc | |
413 | ||
414 | // client requests | |
415 | // note the newest request per client | |
416 | //if (!client_reqs.empty()) | |
417 | // ls->last_client_tid[client_reqs.rbegin()->client] = client_reqs.rbegin()->tid); | |
418 | } | |
419 | ||
420 | // EMetaBlob::fullbit | |
421 | ||
422 | void EMetaBlob::fullbit::encode(bufferlist& bl, uint64_t features) const { | |
423 | ENCODE_START(8, 5, bl); | |
424 | ::encode(dn, bl); | |
425 | ::encode(dnfirst, bl); | |
426 | ::encode(dnlast, bl); | |
427 | ::encode(dnv, bl); | |
428 | ::encode(inode, bl, features); | |
429 | ::encode(xattrs, bl); | |
430 | if (inode.is_symlink()) | |
431 | ::encode(symlink, bl); | |
432 | if (inode.is_dir()) { | |
433 | ::encode(dirfragtree, bl); | |
434 | ::encode(snapbl, bl); | |
435 | } | |
436 | ::encode(state, bl); | |
437 | if (old_inodes.empty()) { | |
438 | ::encode(false, bl); | |
439 | } else { | |
440 | ::encode(true, bl); | |
441 | ::encode(old_inodes, bl, features); | |
442 | } | |
443 | if (!inode.is_dir()) | |
444 | ::encode(snapbl, bl); | |
445 | ::encode(oldest_snap, bl); | |
446 | ENCODE_FINISH(bl); | |
447 | } | |
448 | ||
449 | void EMetaBlob::fullbit::decode(bufferlist::iterator &bl) { | |
450 | DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl); | |
451 | ::decode(dn, bl); | |
452 | ::decode(dnfirst, bl); | |
453 | ::decode(dnlast, bl); | |
454 | ::decode(dnv, bl); | |
455 | ::decode(inode, bl); | |
456 | ::decode(xattrs, bl); | |
457 | if (inode.is_symlink()) | |
458 | ::decode(symlink, bl); | |
459 | if (inode.is_dir()) { | |
460 | ::decode(dirfragtree, bl); | |
461 | ::decode(snapbl, bl); | |
462 | if ((struct_v == 2) || (struct_v == 3)) { | |
463 | bool dir_layout_exists; | |
464 | ::decode(dir_layout_exists, bl); | |
465 | if (dir_layout_exists) { | |
466 | __u8 dir_struct_v; | |
467 | ::decode(dir_struct_v, bl); // default_file_layout version | |
468 | ::decode(inode.layout, bl); // and actual layout, that we care about | |
469 | } | |
470 | } | |
471 | } | |
472 | if (struct_v >= 6) { | |
473 | ::decode(state, bl); | |
474 | } else { | |
475 | bool dirty; | |
476 | ::decode(dirty, bl); | |
477 | state = dirty ? EMetaBlob::fullbit::STATE_DIRTY : 0; | |
478 | } | |
479 | ||
480 | if (struct_v >= 3) { | |
481 | bool old_inodes_present; | |
482 | ::decode(old_inodes_present, bl); | |
483 | if (old_inodes_present) { | |
484 | ::decode(old_inodes, bl); | |
485 | } | |
486 | } | |
487 | if (!inode.is_dir()) { | |
488 | if (struct_v >= 7) | |
489 | ::decode(snapbl, bl); | |
490 | } | |
491 | if (struct_v >= 8) | |
492 | ::decode(oldest_snap, bl); | |
493 | else | |
494 | oldest_snap = CEPH_NOSNAP; | |
495 | ||
496 | DECODE_FINISH(bl); | |
497 | } | |
498 | ||
499 | void EMetaBlob::fullbit::dump(Formatter *f) const | |
500 | { | |
501 | f->dump_string("dentry", dn); | |
502 | f->dump_stream("snapid.first") << dnfirst; | |
503 | f->dump_stream("snapid.last") << dnlast; | |
504 | f->dump_int("dentry version", dnv); | |
505 | f->open_object_section("inode"); | |
506 | inode.dump(f); | |
507 | f->close_section(); // inode | |
508 | f->open_object_section("xattrs"); | |
94b18763 FG |
509 | for (const auto &p : xattrs) { |
510 | std::string s(p.second.c_str(), p.second.length()); | |
511 | f->dump_string(p.first.c_str(), s); | |
7c673cae FG |
512 | } |
513 | f->close_section(); // xattrs | |
514 | if (inode.is_symlink()) { | |
515 | f->dump_string("symlink", symlink); | |
516 | } | |
517 | if (inode.is_dir()) { | |
518 | f->dump_stream("frag tree") << dirfragtree; | |
519 | f->dump_string("has_snapbl", snapbl.length() ? "true" : "false"); | |
520 | if (inode.has_layout()) { | |
521 | f->open_object_section("file layout policy"); | |
522 | // FIXME | |
523 | f->dump_string("layout", "the layout exists"); | |
524 | f->close_section(); // file layout policy | |
525 | } | |
526 | } | |
527 | f->dump_string("state", state_string()); | |
528 | if (!old_inodes.empty()) { | |
529 | f->open_array_section("old inodes"); | |
94b18763 | 530 | for (const auto &p : old_inodes) { |
7c673cae | 531 | f->open_object_section("inode"); |
94b18763 FG |
532 | f->dump_int("snapid", p.first); |
533 | p.second.dump(f); | |
7c673cae FG |
534 | f->close_section(); // inode |
535 | } | |
536 | f->close_section(); // old inodes | |
537 | } | |
538 | } | |
539 | ||
540 | void EMetaBlob::fullbit::generate_test_instances(list<EMetaBlob::fullbit*>& ls) | |
541 | { | |
94b18763 | 542 | CInode::mempool_inode inode; |
7c673cae | 543 | fragtree_t fragtree; |
94b18763 | 544 | CInode::mempool_xattr_map empty_xattrs; |
7c673cae FG |
545 | bufferlist empty_snapbl; |
546 | fullbit *sample = new fullbit("/testdn", 0, 0, 0, | |
547 | inode, fragtree, empty_xattrs, "", 0, empty_snapbl, | |
548 | false, NULL); | |
549 | ls.push_back(sample); | |
550 | } | |
551 | ||
552 | void EMetaBlob::fullbit::update_inode(MDSRank *mds, CInode *in) | |
553 | { | |
554 | in->inode = inode; | |
555 | in->xattrs = xattrs; | |
31f18b77 | 556 | in->maybe_export_pin(); |
7c673cae FG |
557 | if (in->inode.is_dir()) { |
558 | if (!(in->dirfragtree == dirfragtree)) { | |
559 | dout(10) << "EMetaBlob::fullbit::update_inode dft " << in->dirfragtree << " -> " | |
560 | << dirfragtree << " on " << *in << dendl; | |
561 | in->dirfragtree = dirfragtree; | |
562 | in->force_dirfrags(); | |
563 | if (in->has_dirfrags() && in->authority() == CDIR_AUTH_UNDEF) { | |
564 | list<CDir*> ls; | |
565 | in->get_nested_dirfrags(ls); | |
566 | for (list<CDir*>::iterator p = ls.begin(); p != ls.end(); ++p) { | |
567 | CDir *dir = *p; | |
568 | if (dir->get_num_any() == 0 && | |
569 | mds->mdcache->can_trim_non_auth_dirfrag(dir)) { | |
570 | dout(10) << " closing empty non-auth dirfrag " << *dir << dendl; | |
571 | in->close_dirfrag(dir->get_frag()); | |
572 | } | |
573 | } | |
574 | } | |
575 | } | |
576 | } else if (in->inode.is_symlink()) { | |
94b18763 | 577 | in->symlink = mempool::mds_co::string(boost::string_view(symlink)); |
7c673cae FG |
578 | } |
579 | in->old_inodes = old_inodes; | |
580 | if (!in->old_inodes.empty()) { | |
581 | snapid_t min_first = in->old_inodes.rbegin()->first + 1; | |
582 | if (min_first > in->first) | |
583 | in->first = min_first; | |
584 | } | |
585 | ||
586 | /* | |
587 | * we can do this before linking hte inode bc the split_at would | |
588 | * be a no-op.. we have no children (namely open snaprealms) to | |
589 | * divy up | |
590 | */ | |
591 | in->oldest_snap = oldest_snap; | |
592 | in->decode_snap_blob(snapbl); | |
593 | ||
594 | /* | |
595 | * In case there was anything malformed in the journal that we are | |
596 | * replaying, do sanity checks on the inodes we're replaying and | |
597 | * go damaged instead of letting any trash into a live cache | |
598 | */ | |
599 | if (in->is_file()) { | |
600 | // Files must have valid layouts with a pool set | |
601 | if (in->inode.layout.pool_id == -1 || !in->inode.layout.is_valid()) { | |
602 | dout(0) << "EMetaBlob.replay invalid layout on ino " << *in | |
603 | << ": " << in->inode.layout << dendl; | |
604 | std::ostringstream oss; | |
605 | oss << "Invalid layout for inode 0x" << std::hex << in->inode.ino | |
606 | << std::dec << " in journal"; | |
607 | mds->clog->error() << oss.str(); | |
608 | mds->damaged(); | |
609 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
610 | } | |
611 | } | |
612 | } | |
613 | ||
614 | // EMetaBlob::remotebit | |
615 | ||
616 | void EMetaBlob::remotebit::encode(bufferlist& bl) const | |
617 | { | |
618 | ENCODE_START(2, 2, bl); | |
619 | ::encode(dn, bl); | |
620 | ::encode(dnfirst, bl); | |
621 | ::encode(dnlast, bl); | |
622 | ::encode(dnv, bl); | |
623 | ::encode(ino, bl); | |
624 | ::encode(d_type, bl); | |
625 | ::encode(dirty, bl); | |
626 | ENCODE_FINISH(bl); | |
627 | } | |
628 | ||
629 | void EMetaBlob::remotebit::decode(bufferlist::iterator &bl) | |
630 | { | |
631 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
632 | ::decode(dn, bl); | |
633 | ::decode(dnfirst, bl); | |
634 | ::decode(dnlast, bl); | |
635 | ::decode(dnv, bl); | |
636 | ::decode(ino, bl); | |
637 | ::decode(d_type, bl); | |
638 | ::decode(dirty, bl); | |
639 | DECODE_FINISH(bl); | |
640 | } | |
641 | ||
642 | void EMetaBlob::remotebit::dump(Formatter *f) const | |
643 | { | |
644 | f->dump_string("dentry", dn); | |
645 | f->dump_int("snapid.first", dnfirst); | |
646 | f->dump_int("snapid.last", dnlast); | |
647 | f->dump_int("dentry version", dnv); | |
648 | f->dump_int("inodeno", ino); | |
649 | uint32_t type = DTTOIF(d_type) & S_IFMT; // convert to type entries | |
650 | string type_string; | |
651 | switch(type) { | |
652 | case S_IFREG: | |
653 | type_string = "file"; break; | |
654 | case S_IFLNK: | |
655 | type_string = "symlink"; break; | |
656 | case S_IFDIR: | |
657 | type_string = "directory"; break; | |
658 | case S_IFIFO: | |
659 | type_string = "fifo"; break; | |
660 | case S_IFCHR: | |
661 | type_string = "chr"; break; | |
662 | case S_IFBLK: | |
663 | type_string = "blk"; break; | |
664 | case S_IFSOCK: | |
665 | type_string = "sock"; break; | |
666 | default: | |
667 | assert (0 == "unknown d_type!"); | |
668 | } | |
669 | f->dump_string("d_type", type_string); | |
670 | f->dump_string("dirty", dirty ? "true" : "false"); | |
671 | } | |
672 | ||
673 | void EMetaBlob::remotebit:: | |
674 | generate_test_instances(list<EMetaBlob::remotebit*>& ls) | |
675 | { | |
676 | remotebit *remote = new remotebit("/test/dn", 0, 10, 15, 1, IFTODT(S_IFREG), false); | |
677 | ls.push_back(remote); | |
678 | } | |
679 | ||
680 | // EMetaBlob::nullbit | |
681 | ||
682 | void EMetaBlob::nullbit::encode(bufferlist& bl) const | |
683 | { | |
684 | ENCODE_START(2, 2, bl); | |
685 | ::encode(dn, bl); | |
686 | ::encode(dnfirst, bl); | |
687 | ::encode(dnlast, bl); | |
688 | ::encode(dnv, bl); | |
689 | ::encode(dirty, bl); | |
690 | ENCODE_FINISH(bl); | |
691 | } | |
692 | ||
693 | void EMetaBlob::nullbit::decode(bufferlist::iterator &bl) | |
694 | { | |
695 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
696 | ::decode(dn, bl); | |
697 | ::decode(dnfirst, bl); | |
698 | ::decode(dnlast, bl); | |
699 | ::decode(dnv, bl); | |
700 | ::decode(dirty, bl); | |
701 | DECODE_FINISH(bl); | |
702 | } | |
703 | ||
704 | void EMetaBlob::nullbit::dump(Formatter *f) const | |
705 | { | |
706 | f->dump_string("dentry", dn); | |
707 | f->dump_int("snapid.first", dnfirst); | |
708 | f->dump_int("snapid.last", dnlast); | |
709 | f->dump_int("dentry version", dnv); | |
710 | f->dump_string("dirty", dirty ? "true" : "false"); | |
711 | } | |
712 | ||
713 | void EMetaBlob::nullbit::generate_test_instances(list<nullbit*>& ls) | |
714 | { | |
715 | nullbit *sample = new nullbit("/test/dentry", 0, 10, 15, false); | |
716 | nullbit *sample2 = new nullbit("/test/dirty", 10, 20, 25, true); | |
717 | ls.push_back(sample); | |
718 | ls.push_back(sample2); | |
719 | } | |
720 | ||
721 | // EMetaBlob::dirlump | |
722 | ||
723 | void EMetaBlob::dirlump::encode(bufferlist& bl, uint64_t features) const | |
724 | { | |
725 | ENCODE_START(2, 2, bl); | |
726 | ::encode(fnode, bl); | |
727 | ::encode(state, bl); | |
728 | ::encode(nfull, bl); | |
729 | ::encode(nremote, bl); | |
730 | ::encode(nnull, bl); | |
731 | _encode_bits(features); | |
732 | ::encode(dnbl, bl); | |
733 | ENCODE_FINISH(bl); | |
734 | } | |
735 | ||
736 | void EMetaBlob::dirlump::decode(bufferlist::iterator &bl) | |
737 | { | |
738 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl) | |
739 | ::decode(fnode, bl); | |
740 | ::decode(state, bl); | |
741 | ::decode(nfull, bl); | |
742 | ::decode(nremote, bl); | |
743 | ::decode(nnull, bl); | |
744 | ::decode(dnbl, bl); | |
745 | dn_decoded = false; // don't decode bits unless we need them. | |
746 | DECODE_FINISH(bl); | |
747 | } | |
748 | ||
749 | void EMetaBlob::dirlump::dump(Formatter *f) const | |
750 | { | |
751 | if (!dn_decoded) { | |
752 | dirlump *me = const_cast<dirlump*>(this); | |
753 | me->_decode_bits(); | |
754 | } | |
755 | f->open_object_section("fnode"); | |
756 | fnode.dump(f); | |
757 | f->close_section(); // fnode | |
758 | f->dump_string("state", state_string()); | |
759 | f->dump_int("nfull", nfull); | |
760 | f->dump_int("nremote", nremote); | |
761 | f->dump_int("nnull", nnull); | |
762 | ||
763 | f->open_array_section("full bits"); | |
764 | for (list<ceph::shared_ptr<fullbit> >::const_iterator | |
765 | iter = dfull.begin(); iter != dfull.end(); ++iter) { | |
766 | f->open_object_section("fullbit"); | |
767 | (*iter)->dump(f); | |
768 | f->close_section(); // fullbit | |
769 | } | |
770 | f->close_section(); // full bits | |
771 | f->open_array_section("remote bits"); | |
772 | for (list<remotebit>::const_iterator | |
773 | iter = dremote.begin(); iter != dremote.end(); ++iter) { | |
774 | f->open_object_section("remotebit"); | |
775 | (*iter).dump(f); | |
776 | f->close_section(); // remotebit | |
777 | } | |
778 | f->close_section(); // remote bits | |
779 | f->open_array_section("null bits"); | |
780 | for (list<nullbit>::const_iterator | |
781 | iter = dnull.begin(); iter != dnull.end(); ++iter) { | |
782 | f->open_object_section("null bit"); | |
783 | (*iter).dump(f); | |
784 | f->close_section(); // null bit | |
785 | } | |
786 | f->close_section(); // null bits | |
787 | } | |
788 | ||
789 | void EMetaBlob::dirlump::generate_test_instances(list<dirlump*>& ls) | |
790 | { | |
791 | ls.push_back(new dirlump()); | |
792 | } | |
793 | ||
794 | /** | |
795 | * EMetaBlob proper | |
796 | */ | |
797 | void EMetaBlob::encode(bufferlist& bl, uint64_t features) const | |
798 | { | |
799 | ENCODE_START(8, 5, bl); | |
800 | ::encode(lump_order, bl); | |
801 | ::encode(lump_map, bl, features); | |
802 | ::encode(roots, bl, features); | |
803 | ::encode(table_tids, bl); | |
804 | ::encode(opened_ino, bl); | |
805 | ::encode(allocated_ino, bl); | |
806 | ::encode(used_preallocated_ino, bl); | |
807 | ::encode(preallocated_inos, bl); | |
808 | ::encode(client_name, bl); | |
809 | ::encode(inotablev, bl); | |
810 | ::encode(sessionmapv, bl); | |
811 | ::encode(truncate_start, bl); | |
812 | ::encode(truncate_finish, bl); | |
813 | ::encode(destroyed_inodes, bl); | |
814 | ::encode(client_reqs, bl); | |
815 | ::encode(renamed_dirino, bl); | |
816 | ::encode(renamed_dir_frags, bl); | |
817 | { | |
818 | // make MDSRank use v6 format happy | |
819 | int64_t i = -1; | |
820 | bool b = false; | |
821 | ::encode(i, bl); | |
822 | ::encode(b, bl); | |
823 | } | |
824 | ::encode(client_flushes, bl); | |
825 | ENCODE_FINISH(bl); | |
826 | } | |
827 | void EMetaBlob::decode(bufferlist::iterator &bl) | |
828 | { | |
829 | DECODE_START_LEGACY_COMPAT_LEN(7, 5, 5, bl); | |
830 | ::decode(lump_order, bl); | |
831 | ::decode(lump_map, bl); | |
832 | if (struct_v >= 4) { | |
833 | ::decode(roots, bl); | |
834 | } else { | |
835 | bufferlist rootbl; | |
836 | ::decode(rootbl, bl); | |
837 | if (rootbl.length()) { | |
838 | bufferlist::iterator p = rootbl.begin(); | |
839 | roots.push_back(ceph::shared_ptr<fullbit>(new fullbit(p))); | |
840 | } | |
841 | } | |
842 | ::decode(table_tids, bl); | |
843 | ::decode(opened_ino, bl); | |
844 | ::decode(allocated_ino, bl); | |
845 | ::decode(used_preallocated_ino, bl); | |
846 | ::decode(preallocated_inos, bl); | |
847 | ::decode(client_name, bl); | |
848 | ::decode(inotablev, bl); | |
849 | ::decode(sessionmapv, bl); | |
850 | ::decode(truncate_start, bl); | |
851 | ::decode(truncate_finish, bl); | |
852 | ::decode(destroyed_inodes, bl); | |
853 | if (struct_v >= 2) { | |
854 | ::decode(client_reqs, bl); | |
855 | } else { | |
856 | list<metareqid_t> r; | |
857 | ::decode(r, bl); | |
858 | while (!r.empty()) { | |
859 | client_reqs.push_back(pair<metareqid_t,uint64_t>(r.front(), 0)); | |
860 | r.pop_front(); | |
861 | } | |
862 | } | |
863 | if (struct_v >= 3) { | |
864 | ::decode(renamed_dirino, bl); | |
865 | ::decode(renamed_dir_frags, bl); | |
866 | } | |
867 | if (struct_v >= 6) { | |
868 | // ignore | |
869 | int64_t i; | |
870 | bool b; | |
871 | ::decode(i, bl); | |
872 | ::decode(b, bl); | |
873 | } | |
874 | if (struct_v >= 8) { | |
875 | ::decode(client_flushes, bl); | |
876 | } | |
877 | DECODE_FINISH(bl); | |
878 | } | |
879 | ||
880 | ||
881 | /** | |
882 | * Get all inodes touched by this metablob. Includes the 'bits' within | |
883 | * dirlumps, and the inodes of the dirs themselves. | |
884 | */ | |
885 | void EMetaBlob::get_inodes( | |
886 | std::set<inodeno_t> &inodes) const | |
887 | { | |
888 | // For all dirlumps in this metablob | |
889 | for (std::map<dirfrag_t, dirlump>::const_iterator i = lump_map.begin(); i != lump_map.end(); ++i) { | |
890 | // Record inode of dirlump | |
891 | inodeno_t const dir_ino = i->first.ino; | |
892 | inodes.insert(dir_ino); | |
893 | ||
894 | // Decode dirlump bits | |
895 | dirlump const &dl = i->second; | |
896 | dl._decode_bits(); | |
897 | ||
898 | // Record inodes of fullbits | |
899 | list<ceph::shared_ptr<fullbit> > const &fb_list = dl.get_dfull(); | |
900 | for (list<ceph::shared_ptr<fullbit> >::const_iterator | |
901 | iter = fb_list.begin(); iter != fb_list.end(); ++iter) { | |
902 | inodes.insert((*iter)->inode.ino); | |
903 | } | |
904 | ||
905 | // Record inodes of remotebits | |
906 | list<remotebit> const &rb_list = dl.get_dremote(); | |
907 | for (list<remotebit>::const_iterator | |
908 | iter = rb_list.begin(); iter != rb_list.end(); ++iter) { | |
909 | inodes.insert(iter->ino); | |
910 | } | |
911 | } | |
912 | } | |
913 | ||
914 | ||
915 | /** | |
916 | * Get a map of dirfrag to set of dentries in that dirfrag which are | |
917 | * touched in this operation. | |
918 | */ | |
919 | void EMetaBlob::get_dentries(std::map<dirfrag_t, std::set<std::string> > &dentries) const | |
920 | { | |
921 | for (std::map<dirfrag_t, dirlump>::const_iterator i = lump_map.begin(); i != lump_map.end(); ++i) { | |
922 | dirlump const &dl = i->second; | |
923 | dirfrag_t const &df = i->first; | |
924 | ||
925 | // Get all bits | |
926 | dl._decode_bits(); | |
927 | list<ceph::shared_ptr<fullbit> > const &fb_list = dl.get_dfull(); | |
928 | list<nullbit> const &nb_list = dl.get_dnull(); | |
929 | list<remotebit> const &rb_list = dl.get_dremote(); | |
930 | ||
931 | // For all bits, store dentry | |
932 | for (list<ceph::shared_ptr<fullbit> >::const_iterator | |
933 | iter = fb_list.begin(); iter != fb_list.end(); ++iter) { | |
934 | dentries[df].insert((*iter)->dn); | |
935 | ||
936 | } | |
937 | for (list<nullbit>::const_iterator | |
938 | iter = nb_list.begin(); iter != nb_list.end(); ++iter) { | |
939 | dentries[df].insert(iter->dn); | |
940 | } | |
941 | for (list<remotebit>::const_iterator | |
942 | iter = rb_list.begin(); iter != rb_list.end(); ++iter) { | |
943 | dentries[df].insert(iter->dn); | |
944 | } | |
945 | } | |
946 | } | |
947 | ||
948 | ||
949 | ||
950 | /** | |
951 | * Calculate all paths that we can infer are touched by this metablob. Only uses | |
952 | * information local to this metablob so it may only be the path within the | |
953 | * subtree. | |
954 | */ | |
955 | void EMetaBlob::get_paths( | |
956 | std::vector<std::string> &paths) const | |
957 | { | |
958 | // Each dentry has a 'location' which is a 2-tuple of parent inode and dentry name | |
959 | typedef std::pair<inodeno_t, std::string> Location; | |
960 | ||
961 | // Whenever we see a dentry within a dirlump, we remember it as a child of | |
962 | // the dirlump's inode | |
963 | std::map<inodeno_t, std::list<std::string> > children; | |
964 | ||
965 | // Whenever we see a location for an inode, remember it: this allows us to | |
966 | // build a path given an inode | |
967 | std::map<inodeno_t, Location> ino_locations; | |
968 | ||
969 | // Special case: operations on root inode populate roots but not dirlumps | |
970 | if (lump_map.empty() && !roots.empty()) { | |
971 | paths.push_back("/"); | |
972 | return; | |
973 | } | |
974 | ||
975 | // First pass | |
976 | // ========== | |
977 | // Build a tiny local metadata cache for the path structure in this metablob | |
978 | for (std::map<dirfrag_t, dirlump>::const_iterator i = lump_map.begin(); i != lump_map.end(); ++i) { | |
979 | inodeno_t const dir_ino = i->first.ino; | |
980 | dirlump const &dl = i->second; | |
981 | dl._decode_bits(); | |
982 | ||
983 | list<ceph::shared_ptr<fullbit> > const &fb_list = dl.get_dfull(); | |
984 | list<nullbit> const &nb_list = dl.get_dnull(); | |
985 | list<remotebit> const &rb_list = dl.get_dremote(); | |
986 | ||
987 | for (list<ceph::shared_ptr<fullbit> >::const_iterator | |
988 | iter = fb_list.begin(); iter != fb_list.end(); ++iter) { | |
94b18763 FG |
989 | boost::string_view dentry = (*iter)->dn; |
990 | children[dir_ino].emplace_back(dentry); | |
991 | ino_locations[(*iter)->inode.ino] = Location(dir_ino, std::string(dentry)); | |
7c673cae FG |
992 | } |
993 | ||
994 | for (list<nullbit>::const_iterator | |
995 | iter = nb_list.begin(); iter != nb_list.end(); ++iter) { | |
94b18763 FG |
996 | boost::string_view dentry = iter->dn; |
997 | children[dir_ino].emplace_back(dentry); | |
7c673cae FG |
998 | } |
999 | ||
1000 | for (list<remotebit>::const_iterator | |
1001 | iter = rb_list.begin(); iter != rb_list.end(); ++iter) { | |
94b18763 FG |
1002 | boost::string_view dentry = iter->dn; |
1003 | children[dir_ino].emplace_back(dentry); | |
7c673cae FG |
1004 | } |
1005 | } | |
1006 | ||
1007 | std::vector<Location> leaf_locations; | |
1008 | ||
1009 | // Second pass | |
1010 | // =========== | |
1011 | // Output paths for all childless nodes in the metablob | |
1012 | for (std::map<dirfrag_t, dirlump>::const_iterator i = lump_map.begin(); i != lump_map.end(); ++i) { | |
1013 | inodeno_t const dir_ino = i->first.ino; | |
1014 | dirlump const &dl = i->second; | |
1015 | dl._decode_bits(); | |
1016 | ||
1017 | list<ceph::shared_ptr<fullbit> > const &fb_list = dl.get_dfull(); | |
1018 | for (list<ceph::shared_ptr<fullbit> >::const_iterator | |
1019 | iter = fb_list.begin(); iter != fb_list.end(); ++iter) { | |
94b18763 | 1020 | std::string dentry((*iter)->dn); |
7c673cae | 1021 | children[dir_ino].push_back(dentry); |
94b18763 | 1022 | ino_locations[(*iter)->inode.ino] = Location(dir_ino, std::string(dentry)); |
7c673cae | 1023 | if (children.find((*iter)->inode.ino) == children.end()) { |
94b18763 | 1024 | leaf_locations.push_back(Location(dir_ino, std::string(dentry))); |
7c673cae FG |
1025 | |
1026 | } | |
1027 | } | |
1028 | ||
1029 | list<nullbit> const &nb_list = dl.get_dnull(); | |
1030 | for (list<nullbit>::const_iterator | |
1031 | iter = nb_list.begin(); iter != nb_list.end(); ++iter) { | |
94b18763 FG |
1032 | boost::string_view dentry = iter->dn; |
1033 | leaf_locations.push_back(Location(dir_ino, std::string(dentry))); | |
7c673cae FG |
1034 | } |
1035 | ||
1036 | list<remotebit> const &rb_list = dl.get_dremote(); | |
1037 | for (list<remotebit>::const_iterator | |
1038 | iter = rb_list.begin(); iter != rb_list.end(); ++iter) { | |
94b18763 FG |
1039 | boost::string_view dentry = iter->dn; |
1040 | leaf_locations.push_back(Location(dir_ino, std::string(dentry))); | |
7c673cae FG |
1041 | } |
1042 | } | |
1043 | ||
1044 | // For all the leaf locations identified, generate paths | |
1045 | for (std::vector<Location>::iterator i = leaf_locations.begin(); i != leaf_locations.end(); ++i) { | |
1046 | Location const &loc = *i; | |
1047 | std::string path = loc.second; | |
1048 | inodeno_t ino = loc.first; | |
1049 | while(ino_locations.find(ino) != ino_locations.end()) { | |
1050 | Location const &loc = ino_locations[ino]; | |
1051 | if (!path.empty()) { | |
1052 | path = loc.second + "/" + path; | |
1053 | } else { | |
1054 | path = loc.second + path; | |
1055 | } | |
1056 | ino = loc.first; | |
1057 | } | |
1058 | ||
1059 | paths.push_back(path); | |
1060 | } | |
1061 | } | |
1062 | ||
1063 | ||
1064 | void EMetaBlob::dump(Formatter *f) const | |
1065 | { | |
1066 | f->open_array_section("lumps"); | |
1067 | for (list<dirfrag_t>::const_iterator i = lump_order.begin(); | |
1068 | i != lump_order.end(); ++i) { | |
1069 | f->open_object_section("lump"); | |
1070 | f->open_object_section("dirfrag"); | |
1071 | f->dump_stream("dirfrag") << *i; | |
1072 | f->close_section(); // dirfrag | |
1073 | f->open_object_section("dirlump"); | |
1074 | lump_map.at(*i).dump(f); | |
1075 | f->close_section(); // dirlump | |
1076 | f->close_section(); // lump | |
1077 | } | |
1078 | f->close_section(); // lumps | |
1079 | ||
1080 | f->open_array_section("roots"); | |
1081 | for (list<ceph::shared_ptr<fullbit> >::const_iterator i = roots.begin(); | |
1082 | i != roots.end(); ++i) { | |
1083 | f->open_object_section("root"); | |
1084 | (*i)->dump(f); | |
1085 | f->close_section(); // root | |
1086 | } | |
1087 | f->close_section(); // roots | |
1088 | ||
1089 | f->open_array_section("tableclient tranactions"); | |
1090 | for (list<pair<__u8,version_t> >::const_iterator i = table_tids.begin(); | |
1091 | i != table_tids.end(); ++i) { | |
1092 | f->open_object_section("transaction"); | |
1093 | f->dump_int("tid", i->first); | |
1094 | f->dump_int("version", i->second); | |
1095 | f->close_section(); // transaction | |
1096 | } | |
1097 | f->close_section(); // tableclient transactions | |
1098 | ||
1099 | f->dump_int("renamed directory inodeno", renamed_dirino); | |
1100 | ||
1101 | f->open_array_section("renamed directory fragments"); | |
1102 | for (list<frag_t>::const_iterator i = renamed_dir_frags.begin(); | |
1103 | i != renamed_dir_frags.end(); ++i) { | |
1104 | f->dump_int("frag", *i); | |
1105 | } | |
1106 | f->close_section(); // renamed directory fragments | |
1107 | ||
1108 | f->dump_int("inotable version", inotablev); | |
1109 | f->dump_int("SessionMap version", sessionmapv); | |
1110 | f->dump_int("allocated ino", allocated_ino); | |
1111 | ||
1112 | f->dump_stream("preallocated inos") << preallocated_inos; | |
1113 | f->dump_int("used preallocated ino", used_preallocated_ino); | |
1114 | ||
1115 | f->open_object_section("client name"); | |
1116 | client_name.dump(f); | |
1117 | f->close_section(); // client name | |
1118 | ||
1119 | f->open_array_section("inodes starting a truncate"); | |
1120 | for(list<inodeno_t>::const_iterator i = truncate_start.begin(); | |
1121 | i != truncate_start.end(); ++i) { | |
1122 | f->dump_int("inodeno", *i); | |
1123 | } | |
1124 | f->close_section(); // truncate inodes | |
1125 | f->open_array_section("inodes finishing a truncated"); | |
1126 | for(map<inodeno_t,uint64_t>::const_iterator i = truncate_finish.begin(); | |
1127 | i != truncate_finish.end(); ++i) { | |
1128 | f->open_object_section("inode+segment"); | |
1129 | f->dump_int("inodeno", i->first); | |
1130 | f->dump_int("truncate starting segment", i->second); | |
1131 | f->close_section(); // truncated inode | |
1132 | } | |
1133 | f->close_section(); // truncate finish inodes | |
1134 | ||
1135 | f->open_array_section("destroyed inodes"); | |
1136 | for(vector<inodeno_t>::const_iterator i = destroyed_inodes.begin(); | |
1137 | i != destroyed_inodes.end(); ++i) { | |
1138 | f->dump_int("inodeno", *i); | |
1139 | } | |
1140 | f->close_section(); // destroyed inodes | |
1141 | ||
1142 | f->open_array_section("client requests"); | |
1143 | for(list<pair<metareqid_t,uint64_t> >::const_iterator i = client_reqs.begin(); | |
1144 | i != client_reqs.end(); ++i) { | |
1145 | f->open_object_section("Client request"); | |
1146 | f->dump_stream("request ID") << i->first; | |
1147 | f->dump_int("oldest request on client", i->second); | |
1148 | f->close_section(); // request | |
1149 | } | |
1150 | f->close_section(); // client requests | |
1151 | } | |
1152 | ||
1153 | void EMetaBlob::generate_test_instances(list<EMetaBlob*>& ls) | |
1154 | { | |
1155 | ls.push_back(new EMetaBlob()); | |
1156 | } | |
1157 | ||
1158 | void EMetaBlob::replay(MDSRank *mds, LogSegment *logseg, MDSlaveUpdate *slaveup) | |
1159 | { | |
1160 | dout(10) << "EMetaBlob.replay " << lump_map.size() << " dirlumps by " << client_name << dendl; | |
1161 | ||
1162 | assert(logseg); | |
1163 | ||
1164 | assert(g_conf->mds_kill_journal_replay_at != 1); | |
1165 | ||
1166 | for (list<ceph::shared_ptr<fullbit> >::iterator p = roots.begin(); p != roots.end(); ++p) { | |
1167 | CInode *in = mds->mdcache->get_inode((*p)->inode.ino); | |
1168 | bool isnew = in ? false:true; | |
1169 | if (!in) | |
1170 | in = new CInode(mds->mdcache, false); | |
1171 | (*p)->update_inode(mds, in); | |
1172 | ||
1173 | if (isnew) | |
1174 | mds->mdcache->add_inode(in); | |
1175 | if ((*p)->is_dirty()) in->_mark_dirty(logseg); | |
1176 | dout(10) << "EMetaBlob.replay " << (isnew ? " added root ":" updated root ") << *in << dendl; | |
1177 | } | |
1178 | ||
1179 | CInode *renamed_diri = 0; | |
1180 | CDir *olddir = 0; | |
1181 | if (renamed_dirino) { | |
1182 | renamed_diri = mds->mdcache->get_inode(renamed_dirino); | |
1183 | if (renamed_diri) | |
1184 | dout(10) << "EMetaBlob.replay renamed inode is " << *renamed_diri << dendl; | |
1185 | else | |
1186 | dout(10) << "EMetaBlob.replay don't have renamed ino " << renamed_dirino << dendl; | |
1187 | ||
1188 | int nnull = 0; | |
1189 | for (list<dirfrag_t>::iterator lp = lump_order.begin(); lp != lump_order.end(); ++lp) { | |
1190 | dirlump &lump = lump_map[*lp]; | |
1191 | if (lump.nnull) { | |
1192 | dout(10) << "EMetaBlob.replay found null dentry in dir " << *lp << dendl; | |
1193 | nnull += lump.nnull; | |
1194 | } | |
1195 | } | |
1196 | assert(nnull <= 1); | |
1197 | } | |
1198 | ||
1199 | // keep track of any inodes we unlink and don't relink elsewhere | |
1200 | map<CInode*, CDir*> unlinked; | |
1201 | set<CInode*> linked; | |
1202 | ||
1203 | // walk through my dirs (in order!) | |
1204 | for (list<dirfrag_t>::iterator lp = lump_order.begin(); | |
1205 | lp != lump_order.end(); | |
1206 | ++lp) { | |
1207 | dout(10) << "EMetaBlob.replay dir " << *lp << dendl; | |
1208 | dirlump &lump = lump_map[*lp]; | |
1209 | ||
1210 | // the dir | |
1211 | CDir *dir = mds->mdcache->get_force_dirfrag(*lp, true); | |
1212 | if (!dir) { | |
1213 | // hmm. do i have the inode? | |
1214 | CInode *diri = mds->mdcache->get_inode((*lp).ino); | |
1215 | if (!diri) { | |
1216 | if (MDS_INO_IS_MDSDIR(lp->ino)) { | |
1217 | assert(MDS_INO_MDSDIR(mds->get_nodeid()) != lp->ino); | |
1218 | diri = mds->mdcache->create_system_inode(lp->ino, S_IFDIR|0755); | |
1219 | diri->state_clear(CInode::STATE_AUTH); | |
1220 | dout(10) << "EMetaBlob.replay created base " << *diri << dendl; | |
1221 | } else { | |
1222 | dout(0) << "EMetaBlob.replay missing dir ino " << (*lp).ino << dendl; | |
1223 | mds->clog->error() << "failure replaying journal (EMetaBlob)"; | |
1224 | mds->damaged(); | |
1225 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
1226 | } | |
1227 | } | |
1228 | ||
1229 | // create the dirfrag | |
1230 | dir = diri->get_or_open_dirfrag(mds->mdcache, (*lp).frag); | |
1231 | ||
1232 | if (MDS_INO_IS_BASE(lp->ino)) | |
1233 | mds->mdcache->adjust_subtree_auth(dir, CDIR_AUTH_UNDEF); | |
1234 | ||
1235 | dout(10) << "EMetaBlob.replay added dir " << *dir << dendl; | |
1236 | } | |
1237 | dir->set_version( lump.fnode.version ); | |
1238 | dir->fnode = lump.fnode; | |
1239 | ||
1240 | if (lump.is_importing()) { | |
1241 | dir->state_set(CDir::STATE_AUTH); | |
1242 | dir->state_clear(CDir::STATE_COMPLETE); | |
1243 | } | |
1244 | if (lump.is_dirty()) { | |
1245 | dir->_mark_dirty(logseg); | |
1246 | ||
1247 | if (!(dir->fnode.rstat == dir->fnode.accounted_rstat)) { | |
1248 | dout(10) << "EMetaBlob.replay dirty nestinfo on " << *dir << dendl; | |
1249 | mds->locker->mark_updated_scatterlock(&dir->inode->nestlock); | |
1250 | logseg->dirty_dirfrag_nest.push_back(&dir->inode->item_dirty_dirfrag_nest); | |
1251 | } else { | |
1252 | dout(10) << "EMetaBlob.replay clean nestinfo on " << *dir << dendl; | |
1253 | } | |
1254 | if (!(dir->fnode.fragstat == dir->fnode.accounted_fragstat)) { | |
1255 | dout(10) << "EMetaBlob.replay dirty fragstat on " << *dir << dendl; | |
1256 | mds->locker->mark_updated_scatterlock(&dir->inode->filelock); | |
1257 | logseg->dirty_dirfrag_dir.push_back(&dir->inode->item_dirty_dirfrag_dir); | |
1258 | } else { | |
1259 | dout(10) << "EMetaBlob.replay clean fragstat on " << *dir << dendl; | |
1260 | } | |
1261 | } | |
1262 | if (lump.is_dirty_dft()) { | |
1263 | dout(10) << "EMetaBlob.replay dirty dirfragtree on " << *dir << dendl; | |
1264 | dir->state_set(CDir::STATE_DIRTYDFT); | |
1265 | mds->locker->mark_updated_scatterlock(&dir->inode->dirfragtreelock); | |
1266 | logseg->dirty_dirfrag_dirfragtree.push_back(&dir->inode->item_dirty_dirfrag_dirfragtree); | |
1267 | } | |
1268 | if (lump.is_new()) | |
1269 | dir->mark_new(logseg); | |
1270 | if (lump.is_complete()) | |
1271 | dir->mark_complete(); | |
1272 | ||
1273 | dout(10) << "EMetaBlob.replay updated dir " << *dir << dendl; | |
1274 | ||
1275 | // decode bits | |
1276 | lump._decode_bits(); | |
1277 | ||
1278 | // full dentry+inode pairs | |
1279 | for (list<ceph::shared_ptr<fullbit> >::const_iterator pp = lump.get_dfull().begin(); | |
1280 | pp != lump.get_dfull().end(); | |
1281 | ++pp) { | |
1282 | ceph::shared_ptr<fullbit> p = *pp; | |
1283 | CDentry *dn = dir->lookup_exact_snap(p->dn, p->dnlast); | |
1284 | if (!dn) { | |
1285 | dn = dir->add_null_dentry(p->dn, p->dnfirst, p->dnlast); | |
1286 | dn->set_version(p->dnv); | |
1287 | if (p->is_dirty()) dn->_mark_dirty(logseg); | |
1288 | dout(10) << "EMetaBlob.replay added (full) " << *dn << dendl; | |
1289 | } else { | |
1290 | dn->set_version(p->dnv); | |
1291 | if (p->is_dirty()) dn->_mark_dirty(logseg); | |
1292 | dout(10) << "EMetaBlob.replay for [" << p->dnfirst << "," << p->dnlast << "] had " << *dn << dendl; | |
1293 | dn->first = p->dnfirst; | |
1294 | assert(dn->last == p->dnlast); | |
1295 | } | |
1296 | if (lump.is_importing()) | |
1297 | dn->state_set(CDentry::STATE_AUTH); | |
1298 | ||
1299 | CInode *in = mds->mdcache->get_inode(p->inode.ino, p->dnlast); | |
1300 | if (!in) { | |
1301 | in = new CInode(mds->mdcache, dn->is_auth(), p->dnfirst, p->dnlast); | |
1302 | p->update_inode(mds, in); | |
1303 | mds->mdcache->add_inode(in); | |
1304 | if (!dn->get_linkage()->is_null()) { | |
1305 | if (dn->get_linkage()->is_primary()) { | |
1306 | unlinked[dn->get_linkage()->get_inode()] = dir; | |
1307 | stringstream ss; | |
1308 | ss << "EMetaBlob.replay FIXME had dentry linked to wrong inode " << *dn | |
1309 | << " " << *dn->get_linkage()->get_inode() << " should be " << p->inode.ino; | |
1310 | dout(0) << ss.str() << dendl; | |
1311 | mds->clog->warn(ss); | |
1312 | } | |
31f18b77 | 1313 | dir->unlink_inode(dn, false); |
7c673cae FG |
1314 | } |
1315 | if (unlinked.count(in)) | |
1316 | linked.insert(in); | |
1317 | dir->link_primary_inode(dn, in); | |
1318 | dout(10) << "EMetaBlob.replay added " << *in << dendl; | |
1319 | } else { | |
1320 | in->first = p->dnfirst; | |
1321 | p->update_inode(mds, in); | |
1322 | if (dn->get_linkage()->get_inode() != in && in->get_parent_dn()) { | |
1323 | dout(10) << "EMetaBlob.replay unlinking " << *in << dendl; | |
1324 | unlinked[in] = in->get_parent_dir(); | |
7c673cae | 1325 | in->get_parent_dir()->unlink_inode(in->get_parent_dn()); |
7c673cae FG |
1326 | } |
1327 | if (dn->get_linkage()->get_inode() != in) { | |
1328 | if (!dn->get_linkage()->is_null()) { // note: might be remote. as with stray reintegration. | |
1329 | if (dn->get_linkage()->is_primary()) { | |
1330 | unlinked[dn->get_linkage()->get_inode()] = dir; | |
1331 | stringstream ss; | |
1332 | ss << "EMetaBlob.replay FIXME had dentry linked to wrong inode " << *dn | |
1333 | << " " << *dn->get_linkage()->get_inode() << " should be " << p->inode.ino; | |
1334 | dout(0) << ss.str() << dendl; | |
1335 | mds->clog->warn(ss); | |
1336 | } | |
31f18b77 | 1337 | dir->unlink_inode(dn, false); |
7c673cae FG |
1338 | } |
1339 | if (unlinked.count(in)) | |
1340 | linked.insert(in); | |
1341 | dir->link_primary_inode(dn, in); | |
1342 | dout(10) << "EMetaBlob.replay linked " << *in << dendl; | |
1343 | } else { | |
1344 | dout(10) << "EMetaBlob.replay for [" << p->dnfirst << "," << p->dnlast << "] had " << *in << dendl; | |
1345 | } | |
1346 | assert(in->first == p->dnfirst || | |
1347 | (in->is_multiversion() && in->first > p->dnfirst)); | |
1348 | } | |
1349 | if (p->is_dirty()) | |
1350 | in->_mark_dirty(logseg); | |
1351 | if (p->is_dirty_parent()) | |
28e407b8 | 1352 | in->mark_dirty_parent(logseg, p->is_dirty_pool()); |
7c673cae FG |
1353 | if (p->need_snapflush()) |
1354 | logseg->open_files.push_back(&in->item_open_file); | |
1355 | if (dn->is_auth()) | |
1356 | in->state_set(CInode::STATE_AUTH); | |
1357 | else | |
1358 | in->state_clear(CInode::STATE_AUTH); | |
1359 | assert(g_conf->mds_kill_journal_replay_at != 2); | |
1360 | } | |
1361 | ||
1362 | // remote dentries | |
1363 | for (list<remotebit>::const_iterator p = lump.get_dremote().begin(); | |
1364 | p != lump.get_dremote().end(); | |
1365 | ++p) { | |
1366 | CDentry *dn = dir->lookup_exact_snap(p->dn, p->dnlast); | |
1367 | if (!dn) { | |
1368 | dn = dir->add_remote_dentry(p->dn, p->ino, p->d_type, p->dnfirst, p->dnlast); | |
1369 | dn->set_version(p->dnv); | |
1370 | if (p->dirty) dn->_mark_dirty(logseg); | |
1371 | dout(10) << "EMetaBlob.replay added " << *dn << dendl; | |
1372 | } else { | |
1373 | if (!dn->get_linkage()->is_null()) { | |
1374 | dout(10) << "EMetaBlob.replay unlinking " << *dn << dendl; | |
1375 | if (dn->get_linkage()->is_primary()) { | |
1376 | unlinked[dn->get_linkage()->get_inode()] = dir; | |
1377 | stringstream ss; | |
1378 | ss << "EMetaBlob.replay FIXME had dentry linked to wrong inode " << *dn | |
1379 | << " " << *dn->get_linkage()->get_inode() << " should be remote " << p->ino; | |
1380 | dout(0) << ss.str() << dendl; | |
1381 | } | |
31f18b77 | 1382 | dir->unlink_inode(dn, false); |
7c673cae FG |
1383 | } |
1384 | dir->link_remote_inode(dn, p->ino, p->d_type); | |
1385 | dn->set_version(p->dnv); | |
1386 | if (p->dirty) dn->_mark_dirty(logseg); | |
1387 | dout(10) << "EMetaBlob.replay for [" << p->dnfirst << "," << p->dnlast << "] had " << *dn << dendl; | |
1388 | dn->first = p->dnfirst; | |
1389 | assert(dn->last == p->dnlast); | |
1390 | } | |
1391 | if (lump.is_importing()) | |
1392 | dn->state_set(CDentry::STATE_AUTH); | |
1393 | } | |
1394 | ||
1395 | // null dentries | |
1396 | for (list<nullbit>::const_iterator p = lump.get_dnull().begin(); | |
1397 | p != lump.get_dnull().end(); | |
1398 | ++p) { | |
1399 | CDentry *dn = dir->lookup_exact_snap(p->dn, p->dnlast); | |
1400 | if (!dn) { | |
1401 | dn = dir->add_null_dentry(p->dn, p->dnfirst, p->dnlast); | |
1402 | dn->set_version(p->dnv); | |
1403 | if (p->dirty) dn->_mark_dirty(logseg); | |
1404 | dout(10) << "EMetaBlob.replay added (nullbit) " << *dn << dendl; | |
1405 | } else { | |
1406 | dn->first = p->dnfirst; | |
1407 | if (!dn->get_linkage()->is_null()) { | |
1408 | dout(10) << "EMetaBlob.replay unlinking " << *dn << dendl; | |
1409 | CInode *in = dn->get_linkage()->get_inode(); | |
1410 | // For renamed inode, We may call CInode::force_dirfrag() later. | |
1411 | // CInode::force_dirfrag() doesn't work well when inode is detached | |
1412 | // from the hierarchy. | |
1413 | if (!renamed_diri || renamed_diri != in) { | |
1414 | if (dn->get_linkage()->is_primary()) | |
1415 | unlinked[in] = dir; | |
1416 | dir->unlink_inode(dn); | |
7c673cae FG |
1417 | } |
1418 | } | |
1419 | dn->set_version(p->dnv); | |
1420 | if (p->dirty) dn->_mark_dirty(logseg); | |
1421 | dout(10) << "EMetaBlob.replay had " << *dn << dendl; | |
1422 | assert(dn->last == p->dnlast); | |
1423 | } | |
1424 | olddir = dir; | |
1425 | if (lump.is_importing()) | |
1426 | dn->state_set(CDentry::STATE_AUTH); | |
1427 | ||
1428 | // Make null dentries the first things we trim | |
1429 | dout(10) << "EMetaBlob.replay pushing to bottom of lru " << *dn << dendl; | |
7c673cae FG |
1430 | } |
1431 | } | |
1432 | ||
1433 | assert(g_conf->mds_kill_journal_replay_at != 3); | |
1434 | ||
1435 | if (renamed_dirino) { | |
1436 | if (renamed_diri) { | |
1437 | assert(unlinked.count(renamed_diri)); | |
1438 | assert(linked.count(renamed_diri)); | |
1439 | olddir = unlinked[renamed_diri]; | |
1440 | } else { | |
1441 | // we imported a diri we haven't seen before | |
1442 | renamed_diri = mds->mdcache->get_inode(renamed_dirino); | |
1443 | assert(renamed_diri); // it was in the metablob | |
1444 | } | |
1445 | ||
1446 | if (olddir) { | |
1447 | if (olddir->authority() != CDIR_AUTH_UNDEF && | |
1448 | renamed_diri->authority() == CDIR_AUTH_UNDEF) { | |
1449 | assert(slaveup); // auth to non-auth, must be slave prepare | |
1450 | list<frag_t> leaves; | |
1451 | renamed_diri->dirfragtree.get_leaves(leaves); | |
1452 | for (list<frag_t>::iterator p = leaves.begin(); p != leaves.end(); ++p) { | |
1453 | CDir *dir = renamed_diri->get_dirfrag(*p); | |
1454 | assert(dir); | |
1455 | if (dir->get_dir_auth() == CDIR_AUTH_UNDEF) | |
1456 | // preserve subtree bound until slave commit | |
1457 | slaveup->olddirs.insert(dir->inode); | |
1458 | else | |
1459 | dir->state_set(CDir::STATE_AUTH); | |
1460 | } | |
1461 | } | |
1462 | ||
1463 | mds->mdcache->adjust_subtree_after_rename(renamed_diri, olddir, false); | |
1464 | ||
1465 | // see if we can discard the subtree we renamed out of | |
1466 | CDir *root = mds->mdcache->get_subtree_root(olddir); | |
1467 | if (root->get_dir_auth() == CDIR_AUTH_UNDEF) { | |
1468 | if (slaveup) // preserve the old dir until slave commit | |
1469 | slaveup->olddirs.insert(olddir->inode); | |
1470 | else | |
1471 | mds->mdcache->try_trim_non_auth_subtree(root); | |
1472 | } | |
1473 | } | |
1474 | ||
1475 | // if we are the srci importer, we'll also have some dirfrags we have to open up... | |
1476 | if (renamed_diri->authority() != CDIR_AUTH_UNDEF) { | |
1477 | for (list<frag_t>::iterator p = renamed_dir_frags.begin(); p != renamed_dir_frags.end(); ++p) { | |
1478 | CDir *dir = renamed_diri->get_dirfrag(*p); | |
1479 | if (dir) { | |
1480 | // we already had the inode before, and we already adjusted this subtree accordingly. | |
1481 | dout(10) << " already had+adjusted rename import bound " << *dir << dendl; | |
1482 | assert(olddir); | |
1483 | continue; | |
1484 | } | |
1485 | dir = renamed_diri->get_or_open_dirfrag(mds->mdcache, *p); | |
1486 | dout(10) << " creating new rename import bound " << *dir << dendl; | |
1487 | dir->state_clear(CDir::STATE_AUTH); | |
224ce89b | 1488 | mds->mdcache->adjust_subtree_auth(dir, CDIR_AUTH_UNDEF); |
7c673cae FG |
1489 | } |
1490 | } | |
1491 | ||
1492 | // rename may overwrite an empty directory and move it into stray dir. | |
1493 | unlinked.erase(renamed_diri); | |
1494 | for (map<CInode*, CDir*>::iterator p = unlinked.begin(); p != unlinked.end(); ++p) { | |
1495 | if (!linked.count(p->first)) | |
1496 | continue; | |
1497 | assert(p->first->is_dir()); | |
1498 | mds->mdcache->adjust_subtree_after_rename(p->first, p->second, false); | |
1499 | } | |
1500 | } | |
1501 | ||
1502 | if (!unlinked.empty()) { | |
1503 | for (set<CInode*>::iterator p = linked.begin(); p != linked.end(); ++p) | |
1504 | unlinked.erase(*p); | |
1505 | dout(10) << " unlinked set contains " << unlinked << dendl; | |
1506 | for (map<CInode*, CDir*>::iterator p = unlinked.begin(); p != unlinked.end(); ++p) { | |
1507 | if (slaveup) // preserve unlinked inodes until slave commit | |
1508 | slaveup->unlinked.insert(p->first); | |
1509 | else | |
1510 | mds->mdcache->remove_inode_recursive(p->first); | |
1511 | } | |
1512 | } | |
1513 | ||
1514 | // table client transactions | |
1515 | for (list<pair<__u8,version_t> >::iterator p = table_tids.begin(); | |
1516 | p != table_tids.end(); | |
1517 | ++p) { | |
1518 | dout(10) << "EMetaBlob.replay noting " << get_mdstable_name(p->first) | |
1519 | << " transaction " << p->second << dendl; | |
1520 | MDSTableClient *client = mds->get_table_client(p->first); | |
1521 | if (client) | |
1522 | client->got_journaled_agree(p->second, logseg); | |
1523 | } | |
1524 | ||
1525 | // opened ino? | |
1526 | if (opened_ino) { | |
1527 | CInode *in = mds->mdcache->get_inode(opened_ino); | |
1528 | assert(in); | |
1529 | dout(10) << "EMetaBlob.replay noting opened inode " << *in << dendl; | |
1530 | logseg->open_files.push_back(&in->item_open_file); | |
1531 | } | |
1532 | ||
1533 | // allocated_inos | |
1534 | if (inotablev) { | |
1535 | if (mds->inotable->get_version() >= inotablev) { | |
1536 | dout(10) << "EMetaBlob.replay inotable tablev " << inotablev | |
1537 | << " <= table " << mds->inotable->get_version() << dendl; | |
1538 | } else { | |
1539 | dout(10) << "EMetaBlob.replay inotable v " << inotablev | |
1540 | << " - 1 == table " << mds->inotable->get_version() | |
1541 | << " allocated+used " << allocated_ino | |
1542 | << " prealloc " << preallocated_inos | |
1543 | << dendl; | |
1544 | if (allocated_ino) | |
1545 | mds->inotable->replay_alloc_id(allocated_ino); | |
1546 | if (preallocated_inos.size()) | |
1547 | mds->inotable->replay_alloc_ids(preallocated_inos); | |
1548 | ||
1549 | // [repair bad inotable updates] | |
1550 | if (inotablev > mds->inotable->get_version()) { | |
1551 | mds->clog->error() << "journal replay inotablev mismatch " | |
1552 | << mds->inotable->get_version() << " -> " << inotablev; | |
1553 | mds->inotable->force_replay_version(inotablev); | |
1554 | } | |
1555 | ||
1556 | assert(inotablev == mds->inotable->get_version()); | |
1557 | } | |
1558 | } | |
1559 | if (sessionmapv) { | |
1560 | if (mds->sessionmap.get_version() >= sessionmapv) { | |
1561 | dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv | |
1562 | << " <= table " << mds->sessionmap.get_version() << dendl; | |
1563 | } else if (mds->sessionmap.get_version() + 2 >= sessionmapv) { | |
1564 | dout(10) << "EMetaBlob.replay sessionmap v " << sessionmapv | |
1565 | << " -(1|2) == table " << mds->sessionmap.get_version() | |
1566 | << " prealloc " << preallocated_inos | |
1567 | << " used " << used_preallocated_ino | |
1568 | << dendl; | |
1569 | Session *session = mds->sessionmap.get_session(client_name); | |
1570 | if (session) { | |
1571 | dout(20) << " (session prealloc " << session->info.prealloc_inos << ")" << dendl; | |
1572 | if (used_preallocated_ino) { | |
1573 | if (!session->info.prealloc_inos.empty()) { | |
1574 | inodeno_t next = session->next_ino(); | |
1575 | inodeno_t i = session->take_ino(used_preallocated_ino); | |
1576 | if (next != i) | |
1577 | mds->clog->warn() << " replayed op " << client_reqs << " used ino " << i | |
1578 | << " but session next is " << next; | |
1579 | assert(i == used_preallocated_ino); | |
1580 | session->info.used_inos.clear(); | |
1581 | } | |
1582 | mds->sessionmap.replay_dirty_session(session); | |
1583 | } | |
1584 | if (!preallocated_inos.empty()) { | |
1585 | session->info.prealloc_inos.insert(preallocated_inos); | |
1586 | mds->sessionmap.replay_dirty_session(session); | |
1587 | } | |
1588 | ||
1589 | } else { | |
1590 | dout(10) << "EMetaBlob.replay no session for " << client_name << dendl; | |
1591 | if (used_preallocated_ino) { | |
1592 | mds->sessionmap.replay_advance_version(); | |
1593 | } | |
1594 | if (!preallocated_inos.empty()) | |
1595 | mds->sessionmap.replay_advance_version(); | |
1596 | } | |
1597 | assert(sessionmapv == mds->sessionmap.get_version()); | |
1598 | } else { | |
1599 | mds->clog->error() << "journal replay sessionmap v " << sessionmapv | |
1600 | << " -(1|2) > table " << mds->sessionmap.get_version(); | |
1601 | assert(g_conf->mds_wipe_sessions); | |
1602 | mds->sessionmap.wipe(); | |
1603 | mds->sessionmap.set_version(sessionmapv); | |
1604 | } | |
1605 | } | |
1606 | ||
1607 | // truncating inodes | |
1608 | for (list<inodeno_t>::iterator p = truncate_start.begin(); | |
1609 | p != truncate_start.end(); | |
1610 | ++p) { | |
1611 | CInode *in = mds->mdcache->get_inode(*p); | |
1612 | assert(in); | |
1613 | mds->mdcache->add_recovered_truncate(in, logseg); | |
1614 | } | |
1615 | for (map<inodeno_t,uint64_t>::iterator p = truncate_finish.begin(); | |
1616 | p != truncate_finish.end(); | |
1617 | ++p) { | |
1618 | LogSegment *ls = mds->mdlog->get_segment(p->second); | |
1619 | if (ls) { | |
1620 | CInode *in = mds->mdcache->get_inode(p->first); | |
1621 | assert(in); | |
1622 | mds->mdcache->remove_recovered_truncate(in, ls); | |
1623 | } | |
1624 | } | |
1625 | ||
1626 | // destroyed inodes | |
1627 | for (vector<inodeno_t>::iterator p = destroyed_inodes.begin(); | |
1628 | p != destroyed_inodes.end(); | |
1629 | ++p) { | |
1630 | CInode *in = mds->mdcache->get_inode(*p); | |
1631 | if (in) { | |
1632 | dout(10) << "EMetaBlob.replay destroyed " << *p << ", dropping " << *in << dendl; | |
1633 | CDentry *parent = in->get_parent_dn(); | |
1634 | mds->mdcache->remove_inode(in); | |
1635 | if (parent) { | |
1636 | dout(10) << "EMetaBlob.replay unlinked from dentry " << *parent << dendl; | |
1637 | assert(parent->get_linkage()->is_null()); | |
7c673cae FG |
1638 | } |
1639 | } else { | |
1640 | dout(10) << "EMetaBlob.replay destroyed " << *p << ", not in cache" << dendl; | |
1641 | } | |
1642 | } | |
1643 | ||
1644 | // client requests | |
1645 | for (list<pair<metareqid_t, uint64_t> >::iterator p = client_reqs.begin(); | |
1646 | p != client_reqs.end(); | |
1647 | ++p) { | |
1648 | if (p->first.name.is_client()) { | |
1649 | dout(10) << "EMetaBlob.replay request " << p->first << " trim_to " << p->second << dendl; | |
1650 | inodeno_t created = allocated_ino ? allocated_ino : used_preallocated_ino; | |
1651 | // if we allocated an inode, there should be exactly one client request id. | |
1652 | assert(created == inodeno_t() || client_reqs.size() == 1); | |
1653 | ||
1654 | Session *session = mds->sessionmap.get_session(p->first.name); | |
1655 | if (session) { | |
1656 | session->add_completed_request(p->first.tid, created); | |
1657 | if (p->second) | |
1658 | session->trim_completed_requests(p->second); | |
1659 | } | |
1660 | } | |
1661 | } | |
1662 | ||
1663 | // client flushes | |
1664 | for (list<pair<metareqid_t, uint64_t> >::iterator p = client_flushes.begin(); | |
1665 | p != client_flushes.end(); | |
1666 | ++p) { | |
1667 | if (p->first.name.is_client()) { | |
1668 | dout(10) << "EMetaBlob.replay flush " << p->first << " trim_to " << p->second << dendl; | |
1669 | Session *session = mds->sessionmap.get_session(p->first.name); | |
1670 | if (session) { | |
1671 | session->add_completed_flush(p->first.tid); | |
1672 | if (p->second) | |
1673 | session->trim_completed_flushes(p->second); | |
1674 | } | |
1675 | } | |
1676 | } | |
1677 | ||
1678 | // update segment | |
1679 | update_segment(logseg); | |
1680 | ||
1681 | assert(g_conf->mds_kill_journal_replay_at != 4); | |
1682 | } | |
1683 | ||
1684 | // ----------------------- | |
1685 | // ESession | |
1686 | ||
1687 | void ESession::update_segment() | |
1688 | { | |
1689 | _segment->sessionmapv = cmapv; | |
1690 | if (inos.size() && inotablev) | |
1691 | _segment->inotablev = inotablev; | |
1692 | } | |
1693 | ||
1694 | void ESession::replay(MDSRank *mds) | |
1695 | { | |
1696 | if (mds->sessionmap.get_version() >= cmapv) { | |
1697 | dout(10) << "ESession.replay sessionmap " << mds->sessionmap.get_version() | |
1698 | << " >= " << cmapv << ", noop" << dendl; | |
1699 | } else { | |
1700 | dout(10) << "ESession.replay sessionmap " << mds->sessionmap.get_version() | |
1701 | << " < " << cmapv << " " << (open ? "open":"close") << " " << client_inst << dendl; | |
1702 | Session *session; | |
1703 | if (open) { | |
1704 | session = mds->sessionmap.get_or_add_session(client_inst); | |
1705 | mds->sessionmap.set_state(session, Session::STATE_OPEN); | |
1706 | session->set_client_metadata(client_metadata); | |
1707 | dout(10) << " opened session " << session->info.inst << dendl; | |
1708 | } else { | |
1709 | session = mds->sessionmap.get_session(client_inst.name); | |
1710 | if (session) { // there always should be a session, but there's a bug | |
1711 | if (session->connection == NULL) { | |
1712 | dout(10) << " removed session " << session->info.inst << dendl; | |
1713 | mds->sessionmap.remove_session(session); | |
1714 | session = NULL; | |
1715 | } else { | |
1716 | session->clear(); // the client has reconnected; keep the Session, but reset | |
1717 | dout(10) << " reset session " << session->info.inst << " (they reconnected)" << dendl; | |
1718 | } | |
1719 | } else { | |
1720 | mds->clog->error() << "replayed stray Session close event for " << client_inst | |
1721 | << " from time " << stamp << ", ignoring"; | |
1722 | } | |
1723 | } | |
1724 | if (session) { | |
1725 | mds->sessionmap.replay_dirty_session(session); | |
1726 | } else { | |
1727 | mds->sessionmap.replay_advance_version(); | |
1728 | } | |
1729 | assert(mds->sessionmap.get_version() == cmapv); | |
1730 | } | |
1731 | ||
1732 | if (inos.size() && inotablev) { | |
1733 | if (mds->inotable->get_version() >= inotablev) { | |
1734 | dout(10) << "ESession.replay inotable " << mds->inotable->get_version() | |
1735 | << " >= " << inotablev << ", noop" << dendl; | |
1736 | } else { | |
1737 | dout(10) << "ESession.replay inotable " << mds->inotable->get_version() | |
1738 | << " < " << inotablev << " " << (open ? "add":"remove") << dendl; | |
1739 | assert(!open); // for now | |
1740 | mds->inotable->replay_release_ids(inos); | |
1741 | assert(mds->inotable->get_version() == inotablev); | |
1742 | } | |
1743 | } | |
1744 | ||
1745 | update_segment(); | |
1746 | } | |
1747 | ||
1748 | void ESession::encode(bufferlist &bl, uint64_t features) const | |
1749 | { | |
1750 | ENCODE_START(4, 3, bl); | |
1751 | ::encode(stamp, bl); | |
1752 | ::encode(client_inst, bl, features); | |
1753 | ::encode(open, bl); | |
1754 | ::encode(cmapv, bl); | |
1755 | ::encode(inos, bl); | |
1756 | ::encode(inotablev, bl); | |
1757 | ::encode(client_metadata, bl); | |
1758 | ENCODE_FINISH(bl); | |
1759 | } | |
1760 | ||
1761 | void ESession::decode(bufferlist::iterator &bl) | |
1762 | { | |
1763 | DECODE_START_LEGACY_COMPAT_LEN(4, 3, 3, bl); | |
1764 | if (struct_v >= 2) | |
1765 | ::decode(stamp, bl); | |
1766 | ::decode(client_inst, bl); | |
1767 | ::decode(open, bl); | |
1768 | ::decode(cmapv, bl); | |
1769 | ::decode(inos, bl); | |
1770 | ::decode(inotablev, bl); | |
1771 | if (struct_v >= 4) { | |
1772 | ::decode(client_metadata, bl); | |
1773 | } | |
1774 | DECODE_FINISH(bl); | |
1775 | } | |
1776 | ||
1777 | void ESession::dump(Formatter *f) const | |
1778 | { | |
1779 | f->dump_stream("client instance") << client_inst; | |
1780 | f->dump_string("open", open ? "true" : "false"); | |
1781 | f->dump_int("client map version", cmapv); | |
1782 | f->dump_stream("inos") << inos; | |
1783 | f->dump_int("inotable version", inotablev); | |
1784 | f->open_object_section("client_metadata"); | |
1785 | for (map<string, string>::const_iterator i = client_metadata.begin(); | |
1786 | i != client_metadata.end(); ++i) { | |
1787 | f->dump_string(i->first.c_str(), i->second); | |
1788 | } | |
1789 | f->close_section(); // client_metadata | |
1790 | } | |
1791 | ||
1792 | void ESession::generate_test_instances(list<ESession*>& ls) | |
1793 | { | |
1794 | ls.push_back(new ESession); | |
1795 | } | |
1796 | ||
1797 | // ----------------------- | |
1798 | // ESessions | |
1799 | ||
1800 | void ESessions::encode(bufferlist &bl, uint64_t features) const | |
1801 | { | |
1802 | ENCODE_START(1, 1, bl); | |
1803 | ::encode(client_map, bl, features); | |
1804 | ::encode(cmapv, bl); | |
1805 | ::encode(stamp, bl); | |
1806 | ENCODE_FINISH(bl); | |
1807 | } | |
1808 | ||
1809 | void ESessions::decode_old(bufferlist::iterator &bl) | |
1810 | { | |
1811 | ::decode(client_map, bl); | |
1812 | ::decode(cmapv, bl); | |
1813 | if (!bl.end()) | |
1814 | ::decode(stamp, bl); | |
1815 | } | |
1816 | ||
1817 | void ESessions::decode_new(bufferlist::iterator &bl) | |
1818 | { | |
1819 | DECODE_START(1, bl); | |
1820 | ::decode(client_map, bl); | |
1821 | ::decode(cmapv, bl); | |
1822 | if (!bl.end()) | |
1823 | ::decode(stamp, bl); | |
1824 | DECODE_FINISH(bl); | |
1825 | } | |
1826 | ||
1827 | void ESessions::dump(Formatter *f) const | |
1828 | { | |
1829 | f->dump_int("client map version", cmapv); | |
1830 | ||
1831 | f->open_array_section("client map"); | |
1832 | for (map<client_t,entity_inst_t>::const_iterator i = client_map.begin(); | |
1833 | i != client_map.end(); ++i) { | |
1834 | f->open_object_section("client"); | |
1835 | f->dump_int("client id", i->first.v); | |
1836 | f->dump_stream("client entity") << i->second; | |
1837 | f->close_section(); // client | |
1838 | } | |
1839 | f->close_section(); // client map | |
1840 | } | |
1841 | ||
1842 | void ESessions::generate_test_instances(list<ESessions*>& ls) | |
1843 | { | |
1844 | ls.push_back(new ESessions()); | |
1845 | } | |
1846 | ||
1847 | void ESessions::update_segment() | |
1848 | { | |
1849 | _segment->sessionmapv = cmapv; | |
1850 | } | |
1851 | ||
1852 | void ESessions::replay(MDSRank *mds) | |
1853 | { | |
1854 | if (mds->sessionmap.get_version() >= cmapv) { | |
1855 | dout(10) << "ESessions.replay sessionmap " << mds->sessionmap.get_version() | |
1856 | << " >= " << cmapv << ", noop" << dendl; | |
1857 | } else { | |
1858 | dout(10) << "ESessions.replay sessionmap " << mds->sessionmap.get_version() | |
1859 | << " < " << cmapv << dendl; | |
28e407b8 | 1860 | mds->sessionmap.replay_open_sessions(client_map); |
7c673cae | 1861 | assert(mds->sessionmap.get_version() == cmapv); |
7c673cae FG |
1862 | } |
1863 | update_segment(); | |
1864 | } | |
1865 | ||
1866 | ||
1867 | // ----------------------- | |
1868 | // ETableServer | |
1869 | ||
1870 | void ETableServer::encode(bufferlist& bl, uint64_t features) const | |
1871 | { | |
1872 | ENCODE_START(3, 3, bl); | |
1873 | ::encode(stamp, bl); | |
1874 | ::encode(table, bl); | |
1875 | ::encode(op, bl); | |
1876 | ::encode(reqid, bl); | |
1877 | ::encode(bymds, bl); | |
1878 | ::encode(mutation, bl); | |
1879 | ::encode(tid, bl); | |
1880 | ::encode(version, bl); | |
1881 | ENCODE_FINISH(bl); | |
1882 | } | |
1883 | ||
1884 | void ETableServer::decode(bufferlist::iterator &bl) | |
1885 | { | |
1886 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
1887 | if (struct_v >= 2) | |
1888 | ::decode(stamp, bl); | |
1889 | ::decode(table, bl); | |
1890 | ::decode(op, bl); | |
1891 | ::decode(reqid, bl); | |
1892 | ::decode(bymds, bl); | |
1893 | ::decode(mutation, bl); | |
1894 | ::decode(tid, bl); | |
1895 | ::decode(version, bl); | |
1896 | DECODE_FINISH(bl); | |
1897 | } | |
1898 | ||
1899 | void ETableServer::dump(Formatter *f) const | |
1900 | { | |
1901 | f->dump_int("table id", table); | |
1902 | f->dump_int("op", op); | |
1903 | f->dump_int("request id", reqid); | |
1904 | f->dump_int("by mds", bymds); | |
1905 | f->dump_int("tid", tid); | |
1906 | f->dump_int("version", version); | |
1907 | } | |
1908 | ||
1909 | void ETableServer::generate_test_instances(list<ETableServer*>& ls) | |
1910 | { | |
1911 | ls.push_back(new ETableServer()); | |
1912 | } | |
1913 | ||
1914 | ||
1915 | void ETableServer::update_segment() | |
1916 | { | |
1917 | _segment->tablev[table] = version; | |
1918 | } | |
1919 | ||
1920 | void ETableServer::replay(MDSRank *mds) | |
1921 | { | |
1922 | MDSTableServer *server = mds->get_table_server(table); | |
1923 | if (!server) | |
1924 | return; | |
1925 | ||
1926 | if (server->get_version() >= version) { | |
1927 | dout(10) << "ETableServer.replay " << get_mdstable_name(table) | |
1928 | << " " << get_mdstableserver_opname(op) | |
1929 | << " event " << version | |
1930 | << " <= table " << server->get_version() << dendl; | |
1931 | return; | |
1932 | } | |
1933 | ||
1934 | dout(10) << " ETableServer.replay " << get_mdstable_name(table) | |
1935 | << " " << get_mdstableserver_opname(op) | |
1936 | << " event " << version << " - 1 == table " << server->get_version() << dendl; | |
1937 | assert(version-1 == server->get_version()); | |
1938 | ||
1939 | switch (op) { | |
1940 | case TABLESERVER_OP_PREPARE: | |
1941 | server->_prepare(mutation, reqid, bymds); | |
1942 | server->_note_prepare(bymds, reqid); | |
1943 | break; | |
1944 | case TABLESERVER_OP_COMMIT: | |
1945 | server->_commit(tid); | |
1946 | server->_note_commit(tid); | |
1947 | break; | |
1948 | case TABLESERVER_OP_ROLLBACK: | |
1949 | server->_rollback(tid); | |
1950 | server->_note_rollback(tid); | |
1951 | break; | |
1952 | case TABLESERVER_OP_SERVER_UPDATE: | |
1953 | server->_server_update(mutation); | |
1954 | break; | |
1955 | default: | |
1956 | mds->clog->error() << "invalid tableserver op in ETableServer"; | |
1957 | mds->damaged(); | |
1958 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
1959 | } | |
1960 | ||
1961 | assert(version == server->get_version()); | |
1962 | update_segment(); | |
1963 | } | |
1964 | ||
1965 | ||
1966 | // --------------------- | |
1967 | // ETableClient | |
1968 | ||
1969 | void ETableClient::encode(bufferlist& bl, uint64_t features) const | |
1970 | { | |
1971 | ENCODE_START(3, 3, bl); | |
1972 | ::encode(stamp, bl); | |
1973 | ::encode(table, bl); | |
1974 | ::encode(op, bl); | |
1975 | ::encode(tid, bl); | |
1976 | ENCODE_FINISH(bl); | |
1977 | } | |
1978 | ||
1979 | void ETableClient::decode(bufferlist::iterator &bl) | |
1980 | { | |
1981 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
1982 | if (struct_v >= 2) | |
1983 | ::decode(stamp, bl); | |
1984 | ::decode(table, bl); | |
1985 | ::decode(op, bl); | |
1986 | ::decode(tid, bl); | |
1987 | DECODE_FINISH(bl); | |
1988 | } | |
1989 | ||
1990 | void ETableClient::dump(Formatter *f) const | |
1991 | { | |
1992 | f->dump_int("table", table); | |
1993 | f->dump_int("op", op); | |
1994 | f->dump_int("tid", tid); | |
1995 | } | |
1996 | ||
1997 | void ETableClient::generate_test_instances(list<ETableClient*>& ls) | |
1998 | { | |
1999 | ls.push_back(new ETableClient()); | |
2000 | } | |
2001 | ||
2002 | void ETableClient::replay(MDSRank *mds) | |
2003 | { | |
2004 | dout(10) << " ETableClient.replay " << get_mdstable_name(table) | |
2005 | << " op " << get_mdstableserver_opname(op) | |
2006 | << " tid " << tid << dendl; | |
2007 | ||
2008 | MDSTableClient *client = mds->get_table_client(table); | |
2009 | if (!client) | |
2010 | return; | |
2011 | ||
2012 | assert(op == TABLESERVER_OP_ACK); | |
2013 | client->got_journaled_ack(tid); | |
2014 | } | |
2015 | ||
2016 | ||
2017 | // ----------------------- | |
2018 | // ESnap | |
2019 | /* | |
2020 | void ESnap::update_segment() | |
2021 | { | |
2022 | _segment->tablev[TABLE_SNAP] = version; | |
2023 | } | |
2024 | ||
2025 | void ESnap::replay(MDSRank *mds) | |
2026 | { | |
2027 | if (mds->snaptable->get_version() >= version) { | |
2028 | dout(10) << "ESnap.replay event " << version | |
2029 | << " <= table " << mds->snaptable->get_version() << dendl; | |
2030 | return; | |
2031 | } | |
2032 | ||
2033 | dout(10) << " ESnap.replay event " << version | |
2034 | << " - 1 == table " << mds->snaptable->get_version() << dendl; | |
2035 | assert(version-1 == mds->snaptable->get_version()); | |
2036 | ||
2037 | if (create) { | |
2038 | version_t v; | |
2039 | snapid_t s = mds->snaptable->create(snap.dirino, snap.name, snap.stamp, &v); | |
2040 | assert(s == snap.snapid); | |
2041 | } else { | |
2042 | mds->snaptable->remove(snap.snapid); | |
2043 | } | |
2044 | ||
2045 | assert(version == mds->snaptable->get_version()); | |
2046 | } | |
2047 | */ | |
2048 | ||
2049 | ||
2050 | ||
2051 | // ----------------------- | |
2052 | // EUpdate | |
2053 | ||
2054 | void EUpdate::encode(bufferlist &bl, uint64_t features) const | |
2055 | { | |
2056 | ENCODE_START(4, 4, bl); | |
2057 | ::encode(stamp, bl); | |
2058 | ::encode(type, bl); | |
2059 | ::encode(metablob, bl, features); | |
2060 | ::encode(client_map, bl); | |
2061 | ::encode(cmapv, bl); | |
2062 | ::encode(reqid, bl); | |
2063 | ::encode(had_slaves, bl); | |
2064 | ENCODE_FINISH(bl); | |
2065 | } | |
2066 | ||
2067 | void EUpdate::decode(bufferlist::iterator &bl) | |
2068 | { | |
2069 | DECODE_START_LEGACY_COMPAT_LEN(4, 4, 4, bl); | |
2070 | if (struct_v >= 2) | |
2071 | ::decode(stamp, bl); | |
2072 | ::decode(type, bl); | |
2073 | ::decode(metablob, bl); | |
2074 | ::decode(client_map, bl); | |
2075 | if (struct_v >= 3) | |
2076 | ::decode(cmapv, bl); | |
2077 | ::decode(reqid, bl); | |
2078 | ::decode(had_slaves, bl); | |
2079 | DECODE_FINISH(bl); | |
2080 | } | |
2081 | ||
2082 | void EUpdate::dump(Formatter *f) const | |
2083 | { | |
2084 | f->open_object_section("metablob"); | |
2085 | metablob.dump(f); | |
2086 | f->close_section(); // metablob | |
2087 | ||
2088 | f->dump_string("type", type); | |
2089 | f->dump_int("client map length", client_map.length()); | |
2090 | f->dump_int("client map version", cmapv); | |
2091 | f->dump_stream("reqid") << reqid; | |
2092 | f->dump_string("had slaves", had_slaves ? "true" : "false"); | |
2093 | } | |
2094 | ||
2095 | void EUpdate::generate_test_instances(list<EUpdate*>& ls) | |
2096 | { | |
2097 | ls.push_back(new EUpdate()); | |
2098 | } | |
2099 | ||
2100 | ||
2101 | void EUpdate::update_segment() | |
2102 | { | |
2103 | metablob.update_segment(_segment); | |
2104 | ||
2105 | if (client_map.length()) | |
2106 | _segment->sessionmapv = cmapv; | |
2107 | ||
2108 | if (had_slaves) | |
2109 | _segment->uncommitted_masters.insert(reqid); | |
2110 | } | |
2111 | ||
2112 | void EUpdate::replay(MDSRank *mds) | |
2113 | { | |
2114 | metablob.replay(mds, _segment); | |
2115 | ||
2116 | if (had_slaves) { | |
2117 | dout(10) << "EUpdate.replay " << reqid << " had slaves, expecting a matching ECommitted" << dendl; | |
2118 | _segment->uncommitted_masters.insert(reqid); | |
2119 | set<mds_rank_t> slaves; | |
2120 | mds->mdcache->add_uncommitted_master(reqid, _segment, slaves, true); | |
2121 | } | |
2122 | ||
2123 | if (client_map.length()) { | |
2124 | if (mds->sessionmap.get_version() >= cmapv) { | |
2125 | dout(10) << "EUpdate.replay sessionmap v " << cmapv | |
2126 | << " <= table " << mds->sessionmap.get_version() << dendl; | |
2127 | } else { | |
2128 | dout(10) << "EUpdate.replay sessionmap " << mds->sessionmap.get_version() | |
2129 | << " < " << cmapv << dendl; | |
2130 | // open client sessions? | |
2131 | map<client_t,entity_inst_t> cm; | |
2132 | bufferlist::iterator blp = client_map.begin(); | |
2133 | ::decode(cm, blp); | |
28e407b8 | 2134 | mds->sessionmap.replay_open_sessions(cm); |
7c673cae | 2135 | assert(mds->sessionmap.get_version() == cmapv); |
7c673cae FG |
2136 | } |
2137 | } | |
2138 | update_segment(); | |
2139 | } | |
2140 | ||
2141 | ||
2142 | // ------------------------ | |
2143 | // EOpen | |
2144 | ||
2145 | void EOpen::encode(bufferlist &bl, uint64_t features) const { | |
2146 | ENCODE_START(4, 3, bl); | |
2147 | ::encode(stamp, bl); | |
2148 | ::encode(metablob, bl, features); | |
2149 | ::encode(inos, bl); | |
2150 | ::encode(snap_inos, bl); | |
2151 | ENCODE_FINISH(bl); | |
2152 | } | |
2153 | ||
2154 | void EOpen::decode(bufferlist::iterator &bl) { | |
2155 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
2156 | if (struct_v >= 2) | |
2157 | ::decode(stamp, bl); | |
2158 | ::decode(metablob, bl); | |
2159 | ::decode(inos, bl); | |
2160 | if (struct_v >= 4) | |
2161 | ::decode(snap_inos, bl); | |
2162 | DECODE_FINISH(bl); | |
2163 | } | |
2164 | ||
2165 | void EOpen::dump(Formatter *f) const | |
2166 | { | |
2167 | f->open_object_section("metablob"); | |
2168 | metablob.dump(f); | |
2169 | f->close_section(); // metablob | |
2170 | f->open_array_section("inos involved"); | |
2171 | for (vector<inodeno_t>::const_iterator i = inos.begin(); | |
2172 | i != inos.end(); ++i) { | |
2173 | f->dump_int("ino", *i); | |
2174 | } | |
2175 | f->close_section(); // inos | |
2176 | } | |
2177 | ||
2178 | void EOpen::generate_test_instances(list<EOpen*>& ls) | |
2179 | { | |
2180 | ls.push_back(new EOpen()); | |
2181 | ls.push_back(new EOpen()); | |
2182 | ls.back()->add_ino(0); | |
2183 | } | |
2184 | ||
2185 | void EOpen::update_segment() | |
2186 | { | |
2187 | // ?? | |
2188 | } | |
2189 | ||
2190 | void EOpen::replay(MDSRank *mds) | |
2191 | { | |
2192 | dout(10) << "EOpen.replay " << dendl; | |
2193 | metablob.replay(mds, _segment); | |
2194 | ||
2195 | // note which segments inodes belong to, so we don't have to start rejournaling them | |
2196 | for (const auto &ino : inos) { | |
2197 | CInode *in = mds->mdcache->get_inode(ino); | |
2198 | if (!in) { | |
2199 | dout(0) << "EOpen.replay ino " << ino << " not in metablob" << dendl; | |
2200 | assert(in); | |
2201 | } | |
2202 | _segment->open_files.push_back(&in->item_open_file); | |
2203 | } | |
2204 | for (const auto &vino : snap_inos) { | |
2205 | CInode *in = mds->mdcache->get_inode(vino); | |
2206 | if (!in) { | |
2207 | dout(0) << "EOpen.replay ino " << vino << " not in metablob" << dendl; | |
2208 | assert(in); | |
2209 | } | |
2210 | _segment->open_files.push_back(&in->item_open_file); | |
2211 | } | |
2212 | } | |
2213 | ||
2214 | ||
2215 | // ----------------------- | |
2216 | // ECommitted | |
2217 | ||
2218 | void ECommitted::replay(MDSRank *mds) | |
2219 | { | |
2220 | if (mds->mdcache->uncommitted_masters.count(reqid)) { | |
2221 | dout(10) << "ECommitted.replay " << reqid << dendl; | |
2222 | mds->mdcache->uncommitted_masters[reqid].ls->uncommitted_masters.erase(reqid); | |
2223 | mds->mdcache->uncommitted_masters.erase(reqid); | |
2224 | } else { | |
2225 | dout(10) << "ECommitted.replay " << reqid << " -- didn't see original op" << dendl; | |
2226 | } | |
2227 | } | |
2228 | ||
2229 | void ECommitted::encode(bufferlist& bl, uint64_t features) const | |
2230 | { | |
2231 | ENCODE_START(3, 3, bl); | |
2232 | ::encode(stamp, bl); | |
2233 | ::encode(reqid, bl); | |
2234 | ENCODE_FINISH(bl); | |
2235 | } | |
2236 | ||
2237 | void ECommitted::decode(bufferlist::iterator& bl) | |
2238 | { | |
2239 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
2240 | if (struct_v >= 2) | |
2241 | ::decode(stamp, bl); | |
2242 | ::decode(reqid, bl); | |
2243 | DECODE_FINISH(bl); | |
2244 | } | |
2245 | ||
2246 | void ECommitted::dump(Formatter *f) const { | |
2247 | f->dump_stream("stamp") << stamp; | |
2248 | f->dump_stream("reqid") << reqid; | |
2249 | } | |
2250 | ||
2251 | void ECommitted::generate_test_instances(list<ECommitted*>& ls) | |
2252 | { | |
2253 | ls.push_back(new ECommitted); | |
2254 | ls.push_back(new ECommitted); | |
2255 | ls.back()->stamp = utime_t(1, 2); | |
2256 | ls.back()->reqid = metareqid_t(entity_name_t::CLIENT(123), 456); | |
2257 | } | |
2258 | ||
2259 | // ----------------------- | |
2260 | // ESlaveUpdate | |
2261 | ||
2262 | void link_rollback::encode(bufferlist &bl) const | |
2263 | { | |
2264 | ENCODE_START(2, 2, bl); | |
2265 | ::encode(reqid, bl); | |
2266 | ::encode(ino, bl); | |
2267 | ::encode(was_inc, bl); | |
2268 | ::encode(old_ctime, bl); | |
2269 | ::encode(old_dir_mtime, bl); | |
2270 | ::encode(old_dir_rctime, bl); | |
2271 | ENCODE_FINISH(bl); | |
2272 | } | |
2273 | ||
2274 | void link_rollback::decode(bufferlist::iterator &bl) | |
2275 | { | |
2276 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
2277 | ::decode(reqid, bl); | |
2278 | ::decode(ino, bl); | |
2279 | ::decode(was_inc, bl); | |
2280 | ::decode(old_ctime, bl); | |
2281 | ::decode(old_dir_mtime, bl); | |
2282 | ::decode(old_dir_rctime, bl); | |
2283 | DECODE_FINISH(bl); | |
2284 | } | |
2285 | ||
2286 | void link_rollback::dump(Formatter *f) const | |
2287 | { | |
2288 | f->dump_stream("metareqid") << reqid; | |
2289 | f->dump_int("ino", ino); | |
2290 | f->dump_string("was incremented", was_inc ? "true" : "false"); | |
2291 | f->dump_stream("old_ctime") << old_ctime; | |
2292 | f->dump_stream("old_dir_mtime") << old_dir_mtime; | |
2293 | f->dump_stream("old_dir_rctime") << old_dir_rctime; | |
2294 | } | |
2295 | ||
2296 | void link_rollback::generate_test_instances(list<link_rollback*>& ls) | |
2297 | { | |
2298 | ls.push_back(new link_rollback()); | |
2299 | } | |
2300 | ||
2301 | void rmdir_rollback::encode(bufferlist& bl) const | |
2302 | { | |
2303 | ENCODE_START(2, 2, bl); | |
2304 | ::encode(reqid, bl); | |
2305 | ::encode(src_dir, bl); | |
2306 | ::encode(src_dname, bl); | |
2307 | ::encode(dest_dir, bl); | |
2308 | ::encode(dest_dname, bl); | |
2309 | ENCODE_FINISH(bl); | |
2310 | } | |
2311 | ||
2312 | void rmdir_rollback::decode(bufferlist::iterator& bl) | |
2313 | { | |
2314 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
2315 | ::decode(reqid, bl); | |
2316 | ::decode(src_dir, bl); | |
2317 | ::decode(src_dname, bl); | |
2318 | ::decode(dest_dir, bl); | |
2319 | ::decode(dest_dname, bl); | |
2320 | DECODE_FINISH(bl); | |
2321 | } | |
2322 | ||
2323 | void rmdir_rollback::dump(Formatter *f) const | |
2324 | { | |
2325 | f->dump_stream("metareqid") << reqid; | |
2326 | f->dump_stream("source directory") << src_dir; | |
2327 | f->dump_string("source dname", src_dname); | |
2328 | f->dump_stream("destination directory") << dest_dir; | |
2329 | f->dump_string("destination dname", dest_dname); | |
2330 | } | |
2331 | ||
2332 | void rmdir_rollback::generate_test_instances(list<rmdir_rollback*>& ls) | |
2333 | { | |
2334 | ls.push_back(new rmdir_rollback()); | |
2335 | } | |
2336 | ||
2337 | void rename_rollback::drec::encode(bufferlist &bl) const | |
2338 | { | |
2339 | ENCODE_START(2, 2, bl); | |
2340 | ::encode(dirfrag, bl); | |
2341 | ::encode(dirfrag_old_mtime, bl); | |
2342 | ::encode(dirfrag_old_rctime, bl); | |
2343 | ::encode(ino, bl); | |
2344 | ::encode(remote_ino, bl); | |
2345 | ::encode(dname, bl); | |
2346 | ::encode(remote_d_type, bl); | |
2347 | ::encode(old_ctime, bl); | |
2348 | ENCODE_FINISH(bl); | |
2349 | } | |
2350 | ||
2351 | void rename_rollback::drec::decode(bufferlist::iterator &bl) | |
2352 | { | |
2353 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
2354 | ::decode(dirfrag, bl); | |
2355 | ::decode(dirfrag_old_mtime, bl); | |
2356 | ::decode(dirfrag_old_rctime, bl); | |
2357 | ::decode(ino, bl); | |
2358 | ::decode(remote_ino, bl); | |
2359 | ::decode(dname, bl); | |
2360 | ::decode(remote_d_type, bl); | |
2361 | ::decode(old_ctime, bl); | |
2362 | DECODE_FINISH(bl); | |
2363 | } | |
2364 | ||
2365 | void rename_rollback::drec::dump(Formatter *f) const | |
2366 | { | |
2367 | f->dump_stream("directory fragment") << dirfrag; | |
2368 | f->dump_stream("directory old mtime") << dirfrag_old_mtime; | |
2369 | f->dump_stream("directory old rctime") << dirfrag_old_rctime; | |
2370 | f->dump_int("ino", ino); | |
2371 | f->dump_int("remote ino", remote_ino); | |
2372 | f->dump_string("dname", dname); | |
2373 | uint32_t type = DTTOIF(remote_d_type) & S_IFMT; // convert to type entries | |
2374 | string type_string; | |
2375 | switch(type) { | |
2376 | case S_IFREG: | |
2377 | type_string = "file"; break; | |
2378 | case S_IFLNK: | |
2379 | type_string = "symlink"; break; | |
2380 | case S_IFDIR: | |
2381 | type_string = "directory"; break; | |
2382 | default: | |
2383 | type_string = "UNKNOWN-" + stringify((int)type); break; | |
2384 | } | |
2385 | f->dump_string("remote dtype", type_string); | |
2386 | f->dump_stream("old ctime") << old_ctime; | |
2387 | } | |
2388 | ||
2389 | void rename_rollback::drec::generate_test_instances(list<drec*>& ls) | |
2390 | { | |
2391 | ls.push_back(new drec()); | |
2392 | ls.back()->remote_d_type = IFTODT(S_IFREG); | |
2393 | } | |
2394 | ||
2395 | void rename_rollback::encode(bufferlist &bl) const | |
2396 | { | |
2397 | ENCODE_START(2, 2, bl); | |
2398 | ::encode(reqid, bl); | |
2399 | encode(orig_src, bl); | |
2400 | encode(orig_dest, bl); | |
2401 | encode(stray, bl); | |
2402 | ::encode(ctime, bl); | |
2403 | ENCODE_FINISH(bl); | |
2404 | } | |
2405 | ||
2406 | void rename_rollback::decode(bufferlist::iterator &bl) | |
2407 | { | |
2408 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
2409 | ::decode(reqid, bl); | |
2410 | decode(orig_src, bl); | |
2411 | decode(orig_dest, bl); | |
2412 | decode(stray, bl); | |
2413 | ::decode(ctime, bl); | |
2414 | DECODE_FINISH(bl); | |
2415 | } | |
2416 | ||
2417 | void rename_rollback::dump(Formatter *f) const | |
2418 | { | |
2419 | f->dump_stream("request id") << reqid; | |
2420 | f->open_object_section("original src drec"); | |
2421 | orig_src.dump(f); | |
2422 | f->close_section(); // original src drec | |
2423 | f->open_object_section("original dest drec"); | |
2424 | orig_dest.dump(f); | |
2425 | f->close_section(); // original dest drec | |
2426 | f->open_object_section("stray drec"); | |
2427 | stray.dump(f); | |
2428 | f->close_section(); // stray drec | |
2429 | f->dump_stream("ctime") << ctime; | |
2430 | } | |
2431 | ||
2432 | void rename_rollback::generate_test_instances(list<rename_rollback*>& ls) | |
2433 | { | |
2434 | ls.push_back(new rename_rollback()); | |
2435 | ls.back()->orig_src.remote_d_type = IFTODT(S_IFREG); | |
2436 | ls.back()->orig_dest.remote_d_type = IFTODT(S_IFREG); | |
2437 | ls.back()->stray.remote_d_type = IFTODT(S_IFREG); | |
2438 | } | |
2439 | ||
2440 | void ESlaveUpdate::encode(bufferlist &bl, uint64_t features) const | |
2441 | { | |
2442 | ENCODE_START(3, 3, bl); | |
2443 | ::encode(stamp, bl); | |
2444 | ::encode(type, bl); | |
2445 | ::encode(reqid, bl); | |
2446 | ::encode(master, bl); | |
2447 | ::encode(op, bl); | |
2448 | ::encode(origop, bl); | |
2449 | ::encode(commit, bl, features); | |
2450 | ::encode(rollback, bl); | |
2451 | ENCODE_FINISH(bl); | |
2452 | } | |
2453 | ||
2454 | void ESlaveUpdate::decode(bufferlist::iterator &bl) | |
2455 | { | |
2456 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
2457 | if (struct_v >= 2) | |
2458 | ::decode(stamp, bl); | |
2459 | ::decode(type, bl); | |
2460 | ::decode(reqid, bl); | |
2461 | ::decode(master, bl); | |
2462 | ::decode(op, bl); | |
2463 | ::decode(origop, bl); | |
2464 | ::decode(commit, bl); | |
2465 | ::decode(rollback, bl); | |
2466 | DECODE_FINISH(bl); | |
2467 | } | |
2468 | ||
2469 | void ESlaveUpdate::dump(Formatter *f) const | |
2470 | { | |
2471 | f->open_object_section("metablob"); | |
2472 | commit.dump(f); | |
2473 | f->close_section(); // metablob | |
2474 | ||
2475 | f->dump_int("rollback length", rollback.length()); | |
2476 | f->dump_string("type", type); | |
2477 | f->dump_stream("metareqid") << reqid; | |
2478 | f->dump_int("master", master); | |
2479 | f->dump_int("op", op); | |
2480 | f->dump_int("original op", origop); | |
2481 | } | |
2482 | ||
2483 | void ESlaveUpdate::generate_test_instances(list<ESlaveUpdate*>& ls) | |
2484 | { | |
2485 | ls.push_back(new ESlaveUpdate()); | |
2486 | } | |
2487 | ||
2488 | ||
2489 | void ESlaveUpdate::replay(MDSRank *mds) | |
2490 | { | |
2491 | MDSlaveUpdate *su; | |
2492 | switch (op) { | |
2493 | case ESlaveUpdate::OP_PREPARE: | |
2494 | dout(10) << "ESlaveUpdate.replay prepare " << reqid << " for mds." << master | |
2495 | << ": applying commit, saving rollback info" << dendl; | |
2496 | su = new MDSlaveUpdate(origop, rollback, _segment->slave_updates); | |
2497 | commit.replay(mds, _segment, su); | |
2498 | mds->mdcache->add_uncommitted_slave_update(reqid, master, su); | |
2499 | break; | |
2500 | ||
2501 | case ESlaveUpdate::OP_COMMIT: | |
2502 | su = mds->mdcache->get_uncommitted_slave_update(reqid, master); | |
2503 | if (su) { | |
2504 | dout(10) << "ESlaveUpdate.replay commit " << reqid << " for mds." << master << dendl; | |
2505 | mds->mdcache->finish_uncommitted_slave_update(reqid, master); | |
2506 | } else { | |
2507 | dout(10) << "ESlaveUpdate.replay commit " << reqid << " for mds." << master | |
2508 | << ": ignoring, no previously saved prepare" << dendl; | |
2509 | } | |
2510 | break; | |
2511 | ||
2512 | case ESlaveUpdate::OP_ROLLBACK: | |
2513 | dout(10) << "ESlaveUpdate.replay abort " << reqid << " for mds." << master | |
2514 | << ": applying rollback commit blob" << dendl; | |
2515 | commit.replay(mds, _segment); | |
2516 | su = mds->mdcache->get_uncommitted_slave_update(reqid, master); | |
2517 | if (su) | |
2518 | mds->mdcache->finish_uncommitted_slave_update(reqid, master); | |
2519 | break; | |
2520 | ||
2521 | default: | |
2522 | mds->clog->error() << "invalid op in ESlaveUpdate"; | |
2523 | mds->damaged(); | |
2524 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
2525 | } | |
2526 | } | |
2527 | ||
2528 | ||
2529 | // ----------------------- | |
2530 | // ESubtreeMap | |
2531 | ||
2532 | void ESubtreeMap::encode(bufferlist& bl, uint64_t features) const | |
2533 | { | |
2534 | ENCODE_START(6, 5, bl); | |
2535 | ::encode(stamp, bl); | |
2536 | ::encode(metablob, bl, features); | |
2537 | ::encode(subtrees, bl); | |
2538 | ::encode(ambiguous_subtrees, bl); | |
2539 | ::encode(expire_pos, bl); | |
2540 | ::encode(event_seq, bl); | |
2541 | ENCODE_FINISH(bl); | |
2542 | } | |
2543 | ||
2544 | void ESubtreeMap::decode(bufferlist::iterator &bl) | |
2545 | { | |
2546 | DECODE_START_LEGACY_COMPAT_LEN(6, 5, 5, bl); | |
2547 | if (struct_v >= 2) | |
2548 | ::decode(stamp, bl); | |
2549 | ::decode(metablob, bl); | |
2550 | ::decode(subtrees, bl); | |
2551 | if (struct_v >= 4) | |
2552 | ::decode(ambiguous_subtrees, bl); | |
2553 | if (struct_v >= 3) | |
2554 | ::decode(expire_pos, bl); | |
2555 | if (struct_v >= 6) | |
2556 | ::decode(event_seq, bl); | |
2557 | DECODE_FINISH(bl); | |
2558 | } | |
2559 | ||
2560 | void ESubtreeMap::dump(Formatter *f) const | |
2561 | { | |
2562 | f->open_object_section("metablob"); | |
2563 | metablob.dump(f); | |
2564 | f->close_section(); // metablob | |
2565 | ||
2566 | f->open_array_section("subtrees"); | |
2567 | for(map<dirfrag_t,vector<dirfrag_t> >::const_iterator i = subtrees.begin(); | |
2568 | i != subtrees.end(); ++i) { | |
2569 | f->open_object_section("tree"); | |
2570 | f->dump_stream("root dirfrag") << i->first; | |
2571 | for (vector<dirfrag_t>::const_iterator j = i->second.begin(); | |
2572 | j != i->second.end(); ++j) { | |
2573 | f->dump_stream("bound dirfrag") << *j; | |
2574 | } | |
2575 | f->close_section(); // tree | |
2576 | } | |
2577 | f->close_section(); // subtrees | |
2578 | ||
2579 | f->open_array_section("ambiguous subtrees"); | |
2580 | for(set<dirfrag_t>::const_iterator i = ambiguous_subtrees.begin(); | |
2581 | i != ambiguous_subtrees.end(); ++i) { | |
2582 | f->dump_stream("dirfrag") << *i; | |
2583 | } | |
2584 | f->close_section(); // ambiguous subtrees | |
2585 | ||
2586 | f->dump_int("expire position", expire_pos); | |
2587 | } | |
2588 | ||
2589 | void ESubtreeMap::generate_test_instances(list<ESubtreeMap*>& ls) | |
2590 | { | |
2591 | ls.push_back(new ESubtreeMap()); | |
2592 | } | |
2593 | ||
2594 | void ESubtreeMap::replay(MDSRank *mds) | |
2595 | { | |
2596 | if (expire_pos && expire_pos > mds->mdlog->journaler->get_expire_pos()) | |
2597 | mds->mdlog->journaler->set_expire_pos(expire_pos); | |
2598 | ||
2599 | // suck up the subtree map? | |
2600 | if (mds->mdcache->is_subtrees()) { | |
2601 | dout(10) << "ESubtreeMap.replay -- i already have import map; verifying" << dendl; | |
2602 | int errors = 0; | |
2603 | ||
2604 | for (map<dirfrag_t, vector<dirfrag_t> >::iterator p = subtrees.begin(); | |
2605 | p != subtrees.end(); | |
2606 | ++p) { | |
2607 | CDir *dir = mds->mdcache->get_dirfrag(p->first); | |
2608 | if (!dir) { | |
2609 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2610 | << " subtree root " << p->first << " not in cache"; | |
2611 | ++errors; | |
2612 | continue; | |
2613 | } | |
2614 | ||
2615 | if (!mds->mdcache->is_subtree(dir)) { | |
2616 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2617 | << " subtree root " << p->first << " not a subtree in cache"; | |
2618 | ++errors; | |
2619 | continue; | |
2620 | } | |
2621 | if (dir->get_dir_auth().first != mds->get_nodeid()) { | |
2622 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2623 | << " subtree root " << p->first | |
2624 | << " is not mine in cache (it's " << dir->get_dir_auth() << ")"; | |
2625 | ++errors; | |
2626 | continue; | |
2627 | } | |
2628 | ||
2629 | for (vector<dirfrag_t>::iterator q = p->second.begin(); q != p->second.end(); ++q) | |
2630 | mds->mdcache->get_force_dirfrag(*q, true); | |
2631 | ||
2632 | set<CDir*> bounds; | |
2633 | mds->mdcache->get_subtree_bounds(dir, bounds); | |
2634 | for (vector<dirfrag_t>::iterator q = p->second.begin(); q != p->second.end(); ++q) { | |
2635 | CDir *b = mds->mdcache->get_dirfrag(*q); | |
2636 | if (!b) { | |
2637 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2638 | << " subtree " << p->first << " bound " << *q << " not in cache"; | |
2639 | ++errors; | |
2640 | continue; | |
2641 | } | |
2642 | if (bounds.count(b) == 0) { | |
2643 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2644 | << " subtree " << p->first << " bound " << *q << " not a bound in cache"; | |
2645 | ++errors; | |
2646 | continue; | |
2647 | } | |
2648 | bounds.erase(b); | |
2649 | } | |
2650 | for (set<CDir*>::iterator q = bounds.begin(); q != bounds.end(); ++q) { | |
2651 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2652 | << " subtree " << p->first << " has extra bound in cache " << (*q)->dirfrag(); | |
2653 | ++errors; | |
2654 | } | |
2655 | ||
2656 | if (ambiguous_subtrees.count(p->first)) { | |
2657 | if (!mds->mdcache->have_ambiguous_import(p->first)) { | |
2658 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2659 | << " subtree " << p->first << " is ambiguous but is not in our cache"; | |
2660 | ++errors; | |
2661 | } | |
2662 | } else { | |
2663 | if (mds->mdcache->have_ambiguous_import(p->first)) { | |
2664 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2665 | << " subtree " << p->first << " is not ambiguous but is in our cache"; | |
2666 | ++errors; | |
2667 | } | |
2668 | } | |
2669 | } | |
2670 | ||
2671 | list<CDir*> subs; | |
2672 | mds->mdcache->list_subtrees(subs); | |
2673 | for (list<CDir*>::iterator p = subs.begin(); p != subs.end(); ++p) { | |
2674 | CDir *dir = *p; | |
2675 | if (dir->get_dir_auth().first != mds->get_nodeid()) | |
2676 | continue; | |
2677 | if (subtrees.count(dir->dirfrag()) == 0) { | |
2678 | mds->clog->error() << " replayed ESubtreeMap at " << get_start_off() | |
2679 | << " does not include cache subtree " << dir->dirfrag(); | |
2680 | ++errors; | |
2681 | } | |
2682 | } | |
2683 | ||
2684 | if (errors) { | |
2685 | dout(0) << "journal subtrees: " << subtrees << dendl; | |
2686 | dout(0) << "journal ambig_subtrees: " << ambiguous_subtrees << dendl; | |
2687 | mds->mdcache->show_subtrees(); | |
2688 | assert(!g_conf->mds_debug_subtrees || errors == 0); | |
2689 | } | |
2690 | return; | |
2691 | } | |
2692 | ||
2693 | dout(10) << "ESubtreeMap.replay -- reconstructing (auth) subtree spanning tree" << dendl; | |
2694 | ||
2695 | // first, stick the spanning tree in my cache | |
2696 | //metablob.print(*_dout); | |
2697 | metablob.replay(mds, _segment); | |
2698 | ||
2699 | // restore import/export maps | |
2700 | for (map<dirfrag_t, vector<dirfrag_t> >::iterator p = subtrees.begin(); | |
2701 | p != subtrees.end(); | |
2702 | ++p) { | |
2703 | CDir *dir = mds->mdcache->get_dirfrag(p->first); | |
2704 | assert(dir); | |
2705 | if (ambiguous_subtrees.count(p->first)) { | |
2706 | // ambiguous! | |
2707 | mds->mdcache->add_ambiguous_import(p->first, p->second); | |
2708 | mds->mdcache->adjust_bounded_subtree_auth(dir, p->second, | |
2709 | mds_authority_t(mds->get_nodeid(), mds->get_nodeid())); | |
2710 | } else { | |
2711 | // not ambiguous | |
2712 | mds->mdcache->adjust_bounded_subtree_auth(dir, p->second, mds->get_nodeid()); | |
2713 | } | |
2714 | } | |
2715 | ||
2716 | mds->mdcache->recalc_auth_bits(true); | |
2717 | ||
2718 | mds->mdcache->show_subtrees(); | |
2719 | } | |
2720 | ||
2721 | ||
2722 | ||
2723 | // ----------------------- | |
2724 | // EFragment | |
2725 | ||
2726 | void EFragment::replay(MDSRank *mds) | |
2727 | { | |
2728 | dout(10) << "EFragment.replay " << op_name(op) << " " << ino << " " << basefrag << " by " << bits << dendl; | |
2729 | ||
2730 | list<CDir*> resultfrags; | |
2731 | list<MDSInternalContextBase*> waiters; | |
2732 | list<frag_t> old_frags; | |
2733 | ||
2734 | // in may be NULL if it wasn't in our cache yet. if it's a prepare | |
2735 | // it will be once we replay the metablob , but first we need to | |
2736 | // refragment anything we already have in the cache. | |
2737 | CInode *in = mds->mdcache->get_inode(ino); | |
2738 | ||
2739 | switch (op) { | |
2740 | case OP_PREPARE: | |
2741 | mds->mdcache->add_uncommitted_fragment(dirfrag_t(ino, basefrag), bits, orig_frags, _segment, &rollback); | |
2742 | ||
2743 | if (in) | |
2744 | mds->mdcache->adjust_dir_fragments(in, basefrag, bits, resultfrags, waiters, true); | |
2745 | break; | |
2746 | ||
2747 | case OP_ROLLBACK: | |
2748 | if (in) { | |
2749 | in->dirfragtree.get_leaves_under(basefrag, old_frags); | |
2750 | if (orig_frags.empty()) { | |
2751 | // old format EFragment | |
2752 | mds->mdcache->adjust_dir_fragments(in, basefrag, -bits, resultfrags, waiters, true); | |
2753 | } else { | |
2754 | for (list<frag_t>::iterator p = orig_frags.begin(); p != orig_frags.end(); ++p) | |
2755 | mds->mdcache->force_dir_fragment(in, *p); | |
2756 | } | |
2757 | } | |
2758 | mds->mdcache->rollback_uncommitted_fragment(dirfrag_t(ino, basefrag), old_frags); | |
2759 | break; | |
2760 | ||
2761 | case OP_COMMIT: | |
2762 | case OP_FINISH: | |
2763 | mds->mdcache->finish_uncommitted_fragment(dirfrag_t(ino, basefrag), op); | |
2764 | break; | |
2765 | ||
2766 | default: | |
2767 | ceph_abort(); | |
2768 | } | |
2769 | ||
2770 | metablob.replay(mds, _segment); | |
2771 | if (in && g_conf->mds_debug_frag) | |
2772 | in->verify_dirfrags(); | |
2773 | } | |
2774 | ||
2775 | void EFragment::encode(bufferlist &bl, uint64_t features) const { | |
2776 | ENCODE_START(5, 4, bl); | |
2777 | ::encode(stamp, bl); | |
2778 | ::encode(op, bl); | |
2779 | ::encode(ino, bl); | |
2780 | ::encode(basefrag, bl); | |
2781 | ::encode(bits, bl); | |
2782 | ::encode(metablob, bl, features); | |
2783 | ::encode(orig_frags, bl); | |
2784 | ::encode(rollback, bl); | |
2785 | ENCODE_FINISH(bl); | |
2786 | } | |
2787 | ||
2788 | void EFragment::decode(bufferlist::iterator &bl) { | |
2789 | DECODE_START_LEGACY_COMPAT_LEN(5, 4, 4, bl); | |
2790 | if (struct_v >= 2) | |
2791 | ::decode(stamp, bl); | |
2792 | if (struct_v >= 3) | |
2793 | ::decode(op, bl); | |
2794 | ::decode(ino, bl); | |
2795 | ::decode(basefrag, bl); | |
2796 | ::decode(bits, bl); | |
2797 | ::decode(metablob, bl); | |
2798 | if (struct_v >= 5) { | |
2799 | ::decode(orig_frags, bl); | |
2800 | ::decode(rollback, bl); | |
2801 | } | |
2802 | DECODE_FINISH(bl); | |
2803 | } | |
2804 | ||
2805 | void EFragment::dump(Formatter *f) const | |
2806 | { | |
2807 | /*f->open_object_section("Metablob"); | |
2808 | metablob.dump(f); // sadly we don't have this; dunno if we'll get it | |
2809 | f->close_section();*/ | |
2810 | f->dump_string("op", op_name(op)); | |
2811 | f->dump_stream("ino") << ino; | |
2812 | f->dump_stream("base frag") << basefrag; | |
2813 | f->dump_int("bits", bits); | |
2814 | } | |
2815 | ||
2816 | void EFragment::generate_test_instances(list<EFragment*>& ls) | |
2817 | { | |
2818 | ls.push_back(new EFragment); | |
2819 | ls.push_back(new EFragment); | |
2820 | ls.back()->op = OP_PREPARE; | |
2821 | ls.back()->ino = 1; | |
2822 | ls.back()->bits = 5; | |
2823 | } | |
2824 | ||
2825 | void dirfrag_rollback::encode(bufferlist &bl) const | |
2826 | { | |
2827 | ENCODE_START(1, 1, bl); | |
2828 | ::encode(fnode, bl); | |
2829 | ENCODE_FINISH(bl); | |
2830 | } | |
2831 | ||
2832 | void dirfrag_rollback::decode(bufferlist::iterator &bl) | |
2833 | { | |
2834 | DECODE_START(1, bl); | |
2835 | ::decode(fnode, bl); | |
2836 | DECODE_FINISH(bl); | |
2837 | } | |
2838 | ||
2839 | ||
2840 | ||
2841 | // ========================================================================= | |
2842 | ||
2843 | // ----------------------- | |
2844 | // EExport | |
2845 | ||
2846 | void EExport::replay(MDSRank *mds) | |
2847 | { | |
2848 | dout(10) << "EExport.replay " << base << dendl; | |
2849 | metablob.replay(mds, _segment); | |
2850 | ||
2851 | CDir *dir = mds->mdcache->get_dirfrag(base); | |
2852 | assert(dir); | |
2853 | ||
2854 | set<CDir*> realbounds; | |
2855 | for (set<dirfrag_t>::iterator p = bounds.begin(); | |
2856 | p != bounds.end(); | |
2857 | ++p) { | |
2858 | CDir *bd = mds->mdcache->get_dirfrag(*p); | |
2859 | assert(bd); | |
2860 | realbounds.insert(bd); | |
2861 | } | |
2862 | ||
2863 | // adjust auth away | |
2864 | mds->mdcache->adjust_bounded_subtree_auth(dir, realbounds, CDIR_AUTH_UNDEF); | |
2865 | ||
2866 | mds->mdcache->try_trim_non_auth_subtree(dir); | |
2867 | } | |
2868 | ||
2869 | void EExport::encode(bufferlist& bl, uint64_t features) const | |
2870 | { | |
31f18b77 | 2871 | ENCODE_START(4, 3, bl); |
7c673cae FG |
2872 | ::encode(stamp, bl); |
2873 | ::encode(metablob, bl, features); | |
2874 | ::encode(base, bl); | |
2875 | ::encode(bounds, bl); | |
31f18b77 | 2876 | ::encode(target, bl); |
7c673cae FG |
2877 | ENCODE_FINISH(bl); |
2878 | } | |
2879 | ||
2880 | void EExport::decode(bufferlist::iterator &bl) | |
2881 | { | |
2882 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
2883 | if (struct_v >= 2) | |
2884 | ::decode(stamp, bl); | |
2885 | ::decode(metablob, bl); | |
2886 | ::decode(base, bl); | |
2887 | ::decode(bounds, bl); | |
31f18b77 FG |
2888 | if (struct_v >= 4) |
2889 | ::decode(target, bl); | |
7c673cae FG |
2890 | DECODE_FINISH(bl); |
2891 | } | |
2892 | ||
2893 | void EExport::dump(Formatter *f) const | |
2894 | { | |
2895 | f->dump_float("stamp", (double)stamp); | |
2896 | /*f->open_object_section("Metablob"); | |
2897 | metablob.dump(f); // sadly we don't have this; dunno if we'll get it | |
2898 | f->close_section();*/ | |
2899 | f->dump_stream("base dirfrag") << base; | |
2900 | f->open_array_section("bounds dirfrags"); | |
2901 | for (set<dirfrag_t>::const_iterator i = bounds.begin(); | |
2902 | i != bounds.end(); ++i) { | |
2903 | f->dump_stream("dirfrag") << *i; | |
2904 | } | |
2905 | f->close_section(); // bounds dirfrags | |
2906 | } | |
2907 | ||
2908 | void EExport::generate_test_instances(list<EExport*>& ls) | |
2909 | { | |
2910 | EExport *sample = new EExport(); | |
2911 | ls.push_back(sample); | |
2912 | } | |
2913 | ||
2914 | ||
2915 | // ----------------------- | |
2916 | // EImportStart | |
2917 | ||
2918 | void EImportStart::update_segment() | |
2919 | { | |
2920 | _segment->sessionmapv = cmapv; | |
2921 | } | |
2922 | ||
2923 | void EImportStart::replay(MDSRank *mds) | |
2924 | { | |
2925 | dout(10) << "EImportStart.replay " << base << " bounds " << bounds << dendl; | |
2926 | //metablob.print(*_dout); | |
2927 | metablob.replay(mds, _segment); | |
2928 | ||
2929 | // put in ambiguous import list | |
2930 | mds->mdcache->add_ambiguous_import(base, bounds); | |
2931 | ||
2932 | // set auth partially to us so we don't trim it | |
2933 | CDir *dir = mds->mdcache->get_dirfrag(base); | |
2934 | assert(dir); | |
2935 | ||
2936 | set<CDir*> realbounds; | |
2937 | for (vector<dirfrag_t>::iterator p = bounds.begin(); | |
2938 | p != bounds.end(); | |
2939 | ++p) { | |
2940 | CDir *bd = mds->mdcache->get_dirfrag(*p); | |
2941 | assert(bd); | |
2942 | if (!bd->is_subtree_root()) | |
2943 | bd->state_clear(CDir::STATE_AUTH); | |
2944 | realbounds.insert(bd); | |
2945 | } | |
2946 | ||
2947 | mds->mdcache->adjust_bounded_subtree_auth(dir, realbounds, | |
2948 | mds_authority_t(mds->get_nodeid(), mds->get_nodeid())); | |
2949 | ||
2950 | // open client sessions? | |
2951 | if (mds->sessionmap.get_version() >= cmapv) { | |
2952 | dout(10) << "EImportStart.replay sessionmap " << mds->sessionmap.get_version() | |
2953 | << " >= " << cmapv << ", noop" << dendl; | |
2954 | } else { | |
2955 | dout(10) << "EImportStart.replay sessionmap " << mds->sessionmap.get_version() | |
2956 | << " < " << cmapv << dendl; | |
2957 | map<client_t,entity_inst_t> cm; | |
2958 | bufferlist::iterator blp = client_map.begin(); | |
2959 | ::decode(cm, blp); | |
28e407b8 | 2960 | mds->sessionmap.replay_open_sessions(cm); |
b32b8144 FG |
2961 | if (mds->sessionmap.get_version() != cmapv) |
2962 | { | |
2963 | derr << "sessionmap version " << mds->sessionmap.get_version() | |
2964 | << " != cmapv " << cmapv << dendl; | |
2965 | mds->clog->error() << "failure replaying journal (EImportStart)"; | |
2966 | mds->damaged(); | |
2967 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
2968 | } | |
7c673cae FG |
2969 | } |
2970 | update_segment(); | |
2971 | } | |
2972 | ||
2973 | void EImportStart::encode(bufferlist &bl, uint64_t features) const { | |
31f18b77 | 2974 | ENCODE_START(4, 3, bl); |
7c673cae FG |
2975 | ::encode(stamp, bl); |
2976 | ::encode(base, bl); | |
2977 | ::encode(metablob, bl, features); | |
2978 | ::encode(bounds, bl); | |
2979 | ::encode(cmapv, bl); | |
2980 | ::encode(client_map, bl); | |
31f18b77 | 2981 | ::encode(from, bl); |
7c673cae FG |
2982 | ENCODE_FINISH(bl); |
2983 | } | |
2984 | ||
2985 | void EImportStart::decode(bufferlist::iterator &bl) { | |
2986 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
2987 | if (struct_v >= 2) | |
2988 | ::decode(stamp, bl); | |
2989 | ::decode(base, bl); | |
2990 | ::decode(metablob, bl); | |
2991 | ::decode(bounds, bl); | |
2992 | ::decode(cmapv, bl); | |
2993 | ::decode(client_map, bl); | |
31f18b77 FG |
2994 | if (struct_v >= 4) |
2995 | ::decode(from, bl); | |
7c673cae FG |
2996 | DECODE_FINISH(bl); |
2997 | } | |
2998 | ||
2999 | void EImportStart::dump(Formatter *f) const | |
3000 | { | |
3001 | f->dump_stream("base dirfrag") << base; | |
3002 | f->open_array_section("boundary dirfrags"); | |
3003 | for (vector<dirfrag_t>::const_iterator iter = bounds.begin(); | |
3004 | iter != bounds.end(); ++iter) { | |
3005 | f->dump_stream("frag") << *iter; | |
3006 | } | |
3007 | f->close_section(); | |
3008 | } | |
3009 | ||
3010 | void EImportStart::generate_test_instances(list<EImportStart*>& ls) | |
3011 | { | |
3012 | ls.push_back(new EImportStart); | |
3013 | } | |
3014 | ||
3015 | // ----------------------- | |
3016 | // EImportFinish | |
3017 | ||
3018 | void EImportFinish::replay(MDSRank *mds) | |
3019 | { | |
3020 | if (mds->mdcache->have_ambiguous_import(base)) { | |
3021 | dout(10) << "EImportFinish.replay " << base << " success=" << success << dendl; | |
3022 | if (success) { | |
3023 | mds->mdcache->finish_ambiguous_import(base); | |
3024 | } else { | |
3025 | CDir *dir = mds->mdcache->get_dirfrag(base); | |
3026 | assert(dir); | |
3027 | vector<dirfrag_t> bounds; | |
3028 | mds->mdcache->get_ambiguous_import_bounds(base, bounds); | |
3029 | mds->mdcache->adjust_bounded_subtree_auth(dir, bounds, CDIR_AUTH_UNDEF); | |
3030 | mds->mdcache->cancel_ambiguous_import(dir); | |
3031 | mds->mdcache->try_trim_non_auth_subtree(dir); | |
3032 | } | |
3033 | } else { | |
3034 | // this shouldn't happen unless this is an old journal | |
3035 | dout(10) << "EImportFinish.replay " << base << " success=" << success | |
3036 | << " on subtree not marked as ambiguous" | |
3037 | << dendl; | |
3038 | mds->clog->error() << "failure replaying journal (EImportFinish)"; | |
3039 | mds->damaged(); | |
3040 | ceph_abort(); // Should be unreachable because damaged() calls respawn() | |
3041 | } | |
3042 | } | |
3043 | ||
3044 | void EImportFinish::encode(bufferlist& bl, uint64_t features) const | |
3045 | { | |
3046 | ENCODE_START(3, 3, bl); | |
3047 | ::encode(stamp, bl); | |
3048 | ::encode(base, bl); | |
3049 | ::encode(success, bl); | |
3050 | ENCODE_FINISH(bl); | |
3051 | } | |
3052 | ||
3053 | void EImportFinish::decode(bufferlist::iterator &bl) | |
3054 | { | |
3055 | DECODE_START_LEGACY_COMPAT_LEN(3, 3, 3, bl); | |
3056 | if (struct_v >= 2) | |
3057 | ::decode(stamp, bl); | |
3058 | ::decode(base, bl); | |
3059 | ::decode(success, bl); | |
3060 | DECODE_FINISH(bl); | |
3061 | } | |
3062 | ||
3063 | void EImportFinish::dump(Formatter *f) const | |
3064 | { | |
3065 | f->dump_stream("base dirfrag") << base; | |
3066 | f->dump_string("success", success ? "true" : "false"); | |
3067 | } | |
3068 | void EImportFinish::generate_test_instances(list<EImportFinish*>& ls) | |
3069 | { | |
3070 | ls.push_back(new EImportFinish); | |
3071 | ls.push_back(new EImportFinish); | |
3072 | ls.back()->success = true; | |
3073 | } | |
3074 | ||
3075 | ||
3076 | // ------------------------ | |
3077 | // EResetJournal | |
3078 | ||
3079 | void EResetJournal::encode(bufferlist& bl, uint64_t features) const | |
3080 | { | |
3081 | ENCODE_START(2, 2, bl); | |
3082 | ::encode(stamp, bl); | |
3083 | ENCODE_FINISH(bl); | |
3084 | } | |
3085 | ||
3086 | void EResetJournal::decode(bufferlist::iterator &bl) | |
3087 | { | |
3088 | DECODE_START_LEGACY_COMPAT_LEN(2, 2, 2, bl); | |
3089 | ::decode(stamp, bl); | |
3090 | DECODE_FINISH(bl); | |
3091 | } | |
3092 | ||
3093 | void EResetJournal::dump(Formatter *f) const | |
3094 | { | |
3095 | f->dump_stream("timestamp") << stamp; | |
3096 | } | |
3097 | ||
3098 | void EResetJournal::generate_test_instances(list<EResetJournal*>& ls) | |
3099 | { | |
3100 | ls.push_back(new EResetJournal()); | |
3101 | } | |
3102 | ||
3103 | void EResetJournal::replay(MDSRank *mds) | |
3104 | { | |
3105 | dout(1) << "EResetJournal" << dendl; | |
3106 | ||
3107 | mds->sessionmap.wipe(); | |
3108 | mds->inotable->replay_reset(); | |
3109 | ||
3110 | if (mds->mdsmap->get_root() == mds->get_nodeid()) { | |
3111 | CDir *rootdir = mds->mdcache->get_root()->get_or_open_dirfrag(mds->mdcache, frag_t()); | |
3112 | mds->mdcache->adjust_subtree_auth(rootdir, mds->get_nodeid()); | |
3113 | } | |
3114 | ||
3115 | CDir *mydir = mds->mdcache->get_myin()->get_or_open_dirfrag(mds->mdcache, frag_t()); | |
3116 | mds->mdcache->adjust_subtree_auth(mydir, mds->get_nodeid()); | |
3117 | ||
3118 | mds->mdcache->recalc_auth_bits(true); | |
3119 | ||
3120 | mds->mdcache->show_subtrees(); | |
3121 | } | |
3122 | ||
3123 | ||
3124 | void ENoOp::encode(bufferlist &bl, uint64_t features) const | |
3125 | { | |
3126 | ENCODE_START(2, 2, bl); | |
3127 | ::encode(pad_size, bl); | |
3128 | uint8_t const pad = 0xff; | |
3129 | for (unsigned int i = 0; i < pad_size; ++i) { | |
3130 | ::encode(pad, bl); | |
3131 | } | |
3132 | ENCODE_FINISH(bl); | |
3133 | } | |
3134 | ||
3135 | ||
3136 | void ENoOp::decode(bufferlist::iterator &bl) | |
3137 | { | |
3138 | DECODE_START(2, bl); | |
3139 | ::decode(pad_size, bl); | |
3140 | if (bl.get_remaining() != pad_size) { | |
3141 | // This is spiritually an assertion, but expressing in a way that will let | |
3142 | // journal debug tools catch it and recognise a malformed entry. | |
3143 | throw buffer::end_of_buffer(); | |
3144 | } else { | |
3145 | bl.advance(pad_size); | |
3146 | } | |
3147 | DECODE_FINISH(bl); | |
3148 | } | |
3149 | ||
3150 | ||
3151 | void ENoOp::replay(MDSRank *mds) | |
3152 | { | |
3153 | dout(4) << "ENoOp::replay, " << pad_size << " bytes skipped in journal" << dendl; | |
3154 | } | |
3155 | ||
3156 | /** | |
3157 | * If re-formatting an old journal that used absolute log position | |
3158 | * references as segment sequence numbers, use this function to update | |
3159 | * it. | |
3160 | * | |
3161 | * @param mds | |
3162 | * MDSRank instance, just used for logging | |
3163 | * @param old_to_new | |
3164 | * Map of old journal segment sequence numbers to new journal segment sequence numbers | |
3165 | * | |
3166 | * @return | |
3167 | * True if the event was modified. | |
3168 | */ | |
3169 | bool EMetaBlob::rewrite_truncate_finish(MDSRank const *mds, | |
3170 | std::map<log_segment_seq_t, log_segment_seq_t> const &old_to_new) | |
3171 | { | |
3172 | bool modified = false; | |
3173 | map<inodeno_t, log_segment_seq_t> new_trunc_finish; | |
3174 | for (std::map<inodeno_t, log_segment_seq_t>::iterator i = truncate_finish.begin(); | |
3175 | i != truncate_finish.end(); ++i) { | |
3176 | if (old_to_new.count(i->second)) { | |
3177 | dout(20) << __func__ << " applying segment seq mapping " | |
3178 | << i->second << " -> " << old_to_new.find(i->second)->second << dendl; | |
3179 | new_trunc_finish[i->first] = old_to_new.find(i->second)->second; | |
3180 | modified = true; | |
3181 | } else { | |
3182 | dout(20) << __func__ << " no segment seq mapping found for " | |
3183 | << i->second << dendl; | |
3184 | new_trunc_finish[i->first] = i->second; | |
3185 | } | |
3186 | } | |
3187 | truncate_finish = new_trunc_finish; | |
3188 | ||
3189 | return modified; | |
3190 | } |