]>
git.proxmox.com Git - ceph.git/blob - ceph/src/osdc/Filer.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
19 #include "osd/OSDMap.h"
22 #include "messages/MOSDOp.h"
23 #include "messages/MOSDOpReply.h"
24 #include "messages/MOSDMap.h"
26 #include "msg/Messenger.h"
28 #include "include/Context.h"
30 #include "common/Finisher.h"
31 #include "common/config.h"
33 #define dout_subsys ceph_subsys_filer
35 #define dout_prefix *_dout << objecter->messenger->get_myname() << ".filer "
37 class Filer::C_Probe
: public Context
{
43 ceph::real_time mtime
;
44 C_Probe(Filer
*f
, Probe
*p
, object_t o
) : filer(f
), probe(p
), oid(o
),
46 void finish(int r
) override
{
54 Probe::unique_lock
pl(probe
->lock
);
59 probe_complete
= filer
->_probed(probe
, oid
, size
, mtime
, pl
);
60 assert(!pl
.owns_lock());
63 probe
->onfinish
->complete(probe
->err
);
69 int Filer::probe(inodeno_t ino
,
70 file_layout_t
*layout
,
73 uint64_t *end
, // LB, when !fwd
74 ceph::real_time
*pmtime
,
79 ldout(cct
, 10) << "probe " << (fwd
? "fwd ":"bwd ")
81 << " starting from " << start_from
84 assert(snapid
); // (until there is a non-NOSNAP write)
86 Probe
*probe
= new Probe(ino
, *layout
, snapid
, start_from
, end
, pmtime
,
87 flags
, fwd
, onfinish
);
89 return probe_impl(probe
, layout
, start_from
, end
);
92 int Filer::probe(inodeno_t ino
,
93 file_layout_t
*layout
,
96 uint64_t *end
, // LB, when !fwd
102 ldout(cct
, 10) << "probe " << (fwd
? "fwd ":"bwd ")
104 << " starting from " << start_from
107 assert(snapid
); // (until there is a non-NOSNAP write)
109 Probe
*probe
= new Probe(ino
, *layout
, snapid
, start_from
, end
, pmtime
,
110 flags
, fwd
, onfinish
);
111 return probe_impl(probe
, layout
, start_from
, end
);
114 int Filer::probe_impl(Probe
* probe
, file_layout_t
*layout
,
115 uint64_t start_from
, uint64_t *end
) // LB, when !fwd
117 // period (bytes before we jump unto a new set of object(s))
118 uint64_t period
= layout
->get_period();
120 // start with 1+ periods.
121 probe
->probing_len
= period
;
123 if (start_from
% period
)
124 probe
->probing_len
+= period
- (start_from
% period
);
126 assert(start_from
> *end
);
127 if (start_from
% period
)
128 probe
->probing_len
-= period
- (start_from
% period
);
129 probe
->probing_off
-= probe
->probing_len
;
132 Probe::unique_lock
pl(probe
->lock
);
134 assert(!pl
.owns_lock());
142 * probe->lock must be initially locked, this function will release it
144 void Filer::_probe(Probe
*probe
, Probe::unique_lock
& pl
)
146 assert(pl
.owns_lock() && pl
.mutex() == &probe
->lock
);
148 ldout(cct
, 10) << "_probe " << hex
<< probe
->ino
<< dec
149 << " " << probe
->probing_off
<< "~" << probe
->probing_len
152 // map range onto objects
153 probe
->known_size
.clear();
154 probe
->probing
.clear();
155 Striper::file_to_extents(cct
, probe
->ino
, &probe
->layout
, probe
->probing_off
,
156 probe
->probing_len
, 0, probe
->probing
);
158 std::vector
<ObjectExtent
> stat_extents
;
159 for (vector
<ObjectExtent
>::iterator p
= probe
->probing
.begin();
160 p
!= probe
->probing
.end();
162 ldout(cct
, 10) << "_probe probing " << p
->oid
<< dendl
;
163 probe
->ops
.insert(p
->oid
);
164 stat_extents
.push_back(*p
);
168 for (std::vector
<ObjectExtent
>::iterator i
= stat_extents
.begin();
169 i
!= stat_extents
.end(); ++i
) {
170 C_Probe
*c
= new C_Probe(this, probe
, i
->oid
);
171 objecter
->stat(i
->oid
, i
->oloc
, probe
->snapid
, &c
->size
, &c
->mtime
,
172 probe
->flags
| CEPH_OSD_FLAG_RWORDERED
,
173 new C_OnFinisher(c
, finisher
));
178 * probe->lock must be initially held, and will be released by this function.
180 * @return true if probe is complete and Probe object may be freed.
182 bool Filer::_probed(Probe
*probe
, const object_t
& oid
, uint64_t size
,
183 ceph::real_time mtime
, Probe::unique_lock
& pl
)
185 assert(pl
.owns_lock() && pl
.mutex() == &probe
->lock
);
187 ldout(cct
, 10) << "_probed " << probe
->ino
<< " object " << oid
188 << " has size " << size
<< " mtime " << mtime
<< dendl
;
190 probe
->known_size
[oid
] = size
;
191 if (mtime
> probe
->max_mtime
)
192 probe
->max_mtime
= mtime
;
194 assert(probe
->ops
.count(oid
));
195 probe
->ops
.erase(oid
);
197 if (!probe
->ops
.empty()) {
199 return false; // waiting for more!
202 if (probe
->err
) { // we hit an error, propagate back up
211 std::reverse(probe
->probing
.begin(), probe
->probing
.end());
214 for (vector
<ObjectExtent
>::iterator p
= probe
->probing
.begin();
215 p
!= probe
->probing
.end();
217 uint64_t shouldbe
= p
->length
+ p
->offset
;
218 ldout(cct
, 10) << "_probed " << probe
->ino
<< " object " << hex
219 << p
->oid
<< dec
<< " should be " << shouldbe
220 << ", actual is " << probe
->known_size
[p
->oid
]
223 if (!probe
->found_size
) {
224 assert(probe
->known_size
[p
->oid
] <= shouldbe
);
226 if ((probe
->fwd
&& probe
->known_size
[p
->oid
] == shouldbe
) ||
227 (!probe
->fwd
&& probe
->known_size
[p
->oid
] == 0 &&
228 probe
->probing_off
> 0))
229 continue; // keep going
231 // aha, we found the end!
232 // calc offset into buffer_extent to get distance from probe->from.
233 uint64_t oleft
= probe
->known_size
[p
->oid
] - p
->offset
;
234 for (vector
<pair
<uint64_t, uint64_t> >::iterator i
235 = p
->buffer_extents
.begin();
236 i
!= p
->buffer_extents
.end();
238 if (oleft
<= (uint64_t)i
->second
) {
239 end
= probe
->probing_off
+ i
->first
+ oleft
;
240 ldout(cct
, 10) << "_probed end is in buffer_extent " << i
->first
241 << "~" << i
->second
<< " off " << oleft
242 << ", from was " << probe
->probing_off
<< ", end is "
245 probe
->found_size
= true;
246 ldout(cct
, 10) << "_probed found size at " << end
<< dendl
;
249 if (!probe
->pmtime
&&
250 !probe
->pumtime
) // stop if we don't need mtime too
259 if (!probe
->found_size
|| (probe
->probing_off
&& (probe
->pmtime
||
262 ldout(cct
, 10) << "_probed probing further" << dendl
;
264 uint64_t period
= probe
->layout
.get_period();
266 probe
->probing_off
+= probe
->probing_len
;
267 assert(probe
->probing_off
% period
== 0);
268 probe
->probing_len
= period
;
271 assert(probe
->probing_off
% period
== 0);
272 probe
->probing_len
= period
;
273 probe
->probing_off
-= period
;
276 assert(!pl
.owns_lock());
278 } else if (probe
->pmtime
) {
279 ldout(cct
, 10) << "_probed found mtime " << probe
->max_mtime
<< dendl
;
280 *probe
->pmtime
= probe
->max_mtime
;
281 } else if (probe
->pumtime
) {
282 ldout(cct
, 10) << "_probed found mtime " << probe
->max_mtime
<< dendl
;
283 *probe
->pumtime
= ceph::real_clock::to_ceph_timespec(probe
->max_mtime
);
291 // -----------------------
295 typedef std::lock_guard
<std::mutex
> lock_guard
;
296 typedef std::unique_lock
<std::mutex
> unique_lock
;
298 file_layout_t layout
;
301 ceph::real_time mtime
;
305 PurgeRange(inodeno_t i
, const file_layout_t
& l
, const SnapContext
& sc
,
306 uint64_t fo
, uint64_t no
, ceph::real_time t
, int fl
,
308 : ino(i
), layout(l
), snapc(sc
), first(fo
), num(no
), mtime(t
), flags(fl
),
309 oncommit(fin
), uncommitted(0) {}
312 int Filer::purge_range(inodeno_t ino
,
313 const file_layout_t
*layout
,
314 const SnapContext
& snapc
,
315 uint64_t first_obj
, uint64_t num_obj
,
316 ceph::real_time mtime
,
322 // single object? easy!
324 object_t oid
= file_object_t(ino
, first_obj
);
325 object_locator_t oloc
= OSDMap::file_to_object_locator(*layout
);
326 objecter
->remove(oid
, oloc
, snapc
, mtime
, flags
, oncommit
);
330 PurgeRange
*pr
= new PurgeRange(ino
, *layout
, snapc
, first_obj
,
331 num_obj
, mtime
, flags
, oncommit
);
333 _do_purge_range(pr
, 0);
337 struct C_PurgeRange
: public Context
{
340 C_PurgeRange(Filer
*f
, PurgeRange
*p
) : filer(f
), pr(p
) {}
341 void finish(int r
) override
{
342 filer
->_do_purge_range(pr
, 1);
346 void Filer::_do_purge_range(PurgeRange
*pr
, int fin
)
348 PurgeRange::unique_lock
prl(pr
->lock
);
349 pr
->uncommitted
-= fin
;
350 ldout(cct
, 10) << "_do_purge_range " << pr
->ino
<< " objects " << pr
->first
351 << "~" << pr
->num
<< " uncommitted " << pr
->uncommitted
354 if (pr
->num
== 0 && pr
->uncommitted
== 0) {
355 pr
->oncommit
->complete(0);
361 std::vector
<object_t
> remove_oids
;
363 int max
= cct
->_conf
->filer_max_purge_ops
- pr
->uncommitted
;
364 while (pr
->num
> 0 && max
> 0) {
365 remove_oids
.push_back(file_object_t(pr
->ino
, pr
->first
));
373 // Issue objecter ops outside pr->lock to avoid lock dependency loop
374 for (const auto& oid
: remove_oids
) {
375 object_locator_t oloc
= OSDMap::file_to_object_locator(pr
->layout
);
376 objecter
->remove(oid
, oloc
, pr
->snapc
, pr
->mtime
, pr
->flags
,
377 new C_OnFinisher(new C_PurgeRange(this, pr
), finisher
));
381 // -----------------------
384 typedef std::lock_guard
<std::mutex
> lock_guard
;
385 typedef std::unique_lock
<std::mutex
> unique_lock
;
387 file_layout_t layout
;
389 ceph::real_time mtime
;
395 uint32_t truncate_seq
;
396 TruncRange(inodeno_t i
, const file_layout_t
& l
, const SnapContext
& sc
,
397 ceph::real_time t
, int fl
, Context
*fin
,
398 uint64_t off
, uint64_t len
, uint32_t ts
)
399 : ino(i
), layout(l
), snapc(sc
), mtime(t
), flags(fl
), oncommit(fin
),
400 uncommitted(0), offset(off
), length(len
), truncate_seq(ts
) {}
403 void Filer::truncate(inodeno_t ino
,
404 file_layout_t
*layout
,
405 const SnapContext
& snapc
,
409 ceph::real_time mtime
,
413 uint64_t period
= layout
->get_period();
414 uint64_t num_objs
= Striper::get_num_objects(*layout
, len
+ (offset
% period
));
416 vector
<ObjectExtent
> extents
;
417 Striper::file_to_extents(cct
, ino
, layout
, offset
, len
, 0, extents
);
418 vector
<OSDOp
> ops(1);
419 ops
[0].op
.op
= CEPH_OSD_OP_TRIMTRUNC
;
420 ops
[0].op
.extent
.truncate_seq
= truncate_seq
;
421 ops
[0].op
.extent
.truncate_size
= extents
[0].offset
;
422 objecter
->_modify(extents
[0].oid
, extents
[0].oloc
, ops
, mtime
, snapc
,
427 if (len
> 0 && (offset
+ len
) % period
)
428 len
+= period
- ((offset
+ len
) % period
);
430 TruncRange
*tr
= new TruncRange(ino
, *layout
, snapc
, mtime
, flags
, oncommit
,
431 offset
, len
, truncate_seq
);
432 _do_truncate_range(tr
, 0);
435 struct C_TruncRange
: public Context
{
438 C_TruncRange(Filer
*f
, TruncRange
*t
) : filer(f
), tr(t
) {}
439 void finish(int r
) override
{
440 filer
->_do_truncate_range(tr
, 1);
444 void Filer::_do_truncate_range(TruncRange
*tr
, int fin
)
446 TruncRange::unique_lock
trl(tr
->lock
);
447 tr
->uncommitted
-= fin
;
448 ldout(cct
, 10) << "_do_truncate_range " << tr
->ino
<< " objects " << tr
->offset
449 << "~" << tr
->length
<< " uncommitted " << tr
->uncommitted
452 if (tr
->length
== 0 && tr
->uncommitted
== 0) {
453 tr
->oncommit
->complete(0);
459 vector
<ObjectExtent
> extents
;
461 int max
= cct
->_conf
->filer_max_truncate_ops
- tr
->uncommitted
;
462 if (max
> 0 && tr
->length
> 0) {
463 uint64_t len
= tr
->layout
.get_period() * max
;
464 if (len
> tr
->length
)
467 uint64_t offset
= tr
->offset
+ tr
->length
- len
;
468 Striper::file_to_extents(cct
, tr
->ino
, &tr
->layout
, offset
, len
, 0, extents
);
469 tr
->uncommitted
+= extents
.size();
475 // Issue objecter ops outside tr->lock to avoid lock dependency loop
476 for (const auto& p
: extents
) {
477 vector
<OSDOp
> ops(1);
478 ops
[0].op
.op
= CEPH_OSD_OP_TRIMTRUNC
;
479 ops
[0].op
.extent
.truncate_size
= p
.offset
;
480 ops
[0].op
.extent
.truncate_seq
= tr
->truncate_seq
;
481 objecter
->_modify(p
.oid
, p
.oloc
, ops
, tr
->mtime
, tr
->snapc
, tr
->flags
,
482 new C_OnFinisher(new C_TruncRange(this, tr
), finisher
));