]> git.proxmox.com Git - ceph.git/blame - ceph/src/SimpleRADOSStriper.cc
update ceph source to reef 18.2.0
[ceph.git] / ceph / src / SimpleRADOSStriper.cc
CommitLineData
f67539c2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4/*
5 * Ceph - scalable distributed file system
6 *
7 * Copyright (C) 2021 Red Hat, Inc.
8 *
9 * This is free software; you can redistribute it and/or modify it under the
10 * terms of the GNU Lesser General Public License version 2.1, as published by
11 * the Free Software Foundation. See file COPYING.
12 *
13 */
14
15#include <boost/smart_ptr/intrusive_ptr.hpp>
16
17#include <fcntl.h>
18#include <stdio.h>
19#include <sys/stat.h>
20#include <sys/types.h>
21#include <unistd.h>
22
23#include <iomanip>
24#include <iostream>
25#include <regex>
26#include <sstream>
27#include <string_view>
28
29#include <limits.h>
30#include <string.h>
31
32#include "include/ceph_assert.h"
33#include "include/rados/librados.hpp"
34
35#include "cls/lock/cls_lock_client.h"
36
37#include "common/ceph_argparse.h"
38#include "common/ceph_mutex.h"
39#include "common/common_init.h"
40#include "common/config.h"
41#include "common/debug.h"
42#include "common/errno.h"
43#include "common/version.h"
44
45#include "SimpleRADOSStriper.h"
46
47using ceph::bufferlist;
48
/* Route this file's debug output through the cephsqlite subsystem. */
#define dout_subsys ceph_subsys_cephsqlite
#undef dout_prefix
/* Every log line is prefixed with the client instance id, the name of the
 * calling function and the striper's object name. Both `ioctx` and `oid` must
 * therefore be in scope wherever d() is used.
 */
#define dout_prefix *_dout << "client." << ioctx.get_instance_id() << ": SimpleRADOSStriper: " << __func__ << ": " << oid << ": "
/* Log at level `lvl` against the context obtained from this striper's IoCtx. */
#define d(lvl) ldout(static_cast<CephContext*>(ioctx.cct()), (lvl))
53
/* Perf counter indices. P_FIRST and P_LAST bracket the range; the values in
 * between each identify one counter.
 */
enum {
  P_FIRST = 0xe0000,
  P_UPDATE_METADATA,  /* metadata updates */
  P_UPDATE_ALLOCATED, /* updates of the allocated attribute */
  P_UPDATE_SIZE,      /* updates of the size attribute */
  P_UPDATE_VERSION,   /* updates of the version attribute */
  P_SHRINK,           /* allocation shrinks */
  P_SHRINK_BYTES,     /* bytes released by shrinks */
  P_LOCK,             /* lock acquisitions */
  P_UNLOCK,           /* lock releases */
  P_LAST,
};
66
67int SimpleRADOSStriper::config_logger(CephContext* cct, std::string_view name, std::shared_ptr<PerfCounters>* l)
68{
69 PerfCountersBuilder plb(cct, name.data(), P_FIRST, P_LAST);
70 plb.add_u64_counter(P_UPDATE_METADATA, "update_metadata", "Number of metadata updates");
71 plb.add_u64_counter(P_UPDATE_ALLOCATED, "update_allocated", "Number of allocated updates");
72 plb.add_u64_counter(P_UPDATE_SIZE, "update_size", "Number of size updates");
73 plb.add_u64_counter(P_UPDATE_VERSION, "update_version", "Number of version updates");
74 plb.add_u64_counter(P_SHRINK, "shrink", "Number of allocation shrinks");
75 plb.add_u64_counter(P_SHRINK_BYTES, "shrink_bytes", "Bytes shrunk");
76 plb.add_u64_counter(P_LOCK, "lock", "Number of locks");
77 plb.add_u64_counter(P_UNLOCK, "unlock", "Number of unlocks");
78 l->reset(plb.create_perf_counters());
79 return 0;
80}
81
82SimpleRADOSStriper::~SimpleRADOSStriper()
83{
84 if (lock_keeper.joinable()) {
85 shutdown = true;
86 lock_keeper_cvar.notify_all();
87 lock_keeper.join();
88 }
89
90 if (ioctx.is_valid()) {
91 d(5) << dendl;
92
93 if (is_locked()) {
94 unlock();
95 }
96 }
97}
98
99SimpleRADOSStriper::extent SimpleRADOSStriper::get_next_extent(uint64_t off, size_t len) const
100{
101 extent e;
102 {
103 uint64_t stripe = (off>>object_size);
104 CachedStackStringStream css;
105 *css << oid;
106 *css << ".";
107 *css << std::setw(16) << std::setfill('0') << std::hex << stripe;
108 e.soid = css->str();
109 }
110 e.off = off & ((1<<object_size)-1);
111 e.len = std::min<size_t>(len, (1<<object_size)-e.off);
112 return e;
113}
114
115int SimpleRADOSStriper::remove()
116{
117 d(5) << dendl;
118
119 if (blocklisted.load()) {
120 return -EBLOCKLISTED;
121 }
122
123 if (int rc = wait_for_aios(true); rc < 0) {
124 aios_failure = 0;
125 return rc;
126 }
127
128 if (int rc = set_metadata(0, true); rc < 0) {
129 return rc;
130 }
131
132 auto ext = get_first_extent();
133 if (int rc = ioctx.remove(ext.soid); rc < 0) {
134 d(5) << " remove failed: " << cpp_strerror(rc) << dendl;
135 return rc;
136 }
137
138 locked = false;
139
140 return 0;
141}
142
143int SimpleRADOSStriper::truncate(uint64_t size)
144{
145 d(5) << size << dendl;
146
147 if (blocklisted.load()) {
148 return -EBLOCKLISTED;
149 }
150
151 /* TODO: (not currently used by SQLite) handle growth + sparse */
152 if (int rc = set_metadata(size, true); rc < 0) {
153 return rc;
154 }
155
156 return 0;
157}
158
159int SimpleRADOSStriper::wait_for_aios(bool block)
160{
161 while (!aios.empty()) {
162 auto& aiocp = aios.front();
163 int rc;
164 if (block) {
165 rc = aiocp->wait_for_complete();
166 } else {
167 if (aiocp->is_complete()) {
168 rc = aiocp->get_return_value();
169 } else {
170 return 0;
171 }
172 }
173 if (rc) {
174 d(5) << " aio failed: " << cpp_strerror(rc) << dendl;
175 if (aios_failure == 0) {
176 aios_failure = rc;
177 }
178 }
179 aios.pop();
180 }
181 return aios_failure;
182}
183
184int SimpleRADOSStriper::flush()
185{
186 d(5) << dendl;
187
188 if (blocklisted.load()) {
189 return -EBLOCKLISTED;
190 }
191
192 if (size_dirty) {
193 if (int rc = set_metadata(size, true); rc < 0) {
194 return rc;
195 }
196 }
197
198 if (int rc = wait_for_aios(true); rc < 0) {
199 aios_failure = 0;
200 return rc;
201 }
202
203 return 0;
204}
205
206int SimpleRADOSStriper::stat(uint64_t* s)
207{
208 d(5) << dendl;
209
210 if (blocklisted.load()) {
211 return -EBLOCKLISTED;
212 }
213
214 *s = size;
215 return 0;
216}
217
218int SimpleRADOSStriper::create()
219{
220 d(5) << dendl;
221
222 if (blocklisted.load()) {
223 return -EBLOCKLISTED;
224 }
225
226 auto ext = get_first_extent();
227 auto op = librados::ObjectWriteOperation();
228 /* exclusive create ensures we do none of these setxattrs happen if it fails */
229 op.create(1);
230 op.setxattr(XATTR_VERSION, uint2bl(0));
231 op.setxattr(XATTR_EXCL, bufferlist());
232 op.setxattr(XATTR_SIZE, uint2bl(0));
233 op.setxattr(XATTR_ALLOCATED, uint2bl(0));
234 op.setxattr(XATTR_LAYOUT_STRIPE_UNIT, uint2bl(1));
235 op.setxattr(XATTR_LAYOUT_STRIPE_COUNT, uint2bl(1));
236 op.setxattr(XATTR_LAYOUT_OBJECT_SIZE, uint2bl(1<<object_size));
237 if (int rc = ioctx.operate(ext.soid, &op); rc < 0) {
238 return rc; /* including EEXIST */
239 }
240 return 0;
241}
242
243int SimpleRADOSStriper::open()
244{
245 d(5) << oid << dendl;
246
247 if (blocklisted.load()) {
248 return -EBLOCKLISTED;
249 }
250
251 auto ext = get_first_extent();
252 auto op = librados::ObjectReadOperation();
253 bufferlist bl_excl, bl_size, bl_alloc, bl_version, pbl;
254 int prval_excl, prval_size, prval_alloc, prval_version;
255 op.getxattr(XATTR_EXCL, &bl_excl, &prval_excl);
256 op.getxattr(XATTR_SIZE, &bl_size, &prval_size);
257 op.getxattr(XATTR_ALLOCATED, &bl_alloc, &prval_alloc);
258 op.getxattr(XATTR_VERSION, &bl_version, &prval_version);
259 if (int rc = ioctx.operate(ext.soid, &op, &pbl); rc < 0) {
260 d(5) << " getxattr failed: " << cpp_strerror(rc) << dendl;
261 return rc;
262 }
263 exclusive_holder = bl_excl.to_str();
264 {
265 auto sstr = bl_size.to_str();
266 std::string err;
267 size = strict_strtoll(sstr.c_str(), 10, &err);
268 ceph_assert(err.empty());
269 }
270 {
271 auto sstr = bl_alloc.to_str();
272 std::string err;
273 allocated = strict_strtoll(sstr.c_str(), 10, &err);
274 ceph_assert(err.empty());
275 }
276 {
277 auto sstr = bl_version.to_str();
278 std::string err;
279 version = strict_strtoll(sstr.c_str(), 10, &err);
280 ceph_assert(err.empty());
281 }
282 d(15) << " size: " << size << " allocated: " << allocated << " version: " << version << dendl;
283 return 0;
284}
285
286int SimpleRADOSStriper::shrink_alloc(uint64_t a)
287{
288 d(5) << dendl;
289 std::vector<aiocompletionptr> removes;
290
291 ceph_assert(a <= allocated);
292 uint64_t prune = std::max<uint64_t>(a, (1u << object_size)); /* never delete first extent here */
293 uint64_t len = allocated - prune;
294 const uint64_t bytes_removed = len;
295 uint64_t offset = prune;
296 while (len > 0) {
297 auto ext = get_next_extent(offset, len);
298 auto aiocp = aiocompletionptr(librados::Rados::aio_create_completion());
299 if (int rc = ioctx.aio_remove(ext.soid, aiocp.get()); rc < 0) {
300 d(5) << " aio_remove failed: " << cpp_strerror(rc) << dendl;
301 return rc;
302 }
303 removes.emplace_back(std::move(aiocp));
304 len -= ext.len;
305 offset += ext.len;
306 }
307
308 for (auto& aiocp : removes) {
309 if (int rc = aiocp->wait_for_complete(); rc < 0 && rc != -ENOENT) {
310 d(5) << " aio_remove failed: " << cpp_strerror(rc) << dendl;
311 return rc;
312 }
313 }
314
315 auto ext = get_first_extent();
316 auto op = librados::ObjectWriteOperation();
317 auto aiocp = aiocompletionptr(librados::Rados::aio_create_completion());
318 op.setxattr(XATTR_ALLOCATED, uint2bl(a));
319 d(15) << " updating allocated to " << a << dendl;
320 op.setxattr(XATTR_VERSION, uint2bl(version+1));
321 d(15) << " updating version to " << (version+1) << dendl;
322 if (int rc = ioctx.aio_operate(ext.soid, aiocp.get(), &op); rc < 0) {
323 d(5) << " update failed: " << cpp_strerror(rc) << dendl;
324 return rc;
325 }
326 /* we need to wait so we don't have dangling extents */
327 d(10) << " waiting for allocated update" << dendl;
328 if (int rc = aiocp->wait_for_complete(); rc < 0) {
329 d(1) << " update failure: " << cpp_strerror(rc) << dendl;
330 return rc;
331 }
332 if (logger) {
333 logger->inc(P_UPDATE_METADATA);
334 logger->inc(P_UPDATE_ALLOCATED);
335 logger->inc(P_UPDATE_VERSION);
336 logger->inc(P_SHRINK);
337 logger->inc(P_SHRINK_BYTES, bytes_removed);
338 }
339
340 version += 1;
341 allocated = a;
342 return 0;
343}
344
345int SimpleRADOSStriper::maybe_shrink_alloc()
346{
347 d(15) << dendl;
348
349 if (size == 0) {
350 if (allocated > 0) {
351 d(10) << "allocation shrink to 0" << dendl;
352 return shrink_alloc(0);
353 } else {
354 return 0;
355 }
356 }
357
358 uint64_t mask = (1<<object_size)-1;
359 uint64_t new_allocated = min_growth + ((size + mask) & ~mask); /* round up base 2 */
360 if (allocated > new_allocated && ((allocated-new_allocated) > min_growth)) {
361 d(10) << "allocation shrink to " << new_allocated << dendl;
362 return shrink_alloc(new_allocated);
363 }
364
365 return 0;
366}
367
368bufferlist SimpleRADOSStriper::str2bl(std::string_view sv)
369{
370 bufferlist bl;
371 bl.append(sv);
372 return bl;
373}
374
375bufferlist SimpleRADOSStriper::uint2bl(uint64_t v)
376{
377 CachedStackStringStream css;
378 *css << std::dec << std::setw(16) << std::setfill('0') << v;
379 bufferlist bl;
380 bl.append(css->strv());
381 return bl;
382}
383
384int SimpleRADOSStriper::set_metadata(uint64_t new_size, bool update_size)
385{
386 d(10) << " new_size: " << new_size
387 << " update_size: " << update_size
388 << " allocated: " << allocated
389 << " size: " << size
390 << " version: " << version
391 << dendl;
392
393 bool do_op = false;
394 auto new_allocated = allocated;
395 auto ext = get_first_extent();
396 auto op = librados::ObjectWriteOperation();
397 if (new_size > allocated) {
398 uint64_t mask = (1<<object_size)-1;
399 new_allocated = min_growth + ((size + mask) & ~mask); /* round up base 2 */
400 op.setxattr(XATTR_ALLOCATED, uint2bl(new_allocated));
401 do_op = true;
402 if (logger) logger->inc(P_UPDATE_ALLOCATED);
403 d(15) << " updating allocated to " << new_allocated << dendl;
404 }
405 if (update_size) {
406 op.setxattr(XATTR_SIZE, uint2bl(new_size));
407 do_op = true;
408 if (logger) logger->inc(P_UPDATE_SIZE);
409 d(15) << " updating size to " << new_size << dendl;
410 }
411 if (do_op) {
412 if (logger) logger->inc(P_UPDATE_METADATA);
413 if (logger) logger->inc(P_UPDATE_VERSION);
414 op.setxattr(XATTR_VERSION, uint2bl(version+1));
415 d(15) << " updating version to " << (version+1) << dendl;
416 auto aiocp = aiocompletionptr(librados::Rados::aio_create_completion());
417 if (int rc = ioctx.aio_operate(ext.soid, aiocp.get(), &op); rc < 0) {
418 d(1) << " update failure: " << cpp_strerror(rc) << dendl;
419 return rc;
420 }
421 version += 1;
422 if (allocated != new_allocated) {
423 /* we need to wait so we don't have dangling extents */
424 d(10) << "waiting for allocated update" << dendl;
425 if (int rc = aiocp->wait_for_complete(); rc < 0) {
426 d(1) << " update failure: " << cpp_strerror(rc) << dendl;
427 return rc;
428 }
429 aiocp.reset();
430 allocated = new_allocated;
431 }
432 if (aiocp) {
433 aios.emplace(std::move(aiocp));
434 }
435 if (update_size) {
436 size = new_size;
437 size_dirty = false;
438 return maybe_shrink_alloc();
439 }
440 }
441 return 0;
442}
443
444ssize_t SimpleRADOSStriper::write(const void* data, size_t len, uint64_t off)
445{
446 d(5) << off << "~" << len << dendl;
447
448 if (blocklisted.load()) {
449 return -EBLOCKLISTED;
450 }
451
452 if (allocated < (len+off)) {
453 if (int rc = set_metadata(len+off, false); rc < 0) {
454 return rc;
455 }
456 }
457
458 size_t w = 0;
459 while ((len-w) > 0) {
460 auto ext = get_next_extent(off+w, len-w);
461 auto aiocp = aiocompletionptr(librados::Rados::aio_create_completion());
462 bufferlist bl;
463 bl.append((const char*)data+w, ext.len);
464 if (int rc = ioctx.aio_write(ext.soid, aiocp.get(), bl, ext.len, ext.off); rc < 0) {
465 break;
466 }
467 aios.emplace(std::move(aiocp));
468 w += ext.len;
469 }
470
471 wait_for_aios(false); // clean up finished completions
472
473 if (size < (len+off)) {
474 size = len+off;
475 size_dirty = true;
476 d(10) << " dirty size: " << size << dendl;
477 }
478
479 return (ssize_t)w;
480}
481
482ssize_t SimpleRADOSStriper::read(void* data, size_t len, uint64_t off)
483{
484 d(5) << off << "~" << len << dendl;
485
486 if (blocklisted.load()) {
487 return -EBLOCKLISTED;
488 }
489
490 size_t r = 0;
2a845540
TL
491 // Don't use std::vector to store bufferlists (e.g for parallelizing aio_reads),
492 // as they are being moved whenever the vector resizes
493 // and will cause invalidated references.
494 std::deque<std::pair<bufferlist, aiocompletionptr>> reads;
f67539c2
TL
495 while ((len-r) > 0) {
496 auto ext = get_next_extent(off+r, len-r);
497 auto& [bl, aiocp] = reads.emplace_back();
498 aiocp = aiocompletionptr(librados::Rados::aio_create_completion());
499 if (int rc = ioctx.aio_read(ext.soid, aiocp.get(), &bl, ext.len, ext.off); rc < 0) {
500 d(1) << " read failure: " << cpp_strerror(rc) << dendl;
501 return rc;
502 }
503 r += ext.len;
504 }
505
506 r = 0;
507 for (auto& [bl, aiocp] : reads) {
508 if (int rc = aiocp->wait_for_complete(); rc < 0) {
509 d(1) << " read failure: " << cpp_strerror(rc) << dendl;
510 return rc;
511 }
512 bl.begin().copy(bl.length(), ((char*)data)+r);
513 r += bl.length();
514 }
515 ceph_assert(r <= len);
516
517 return r;
518}
519
520int SimpleRADOSStriper::print_lockers(std::ostream& out)
521{
522 int exclusive;
523 std::string tag;
524 std::list<librados::locker_t> lockers;
525 auto ext = get_first_extent();
526 if (int rc = ioctx.list_lockers(ext.soid, biglock, &exclusive, &tag, &lockers); rc < 0) {
527 d(1) << " list_lockers failure: " << cpp_strerror(rc) << dendl;
528 return rc;
529 }
530 if (lockers.empty()) {
531 out << " lockers none";
532 } else {
533 out << " lockers exclusive=" << exclusive << " tag=" << tag << " lockers=[";
534 bool first = true;
535 for (const auto& l : lockers) {
536 if (!first) out << ",";
537 out << l.client << ":" << l.cookie << ":" << l.address;
538 }
539 out << "]";
540 }
541 return 0;
542}
543
544/* Do lock renewal in a separate thread: while it's unlikely sqlite chews on
545 * something for multiple seconds without calling into the VFS (where we could
546 * initiate a lock renewal), it's not impossible with complex queries. Also, we
547 * want to allow "PRAGMA locking_mode = exclusive" where the application may
548 * not use the sqlite3 database connection for an indeterminate amount of time.
549 */
550void SimpleRADOSStriper::lock_keeper_main(void)
551{
552 d(20) << dendl;
553 const auto ext = get_first_extent();
554 while (!shutdown) {
555 d(20) << "tick" << dendl;
556 std::unique_lock lock(lock_keeper_mutex);
557 auto now = clock::now();
558 auto since = now-last_renewal;
559
560 if (since >= lock_keeper_interval && locked) {
561 d(10) << "renewing lock" << dendl;
562 auto tv = ceph::to_timeval(lock_keeper_timeout);
563 int rc = ioctx.lock_exclusive(ext.soid, biglock, cookie.to_string(), lockdesc, &tv, LIBRADOS_LOCK_FLAG_MUST_RENEW);
564 if (rc) {
565 /* If lock renewal fails, we cannot continue the application. Return
566 * -EBLOCKLISTED for all calls into the striper for this instance, even
567 * if we're not actually blocklisted.
568 */
569 d(-1) << "lock renewal failed: " << cpp_strerror(rc) << dendl;
570 blocklisted = true;
571 break;
572 }
573 last_renewal = clock::now();
574 }
575
576 lock_keeper_cvar.wait_for(lock, lock_keeper_interval);
577 }
578}
579
580int SimpleRADOSStriper::recover_lock()
581{
582 d(5) << "attempting to recover lock" << dendl;
583
584 std::string addrs;
585 const auto ext = get_first_extent();
586
587 {
588 auto tv = ceph::to_timeval(lock_keeper_timeout);
589 if (int rc = ioctx.lock_exclusive(ext.soid, biglock, cookie.to_string(), lockdesc, &tv, 0); rc < 0) {
590 return rc;
591 }
592 locked = true;
593 last_renewal = clock::now();
594 }
595
596 d(5) << "acquired lock, fetching last owner" << dendl;
597
598 {
599 bufferlist bl_excl;
600 if (int rc = ioctx.getxattr(ext.soid, XATTR_EXCL, bl_excl); rc < 0) {
601 if (rc == -ENOENT) {
602 /* someone removed it? ok... */
603 goto setowner;
604 } else {
605 d(-1) << "could not recover exclusive locker" << dendl;
606 locked = false; /* it will drop eventually */
607 return -EIO;
608 }
609 }
610 addrs = bl_excl.to_str();
611 }
612
613 if (addrs.empty()) {
614 d(5) << "someone else cleaned up" << dendl;
615 goto setowner;
616 } else {
617 d(5) << "exclusive lock holder was " << addrs << dendl;
618 }
619
620 if (blocklist_the_dead) {
621 entity_addrvec_t addrv;
622 addrv.parse(addrs.c_str());
623 auto R = librados::Rados(ioctx);
20effc67 624 std::string_view b = "blocklist";
f67539c2
TL
625retry:
626 for (auto& a : addrv.v) {
627 CachedStackStringStream css;
628 *css << "{\"prefix\":\"osd " << b << "\", \"" << b << "op\":\"add\",";
629 *css << "\"addr\":\"";
630 *css << a;
631 *css << "\"}";
632 std::vector<std::string> cmd = {css->str()};
633 d(5) << "sending blocklist command: " << cmd << dendl;
634 std::string out;
635 if (int rc = R.mon_command(css->str(), bufferlist(), nullptr, &out); rc < 0) {
20effc67
TL
636 if (rc == -EINVAL && b == "blocklist") {
637 b = "blacklist";
f67539c2
TL
638 goto retry;
639 }
640 d(-1) << "Cannot proceed with recovery because I have failed to blocklist the old client: " << cpp_strerror(rc) << ", out = " << out << dendl;
641 locked = false; /* it will drop eventually */
642 return -EIO;
643 }
644 }
645 /* Ensure our osd_op requests have the latest epoch. */
646 R.wait_for_latest_osdmap();
647 }
648
649setowner:
650 d(5) << "setting new owner to myself, " << myaddrs << dendl;
651 {
652 auto myaddrbl = str2bl(myaddrs);
653 if (int rc = ioctx.setxattr(ext.soid, XATTR_EXCL, myaddrbl); rc < 0) {
654 d(-1) << "could not set lock owner" << dendl;
655 locked = false; /* it will drop eventually */
656 return -EIO;
657 }
658 }
659 return 0;
660}
661
662int SimpleRADOSStriper::lock(uint64_t timeoutms)
663{
664 /* XXX: timeoutms is unused */
665 d(5) << "timeout=" << timeoutms << dendl;
666
667 if (blocklisted.load()) {
668 return -EBLOCKLISTED;
669 }
670
671 std::scoped_lock lock(lock_keeper_mutex);
672
673 ceph_assert(!is_locked());
674
675 /* We're going to be very lazy here in implementation: only exclusive locks
676 * are allowed. That even ensures a single reader.
677 */
678 uint64_t slept = 0;
679
680 auto ext = get_first_extent();
681 while (true) {
682 /* The general fast path in one compound operation: obtain the lock,
683 * confirm the past locker cleaned up after themselves (set XATTR_EXCL to
684 * ""), then finally set XATTR_EXCL to our address vector as the new
685 * exclusive locker.
686 */
687
688 auto op = librados::ObjectWriteOperation();
689 auto tv = ceph::to_timeval(lock_keeper_timeout);
690 utime_t duration;
691 duration.set_from_timeval(&tv);
692 rados::cls::lock::lock(&op, biglock, ClsLockType::EXCLUSIVE, cookie.to_string(), "", lockdesc, duration, 0);
693 op.cmpxattr(XATTR_EXCL, LIBRADOS_CMPXATTR_OP_EQ, bufferlist());
694 op.setxattr(XATTR_EXCL, str2bl(myaddrs));
695 int rc = ioctx.operate(ext.soid, &op);
696 if (rc == 0) {
697 locked = true;
698 last_renewal = clock::now();
699 break;
700 } else if (rc == -EBUSY) {
701 if ((slept % 500000) == 0) {
702 d(-1) << "waiting for locks: ";
703 print_lockers(*_dout);
704 *_dout << dendl;
705 }
706 usleep(5000);
707 slept += 5000;
708 continue;
709 } else if (rc == -ECANCELED) {
710 /* CMPXATTR failed, a locker didn't cleanup. Try to recover! */
711 if (rc = recover_lock(); rc < 0) {
712 if (rc == -EBUSY) {
713 continue; /* try again */
714 }
715 return rc;
716 }
717 break;
718 } else {
719 d(-1) << " lock failed: " << cpp_strerror(rc) << dendl;
720 return rc;
721 }
722 }
723
724 if (!lock_keeper.joinable()) {
725 lock_keeper = std::thread(&SimpleRADOSStriper::lock_keeper_main, this);
726 }
727
728 if (int rc = open(); rc < 0) {
729 d(5) << " open failed: " << cpp_strerror(rc) << dendl;
730 return rc;
731 }
732
733 d(5) << " = 0" << dendl;
734 if (logger) {
735 logger->inc(P_LOCK);
736 }
737
738 return 0;
739}
740
741int SimpleRADOSStriper::unlock()
742{
743 d(5) << dendl;
744
745 if (blocklisted.load()) {
746 return -EBLOCKLISTED;
747 }
748
749 std::scoped_lock lock(lock_keeper_mutex);
750
751 ceph_assert(is_locked());
752
753 /* wait for flush of metadata */
754 if (int rc = flush(); rc < 0) {
755 return rc;
756 }
757
758 const auto ext = get_first_extent();
759 auto op = librados::ObjectWriteOperation();
760 op.cmpxattr(XATTR_EXCL, LIBRADOS_CMPXATTR_OP_EQ, str2bl(myaddrs));
761 op.setxattr(XATTR_EXCL, bufferlist());
762 rados::cls::lock::unlock(&op, biglock, cookie.to_string());
763 if (int rc = ioctx.operate(ext.soid, &op); rc < 0) {
764 d(-1) << " unlock failed: " << cpp_strerror(rc) << dendl;
765 return rc;
766 }
767 locked = false;
768
769 d(5) << " = 0" << dendl;
770 if (logger) {
771 logger->inc(P_UNLOCK);
772 }
773
774 return 0;
775}