1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
5 * Ceph - scalable distributed file system
7 * Copyright (C) 2021 Red Hat, Inc.
9 * This is free software; you can redistribute it and/or modify it under the
10 * terms of the GNU Lesser General Public License version 2.1, as published by
11 * the Free Software Foundation. See file COPYING.
15 #include <boost/smart_ptr/intrusive_ptr.hpp>
16 #include <fmt/format.h>
21 #include <sys/types.h>
29 #include <string_view>
34 #include <sqlite3ext.h>
35 SQLITE_EXTENSION_INIT1
37 #include "include/ceph_assert.h"
38 #include "include/rados/librados.hpp"
40 #include "common/Clock.h"
41 #include "common/Formatter.h"
42 #include "common/ceph_argparse.h"
43 #include "common/ceph_mutex.h"
44 #include "common/common_init.h"
45 #include "common/config.h"
46 #include "common/debug.h"
47 #include "common/errno.h"
48 #include "common/perf_counters.h"
49 #include "common/version.h"
51 #include "include/libcephsqlite.h"
52 #include "SimpleRADOSStriper.h"
54 #define dout_subsys ceph_subsys_cephsqlite
56 #define dout_prefix *_dout << "cephsqlite: " << __func__ << ": "
57 #define d(vfs,lvl) ldout(getcct(vfs), (lvl)) << "(client." << getdata(vfs).cluster.get_instance_id() << ") "
58 #define dv(lvl) d(vfs,(lvl))
59 #define df(lvl) d(f->vfs,(lvl)) << f->loc << " "
76 P_OPF_CHECKRESERVEDLOCK
,
79 P_OPF_DEVICECHARACTERISTICS
,
83 struct cephsqlite_appdata
{
84 ~cephsqlite_appdata() {
86 cct
->get_perfcounters_collection()->remove(logger
.get());
89 cct
->get_perfcounters_collection()->remove(striper_logger
.get());
95 PerfCountersBuilder
plb(cct
.get(), "libcephsqlite_vfs", P_FIRST
, P_LAST
);
96 plb
.add_time_avg(P_OP_OPEN
, "op_open", "Time average of Open operations");
97 plb
.add_time_avg(P_OP_DELETE
, "op_delete", "Time average of Delete operations");
98 plb
.add_time_avg(P_OP_ACCESS
, "op_access", "Time average of Access operations");
99 plb
.add_time_avg(P_OP_FULLPATHNAME
, "op_fullpathname", "Time average of FullPathname operations");
100 plb
.add_time_avg(P_OP_CURRENTTIME
, "op_currenttime", "Time average of Currenttime operations");
101 plb
.add_time_avg(P_OPF_CLOSE
, "opf_close", "Time average of Close file operations");
102 plb
.add_time_avg(P_OPF_READ
, "opf_read", "Time average of Read file operations");
103 plb
.add_time_avg(P_OPF_WRITE
, "opf_write", "Time average of Write file operations");
104 plb
.add_time_avg(P_OPF_TRUNCATE
, "opf_truncate", "Time average of Truncate file operations");
105 plb
.add_time_avg(P_OPF_SYNC
, "opf_sync", "Time average of Sync file operations");
106 plb
.add_time_avg(P_OPF_FILESIZE
, "opf_filesize", "Time average of FileSize file operations");
107 plb
.add_time_avg(P_OPF_LOCK
, "opf_lock", "Time average of Lock file operations");
108 plb
.add_time_avg(P_OPF_UNLOCK
, "opf_unlock", "Time average of Unlock file operations");
109 plb
.add_time_avg(P_OPF_CHECKRESERVEDLOCK
, "opf_checkreservedlock", "Time average of CheckReservedLock file operations");
110 plb
.add_time_avg(P_OPF_FILECONTROL
, "opf_filecontrol", "Time average of FileControl file operations");
111 plb
.add_time_avg(P_OPF_SECTORSIZE
, "opf_sectorsize", "Time average of SectorSize file operations");
112 plb
.add_time_avg(P_OPF_DEVICECHARACTERISTICS
, "opf_devicecharacteristics", "Time average of DeviceCharacteristics file operations");
113 logger
.reset(plb
.create_perf_counters());
114 if (int rc
= SimpleRADOSStriper::config_logger(cct
.get(), "libcephsqlite_striper", &striper_logger
); rc
< 0) {
117 cct
->get_perfcounters_collection()->add(logger
.get());
118 cct
->get_perfcounters_collection()->add(striper_logger
.get());
123 ldout(cct
, 5) << "initializing RADOS handle as " << cct
->_conf
->name
<< dendl
;
124 if (int rc
= cluster
.init_with_context(cct
.get()); rc
< 0) {
125 lderr(cct
) << "cannot initialize RADOS: " << cpp_strerror(rc
) << dendl
;
128 if (int rc
= cluster
.connect(); rc
< 0) {
129 lderr(cct
) << "cannot connect: " << cpp_strerror(rc
) << dendl
;
132 auto s
= cluster
.get_addrs();
133 ldout(cct
, 5) << "completed connection to RADOS with address " << s
<< dendl
;
137 boost::intrusive_ptr
<CephContext
> cct
;
138 std::unique_ptr
<PerfCounters
> logger
;
139 std::shared_ptr
<PerfCounters
> striper_logger
;
140 librados::Rados cluster
;
143 struct cephsqlite_fileloc
{
149 struct cephsqlite_fileio
{
150 librados::IoCtx ioctx
;
151 std::unique_ptr
<SimpleRADOSStriper
> rs
;
154 std::ostream
& operator<<(std::ostream
&out
, const cephsqlite_fileloc
& fileloc
) {
166 struct cephsqlite_file
{
168 struct sqlite3_vfs
* vfs
= nullptr;
170 // There are 5 lock states: https://sqlite.org/c3ref/c_lock_exclusive.html
172 struct cephsqlite_fileloc loc
{};
173 struct cephsqlite_fileio io
{};
177 #define getdata(vfs) (*((cephsqlite_appdata*)((vfs)->pAppData)))
179 static CephContext
* getcct(sqlite3_vfs
* vfs
)
181 auto&& appd
= getdata(vfs
);
182 auto& cct
= appd
.cct
;
188 std::vector
<const char*> env_args
;
189 env_to_vec(env_args
, "CEPH_ARGS");
190 std::string cluster
, conf_file_list
; // unused
191 CephInitParameters iparams
= ceph_argparse_early_args(env_args
, CEPH_ENTITY_TYPE_CLIENT
, &cluster
, &conf_file_list
);
192 cct
= boost::intrusive_ptr
<CephContext
>(common_preinit(iparams
, CODE_ENVIRONMENT_LIBRARY
, 0), false);
193 cct
->_conf
.parse_config_files(nullptr, &std::cerr
, 0);
194 cct
->_conf
.parse_env(cct
->get_module_type()); // environment variables override
195 cct
->_conf
.apply_changes(nullptr);
196 common_init_finish(cct
.get());
198 if (int rc
= appd
.setup_perf(); rc
< 0) {
199 ceph_abort("cannot setup perf counters");
202 if (int rc
= appd
.init_cluster(); rc
< 0) {
203 ceph_abort("cannot setup RADOS cluster handle");
209 static int Lock(sqlite3_file
*file
, int ilock
)
211 auto f
= (cephsqlite_file
*)file
;
212 auto start
= ceph::coarse_mono_clock::now();
213 df(5) << std::hex
<< ilock
<< dendl
;
215 auto& lock
= f
->lock
;
216 ceph_assert(!f
->io
.rs
->is_locked() || lock
> SQLITE_LOCK_NONE
);
217 ceph_assert(lock
<= ilock
);
218 if (!f
->io
.rs
->is_locked() && ilock
> SQLITE_LOCK_NONE
) {
219 if (int rc
= f
->io
.rs
->lock(0); rc
< 0) {
220 df(5) << "failed: " << rc
<< dendl
;
226 auto end
= ceph::coarse_mono_clock::now();
227 getdata(f
->vfs
).logger
->tinc(P_OPF_LOCK
, end
-start
);
231 static int Unlock(sqlite3_file
*file
, int ilock
)
233 auto f
= (cephsqlite_file
*)file
;
234 auto start
= ceph::coarse_mono_clock::now();
235 df(5) << std::hex
<< ilock
<< dendl
;
237 auto& lock
= f
->lock
;
238 ceph_assert(lock
== SQLITE_LOCK_NONE
|| (lock
> SQLITE_LOCK_NONE
&& f
->io
.rs
->is_locked()));
239 ceph_assert(lock
>= ilock
);
240 if (ilock
<= SQLITE_LOCK_NONE
&& SQLITE_LOCK_NONE
< lock
) {
241 if (int rc
= f
->io
.rs
->unlock(); rc
< 0) {
242 df(5) << "failed: " << rc
<< dendl
;
248 auto end
= ceph::coarse_mono_clock::now();
249 getdata(f
->vfs
).logger
->tinc(P_OPF_UNLOCK
, end
-start
);
253 static int CheckReservedLock(sqlite3_file
*file
, int *result
)
255 auto f
= (cephsqlite_file
*)file
;
256 auto start
= ceph::coarse_mono_clock::now();
260 auto& lock
= f
->lock
;
261 if (lock
> SQLITE_LOCK_SHARED
) {
266 f
->io
.rs
->print_lockers(*_dout
);
269 auto end
= ceph::coarse_mono_clock::now();
270 getdata(f
->vfs
).logger
->tinc(P_OPF_CHECKRESERVEDLOCK
, end
-start
);
274 static int Close(sqlite3_file
*file
)
276 auto f
= (cephsqlite_file
*)file
;
277 auto start
= ceph::coarse_mono_clock::now();
279 f
->~cephsqlite_file();
280 auto end
= ceph::coarse_mono_clock::now();
281 getdata(f
->vfs
).logger
->tinc(P_OPF_CLOSE
, end
-start
);
285 static int Read(sqlite3_file
*file
, void *buf
, int len
, sqlite_int64 off
)
287 auto f
= (cephsqlite_file
*)file
;
288 auto start
= ceph::coarse_mono_clock::now();
289 df(5) << buf
<< " " << off
<< "~" << len
<< dendl
;
291 if (int rc
= f
->io
.rs
->read(buf
, len
, off
); rc
< 0) {
292 df(5) << "read failed: " << cpp_strerror(rc
) << dendl
;
293 return SQLITE_IOERR_READ
;
295 df(5) << "= " << rc
<< dendl
;
296 auto end
= ceph::coarse_mono_clock::now();
297 getdata(f
->vfs
).logger
->tinc(P_OPF_READ
, end
-start
);
299 memset(buf
, 0, len
-rc
);
300 return SQLITE_IOERR_SHORT_READ
;
307 static int Write(sqlite3_file
*file
, const void *buf
, int len
, sqlite_int64 off
)
309 auto f
= (cephsqlite_file
*)file
;
310 auto start
= ceph::coarse_mono_clock::now();
311 df(5) << off
<< "~" << len
<< dendl
;
313 if (int rc
= f
->io
.rs
->write(buf
, len
, off
); rc
< 0) {
314 df(5) << "write failed: " << cpp_strerror(rc
) << dendl
;
315 return SQLITE_IOERR_WRITE
;
317 df(5) << "= " << rc
<< dendl
;
318 auto end
= ceph::coarse_mono_clock::now();
319 getdata(f
->vfs
).logger
->tinc(P_OPF_WRITE
, end
-start
);
325 static int Truncate(sqlite3_file
*file
, sqlite_int64 size
)
327 auto f
= (cephsqlite_file
*)file
;
328 auto start
= ceph::coarse_mono_clock::now();
329 df(5) << size
<< dendl
;
331 if (int rc
= f
->io
.rs
->truncate(size
); rc
< 0) {
332 df(5) << "truncate failed: " << cpp_strerror(rc
) << dendl
;
336 auto end
= ceph::coarse_mono_clock::now();
337 getdata(f
->vfs
).logger
->tinc(P_OPF_TRUNCATE
, end
-start
);
341 static int Sync(sqlite3_file
*file
, int flags
)
343 auto f
= (cephsqlite_file
*)file
;
344 auto start
= ceph::coarse_mono_clock::now();
345 df(5) << flags
<< dendl
;
347 if (int rc
= f
->io
.rs
->flush(); rc
< 0) {
348 df(5) << "failed: " << cpp_strerror(rc
) << dendl
;
352 df(5) << " = 0" << dendl
;
354 auto end
= ceph::coarse_mono_clock::now();
355 getdata(f
->vfs
).logger
->tinc(P_OPF_SYNC
, end
-start
);
360 static int FileSize(sqlite3_file
*file
, sqlite_int64
*osize
)
362 auto f
= (cephsqlite_file
*)file
;
363 auto start
= ceph::coarse_mono_clock::now();
367 if (int rc
= f
->io
.rs
->stat(&size
); rc
< 0) {
368 df(5) << "stat failed: " << cpp_strerror(rc
) << dendl
;
369 return SQLITE_NOTFOUND
;
372 *osize
= (sqlite_int64
)size
;
374 df(5) << "= " << size
<< dendl
;
376 auto end
= ceph::coarse_mono_clock::now();
377 getdata(f
->vfs
).logger
->tinc(P_OPF_FILESIZE
, end
-start
);
382 static bool parsepath(std::string_view path
, struct cephsqlite_fileloc
* fileloc
)
384 static const std::regex re1
{"^/*(\\*[[:digit:]]+):([[:alnum:]\\-_.]*)/([[:alnum:]\\-._]+)$"};
385 static const std::regex re2
{"^/*([[:alnum:]\\-_.]+):([[:alnum:]\\-_.]*)/([[:alnum:]\\-._]+)$"};
388 if (!std::regex_match(path
.data(), cm
, re1
)) {
389 if (!std::regex_match(path
.data(), cm
, re2
)) {
393 fileloc
->pool
= cm
[1];
394 fileloc
->radosns
= cm
[2];
395 fileloc
->name
= cm
[3];
400 static int makestriper(sqlite3_vfs
* vfs
, const cephsqlite_fileloc
& loc
, cephsqlite_fileio
* io
)
402 auto&& appd
= getdata(vfs
);
403 auto& cct
= appd
.cct
;
404 auto& cluster
= appd
.cluster
;
407 dv(10) << loc
<< dendl
;
410 if (loc
.pool
[0] == '*') {
412 int64_t id
= strict_strtoll(loc
.pool
.c_str()+1, 10, &err
);
413 ceph_assert(err
.empty());
414 if (int rc
= cluster
.ioctx_create2(id
, io
->ioctx
); rc
< 0) {
415 if (rc
== -ENOENT
&& !gotmap
) {
416 cluster
.wait_for_latest_osdmap();
420 dv(10) << "cannot create ioctx: " << cpp_strerror(rc
) << dendl
;
424 if (int rc
= cluster
.ioctx_create(loc
.pool
.c_str(), io
->ioctx
); rc
< 0) {
425 if (rc
== -ENOENT
&& !gotmap
) {
426 cluster
.wait_for_latest_osdmap();
430 dv(10) << "cannot create ioctx: " << cpp_strerror(rc
) << dendl
;
435 if (!loc
.radosns
.empty())
436 io
->ioctx
.set_namespace(loc
.radosns
);
438 io
->rs
= std::make_unique
<SimpleRADOSStriper
>(io
->ioctx
, loc
.name
);
439 io
->rs
->set_logger(appd
.striper_logger
);
440 io
->rs
->set_lock_timeout(cct
->_conf
.get_val
<std::chrono::milliseconds
>("cephsqlite_lock_renewal_timeout"));
441 io
->rs
->set_lock_interval(cct
->_conf
.get_val
<std::chrono::milliseconds
>("cephsqlite_lock_renewal_interval"));
442 io
->rs
->set_blocklist_the_dead(cct
->_conf
.get_val
<bool>("cephsqlite_blocklist_dead_locker"));
447 static int SectorSize(sqlite3_file
* sf
)
449 static const int size
= 65536;
450 auto start
= ceph::coarse_mono_clock::now();
451 auto f
= (cephsqlite_file
*)sf
;
452 df(5) << " = " << size
<< dendl
;
453 auto end
= ceph::coarse_mono_clock::now();
454 getdata(f
->vfs
).logger
->tinc(P_OPF_SECTORSIZE
, end
-start
);
458 static int FileControl(sqlite3_file
* sf
, int op
, void *arg
)
460 auto f
= (cephsqlite_file
*)sf
;
461 auto start
= ceph::coarse_mono_clock::now();
462 df(5) << op
<< ", " << arg
<< dendl
;
463 auto end
= ceph::coarse_mono_clock::now();
464 getdata(f
->vfs
).logger
->tinc(P_OPF_FILECONTROL
, end
-start
);
465 return SQLITE_NOTFOUND
;
468 static int DeviceCharacteristics(sqlite3_file
* sf
)
470 auto f
= (cephsqlite_file
*)sf
;
471 auto start
= ceph::coarse_mono_clock::now();
473 static const int c
= 0
475 |SQLITE_IOCAP_POWERSAFE_OVERWRITE
476 |SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN
477 |SQLITE_IOCAP_SAFE_APPEND
479 auto end
= ceph::coarse_mono_clock::now();
480 getdata(f
->vfs
).logger
->tinc(P_OPF_DEVICECHARACTERISTICS
, end
-start
);
484 static int Open(sqlite3_vfs
*vfs
, const char *name
, sqlite3_file
*file
,
485 int flags
, int *oflags
)
487 static const sqlite3_io_methods io
= {
492 Truncate
, /* xTruncate */
494 FileSize
, /* xFileSize */
496 Unlock
, /* xUnlock */
497 CheckReservedLock
, /* xCheckReservedLock */
498 FileControl
, /* xFileControl */
499 SectorSize
, /* xSectorSize */
500 DeviceCharacteristics
/* xDeviceCharacteristics */
503 auto start
= ceph::coarse_mono_clock::now();
505 auto& cluster
= getdata(vfs
).cluster
;
507 /* we are not going to create temporary files */
509 dv(-1) << " cannot open temporary database" << dendl
;
510 return SQLITE_CANTOPEN
;
512 auto path
= std::string_view(name
);
513 if (path
== ":memory:") {
514 dv(-1) << " cannot open temporary database" << dendl
;
518 dv(5) << path
<< " flags=" << std::hex
<< flags
<< dendl
;
520 auto f
= new (file
)cephsqlite_file();
522 if (!parsepath(path
, &f
->loc
)) {
523 ceph_assert(0); /* xFullPathname validates! */
528 if (int rc
= makestriper(vfs
, f
->loc
, &f
->io
); rc
< 0) {
529 f
->~cephsqlite_file();
530 dv(5) << "cannot open striper" << dendl
;
534 if (flags
& SQLITE_OPEN_CREATE
) {
535 dv(10) << "OPEN_CREATE" << dendl
;
536 if (int rc
= f
->io
.rs
->create(); rc
< 0 && rc
!= -EEXIST
) {
537 if (rc
== -ENOENT
&& !gotmap
) {
538 /* we may have an out of date OSDMap which cancels the op in the
539 * Objecter. Try to get a new one and retry. This is mostly noticeable
540 * in testing when pools are getting created/deleted left and right.
542 dv(5) << "retrying create after getting latest OSDMap" << dendl
;
543 cluster
.wait_for_latest_osdmap();
547 dv(5) << "file cannot be created: " << cpp_strerror(rc
) << dendl
;
552 if (int rc
= f
->io
.rs
->open(); rc
< 0) {
553 if (rc
== -ENOENT
&& !gotmap
) {
554 /* See comment above for create case. */
555 dv(5) << "retrying open after getting latest OSDMap" << dendl
;
556 cluster
.wait_for_latest_osdmap();
560 dv(10) << "cannot open striper: " << cpp_strerror(rc
) << dendl
;
567 f
->base
.pMethods
= &io
;
568 auto end
= ceph::coarse_mono_clock::now();
569 getdata(vfs
).logger
->tinc(P_OP_OPEN
, end
-start
);
574 ** Delete the file identified by argument path. If the dsync parameter
575 ** is non-zero, then ensure the file-system modification to delete the
576 ** file has been synced to disk before returning.
578 static int Delete(sqlite3_vfs
* vfs
, const char* path
, int dsync
)
580 auto start
= ceph::coarse_mono_clock::now();
581 dv(5) << "'" << path
<< "', " << dsync
<< dendl
;
583 cephsqlite_fileloc fileloc
;
584 if (!parsepath(path
, &fileloc
)) {
585 dv(5) << "path does not parse!" << dendl
;
586 return SQLITE_NOTFOUND
;
589 cephsqlite_fileio io
;
590 if (int rc
= makestriper(vfs
, fileloc
, &io
); rc
< 0) {
591 dv(5) << "cannot open striper" << dendl
;
595 if (int rc
= io
.rs
->lock(0); rc
< 0) {
599 if (int rc
= io
.rs
->remove(); rc
< 0) {
600 dv(5) << "= " << rc
<< dendl
;
601 return SQLITE_IOERR_DELETE
;
604 /* No need to unlock */
605 dv(5) << "= 0" << dendl
;
606 auto end
= ceph::coarse_mono_clock::now();
607 getdata(vfs
).logger
->tinc(P_OP_DELETE
, end
-start
);
613 ** Query the file-system to see if the named file exists, is readable or
614 ** is both readable and writable.
616 static int Access(sqlite3_vfs
* vfs
, const char* path
, int flags
, int* result
)
618 auto start
= ceph::coarse_mono_clock::now();
619 dv(5) << path
<< " " << std::hex
<< flags
<< dendl
;
621 cephsqlite_fileloc fileloc
;
622 if (!parsepath(path
, &fileloc
)) {
623 dv(5) << "path does not parse!" << dendl
;
624 return SQLITE_NOTFOUND
;
627 cephsqlite_fileio io
;
628 if (int rc
= makestriper(vfs
, fileloc
, &io
); rc
< 0) {
629 dv(5) << "cannot open striper" << dendl
;
633 if (int rc
= io
.rs
->open(); rc
< 0) {
638 dv(10) << "cannot open striper: " << cpp_strerror(rc
) << dendl
;
645 if (int rc
= io
.rs
->stat(&size
); rc
< 0) {
646 dv(5) << "= " << rc
<< " (" << cpp_strerror(rc
) << ")" << dendl
;
649 dv(5) << "= 0" << dendl
;
653 auto end
= ceph::coarse_mono_clock::now();
654 getdata(vfs
).logger
->tinc(P_OP_ACCESS
, end
-start
);
658 /* This method is only called once for each database. It provides a chance to
659 * reformat the path into a canonical format.
661 static int FullPathname(sqlite3_vfs
* vfs
, const char* ipath
, int opathlen
, char* opath
)
663 auto start
= ceph::coarse_mono_clock::now();
664 auto path
= std::string_view(ipath
);
666 dv(5) << "1: " << path
<< dendl
;
668 cephsqlite_fileloc fileloc
;
669 if (!parsepath(path
, &fileloc
)) {
670 dv(5) << "path does not parse!" << dendl
;
671 return SQLITE_NOTFOUND
;
673 dv(5) << " parsed " << fileloc
<< dendl
;
675 auto p
= fmt::format("{}:{}/{}", fileloc
.pool
, fileloc
.radosns
, fileloc
.name
);
676 if (p
.size() >= (size_t)opathlen
) {
677 dv(5) << "path too long!" << dendl
;
678 return SQLITE_CANTOPEN
;
680 strcpy(opath
, p
.c_str());
681 dv(5) << " output " << p
<< dendl
;
683 auto end
= ceph::coarse_mono_clock::now();
684 getdata(vfs
).logger
->tinc(P_OP_FULLPATHNAME
, end
-start
);
688 static int CurrentTime(sqlite3_vfs
* vfs
, sqlite3_int64
* time
)
690 auto start
= ceph::coarse_mono_clock::now();
691 dv(5) << time
<< dendl
;
693 auto t
= ceph_clock_now();
694 *time
= t
.to_msec() + 2440587.5*86400000; /* julian days since 1970 converted to ms */
696 auto end
= ceph::coarse_mono_clock::now();
697 getdata(vfs
).logger
->tinc(P_OP_CURRENTTIME
, end
-start
);
701 LIBCEPHSQLITE_API
int cephsqlite_setcct(CephContext
* cct
, char** ident
)
703 ldout(cct
, 1) << "cct: " << cct
<< dendl
;
705 if (sqlite3_api
== nullptr) {
706 lderr(cct
) << "API violation: must have sqlite3 init libcephsqlite" << dendl
;
710 auto vfs
= sqlite3_vfs_find("ceph");
712 lderr(cct
) << "API violation: must have sqlite3 init libcephsqlite" << dendl
;
716 auto& appd
= getdata(vfs
);
718 if (int rc
= appd
.setup_perf(); rc
< 0) {
722 if (int rc
= appd
.init_cluster(); rc
< 0) {
727 auto s
= appd
.cluster
.get_addrs();
729 *ident
= strdup(s
.c_str());
732 ldout(cct
, 1) << "complete" << dendl
;
737 static void f_perf(sqlite3_context
* ctx
, int argc
, sqlite3_value
** argv
)
739 auto vfs
= (sqlite3_vfs
*)sqlite3_user_data(ctx
);
741 auto&& appd
= getdata(vfs
);
742 JSONFormatter
f(false);
743 f
.open_object_section("ceph_perf");
744 appd
.logger
->dump_formatted(&f
, false, false);
745 appd
.striper_logger
->dump_formatted(&f
, false, false);
748 CachedStackStringStream css
;
750 auto sv
= css
->strv();
751 dv(20) << " = " << sv
<< dendl
;
752 sqlite3_result_text(ctx
, sv
.data(), sv
.size(), SQLITE_TRANSIENT
);
756 static void f_status(sqlite3_context
* ctx
, int argc
, sqlite3_value
** argv
)
758 auto vfs
= (sqlite3_vfs
*)sqlite3_user_data(ctx
);
760 auto&& appd
= getdata(vfs
);
761 JSONFormatter
f(false);
762 f
.open_object_section("ceph_status");
763 f
.dump_int("id", appd
.cluster
.get_instance_id());
764 f
.dump_string("addr", appd
.cluster
.get_addrs());
767 CachedStackStringStream css
;
769 auto sv
= css
->strv();
770 dv(20) << " = " << sv
<< dendl
;
771 sqlite3_result_text(ctx
, sv
.data(), sv
.size(), SQLITE_TRANSIENT
);
775 static int autoreg(sqlite3
* db
, char** err
, const struct sqlite3_api_routines
* thunk
)
777 auto vfs
= sqlite3_vfs_find("ceph");
779 ceph_abort("ceph vfs not found");
782 if (int rc
= sqlite3_create_function(db
, "ceph_perf", 0, SQLITE_UTF8
, vfs
, f_perf
, nullptr, nullptr); rc
) {
786 if (int rc
= sqlite3_create_function(db
, "ceph_status", 0, SQLITE_UTF8
, vfs
, f_status
, nullptr, nullptr); rc
) {
793 /* You may wonder why we have an atexit handler? After all, atexit/exit creates
794 * a mess for multithreaded programs. Well, sqlite3 does not have an API for
795 * orderly removal of extensions. And, in fact, any API we might make
796 * unofficially (such as "sqlite3_cephsqlite_fini") would potentially race with
797 * other threads interacting with sqlite3 + the "ceph" VFS. There is a method
798 * for removing a VFS but it's not called by sqlite3 in any error scenario and
799 * there is no mechanism within sqlite3 to tell a VFS to unregister itself.
801 * This all would be mostly okay if /bin/sqlite3 did not call exit(3), but it
802 * does. (This occurs only for the sqlite3 binary, not when used as a library.)
803 * exit(3) calls destructors on all static-duration structures for the program.
804 * This breaks any outstanding threads created by the librados handle in all
805 * sorts of fantastic ways from C++ exceptions to memory faults. In general,
806 * Ceph libraries are not tolerant of exit(3) (_exit(3) is okay!). Applications
807 * must clean up after themselves or _exit(3).
809 * So, we have an atexit handler for libcephsqlite. This simply shuts down the
810 * RADOS handle. We can be assured that this occurs before any ceph library
811 * static-duration structures are destructed due to ordering guarantees by
812 * exit(3). Generally, we only see this called when the VFS is used by
813 * /bin/sqlite3 and only during sqlite3 error scenarios (like I/O errors
814 * arising from blocklisting).
817 static void cephsqlite_atexit()
819 if (auto vfs
= sqlite3_vfs_find("ceph"); vfs
) {
821 auto&& appd
= getdata(vfs
);
823 vfs
->pAppData
= nullptr;
828 LIBCEPHSQLITE_API
int sqlite3_cephsqlite_init(sqlite3
* db
, char** err
, const sqlite3_api_routines
* api
)
830 SQLITE_EXTENSION_INIT2(api
);
832 auto vfs
= sqlite3_vfs_find("ceph");
834 vfs
= (sqlite3_vfs
*) calloc(1, sizeof(sqlite3_vfs
));
835 auto appd
= new cephsqlite_appdata
;
837 vfs
->szOsFile
= sizeof(struct cephsqlite_file
);
838 vfs
->mxPathname
= 4096;
840 vfs
->pAppData
= appd
;
842 vfs
->xDelete
= Delete
;
843 vfs
->xAccess
= Access
;
844 vfs
->xFullPathname
= FullPathname
;
845 vfs
->xCurrentTimeInt64
= CurrentTime
;
846 if (int rc
= sqlite3_vfs_register(vfs
, 0); rc
) {
853 if (int rc
= std::atexit(cephsqlite_atexit
); rc
) {
854 return SQLITE_INTERNAL
;
857 if (int rc
= sqlite3_auto_extension((void(*)(void))autoreg
); rc
) {
860 if (int rc
= autoreg(db
, err
, api
); rc
) {
864 return SQLITE_OK_LOAD_PERMANENTLY
;