]> git.proxmox.com Git - ceph.git/blob - ceph/src/libcephsqlite.cc
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / libcephsqlite.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 /*
5 * Ceph - scalable distributed file system
6 *
7 * Copyright (C) 2021 Red Hat, Inc.
8 *
9 * This is free software; you can redistribute it and/or modify it under the
10 * terms of the GNU Lesser General Public License version 2.1, as published by
11 * the Free Software Foundation. See file COPYING.
12 *
13 */
14
15 #include <boost/smart_ptr/intrusive_ptr.hpp>
16 #include <fmt/format.h>
17
18 #include <fcntl.h>
19 #include <stdio.h>
20 #include <sys/stat.h>
21 #include <sys/types.h>
22 #include <unistd.h>
23
24 #include <iomanip>
25 #include <iostream>
26 #include <regex>
27 #include <sstream>
28 #include <string_view>
29
30 #include <limits.h>
31 #include <string.h>
32
33 #include <sqlite3ext.h>
34 SQLITE_EXTENSION_INIT1
35
36 #include "include/ceph_assert.h"
37 #include "include/rados/librados.hpp"
38
39 #include "common/Clock.h"
40 #include "common/Formatter.h"
41 #include "common/ceph_argparse.h"
42 #include "common/ceph_mutex.h"
43 #include "common/common_init.h"
44 #include "common/config.h"
45 #include "common/debug.h"
46 #include "common/errno.h"
47 #include "common/perf_counters.h"
48 #include "common/version.h"
49
50 #include "include/libcephsqlite.h"
51 #include "SimpleRADOSStriper.h"
52
/* Logging plumbing: messages are filed under the cephsqlite subsystem and
 * prefixed with the name of the emitting function. */
#define dout_subsys ceph_subsys_cephsqlite
#undef dout_prefix
#define dout_prefix *_dout << "cephsqlite: " << __func__ << ": "
/* d(vfs,lvl): log at `lvl` through the CephContext of `vfs`, tagged with the
 * RADOS client instance id of that VFS. */
#define d(vfs,lvl) \
  ldout(getcct(vfs), (lvl)) \
    << "(client." << getdata(vfs).cluster.get_instance_id() << ") "
/* dv(lvl): d() on the in-scope variable named `vfs`. */
#define dv(lvl) d(vfs,(lvl))
/* df(lvl): d() on the in-scope cephsqlite_file* named `f`; also prints f->loc. */
#define df(lvl) \
  d(f->vfs,(lvl)) << f->loc << " "
59
/* Perf counter indices for the "libcephsqlite_vfs" counter set.  P_FIRST and
 * P_LAST are the bounds handed to PerfCountersBuilder; everything in between
 * is a time-average counter for one VFS entry point. */
enum {
  P_FIRST = 0xf0000,

  /* sqlite3_vfs level operations */
  P_OP_OPEN,
  P_OP_DELETE,
  P_OP_ACCESS,
  P_OP_FULLPATHNAME,
  P_OP_CURRENTTIME,

  /* sqlite3_io_methods (per open file) operations */
  P_OPF_CLOSE,
  P_OPF_READ,
  P_OPF_WRITE,
  P_OPF_TRUNCATE,
  P_OPF_SYNC,
  P_OPF_FILESIZE,
  P_OPF_LOCK,
  P_OPF_UNLOCK,
  P_OPF_CHECKRESERVEDLOCK,
  P_OPF_FILECONTROL,
  P_OPF_SECTORSIZE,
  P_OPF_DEVICECHARACTERISTICS,

  P_LAST,
};
81
82 struct cephsqlite_appdata {
83 ~cephsqlite_appdata() {
84 if (logger) {
85 cct->get_perfcounters_collection()->remove(logger.get());
86 }
87 if (striper_logger) {
88 cct->get_perfcounters_collection()->remove(striper_logger.get());
89 }
90 }
91 int setup_perf() {
92 ceph_assert(cct);
93 PerfCountersBuilder plb(cct.get(), "libcephsqlite_vfs", P_FIRST, P_LAST);
94 plb.add_time_avg(P_OP_OPEN, "op_open", "Time average of Open operations");
95 plb.add_time_avg(P_OP_DELETE, "op_delete", "Time average of Delete operations");
96 plb.add_time_avg(P_OP_ACCESS, "op_access", "Time average of Access operations");
97 plb.add_time_avg(P_OP_FULLPATHNAME, "op_fullpathname", "Time average of FullPathname operations");
98 plb.add_time_avg(P_OP_CURRENTTIME, "op_currenttime", "Time average of Currenttime operations");
99 plb.add_time_avg(P_OPF_CLOSE, "opf_close", "Time average of Close file operations");
100 plb.add_time_avg(P_OPF_READ, "opf_read", "Time average of Read file operations");
101 plb.add_time_avg(P_OPF_WRITE, "opf_write", "Time average of Write file operations");
102 plb.add_time_avg(P_OPF_TRUNCATE, "opf_truncate", "Time average of Truncate file operations");
103 plb.add_time_avg(P_OPF_SYNC, "opf_sync", "Time average of Sync file operations");
104 plb.add_time_avg(P_OPF_FILESIZE, "opf_filesize", "Time average of FileSize file operations");
105 plb.add_time_avg(P_OPF_LOCK, "opf_lock", "Time average of Lock file operations");
106 plb.add_time_avg(P_OPF_UNLOCK, "opf_unlock", "Time average of Unlock file operations");
107 plb.add_time_avg(P_OPF_CHECKRESERVEDLOCK, "opf_checkreservedlock", "Time average of CheckReservedLock file operations");
108 plb.add_time_avg(P_OPF_FILECONTROL, "opf_filecontrol", "Time average of FileControl file operations");
109 plb.add_time_avg(P_OPF_SECTORSIZE, "opf_sectorsize", "Time average of SectorSize file operations");
110 plb.add_time_avg(P_OPF_DEVICECHARACTERISTICS, "opf_devicecharacteristics", "Time average of DeviceCharacteristics file operations");
111 logger.reset(plb.create_perf_counters());
112 if (int rc = SimpleRADOSStriper::config_logger(cct.get(), "libcephsqlite_striper", &striper_logger); rc < 0) {
113 return rc;
114 }
115 cct->get_perfcounters_collection()->add(logger.get());
116 cct->get_perfcounters_collection()->add(striper_logger.get());
117 return 0;
118 }
119 int init_cluster() {
120 ceph_assert(cct);
121 ldout(cct, 5) << "initializing RADOS handle as " << cct->_conf->name << dendl;
122 if (int rc = cluster.init_with_context(cct.get()); rc < 0) {
123 lderr(cct) << "cannot initialize RADOS: " << cpp_strerror(rc) << dendl;
124 return rc;
125 }
126 if (int rc = cluster.connect(); rc < 0) {
127 lderr(cct) << "cannot connect: " << cpp_strerror(rc) << dendl;
128 return rc;
129 }
130 auto s = cluster.get_addrs();
131 ldout(cct, 5) << "completed connection to RADOS with address " << s << dendl;
132 return 0;
133 }
134
135 boost::intrusive_ptr<CephContext> cct;
136 std::unique_ptr<PerfCounters> logger;
137 std::shared_ptr<PerfCounters> striper_logger;
138 librados::Rados cluster;
139 struct sqlite3_vfs vfs{};
140 };
141
/* Location of a database parsed from a path: the three captures produced by
 * parsepath(). */
struct cephsqlite_fileloc {
  std::string pool;    // first capture: pool name, or "*<digits>" form
  std::string radosns; // second capture: RADOS namespace, may be empty
  std::string name;    // third capture: name handed to the striper
};
147
148 struct cephsqlite_fileio {
149 librados::IoCtx ioctx;
150 std::unique_ptr<SimpleRADOSStriper> rs;
151 };
152
153 std::ostream& operator<<(std::ostream &out, const cephsqlite_fileloc& fileloc) {
154 return out
155 << "["
156 << fileloc.pool
157 << ":"
158 << fileloc.radosns
159 << "/"
160 << fileloc.name
161 << "]"
162 ;
163 }
164
165 struct cephsqlite_file {
166 sqlite3_file base;
167 struct sqlite3_vfs* vfs = nullptr;
168 int flags = 0;
169 // There are 5 lock states: https://sqlite.org/c3ref/c_lock_exclusive.html
170 int lock = 0;
171 struct cephsqlite_fileloc loc{};
172 struct cephsqlite_fileio io{};
173 };
174
175
/* Recover the cephsqlite_appdata stored in a sqlite3_vfs's pAppData. */
#define getdata(vfs) \
  (*static_cast<cephsqlite_appdata*>((vfs)->pAppData))
177
178 static CephContext* getcct(sqlite3_vfs* vfs)
179 {
180 auto&& appd = getdata(vfs);
181 auto& cct = appd.cct;
182 if (cct) {
183 return cct.get();
184 }
185
186 /* bootstrap cct */
187 std::vector<const char*> env_args;
188 env_to_vec(env_args, "CEPH_ARGS");
189 std::string cluster, conf_file_list; // unused
190 CephInitParameters iparams = ceph_argparse_early_args(env_args, CEPH_ENTITY_TYPE_CLIENT, &cluster, &conf_file_list);
191 cct = boost::intrusive_ptr<CephContext>(common_preinit(iparams, CODE_ENVIRONMENT_LIBRARY, 0), false);
192 cct->_conf.parse_config_files(nullptr, &std::cerr, 0);
193 cct->_conf.parse_env(cct->get_module_type()); // environment variables override
194 cct->_conf.apply_changes(nullptr);
195 common_init_finish(cct.get());
196
197 if (int rc = appd.setup_perf(); rc < 0) {
198 ceph_abort("cannot setup perf counters");
199 }
200
201 if (int rc = appd.init_cluster(); rc < 0) {
202 ceph_abort("cannot setup RADOS cluster handle");
203 }
204
205 return cct.get();
206 }
207
208 static int Lock(sqlite3_file *file, int ilock)
209 {
210 auto f = (cephsqlite_file*)file;
211 auto start = ceph::coarse_mono_clock::now();
212 df(5) << std::hex << ilock << dendl;
213
214 auto& lock = f->lock;
215 ceph_assert(!f->io.rs->is_locked() || lock > SQLITE_LOCK_NONE);
216 ceph_assert(lock <= ilock);
217 if (!f->io.rs->is_locked() && ilock > SQLITE_LOCK_NONE) {
218 if (int rc = f->io.rs->lock(0); rc < 0) {
219 df(5) << "failed: " << rc << dendl;
220 return SQLITE_IOERR;
221 }
222 }
223
224 lock = ilock;
225 auto end = ceph::coarse_mono_clock::now();
226 getdata(f->vfs).logger->tinc(P_OPF_LOCK, end-start);
227 return SQLITE_OK;
228 }
229
230 static int Unlock(sqlite3_file *file, int ilock)
231 {
232 auto f = (cephsqlite_file*)file;
233 auto start = ceph::coarse_mono_clock::now();
234 df(5) << std::hex << ilock << dendl;
235
236 auto& lock = f->lock;
237 ceph_assert(lock == SQLITE_LOCK_NONE || (lock > SQLITE_LOCK_NONE && f->io.rs->is_locked()));
238 ceph_assert(lock >= ilock);
239 if (ilock <= SQLITE_LOCK_NONE && SQLITE_LOCK_NONE < lock) {
240 if (int rc = f->io.rs->unlock(); rc < 0) {
241 df(5) << "failed: " << rc << dendl;
242 return SQLITE_IOERR;
243 }
244 }
245
246 lock = ilock;
247 auto end = ceph::coarse_mono_clock::now();
248 getdata(f->vfs).logger->tinc(P_OPF_UNLOCK, end-start);
249 return SQLITE_OK;
250 }
251
252 static int CheckReservedLock(sqlite3_file *file, int *result)
253 {
254 auto f = (cephsqlite_file*)file;
255 auto start = ceph::coarse_mono_clock::now();
256 df(5) << dendl;
257
258 auto& lock = f->lock;
259 if (lock > SQLITE_LOCK_SHARED) {
260 *result = 1;
261 }
262
263 df(10);
264 f->io.rs->print_lockers(*_dout);
265 *_dout << dendl;
266
267 *result = 0;
268 auto end = ceph::coarse_mono_clock::now();
269 getdata(f->vfs).logger->tinc(P_OPF_CHECKRESERVEDLOCK, end-start);
270 return SQLITE_OK;
271 }
272
273 static int Close(sqlite3_file *file)
274 {
275 auto f = (cephsqlite_file*)file;
276 auto start = ceph::coarse_mono_clock::now();
277 df(5) << dendl;
278 f->~cephsqlite_file();
279 auto end = ceph::coarse_mono_clock::now();
280 getdata(f->vfs).logger->tinc(P_OPF_CLOSE, end-start);
281 return SQLITE_OK;
282 }
283
284 static int Read(sqlite3_file *file, void *buf, int len, sqlite_int64 off)
285 {
286 auto f = (cephsqlite_file*)file;
287 auto start = ceph::coarse_mono_clock::now();
288 df(5) << buf << " " << off << "~" << len << dendl;
289
290 if (int rc = f->io.rs->read(buf, len, off); rc < 0) {
291 df(5) << "read failed: " << cpp_strerror(rc) << dendl;
292 return SQLITE_IOERR_READ;
293 } else {
294 df(5) << "= " << rc << dendl;
295 auto end = ceph::coarse_mono_clock::now();
296 getdata(f->vfs).logger->tinc(P_OPF_READ, end-start);
297 if (rc < len) {
298 memset(buf, 0, len-rc);
299 return SQLITE_IOERR_SHORT_READ;
300 } else {
301 return SQLITE_OK;
302 }
303 }
304 }
305
306 static int Write(sqlite3_file *file, const void *buf, int len, sqlite_int64 off)
307 {
308 auto f = (cephsqlite_file*)file;
309 auto start = ceph::coarse_mono_clock::now();
310 df(5) << off << "~" << len << dendl;
311
312 if (int rc = f->io.rs->write(buf, len, off); rc < 0) {
313 df(5) << "write failed: " << cpp_strerror(rc) << dendl;
314 return SQLITE_IOERR_WRITE;
315 } else {
316 df(5) << "= " << rc << dendl;
317 auto end = ceph::coarse_mono_clock::now();
318 getdata(f->vfs).logger->tinc(P_OPF_WRITE, end-start);
319 return SQLITE_OK;
320 }
321
322 }
323
324 static int Truncate(sqlite3_file *file, sqlite_int64 size)
325 {
326 auto f = (cephsqlite_file*)file;
327 auto start = ceph::coarse_mono_clock::now();
328 df(5) << size << dendl;
329
330 if (int rc = f->io.rs->truncate(size); rc < 0) {
331 df(5) << "truncate failed: " << cpp_strerror(rc) << dendl;
332 return SQLITE_IOERR;
333 }
334
335 auto end = ceph::coarse_mono_clock::now();
336 getdata(f->vfs).logger->tinc(P_OPF_TRUNCATE, end-start);
337 return SQLITE_OK;
338 }
339
340 static int Sync(sqlite3_file *file, int flags)
341 {
342 auto f = (cephsqlite_file*)file;
343 auto start = ceph::coarse_mono_clock::now();
344 df(5) << flags << dendl;
345
346 if (int rc = f->io.rs->flush(); rc < 0) {
347 df(5) << "failed: " << cpp_strerror(rc) << dendl;
348 return SQLITE_IOERR;
349 }
350
351 df(5) << " = 0" << dendl;
352
353 auto end = ceph::coarse_mono_clock::now();
354 getdata(f->vfs).logger->tinc(P_OPF_SYNC, end-start);
355 return SQLITE_OK;
356 }
357
358
359 static int FileSize(sqlite3_file *file, sqlite_int64 *osize)
360 {
361 auto f = (cephsqlite_file*)file;
362 auto start = ceph::coarse_mono_clock::now();
363 df(5) << dendl;
364
365 uint64_t size = 0;
366 if (int rc = f->io.rs->stat(&size); rc < 0) {
367 df(5) << "stat failed: " << cpp_strerror(rc) << dendl;
368 return SQLITE_NOTFOUND;
369 }
370
371 *osize = (sqlite_int64)size;
372
373 df(5) << "= " << size << dendl;
374
375 auto end = ceph::coarse_mono_clock::now();
376 getdata(f->vfs).logger->tinc(P_OPF_FILESIZE, end-start);
377 return SQLITE_OK;
378 }
379
380
381 static bool parsepath(std::string_view path, struct cephsqlite_fileloc* fileloc)
382 {
383 static const std::regex re1{"^/*(\\*[[:digit:]]+):([[:alnum:]-_.]*)/([[:alnum:]-._]+)$"};
384 static const std::regex re2{"^/*([[:alnum:]-_.]+):([[:alnum:]-_.]*)/([[:alnum:]-._]+)$"};
385
386 std::cmatch cm;
387 if (!std::regex_match(path.data(), cm, re1)) {
388 if (!std::regex_match(path.data(), cm, re2)) {
389 return false;
390 }
391 }
392 fileloc->pool = cm[1];
393 fileloc->radosns = cm[2];
394 fileloc->name = cm[3];
395
396 return true;
397 }
398
399 static int makestriper(sqlite3_vfs* vfs, const cephsqlite_fileloc& loc, cephsqlite_fileio* io)
400 {
401 auto&& appd = getdata(vfs);
402 auto& cct = appd.cct;
403 auto& cluster = appd.cluster;
404 bool gotmap = false;
405
406 dv(10) << loc << dendl;
407
408 enoent_retry:
409 if (loc.pool[0] == '*') {
410 std::string err;
411 int64_t id = strict_strtoll(loc.pool.c_str()+1, 10, &err);
412 ceph_assert(err.empty());
413 if (int rc = cluster.ioctx_create2(id, io->ioctx); rc < 0) {
414 if (rc == -ENOENT && !gotmap) {
415 cluster.wait_for_latest_osdmap();
416 gotmap = true;
417 goto enoent_retry;
418 }
419 dv(10) << "cannot create ioctx: " << cpp_strerror(rc) << dendl;
420 return rc;
421 }
422 } else {
423 if (int rc = cluster.ioctx_create(loc.pool.c_str(), io->ioctx); rc < 0) {
424 if (rc == -ENOENT && !gotmap) {
425 cluster.wait_for_latest_osdmap();
426 gotmap = true;
427 goto enoent_retry;
428 }
429 dv(10) << "cannot create ioctx: " << cpp_strerror(rc) << dendl;
430 return rc;
431 }
432 }
433
434 if (!loc.radosns.empty())
435 io->ioctx.set_namespace(loc.radosns);
436
437 io->rs = std::make_unique<SimpleRADOSStriper>(io->ioctx, loc.name);
438 io->rs->set_logger(appd.striper_logger);
439 io->rs->set_lock_timeout(cct->_conf.get_val<std::chrono::milliseconds>("cephsqlite_lock_renewal_timeout"));
440 io->rs->set_lock_interval(cct->_conf.get_val<std::chrono::milliseconds>("cephsqlite_lock_renewal_interval"));
441 io->rs->set_blocklist_the_dead(cct->_conf.get_val<bool>("cephsqlite_blocklist_dead_locker"));
442
443 return 0;
444 }
445
446 static int SectorSize(sqlite3_file* sf)
447 {
448 static const int size = 65536;
449 auto start = ceph::coarse_mono_clock::now();
450 auto f = (cephsqlite_file*)sf;
451 df(5) << " = " << size << dendl;
452 auto end = ceph::coarse_mono_clock::now();
453 getdata(f->vfs).logger->tinc(P_OPF_SECTORSIZE, end-start);
454 return size;
455 }
456
457 static int FileControl(sqlite3_file* sf, int op, void *arg)
458 {
459 auto f = (cephsqlite_file*)sf;
460 auto start = ceph::coarse_mono_clock::now();
461 df(5) << op << ", " << arg << dendl;
462 auto end = ceph::coarse_mono_clock::now();
463 getdata(f->vfs).logger->tinc(P_OPF_FILECONTROL, end-start);
464 return SQLITE_NOTFOUND;
465 }
466
467 static int DeviceCharacteristics(sqlite3_file* sf)
468 {
469 auto f = (cephsqlite_file*)sf;
470 auto start = ceph::coarse_mono_clock::now();
471 df(5) << dendl;
472 static const int c = 0
473 |SQLITE_IOCAP_ATOMIC
474 |SQLITE_IOCAP_POWERSAFE_OVERWRITE
475 |SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN
476 |SQLITE_IOCAP_SAFE_APPEND
477 ;
478 auto end = ceph::coarse_mono_clock::now();
479 getdata(f->vfs).logger->tinc(P_OPF_DEVICECHARACTERISTICS, end-start);
480 return c;
481 }
482
483 static int Open(sqlite3_vfs *vfs, const char *name, sqlite3_file *file,
484 int flags, int *oflags)
485 {
486 static const sqlite3_io_methods io = {
487 1, /* iVersion */
488 Close, /* xClose */
489 Read, /* xRead */
490 Write, /* xWrite */
491 Truncate, /* xTruncate */
492 Sync, /* xSync */
493 FileSize, /* xFileSize */
494 Lock, /* xLock */
495 Unlock, /* xUnlock */
496 CheckReservedLock, /* xCheckReservedLock */
497 FileControl, /* xFileControl */
498 SectorSize, /* xSectorSize */
499 DeviceCharacteristics /* xDeviceCharacteristics */
500 };
501
502 auto start = ceph::coarse_mono_clock::now();
503 bool gotmap = false;
504 auto& cluster = getdata(vfs).cluster;
505
506 /* we are not going to create temporary files */
507 if (name == NULL) {
508 dv(-1) << " cannot open temporary database" << dendl;
509 return SQLITE_CANTOPEN;
510 }
511 auto path = std::string_view(name);
512 if (path == ":memory:"sv) {
513 dv(-1) << " cannot open temporary database" << dendl;
514 return SQLITE_IOERR;
515 }
516
517 dv(5) << path << " flags=" << std::hex << flags << dendl;
518
519 auto f = new (file)cephsqlite_file();
520 f->vfs = vfs;
521 if (!parsepath(path, &f->loc)) {
522 ceph_assert(0); /* xFullPathname validates! */
523 }
524 f->flags = flags;
525
526 enoent_retry:
527 if (int rc = makestriper(vfs, f->loc, &f->io); rc < 0) {
528 f->~cephsqlite_file();
529 dv(5) << "cannot open striper" << dendl;
530 return SQLITE_IOERR;
531 }
532
533 if (flags & SQLITE_OPEN_CREATE) {
534 dv(10) << "OPEN_CREATE" << dendl;
535 if (int rc = f->io.rs->create(); rc < 0 && rc != -EEXIST) {
536 if (rc == -ENOENT && !gotmap) {
537 /* we may have an out of date OSDMap which cancels the op in the
538 * Objecter. Try to get a new one and retry. This is mostly noticable
539 * in testing when pools are getting created/deleted left and right.
540 */
541 dv(5) << "retrying create after getting latest OSDMap" << dendl;
542 cluster.wait_for_latest_osdmap();
543 gotmap = true;
544 goto enoent_retry;
545 }
546 dv(5) << "file cannot be created: " << cpp_strerror(rc) << dendl;
547 return SQLITE_IOERR;
548 }
549 }
550
551 if (int rc = f->io.rs->open(); rc < 0) {
552 if (rc == -ENOENT && !gotmap) {
553 /* See comment above for create case. */
554 dv(5) << "retrying open after getting latest OSDMap" << dendl;
555 cluster.wait_for_latest_osdmap();
556 gotmap = true;
557 goto enoent_retry;
558 }
559 dv(10) << "cannot open striper: " << cpp_strerror(rc) << dendl;
560 return rc;
561 }
562
563 if (oflags) {
564 *oflags = flags;
565 }
566 f->base.pMethods = &io;
567 auto end = ceph::coarse_mono_clock::now();
568 getdata(vfs).logger->tinc(P_OP_OPEN, end-start);
569 return SQLITE_OK;
570 }
571
572 /*
573 ** Delete the file identified by argument path. If the dsync parameter
574 ** is non-zero, then ensure the file-system modification to delete the
575 ** file has been synced to disk before returning.
576 */
577 static int Delete(sqlite3_vfs* vfs, const char* path, int dsync)
578 {
579 auto start = ceph::coarse_mono_clock::now();
580 dv(5) << "'" << path << "', " << dsync << dendl;
581
582 cephsqlite_fileloc fileloc;
583 if (!parsepath(path, &fileloc)) {
584 dv(5) << "path does not parse!" << dendl;
585 return SQLITE_NOTFOUND;
586 }
587
588 cephsqlite_fileio io;
589 if (int rc = makestriper(vfs, fileloc, &io); rc < 0) {
590 dv(5) << "cannot open striper" << dendl;
591 return SQLITE_IOERR;
592 }
593
594 if (int rc = io.rs->lock(0); rc < 0) {
595 return SQLITE_IOERR;
596 }
597
598 if (int rc = io.rs->remove(); rc < 0) {
599 dv(5) << "= " << rc << dendl;
600 return SQLITE_IOERR_DELETE;
601 }
602
603 /* No need to unlock */
604 dv(5) << "= 0" << dendl;
605 auto end = ceph::coarse_mono_clock::now();
606 getdata(vfs).logger->tinc(P_OP_DELETE, end-start);
607
608 return SQLITE_OK;
609 }
610
611 /*
612 ** Query the file-system to see if the named file exists, is readable or
613 ** is both readable and writable.
614 */
615 static int Access(sqlite3_vfs* vfs, const char* path, int flags, int* result)
616 {
617 auto start = ceph::coarse_mono_clock::now();
618 dv(5) << path << " " << std::hex << flags << dendl;
619
620 cephsqlite_fileloc fileloc;
621 if (!parsepath(path, &fileloc)) {
622 dv(5) << "path does not parse!" << dendl;
623 return SQLITE_NOTFOUND;
624 }
625
626 cephsqlite_fileio io;
627 if (int rc = makestriper(vfs, fileloc, &io); rc < 0) {
628 dv(5) << "cannot open striper" << dendl;
629 return SQLITE_IOERR;
630 }
631
632 if (int rc = io.rs->open(); rc < 0) {
633 if (rc == -ENOENT) {
634 *result = 0;
635 return SQLITE_OK;
636 } else {
637 dv(10) << "cannot open striper: " << cpp_strerror(rc) << dendl;
638 *result = 0;
639 return SQLITE_IOERR;
640 }
641 }
642
643 uint64_t size = 0;
644 if (int rc = io.rs->stat(&size); rc < 0) {
645 dv(5) << "= " << rc << " (" << cpp_strerror(rc) << ")" << dendl;
646 *result = 0;
647 } else {
648 dv(5) << "= 0" << dendl;
649 *result = 1;
650 }
651
652 auto end = ceph::coarse_mono_clock::now();
653 getdata(vfs).logger->tinc(P_OP_ACCESS, end-start);
654 return SQLITE_OK;
655 }
656
657 /* This method is only called once for each database. It provides a chance to
658 * reformat the path into a canonical format.
659 */
660 static int FullPathname(sqlite3_vfs* vfs, const char* ipath, int opathlen, char* opath)
661 {
662 auto start = ceph::coarse_mono_clock::now();
663 auto path = std::string_view(ipath);
664
665 dv(5) << "1: " << path << dendl;
666
667 cephsqlite_fileloc fileloc;
668 if (!parsepath(path, &fileloc)) {
669 dv(5) << "path does not parse!" << dendl;
670 return SQLITE_NOTFOUND;
671 }
672 dv(5) << " parsed " << fileloc << dendl;
673
674 auto p = fmt::format("{}:{}/{}", fileloc.pool, fileloc.radosns, fileloc.name);
675 if (p.size() >= (size_t)opathlen) {
676 dv(5) << "path too long!" << dendl;
677 return SQLITE_CANTOPEN;
678 }
679 strcpy(opath, p.c_str());
680 dv(5) << " output " << p << dendl;
681
682 auto end = ceph::coarse_mono_clock::now();
683 getdata(vfs).logger->tinc(P_OP_FULLPATHNAME, end-start);
684 return SQLITE_OK;
685 }
686
687 static int CurrentTime(sqlite3_vfs* vfs, sqlite3_int64* time)
688 {
689 auto start = ceph::coarse_mono_clock::now();
690 dv(5) << time << dendl;
691
692 auto t = ceph_clock_now();
693 *time = t.to_msec() + 2440587.5*86400000; /* julian days since 1970 converted to ms */
694
695 auto end = ceph::coarse_mono_clock::now();
696 getdata(vfs).logger->tinc(P_OP_CURRENTTIME, end-start);
697 return SQLITE_OK;
698 }
699
700 LIBCEPHSQLITE_API int cephsqlite_setcct(CephContext* cct, char** ident)
701 {
702 ldout(cct, 1) << "cct: " << cct << dendl;
703
704 if (sqlite3_api == nullptr) {
705 lderr(cct) << "API violation: must have sqlite3 init libcephsqlite" << dendl;
706 return -EINVAL;
707 }
708
709 auto vfs = sqlite3_vfs_find("ceph");
710 if (!vfs) {
711 lderr(cct) << "API violation: must have sqlite3 init libcephsqlite" << dendl;
712 return -EINVAL;
713 }
714
715 auto& appd = getdata(vfs);
716 appd.cct = cct;
717 if (int rc = appd.setup_perf(); rc < 0) {
718 appd.cct = nullptr;
719 return rc;
720 }
721 if (int rc = appd.init_cluster(); rc < 0) {
722 appd.cct = nullptr;
723 return rc;
724 }
725
726 auto s = appd.cluster.get_addrs();
727 if (ident) {
728 *ident = strdup(s.c_str());
729 }
730
731 ldout(cct, 1) << "complete" << dendl;
732
733 return 0;
734 }
735
736 static void f_perf(sqlite3_context* ctx, int argc, sqlite3_value** argv)
737 {
738 auto vfs = (sqlite3_vfs*)sqlite3_user_data(ctx);
739 dv(10) << dendl;
740 auto&& appd = getdata(vfs);
741 JSONFormatter f(false);
742 f.open_object_section("ceph_perf");
743 appd.logger->dump_formatted(&f, false);
744 appd.striper_logger->dump_formatted(&f, false);
745 f.close_section();
746 {
747 CachedStackStringStream css;
748 f.flush(*css);
749 auto sv = css->strv();
750 dv(20) << " = " << sv << dendl;
751 sqlite3_result_text(ctx, sv.data(), sv.size(), SQLITE_TRANSIENT);
752 }
753 }
754
755 static void f_status(sqlite3_context* ctx, int argc, sqlite3_value** argv)
756 {
757 auto vfs = (sqlite3_vfs*)sqlite3_user_data(ctx);
758 dv(10) << dendl;
759 auto&& appd = getdata(vfs);
760 JSONFormatter f(false);
761 f.open_object_section("ceph_status");
762 f.dump_int("id", appd.cluster.get_instance_id());
763 f.dump_string("addr", appd.cluster.get_addrs());
764 f.close_section();
765 {
766 CachedStackStringStream css;
767 f.flush(*css);
768 auto sv = css->strv();
769 dv(20) << " = " << sv << dendl;
770 sqlite3_result_text(ctx, sv.data(), sv.size(), SQLITE_TRANSIENT);
771 }
772 }
773
774 static int autoreg(sqlite3* db, char** err, const struct sqlite3_api_routines* thunk)
775 {
776 auto vfs = sqlite3_vfs_find("ceph");
777 if (!vfs) {
778 ceph_abort("ceph vfs not found");
779 }
780
781 if (int rc = sqlite3_create_function(db, "ceph_perf", 0, SQLITE_UTF8, vfs, f_perf, nullptr, nullptr); rc) {
782 return rc;
783 }
784
785 if (int rc = sqlite3_create_function(db, "ceph_status", 0, SQLITE_UTF8, vfs, f_status, nullptr, nullptr); rc) {
786 return rc;
787 }
788
789 return SQLITE_OK;
790 }
791
792 LIBCEPHSQLITE_API int sqlite3_cephsqlite_init(sqlite3* db, char** err, const sqlite3_api_routines* api)
793 {
794 SQLITE_EXTENSION_INIT2(api);
795
796 auto vfs = sqlite3_vfs_find("ceph");
797 if (!vfs) {
798 auto appd = new cephsqlite_appdata;
799 vfs = &appd->vfs;
800 vfs->iVersion = 2;
801 vfs->szOsFile = sizeof(struct cephsqlite_file);
802 vfs->mxPathname = 4096;
803 vfs->zName = "ceph";
804 vfs->pAppData = appd;
805 vfs->xOpen = Open;
806 vfs->xDelete = Delete;
807 vfs->xAccess = Access;
808 vfs->xFullPathname = FullPathname;
809 vfs->xCurrentTimeInt64 = CurrentTime;
810 appd->cct = nullptr;
811 sqlite3_vfs_register(vfs, 0);
812 }
813
814 if (int rc = sqlite3_auto_extension((void(*)(void))autoreg); rc) {
815 return rc;
816 }
817 if (int rc = autoreg(db, err, api); rc) {
818 return rc;
819 }
820
821 return SQLITE_OK_LOAD_PERMANENTLY;
822 }