]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_file.h
import ceph 14.2.5
[ceph.git] / ceph / src / rgw / rgw_file.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #ifndef RGW_FILE_H
5 #define RGW_FILE_H
6
7 #include "include/rados/rgw_file.h"
8
9 /* internal header */
10 #include <string.h>
11 #include <sys/stat.h>
12 #include <stdint.h>
13
14 #include <atomic>
15 #include <chrono>
16 #include <thread>
17 #include <mutex>
18 #include <vector>
19 #include <deque>
20 #include <algorithm>
21 #include <functional>
22 #include <boost/intrusive_ptr.hpp>
23 #include <boost/range/adaptor/reversed.hpp>
24 #include <boost/container/flat_map.hpp>
25 #include <boost/variant.hpp>
26 #include <boost/utility/string_ref.hpp>
27 #include <boost/optional.hpp>
28 #include "xxhash.h"
29 #include "include/buffer.h"
30 #include "common/cohort_lru.h"
31 #include "common/ceph_timer.h"
32 #include "rgw_common.h"
33 #include "rgw_user.h"
34 #include "rgw_lib.h"
35 #include "rgw_ldap.h"
36 #include "rgw_token.h"
37 #include "rgw_putobj_processor.h"
38 #include "rgw_aio_throttle.h"
39 #include "rgw_compression.h"
40
41
42 /* XXX
43 * ASSERT_H somehow not defined after all the above (which bring
44 * in common/debug.h [e.g., dout])
45 */
46 #include "include/ceph_assert.h"
47
48
/* Permission bits only: read/write/execute for user, group and other. */
#define RGW_RWXMODE (S_IRWXU | S_IRWXG | S_IRWXO)

/* RGW_RWXMODE with every execute bit cleared. */
#define RGW_RWMODE \
  (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)
53
54
55 namespace rgw {
56
/* Accepts any argument and does nothing with it. */
template <typename T>
static inline void ignore(T&& /* unused */)
{
}
59
60
61 namespace bi = boost::intrusive;
62
63 class RGWLibFS;
64 class RGWFileHandle;
65 class RGWWriteRequest;
66
/* Order two timespecs: compare seconds, falling back to nanoseconds on a tie. */
static inline bool operator <(const struct timespec& lhs,
                              const struct timespec& rhs) {
  return (lhs.tv_sec != rhs.tv_sec)
    ? (lhs.tv_sec < rhs.tv_sec)
    : (lhs.tv_nsec < rhs.tv_nsec);
}
74
/* Two timespecs are equal when both the second and nanosecond fields match. */
static inline bool operator ==(const struct timespec& lhs,
                               const struct timespec& rhs) {
  if (lhs.tv_sec != rhs.tv_sec)
    return false;
  return lhs.tv_nsec == rhs.tv_nsec;
}
80
81 /*
82 * XXX
83 * The current 64-bit, non-cryptographic hash used here is intended
84 * for prototyping only.
85 *
86 * However, the invariant being prototyped is that objects be
87 * identifiable by their hash components alone. We believe this can
88 * be legitimately implemented using 128-bit hash values for bucket and
89 * object components, together with a cluster-resident cryptographic
90 * key. Since an MD5 or SHA-1 key is 128 bits and the (fast),
91 * non-cryptographic CityHash128 hash algorithm takes a 128-bit seed,
92 * speculatively we could use that for the final hash computations.
93 */
94 struct fh_key
95 {
96 rgw_fh_hk fh_hk {};
97 uint32_t version;
98
99 static constexpr uint64_t seed = 8675309;
100
101 fh_key() : version(0) {}
102
103 fh_key(const rgw_fh_hk& _hk)
104 : fh_hk(_hk), version(0) {
105 // nothing
106 }
107
108 fh_key(const uint64_t bk, const uint64_t ok)
109 : version(0) {
110 fh_hk.bucket = bk;
111 fh_hk.object = ok;
112 }
113
114 fh_key(const uint64_t bk, const char *_o, const std::string& _t)
115 : version(0) {
116 fh_hk.bucket = bk;
117 std::string to = _t + ":" + _o;
118 fh_hk.object = XXH64(to.c_str(), to.length(), seed);
119 }
120
121 fh_key(const std::string& _b, const std::string& _o,
122 const std::string& _t /* tenant */)
123 : version(0) {
124 std::string tb = _t + ":" + _b;
125 std::string to = _t + ":" + _o;
126 fh_hk.bucket = XXH64(tb.c_str(), tb.length(), seed);
127 fh_hk.object = XXH64(to.c_str(), to.length(), seed);
128 }
129
130 void encode(buffer::list& bl) const {
131 ENCODE_START(2, 1, bl);
132 encode(fh_hk.bucket, bl);
133 encode(fh_hk.object, bl);
134 encode((uint32_t)2, bl);
135 ENCODE_FINISH(bl);
136 }
137
138 void decode(bufferlist::const_iterator& bl) {
139 DECODE_START(2, bl);
140 decode(fh_hk.bucket, bl);
141 decode(fh_hk.object, bl);
142 if (struct_v >= 2) {
143 decode(version, bl);
144 }
145 DECODE_FINISH(bl);
146 }
147
148 friend std::ostream& operator<<(std::ostream &os, fh_key const &fhk);
149
150 }; /* fh_key */
151
152 WRITE_CLASS_ENCODER(fh_key);
153
154 inline bool operator<(const fh_key& lhs, const fh_key& rhs)
155 {
156 return ((lhs.fh_hk.bucket < rhs.fh_hk.bucket) ||
157 ((lhs.fh_hk.bucket == rhs.fh_hk.bucket) &&
158 (lhs.fh_hk.object < rhs.fh_hk.object)));
159 }
160
161 inline bool operator>(const fh_key& lhs, const fh_key& rhs)
162 {
163 return (rhs < lhs);
164 }
165
166 inline bool operator==(const fh_key& lhs, const fh_key& rhs)
167 {
168 return ((lhs.fh_hk.bucket == rhs.fh_hk.bucket) &&
169 (lhs.fh_hk.object == rhs.fh_hk.object));
170 }
171
172 inline bool operator!=(const fh_key& lhs, const fh_key& rhs)
173 {
174 return !(lhs == rhs);
175 }
176
177 inline bool operator<=(const fh_key& lhs, const fh_key& rhs)
178 {
179 return (lhs < rhs) || (lhs == rhs);
180 }
181
182 using boost::variant;
183 using boost::container::flat_map;
184
185 typedef std::tuple<bool, bool> DecodeAttrsResult;
186
187 class RGWFileHandle : public cohort::lru::Object
188 {
189 struct rgw_file_handle fh;
190 std::mutex mtx;
191
192 RGWLibFS* fs;
193 RGWFileHandle* bucket;
194 RGWFileHandle* parent;
195 /* const */ std::string name; /* XXX file or bucket name */
196 /* const */ fh_key fhk;
197
198 using lock_guard = std::lock_guard<std::mutex>;
199 using unique_lock = std::unique_lock<std::mutex>;
200
201 /* TODO: keeping just the last marker is sufficient for
202 * nfs-ganesha 2.4.5; in the near future, nfs-ganesha will
203 * be able to hint the name of the next dirent required,
204 * from which we can directly synthesize a RADOS marker.
205 * using marker_cache_t = flat_map<uint64_t, rgw_obj_key>;
206 */
207
/*
 * Unix-style attributes cached for a handle.  Every field is given a
 * defined starting value: zero for everything except nlink, which
 * starts at 1.
 */
struct State {
  uint64_t dev;
  uint64_t size;
  uint64_t nlink;
  uint32_t owner_uid; /* XXX need Unix attr */
  uint32_t owner_gid; /* XXX need Unix attr */
  mode_t unix_mode;
  struct timespec ctime;
  struct timespec mtime;
  struct timespec atime;
  uint32_t version;
  /* unix_mode is zeroed here so it is never read uninitialised; the
   * owning handle's constructors assign the real mode afterwards */
  State() : dev(0), size(0), nlink(1), owner_uid(0), owner_gid(0),
            unix_mode(0), ctime{0,0}, mtime{0,0}, atime{0,0}, version(0) {}
} state;
222
223 struct file {
224 RGWWriteRequest* write_req;
225 file() : write_req(nullptr) {}
226 ~file();
227 };
228
229 struct directory {
230
231 static constexpr uint32_t FLAG_NONE = 0x0000;
232
233 uint32_t flags;
234 rgw_obj_key last_marker;
235 struct timespec last_readdir;
236
237 directory() : flags(FLAG_NONE), last_readdir{0,0} {}
238 };
239
240 void clear_state();
241 void advance_mtime(uint32_t flags = FLAG_NONE);
242
243 boost::variant<file, directory> variant_type;
244
245 uint16_t depth;
246 uint32_t flags;
247
248 ceph::buffer::list etag;
249 ceph::buffer::list acls;
250
251 public:
252 const static std::string root_name;
253
254 static constexpr uint16_t MAX_DEPTH = 256;
255
/* Bit flags stored in RGWFileHandle::flags and/or passed to lookup
 * routines; they are combined and tested with bitwise operators. */
256 static constexpr uint32_t FLAG_NONE = 0x0000;
257 static constexpr uint32_t FLAG_OPEN = 0x0001;
258 static constexpr uint32_t FLAG_ROOT = 0x0002;
259 static constexpr uint32_t FLAG_CREATE = 0x0004;
260 static constexpr uint32_t FLAG_CREATING = 0x0008;
/* NOTE(review): 0x0009 is not a distinct bit; it equals
 * FLAG_OPEN|FLAG_CREATING, so `flags & FLAG_SYMBOLIC_LINK` is also
 * non-zero whenever either of those bits is set, and OR-ing this flag
 * in sets both of them.  Confirm whether that is intended. */
261 static constexpr uint32_t FLAG_SYMBOLIC_LINK = 0x0009;
262 static constexpr uint32_t FLAG_DIRECTORY = 0x0010;
263 static constexpr uint32_t FLAG_BUCKET = 0x0020;
264 static constexpr uint32_t FLAG_LOCK = 0x0040;
265 static constexpr uint32_t FLAG_DELETED = 0x0080;
266 static constexpr uint32_t FLAG_UNLINK_THIS = 0x0100;
267 static constexpr uint32_t FLAG_LOCKED = 0x0200;
268 static constexpr uint32_t FLAG_STATELESS_OPEN = 0x0400;
269 static constexpr uint32_t FLAG_EXACT_MATCH = 0x0800;
270 static constexpr uint32_t FLAG_MOUNT = 0x1000;
271
272 #define CREATE_FLAGS(x) \
273 ((x) & ~(RGWFileHandle::FLAG_CREATE|RGWFileHandle::FLAG_LOCK))
274
275 static constexpr uint32_t RCB_MASK = \
276 RGW_SETATTR_MTIME|RGW_SETATTR_CTIME|RGW_SETATTR_ATIME|RGW_SETATTR_SIZE;
277
278 friend class RGWLibFS;
279
280 private:
281 explicit RGWFileHandle(RGWLibFS* _fs)
282 : fs(_fs), bucket(nullptr), parent(nullptr), variant_type{directory()},
283 depth(0), flags(FLAG_NONE)
284 {
285 fh.fh_hk.bucket = 0;
286 fh.fh_hk.object = 0;
287 /* root */
288 fh.fh_type = RGW_FS_TYPE_DIRECTORY;
289 variant_type = directory();
290 /* stat */
291 state.unix_mode = RGW_RWXMODE|S_IFDIR;
292 /* pointer to self */
293 fh.fh_private = this;
294 }
295
296 uint64_t init_fsid(std::string& uid) {
297 return XXH64(uid.c_str(), uid.length(), fh_key::seed);
298 }
299
300 void init_rootfs(std::string& fsid, const std::string& object_name,
301 bool is_bucket) {
302 /* fh_key */
303 fh.fh_hk.bucket = XXH64(fsid.c_str(), fsid.length(), fh_key::seed);
304 fh.fh_hk.object = XXH64(object_name.c_str(), object_name.length(),
305 fh_key::seed);
306 fhk = fh.fh_hk;
307 name = object_name;
308
309 state.dev = init_fsid(fsid);
310
311 if (is_bucket) {
312 flags |= RGWFileHandle::FLAG_BUCKET | RGWFileHandle::FLAG_MOUNT;
313 bucket = this;
314 depth = 1;
315 } else {
316 flags |= RGWFileHandle::FLAG_ROOT | RGWFileHandle::FLAG_MOUNT;
317 }
318 }
319
320 public:
321 RGWFileHandle(RGWLibFS* _fs, RGWFileHandle* _parent,
322 const fh_key& _fhk, std::string& _name, uint32_t _flags)
323 : fs(_fs), bucket(nullptr), parent(_parent), name(std::move(_name)),
324 fhk(_fhk), flags(_flags) {
325
326 if (parent->is_root()) {
327 fh.fh_type = RGW_FS_TYPE_DIRECTORY;
328 variant_type = directory();
329 flags |= FLAG_BUCKET;
330 } else {
331 bucket = parent->is_bucket() ? parent
332 : parent->bucket;
333 if (flags & FLAG_DIRECTORY) {
334 fh.fh_type = RGW_FS_TYPE_DIRECTORY;
335 variant_type = directory();
336 } else if(flags & FLAG_SYMBOLIC_LINK) {
337 fh.fh_type = RGW_FS_TYPE_SYMBOLIC_LINK;
338 variant_type = file();
339 } else {
340 fh.fh_type = RGW_FS_TYPE_FILE;
341 variant_type = file();
342 }
343 }
344
345 depth = parent->depth + 1;
346
347 /* save constant fhk */
348 fh.fh_hk = fhk.fh_hk; /* XXX redundant in fh_hk */
349
350 /* inherits parent's fsid */
351 state.dev = parent->state.dev;
352
353 switch (fh.fh_type) {
354 case RGW_FS_TYPE_DIRECTORY:
355 state.unix_mode = RGW_RWXMODE|S_IFDIR;
356 /* virtual directories are always invalid */
357 advance_mtime();
358 break;
359 case RGW_FS_TYPE_FILE:
360 state.unix_mode = RGW_RWMODE|S_IFREG;
361 break;
362 case RGW_FS_TYPE_SYMBOLIC_LINK:
363 state.unix_mode = RGW_RWMODE|S_IFLNK;
364 break;
365 default:
366 break;
367 }
368
369 /* pointer to self */
370 fh.fh_private = this;
371 }
372
/* Simple accessors over the handle's cached identity and attributes. */

/* the key this handle was constructed/initialised with */
373 const fh_key& get_key() const {
374 return fhk;
375 }
376
/* directory payload of variant_type, or nullptr when the variant
 * currently holds a `file` */
377 directory* get_directory() {
378 return get<directory>(&variant_type);
379 }
380
381 size_t get_size() const { return state.size; }
382
/* "DIR" for directory handles, "FILE" for everything else (symlinks
 * included) */
383 const char* stype() {
384 return is_dir() ? "DIR" : "FILE";
385 }
386
387 uint16_t get_depth() const { return depth; }
388
/* the embedded public handle structure (fh_private points back here) */
389 struct rgw_file_handle* get_fh() { return &fh; }
390
391 RGWLibFS* get_fs() { return fs; }
392
393 RGWFileHandle* get_parent() { return parent; }
394
395 uint32_t get_owner_uid() const { return state.owner_uid; }
396 uint32_t get_owner_gid() const { return state.owner_gid; }
397
/* timestamps are returned by value */
398 struct timespec get_ctime() const { return state.ctime; }
399 struct timespec get_mtime() const { return state.mtime; }
400
401 const ceph::buffer::list& get_etag() const { return etag; }
402 const ceph::buffer::list& get_acls() const { return acls; }
403
404 void create_stat(struct stat* st, uint32_t mask) {
405 if (mask & RGW_SETATTR_UID)
406 state.owner_uid = st->st_uid;
407
408 if (mask & RGW_SETATTR_GID)
409 state.owner_gid = st->st_gid;
410
411 if (mask & RGW_SETATTR_MODE) {
412 switch (fh.fh_type) {
413 case RGW_FS_TYPE_DIRECTORY:
414 state.unix_mode = st->st_mode|S_IFDIR;
415 break;
416 case RGW_FS_TYPE_FILE:
417 state.unix_mode = st->st_mode|S_IFREG;
418 break;
419 case RGW_FS_TYPE_SYMBOLIC_LINK:
420 state.unix_mode = st->st_mode|S_IFLNK;
421 break;
422 default:
423 break;
424 }
425 }
426
427 if (mask & RGW_SETATTR_ATIME)
428 state.atime = st->st_atim;
429
430 if (mask & RGW_SETATTR_MTIME) {
431 if (fh.fh_type != RGW_FS_TYPE_DIRECTORY)
432 state.mtime = st->st_mtim;
433 }
434
435 if (mask & RGW_SETATTR_CTIME)
436 state.ctime = st->st_ctim;
437 }
438
439 int stat(struct stat* st, uint32_t flags = FLAG_NONE) {
440 /* partial Unix attrs */
441 memset(st, 0, sizeof(struct stat));
442 st->st_dev = state.dev;
443 st->st_ino = fh.fh_hk.object; // XXX
444
445 st->st_uid = state.owner_uid;
446 st->st_gid = state.owner_gid;
447
448 st->st_mode = state.unix_mode;
449
450 switch (fh.fh_type) {
451 case RGW_FS_TYPE_DIRECTORY:
452 /* virtual directories are always invalid */
453 advance_mtime(flags);
454 st->st_nlink = state.nlink;
455 break;
456 case RGW_FS_TYPE_FILE:
457 st->st_nlink = 1;
458 st->st_blksize = 4096;
459 st->st_size = state.size;
460 st->st_blocks = (state.size) / 512;
461 break;
462 case RGW_FS_TYPE_SYMBOLIC_LINK:
463 st->st_nlink = 1;
464 st->st_blksize = 4096;
465 st->st_size = state.size;
466 st->st_blocks = (state.size) / 512;
467 break;
468 default:
469 break;
470 }
471
472 #ifdef HAVE_STAT_ST_MTIMESPEC_TV_NSEC
473 st->st_atimespec = state.atime;
474 st->st_mtimespec = state.mtime;
475 st->st_ctimespec = state.ctime;
476 #else
477 st->st_atim = state.atime;
478 st->st_mtim = state.mtime;
479 st->st_ctim = state.ctime;
480 #endif
481
482 return 0;
483 }
484
485 const std::string& bucket_name() const {
486 if (is_root())
487 return root_name;
488 if (is_bucket())
489 return name;
490 return bucket->object_name();
491 }
492
493 const std::string& object_name() const { return name; }
494
495 std::string full_object_name(bool omit_bucket = false) const {
496 std::string path;
497 std::vector<const std::string*> segments;
498 int reserve = 0;
499 const RGWFileHandle* tfh = this;
500 while (tfh && !tfh->is_root() && !(tfh->is_bucket() && omit_bucket)) {
501 segments.push_back(&tfh->object_name());
502 reserve += (1 + tfh->object_name().length());
503 tfh = tfh->parent;
504 }
505 int pos = 1;
506 path.reserve(reserve);
507 for (auto& s : boost::adaptors::reverse(segments)) {
508 if (pos > 1) {
509 path += "/";
510 } else {
511 if (!omit_bucket && (path.front() != '/')) // pretty-print
512 path += "/";
513 }
514 path += *s;
515 ++pos;
516 }
517 return path;
518 }
519
520 inline std::string relative_object_name() const {
521 return full_object_name(true /* omit_bucket */);
522 }
523
524 inline std::string format_child_name(const std::string& cbasename,
525 bool is_dir) const {
526 std::string child_name{relative_object_name()};
527 if ((child_name.size() > 0) &&
528 (child_name.back() != '/'))
529 child_name += "/";
530 child_name += cbasename;
531 if (is_dir)
532 child_name += "/";
533 return child_name;
534 }
535
536 inline std::string make_key_name(const char *name) const {
537 std::string key_name{full_object_name()};
538 if (key_name.length() > 0)
539 key_name += "/";
540 key_name += name;
541 return key_name;
542 }
543
544 fh_key make_fhk(const std::string& name);
545
546 void add_marker(uint64_t off, const rgw_obj_key& marker,
547 uint8_t obj_type) {
548 using std::get;
549 directory* d = get<directory>(&variant_type);
550 if (d) {
551 unique_lock guard(mtx);
552 d->last_marker = marker;
553 }
554 }
555
556 const rgw_obj_key* find_marker(uint64_t off) const {
557 using std::get;
558 if (off > 0) {
559 const directory* d = get<directory>(&variant_type);
560 if (d ) {
561 return &d->last_marker;
562 }
563 }
564 return nullptr;
565 }
566
567 int offset_of(const std::string& name, int64_t *offset, uint32_t flags) {
568 if (unlikely(! is_dir())) {
569 return -EINVAL;
570 }
571 *offset = XXH64(name.c_str(), name.length(), fh_key::seed);
572 return 0;
573 }
574
/* Predicates: the first group tests bits in `flags`, the is_file/
 * is_dir/is_link group tests the handle type recorded in fh.fh_type. */
575 bool is_open() const { return flags & FLAG_OPEN; }
576 bool is_root() const { return flags & FLAG_ROOT; }
577 bool is_mount() const { return flags & FLAG_MOUNT; }
578 bool is_bucket() const { return flags & FLAG_BUCKET; }
/* anything that is not a bucket counts as an object */
579 bool is_object() const { return !is_bucket(); }
580 bool is_file() const { return (fh.fh_type == RGW_FS_TYPE_FILE); }
581 bool is_dir() const { return (fh.fh_type == RGW_FS_TYPE_DIRECTORY); }
582 bool is_link() const { return (fh.fh_type == RGW_FS_TYPE_SYMBOLIC_LINK); }
583 bool creating() const { return flags & FLAG_CREATING; }
584 bool deleted() const { return flags & FLAG_DELETED; }
585 bool stateless_open() const { return flags & FLAG_STATELESS_OPEN; }
/* defined out of line */
586 bool has_children() const;
587
588 int open(uint32_t gsh_flags) {
589 lock_guard guard(mtx);
590 if (! is_open()) {
591 if (gsh_flags & RGW_OPEN_FLAG_V3) {
592 flags |= FLAG_STATELESS_OPEN;
593 }
594 flags |= FLAG_OPEN;
595 return 0;
596 }
597 return -EPERM;
598 }
599
600 typedef boost::variant<uint64_t*, const char*> readdir_offset;
601
602 int readdir(rgw_readdir_cb rcb, void *cb_arg, readdir_offset offset,
603 bool *eof, uint32_t flags);
604
605 int write(uint64_t off, size_t len, size_t *nbytes, void *buffer);
606
607 int commit(uint64_t offset, uint64_t length, uint32_t flags) {
608 /* NFS3 and NFSv4 COMMIT implementation
609 * the current atomic update strategy doesn't actually permit
610 * clients to read-stable until either CLOSE (NFSv4+) or the
611 * expiration of the active write timer (NFS3). In the
612 * interim, the client may send an arbitrary number of COMMIT
613 * operations which must return a success result */
614 return 0;
615 }
616
617 int write_finish(uint32_t flags = FLAG_NONE);
618 int close();
619
/* Simple mutators.  Only the two FLAG_CREATING helpers take mtx; the
 * state setters below write their field without locking. */

/* set FLAG_CREATING under mtx */
620 void open_for_create() {
621 lock_guard guard(mtx);
622 flags |= FLAG_CREATING;
623 }
624
/* clear FLAG_CREATING under mtx */
625 void clear_creating() {
626 lock_guard guard(mtx);
627 flags &= ~FLAG_CREATING;
628 }
629
630 void inc_nlink(const uint64_t n) {
631 state.nlink += n;
632 }
633
634 void set_nlink(const uint64_t n) {
635 state.nlink = n;
636 }
637
638 void set_size(const size_t size) {
639 state.size = size;
640 }
641
/* set ctime, mtime and atime all to the same value */
642 void set_times(const struct timespec &ts) {
643 state.ctime = ts;
644 state.mtime = state.ctime;
645 state.atime = state.ctime;
646 }
647
/* real_time overload: converts to timespec and delegates */
648 void set_times(real_time t) {
649 set_times(real_clock::to_timespec(t));
650 }
651
652 void set_ctime(const struct timespec &ts) {
653 state.ctime = ts;
654 }
655
656 void set_mtime(const struct timespec &ts) {
657 state.mtime = ts;
658 }
659
660 void set_atime(const struct timespec &ts) {
661 state.atime = ts;
662 }
663
/* stores a copy of the supplied buffer list */
664 void set_etag(const ceph::buffer::list& _etag ) {
665 etag = _etag;
666 }
667
/* stores a copy of the supplied buffer list */
668 void set_acls(const ceph::buffer::list& _acls ) {
669 acls = _acls;
670 }
671
672 void encode(buffer::list& bl) const {
673 ENCODE_START(2, 1, bl);
674 encode(uint32_t(fh.fh_type), bl);
675 encode(state.dev, bl);
676 encode(state.size, bl);
677 encode(state.nlink, bl);
678 encode(state.owner_uid, bl);
679 encode(state.owner_gid, bl);
680 encode(state.unix_mode, bl);
681 for (const auto& t : { state.ctime, state.mtime, state.atime }) {
682 encode(real_clock::from_timespec(t), bl);
683 }
684 encode((uint32_t)2, bl);
685 ENCODE_FINISH(bl);
686 }
687
688 void decode(bufferlist::const_iterator& bl) {
689 DECODE_START(2, bl);
690 uint32_t fh_type;
691 decode(fh_type, bl);
692 if ((fh.fh_type != fh_type) &&
693 (fh_type == RGW_FS_TYPE_SYMBOLIC_LINK))
694 fh.fh_type = RGW_FS_TYPE_SYMBOLIC_LINK;
695 ceph_assert(fh.fh_type == fh_type);
696 decode(state.dev, bl);
697 decode(state.size, bl);
698 decode(state.nlink, bl);
699 decode(state.owner_uid, bl);
700 decode(state.owner_gid, bl);
701 decode(state.unix_mode, bl);
702 ceph::real_time enc_time;
703 for (auto t : { &(state.ctime), &(state.mtime), &(state.atime) }) {
704 decode(enc_time, bl);
705 *t = real_clock::to_timespec(enc_time);
706 }
707 if (struct_v >= 2) {
708 decode(state.version, bl);
709 }
710 DECODE_FINISH(bl);
711 }
712
713 void encode_attrs(ceph::buffer::list& ux_key1,
714 ceph::buffer::list& ux_attrs1);
715
716 DecodeAttrsResult decode_attrs(const ceph::buffer::list* ux_key1,
717 const ceph::buffer::list* ux_attrs1);
718
719 void invalidate();
720
721 bool reclaim() override;
722
723 typedef cohort::lru::LRU<std::mutex> FhLRU;
724
725 struct FhLT
726 {
727 // for internal ordering
728 bool operator()(const RGWFileHandle& lhs, const RGWFileHandle& rhs) const
729 { return (lhs.get_key() < rhs.get_key()); }
730
731 // for external search by fh_key
732 bool operator()(const fh_key& k, const RGWFileHandle& fh) const
733 { return k < fh.get_key(); }
734
735 bool operator()(const RGWFileHandle& fh, const fh_key& k) const
736 { return fh.get_key() < k; }
737 };
738
739 struct FhEQ
740 {
741 bool operator()(const RGWFileHandle& lhs, const RGWFileHandle& rhs) const
742 { return (lhs.get_key() == rhs.get_key()); }
743
744 bool operator()(const fh_key& k, const RGWFileHandle& fh) const
745 { return k == fh.get_key(); }
746
747 bool operator()(const RGWFileHandle& fh, const fh_key& k) const
748 { return fh.get_key() == k; }
749 };
750
751 typedef bi::link_mode<bi::safe_link> link_mode; /* XXX normal */
752 #if defined(FHCACHE_AVL)
753 typedef bi::avl_set_member_hook<link_mode> tree_hook_type;
754 #else
755 /* RBT */
756 typedef bi::set_member_hook<link_mode> tree_hook_type;
757 #endif
758 tree_hook_type fh_hook;
759
760 typedef bi::member_hook<
761 RGWFileHandle, tree_hook_type, &RGWFileHandle::fh_hook> FhHook;
762
763 #if defined(FHCACHE_AVL)
764 typedef bi::avltree<RGWFileHandle, bi::compare<FhLT>, FhHook> FHTree;
765 #else
766 typedef bi::rbtree<RGWFileHandle, bi::compare<FhLT>, FhHook> FhTree;
767 #endif
768 typedef cohort::lru::TreeX<RGWFileHandle, FhTree, FhLT, FhEQ, fh_key,
769 std::mutex> FHCache;
770
771 ~RGWFileHandle() override;
772
773 friend std::ostream& operator<<(std::ostream &os,
774 RGWFileHandle const &rgw_fh);
775
776 class Factory : public cohort::lru::ObjectFactory
777 {
778 public:
779 RGWLibFS* fs;
780 RGWFileHandle* parent;
781 const fh_key& fhk;
782 std::string& name;
783 uint32_t flags;
784
785 Factory() = delete;
786
787 Factory(RGWLibFS* _fs, RGWFileHandle* _parent,
788 const fh_key& _fhk, std::string& _name, uint32_t _flags)
789 : fs(_fs), parent(_parent), fhk(_fhk), name(_name),
790 flags(_flags) {}
791
792 void recycle (cohort::lru::Object* o) override {
793 /* re-use an existing object */
794 o->~Object(); // call lru::Object virtual dtor
795 // placement new!
796 new (o) RGWFileHandle(fs, parent, fhk, name, flags);
797 }
798
799 cohort::lru::Object* alloc() override {
800 return new RGWFileHandle(fs, parent, fhk, name, flags);
801 }
802 }; /* Factory */
803
804 }; /* RGWFileHandle */
805
806 WRITE_CLASS_ENCODER(RGWFileHandle);
807
808 static inline RGWFileHandle* get_rgwfh(struct rgw_file_handle* fh) {
809 return static_cast<RGWFileHandle*>(fh->fh_private);
810 }
811
812 static inline enum rgw_fh_type fh_type_of(uint32_t flags) {
813 enum rgw_fh_type fh_type;
814 switch(flags & RGW_LOOKUP_TYPE_FLAGS)
815 {
816 case RGW_LOOKUP_FLAG_DIR:
817 fh_type = RGW_FS_TYPE_DIRECTORY;
818 break;
819 case RGW_LOOKUP_FLAG_FILE:
820 fh_type = RGW_FS_TYPE_FILE;
821 break;
822 default:
823 fh_type = RGW_FS_TYPE_NIL;
824 };
825 return fh_type;
826 }
827
828 typedef std::tuple<RGWFileHandle*, uint32_t> LookupFHResult;
829 typedef std::tuple<RGWFileHandle*, int> MkObjResult;
830
831 class RGWLibFS
832 {
833 CephContext* cct;
834 struct rgw_fs fs{};
835 RGWFileHandle root_fh;
836 rgw_fh_callback_t invalidate_cb;
837 void *invalidate_arg;
838 bool shutdown;
839
840 mutable std::atomic<uint64_t> refcnt;
841
842 RGWFileHandle::FHCache fh_cache;
843 RGWFileHandle::FhLRU fh_lru;
844
845 std::string uid; // should match user.user_id, iiuc
846
847 RGWUserInfo user;
848 RGWAccessKey key; // XXXX acc_key
849
850 static std::atomic<uint32_t> fs_inst_counter;
851
852 static uint32_t write_completion_interval_s;
853
854 using lock_guard = std::lock_guard<std::mutex>;
855 using unique_lock = std::unique_lock<std::mutex>;
856
857 struct event
858 {
859 enum class type : uint8_t { READDIR } ;
860 type t;
861 const fh_key fhk;
862 struct timespec ts;
863 event(type t, const fh_key& k, const struct timespec& ts)
864 : t(t), fhk(k), ts(ts) {}
865 };
866
867 friend std::ostream& operator<<(std::ostream &os,
868 RGWLibFS::event const &ev);
869
870 using event_vector = /* boost::small_vector<event, 16> */
871 std::vector<event>;
872
873 struct WriteCompletion
874 {
875 RGWFileHandle& rgw_fh;
876
877 explicit WriteCompletion(RGWFileHandle& _fh) : rgw_fh(_fh) {
878 rgw_fh.get_fs()->ref(&rgw_fh);
879 }
880
881 void operator()() {
882 rgw_fh.close(); /* will finish in-progress write */
883 rgw_fh.get_fs()->unref(&rgw_fh);
884 }
885 };
886
887 static ceph::timer<ceph::mono_clock> write_timer;
888
889 struct State {
890 std::mutex mtx;
891 std::atomic<uint32_t> flags;
892 std::deque<event> events;
893
894 State() : flags(0) {}
895
896 void push_event(const event& ev) {
897 events.push_back(ev);
898 }
899 } state;
900
901 uint32_t new_inst() {
902 return ++fs_inst_counter;
903 }
904
905 friend class RGWFileHandle;
906 friend class RGWLibProcess;
907
908 public:
909
910 static constexpr uint32_t FLAG_NONE = 0x0000;
911 static constexpr uint32_t FLAG_CLOSED = 0x0001;
912
913 struct BucketStats {
914 size_t size;
915 size_t size_rounded;
916 real_time creation_time;
917 uint64_t num_entries;
918 };
919
920 RGWLibFS(CephContext* _cct, const char *_uid, const char *_user_id,
921 const char* _key, const char *root)
922 : cct(_cct), root_fh(this), invalidate_cb(nullptr),
923 invalidate_arg(nullptr), shutdown(false), refcnt(1),
924 fh_cache(cct->_conf->rgw_nfs_fhcache_partitions,
925 cct->_conf->rgw_nfs_fhcache_size),
926 fh_lru(cct->_conf->rgw_nfs_lru_lanes,
927 cct->_conf->rgw_nfs_lru_lane_hiwat),
928 uid(_uid), key(_user_id, _key) {
929
930 if (!root || !strcmp(root, "/")) {
931 root_fh.init_rootfs(uid, RGWFileHandle::root_name, false);
932 } else {
933 root_fh.init_rootfs(uid, root, true);
934 }
935
936 /* pointer to self */
937 fs.fs_private = this;
938
939 /* expose public root fh */
940 fs.root_fh = root_fh.get_fh();
941
942 new_inst();
943 }
944
/* boost::intrusive_ptr hooks: add_ref increments refcnt, release
 * decrements it and may delete the object. */
945 friend void intrusive_ptr_add_ref(const RGWLibFS* fs) {
946 fs->refcnt.fetch_add(1, std::memory_order_relaxed);
947 }
948
/* NOTE(review): fetch_sub returns the value held *before* the
 * decrement, so this test is true only when refcnt was already 0.
 * With refcnt initialised to 1 in the constructor, the release that
 * takes the count from 1 to 0 does not delete.  Confirm against the
 * callers of rele() whether `== 1` was intended before changing it. */
949 friend void intrusive_ptr_release(const RGWLibFS* fs) {
950 if (fs->refcnt.fetch_sub(1, std::memory_order_release) == 0) {
951 std::atomic_thread_fence(std::memory_order_acquire);
952 delete fs;
953 }
954 }
955
/* take a reference on this filesystem object; returns this */
956 RGWLibFS* ref() {
957 intrusive_ptr_add_ref(this);
958 return this;
959 }
960
/* drop a reference via intrusive_ptr_release */
961 inline void rele() {
962 intrusive_ptr_release(this);
963 }
964
/* set the shutdown flag (a plain bool, written without locking) */
965 void stop() { shutdown = true; }
966
967 void release_evict(RGWFileHandle* fh) {
968 /* remove from cache, releases sentinel ref */
969 fh_cache.remove(fh->fh.fh_hk.object, fh,
970 RGWFileHandle::FHCache::FLAG_LOCK);
971 /* release call-path ref */
972 (void) fh_lru.unref(fh, cohort::lru::FLAG_NONE);
973 }
974
/*
 * Authenticate the access key given at construction.
 *
 * First looks the user up by access key id.  On a hit, returns -EINVAL
 * when the stored secret is missing or differs, and
 * -ERR_USER_SUSPENDED for a suspended user.  On a miss, treats the key
 * id as a base64 token and tries LDAP.
 *
 * Returns the result of the initial access-key lookup unless one of
 * the early returns above fires.
 */
975 int authorize(RGWRados* store) {
976 int ret = rgw_get_user_info_by_access_key(store, key.id, user);
977 if (ret == 0) {
978 RGWAccessKey* k = user.get_key(key.id);
979 if (!k || (k->key != key.key))
980 return -EINVAL;
981 if (user.suspended)
982 return -ERR_USER_SUSPENDED;
983 } else {
984 /* try external authenticators (ldap for now) */
985 rgw::LDAPHelper* ldh = rgwlib.get_ldh(); /* !nullptr */
986 RGWToken token;
987 /* boost filters and/or string_ref may throw on invalid input */
988 try {
989 token = rgw::from_base64(key.id);
990 } catch(...) {
991 token = std::string("");
992 }
993 if (token.valid() && (ldh->auth(token.id, token.key) == 0)) {
994 /* try to store user if it doesn't already exist */
995 if (rgw_get_user_info_by_uid(store, token.id, user) < 0) {
/* NOTE(review): this inner `ret` shadows the function-level one, so
 * the store result is only logged and never returned. */
996 int ret = rgw_store_user_info(store, user, NULL, NULL, real_time(),
997 true);
998 if (ret < 0) {
999 lsubdout(get_context(), rgw, 10)
1000 << "NOTICE: failed to store new user's info: ret=" << ret
1001 << dendl;
1002 }
1003 }
1004 } /* auth success */
1005 }
/* NOTE(review): on the LDAP path the outer `ret` still holds the
 * non-zero access-key lookup result, even when ldh->auth() succeeded.
 * Confirm whether that is the intended return value. */
1006 return ret;
1007 } /* authorize */
1008
1009 int register_invalidate(rgw_fh_callback_t cb, void *arg, uint32_t flags) {
1010 invalidate_cb = cb;
1011 invalidate_arg = arg;
1012 return 0;
1013 }
1014
/*
 * Look up an existing handle by key; never creates one.
 *
 * flags:
 *   FLAG_LOCKED - the caller says it already holds the handle's mutex,
 *                 so it is not locked here
 *   FLAG_LOCK   - return with the handle's mutex still held
 *
 * On a hit an initial LRU reference is taken; if that fails the latch
 * (and the mutex, if taken here) are released and the lookup retries.
 * Returns {handle or nullptr, FLAG_NONE}.
 */
1015 /* find RGWFileHandle by id */
1016 LookupFHResult lookup_fh(const fh_key& fhk,
1017 const uint32_t flags = RGWFileHandle::FLAG_NONE) {
1018 using std::get;
1019
1020 // cast int32_t(RGWFileHandle::FLAG_NONE) due to strictness of Clang
1021 // the cast transfers a lvalue into a rvalue in the ctor
1022 // check the commit message for the full details
1023 LookupFHResult fhr { nullptr, uint32_t(RGWFileHandle::FLAG_NONE) };
1024
1025 RGWFileHandle::FHCache::Latch lat;
1026 bool fh_locked = flags & RGWFileHandle::FLAG_LOCKED;
1027
1028 retry:
1029 RGWFileHandle* fh =
1030 fh_cache.find_latch(fhk.fh_hk.object /* partition selector*/,
1031 fhk /* key */, lat /* serializer */,
1032 RGWFileHandle::FHCache::FLAG_LOCK);
1033 /* LATCHED */
1034 if (fh) {
1035 if (likely(! fh_locked))
1036 fh->mtx.lock(); // XXX !RAII because may-return-LOCKED
1037 /* need initial ref from LRU (fast path) */
1038 if (! fh_lru.ref(fh, cohort::lru::FLAG_INITIAL)) {
1039 lat.lock->unlock();
1040 if (likely(! fh_locked))
1041 fh->mtx.unlock();
1042 goto retry; /* !LATCHED */
1043 }
1044 /* LATCHED, LOCKED */
/* NOTE(review): unlike the name-based overload of lookup_fh, this
 * unlock is not guarded by fh_locked, so with FLAG_LOCKED and without
 * FLAG_LOCK it unlocks a mutex this function did not lock.  Confirm. */
1045 if (! (flags & RGWFileHandle::FLAG_LOCK))
1046 fh->mtx.unlock(); /* ! LOCKED */
1047 }
1048 lat.lock->unlock(); /* !LATCHED */
1049 get<0>(fhr) = fh;
1050 if (fh) {
1051 lsubdout(get_context(), rgw, 17)
1052 << __func__ << " 1 " << *fh
1053 << dendl;
1054 }
1055 return fhr;
1056 } /* lookup_fh(const fh_key&) */
1057
/*
 * Look up the child `name` of `parent`, creating a handle when none is
 * cached.
 *
 * flags:
 *   FLAG_LOCKED - the caller says it already holds the handle's mutex
 *   FLAG_LOCK   - return with the handle's mutex held
 *   remaining bits (minus FLAG_CREATE/FLAG_LOCK, see CREATE_FLAGS) are
 *   passed to the new handle's constructor
 *
 * Returns {nullptr, FLAG_NONE} immediately when the mount is flagged
 * FLAG_CLOSED.  Otherwise returns {handle, result_flags}, where
 * result_flags has FLAG_CREATE set if the handle was created here.
 */
1058 /* find or create an RGWFileHandle */
1059 LookupFHResult lookup_fh(RGWFileHandle* parent, const char *name,
1060 const uint32_t flags = RGWFileHandle::FLAG_NONE) {
1061 using std::get;
1062
1063 // cast int32_t(RGWFileHandle::FLAG_NONE) due to strictness of Clang
1064 // the cast transfers a lvalue into a rvalue in the ctor
1065 // check the commit message for the full details
1066 LookupFHResult fhr { nullptr, uint32_t(RGWFileHandle::FLAG_NONE) };
1067
1068 /* mount is stale? */
1069 if (state.flags & FLAG_CLOSED)
1070 return fhr;
1071
1072 RGWFileHandle::FHCache::Latch lat;
1073 bool fh_locked = flags & RGWFileHandle::FLAG_LOCKED;
1074
1075 std::string obj_name{name};
1076 std::string key_name{parent->make_key_name(name)};
1077 fh_key fhk = parent->make_fhk(obj_name);
1078
1079 lsubdout(get_context(), rgw, 10)
1080 << __func__ << " called on "
1081 << parent->object_name() << " for " << key_name
1082 << " (" << obj_name << ")"
1083 << " -> " << fhk
1084 << dendl;
1085
1086 retry:
1087 RGWFileHandle* fh =
1088 fh_cache.find_latch(fhk.fh_hk.object /* partition selector*/,
1089 fhk /* key */, lat /* serializer */,
1090 RGWFileHandle::FHCache::FLAG_LOCK);
1091 /* LATCHED */
1092 if (fh) {
1093 if (likely(! fh_locked))
1094 fh->mtx.lock(); // XXX !RAII because may-return-LOCKED
/* a cached handle flagged deleted is not returned: release, wait
 * 20ms, and look again */
1095 if (fh->flags & RGWFileHandle::FLAG_DELETED) {
1096 /* for now, delay briefly and retry */
1097 lat.lock->unlock();
1098 if (likely(! fh_locked))
1099 fh->mtx.unlock();
1100 std::this_thread::sleep_for(std::chrono::milliseconds(20));
1101 goto retry; /* !LATCHED */
1102 }
1103 /* need initial ref from LRU (fast path) */
1104 if (! fh_lru.ref(fh, cohort::lru::FLAG_INITIAL)) {
1105 lat.lock->unlock();
1106 if (likely(! fh_locked))
1107 fh->mtx.unlock();
1108 goto retry; /* !LATCHED */
1109 }
1110 /* LATCHED, LOCKED */
1111 if (! (flags & RGWFileHandle::FLAG_LOCK))
1112 if (likely(! fh_locked))
1113 fh->mtx.unlock(); /* ! LOCKED */
1114 } else {
1115 /* make or re-use handle */
/* NOTE: the handle constructor moves from the name it is given, so
 * obj_name may be left moved-from once the factory has built a handle */
1116 RGWFileHandle::Factory prototype(this, parent, fhk,
1117 obj_name, CREATE_FLAGS(flags));
1118 uint32_t iflags{cohort::lru::FLAG_INITIAL};
1119 fh = static_cast<RGWFileHandle*>(
1120 fh_lru.insert(&prototype,
1121 cohort::lru::Edge::MRU,
1122 iflags));
1123 if (fh) {
1124 /* lock fh (LATCHED) */
1125 if (flags & RGWFileHandle::FLAG_LOCK)
1126 fh->mtx.lock();
1127 if (likely(! (iflags & cohort::lru::FLAG_RECYCLE))) {
1128 /* inserts at cached insert iterator, releasing latch */
1129 fh_cache.insert_latched(
1130 fh, lat, RGWFileHandle::FHCache::FLAG_UNLOCK);
1131 } else {
1132 /* recycle step invalidates Latch */
1133 fh_cache.insert(
1134 fhk.fh_hk.object, fh, RGWFileHandle::FHCache::FLAG_NONE);
1135 lat.lock->unlock(); /* !LATCHED */
1136 }
1137 get<1>(fhr) |= RGWFileHandle::FLAG_CREATE;
1138 /* ref parent (non-initial ref cannot fail on valid object) */
1139 if (! parent->is_mount()) {
1140 (void) fh_lru.ref(parent, cohort::lru::FLAG_NONE);
1141 }
1142 goto out; /* !LATCHED */
1143 } else {
/* the LRU produced no handle: release the latch and start over */
1144 lat.lock->unlock();
1145 goto retry; /* !LATCHED */
1146 }
1147 }
1148 lat.lock->unlock(); /* !LATCHED */
1149 out:
1150 get<0>(fhr) = fh;
1151 if (fh) {
1152 lsubdout(get_context(), rgw, 17)
1153 << __func__ << " 2 " << *fh
1154 << dendl;
1155 }
1156 return fhr;
1157 } /* lookup_fh(RGWFileHandle*, const char *, const uint32_t) */
1158
1159 inline void unref(RGWFileHandle* fh) {
1160 if (likely(! fh->is_mount())) {
1161 (void) fh_lru.unref(fh, cohort::lru::FLAG_NONE);
1162 }
1163 }
1164
1165 inline RGWFileHandle* ref(RGWFileHandle* fh) {
1166 if (likely(! fh->is_mount())) {
1167 fh_lru.ref(fh, cohort::lru::FLAG_NONE);
1168 }
1169 return fh;
1170 }
1171
1172 int getattr(RGWFileHandle* rgw_fh, struct stat* st);
1173
1174 int setattr(RGWFileHandle* rgw_fh, struct stat* st, uint32_t mask,
1175 uint32_t flags);
1176
1177 void update_fh(RGWFileHandle *rgw_fh);
1178
1179 LookupFHResult stat_bucket(RGWFileHandle* parent, const char *path,
1180 RGWLibFS::BucketStats& bs,
1181 uint32_t flags);
1182
1183 LookupFHResult fake_leaf(RGWFileHandle* parent, const char *path,
1184 enum rgw_fh_type type = RGW_FS_TYPE_NIL,
1185 struct stat *st = nullptr, uint32_t mask = 0,
1186 uint32_t flags = RGWFileHandle::FLAG_NONE);
1187
1188 LookupFHResult stat_leaf(RGWFileHandle* parent, const char *path,
1189 enum rgw_fh_type type = RGW_FS_TYPE_NIL,
1190 uint32_t flags = RGWFileHandle::FLAG_NONE);
1191
1192 int read(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
1193 size_t* bytes_read, void* buffer, uint32_t flags);
1194
1195 int readlink(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
1196 size_t* bytes_read, void* buffer, uint32_t flags);
1197
1198 int rename(RGWFileHandle* old_fh, RGWFileHandle* new_fh,
1199 const char *old_name, const char *new_name);
1200
1201 MkObjResult create(RGWFileHandle* parent, const char *name, struct stat *st,
1202 uint32_t mask, uint32_t flags);
1203
1204 MkObjResult symlink(RGWFileHandle* parent, const char *name,
1205 const char *link_path, struct stat *st, uint32_t mask, uint32_t flags);
1206
1207 MkObjResult mkdir(RGWFileHandle* parent, const char *name, struct stat *st,
1208 uint32_t mask, uint32_t flags);
1209
1210 int unlink(RGWFileHandle* rgw_fh, const char *name,
1211 uint32_t flags = FLAG_NONE);
1212
1213 /* find existing RGWFileHandle */
1214 RGWFileHandle* lookup_handle(struct rgw_fh_hk fh_hk) {
1215
1216 if (state.flags & FLAG_CLOSED)
1217 return nullptr;
1218
1219 RGWFileHandle::FHCache::Latch lat;
1220 fh_key fhk(fh_hk);
1221
1222 retry:
1223 RGWFileHandle* fh =
1224 fh_cache.find_latch(fhk.fh_hk.object /* partition selector*/,
1225 fhk /* key */, lat /* serializer */,
1226 RGWFileHandle::FHCache::FLAG_LOCK);
1227 /* LATCHED */
1228 if (! fh) {
1229 if (unlikely(fhk == root_fh.fh.fh_hk)) {
1230 /* lookup for root of this fs */
1231 fh = &root_fh;
1232 goto out;
1233 }
1234 lsubdout(get_context(), rgw, 0)
1235 << __func__ << " handle lookup failed " << fhk
1236 << dendl;
1237 goto out;
1238 }
1239 fh->mtx.lock();
1240 if (fh->flags & RGWFileHandle::FLAG_DELETED) {
1241 /* for now, delay briefly and retry */
1242 lat.lock->unlock();
1243 fh->mtx.unlock(); /* !LOCKED */
1244 std::this_thread::sleep_for(std::chrono::milliseconds(20));
1245 goto retry; /* !LATCHED */
1246 }
1247 if (! fh_lru.ref(fh, cohort::lru::FLAG_INITIAL)) {
1248 lat.lock->unlock();
1249 fh->mtx.unlock();
1250 goto retry; /* !LATCHED */
1251 }
1252 /* LATCHED */
1253 fh->mtx.unlock(); /* !LOCKED */
1254 out:
1255 lat.lock->unlock(); /* !LATCHED */
1256
1257 /* special case: lookup root_fh */
1258 if (! fh) {
1259 if (unlikely(fh_hk == root_fh.fh.fh_hk)) {
1260 fh = &root_fh;
1261 }
1262 }
1263
1264 return fh;
1265 }
1266
1267 CephContext* get_context() {
1268 return cct;
1269 }
1270
1271 struct rgw_fs* get_fs() { return &fs; }
1272
1273 uint64_t get_fsid() { return root_fh.state.dev; }
1274
1275 RGWUserInfo* get_user() { return &user; }
1276
1277 void update_user() {
1278 RGWUserInfo _user = user;
1279 int ret = rgw_get_user_info_by_access_key(rgwlib.get_store(), key.id, user);
1280 if (ret != 0)
1281 user = _user;
1282 }
1283
/* lifecycle operations, defined out of line */
void close();

void gc();
1286 }; /* RGWLibFS */
1287
/* Build the request path "/<bucket_name>/<object_name>". */
static inline std::string make_uri(const std::string& bucket_name,
                                   const std::string& object_name) {
  std::string path;
  /* two separators plus both names */
  path.reserve(bucket_name.length() + object_name.length() + 2);
  path.push_back('/');
  path.append(bucket_name);
  path.push_back('/');
  path.append(object_name);
  return path;
}
1297
1298 /*
1299 read directory content (buckets)
1300 */
1301
1302 class RGWListBucketsRequest : public RGWLibRequest,
1303 public RGWListBuckets /* RGWOp */
1304 {
1305 public:
1306 RGWFileHandle* rgw_fh;
1307 RGWFileHandle::readdir_offset offset;
1308 void* cb_arg;
1309 rgw_readdir_cb rcb;
1310 uint64_t* ioff;
1311 size_t ix;
1312 uint32_t d_count;
1313 bool rcb_eof; // caller forced early stop in readdir cycle
1314
1315 RGWListBucketsRequest(CephContext* _cct, RGWUserInfo *_user,
1316 RGWFileHandle* _rgw_fh, rgw_readdir_cb _rcb,
1317 void* _cb_arg, RGWFileHandle::readdir_offset& _offset)
1318 : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), offset(_offset),
1319 cb_arg(_cb_arg), rcb(_rcb), ioff(nullptr), ix(0), d_count(0),
1320 rcb_eof(false) {
1321
1322 using boost::get;
1323
1324 if (unlikely(!! get<uint64_t*>(&offset))) {
1325 ioff = get<uint64_t*>(offset);
1326 const auto& mk = rgw_fh->find_marker(*ioff);
1327 if (mk) {
1328 marker = mk->name;
1329 }
1330 } else {
1331 const char* mk = get<const char*>(offset);
1332 if (mk) {
1333 marker = mk;
1334 }
1335 }
1336 op = this;
1337 }
1338
1339 bool only_bucket() override { return false; }
1340
1341 int op_init() override {
1342 // assign store, s, and dialect_handler
1343 RGWObjectCtx* rados_ctx
1344 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1345 // framework promises to call op_init after parent init
1346 ceph_assert(rados_ctx);
1347 RGWOp::init(rados_ctx->get_store(), get_state(), this);
1348 op = this; // assign self as op: REQUIRED
1349 return 0;
1350 }
1351
1352 int header_init() override {
1353 struct req_state* s = get_state();
1354 s->info.method = "GET";
1355 s->op = OP_GET;
1356
1357 /* XXX derp derp derp */
1358 s->relative_uri = "/";
1359 s->info.request_uri = "/"; // XXX
1360 s->info.effective_uri = "/";
1361 s->info.request_params = "";
1362 s->info.domain = ""; /* XXX ? */
1363
1364 // woo
1365 s->user = user;
1366 s->bucket_tenant = user->user_id.tenant;
1367
1368 return 0;
1369 }
1370
1371 int get_params() override {
1372 limit = -1; /* no limit */
1373 return 0;
1374 }
1375
1376 void send_response_begin(bool has_buckets) override {
1377 sent_data = true;
1378 }
1379
1380 void send_response_data(RGWUserBuckets& buckets) override {
1381 if (!sent_data)
1382 return;
1383 map<string, RGWBucketEnt>& m = buckets.get_buckets();
1384 for (const auto& iter : m) {
1385 boost::string_ref marker{iter.first};
1386 const RGWBucketEnt& ent = iter.second;
1387 if (! this->operator()(ent.bucket.name, marker)) {
1388 /* caller cannot accept more */
1389 lsubdout(cct, rgw, 5) << "ListBuckets rcb failed"
1390 << " dirent=" << ent.bucket.name
1391 << " call count=" << ix
1392 << dendl;
1393 rcb_eof = true;
1394 return;
1395 }
1396 ++ix;
1397 }
1398 } /* send_response_data */
1399
1400 void send_response_end() override {
1401 // do nothing
1402 }
1403
1404 int operator()(const boost::string_ref& name,
1405 const boost::string_ref& marker) {
1406 uint64_t off = XXH64(name.data(), name.length(), fh_key::seed);
1407 if (!! ioff) {
1408 *ioff = off;
1409 }
1410 /* update traversal cache */
1411 rgw_fh->add_marker(off, rgw_obj_key{marker.data(), ""},
1412 RGW_FS_TYPE_DIRECTORY);
1413 ++d_count;
1414 return rcb(name.data(), cb_arg, off, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
1415 }
1416
1417 bool eof() {
1418 if (unlikely(cct->_conf->subsys.should_gather(ceph_subsys_rgw, 15))) {
1419 bool is_offset =
1420 unlikely(! get<const char*>(&offset)) ||
1421 !! get<const char*>(offset);
1422 lsubdout(cct, rgw, 15) << "READDIR offset: " <<
1423 ((is_offset) ? offset : "(nil)")
1424 << " is_truncated: " << is_truncated
1425 << dendl;
1426 }
1427 return !is_truncated && !rcb_eof;
1428 }
1429
1430 }; /* RGWListBucketsRequest */
1431
1432 /*
1433 read directory content (bucket objects)
1434 */
1435
1436 class RGWReaddirRequest : public RGWLibRequest,
1437 public RGWListBucket /* RGWOp */
1438 {
1439 public:
1440 RGWFileHandle* rgw_fh;
1441 RGWFileHandle::readdir_offset offset;
1442 void* cb_arg;
1443 rgw_readdir_cb rcb;
1444 uint64_t* ioff;
1445 size_t ix;
1446 uint32_t d_count;
1447 bool rcb_eof; // caller forced early stop in readdir cycle
1448
1449 RGWReaddirRequest(CephContext* _cct, RGWUserInfo *_user,
1450 RGWFileHandle* _rgw_fh, rgw_readdir_cb _rcb,
1451 void* _cb_arg, RGWFileHandle::readdir_offset& _offset)
1452 : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), offset(_offset),
1453 cb_arg(_cb_arg), rcb(_rcb), ioff(nullptr), ix(0), d_count(0),
1454 rcb_eof(false) {
1455
1456 using boost::get;
1457
1458 if (unlikely(!! get<uint64_t*>(&offset))) {
1459 ioff = get<uint64_t*>(offset);
1460 const auto& mk = rgw_fh->find_marker(*ioff);
1461 if (mk) {
1462 marker = *mk;
1463 }
1464 } else {
1465 const char* mk = get<const char*>(offset);
1466 if (mk) {
1467 std::string tmark{rgw_fh->relative_object_name()};
1468 if (tmark.length() > 0)
1469 tmark += "/";
1470 tmark += mk;
1471 marker = rgw_obj_key{std::move(tmark), "", ""};
1472 }
1473 }
1474
1475 default_max = 1000; // XXX was being omitted
1476 op = this;
1477 }
1478
1479 bool only_bucket() override { return true; }
1480
1481 int op_init() override {
1482 // assign store, s, and dialect_handler
1483 RGWObjectCtx* rados_ctx
1484 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1485 // framework promises to call op_init after parent init
1486 ceph_assert(rados_ctx);
1487 RGWOp::init(rados_ctx->get_store(), get_state(), this);
1488 op = this; // assign self as op: REQUIRED
1489 return 0;
1490 }
1491
1492 int header_init() override {
1493 struct req_state* s = get_state();
1494 s->info.method = "GET";
1495 s->op = OP_GET;
1496
1497 /* XXX derp derp derp */
1498 std::string uri = "/" + rgw_fh->bucket_name() + "/";
1499 s->relative_uri = uri;
1500 s->info.request_uri = uri; // XXX
1501 s->info.effective_uri = uri;
1502 s->info.request_params = "";
1503 s->info.domain = ""; /* XXX ? */
1504
1505 // woo
1506 s->user = user;
1507 s->bucket_tenant = user->user_id.tenant;
1508
1509 prefix = rgw_fh->relative_object_name();
1510 if (prefix.length() > 0)
1511 prefix += "/";
1512 delimiter = '/';
1513
1514 return 0;
1515 }
1516
1517 int operator()(const boost::string_ref name, const rgw_obj_key& marker,
1518 const ceph::real_time& t, const uint64_t fsz, uint8_t type) {
1519
1520 assert(name.length() > 0); // all cases handled in callers
1521
1522 /* hash offset of name in parent (short name) for NFS readdir cookie */
1523 uint64_t off = XXH64(name.data(), name.length(), fh_key::seed);
1524 if (unlikely(!! ioff)) {
1525 *ioff = off;
1526 }
1527
1528 /* update traversal cache */
1529 rgw_fh->add_marker(off, marker, type);
1530 ++d_count;
1531
1532 /* set c/mtime and size from bucket index entry */
1533 struct stat st = {};
1534 #ifdef HAVE_STAT_ST_MTIMESPEC_TV_NSEC
1535 st.st_atimespec = ceph::real_clock::to_timespec(t);
1536 st.st_mtimespec = st.st_atimespec;
1537 st.st_ctimespec = st.st_atimespec;
1538 #else
1539 st.st_atim = ceph::real_clock::to_timespec(t);
1540 st.st_mtim = st.st_atim;
1541 st.st_ctim = st.st_atim;
1542 #endif
1543 st.st_size = fsz;
1544
1545 return rcb(name.data(), cb_arg, off, &st, RGWFileHandle::RCB_MASK,
1546 (type == RGW_FS_TYPE_DIRECTORY) ?
1547 RGW_LOOKUP_FLAG_DIR :
1548 RGW_LOOKUP_FLAG_FILE);
1549 }
1550
1551 int get_params() override {
1552 max = default_max;
1553 return 0;
1554 }
1555
1556 void send_response() override {
1557 struct req_state* s = get_state();
1558 for (const auto& iter : objs) {
1559
1560 boost::string_ref sref {iter.key.name};
1561
1562 lsubdout(cct, rgw, 15) << "readdir objects prefix: " << prefix
1563 << " obj: " << sref << dendl;
1564
1565 size_t last_del = sref.find_last_of('/');
1566 if (last_del != string::npos)
1567 sref.remove_prefix(last_del+1);
1568
1569 /* leaf directory? */
1570 if (sref.empty())
1571 continue;
1572
1573 lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
1574 << __func__ << " "
1575 << "list uri=" << s->relative_uri << " "
1576 << " prefix=" << prefix << " "
1577 << " obj path=" << iter.key.name
1578 << " (" << sref << ")" << ""
1579 << " mtime="
1580 << real_clock::to_time_t(iter.meta.mtime)
1581 << " size=" << iter.meta.accounted_size
1582 << dendl;
1583
1584 if (! this->operator()(sref, next_marker, iter.meta.mtime,
1585 iter.meta.accounted_size, RGW_FS_TYPE_FILE)) {
1586 /* caller cannot accept more */
1587 lsubdout(cct, rgw, 5) << "readdir rcb failed"
1588 << " dirent=" << sref.data()
1589 << " call count=" << ix
1590 << dendl;
1591 rcb_eof = true;
1592 return;
1593 }
1594 ++ix;
1595 }
1596
1597 auto cnow = real_clock::now();
1598 for (auto& iter : common_prefixes) {
1599
1600 lsubdout(cct, rgw, 15) << "readdir common prefixes prefix: " << prefix
1601 << " iter first: " << iter.first
1602 << " iter second: " << iter.second
1603 << dendl;
1604
1605 /* XXX aieee--I have seen this case! */
1606 if (iter.first == "/")
1607 continue;
1608
1609 /* it's safest to modify the element in place--a suffix-modifying
1610 * string_ref operation is problematic since ULP rgw_file callers
1611 * will ultimately need a c-string */
1612 if (iter.first.back() == '/')
1613 const_cast<std::string&>(iter.first).pop_back();
1614
1615 boost::string_ref sref{iter.first};
1616
1617 size_t last_del = sref.find_last_of('/');
1618 if (last_del != string::npos)
1619 sref.remove_prefix(last_del+1);
1620
1621 lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
1622 << __func__ << " "
1623 << "list uri=" << s->relative_uri << " "
1624 << " prefix=" << prefix << " "
1625 << " cpref=" << sref
1626 << dendl;
1627
1628 if (sref.empty()) {
1629 /* null path segment--could be created in S3 but has no NFS
1630 * interpretation */
1631 return;
1632 }
1633
1634 if (! this->operator()(sref, next_marker, cnow, 0,
1635 RGW_FS_TYPE_DIRECTORY)) {
1636 /* caller cannot accept more */
1637 lsubdout(cct, rgw, 5) << "readdir rcb failed"
1638 << " dirent=" << sref.data()
1639 << " call count=" << ix
1640 << dendl;
1641 rcb_eof = true;
1642 return;
1643 }
1644 ++ix;
1645 }
1646 }
1647
1648 virtual void send_versioned_response() {
1649 send_response();
1650 }
1651
1652 bool eof() {
1653 if (unlikely(cct->_conf->subsys.should_gather(ceph_subsys_rgw, 15))) {
1654 bool is_offset =
1655 unlikely(! get<const char*>(&offset)) ||
1656 !! get<const char*>(offset);
1657 lsubdout(cct, rgw, 15) << "READDIR offset: " <<
1658 ((is_offset) ? offset : "(nil)")
1659 << " next marker: " << next_marker
1660 << " is_truncated: " << is_truncated
1661 << dendl;
1662 }
1663 return !is_truncated && !rcb_eof;
1664 }
1665
1666 }; /* RGWReaddirRequest */
1667
1668 /*
1669 dir has-children predicate (bucket objects)
1670 */
1671
1672 class RGWRMdirCheck : public RGWLibRequest,
1673 public RGWListBucket /* RGWOp */
1674 {
1675 public:
1676 const RGWFileHandle* rgw_fh;
1677 bool valid;
1678 bool has_children;
1679
1680 RGWRMdirCheck (CephContext* _cct, RGWUserInfo *_user,
1681 const RGWFileHandle* _rgw_fh)
1682 : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), valid(false),
1683 has_children(false) {
1684 default_max = 2;
1685 op = this;
1686 }
1687
1688 bool only_bucket() override { return true; }
1689
1690 int op_init() override {
1691 // assign store, s, and dialect_handler
1692 RGWObjectCtx* rados_ctx
1693 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1694 // framework promises to call op_init after parent init
1695 ceph_assert(rados_ctx);
1696 RGWOp::init(rados_ctx->get_store(), get_state(), this);
1697 op = this; // assign self as op: REQUIRED
1698 return 0;
1699 }
1700
1701 int header_init() override {
1702 struct req_state* s = get_state();
1703 s->info.method = "GET";
1704 s->op = OP_GET;
1705
1706 std::string uri = "/" + rgw_fh->bucket_name() + "/";
1707 s->relative_uri = uri;
1708 s->info.request_uri = uri;
1709 s->info.effective_uri = uri;
1710 s->info.request_params = "";
1711 s->info.domain = ""; /* XXX ? */
1712
1713 s->user = user;
1714 s->bucket_tenant = user->user_id.tenant;
1715
1716 prefix = rgw_fh->relative_object_name();
1717 if (prefix.length() > 0)
1718 prefix += "/";
1719 delimiter = '/';
1720
1721 return 0;
1722 }
1723
1724 int get_params() override {
1725 max = default_max;
1726 return 0;
1727 }
1728
1729 void send_response() override {
1730 valid = true;
1731 if ((objs.size() > 1) ||
1732 (! objs.empty() &&
1733 (objs.front().key.name != prefix))) {
1734 has_children = true;
1735 return;
1736 }
1737 for (auto& iter : common_prefixes) {
1738 /* readdir never produces a name for this case */
1739 if (iter.first == "/")
1740 continue;
1741 has_children = true;
1742 break;
1743 }
1744 }
1745
1746 virtual void send_versioned_response() {
1747 send_response();
1748 }
1749
1750 }; /* RGWRMdirCheck */
1751
1752 /*
1753 create bucket
1754 */
1755
1756 class RGWCreateBucketRequest : public RGWLibRequest,
1757 public RGWCreateBucket /* RGWOp */
1758 {
1759 public:
1760 const std::string& bucket_name;
1761
1762 RGWCreateBucketRequest(CephContext* _cct, RGWUserInfo *_user,
1763 std::string& _bname)
1764 : RGWLibRequest(_cct, _user), bucket_name(_bname) {
1765 op = this;
1766 }
1767
1768 bool only_bucket() override { return false; }
1769
1770 int read_permissions(RGWOp* op_obj) override {
1771 /* we ARE a 'create bucket' request (cf. rgw_rest.cc, ll. 1305-6) */
1772 return 0;
1773 }
1774
1775 int op_init() override {
1776 // assign store, s, and dialect_handler
1777 RGWObjectCtx* rados_ctx
1778 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1779 // framework promises to call op_init after parent init
1780 ceph_assert(rados_ctx);
1781 RGWOp::init(rados_ctx->get_store(), get_state(), this);
1782 op = this; // assign self as op: REQUIRED
1783 return 0;
1784 }
1785
1786 int header_init() override {
1787
1788 struct req_state* s = get_state();
1789 s->info.method = "PUT";
1790 s->op = OP_PUT;
1791
1792 string uri = "/" + bucket_name;
1793 /* XXX derp derp derp */
1794 s->relative_uri = uri;
1795 s->info.request_uri = uri; // XXX
1796 s->info.effective_uri = uri;
1797 s->info.request_params = "";
1798 s->info.domain = ""; /* XXX ? */
1799
1800 // woo
1801 s->user = user;
1802 s->bucket_tenant = user->user_id.tenant;
1803
1804 return 0;
1805 }
1806
1807 int get_params() override {
1808 struct req_state* s = get_state();
1809 RGWAccessControlPolicy_S3 s3policy(s->cct);
1810 /* we don't have (any) headers, so just create canned ACLs */
1811 int ret = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
1812 policy = s3policy;
1813 return ret;
1814 }
1815
1816 void send_response() override {
1817 /* TODO: something (maybe) */
1818 }
1819 }; /* RGWCreateBucketRequest */
1820
1821 /*
1822 delete bucket
1823 */
1824
1825 class RGWDeleteBucketRequest : public RGWLibRequest,
1826 public RGWDeleteBucket /* RGWOp */
1827 {
1828 public:
1829 const std::string& bucket_name;
1830
1831 RGWDeleteBucketRequest(CephContext* _cct, RGWUserInfo *_user,
1832 std::string& _bname)
1833 : RGWLibRequest(_cct, _user), bucket_name(_bname) {
1834 op = this;
1835 }
1836
1837 bool only_bucket() override { return true; }
1838
1839 int op_init() override {
1840 // assign store, s, and dialect_handler
1841 RGWObjectCtx* rados_ctx
1842 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1843 // framework promises to call op_init after parent init
1844 ceph_assert(rados_ctx);
1845 RGWOp::init(rados_ctx->get_store(), get_state(), this);
1846 op = this; // assign self as op: REQUIRED
1847 return 0;
1848 }
1849
1850 int header_init() override {
1851
1852 struct req_state* s = get_state();
1853 s->info.method = "DELETE";
1854 s->op = OP_DELETE;
1855
1856 string uri = "/" + bucket_name;
1857 /* XXX derp derp derp */
1858 s->relative_uri = uri;
1859 s->info.request_uri = uri; // XXX
1860 s->info.effective_uri = uri;
1861 s->info.request_params = "";
1862 s->info.domain = ""; /* XXX ? */
1863
1864 // woo
1865 s->user = user;
1866 s->bucket_tenant = user->user_id.tenant;
1867
1868 return 0;
1869 }
1870
1871 void send_response() override {}
1872
1873 }; /* RGWDeleteBucketRequest */
1874
1875 /*
1876 put object
1877 */
1878 class RGWPutObjRequest : public RGWLibRequest,
1879 public RGWPutObj /* RGWOp */
1880 {
1881 public:
1882 const std::string& bucket_name;
1883 const std::string& obj_name;
1884 buffer::list& bl; /* XXX */
1885 size_t bytes_written;
1886
1887 RGWPutObjRequest(CephContext* _cct, RGWUserInfo *_user,
1888 const std::string& _bname, const std::string& _oname,
1889 buffer::list& _bl)
1890 : RGWLibRequest(_cct, _user), bucket_name(_bname), obj_name(_oname),
1891 bl(_bl), bytes_written(0) {
1892 op = this;
1893 }
1894
1895 bool only_bucket() override { return true; }
1896
1897 int op_init() override {
1898 // assign store, s, and dialect_handler
1899 RGWObjectCtx* rados_ctx
1900 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1901 // framework promises to call op_init after parent init
1902 ceph_assert(rados_ctx);
1903 RGWOp::init(rados_ctx->get_store(), get_state(), this);
1904 op = this; // assign self as op: REQUIRED
1905
1906 int rc = valid_s3_object_name(obj_name);
1907 if (rc != 0)
1908 return rc;
1909
1910 return 0;
1911 }
1912
1913 int header_init() override {
1914
1915 struct req_state* s = get_state();
1916 s->info.method = "PUT";
1917 s->op = OP_PUT;
1918
1919 /* XXX derp derp derp */
1920 std::string uri = make_uri(bucket_name, obj_name);
1921 s->relative_uri = uri;
1922 s->info.request_uri = uri; // XXX
1923 s->info.effective_uri = uri;
1924 s->info.request_params = "";
1925 s->info.domain = ""; /* XXX ? */
1926
1927 /* XXX required in RGWOp::execute() */
1928 s->content_length = bl.length();
1929
1930 // woo
1931 s->user = user;
1932 s->bucket_tenant = user->user_id.tenant;
1933
1934 return 0;
1935 }
1936
1937 int get_params() override {
1938 struct req_state* s = get_state();
1939 RGWAccessControlPolicy_S3 s3policy(s->cct);
1940 /* we don't have (any) headers, so just create canned ACLs */
1941 int ret = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
1942 policy = s3policy;
1943 return ret;
1944 }
1945
1946 int get_data(buffer::list& _bl) override {
1947 /* XXX for now, use sharing semantics */
1948 _bl.claim(bl);
1949 uint32_t len = _bl.length();
1950 bytes_written += len;
1951 return len;
1952 }
1953
1954 void send_response() override {}
1955
1956 int verify_params() override {
1957 if (bl.length() > cct->_conf->rgw_max_put_size)
1958 return -ERR_TOO_LARGE;
1959 return 0;
1960 }
1961
1962 buffer::list* get_attr(const std::string& k) {
1963 auto iter = attrs.find(k);
1964 return (iter != attrs.end()) ? &(iter->second) : nullptr;
1965 }
1966
1967 }; /* RGWPutObjRequest */
1968
1969 /*
1970 get object
1971 */
1972
1973 class RGWReadRequest : public RGWLibRequest,
1974 public RGWGetObj /* RGWOp */
1975 {
1976 public:
1977 RGWFileHandle* rgw_fh;
1978 void *ulp_buffer;
1979 size_t nread;
1980 size_t read_resid; /* initialize to len, <= sizeof(ulp_buffer) */
1981 bool do_hexdump = false;
1982
1983 RGWReadRequest(CephContext* _cct, RGWUserInfo *_user,
1984 RGWFileHandle* _rgw_fh, uint64_t off, uint64_t len,
1985 void *_ulp_buffer)
1986 : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), ulp_buffer(_ulp_buffer),
1987 nread(0), read_resid(len) {
1988 op = this;
1989
1990 /* fixup RGWGetObj (already know range parameters) */
1991 RGWGetObj::range_parsed = true;
1992 RGWGetObj::get_data = true; // XXX
1993 RGWGetObj::partial_content = true;
1994 RGWGetObj::ofs = off;
1995 RGWGetObj::end = off + len;
1996 }
1997
1998 bool only_bucket() override { return false; }
1999
2000 int op_init() override {
2001 // assign store, s, and dialect_handler
2002 RGWObjectCtx* rados_ctx
2003 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2004 // framework promises to call op_init after parent init
2005 ceph_assert(rados_ctx);
2006 RGWOp::init(rados_ctx->get_store(), get_state(), this);
2007 op = this; // assign self as op: REQUIRED
2008 return 0;
2009 }
2010
2011 int header_init() override {
2012
2013 struct req_state* s = get_state();
2014 s->info.method = "GET";
2015 s->op = OP_GET;
2016
2017 /* XXX derp derp derp */
2018 s->relative_uri = make_uri(rgw_fh->bucket_name(),
2019 rgw_fh->relative_object_name());
2020 s->info.request_uri = s->relative_uri; // XXX
2021 s->info.effective_uri = s->relative_uri;
2022 s->info.request_params = "";
2023 s->info.domain = ""; /* XXX ? */
2024
2025 // woo
2026 s->user = user;
2027 s->bucket_tenant = user->user_id.tenant;
2028
2029 return 0;
2030 }
2031
2032 int get_params() override {
2033 return 0;
2034 }
2035
2036 int send_response_data(ceph::buffer::list& bl, off_t bl_off,
2037 off_t bl_len) override {
2038 size_t bytes;
2039 for (auto& bp : bl.buffers()) {
2040 /* if for some reason bl_off indicates the start-of-data is not at
2041 * the current buffer::ptr, skip it and account */
2042 if (bl_off > bp.length()) {
2043 bl_off -= bp.length();
2044 continue;
2045 }
2046 /* read no more than read_resid */
2047 bytes = std::min(read_resid, size_t(bp.length()-bl_off));
2048 memcpy(static_cast<char*>(ulp_buffer)+nread, bp.c_str()+bl_off, bytes);
2049 read_resid -= bytes; /* reduce read_resid by bytes read */
2050 nread += bytes;
2051 bl_off = 0;
2052 /* stop if we have no residual ulp_buffer */
2053 if (! read_resid)
2054 break;
2055 }
2056 return 0;
2057 }
2058
2059 int send_response_data_error() override {
2060 /* S3 implementation just sends nothing--there is no side effect
2061 * to simulate here */
2062 return 0;
2063 }
2064
2065 }; /* RGWReadRequest */
2066
2067 /*
2068 delete object
2069 */
2070
2071 class RGWDeleteObjRequest : public RGWLibRequest,
2072 public RGWDeleteObj /* RGWOp */
2073 {
2074 public:
2075 const std::string& bucket_name;
2076 const std::string& obj_name;
2077
2078 RGWDeleteObjRequest(CephContext* _cct, RGWUserInfo *_user,
2079 const std::string& _bname, const std::string& _oname)
2080 : RGWLibRequest(_cct, _user), bucket_name(_bname), obj_name(_oname) {
2081 op = this;
2082 }
2083
2084 bool only_bucket() override { return true; }
2085
2086 int op_init() override {
2087 // assign store, s, and dialect_handler
2088 RGWObjectCtx* rados_ctx
2089 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2090 // framework promises to call op_init after parent init
2091 ceph_assert(rados_ctx);
2092 RGWOp::init(rados_ctx->get_store(), get_state(), this);
2093 op = this; // assign self as op: REQUIRED
2094 return 0;
2095 }
2096
2097 int header_init() override {
2098
2099 struct req_state* s = get_state();
2100 s->info.method = "DELETE";
2101 s->op = OP_DELETE;
2102
2103 /* XXX derp derp derp */
2104 std::string uri = make_uri(bucket_name, obj_name);
2105 s->relative_uri = uri;
2106 s->info.request_uri = uri; // XXX
2107 s->info.effective_uri = uri;
2108 s->info.request_params = "";
2109 s->info.domain = ""; /* XXX ? */
2110
2111 // woo
2112 s->user = user;
2113 s->bucket_tenant = user->user_id.tenant;
2114
2115 return 0;
2116 }
2117
2118 void send_response() override {}
2119
2120 }; /* RGWDeleteObjRequest */
2121
2122 class RGWStatObjRequest : public RGWLibRequest,
2123 public RGWGetObj /* RGWOp */
2124 {
2125 public:
2126 const std::string& bucket_name;
2127 const std::string& obj_name;
2128 uint64_t _size;
2129 uint32_t flags;
2130
2131 static constexpr uint32_t FLAG_NONE = 0x000;
2132
2133 RGWStatObjRequest(CephContext* _cct, RGWUserInfo *_user,
2134 const std::string& _bname, const std::string& _oname,
2135 uint32_t _flags)
2136 : RGWLibRequest(_cct, _user), bucket_name(_bname), obj_name(_oname),
2137 _size(0), flags(_flags) {
2138 op = this;
2139
2140 /* fixup RGWGetObj (already know range parameters) */
2141 RGWGetObj::range_parsed = true;
2142 RGWGetObj::get_data = false; // XXX
2143 RGWGetObj::partial_content = true;
2144 RGWGetObj::ofs = 0;
2145 RGWGetObj::end = UINT64_MAX;
2146 }
2147
2148 const char* name() const override { return "stat_obj"; }
2149 RGWOpType get_type() override { return RGW_OP_STAT_OBJ; }
2150
2151 real_time get_mtime() const {
2152 return lastmod;
2153 }
2154
2155 /* attributes */
2156 uint64_t get_size() { return _size; }
2157 real_time ctime() { return mod_time; } // XXX
2158 real_time mtime() { return mod_time; }
2159 std::map<string, bufferlist>& get_attrs() { return attrs; }
2160
2161 buffer::list* get_attr(const std::string& k) {
2162 auto iter = attrs.find(k);
2163 return (iter != attrs.end()) ? &(iter->second) : nullptr;
2164 }
2165
2166 bool only_bucket() override { return false; }
2167
2168 int op_init() override {
2169 // assign store, s, and dialect_handler
2170 RGWObjectCtx* rados_ctx
2171 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2172 // framework promises to call op_init after parent init
2173 ceph_assert(rados_ctx);
2174 RGWOp::init(rados_ctx->get_store(), get_state(), this);
2175 op = this; // assign self as op: REQUIRED
2176 return 0;
2177 }
2178
2179 int header_init() override {
2180
2181 struct req_state* s = get_state();
2182 s->info.method = "GET";
2183 s->op = OP_GET;
2184
2185 /* XXX derp derp derp */
2186 s->relative_uri = make_uri(bucket_name, obj_name);
2187 s->info.request_uri = s->relative_uri; // XXX
2188 s->info.effective_uri = s->relative_uri;
2189 s->info.request_params = "";
2190 s->info.domain = ""; /* XXX ? */
2191
2192 // woo
2193 s->user = user;
2194 s->bucket_tenant = user->user_id.tenant;
2195
2196 return 0;
2197 }
2198
2199 int get_params() override {
2200 return 0;
2201 }
2202
2203 int send_response_data(ceph::buffer::list& _bl, off_t s_off,
2204 off_t e_off) override {
2205 /* NOP */
2206 /* XXX save attrs? */
2207 return 0;
2208 }
2209
2210 int send_response_data_error() override {
2211 /* NOP */
2212 return 0;
2213 }
2214
2215 void execute() override {
2216 RGWGetObj::execute();
2217 _size = get_state()->obj_size;
2218 }
2219
2220 }; /* RGWStatObjRequest */
2221
2222 class RGWStatBucketRequest : public RGWLibRequest,
2223 public RGWStatBucket /* RGWOp */
2224 {
2225 public:
2226 std::string uri;
2227 std::map<std::string, buffer::list> attrs;
2228 RGWLibFS::BucketStats& bs;
2229
2230 RGWStatBucketRequest(CephContext* _cct, RGWUserInfo *_user,
2231 const std::string& _path,
2232 RGWLibFS::BucketStats& _stats)
2233 : RGWLibRequest(_cct, _user), bs(_stats) {
2234 uri = "/" + _path;
2235 op = this;
2236 }
2237
2238 buffer::list* get_attr(const std::string& k) {
2239 auto iter = attrs.find(k);
2240 return (iter != attrs.end()) ? &(iter->second) : nullptr;
2241 }
2242
2243 real_time get_ctime() const {
2244 return bucket.creation_time;
2245 }
2246
2247 bool only_bucket() override { return false; }
2248
2249 int op_init() override {
2250 // assign store, s, and dialect_handler
2251 RGWObjectCtx* rados_ctx
2252 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2253 // framework promises to call op_init after parent init
2254 ceph_assert(rados_ctx);
2255 RGWOp::init(rados_ctx->get_store(), get_state(), this);
2256 op = this; // assign self as op: REQUIRED
2257 return 0;
2258 }
2259
2260 int header_init() override {
2261
2262 struct req_state* s = get_state();
2263 s->info.method = "GET";
2264 s->op = OP_GET;
2265
2266 /* XXX derp derp derp */
2267 s->relative_uri = uri;
2268 s->info.request_uri = uri; // XXX
2269 s->info.effective_uri = uri;
2270 s->info.request_params = "";
2271 s->info.domain = ""; /* XXX ? */
2272
2273 // woo
2274 s->user = user;
2275 s->bucket_tenant = user->user_id.tenant;
2276
2277 return 0;
2278 }
2279
2280 virtual int get_params() {
2281 return 0;
2282 }
2283
2284 void send_response() override {
2285 bucket.creation_time = get_state()->bucket_info.creation_time;
2286 bs.size = bucket.size;
2287 bs.size_rounded = bucket.size_rounded;
2288 bs.creation_time = bucket.creation_time;
2289 bs.num_entries = bucket.count;
2290 std::swap(attrs, get_state()->bucket_attrs);
2291 }
2292
2293 bool matched() {
2294 return (bucket.bucket.name.length() > 0);
2295 }
2296
2297 }; /* RGWStatBucketRequest */
2298
2299 class RGWStatLeafRequest : public RGWLibRequest,
2300 public RGWListBucket /* RGWOp */
2301 {
2302 public:
2303 RGWFileHandle* rgw_fh;
2304 std::string path;
2305 bool matched;
2306 bool is_dir;
2307 bool exact_matched;
2308
2309 RGWStatLeafRequest(CephContext* _cct, RGWUserInfo *_user,
2310 RGWFileHandle* _rgw_fh, const std::string& _path)
2311 : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), path(_path),
2312 matched(false), is_dir(false), exact_matched(false) {
2313 default_max = 1000; // logical max {"foo", "foo/"}
2314 op = this;
2315 }
2316
2317 bool only_bucket() override { return true; }
2318
2319 int op_init() override {
2320 // assign store, s, and dialect_handler
2321 RGWObjectCtx* rados_ctx
2322 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2323 // framework promises to call op_init after parent init
2324 ceph_assert(rados_ctx);
2325 RGWOp::init(rados_ctx->get_store(), get_state(), this);
2326 op = this; // assign self as op: REQUIRED
2327 return 0;
2328 }
2329
2330 int header_init() override {
2331
2332 struct req_state* s = get_state();
2333 s->info.method = "GET";
2334 s->op = OP_GET;
2335
2336 /* XXX derp derp derp */
2337 std::string uri = "/" + rgw_fh->bucket_name() + "/";
2338 s->relative_uri = uri;
2339 s->info.request_uri = uri; // XXX
2340 s->info.effective_uri = uri;
2341 s->info.request_params = "";
2342 s->info.domain = ""; /* XXX ? */
2343
2344 // woo
2345 s->user = user;
2346 s->bucket_tenant = user->user_id.tenant;
2347
2348 prefix = rgw_fh->relative_object_name();
2349 if (prefix.length() > 0)
2350 prefix += "/";
2351 prefix += path;
2352 delimiter = '/';
2353
2354 return 0;
2355 }
2356
2357 int get_params() override {
2358 max = default_max;
2359 return 0;
2360 }
2361
2362 void send_response() override {
2363 struct req_state* s = get_state();
2364 // try objects
2365 for (const auto& iter : objs) {
2366 auto& name = iter.key.name;
2367 lsubdout(cct, rgw, 15) << "RGWStatLeafRequest "
2368 << __func__ << " "
2369 << "list uri=" << s->relative_uri << " "
2370 << " prefix=" << prefix << " "
2371 << " obj path=" << name << ""
2372 << " target = " << path << ""
2373 << dendl;
2374 /* XXX is there a missing match-dir case (trailing '/')? */
2375 matched = true;
2376 if (name == path)
2377 exact_matched = true;
2378 return;
2379 }
2380 // try prefixes
2381 for (auto& iter : common_prefixes) {
2382 auto& name = iter.first;
2383 lsubdout(cct, rgw, 15) << "RGWStatLeafRequest "
2384 << __func__ << " "
2385 << "list uri=" << s->relative_uri << " "
2386 << " prefix=" << prefix << " "
2387 << " pref path=" << name << " (not chomped)"
2388 << " target = " << path << ""
2389 << dendl;
2390 matched = true;
2391 /* match-dir case (trailing '/') */
2392 if (name == prefix + "/")
2393 exact_matched = true;
2394 is_dir = true;
2395 break;
2396 }
2397 }
2398
2399 virtual void send_versioned_response() {
2400 send_response();
2401 }
2402 }; /* RGWStatLeafRequest */
2403
2404 /*
2405 put object
2406 */
2407
2408 class RGWWriteRequest : public RGWLibContinuedReq,
2409 public RGWPutObj /* RGWOp */
2410 {
2411 public:
2412 const std::string& bucket_name;
2413 const std::string& obj_name;
2414 RGWFileHandle* rgw_fh;
2415 std::optional<rgw::AioThrottle> aio;
2416 std::optional<rgw::putobj::AtomicObjectProcessor> processor;
2417 rgw::putobj::DataProcessor* filter;
2418 boost::optional<RGWPutObj_Compress> compressor;
2419 CompressorRef plugin;
2420 buffer::list data;
2421 uint64_t timer_id;
2422 MD5 hash;
2423 off_t real_ofs;
2424 size_t bytes_written;
2425 bool eio;
2426
2427 RGWWriteRequest(CephContext* _cct, RGWUserInfo *_user, RGWFileHandle* _fh,
2428 const std::string& _bname, const std::string& _oname)
2429 : RGWLibContinuedReq(_cct, _user),
2430 bucket_name(_bname), obj_name(_oname),
2431 rgw_fh(_fh), filter(nullptr), real_ofs(0),
2432 bytes_written(0), eio(false) {
2433
2434 int ret = header_init();
2435 if (ret == 0) {
2436 ret = init_from_header(get_state());
2437 }
2438 op = this;
2439 }
2440
2441 bool only_bucket() override { return true; }
2442
2443 int op_init() override {
2444 // assign store, s, and dialect_handler
2445 RGWObjectCtx* rados_ctx
2446 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2447 // framework promises to call op_init after parent init
2448 ceph_assert(rados_ctx);
2449 RGWOp::init(rados_ctx->get_store(), get_state(), this);
2450 op = this; // assign self as op: REQUIRED
2451 return 0;
2452 }
2453
2454 int header_init() override {
2455
2456 struct req_state* s = get_state();
2457 s->info.method = "PUT";
2458 s->op = OP_PUT;
2459
2460 /* XXX derp derp derp */
2461 std::string uri = make_uri(bucket_name, obj_name);
2462 s->relative_uri = uri;
2463 s->info.request_uri = uri; // XXX
2464 s->info.effective_uri = uri;
2465 s->info.request_params = "";
2466 s->info.domain = ""; /* XXX ? */
2467
2468 // woo
2469 s->user = user;
2470 s->bucket_tenant = user->user_id.tenant;
2471
2472 return 0;
2473 }
2474
2475 int get_params() override {
2476 struct req_state* s = get_state();
2477 RGWAccessControlPolicy_S3 s3policy(s->cct);
2478 /* we don't have (any) headers, so just create canned ACLs */
2479 int ret = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
2480 policy = s3policy;
2481 return ret;
2482 }
2483
2484 int get_data(buffer::list& _bl) override {
2485 /* XXX for now, use sharing semantics */
2486 uint32_t len = data.length();
2487 _bl.claim(data);
2488 bytes_written += len;
2489 return len;
2490 }
2491
2492 void put_data(off_t off, buffer::list& _bl) {
2493 if (off != real_ofs) {
2494 eio = true;
2495 }
2496 data.claim(_bl);
2497 real_ofs += data.length();
2498 ofs = off; /* consumed in exec_continue() */
2499 }
2500
2501 int exec_start() override;
2502 int exec_continue() override;
2503 int exec_finish() override;
2504
2505 void send_response() override {}
2506
2507 int verify_params() override {
2508 return 0;
2509 }
2510 }; /* RGWWriteRequest */
2511
2512 /*
2513 copy object
2514 */
2515 class RGWCopyObjRequest : public RGWLibRequest,
2516 public RGWCopyObj /* RGWOp */
2517 {
2518 public:
2519 RGWFileHandle* src_parent;
2520 RGWFileHandle* dst_parent;
2521 const std::string& src_name;
2522 const std::string& dst_name;
2523
2524 RGWCopyObjRequest(CephContext* _cct, RGWUserInfo *_user,
2525 RGWFileHandle* _src_parent, RGWFileHandle* _dst_parent,
2526 const std::string& _src_name, const std::string& _dst_name)
2527 : RGWLibRequest(_cct, _user), src_parent(_src_parent),
2528 dst_parent(_dst_parent), src_name(_src_name), dst_name(_dst_name) {
2529 /* all requests have this */
2530 op = this;
2531
2532 /* allow this request to replace selected attrs */
2533 attrs_mod = RGWRados::ATTRSMOD_MERGE;
2534 }
2535
2536 bool only_bucket() override { return true; }
2537
2538 int op_init() override {
2539 // assign store, s, and dialect_handler
2540 RGWObjectCtx* rados_ctx
2541 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2542 // framework promises to call op_init after parent init
2543 ceph_assert(rados_ctx);
2544 RGWOp::init(rados_ctx->get_store(), get_state(), this);
2545 op = this; // assign self as op: REQUIRED
2546
2547 return 0;
2548 }
2549
2550 int header_init() override {
2551
2552 struct req_state* s = get_state();
2553 s->info.method = "PUT"; // XXX check
2554 s->op = OP_PUT;
2555
2556 src_bucket_name = src_parent->bucket_name();
2557 // need s->src_bucket_name?
2558 src_object.name = src_parent->format_child_name(src_name, false);
2559 // need s->src_object?
2560
2561 dest_bucket_name = dst_parent->bucket_name();
2562 // need s->bucket.name?
2563 dest_object = dst_parent->format_child_name(dst_name, false);
2564 // need s->object_name?
2565
2566 int rc = valid_s3_object_name(dest_object);
2567 if (rc != 0)
2568 return rc;
2569
2570 /* XXX and fixup key attr (could optimize w/string ref and
2571 * dest_object) */
2572 buffer::list ux_key;
2573 fh_key fhk = dst_parent->make_fhk(dst_name);
2574 rgw::encode(fhk, ux_key);
2575 emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
2576
2577 #if 0 /* XXX needed? */
2578 s->relative_uri = uri;
2579 s->info.request_uri = uri; // XXX
2580 s->info.effective_uri = uri;
2581 s->info.request_params = "";
2582 s->info.domain = ""; /* XXX ? */
2583 #endif
2584
2585 // woo
2586 s->user = user;
2587 s->bucket_tenant = user->user_id.tenant;
2588
2589 return 0;
2590 }
2591
2592 int get_params() override {
2593 struct req_state* s = get_state();
2594 RGWAccessControlPolicy_S3 s3policy(s->cct);
2595 /* we don't have (any) headers, so just create canned ACLs */
2596 int ret = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
2597 dest_policy = s3policy;
2598 return ret;
2599 }
2600
2601 void send_response() override {}
2602 void send_partial_response(off_t ofs) override {}
2603
2604 }; /* RGWCopyObjRequest */
2605
2606 class RGWSetAttrsRequest : public RGWLibRequest,
2607 public RGWSetAttrs /* RGWOp */
2608 {
2609 public:
2610 const std::string& bucket_name;
2611 const std::string& obj_name;
2612
2613 RGWSetAttrsRequest(CephContext* _cct, RGWUserInfo *_user,
2614 const std::string& _bname, const std::string& _oname)
2615 : RGWLibRequest(_cct, _user), bucket_name(_bname), obj_name(_oname) {
2616 op = this;
2617 }
2618
2619 bool only_bucket() override { return false; }
2620
2621 int op_init() override {
2622 // assign store, s, and dialect_handler
2623 RGWObjectCtx* rados_ctx
2624 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2625 // framework promises to call op_init after parent init
2626 ceph_assert(rados_ctx);
2627 RGWOp::init(rados_ctx->get_store(), get_state(), this);
2628 op = this; // assign self as op: REQUIRED
2629 return 0;
2630 }
2631
2632 int header_init() override {
2633
2634 struct req_state* s = get_state();
2635 s->info.method = "PUT";
2636 s->op = OP_PUT;
2637
2638 /* XXX derp derp derp */
2639 std::string uri = make_uri(bucket_name, obj_name);
2640 s->relative_uri = uri;
2641 s->info.request_uri = uri; // XXX
2642 s->info.effective_uri = uri;
2643 s->info.request_params = "";
2644 s->info.domain = ""; /* XXX ? */
2645
2646 // woo
2647 s->user = user;
2648 s->bucket_tenant = user->user_id.tenant;
2649
2650 return 0;
2651 }
2652
2653 int get_params() override {
2654 return 0;
2655 }
2656
2657 void send_response() override {}
2658
2659 }; /* RGWSetAttrsRequest */
2660
2661 /*
2662 * Send request to get the rados cluster stats
2663 */
2664 class RGWGetClusterStatReq : public RGWLibRequest,
2665 public RGWGetClusterStat {
2666 public:
2667 struct rados_cluster_stat_t& stats_req;
2668 RGWGetClusterStatReq(CephContext* _cct,RGWUserInfo *_user,
2669 rados_cluster_stat_t& _stats):
2670 RGWLibRequest(_cct, _user), stats_req(_stats){
2671 op = this;
2672 }
2673
2674 int op_init() override {
2675 // assign store, s, and dialect_handler
2676 RGWObjectCtx* rados_ctx
2677 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2678 // framework promises to call op_init after parent init
2679 ceph_assert(rados_ctx);
2680 RGWOp::init(rados_ctx->get_store(), get_state(), this);
2681 op = this; // assign self as op: REQUIRED
2682 return 0;
2683 }
2684
2685 int header_init() override {
2686 struct req_state* s = get_state();
2687 s->info.method = "GET";
2688 s->op = OP_GET;
2689 s->user = user;
2690 return 0;
2691 }
2692
2693 int get_params() override { return 0; }
2694 bool only_bucket() override { return false; }
2695 void send_response() override {
2696 stats_req.kb = stats_op.kb;
2697 stats_req.kb_avail = stats_op.kb_avail;
2698 stats_req.kb_used = stats_op.kb_used;
2699 stats_req.num_objects = stats_op.num_objects;
2700 }
2701 }; /* RGWGetClusterStatReq */
2702
2703
2704 } /* namespace rgw */
2705
2706 #endif /* RGW_FILE_H */