]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_file.h
import 12.2.13 release
[ceph.git] / ceph / src / rgw / rgw_file.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #ifndef RGW_FILE_H
5 #define RGW_FILE_H
6
7 #include "include/rados/rgw_file.h"
8
9 /* internal header */
10 #include <string.h>
11 #include <sys/stat.h>
12 #include <stdint.h>
13
14 #include <atomic>
15 #include <chrono>
16 #include <thread>
17 #include <mutex>
18 #include <vector>
19 #include <deque>
20 #include <algorithm>
21 #include <functional>
22 #include <boost/intrusive_ptr.hpp>
23 #include <boost/range/adaptor/reversed.hpp>
24 #include <boost/container/flat_map.hpp>
25 #include <boost/variant.hpp>
26 #include <boost/utility/string_ref.hpp>
27 #include <boost/optional.hpp>
28 #include "xxhash.h"
29 #include "include/buffer.h"
30 #include "common/cohort_lru.h"
31 #include "common/ceph_timer.h"
32 #include "rgw_common.h"
33 #include "rgw_user.h"
34 #include "rgw_lib.h"
35 #include "rgw_ldap.h"
36 #include "rgw_token.h"
37 #include "rgw_compression.h"
38
39
40 /* XXX
41 * ASSERT_H somehow not defined after all the above (which bring
42 * in common/debug.h [e.g., dout])
43 */
44 #include "include/assert.h"
45
46
47 #define RGW_RWXMODE (S_IRWXU | S_IRWXG | S_IRWXO)
48
49 #define RGW_RWMODE (RGW_RWXMODE & \
50 ~(S_IXUSR | S_IXGRP | S_IXOTH))
51
52
53 namespace rgw {
54
/* Accepts any argument by forwarding reference and does nothing
 * with it. */
template <class T>
static inline void ignore(T&& /* unused */) {}
57
58
59 namespace bi = boost::intrusive;
60
61 class RGWLibFS;
62 class RGWFileHandle;
63 class RGWWriteRequest;
64
/* Lexicographic ordering of timespec values by (tv_sec, tv_nsec):
 * seconds decide first, nanoseconds only break ties. */
static inline bool operator <(const struct timespec& lhs,
                              const struct timespec& rhs) {
  if (lhs.tv_sec != rhs.tv_sec)
    return lhs.tv_sec < rhs.tv_sec;
  return lhs.tv_nsec < rhs.tv_nsec;
}
72
/* Two timespec values are equal when both the seconds and the
 * nanoseconds fields match. */
static inline bool operator ==(const struct timespec& lhs,
                               const struct timespec& rhs) {
  if (lhs.tv_sec != rhs.tv_sec)
    return false;
  return lhs.tv_nsec == rhs.tv_nsec;
}
78
/*
 * XXX
 * The current 64-bit, non-cryptographic hash used here is intended
 * for prototyping only.
 *
 * However, the invariant being prototyped is that objects be
 * identifiable by their hash components alone. We believe this can
 * be legitimately implemented using 128-bit hash values for bucket and
 * object components, together with a cluster-resident cryptographic
 * key. Since an MD5 or SHA-1 key is 128 bits and the (fast),
 * non-cryptographic CityHash128 hash algorithm takes a 128-bit seed,
 * speculatively we could use that for the final hash computations.
 */
92 struct fh_key
93 {
94 rgw_fh_hk fh_hk;
95 uint32_t version;
96
97 static constexpr uint64_t seed = 8675309;
98
99 fh_key() : version(0) {}
100
101 fh_key(const rgw_fh_hk& _hk)
102 : fh_hk(_hk), version(0) {
103 // nothing
104 }
105
106 fh_key(const uint64_t bk, const uint64_t ok)
107 : version(0) {
108 fh_hk.bucket = bk;
109 fh_hk.object = ok;
110 }
111
112 fh_key(const uint64_t bk, const char *_o)
113 : version(0) {
114 fh_hk.bucket = bk;
115 fh_hk.object = XXH64(_o, ::strlen(_o), seed);
116 }
117
118 fh_key(const std::string& _b, const std::string& _o)
119 : version(0) {
120 fh_hk.bucket = XXH64(_b.c_str(), _o.length(), seed);
121 fh_hk.object = XXH64(_o.c_str(), _o.length(), seed);
122 }
123
124 void encode(buffer::list& bl) const {
125 ENCODE_START(2, 1, bl);
126 ::encode(fh_hk.bucket, bl);
127 ::encode(fh_hk.object, bl);
128 ::encode((uint32_t)2, bl);
129 ENCODE_FINISH(bl);
130 }
131
132 void decode(bufferlist::iterator& bl) {
133 DECODE_START(2, bl);
134 ::decode(fh_hk.bucket, bl);
135 ::decode(fh_hk.object, bl);
136 if (struct_v >= 2) {
137 ::decode(version, bl);
138 }
139 DECODE_FINISH(bl);
140 }
141 }; /* fh_key */
142
143 WRITE_CLASS_ENCODER(fh_key);
144
145 inline bool operator<(const fh_key& lhs, const fh_key& rhs)
146 {
147 return ((lhs.fh_hk.bucket < rhs.fh_hk.bucket) ||
148 ((lhs.fh_hk.bucket == rhs.fh_hk.bucket) &&
149 (lhs.fh_hk.object < rhs.fh_hk.object)));
150 }
151
152 inline bool operator>(const fh_key& lhs, const fh_key& rhs)
153 {
154 return (rhs < lhs);
155 }
156
157 inline bool operator==(const fh_key& lhs, const fh_key& rhs)
158 {
159 return ((lhs.fh_hk.bucket == rhs.fh_hk.bucket) &&
160 (lhs.fh_hk.object == rhs.fh_hk.object));
161 }
162
163 inline bool operator!=(const fh_key& lhs, const fh_key& rhs)
164 {
165 return !(lhs == rhs);
166 }
167
168 inline bool operator<=(const fh_key& lhs, const fh_key& rhs)
169 {
170 return (lhs < rhs) || (lhs == rhs);
171 }
172
173 using boost::variant;
174 using boost::container::flat_map;
175
176 typedef std::tuple<bool, bool> DecodeAttrsResult;
177
178 class RGWFileHandle : public cohort::lru::Object
179 {
struct rgw_file_handle fh; /* public handle struct; fh.fh_private is set to `this` by the constructors */
std::mutex mtx; /* per-handle lock; taken by open(), open_for_create(), clear_creating(), add_marker() */

RGWLibFS* fs; /* filesystem instance passed to the constructor */
RGWFileHandle* bucket; /* enclosing bucket handle; nullptr for the root and for children of the root, `this` for a bucket mount */
RGWFileHandle* parent; /* parent handle; nullptr for the handle built by the private (mount) constructor */
/* const */ std::string name; /* XXX file or bucket name */
/* const */ fh_key fhk; /* lookup key; its hash pair is mirrored into fh.fh_hk */

/* lock helpers over mtx */
using lock_guard = std::lock_guard<std::mutex>;
using unique_lock = std::unique_lock<std::mutex>;
191
192 /* TODO: keeping just the last marker is sufficient for
193 * nfs-ganesha 2.4.5; in the near future, nfs-ganesha will
194 * be able to hint the name of the next dirent required,
195 * from which we can directly synthesize a RADOS marker.
196 * using marker_cache_t = flat_map<uint64_t, rgw_obj_key>;
197 */
198
/* Unix-style attributes cached on the handle; this is what stat()
 * reports and what encode()/decode() serialize. */
struct State {
  uint64_t dev;
  uint64_t size;
  uint64_t nlink;
  uint32_t owner_uid; /* XXX need Unix attr */
  uint32_t owner_gid; /* XXX need Unix attr */
  mode_t unix_mode;
  struct timespec ctime;
  struct timespec mtime;
  struct timespec atime;
  uint32_t version;
  /* every member gets a defined value; unix_mode used to be left
   * indeterminate until a constructor of the enclosing handle
   * assigned it */
  State() : dev(0), size(0), nlink(1), owner_uid(0), owner_gid(0),
            unix_mode(0), ctime{0,0}, mtime{0,0}, atime{0,0},
            version(0) {}
} state;
213
214 struct file {
215 RGWWriteRequest* write_req;
216 file() : write_req(nullptr) {}
217 ~file();
218 };
219
220 struct directory {
221
222 static constexpr uint32_t FLAG_NONE = 0x0000;
223
224 uint32_t flags;
225 rgw_obj_key last_marker;
226 struct timespec last_readdir;
227
228 directory() : flags(FLAG_NONE), last_readdir{0,0} {}
229 };
230
void clear_state(); /* defined out of line */

/* per-type state; the constructors store file() or directory() to
 * match fh.fh_type */
boost::variant<file, directory> variant_type;

uint16_t depth; /* 0 from the private constructor, 1 for a bucket mount, else parent->depth + 1 */
uint32_t flags; /* bitmask of the FLAG_* constants below */

ceph::buffer::list etag; /* see get_etag()/set_etag() */
ceph::buffer::list acls; /* see get_acls()/set_acls() */
240
public:
const static std::string root_name; /* name reported for the root; defined out of line */

static constexpr uint16_t MAX_DEPTH = 256;

/* Bits for RGWFileHandle::flags and for the `flags` arguments of the
 * RGWLibFS lookup calls. */
static constexpr uint32_t FLAG_NONE = 0x0000;
static constexpr uint32_t FLAG_OPEN = 0x0001; /* set by open() */
static constexpr uint32_t FLAG_ROOT = 0x0002; /* set by init_rootfs() for a non-bucket mount */
static constexpr uint32_t FLAG_CREATE = 0x0004; /* reported by lookup_fh() when a handle was newly made */
static constexpr uint32_t FLAG_CREATING = 0x0008; /* set by open_for_create(), cleared by clear_creating() */
static constexpr uint32_t FLAG_DIRECTORY = 0x0010; /* constructor: build a directory rather than a file */
static constexpr uint32_t FLAG_BUCKET = 0x0020; /* set for bucket mounts and for children of the root */
static constexpr uint32_t FLAG_LOCK = 0x0040; /* lookup_fh(): return the handle with mtx held */
static constexpr uint32_t FLAG_DELETED = 0x0080; /* lookups sleep and retry while this is set */
static constexpr uint32_t FLAG_UNLINK_THIS = 0x0100;
static constexpr uint32_t FLAG_LOCKED = 0x0200; /* lookup_fh(): skip the lookup's own mtx.lock() */
static constexpr uint32_t FLAG_STATELESS_OPEN = 0x0400; /* set by open() when RGW_OPEN_FLAG_V3 is passed */
static constexpr uint32_t FLAG_EXACT_MATCH = 0x0800;
static constexpr uint32_t FLAG_MOUNT = 0x1000; /* set by init_rootfs() on the mount handle */

/* strip the lookup-only CREATE and LOCK bits before the flags are
 * handed to a new handle's constructor */
#define CREATE_FLAGS(x) \
((x) & ~(RGWFileHandle::FLAG_CREATE|RGWFileHandle::FLAG_LOCK))

friend class RGWLibFS;
265
266 private:
267 RGWFileHandle(RGWLibFS* _fs)
268 : fs(_fs), bucket(nullptr), parent(nullptr), variant_type{directory()},
269 depth(0), flags(FLAG_NONE)
270 {
271 /* root */
272 fh.fh_type = RGW_FS_TYPE_DIRECTORY;
273 variant_type = directory();
274 /* stat */
275 state.unix_mode = RGW_RWXMODE|S_IFDIR;
276 /* pointer to self */
277 fh.fh_private = this;
278 }
279
280 uint64_t init_fsid(std::string& uid) {
281 return XXH64(uid.c_str(), uid.length(), fh_key::seed);
282 }
283
284 void init_rootfs(std::string& fsid, const std::string& object_name,
285 bool is_bucket) {
286 /* fh_key */
287 fh.fh_hk.bucket = XXH64(fsid.c_str(), fsid.length(), fh_key::seed);
288 fh.fh_hk.object = XXH64(object_name.c_str(), object_name.length(),
289 fh_key::seed);
290 fhk = fh.fh_hk;
291 name = object_name;
292
293 state.dev = init_fsid(fsid);
294
295 if (is_bucket) {
296 flags |= RGWFileHandle::FLAG_BUCKET | RGWFileHandle::FLAG_MOUNT;
297 bucket = this;
298 depth = 1;
299 } else {
300 flags |= RGWFileHandle::FLAG_ROOT | RGWFileHandle::FLAG_MOUNT;
301 }
302 }
303
304 public:
305 RGWFileHandle(RGWLibFS* _fs, RGWFileHandle* _parent,
306 const fh_key& _fhk, std::string& _name, uint32_t _flags)
307 : fs(_fs), bucket(nullptr), parent(_parent), name(std::move(_name)),
308 fhk(_fhk), flags(_flags) {
309
310 if (parent->is_root()) {
311 fh.fh_type = RGW_FS_TYPE_DIRECTORY;
312 variant_type = directory();
313 flags |= FLAG_BUCKET;
314 } else {
315 bucket = parent->is_bucket() ? parent
316 : parent->bucket;
317 if (flags & FLAG_DIRECTORY) {
318 fh.fh_type = RGW_FS_TYPE_DIRECTORY;
319 variant_type = directory();
320 } else {
321 fh.fh_type = RGW_FS_TYPE_FILE;
322 variant_type = file();
323 }
324 }
325
326 depth = parent->depth + 1;
327
328 /* save constant fhk */
329 fh.fh_hk = fhk.fh_hk; /* XXX redundant in fh_hk */
330
331 /* inherits parent's fsid */
332 state.dev = parent->state.dev;
333
334 switch (fh.fh_type) {
335 case RGW_FS_TYPE_DIRECTORY:
336 state.unix_mode = RGW_RWXMODE|S_IFDIR;
337 break;
338 case RGW_FS_TYPE_FILE:
339 state.unix_mode = RGW_RWMODE|S_IFREG;
340 default:
341 break;
342 }
343
344 /* pointer to self */
345 fh.fh_private = this;
346 }
347
348 const fh_key& get_key() const {
349 return fhk;
350 }
351
352 directory* get_directory() {
353 return get<directory>(&variant_type);
354 }
355
356 size_t get_size() const { return state.size; }
357
358 const char* stype() {
359 return is_dir() ? "DIR" : "FILE";
360 }
361
362 uint16_t get_depth() const { return depth; }
363
364 struct rgw_file_handle* get_fh() { return &fh; }
365
366 RGWLibFS* get_fs() { return fs; }
367
368 RGWFileHandle* get_parent() { return parent; }
369
370 uint32_t get_owner_uid() const { return state.owner_uid; }
371 uint32_t get_owner_gid() const { return state.owner_gid; }
372
373 struct timespec get_ctime() const { return state.ctime; }
374 struct timespec get_mtime() const { return state.mtime; }
375
376 const ceph::buffer::list& get_etag() const { return etag; }
377 const ceph::buffer::list& get_acls() const { return acls; }
378
379 void create_stat(struct stat* st, uint32_t mask) {
380 if (mask & RGW_SETATTR_UID)
381 state.owner_uid = st->st_uid;
382
383 if (mask & RGW_SETATTR_GID)
384 state.owner_gid = st->st_gid;
385
386 if (mask & RGW_SETATTR_MODE) {
387 switch (fh.fh_type) {
388 case RGW_FS_TYPE_DIRECTORY:
389 state.unix_mode = st->st_mode|S_IFDIR;
390 break;
391 case RGW_FS_TYPE_FILE:
392 state.unix_mode = st->st_mode|S_IFREG;
393 default:
394 break;
395 }
396 }
397
398 if (mask & RGW_SETATTR_ATIME)
399 state.atime = st->st_atim;
400 if (mask & RGW_SETATTR_MTIME)
401 state.mtime = st->st_mtim;
402 if (mask & RGW_SETATTR_CTIME)
403 state.ctime = st->st_ctim;
404 }
405
406 int stat(struct stat* st) {
407 /* partial Unix attrs */
408 memset(st, 0, sizeof(struct stat));
409 st->st_dev = state.dev;
410 st->st_ino = fh.fh_hk.object; // XXX
411
412 st->st_uid = state.owner_uid;
413 st->st_gid = state.owner_gid;
414
415 st->st_mode = state.unix_mode;
416
417 #ifdef HAVE_STAT_ST_MTIMESPEC_TV_NSEC
418 st->st_atimespec = state.atime;
419 st->st_mtimespec = state.mtime;
420 st->st_ctimespec = state.ctime;
421 #else
422 st->st_atim = state.atime;
423 st->st_mtim = state.mtime;
424 st->st_ctim = state.ctime;
425 #endif
426
427 switch (fh.fh_type) {
428 case RGW_FS_TYPE_DIRECTORY:
429 st->st_nlink = state.nlink;
430 break;
431 case RGW_FS_TYPE_FILE:
432 st->st_nlink = 1;
433 st->st_blksize = 4096;
434 st->st_size = state.size;
435 st->st_blocks = (state.size) / 512;
436 default:
437 break;
438 }
439
440 return 0;
441 }
442
443 const std::string& bucket_name() const {
444 if (is_root())
445 return root_name;
446 if (is_bucket())
447 return name;
448 return bucket->object_name();
449 }
450
451 const std::string& object_name() const { return name; }
452
453 std::string full_object_name(bool omit_bucket = false) const {
454 std::string path;
455 std::vector<const std::string*> segments;
456 int reserve = 0;
457 const RGWFileHandle* tfh = this;
458 while (tfh && !tfh->is_root() && !(tfh->is_bucket() && omit_bucket)) {
459 segments.push_back(&tfh->object_name());
460 reserve += (1 + tfh->object_name().length());
461 tfh = tfh->parent;
462 }
463 bool first = true;
464 path.reserve(reserve);
465 for (auto& s : boost::adaptors::reverse(segments)) {
466 if (! first)
467 path += "/";
468 else {
469 if (!omit_bucket && (path.front() != '/')) // pretty-print
470 path += "/";
471 first = false;
472 }
473 path += *s;
474 }
475 return path;
476 }
477
478 inline std::string relative_object_name() const {
479 return full_object_name(true /* omit_bucket */);
480 }
481
482 inline std::string format_child_name(const std::string& cbasename,
483 bool is_dir) const {
484 std::string child_name{relative_object_name()};
485 if ((child_name.size() > 0) &&
486 (child_name.back() != '/'))
487 child_name += "/";
488 child_name += cbasename;
489 if (is_dir)
490 child_name += "/";
491 return child_name;
492 }
493
494 inline std::string make_key_name(const char *name) const {
495 std::string key_name{full_object_name()};
496 if (key_name.length() > 0)
497 key_name += "/";
498 key_name += name;
499 return key_name;
500 }
501
502 fh_key make_fhk(const std::string& name) const {
503 if (depth <= 1)
504 return fh_key(fhk.fh_hk.object, name.c_str());
505 else {
506 std::string key_name = make_key_name(name.c_str());
507 return fh_key(fhk.fh_hk.bucket, key_name.c_str());
508 }
509 }
510
511 void add_marker(uint64_t off, const rgw_obj_key& marker,
512 uint8_t obj_type) {
513 using std::get;
514 directory* d = get<directory>(&variant_type);
515 if (d) {
516 unique_lock guard(mtx);
517 d->last_marker = marker;
518 }
519 }
520
521 const rgw_obj_key* find_marker(uint64_t off) const {
522 using std::get;
523 if (off > 0) {
524 const directory* d = get<directory>(&variant_type);
525 if (d ) {
526 return &d->last_marker;
527 }
528 }
529 return nullptr;
530 }
531
532 int offset_of(const std::string& name, int64_t *offset, uint32_t flags) {
533 if (unlikely(! is_dir())) {
534 return -EINVAL;
535 }
536 *offset = XXH64(name.c_str(), name.length(), fh_key::seed);
537 return 0;
538 }
539
540 bool is_open() const { return flags & FLAG_OPEN; }
541 bool is_root() const { return flags & FLAG_ROOT; }
542 bool is_mount() const { return flags & FLAG_MOUNT; }
543 bool is_bucket() const { return flags & FLAG_BUCKET; }
544 bool is_object() const { return !is_bucket(); }
545 bool is_file() const { return (fh.fh_type == RGW_FS_TYPE_FILE); }
546 bool is_dir() const { return (fh.fh_type == RGW_FS_TYPE_DIRECTORY); }
547 bool creating() const { return flags & FLAG_CREATING; }
548 bool deleted() const { return flags & FLAG_DELETED; }
549 bool stateless_open() const { return flags & FLAG_STATELESS_OPEN; }
550 bool has_children() const;
551
552 int open(uint32_t gsh_flags) {
553 lock_guard guard(mtx);
554 if (! is_open()) {
555 if (gsh_flags & RGW_OPEN_FLAG_V3) {
556 flags |= FLAG_STATELESS_OPEN;
557 }
558 flags |= FLAG_OPEN;
559 return 0;
560 }
561 return -EPERM;
562 }
563
/* readdir position: either a pointer to a numeric offset or a name */
typedef boost::variant<uint64_t*, const char*> readdir_offset;

/* defined out of line; presumably invokes rcb per entry and sets
 * *eof at the end of the listing -- confirm against the definition */
int readdir(rgw_readdir_cb rcb, void *cb_arg, readdir_offset offset,
bool *eof, uint32_t flags);

/* defined out of line */
int write(uint64_t off, size_t len, size_t *nbytes, void *buffer);
570
/* NFS3 and NFSv4 COMMIT implementation.
 *
 * The current atomic update strategy doesn't actually permit clients
 * to read-stable until either CLOSE (NFSv4+) or the expiration of the
 * active write timer (NFS3).  In the interim the client may send an
 * arbitrary number of COMMIT operations, all of which must report
 * success -- so this ignores its arguments and returns 0. */
int commit(uint64_t offset, uint64_t length, uint32_t flags) {
  const int success = 0;
  return success;
}
580
581 int write_finish(uint32_t flags = FLAG_NONE);
582 int close();
583
584 void open_for_create() {
585 lock_guard guard(mtx);
586 flags |= FLAG_CREATING;
587 }
588
589 void clear_creating() {
590 lock_guard guard(mtx);
591 flags &= ~FLAG_CREATING;
592 }
593
594 void inc_nlink(const uint64_t n) {
595 state.nlink += n;
596 }
597
598 void set_nlink(const uint64_t n) {
599 state.nlink = n;
600 }
601
602 void set_size(const size_t size) {
603 state.size = size;
604 }
605
606 void set_times(real_time t) {
607 state.ctime = real_clock::to_timespec(t);
608 state.mtime = state.ctime;
609 state.atime = state.ctime;
610 }
611
612 void set_ctime(const struct timespec &ts) {
613 state.ctime = ts;
614 }
615
616 void set_mtime(const struct timespec &ts) {
617 state.mtime = ts;
618 }
619
620 void set_atime(const struct timespec &ts) {
621 state.atime = ts;
622 }
623
624 void set_etag(const ceph::buffer::list& _etag ) {
625 etag = _etag;
626 }
627
628 void set_acls(const ceph::buffer::list& _acls ) {
629 acls = _acls;
630 }
631
632 void encode(buffer::list& bl) const {
633 ENCODE_START(2, 1, bl);
634 ::encode(uint32_t(fh.fh_type), bl);
635 ::encode(state.dev, bl);
636 ::encode(state.size, bl);
637 ::encode(state.nlink, bl);
638 ::encode(state.owner_uid, bl);
639 ::encode(state.owner_gid, bl);
640 ::encode(state.unix_mode, bl);
641 for (const auto& t : { state.ctime, state.mtime, state.atime }) {
642 ::encode(real_clock::from_timespec(t), bl);
643 }
644 ::encode((uint32_t)2, bl);
645 ENCODE_FINISH(bl);
646 }
647
648 void decode(bufferlist::iterator& bl) {
649 DECODE_START(2, bl);
650 uint32_t fh_type;
651 ::decode(fh_type, bl);
652 assert(fh.fh_type == fh_type);
653 ::decode(state.dev, bl);
654 ::decode(state.size, bl);
655 ::decode(state.nlink, bl);
656 ::decode(state.owner_uid, bl);
657 ::decode(state.owner_gid, bl);
658 ::decode(state.unix_mode, bl);
659 ceph::real_time enc_time;
660 for (auto t : { &(state.ctime), &(state.mtime), &(state.atime) }) {
661 ::decode(enc_time, bl);
662 *t = real_clock::to_timespec(enc_time);
663 }
664 if (struct_v >= 2) {
665 ::decode(state.version, bl);
666 }
667 DECODE_FINISH(bl);
668 }
669
/* defined out of line; presumably fills the two buffers with the
 * encoded key and attributes -- confirm against the definition */
void encode_attrs(ceph::buffer::list& ux_key1,
ceph::buffer::list& ux_attrs1);

/* defined out of line; returns a pair of bools (DecodeAttrsResult) */
DecodeAttrsResult decode_attrs(const ceph::buffer::list* ux_key1,
const ceph::buffer::list* ux_attrs1);

/* defined out of line */
void invalidate();

/* cohort::lru::Object override; defined out of line */
bool reclaim() override;
679
680 typedef cohort::lru::LRU<std::mutex> FhLRU;
681
682 struct FhLT
683 {
684 // for internal ordering
685 bool operator()(const RGWFileHandle& lhs, const RGWFileHandle& rhs) const
686 { return (lhs.get_key() < rhs.get_key()); }
687
688 // for external search by fh_key
689 bool operator()(const fh_key& k, const RGWFileHandle& fh) const
690 { return k < fh.get_key(); }
691
692 bool operator()(const RGWFileHandle& fh, const fh_key& k) const
693 { return fh.get_key() < k; }
694 };
695
696 struct FhEQ
697 {
698 bool operator()(const RGWFileHandle& lhs, const RGWFileHandle& rhs) const
699 { return (lhs.get_key() == rhs.get_key()); }
700
701 bool operator()(const fh_key& k, const RGWFileHandle& fh) const
702 { return k == fh.get_key(); }
703
704 bool operator()(const RGWFileHandle& fh, const fh_key& k) const
705 { return fh.get_key() == k; }
706 };
707
708 typedef bi::link_mode<bi::safe_link> link_mode; /* XXX normal */
709 #if defined(FHCACHE_AVL)
710 typedef bi::avl_set_member_hook<link_mode> tree_hook_type;
711 #else
712 /* RBT */
713 typedef bi::set_member_hook<link_mode> tree_hook_type;
714 #endif
715 tree_hook_type fh_hook;
716
717 typedef bi::member_hook<
718 RGWFileHandle, tree_hook_type, &RGWFileHandle::fh_hook> FhHook;
719
720 #if defined(FHCACHE_AVL)
721 typedef bi::avltree<RGWFileHandle, bi::compare<FhLT>, FhHook> FHTree;
722 #else
723 typedef bi::rbtree<RGWFileHandle, bi::compare<FhLT>, FhHook> FhTree;
724 #endif
725 typedef cohort::lru::TreeX<RGWFileHandle, FhTree, FhLT, FhEQ, fh_key,
726 std::mutex> FHCache;
727
/* defined out of line */
~RGWFileHandle() override;

/* stream output for a handle (used by the lookup log messages);
 * defined out of line */
friend std::ostream& operator<<(std::ostream &os,
RGWFileHandle const &rgw_fh);
732
733 class Factory : public cohort::lru::ObjectFactory
734 {
735 public:
736 RGWLibFS* fs;
737 RGWFileHandle* parent;
738 const fh_key& fhk;
739 std::string& name;
740 uint32_t flags;
741
742 Factory() = delete;
743
744 Factory(RGWLibFS* _fs, RGWFileHandle* _parent,
745 const fh_key& _fhk, std::string& _name, uint32_t _flags)
746 : fs(_fs), parent(_parent), fhk(_fhk), name(_name),
747 flags(_flags) {}
748
749 void recycle (cohort::lru::Object* o) override {
750 /* re-use an existing object */
751 o->~Object(); // call lru::Object virtual dtor
752 // placement new!
753 new (o) RGWFileHandle(fs, parent, fhk, name, flags);
754 }
755
756 cohort::lru::Object* alloc() override {
757 return new RGWFileHandle(fs, parent, fhk, name, flags);
758 }
759 }; /* Factory */
760
761 }; /* RGWFileHandle */
762
763 WRITE_CLASS_ENCODER(RGWFileHandle);
764
765 static inline RGWFileHandle* get_rgwfh(struct rgw_file_handle* fh) {
766 return static_cast<RGWFileHandle*>(fh->fh_private);
767 }
768
769 static inline enum rgw_fh_type fh_type_of(uint32_t flags) {
770 enum rgw_fh_type fh_type;
771 switch(flags & RGW_LOOKUP_TYPE_FLAGS)
772 {
773 case RGW_LOOKUP_FLAG_DIR:
774 fh_type = RGW_FS_TYPE_DIRECTORY;
775 break;
776 case RGW_LOOKUP_FLAG_FILE:
777 fh_type = RGW_FS_TYPE_FILE;
778 break;
779 default:
780 fh_type = RGW_FS_TYPE_NIL;
781 };
782 return fh_type;
783 }
784
785 typedef std::tuple<RGWFileHandle*, uint32_t> LookupFHResult;
786 typedef std::tuple<RGWFileHandle*, int> MkObjResult;
787
788 class RGWLibFS
789 {
CephContext* cct; /* returned by get_context() */
struct rgw_fs fs{}; /* public fs struct; fs_private and root_fh are set in the constructor */
RGWFileHandle root_fh; /* mount handle, constructed with this fs */
rgw_fh_callback_t invalidate_cb; /* set by register_invalidate() */
void *invalidate_arg; /* set by register_invalidate() */
bool shutdown; /* set by stop() */

mutable std::atomic<uint64_t> refcnt; /* intrusive reference count; starts at 1 */

RGWFileHandle::FHCache fh_cache; /* key -> handle lookup */
RGWFileHandle::FhLRU fh_lru; /* handle references and recycling */

std::string uid; // should match user.user_id, iiuc

RGWUserInfo user; /* filled in by authorize() */
RGWAccessKey key; // XXXX acc_key

static std::atomic<uint32_t> fs_inst_counter; /* bumped by new_inst() */

static uint32_t write_completion_interval_s;

using lock_guard = std::lock_guard<std::mutex>;
using unique_lock = std::unique_lock<std::mutex>;
813
814 struct event
815 {
816 enum class type : uint8_t { READDIR } ;
817 type t;
818 const fh_key fhk;
819 struct timespec ts;
820 event(type t, const fh_key& k, const struct timespec& ts)
821 : t(t), fhk(k), ts(ts) {}
822 };
823
824 friend std::ostream& operator<<(std::ostream &os,
825 RGWLibFS::event const &ev);
826
827 using event_vector = /* boost::small_vector<event, 16> */
828 std::vector<event>;
829
830 struct WriteCompletion
831 {
832 RGWFileHandle& rgw_fh;
833
834 WriteCompletion(RGWFileHandle& _fh) : rgw_fh(_fh) {
835 rgw_fh.get_fs()->ref(&rgw_fh);
836 }
837
838 void operator()() {
839 rgw_fh.close(); /* will finish in-progress write */
840 rgw_fh.get_fs()->unref(&rgw_fh);
841 }
842 };
843
844 static ceph::timer<ceph::mono_clock> write_timer;
845
846 struct State {
847 std::mutex mtx;
848 std::atomic<uint32_t> flags;
849 std::deque<event> events;
850
851 State() : flags(0) {}
852
853 void push_event(const event& ev) {
854 events.push_back(ev);
855 }
856 } state;
857
858 uint32_t new_inst() {
859 return ++fs_inst_counter;
860 }
861
friend class RGWFileHandle;
friend class RGWLibProcess;

public:

/* bits for state.flags */
static constexpr uint32_t FLAG_NONE = 0x0000;
static constexpr uint32_t FLAG_CLOSED = 0x0001; /* lookups return no handle once this is set */

/* bucket statistics, filled in through stat_bucket() */
struct BucketStats {
size_t size;
size_t size_rounded;
real_time creation_time;
uint64_t num_entries;
};
876
877 RGWLibFS(CephContext* _cct, const char *_uid, const char *_user_id,
878 const char* _key, const char *root)
879 : cct(_cct), root_fh(this), invalidate_cb(nullptr),
880 invalidate_arg(nullptr), shutdown(false), refcnt(1),
881 fh_cache(cct->_conf->rgw_nfs_fhcache_partitions,
882 cct->_conf->rgw_nfs_fhcache_size),
883 fh_lru(cct->_conf->rgw_nfs_lru_lanes,
884 cct->_conf->rgw_nfs_lru_lane_hiwat),
885 uid(_uid), key(_user_id, _key) {
886
887 if (!root || !strcmp(root, "/")) {
888 root_fh.init_rootfs(uid, RGWFileHandle::root_name, false);
889 } else {
890 root_fh.init_rootfs(uid, root, true);
891 }
892
893 /* pointer to self */
894 fs.fs_private = this;
895
896 /* expose public root fh */
897 fs.root_fh = root_fh.get_fh();
898
899 new_inst();
900 }
901
/* boost::intrusive_ptr hook: take one reference (relaxed increment) */
friend void intrusive_ptr_add_ref(const RGWLibFS* fs) {
fs->refcnt.fetch_add(1, std::memory_order_relaxed);
}

/* boost::intrusive_ptr hook: drop one reference and delete the object
 * when the value returned by fetch_sub -- the count BEFORE the
 * decrement -- is 0.
 * NOTE(review): refcnt starts at 1, so the release that takes it from
 * 1 to 0 does not delete; only a further release would.  The usual
 * idiom compares the previous value with 1 -- confirm the intended
 * lifecycle before changing this. */
friend void intrusive_ptr_release(const RGWLibFS* fs) {
if (fs->refcnt.fetch_sub(1, std::memory_order_release) == 0) {
std::atomic_thread_fence(std::memory_order_acquire);
delete fs;
}
}

/* take a reference and return this */
RGWLibFS* ref() {
intrusive_ptr_add_ref(this);
return this;
}

/* drop a reference (see intrusive_ptr_release) */
inline void rele() {
intrusive_ptr_release(this);
}
921
922 void stop() { shutdown = true; }
923
924 void release_evict(RGWFileHandle* fh) {
925 /* remove from cache, releases sentinel ref */
926 fh_cache.remove(fh->fh.fh_hk.object, fh,
927 RGWFileHandle::FHCache::FLAG_LOCK);
928 /* release call-path ref */
929 (void) fh_lru.unref(fh, cohort::lru::FLAG_NONE);
930 }
931
/* Authenticate the mount's access key.
 *
 * First the key id is looked up as an RGW access key; on a hit the
 * secret must match (-EINVAL otherwise) and the user must not be
 * suspended (-ERR_USER_SUSPENDED).  If that lookup fails, the key id
 * is tried as a base64 token against LDAP, and on LDAP success the
 * user is loaded by uid or, failing that, stored.
 *
 * Returns the result of the access-key lookup in every path that
 * does not return early.
 * NOTE(review): the inner `int ret` shadows the outer one, and the
 * outer `ret` is never reset, so a successful LDAP authentication
 * still returns the access-key lookup error -- confirm whether that
 * is intended. */
int authorize(RGWRados* store) {
int ret = rgw_get_user_info_by_access_key(store, key.id, user);
if (ret == 0) {
RGWAccessKey* k = user.get_key(key.id);
if (!k || (k->key != key.key))
return -EINVAL;
if (user.suspended)
return -ERR_USER_SUSPENDED;
} else {
/* try external authenticators (ldap for now) */
rgw::LDAPHelper* ldh = rgwlib.get_ldh(); /* !nullptr */
RGWToken token;
/* boost filters and/or string_ref may throw on invalid input */
try {
token = rgw::from_base64(key.id);
} catch(...) {
/* an undecodable id becomes an empty token, which fails valid() */
token = std::string("");
}
if (token.valid() && (ldh->auth(token.id, token.key) == 0)) {
/* try to store user if it doesn't already exist */
if (rgw_get_user_info_by_uid(store, token.id, user) < 0) {
int ret = rgw_store_user_info(store, user, NULL, NULL, real_time(),
true);
/* a failed store is only logged */
if (ret < 0) {
lsubdout(get_context(), rgw, 10)
<< "NOTICE: failed to store new user's info: ret=" << ret
<< dendl;
}
}
} /* auth success */
}
return ret;
} /* authorize */
965
966 int register_invalidate(rgw_fh_callback_t cb, void *arg, uint32_t flags) {
967 invalidate_cb = cb;
968 invalidate_arg = arg;
969 return 0;
970 }
971
/* find RGWFileHandle by id
 *
 * Looks the key up in fh_cache; never creates a handle.  Returns
 * {handle-or-nullptr, FLAG_NONE}.  On a hit the handle holds a
 * reference taken through fh_lru.ref(), and it is returned with
 * fh->mtx held when `flags` contains RGWFileHandle::FLAG_LOCK.
 * RGWFileHandle::FLAG_LOCKED in `flags` skips this function's own
 * mtx.lock(). */
LookupFHResult lookup_fh(const fh_key& fhk,
const uint32_t flags = RGWFileHandle::FLAG_NONE) {
using std::get;

// cast int32_t(RGWFileHandle::FLAG_NONE) due to strictness of Clang
// the cast transfers a lvalue into a rvalue in the ctor
// check the commit message for the full details
LookupFHResult fhr { nullptr, uint32_t(RGWFileHandle::FLAG_NONE) };

RGWFileHandle::FHCache::Latch lat;
bool fh_locked = flags & RGWFileHandle::FLAG_LOCKED;

retry:
RGWFileHandle* fh =
fh_cache.find_latch(fhk.fh_hk.object /* partition selector*/,
fhk /* key */, lat /* serializer */,
RGWFileHandle::FHCache::FLAG_LOCK);
/* LATCHED */
if (fh) {
if (likely(! fh_locked))
fh->mtx.lock(); // XXX !RAII because may-return-LOCKED
/* need initial ref from LRU (fast path) */
if (! fh_lru.ref(fh, cohort::lru::FLAG_INITIAL)) {
/* no initial ref could be taken: drop the latch and the handle
 * lock, then look the key up again */
lat.lock->unlock();
if (likely(! fh_locked))
fh->mtx.unlock();
goto retry; /* !LATCHED */
}
/* LATCHED, LOCKED */
/* NOTE(review): unlike the (parent, name) overload, this unlock is
 * not guarded by !fh_locked, so with FLAG_LOCKED and without
 * FLAG_LOCK it releases a lock this call did not take -- confirm */
if (! (flags & RGWFileHandle::FLAG_LOCK))
fh->mtx.unlock(); /* ! LOCKED */
}
lat.lock->unlock(); /* !LATCHED */
get<0>(fhr) = fh;
if (fh) {
lsubdout(get_context(), rgw, 17)
<< __func__ << " 1 " << *fh
<< dendl;
}
return fhr;
} /* lookup_fh(const fh_key&) */
1014
/* find or create an RGWFileHandle
 *
 * Looks up the child `name` of `parent`, making (or recycling) a
 * handle when none is cached.  Returns {handle, result-flags}:
 * RGWFileHandle::FLAG_CREATE is set in result-flags when the handle
 * was made by this call, and the handle is nullptr when the mount is
 * closed.  The handle is returned with fh->mtx held when `flags`
 * contains RGWFileHandle::FLAG_LOCK; RGWFileHandle::FLAG_LOCKED skips
 * this function's own lock/unlock of a cached handle.  A newly made
 * handle takes a reference on its parent unless the parent is the
 * mount handle. */
LookupFHResult lookup_fh(RGWFileHandle* parent, const char *name,
const uint32_t flags = RGWFileHandle::FLAG_NONE) {
using std::get;

// cast int32_t(RGWFileHandle::FLAG_NONE) due to strictness of Clang
// the cast transfers a lvalue into a rvalue in the ctor
// check the commit message for the full details
LookupFHResult fhr { nullptr, uint32_t(RGWFileHandle::FLAG_NONE) };

/* mount is stale? */
if (state.flags & FLAG_CLOSED)
return fhr;

RGWFileHandle::FHCache::Latch lat;
bool fh_locked = flags & RGWFileHandle::FLAG_LOCKED;

std::string obj_name{name};
std::string key_name{parent->make_key_name(name)};

/* obj_name is logged here, before a Factory-built handle can move
 * from it further down */
lsubdout(get_context(), rgw, 10)
<< __func__ << " lookup called on "
<< parent->object_name() << " for " << key_name
<< " (" << obj_name << ")"
<< dendl;

fh_key fhk = parent->make_fhk(obj_name);

retry:
RGWFileHandle* fh =
fh_cache.find_latch(fhk.fh_hk.object /* partition selector*/,
fhk /* key */, lat /* serializer */,
RGWFileHandle::FHCache::FLAG_LOCK);
/* LATCHED */
if (fh) {
if (likely(! fh_locked))
fh->mtx.lock(); // XXX !RAII because may-return-LOCKED
if (fh->flags & RGWFileHandle::FLAG_DELETED) {
/* for now, delay briefly and retry */
lat.lock->unlock();
if (likely(! fh_locked))
fh->mtx.unlock();
std::this_thread::sleep_for(std::chrono::milliseconds(20));
goto retry; /* !LATCHED */
}
/* need initial ref from LRU (fast path) */
if (! fh_lru.ref(fh, cohort::lru::FLAG_INITIAL)) {
lat.lock->unlock();
if (likely(! fh_locked))
fh->mtx.unlock();
goto retry; /* !LATCHED */
}
/* LATCHED, LOCKED */
if (! (flags & RGWFileHandle::FLAG_LOCK))
if (likely(! fh_locked))
fh->mtx.unlock(); /* ! LOCKED */
} else {
/* make or re-use handle */
RGWFileHandle::Factory prototype(this, parent, fhk,
obj_name, CREATE_FLAGS(flags));
/* in/out: insert() reports FLAG_RECYCLE here when it re-used an
 * existing object instead of allocating */
uint32_t iflags{cohort::lru::FLAG_INITIAL};
fh = static_cast<RGWFileHandle*>(
fh_lru.insert(&prototype,
cohort::lru::Edge::MRU,
iflags));
if (fh) {
/* lock fh (LATCHED) */
if (flags & RGWFileHandle::FLAG_LOCK)
fh->mtx.lock();
if (likely(! (iflags & cohort::lru::FLAG_RECYCLE))) {
/* inserts at cached insert iterator, releasing latch */
fh_cache.insert_latched(
fh, lat, RGWFileHandle::FHCache::FLAG_UNLOCK);
} else {
/* recycle step invalidates Latch */
fh_cache.insert(
fhk.fh_hk.object, fh, RGWFileHandle::FHCache::FLAG_NONE);
lat.lock->unlock(); /* !LATCHED */
}
get<1>(fhr) |= RGWFileHandle::FLAG_CREATE;
/* ref parent (non-initial ref cannot fail on valid object) */
if (! parent->is_mount()) {
(void) fh_lru.ref(parent, cohort::lru::FLAG_NONE);
}
goto out; /* !LATCHED */
} else {
/* the LRU produced no handle: drop the latch and start over */
lat.lock->unlock();
goto retry; /* !LATCHED */
}
}
lat.lock->unlock(); /* !LATCHED */
out:
get<0>(fhr) = fh;
if (fh) {
lsubdout(get_context(), rgw, 17)
<< __func__ << " 2 " << *fh
<< dendl;
}
return fhr;
} /* lookup_fh(RGWFileHandle*, const char *, const uint32_t) */
1115
1116 inline void unref(RGWFileHandle* fh) {
1117 if (likely(! fh->is_mount())) {
1118 (void) fh_lru.unref(fh, cohort::lru::FLAG_NONE);
1119 }
1120 }
1121
1122 inline RGWFileHandle* ref(RGWFileHandle* fh) {
1123 if (likely(! fh->is_mount())) {
1124 fh_lru.ref(fh, cohort::lru::FLAG_NONE);
1125 }
1126 return fh;
1127 }
1128
/* The operations below are only declared here; their definitions are
 * out of line.  The int-returning ones presumably follow the 0 /
 * negative-errno convention used elsewhere in this class -- confirm
 * against the definitions. */

int getattr(RGWFileHandle* rgw_fh, struct stat* st);

int setattr(RGWFileHandle* rgw_fh, struct stat* st, uint32_t mask,
uint32_t flags);

void update_fh(RGWFileHandle *rgw_fh);

/* both return a LookupFHResult: {handle, result flags} */
LookupFHResult stat_bucket(RGWFileHandle* parent, const char *path,
RGWLibFS::BucketStats& bs,
uint32_t flags);

LookupFHResult stat_leaf(RGWFileHandle* parent, const char *path,
enum rgw_fh_type type = RGW_FS_TYPE_NIL,
uint32_t flags = RGWFileHandle::FLAG_NONE);

int read(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
size_t* bytes_read, void* buffer, uint32_t flags);

int rename(RGWFileHandle* old_fh, RGWFileHandle* new_fh,
const char *old_name, const char *new_name);

/* both return a MkObjResult: {handle, int result} */
MkObjResult create(RGWFileHandle* parent, const char *name, struct stat *st,
uint32_t mask, uint32_t flags);

MkObjResult mkdir(RGWFileHandle* parent, const char *name, struct stat *st,
uint32_t mask, uint32_t flags);

int unlink(RGWFileHandle* rgw_fh, const char *name,
uint32_t flags = FLAG_NONE);
1158
1159 /* find existing RGWFileHandle */
1160 RGWFileHandle* lookup_handle(struct rgw_fh_hk fh_hk) {
1161
1162 if (state.flags & FLAG_CLOSED)
1163 return nullptr;
1164
1165 RGWFileHandle::FHCache::Latch lat;
1166 fh_key fhk(fh_hk);
1167
1168 retry:
1169 RGWFileHandle* fh =
1170 fh_cache.find_latch(fhk.fh_hk.object /* partition selector*/,
1171 fhk /* key */, lat /* serializer */,
1172 RGWFileHandle::FHCache::FLAG_LOCK);
1173 /* LATCHED */
1174 if (! fh) {
1175 lsubdout(get_context(), rgw, 0)
1176 << __func__ << " handle lookup failed <"
1177 << fhk.fh_hk.bucket << "," << fhk.fh_hk.object << ">"
1178 << "(need persistent handles)"
1179 << dendl;
1180 goto out;
1181 }
1182 fh->mtx.lock();
1183 if (fh->flags & RGWFileHandle::FLAG_DELETED) {
1184 /* for now, delay briefly and retry */
1185 lat.lock->unlock();
1186 fh->mtx.unlock(); /* !LOCKED */
1187 std::this_thread::sleep_for(std::chrono::milliseconds(20));
1188 goto retry; /* !LATCHED */
1189 }
1190 if (! fh_lru.ref(fh, cohort::lru::FLAG_INITIAL)) {
1191 lat.lock->unlock();
1192 fh->mtx.unlock();
1193 goto retry; /* !LATCHED */
1194 }
1195 /* LATCHED */
1196 fh->mtx.unlock(); /* !LOCKED */
1197 out:
1198 lat.lock->unlock(); /* !LATCHED */
1199
1200 /* special case: lookup root_fh */
1201 if (! fh) {
1202 if (unlikely(fh_hk == root_fh.fh.fh_hk)) {
1203 fh = &root_fh;
1204 }
1205 }
1206
1207 return fh;
1208 }
1209
1210 CephContext* get_context() {
1211 return cct;
1212 }
1213
1214 struct rgw_fs* get_fs() { return &fs; }
1215
1216 uint64_t get_fsid() { return root_fh.state.dev; }
1217
1218 RGWUserInfo* get_user() { return &user; }
1219
1220 void update_user() {
1221 RGWUserInfo _user = user;
1222 int ret = rgw_get_user_info_by_access_key(rgwlib.get_store(), key.id, user);
1223 if (ret != 0)
1224 user = _user;
1225 }
1226
/* Defined out of line.  NOTE(review): close() presumably shuts this
 * filesystem instance down and gc() presumably performs periodic
 * cleanup -- confirm against the definitions. */
void close();
void gc();
1229 }; /* RGWLibFS */
1230
/* Build the request URI "/<bucket_name>/<object_name>".  Either part may
 * be empty; both separators are always emitted. */
static inline std::string make_uri(const std::string& bucket_name,
                                   const std::string& object_name) {
  std::string uri;
  /* two separators plus both names */
  uri.reserve(bucket_name.length() + object_name.length() + 2);
  uri.push_back('/');
  uri.append(bucket_name);
  uri.push_back('/');
  uri.append(object_name);
  return uri;
}
1240
1241 /*
1242 read directory content (buckets)
1243 */
1244
1245 class RGWListBucketsRequest : public RGWLibRequest,
1246 public RGWListBuckets /* RGWOp */
1247 {
1248 public:
1249 RGWFileHandle* rgw_fh;
1250 RGWFileHandle::readdir_offset offset;
1251 void* cb_arg;
1252 rgw_readdir_cb rcb;
1253 uint64_t* ioff;
1254 size_t ix;
1255 uint32_t d_count;
1256
1257 RGWListBucketsRequest(CephContext* _cct, RGWUserInfo *_user,
1258 RGWFileHandle* _rgw_fh, rgw_readdir_cb _rcb,
1259 void* _cb_arg, RGWFileHandle::readdir_offset& _offset)
1260 : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), offset(_offset),
1261 cb_arg(_cb_arg), rcb(_rcb), ioff(nullptr), ix(0), d_count(0) {
1262
1263 using boost::get;
1264
1265 if (unlikely(!! get<uint64_t*>(&offset))) {
1266 ioff = get<uint64_t*>(offset);
1267 const auto& mk = rgw_fh->find_marker(*ioff);
1268 if (mk) {
1269 marker = mk->name;
1270 }
1271 } else {
1272 const char* mk = get<const char*>(offset);
1273 if (mk) {
1274 marker = mk;
1275 }
1276 }
1277 op = this;
1278 }
1279
1280 bool only_bucket() override { return false; }
1281
1282 int op_init() override {
1283 // assign store, s, and dialect_handler
1284 RGWObjectCtx* rados_ctx
1285 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1286 // framework promises to call op_init after parent init
1287 assert(rados_ctx);
1288 RGWOp::init(rados_ctx->store, get_state(), this);
1289 op = this; // assign self as op: REQUIRED
1290 return 0;
1291 }
1292
1293 int header_init() override {
1294 struct req_state* s = get_state();
1295 s->info.method = "GET";
1296 s->op = OP_GET;
1297
1298 /* XXX derp derp derp */
1299 s->relative_uri = "/";
1300 s->info.request_uri = "/"; // XXX
1301 s->info.effective_uri = "/";
1302 s->info.request_params = "";
1303 s->info.domain = ""; /* XXX ? */
1304
1305 // woo
1306 s->user = user;
1307 s->bucket_tenant = user->user_id.tenant;
1308
1309 return 0;
1310 }
1311
1312 int get_params() override {
1313 limit = -1; /* no limit */
1314 return 0;
1315 }
1316
1317 void send_response_begin(bool has_buckets) override {
1318 sent_data = true;
1319 }
1320
1321 void send_response_data(RGWUserBuckets& buckets) override {
1322 if (!sent_data)
1323 return;
1324 map<string, RGWBucketEnt>& m = buckets.get_buckets();
1325 for (const auto& iter : m) {
1326 boost::string_ref marker{iter.first};
1327 const RGWBucketEnt& ent = iter.second;
1328 if (! this->operator()(ent.bucket.name, marker)) {
1329 /* caller cannot accept more */
1330 lsubdout(cct, rgw, 5) << "ListBuckets rcb failed"
1331 << " dirent=" << ent.bucket.name
1332 << " call count=" << ix
1333 << dendl;
1334 return;
1335 }
1336 ++ix;
1337 }
1338 } /* send_response_data */
1339
1340 void send_response_end() override {
1341 // do nothing
1342 }
1343
1344 int operator()(const boost::string_ref& name,
1345 const boost::string_ref& marker) {
1346 uint64_t off = XXH64(name.data(), name.length(), fh_key::seed);
1347 if (!! ioff) {
1348 *ioff = off;
1349 }
1350 /* update traversal cache */
1351 rgw_fh->add_marker(off, rgw_obj_key{marker.data(), ""},
1352 RGW_FS_TYPE_DIRECTORY);
1353 ++d_count;
1354 return rcb(name.data(), cb_arg, off, RGW_LOOKUP_FLAG_DIR);
1355 }
1356
1357 bool eof() {
1358 if (unlikely(cct->_conf->subsys.should_gather(ceph_subsys_rgw, 15))) {
1359 bool is_offset =
1360 unlikely(! get<const char*>(&offset)) ||
1361 !! get<const char*>(offset);
1362 lsubdout(cct, rgw, 15) << "READDIR offset: " <<
1363 ((is_offset) ? offset : "(nil)")
1364 << " is_truncated: " << is_truncated
1365 << dendl;
1366 }
1367 return !is_truncated;
1368 }
1369
1370 }; /* RGWListBucketsRequest */
1371
1372 /*
1373 read directory content (bucket objects)
1374 */
1375
1376 class RGWReaddirRequest : public RGWLibRequest,
1377 public RGWListBucket /* RGWOp */
1378 {
1379 public:
1380 RGWFileHandle* rgw_fh;
1381 RGWFileHandle::readdir_offset offset;
1382 void* cb_arg;
1383 rgw_readdir_cb rcb;
1384 uint64_t* ioff;
1385 size_t ix;
1386 uint32_t d_count;
1387
1388 RGWReaddirRequest(CephContext* _cct, RGWUserInfo *_user,
1389 RGWFileHandle* _rgw_fh, rgw_readdir_cb _rcb,
1390 void* _cb_arg, RGWFileHandle::readdir_offset& _offset)
1391 : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), offset(_offset),
1392 cb_arg(_cb_arg), rcb(_rcb), ioff(nullptr), ix(0), d_count(0) {
1393
1394 using boost::get;
1395
1396 if (unlikely(!! get<uint64_t*>(&offset))) {
1397 ioff = get<uint64_t*>(offset);
1398 const auto& mk = rgw_fh->find_marker(*ioff);
1399 if (mk) {
1400 marker = *mk;
1401 }
1402 } else {
1403 const char* mk = get<const char*>(offset);
1404 if (mk) {
1405 std::string tmark{rgw_fh->relative_object_name()};
1406 tmark += "/";
1407 tmark += mk;
1408 marker = rgw_obj_key{std::move(tmark), "", ""};
1409 }
1410 }
1411
1412 default_max = 1000; // XXX was being omitted
1413 op = this;
1414 }
1415
1416 bool only_bucket() override { return true; }
1417
1418 int op_init() override {
1419 // assign store, s, and dialect_handler
1420 RGWObjectCtx* rados_ctx
1421 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1422 // framework promises to call op_init after parent init
1423 assert(rados_ctx);
1424 RGWOp::init(rados_ctx->store, get_state(), this);
1425 op = this; // assign self as op: REQUIRED
1426 return 0;
1427 }
1428
1429 int header_init() override {
1430 struct req_state* s = get_state();
1431 s->info.method = "GET";
1432 s->op = OP_GET;
1433
1434 /* XXX derp derp derp */
1435 std::string uri = "/" + rgw_fh->bucket_name() + "/";
1436 s->relative_uri = uri;
1437 s->info.request_uri = uri; // XXX
1438 s->info.effective_uri = uri;
1439 s->info.request_params = "";
1440 s->info.domain = ""; /* XXX ? */
1441
1442 // woo
1443 s->user = user;
1444 s->bucket_tenant = user->user_id.tenant;
1445
1446 prefix = rgw_fh->relative_object_name();
1447 if (prefix.length() > 0)
1448 prefix += "/";
1449 delimiter = '/';
1450
1451 return 0;
1452 }
1453
1454 int operator()(const boost::string_ref name, const rgw_obj_key& marker,
1455 uint8_t type) {
1456
1457 assert(name.length() > 0); // all cases handled in callers
1458
1459 /* hash offset of name in parent (short name) for NFS readdir cookie */
1460 uint64_t off = XXH64(name.data(), name.length(), fh_key::seed);
1461 if (unlikely(!! ioff)) {
1462 *ioff = off;
1463 }
1464 /* update traversal cache */
1465 rgw_fh->add_marker(off, marker, type);
1466 ++d_count;
1467 return rcb(name.data(), cb_arg, off,
1468 (type == RGW_FS_TYPE_DIRECTORY) ?
1469 RGW_LOOKUP_FLAG_DIR :
1470 RGW_LOOKUP_FLAG_FILE);
1471 }
1472
1473 int get_params() override {
1474 max = default_max;
1475 return 0;
1476 }
1477
1478 void send_response() override {
1479 struct req_state* s = get_state();
1480 for (const auto& iter : objs) {
1481
1482 boost::string_ref sref {iter.key.name};
1483
1484 lsubdout(cct, rgw, 15) << "readdir objects prefix: " << prefix
1485 << " obj: " << sref << dendl;
1486
1487 size_t last_del = sref.find_last_of('/');
1488 if (last_del != string::npos)
1489 sref.remove_prefix(last_del+1);
1490
1491 /* leaf directory? */
1492 if (sref.empty())
1493 continue;
1494
1495 lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
1496 << __func__ << " "
1497 << "list uri=" << s->relative_uri << " "
1498 << " prefix=" << prefix << " "
1499 << " obj path=" << iter.key.name
1500 << " (" << sref << ")" << ""
1501 << dendl;
1502
1503 if(! this->operator()(sref, next_marker, RGW_FS_TYPE_FILE)) {
1504 /* caller cannot accept more */
1505 lsubdout(cct, rgw, 5) << "readdir rcb failed"
1506 << " dirent=" << sref.data()
1507 << " call count=" << ix
1508 << dendl;
1509 return;
1510 }
1511 ++ix;
1512 }
1513 for (auto& iter : common_prefixes) {
1514
1515 lsubdout(cct, rgw, 15) << "readdir common prefixes prefix: " << prefix
1516 << " iter first: " << iter.first
1517 << " iter second: " << iter.second
1518 << dendl;
1519
1520 /* XXX aieee--I have seen this case! */
1521 if (iter.first == "/")
1522 continue;
1523
1524 /* it's safest to modify the element in place--a suffix-modifying
1525 * string_ref operation is problematic since ULP rgw_file callers
1526 * will ultimately need a c-string */
1527 if (iter.first.back() == '/')
1528 const_cast<std::string&>(iter.first).pop_back();
1529
1530 boost::string_ref sref{iter.first};
1531
1532 size_t last_del = sref.find_last_of('/');
1533 if (last_del != string::npos)
1534 sref.remove_prefix(last_del+1);
1535
1536 lsubdout(cct, rgw, 15) << "RGWReaddirRequest "
1537 << __func__ << " "
1538 << "list uri=" << s->relative_uri << " "
1539 << " prefix=" << prefix << " "
1540 << " cpref=" << sref
1541 << dendl;
1542
1543 if (sref.empty()) {
1544 /* null path segment--could be created in S3 but has no NFS
1545 * interpretation */
1546 return;
1547 }
1548
1549 this->operator()(sref, next_marker, RGW_FS_TYPE_DIRECTORY);
1550 ++ix;
1551 }
1552 }
1553
1554 virtual void send_versioned_response() {
1555 send_response();
1556 }
1557
1558 bool eof() {
1559 if (unlikely(cct->_conf->subsys.should_gather(ceph_subsys_rgw, 15))) {
1560 bool is_offset =
1561 unlikely(! get<const char*>(&offset)) ||
1562 !! get<const char*>(offset);
1563 lsubdout(cct, rgw, 15) << "READDIR offset: " <<
1564 ((is_offset) ? offset : "(nil)")
1565 << " next marker: " << next_marker
1566 << " is_truncated: " << is_truncated
1567 << dendl;
1568 }
1569 return !is_truncated;
1570 }
1571
1572 }; /* RGWReaddirRequest */
1573
1574 /*
1575 dir has-children predicate (bucket objects)
1576 */
1577
1578 class RGWRMdirCheck : public RGWLibRequest,
1579 public RGWListBucket /* RGWOp */
1580 {
1581 public:
1582 const RGWFileHandle* rgw_fh;
1583 bool valid;
1584 bool has_children;
1585
1586 RGWRMdirCheck (CephContext* _cct, RGWUserInfo *_user,
1587 const RGWFileHandle* _rgw_fh)
1588 : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), valid(false),
1589 has_children(false) {
1590 default_max = 2;
1591 op = this;
1592 }
1593
1594 bool only_bucket() override { return true; }
1595
1596 int op_init() override {
1597 // assign store, s, and dialect_handler
1598 RGWObjectCtx* rados_ctx
1599 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1600 // framework promises to call op_init after parent init
1601 assert(rados_ctx);
1602 RGWOp::init(rados_ctx->store, get_state(), this);
1603 op = this; // assign self as op: REQUIRED
1604 return 0;
1605 }
1606
1607 int header_init() override {
1608 struct req_state* s = get_state();
1609 s->info.method = "GET";
1610 s->op = OP_GET;
1611
1612 std::string uri = "/" + rgw_fh->bucket_name() + "/";
1613 s->relative_uri = uri;
1614 s->info.request_uri = uri;
1615 s->info.effective_uri = uri;
1616 s->info.request_params = "";
1617 s->info.domain = ""; /* XXX ? */
1618
1619 s->user = user;
1620 s->bucket_tenant = user->user_id.tenant;
1621
1622 prefix = rgw_fh->relative_object_name();
1623 if (prefix.length() > 0)
1624 prefix += "/";
1625 delimiter = '/';
1626
1627 return 0;
1628 }
1629
1630 int get_params() override {
1631 max = default_max;
1632 return 0;
1633 }
1634
1635 void send_response() override {
1636 valid = true;
1637 if ((objs.size() > 1) ||
1638 (! objs.empty() &&
1639 (objs.front().key.name != prefix))) {
1640 has_children = true;
1641 return;
1642 }
1643 for (auto& iter : common_prefixes) {
1644 /* readdir never produces a name for this case */
1645 if (iter.first == "/")
1646 continue;
1647 has_children = true;
1648 break;
1649 }
1650 }
1651
1652 virtual void send_versioned_response() {
1653 send_response();
1654 }
1655
1656 }; /* RGWRMdirCheck */
1657
1658 /*
1659 create bucket
1660 */
1661
1662 class RGWCreateBucketRequest : public RGWLibRequest,
1663 public RGWCreateBucket /* RGWOp */
1664 {
1665 public:
1666 const std::string& bucket_name;
1667
1668 RGWCreateBucketRequest(CephContext* _cct, RGWUserInfo *_user,
1669 std::string& _bname)
1670 : RGWLibRequest(_cct, _user), bucket_name(_bname) {
1671 op = this;
1672 }
1673
1674 bool only_bucket() override { return false; }
1675
1676 int read_permissions(RGWOp* op_obj) override {
1677 /* we ARE a 'create bucket' request (cf. rgw_rest.cc, ll. 1305-6) */
1678 return 0;
1679 }
1680
1681 int op_init() override {
1682 // assign store, s, and dialect_handler
1683 RGWObjectCtx* rados_ctx
1684 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1685 // framework promises to call op_init after parent init
1686 assert(rados_ctx);
1687 RGWOp::init(rados_ctx->store, get_state(), this);
1688 op = this; // assign self as op: REQUIRED
1689 return 0;
1690 }
1691
1692 int header_init() override {
1693
1694 struct req_state* s = get_state();
1695 s->info.method = "PUT";
1696 s->op = OP_PUT;
1697
1698 string uri = "/" + bucket_name;
1699 /* XXX derp derp derp */
1700 s->relative_uri = uri;
1701 s->info.request_uri = uri; // XXX
1702 s->info.effective_uri = uri;
1703 s->info.request_params = "";
1704 s->info.domain = ""; /* XXX ? */
1705
1706 // woo
1707 s->user = user;
1708 s->bucket_tenant = user->user_id.tenant;
1709
1710 return 0;
1711 }
1712
1713 int get_params() override {
1714 struct req_state* s = get_state();
1715 RGWAccessControlPolicy_S3 s3policy(s->cct);
1716 /* we don't have (any) headers, so just create canned ACLs */
1717 int ret = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
1718 policy = s3policy;
1719 return ret;
1720 }
1721
1722 void send_response() override {
1723 /* TODO: something (maybe) */
1724 }
1725 }; /* RGWCreateBucketRequest */
1726
1727 /*
1728 delete bucket
1729 */
1730
1731 class RGWDeleteBucketRequest : public RGWLibRequest,
1732 public RGWDeleteBucket /* RGWOp */
1733 {
1734 public:
1735 const std::string& bucket_name;
1736
1737 RGWDeleteBucketRequest(CephContext* _cct, RGWUserInfo *_user,
1738 std::string& _bname)
1739 : RGWLibRequest(_cct, _user), bucket_name(_bname) {
1740 op = this;
1741 }
1742
1743 bool only_bucket() override { return true; }
1744
1745 int op_init() override {
1746 // assign store, s, and dialect_handler
1747 RGWObjectCtx* rados_ctx
1748 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1749 // framework promises to call op_init after parent init
1750 assert(rados_ctx);
1751 RGWOp::init(rados_ctx->store, get_state(), this);
1752 op = this; // assign self as op: REQUIRED
1753 return 0;
1754 }
1755
1756 int header_init() override {
1757
1758 struct req_state* s = get_state();
1759 s->info.method = "DELETE";
1760 s->op = OP_DELETE;
1761
1762 string uri = "/" + bucket_name;
1763 /* XXX derp derp derp */
1764 s->relative_uri = uri;
1765 s->info.request_uri = uri; // XXX
1766 s->info.effective_uri = uri;
1767 s->info.request_params = "";
1768 s->info.domain = ""; /* XXX ? */
1769
1770 // woo
1771 s->user = user;
1772 s->bucket_tenant = user->user_id.tenant;
1773
1774 return 0;
1775 }
1776
1777 void send_response() override {}
1778
1779 }; /* RGWDeleteBucketRequest */
1780
1781 /*
1782 put object
1783 */
1784 class RGWPutObjRequest : public RGWLibRequest,
1785 public RGWPutObj /* RGWOp */
1786 {
1787 public:
1788 const std::string& bucket_name;
1789 const std::string& obj_name;
1790 buffer::list& bl; /* XXX */
1791 size_t bytes_written;
1792
1793 RGWPutObjRequest(CephContext* _cct, RGWUserInfo *_user,
1794 const std::string& _bname, const std::string& _oname,
1795 buffer::list& _bl)
1796 : RGWLibRequest(_cct, _user), bucket_name(_bname), obj_name(_oname),
1797 bl(_bl), bytes_written(0) {
1798 op = this;
1799 }
1800
1801 bool only_bucket() override { return true; }
1802
1803 int op_init() override {
1804 // assign store, s, and dialect_handler
1805 RGWObjectCtx* rados_ctx
1806 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1807 // framework promises to call op_init after parent init
1808 assert(rados_ctx);
1809 RGWOp::init(rados_ctx->store, get_state(), this);
1810 op = this; // assign self as op: REQUIRED
1811
1812 int rc = valid_s3_object_name(obj_name);
1813 if (rc != 0)
1814 return rc;
1815
1816 return 0;
1817 }
1818
1819 int header_init() override {
1820
1821 struct req_state* s = get_state();
1822 s->info.method = "PUT";
1823 s->op = OP_PUT;
1824
1825 /* XXX derp derp derp */
1826 std::string uri = make_uri(bucket_name, obj_name);
1827 s->relative_uri = uri;
1828 s->info.request_uri = uri; // XXX
1829 s->info.effective_uri = uri;
1830 s->info.request_params = "";
1831 s->info.domain = ""; /* XXX ? */
1832
1833 /* XXX required in RGWOp::execute() */
1834 s->content_length = bl.length();
1835
1836 // woo
1837 s->user = user;
1838 s->bucket_tenant = user->user_id.tenant;
1839
1840 return 0;
1841 }
1842
1843 int get_params() override {
1844 struct req_state* s = get_state();
1845 RGWAccessControlPolicy_S3 s3policy(s->cct);
1846 /* we don't have (any) headers, so just create canned ACLs */
1847 int ret = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
1848 policy = s3policy;
1849 return ret;
1850 }
1851
1852 int get_data(buffer::list& _bl) override {
1853 /* XXX for now, use sharing semantics */
1854 _bl.claim(bl);
1855 uint32_t len = _bl.length();
1856 bytes_written += len;
1857 return len;
1858 }
1859
1860 void send_response() override {}
1861
1862 int verify_params() override {
1863 if (bl.length() > cct->_conf->rgw_max_put_size)
1864 return -ERR_TOO_LARGE;
1865 return 0;
1866 }
1867
1868 buffer::list* get_attr(const std::string& k) {
1869 auto iter = attrs.find(k);
1870 return (iter != attrs.end()) ? &(iter->second) : nullptr;
1871 }
1872
1873 }; /* RGWPutObjRequest */
1874
1875 /*
1876 get object
1877 */
1878
1879 class RGWReadRequest : public RGWLibRequest,
1880 public RGWGetObj /* RGWOp */
1881 {
1882 public:
1883 RGWFileHandle* rgw_fh;
1884 void *ulp_buffer;
1885 size_t nread;
1886 size_t read_resid; /* initialize to len, <= sizeof(ulp_buffer) */
1887 bool do_hexdump = false;
1888
1889 RGWReadRequest(CephContext* _cct, RGWUserInfo *_user,
1890 RGWFileHandle* _rgw_fh, uint64_t off, uint64_t len,
1891 void *_ulp_buffer)
1892 : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), ulp_buffer(_ulp_buffer),
1893 nread(0), read_resid(len) {
1894 op = this;
1895
1896 /* fixup RGWGetObj (already know range parameters) */
1897 RGWGetObj::range_parsed = true;
1898 RGWGetObj::get_data = true; // XXX
1899 RGWGetObj::partial_content = true;
1900 RGWGetObj::ofs = off;
1901 RGWGetObj::end = off + len;
1902 }
1903
1904 bool only_bucket() override { return false; }
1905
1906 int op_init() override {
1907 // assign store, s, and dialect_handler
1908 RGWObjectCtx* rados_ctx
1909 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1910 // framework promises to call op_init after parent init
1911 assert(rados_ctx);
1912 RGWOp::init(rados_ctx->store, get_state(), this);
1913 op = this; // assign self as op: REQUIRED
1914 return 0;
1915 }
1916
1917 int header_init() override {
1918
1919 struct req_state* s = get_state();
1920 s->info.method = "GET";
1921 s->op = OP_GET;
1922
1923 /* XXX derp derp derp */
1924 s->relative_uri = make_uri(rgw_fh->bucket_name(),
1925 rgw_fh->relative_object_name());
1926 s->info.request_uri = s->relative_uri; // XXX
1927 s->info.effective_uri = s->relative_uri;
1928 s->info.request_params = "";
1929 s->info.domain = ""; /* XXX ? */
1930
1931 // woo
1932 s->user = user;
1933 s->bucket_tenant = user->user_id.tenant;
1934
1935 return 0;
1936 }
1937
1938 int get_params() override {
1939 return 0;
1940 }
1941
1942 int send_response_data(ceph::buffer::list& bl, off_t bl_off,
1943 off_t bl_len) override {
1944 size_t bytes;
1945 for (auto& bp : bl.buffers()) {
1946 /* if for some reason bl_off indicates the start-of-data is not at
1947 * the current buffer::ptr, skip it and account */
1948 if (bl_off > bp.length()) {
1949 bl_off -= bp.length();
1950 continue;
1951 }
1952 /* read no more than read_resid */
1953 bytes = std::min(read_resid, size_t(bp.length()-bl_off));
1954 memcpy(static_cast<char*>(ulp_buffer)+nread, bp.c_str()+bl_off, bytes);
1955 read_resid -= bytes; /* reduce read_resid by bytes read */
1956 nread += bytes;
1957 bl_off = 0;
1958 /* stop if we have no residual ulp_buffer */
1959 if (! read_resid)
1960 break;
1961 }
1962 return 0;
1963 }
1964
1965 int send_response_data_error() override {
1966 /* S3 implementation just sends nothing--there is no side effect
1967 * to simulate here */
1968 return 0;
1969 }
1970
1971 }; /* RGWReadRequest */
1972
1973 /*
1974 delete object
1975 */
1976
1977 class RGWDeleteObjRequest : public RGWLibRequest,
1978 public RGWDeleteObj /* RGWOp */
1979 {
1980 public:
1981 const std::string& bucket_name;
1982 const std::string& obj_name;
1983
1984 RGWDeleteObjRequest(CephContext* _cct, RGWUserInfo *_user,
1985 const std::string& _bname, const std::string& _oname)
1986 : RGWLibRequest(_cct, _user), bucket_name(_bname), obj_name(_oname) {
1987 op = this;
1988 }
1989
1990 bool only_bucket() override { return true; }
1991
1992 int op_init() override {
1993 // assign store, s, and dialect_handler
1994 RGWObjectCtx* rados_ctx
1995 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
1996 // framework promises to call op_init after parent init
1997 assert(rados_ctx);
1998 RGWOp::init(rados_ctx->store, get_state(), this);
1999 op = this; // assign self as op: REQUIRED
2000 return 0;
2001 }
2002
2003 int header_init() override {
2004
2005 struct req_state* s = get_state();
2006 s->info.method = "DELETE";
2007 s->op = OP_DELETE;
2008
2009 /* XXX derp derp derp */
2010 std::string uri = make_uri(bucket_name, obj_name);
2011 s->relative_uri = uri;
2012 s->info.request_uri = uri; // XXX
2013 s->info.effective_uri = uri;
2014 s->info.request_params = "";
2015 s->info.domain = ""; /* XXX ? */
2016
2017 // woo
2018 s->user = user;
2019 s->bucket_tenant = user->user_id.tenant;
2020
2021 return 0;
2022 }
2023
2024 void send_response() override {}
2025
2026 }; /* RGWDeleteObjRequest */
2027
2028 class RGWStatObjRequest : public RGWLibRequest,
2029 public RGWGetObj /* RGWOp */
2030 {
2031 public:
2032 const std::string& bucket_name;
2033 const std::string& obj_name;
2034 uint64_t _size;
2035 uint32_t flags;
2036
2037 static constexpr uint32_t FLAG_NONE = 0x000;
2038
2039 RGWStatObjRequest(CephContext* _cct, RGWUserInfo *_user,
2040 const std::string& _bname, const std::string& _oname,
2041 uint32_t _flags)
2042 : RGWLibRequest(_cct, _user), bucket_name(_bname), obj_name(_oname),
2043 _size(0), flags(_flags) {
2044 op = this;
2045
2046 /* fixup RGWGetObj (already know range parameters) */
2047 RGWGetObj::range_parsed = true;
2048 RGWGetObj::get_data = false; // XXX
2049 RGWGetObj::partial_content = true;
2050 RGWGetObj::ofs = 0;
2051 RGWGetObj::end = UINT64_MAX;
2052 }
2053
2054 const string name() override { return "stat_obj"; }
2055 RGWOpType get_type() override { return RGW_OP_STAT_OBJ; }
2056
2057 real_time get_mtime() const {
2058 return lastmod;
2059 }
2060
2061 /* attributes */
2062 uint64_t get_size() { return _size; }
2063 real_time ctime() { return mod_time; } // XXX
2064 real_time mtime() { return mod_time; }
2065 std::map<string, bufferlist>& get_attrs() { return attrs; }
2066
2067 buffer::list* get_attr(const std::string& k) {
2068 auto iter = attrs.find(k);
2069 return (iter != attrs.end()) ? &(iter->second) : nullptr;
2070 }
2071
2072 bool only_bucket() override { return false; }
2073
2074 int op_init() override {
2075 // assign store, s, and dialect_handler
2076 RGWObjectCtx* rados_ctx
2077 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2078 // framework promises to call op_init after parent init
2079 assert(rados_ctx);
2080 RGWOp::init(rados_ctx->store, get_state(), this);
2081 op = this; // assign self as op: REQUIRED
2082 return 0;
2083 }
2084
2085 int header_init() override {
2086
2087 struct req_state* s = get_state();
2088 s->info.method = "GET";
2089 s->op = OP_GET;
2090
2091 /* XXX derp derp derp */
2092 s->relative_uri = make_uri(bucket_name, obj_name);
2093 s->info.request_uri = s->relative_uri; // XXX
2094 s->info.effective_uri = s->relative_uri;
2095 s->info.request_params = "";
2096 s->info.domain = ""; /* XXX ? */
2097
2098 // woo
2099 s->user = user;
2100 s->bucket_tenant = user->user_id.tenant;
2101
2102 return 0;
2103 }
2104
2105 int get_params() override {
2106 return 0;
2107 }
2108
2109 int send_response_data(ceph::buffer::list& _bl, off_t s_off,
2110 off_t e_off) override {
2111 /* NOP */
2112 /* XXX save attrs? */
2113 return 0;
2114 }
2115
2116 int send_response_data_error() override {
2117 /* NOP */
2118 return 0;
2119 }
2120
2121 void execute() override {
2122 RGWGetObj::execute();
2123 _size = get_state()->obj_size;
2124 }
2125
2126 }; /* RGWStatObjRequest */
2127
2128 class RGWStatBucketRequest : public RGWLibRequest,
2129 public RGWStatBucket /* RGWOp */
2130 {
2131 public:
2132 std::string uri;
2133 std::map<std::string, buffer::list> attrs;
2134 RGWLibFS::BucketStats& bs;
2135
2136 RGWStatBucketRequest(CephContext* _cct, RGWUserInfo *_user,
2137 const std::string& _path,
2138 RGWLibFS::BucketStats& _stats)
2139 : RGWLibRequest(_cct, _user), bs(_stats) {
2140 uri = "/" + _path;
2141 op = this;
2142 }
2143
2144 buffer::list* get_attr(const std::string& k) {
2145 auto iter = attrs.find(k);
2146 return (iter != attrs.end()) ? &(iter->second) : nullptr;
2147 }
2148
2149 real_time get_ctime() const {
2150 return bucket.creation_time;
2151 }
2152
2153 bool only_bucket() override { return false; }
2154
2155 int op_init() override {
2156 // assign store, s, and dialect_handler
2157 RGWObjectCtx* rados_ctx
2158 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2159 // framework promises to call op_init after parent init
2160 assert(rados_ctx);
2161 RGWOp::init(rados_ctx->store, get_state(), this);
2162 op = this; // assign self as op: REQUIRED
2163 return 0;
2164 }
2165
2166 int header_init() override {
2167
2168 struct req_state* s = get_state();
2169 s->info.method = "GET";
2170 s->op = OP_GET;
2171
2172 /* XXX derp derp derp */
2173 s->relative_uri = uri;
2174 s->info.request_uri = uri; // XXX
2175 s->info.effective_uri = uri;
2176 s->info.request_params = "";
2177 s->info.domain = ""; /* XXX ? */
2178
2179 // woo
2180 s->user = user;
2181 s->bucket_tenant = user->user_id.tenant;
2182
2183 return 0;
2184 }
2185
2186 virtual int get_params() {
2187 return 0;
2188 }
2189
2190 void send_response() override {
2191 bucket.creation_time = get_state()->bucket_info.creation_time;
2192 bs.size = bucket.size;
2193 bs.size_rounded = bucket.size_rounded;
2194 bs.creation_time = bucket.creation_time;
2195 bs.num_entries = bucket.count;
2196 std::swap(attrs, get_state()->bucket_attrs);
2197 }
2198
2199 bool matched() {
2200 return (bucket.bucket.name.length() > 0);
2201 }
2202
2203 }; /* RGWStatBucketRequest */
2204
2205 class RGWStatLeafRequest : public RGWLibRequest,
2206 public RGWListBucket /* RGWOp */
2207 {
2208 public:
2209 RGWFileHandle* rgw_fh;
2210 std::string path;
2211 bool matched;
2212 bool is_dir;
2213 bool exact_matched;
2214
2215 RGWStatLeafRequest(CephContext* _cct, RGWUserInfo *_user,
2216 RGWFileHandle* _rgw_fh, const std::string& _path)
2217 : RGWLibRequest(_cct, _user), rgw_fh(_rgw_fh), path(_path),
2218 matched(false), is_dir(false), exact_matched(false) {
2219 default_max = 1000; // logical max {"foo", "foo/"}
2220 op = this;
2221 }
2222
2223 bool only_bucket() override { return true; }
2224
2225 int op_init() override {
2226 // assign store, s, and dialect_handler
2227 RGWObjectCtx* rados_ctx
2228 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2229 // framework promises to call op_init after parent init
2230 assert(rados_ctx);
2231 RGWOp::init(rados_ctx->store, get_state(), this);
2232 op = this; // assign self as op: REQUIRED
2233 return 0;
2234 }
2235
2236 int header_init() override {
2237
2238 struct req_state* s = get_state();
2239 s->info.method = "GET";
2240 s->op = OP_GET;
2241
2242 /* XXX derp derp derp */
2243 std::string uri = "/" + rgw_fh->bucket_name() + "/";
2244 s->relative_uri = uri;
2245 s->info.request_uri = uri; // XXX
2246 s->info.effective_uri = uri;
2247 s->info.request_params = "";
2248 s->info.domain = ""; /* XXX ? */
2249
2250 // woo
2251 s->user = user;
2252 s->bucket_tenant = user->user_id.tenant;
2253
2254 prefix = rgw_fh->relative_object_name();
2255 if (prefix.length() > 0)
2256 prefix += "/";
2257 prefix += path;
2258 delimiter = '/';
2259
2260 return 0;
2261 }
2262
2263 int get_params() override {
2264 max = default_max;
2265 return 0;
2266 }
2267
2268 void send_response() override {
2269 struct req_state* s = get_state();
2270 // try objects
2271 for (const auto& iter : objs) {
2272 auto& name = iter.key.name;
2273 lsubdout(cct, rgw, 15) << "RGWStatLeafRequest "
2274 << __func__ << " "
2275 << "list uri=" << s->relative_uri << " "
2276 << " prefix=" << prefix << " "
2277 << " obj path=" << name << ""
2278 << " target = " << path << ""
2279 << dendl;
2280 /* XXX is there a missing match-dir case (trailing '/')? */
2281 matched = true;
2282 if (name == path)
2283 exact_matched = true;
2284 return;
2285 }
2286 // try prefixes
2287 for (auto& iter : common_prefixes) {
2288 auto& name = iter.first;
2289 lsubdout(cct, rgw, 15) << "RGWStatLeafRequest "
2290 << __func__ << " "
2291 << "list uri=" << s->relative_uri << " "
2292 << " prefix=" << prefix << " "
2293 << " pref path=" << name << " (not chomped)"
2294 << " target = " << path << ""
2295 << dendl;
2296 matched = true;
2297 /* match-dir case (trailing '/') */
2298 if (name == prefix + "/")
2299 exact_matched = true;
2300 is_dir = true;
2301 break;
2302 }
2303 }
2304
2305 virtual void send_versioned_response() {
2306 send_response();
2307 }
2308 }; /* RGWStatLeafRequest */
2309
2310 /*
2311 put object
2312 */
2313
2314 class RGWWriteRequest : public RGWLibContinuedReq,
2315 public RGWPutObj /* RGWOp */
2316 {
2317 public:
2318 const std::string& bucket_name;
2319 const std::string& obj_name;
2320 RGWFileHandle* rgw_fh;
2321 RGWPutObjProcessor* processor;
2322 RGWPutObjDataProcessor* filter;
2323 boost::optional<RGWPutObj_Compress> compressor;
2324 CompressorRef plugin;
2325 buffer::list data;
2326 uint64_t timer_id;
2327 MD5 hash;
2328 off_t real_ofs;
2329 size_t bytes_written;
2330 bool multipart;
2331 bool eio;
2332
2333 RGWWriteRequest(CephContext* _cct, RGWUserInfo *_user, RGWFileHandle* _fh,
2334 const std::string& _bname, const std::string& _oname)
2335 : RGWLibContinuedReq(_cct, _user), bucket_name(_bname), obj_name(_oname),
2336 rgw_fh(_fh), processor(nullptr), filter(nullptr), real_ofs(0),
2337 bytes_written(0), multipart(false), eio(false) {
2338
2339 int ret = header_init();
2340 if (ret == 0) {
2341 ret = init_from_header(get_state());
2342 }
2343 op = this;
2344 }
2345
2346 bool only_bucket() override { return true; }
2347
2348 int op_init() override {
2349 // assign store, s, and dialect_handler
2350 RGWObjectCtx* rados_ctx
2351 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2352 // framework promises to call op_init after parent init
2353 assert(rados_ctx);
2354 RGWOp::init(rados_ctx->store, get_state(), this);
2355 op = this; // assign self as op: REQUIRED
2356 return 0;
2357 }
2358
2359 int header_init() override {
2360
2361 struct req_state* s = get_state();
2362 s->info.method = "PUT";
2363 s->op = OP_PUT;
2364
2365 /* XXX derp derp derp */
2366 std::string uri = make_uri(bucket_name, obj_name);
2367 s->relative_uri = uri;
2368 s->info.request_uri = uri; // XXX
2369 s->info.effective_uri = uri;
2370 s->info.request_params = "";
2371 s->info.domain = ""; /* XXX ? */
2372
2373 // woo
2374 s->user = user;
2375 s->bucket_tenant = user->user_id.tenant;
2376
2377 return 0;
2378 }
2379
2380 RGWPutObjProcessor *select_processor(RGWObjectCtx& obj_ctx,
2381 bool *is_multipart) override {
2382 struct req_state* s = get_state();
2383 uint64_t part_size = s->cct->_conf->rgw_obj_stripe_size;
2384 RGWPutObjProcessor_Atomic *processor =
2385 new RGWPutObjProcessor_Atomic(obj_ctx, s->bucket_info, s->bucket,
2386 s->object.name, part_size, s->req_id,
2387 s->bucket_info.versioning_enabled());
2388 processor->set_olh_epoch(olh_epoch);
2389 processor->set_version_id(version_id);
2390 return processor;
2391 }
2392
2393 int get_params() override {
2394 struct req_state* s = get_state();
2395 RGWAccessControlPolicy_S3 s3policy(s->cct);
2396 /* we don't have (any) headers, so just create canned ACLs */
2397 int ret = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
2398 policy = s3policy;
2399 return ret;
2400 }
2401
2402 int get_data(buffer::list& _bl) override {
2403 /* XXX for now, use sharing semantics */
2404 uint32_t len = data.length();
2405 _bl.claim(data);
2406 bytes_written += len;
2407 return len;
2408 }
2409
2410 void put_data(off_t off, buffer::list& _bl) {
2411 if (off != real_ofs) {
2412 eio = true;
2413 }
2414 data.claim(_bl);
2415 real_ofs += data.length();
2416 ofs = off; /* consumed in exec_continue() */
2417 }
2418
2419 int exec_start() override;
2420 int exec_continue() override;
2421 int exec_finish() override;
2422
2423 void send_response() override {}
2424
2425 int verify_params() override {
2426 return 0;
2427 }
2428 }; /* RGWWriteRequest */
2429
2430 /*
2431 copy object
2432 */
2433 class RGWCopyObjRequest : public RGWLibRequest,
2434 public RGWCopyObj /* RGWOp */
2435 {
2436 public:
2437 RGWFileHandle* src_parent;
2438 RGWFileHandle* dst_parent;
2439 const std::string& src_name;
2440 const std::string& dst_name;
2441
2442 RGWCopyObjRequest(CephContext* _cct, RGWUserInfo *_user,
2443 RGWFileHandle* _src_parent, RGWFileHandle* _dst_parent,
2444 const std::string& _src_name, const std::string& _dst_name)
2445 : RGWLibRequest(_cct, _user), src_parent(_src_parent),
2446 dst_parent(_dst_parent), src_name(_src_name), dst_name(_dst_name) {
2447 /* all requests have this */
2448 op = this;
2449
2450 /* allow this request to replace selected attrs */
2451 attrs_mod = RGWRados::ATTRSMOD_MERGE;
2452 }
2453
2454 bool only_bucket() override { return true; }
2455
2456 int op_init() override {
2457 // assign store, s, and dialect_handler
2458 RGWObjectCtx* rados_ctx
2459 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2460 // framework promises to call op_init after parent init
2461 assert(rados_ctx);
2462 RGWOp::init(rados_ctx->store, get_state(), this);
2463 op = this; // assign self as op: REQUIRED
2464
2465 return 0;
2466 }
2467
2468 int header_init() override {
2469
2470 struct req_state* s = get_state();
2471 s->info.method = "PUT"; // XXX check
2472 s->op = OP_PUT;
2473
2474 src_bucket_name = src_parent->bucket_name();
2475 // need s->src_bucket_name?
2476 src_object.name = src_parent->format_child_name(src_name, false);
2477 // need s->src_object?
2478
2479 dest_bucket_name = dst_parent->bucket_name();
2480 // need s->bucket.name?
2481 dest_object = dst_parent->format_child_name(dst_name, false);
2482 // need s->object_name?
2483
2484 int rc = valid_s3_object_name(dest_object);
2485 if (rc != 0)
2486 return rc;
2487
2488 /* XXX and fixup key attr (could optimize w/string ref and
2489 * dest_object) */
2490 buffer::list ux_key;
2491 fh_key fhk = dst_parent->make_fhk(dst_name);
2492 rgw::encode(fhk, ux_key);
2493 emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
2494
2495 #if 0 /* XXX needed? */
2496 s->relative_uri = uri;
2497 s->info.request_uri = uri; // XXX
2498 s->info.effective_uri = uri;
2499 s->info.request_params = "";
2500 s->info.domain = ""; /* XXX ? */
2501 #endif
2502
2503 // woo
2504 s->user = user;
2505 s->bucket_tenant = user->user_id.tenant;
2506
2507 return 0;
2508 }
2509
2510 int get_params() override {
2511 struct req_state* s = get_state();
2512 RGWAccessControlPolicy_S3 s3policy(s->cct);
2513 /* we don't have (any) headers, so just create canned ACLs */
2514 int ret = s3policy.create_canned(s->owner, s->bucket_owner, s->canned_acl);
2515 dest_policy = s3policy;
2516 return ret;
2517 }
2518
2519 void send_response() override {}
2520 void send_partial_response(off_t ofs) override {}
2521
2522 }; /* RGWCopyObjRequest */
2523
2524 class RGWSetAttrsRequest : public RGWLibRequest,
2525 public RGWSetAttrs /* RGWOp */
2526 {
2527 public:
2528 const std::string& bucket_name;
2529 const std::string& obj_name;
2530
2531 RGWSetAttrsRequest(CephContext* _cct, RGWUserInfo *_user,
2532 const std::string& _bname, const std::string& _oname)
2533 : RGWLibRequest(_cct, _user), bucket_name(_bname), obj_name(_oname) {
2534 op = this;
2535 }
2536
2537 bool only_bucket() override { return false; }
2538
2539 int op_init() override {
2540 // assign store, s, and dialect_handler
2541 RGWObjectCtx* rados_ctx
2542 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2543 // framework promises to call op_init after parent init
2544 assert(rados_ctx);
2545 RGWOp::init(rados_ctx->store, get_state(), this);
2546 op = this; // assign self as op: REQUIRED
2547 return 0;
2548 }
2549
2550 int header_init() override {
2551
2552 struct req_state* s = get_state();
2553 s->info.method = "PUT";
2554 s->op = OP_PUT;
2555
2556 /* XXX derp derp derp */
2557 std::string uri = make_uri(bucket_name, obj_name);
2558 s->relative_uri = uri;
2559 s->info.request_uri = uri; // XXX
2560 s->info.effective_uri = uri;
2561 s->info.request_params = "";
2562 s->info.domain = ""; /* XXX ? */
2563
2564 // woo
2565 s->user = user;
2566 s->bucket_tenant = user->user_id.tenant;
2567
2568 return 0;
2569 }
2570
2571 int get_params() override {
2572 return 0;
2573 }
2574
2575 void send_response() override {}
2576
2577 }; /* RGWSetAttrsRequest */
2578
2579 /*
2580 * Send request to get the rados cluster stats
2581 */
2582 class RGWGetClusterStatReq : public RGWLibRequest,
2583 public RGWGetClusterStat {
2584 public:
2585 struct rados_cluster_stat_t& stats_req;
2586 RGWGetClusterStatReq(CephContext* _cct,RGWUserInfo *_user,
2587 rados_cluster_stat_t& _stats):
2588 RGWLibRequest(_cct, _user), stats_req(_stats){
2589 op = this;
2590 }
2591
2592 int op_init() override {
2593 // assign store, s, and dialect_handler
2594 RGWObjectCtx* rados_ctx
2595 = static_cast<RGWObjectCtx*>(get_state()->obj_ctx);
2596 // framework promises to call op_init after parent init
2597 assert(rados_ctx);
2598 RGWOp::init(rados_ctx->store, get_state(), this);
2599 op = this; // assign self as op: REQUIRED
2600 return 0;
2601 }
2602
2603 int header_init() override {
2604 struct req_state* s = get_state();
2605 s->info.method = "GET";
2606 s->op = OP_GET;
2607 s->user = user;
2608 return 0;
2609 }
2610
2611 int get_params() override { return 0; }
2612 bool only_bucket() override { return false; }
2613 void send_response() override {
2614 stats_req.kb = stats_op.kb;
2615 stats_req.kb_avail = stats_op.kb_avail;
2616 stats_req.kb_used = stats_op.kb_used;
2617 stats_req.num_objects = stats_op.num_objects;
2618 }
2619 }; /* RGWGetClusterStatReq */
2620
2621
2622 } /* namespace rgw */
2623
2624 #endif /* RGW_FILE_H */