]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/rgw_file.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rgw / rgw_file.cc
CommitLineData
7c673cae 1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
9f95a23c 2// vim: ts=8 sw=2 smarttab ft=cpp
7c673cae
FG
3
4#include "include/compat.h"
5#include "include/rados/rgw_file.h"
6
7#include <sys/types.h>
8#include <sys/stat.h>
9
10#include "rgw_lib.h"
7c673cae
FG
11#include "rgw_resolve.h"
12#include "rgw_op.h"
13#include "rgw_rest.h"
14#include "rgw_acl.h"
15#include "rgw_acl_s3.h"
16#include "rgw_frontend.h"
17#include "rgw_request.h"
18#include "rgw_process.h"
19#include "rgw_rest_user.h"
20#include "rgw_rest_s3.h"
21#include "rgw_os_lib.h"
22#include "rgw_auth_s3.h"
23#include "rgw_user.h"
24#include "rgw_bucket.h"
11fdf7f2 25#include "rgw_zone.h"
7c673cae
FG
26#include "rgw_file.h"
27#include "rgw_lib_frontend.h"
11fdf7f2 28#include "rgw_perf_counters.h"
28e407b8 29#include "common/errno.h"
7c673cae 30
9f95a23c
TL
31#include "services/svc_zone.h"
32
7c673cae
FG
33#include <atomic>
34
35#define dout_subsys ceph_subsys_rgw
36
20effc67 37using namespace std;
7c673cae
FG
38using namespace rgw;
39
40namespace rgw {
41
7c673cae
FG
42 const string RGWFileHandle::root_name = "/";
43
44 std::atomic<uint32_t> RGWLibFS::fs_inst_counter;
45
46 uint32_t RGWLibFS::write_completion_interval_s = 10;
47
48 ceph::timer<ceph::mono_clock> RGWLibFS::write_timer{
49 ceph::construct_suspended};
50
51 inline int valid_fs_bucket_name(const string& name) {
52 int rc = valid_s3_bucket_name(name, false /* relaxed */);
53 if (rc != 0) {
54 if (name.size() > 255)
55 return -ENAMETOOLONG;
56 return -EINVAL;
57 }
58 return 0;
59 }
60
61 inline int valid_fs_object_name(const string& name) {
62 int rc = valid_s3_object_name(name);
63 if (rc != 0) {
64 if (name.size() > 1024)
65 return -ENAMETOOLONG;
66 return -EINVAL;
67 }
68 return 0;
69 }
70
f67539c2
TL
71 class XattrHash
72 {
73 public:
74 std::size_t operator()(const rgw_xattrstr& att) const noexcept {
75 return XXH64(att.val, att.len, 5882300);
76 }
77 };
78
79 class XattrEqual
80 {
81 public:
82 bool operator()(const rgw_xattrstr& lhs, const rgw_xattrstr& rhs) const {
83 return ((lhs.len == rhs.len) &&
84 (strncmp(lhs.val, rhs.val, lhs.len) == 0));
85 }
86 };
87
88 /* well-known attributes */
89 static const std::unordered_set<
90 rgw_xattrstr, XattrHash, XattrEqual> rgw_exposed_attrs = {
91 rgw_xattrstr{const_cast<char*>(RGW_ATTR_ETAG), sizeof(RGW_ATTR_ETAG)-1}
92 };
93
94 static inline bool is_exposed_attr(const rgw_xattrstr& k) {
95 return (rgw_exposed_attrs.find(k) != rgw_exposed_attrs.end());
96 }
97
31f18b77
FG
98 LookupFHResult RGWLibFS::stat_bucket(RGWFileHandle* parent, const char *path,
99 RGWLibFS::BucketStats& bs,
100 uint32_t flags)
7c673cae
FG
101 {
102 LookupFHResult fhr{nullptr, 0};
103 std::string bucket_name{path};
20effc67 104 RGWStatBucketRequest req(cct, user->clone(), bucket_name, bs);
7c673cae 105
1e59de90 106 int rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
107 if ((rc == 0) &&
108 (req.get_ret() == 0) &&
109 (req.matched())) {
110 fhr = lookup_fh(parent, path,
31f18b77 111 (flags & RGWFileHandle::FLAG_LOCKED)|
7c673cae
FG
112 RGWFileHandle::FLAG_CREATE|
113 RGWFileHandle::FLAG_BUCKET);
114 if (get<0>(fhr)) {
115 RGWFileHandle* rgw_fh = get<0>(fhr);
31f18b77
FG
116 if (! (flags & RGWFileHandle::FLAG_LOCKED)) {
117 rgw_fh->mtx.lock();
118 }
7c673cae
FG
119 rgw_fh->set_times(req.get_ctime());
120 /* restore attributes */
121 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
122 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
123 if (ux_key && ux_attrs) {
3efd9988
FG
124 DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
125 if (get<0>(dar) || get<1>(dar)) {
126 update_fh(rgw_fh);
127 }
7c673cae 128 }
31f18b77
FG
129 if (! (flags & RGWFileHandle::FLAG_LOCKED)) {
130 rgw_fh->mtx.unlock();
131 }
7c673cae
FG
132 }
133 }
134 return fhr;
135 }
136
eafe8130
TL
137 LookupFHResult RGWLibFS::fake_leaf(RGWFileHandle* parent,
138 const char *path,
139 enum rgw_fh_type type,
140 struct stat *st, uint32_t st_mask,
141 uint32_t flags)
142 {
143 /* synthesize a minimal handle from parent, path, type, and st */
144 using std::get;
145
146 flags |= RGWFileHandle::FLAG_CREATE;
147
148 switch (type) {
149 case RGW_FS_TYPE_DIRECTORY:
150 flags |= RGWFileHandle::FLAG_DIRECTORY;
151 break;
152 default:
153 /* file */
154 break;
155 };
156
157 LookupFHResult fhr = lookup_fh(parent, path, flags);
158 if (get<0>(fhr)) {
159 RGWFileHandle* rgw_fh = get<0>(fhr);
160 if (st) {
161 lock_guard guard(rgw_fh->mtx);
162 if (st_mask & RGW_SETATTR_SIZE) {
163 rgw_fh->set_size(st->st_size);
164 }
165 if (st_mask & RGW_SETATTR_MTIME) {
166 rgw_fh->set_times(st->st_mtim);
167 }
168 } /* st */
169 } /* rgw_fh */
170 return fhr;
171 } /* RGWLibFS::fake_leaf */
172
7c673cae
FG
173 LookupFHResult RGWLibFS::stat_leaf(RGWFileHandle* parent,
174 const char *path,
175 enum rgw_fh_type type,
176 uint32_t flags)
177 {
178 /* find either-of <object_name>, <object_name/>, only one of
179 * which should exist; atomicity? */
180 using std::get;
181
182 LookupFHResult fhr{nullptr, 0};
183
184 /* XXX the need for two round-trip operations to identify file or
185 * directory leaf objects is unecessary--the current proposed
186 * mechanism to avoid this is to store leaf object names with an
187 * object locator w/o trailing slash */
188
31f18b77 189 std::string obj_path = parent->format_child_name(path, false);
7c673cae
FG
190
191 for (auto ix : { 0, 1, 2 }) {
192 switch (ix) {
193 case 0:
194 {
195 /* type hint */
196 if (type == RGW_FS_TYPE_DIRECTORY)
197 continue;
198
20effc67 199 RGWStatObjRequest req(cct, user->clone(),
7c673cae
FG
200 parent->bucket_name(), obj_path,
201 RGWStatObjRequest::FLAG_NONE);
1e59de90 202 int rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
203 if ((rc == 0) &&
204 (req.get_ret() == 0)) {
205 fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
206 if (get<0>(fhr)) {
207 RGWFileHandle* rgw_fh = get<0>(fhr);
208 lock_guard guard(rgw_fh->mtx);
209 rgw_fh->set_size(req.get_size());
210 rgw_fh->set_times(req.get_mtime());
211 /* restore attributes */
212 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
213 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
81eedcae
TL
214 rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
215 rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
f67539c2
TL
216 if (!(flags & RGWFileHandle::FLAG_IN_CB) &&
217 ux_key && ux_attrs) {
3efd9988
FG
218 DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
219 if (get<0>(dar) || get<1>(dar)) {
220 update_fh(rgw_fh);
221 }
7c673cae
FG
222 }
223 }
224 goto done;
225 }
226 }
227 break;
228 case 1:
229 {
230 /* try dir form */
231 /* type hint */
232 if (type == RGW_FS_TYPE_FILE)
233 continue;
234
235 obj_path += "/";
20effc67 236 RGWStatObjRequest req(cct, user->clone(),
7c673cae
FG
237 parent->bucket_name(), obj_path,
238 RGWStatObjRequest::FLAG_NONE);
1e59de90 239 int rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
240 if ((rc == 0) &&
241 (req.get_ret() == 0)) {
242 fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_DIRECTORY);
243 if (get<0>(fhr)) {
244 RGWFileHandle* rgw_fh = get<0>(fhr);
245 lock_guard guard(rgw_fh->mtx);
246 rgw_fh->set_size(req.get_size());
247 rgw_fh->set_times(req.get_mtime());
248 /* restore attributes */
249 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
250 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
81eedcae
TL
251 rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
252 rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
f67539c2
TL
253 if (!(flags & RGWFileHandle::FLAG_IN_CB) &&
254 ux_key && ux_attrs) {
3efd9988
FG
255 DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
256 if (get<0>(dar) || get<1>(dar)) {
257 update_fh(rgw_fh);
258 }
7c673cae
FG
259 }
260 }
261 goto done;
262 }
263 }
264 break;
265 case 2:
266 {
267 std::string object_name{path};
20effc67 268 RGWStatLeafRequest req(cct, user->clone(),
f67539c2 269 parent, object_name);
1e59de90 270 int rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
271 if ((rc == 0) &&
272 (req.get_ret() == 0)) {
273 if (req.matched) {
274 /* we need rgw object's key name equal to file name, if
275 * not return NULL */
276 if ((flags & RGWFileHandle::FLAG_EXACT_MATCH) &&
277 !req.exact_matched) {
278 lsubdout(get_context(), rgw, 15)
279 << __func__
280 << ": stat leaf not exact match file name = "
281 << path << dendl;
282 goto done;
283 }
284 fhr = lookup_fh(parent, path,
285 RGWFileHandle::FLAG_CREATE|
286 ((req.is_dir) ?
287 RGWFileHandle::FLAG_DIRECTORY :
288 RGWFileHandle::FLAG_NONE));
289 /* XXX we don't have an object--in general, there need not
290 * be one (just a path segment in some other object). In
291 * actual leaf an object exists, but we'd need another round
292 * trip to get attrs */
293 if (get<0>(fhr)) {
294 /* for now use the parent object's mtime */
295 RGWFileHandle* rgw_fh = get<0>(fhr);
296 lock_guard guard(rgw_fh->mtx);
297 rgw_fh->set_mtime(parent->get_mtime());
298 }
299 }
300 }
301 }
302 break;
303 default:
304 /* not reached */
305 break;
306 }
307 }
308 done:
309 return fhr;
310 } /* RGWLibFS::stat_leaf */
311
312 int RGWLibFS::read(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
313 size_t* bytes_read, void* buffer, uint32_t flags)
314 {
315 if (! rgw_fh->is_file())
316 return -EINVAL;
317
318 if (rgw_fh->deleted())
319 return -ESTALE;
320
20effc67 321 RGWReadRequest req(get_context(), user->clone(), rgw_fh, offset, length, buffer);
7c673cae 322
1e59de90 323 int rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae 324 if ((rc == 0) &&
522d829b 325 ((rc = req.get_ret()) == 0)) {
11fdf7f2
TL
326 lock_guard guard(rgw_fh->mtx);
327 rgw_fh->set_atime(real_clock::to_timespec(real_clock::now()));
328 *bytes_read = req.nread;
329 }
330
331 return rc;
332 }
333
334 int RGWLibFS::readlink(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
335 size_t* bytes_read, void* buffer, uint32_t flags)
336 {
337 if (! rgw_fh->is_link())
338 return -EINVAL;
339
340 if (rgw_fh->deleted())
341 return -ESTALE;
342
20effc67 343 RGWReadRequest req(get_context(), user->clone(), rgw_fh, offset, length, buffer);
11fdf7f2 344
1e59de90 345 int rc = g_rgwlib->get_fe()->execute_req(&req);
11fdf7f2 346 if ((rc == 0) &&
522d829b 347 ((rc = req.get_ret()) == 0)) {
7c673cae
FG
348 lock_guard(rgw_fh->mtx);
349 rgw_fh->set_atime(real_clock::to_timespec(real_clock::now()));
350 *bytes_read = req.nread;
351 }
352
353 return rc;
354 }
355
356 int RGWLibFS::unlink(RGWFileHandle* rgw_fh, const char* name, uint32_t flags)
357 {
358 int rc = 0;
31f18b77 359 BucketStats bs;
7c673cae 360 RGWFileHandle* parent = nullptr;
31f18b77 361 RGWFileHandle* bkt_fh = nullptr;
7c673cae
FG
362
363 if (unlikely(flags & RGWFileHandle::FLAG_UNLINK_THIS)) {
364 /* LOCKED */
365 parent = rgw_fh->get_parent();
366 } else {
367 /* atomicity */
368 parent = rgw_fh;
369 LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_LOCK);
370 rgw_fh = get<0>(fhr);
371 /* LOCKED */
372 }
373
374 if (parent->is_root()) {
31f18b77
FG
375 /* a bucket may have an object storing Unix attributes, check
376 * for and delete it */
377 LookupFHResult fhr;
378 fhr = stat_bucket(parent, name, bs, (rgw_fh) ?
379 RGWFileHandle::FLAG_LOCKED :
380 RGWFileHandle::FLAG_NONE);
381 bkt_fh = get<0>(fhr);
382 if (unlikely(! bkt_fh)) {
383 /* implies !rgw_fh, so also !LOCKED */
384 return -ENOENT;
385 }
386
387 if (bs.num_entries > 1) {
388 unref(bkt_fh); /* return stat_bucket ref */
389 if (likely(!! rgw_fh)) { /* return lock and ref from
390 * lookup_fh (or caller in the
391 * special case of
392 * RGWFileHandle::FLAG_UNLINK_THIS) */
393 rgw_fh->mtx.unlock();
394 unref(rgw_fh);
395 }
396 return -ENOTEMPTY;
397 } else {
398 /* delete object w/key "<bucket>/" (uxattrs), if any */
399 string oname{"/"};
20effc67 400 RGWDeleteObjRequest req(cct, user->clone(), bkt_fh->bucket_name(), oname);
1e59de90 401 rc = g_rgwlib->get_fe()->execute_req(&req);
31f18b77
FG
402 /* don't care if ENOENT */
403 unref(bkt_fh);
404 }
405
406 string bname{name};
20effc67 407 RGWDeleteBucketRequest req(cct, user->clone(), bname);
1e59de90 408 rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
409 if (! rc) {
410 rc = req.get_ret();
411 }
412 } else {
413 /*
414 * leaf object
415 */
416 if (! rgw_fh) {
417 /* XXX for now, peform a hard lookup to deduce the type of
418 * object to be deleted ("foo" vs. "foo/")--also, ensures
419 * atomicity at this endpoint */
420 struct rgw_file_handle *fh;
421 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &fh,
eafe8130 422 nullptr /* st */, 0 /* mask */,
7c673cae
FG
423 RGW_LOOKUP_FLAG_NONE);
424 if (!! rc)
425 return rc;
426
427 /* rgw_fh ref+ */
428 rgw_fh = get_rgwfh(fh);
429 rgw_fh->mtx.lock(); /* LOCKED */
430 }
431
432 std::string oname = rgw_fh->relative_object_name();
433 if (rgw_fh->is_dir()) {
434 /* for the duration of our cache timer, trust positive
435 * child cache */
436 if (rgw_fh->has_children()) {
437 rgw_fh->mtx.unlock();
438 unref(rgw_fh);
439 return(-ENOTEMPTY);
440 }
441 oname += "/";
442 }
20effc67 443 RGWDeleteObjRequest req(cct, user->clone(), parent->bucket_name(), oname);
1e59de90 444 rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
445 if (! rc) {
446 rc = req.get_ret();
447 }
448 }
449
31f18b77
FG
450 /* ENOENT when raced with other s3 gateway */
451 if (! rc || rc == -ENOENT) {
452 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
453 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
454 RGWFileHandle::FHCache::FLAG_LOCK);
455 }
7c673cae
FG
456
457 if (! rc) {
458 real_time t = real_clock::now();
459 parent->set_mtime(real_clock::to_timespec(t));
460 parent->set_ctime(real_clock::to_timespec(t));
461 }
462
463 rgw_fh->mtx.unlock();
464 unref(rgw_fh);
465
466 return rc;
467 } /* RGWLibFS::unlink */
468
469 int RGWLibFS::rename(RGWFileHandle* src_fh, RGWFileHandle* dst_fh,
470 const char *_src_name, const char *_dst_name)
471
472 {
473 /* XXX initial implementation: try-copy, and delete if copy
474 * succeeds */
475 int rc = -EINVAL;
7c673cae
FG
476 real_time t;
477
478 std::string src_name{_src_name};
479 std::string dst_name{_dst_name};
480
481 /* atomicity */
482 LookupFHResult fhr = lookup_fh(src_fh, _src_name, RGWFileHandle::FLAG_LOCK);
483 RGWFileHandle* rgw_fh = get<0>(fhr);
484
485 /* should not happen */
486 if (! rgw_fh) {
487 ldout(get_context(), 0) << __func__
488 << " BUG no such src renaming path="
489 << src_name
490 << dendl;
491 goto out;
492 }
493
494 /* forbid renaming of directories (unreasonable at scale) */
495 if (rgw_fh->is_dir()) {
496 ldout(get_context(), 12) << __func__
497 << " rejecting attempt to rename directory path="
498 << rgw_fh->full_object_name()
499 << dendl;
500 rc = -EPERM;
501 goto unlock;
502 }
503
504 /* forbid renaming open files (violates intent, for now) */
505 if (rgw_fh->is_open()) {
506 ldout(get_context(), 12) << __func__
507 << " rejecting attempt to rename open file path="
508 << rgw_fh->full_object_name()
509 << dendl;
510 rc = -EPERM;
511 goto unlock;
512 }
513
514 t = real_clock::now();
515
516 for (int ix : {0, 1}) {
517 switch (ix) {
518 case 0:
519 {
20effc67 520 RGWCopyObjRequest req(cct, user->clone(), src_fh, dst_fh, src_name, dst_name);
1e59de90 521 int rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
522 if ((rc != 0) ||
523 ((rc = req.get_ret()) != 0)) {
524 ldout(get_context(), 1)
525 << __func__
526 << " rename step 0 failed src="
527 << src_fh->full_object_name() << " " << src_name
528 << " dst=" << dst_fh->full_object_name()
529 << " " << dst_name
530 << "rc " << rc
531 << dendl;
532 goto unlock;
533 }
534 ldout(get_context(), 12)
535 << __func__
536 << " rename step 0 success src="
537 << src_fh->full_object_name() << " " << src_name
538 << " dst=" << dst_fh->full_object_name()
539 << " " << dst_name
540 << " rc " << rc
541 << dendl;
542 /* update dst change id */
543 dst_fh->set_times(t);
544 }
545 break;
546 case 1:
547 {
548 rc = this->unlink(rgw_fh /* LOCKED */, _src_name,
549 RGWFileHandle::FLAG_UNLINK_THIS);
550 /* !LOCKED, -ref */
551 if (! rc) {
552 ldout(get_context(), 12)
553 << __func__
554 << " rename step 1 success src="
555 << src_fh->full_object_name() << " " << src_name
556 << " dst=" << dst_fh->full_object_name()
557 << " " << dst_name
558 << " rc " << rc
559 << dendl;
560 /* update src change id */
561 src_fh->set_times(t);
562 } else {
563 ldout(get_context(), 1)
564 << __func__
565 << " rename step 1 failed src="
566 << src_fh->full_object_name() << " " << src_name
567 << " dst=" << dst_fh->full_object_name()
568 << " " << dst_name
569 << " rc " << rc
570 << dendl;
571 }
572 }
573 goto out;
574 default:
11fdf7f2 575 ceph_abort();
7c673cae
FG
576 } /* switch */
577 } /* ix */
578 unlock:
579 rgw_fh->mtx.unlock(); /* !LOCKED */
580 unref(rgw_fh); /* -ref */
581
582 out:
583 return rc;
584 } /* RGWLibFS::rename */
585
586 MkObjResult RGWLibFS::mkdir(RGWFileHandle* parent, const char *name,
587 struct stat *st, uint32_t mask, uint32_t flags)
588 {
7c673cae 589 int rc, rc2;
31f18b77
FG
590 rgw_file_handle *lfh;
591
592 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
eafe8130 593 nullptr /* st */, 0 /* mask */,
31f18b77
FG
594 RGW_LOOKUP_FLAG_NONE);
595 if (! rc) {
596 /* conflict! */
597 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
f67539c2 598 // ignore return code
31f18b77
FG
599 return MkObjResult{nullptr, -EEXIST};
600 }
7c673cae 601
31f18b77 602 MkObjResult mkr{nullptr, -EINVAL};
7c673cae
FG
603 LookupFHResult fhr;
604 RGWFileHandle* rgw_fh = nullptr;
605 buffer::list ux_key, ux_attrs;
606
607 fhr = lookup_fh(parent, name,
608 RGWFileHandle::FLAG_CREATE|
609 RGWFileHandle::FLAG_DIRECTORY|
610 RGWFileHandle::FLAG_LOCK);
611 rgw_fh = get<0>(fhr);
612 if (rgw_fh) {
613 rgw_fh->create_stat(st, mask);
614 rgw_fh->set_times(real_clock::now());
615 /* save attrs */
616 rgw_fh->encode_attrs(ux_key, ux_attrs);
617 if (st)
494da23a 618 rgw_fh->stat(st, RGWFileHandle::FLAG_LOCKED);
7c673cae
FG
619 get<0>(mkr) = rgw_fh;
620 } else {
621 get<1>(mkr) = -EIO;
622 return mkr;
623 }
624
625 if (parent->is_root()) {
626 /* bucket */
627 string bname{name};
628 /* enforce S3 name restrictions */
629 rc = valid_fs_bucket_name(bname);
630 if (rc != 0) {
631 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
632 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
633 RGWFileHandle::FHCache::FLAG_LOCK);
634 rgw_fh->mtx.unlock();
635 unref(rgw_fh);
636 get<0>(mkr) = nullptr;
637 get<1>(mkr) = rc;
638 return mkr;
639 }
640
20effc67 641 RGWCreateBucketRequest req(get_context(), user->clone(), bname);
7c673cae
FG
642
643 /* save attrs */
644 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
645 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
646
1e59de90 647 rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
648 rc2 = req.get_ret();
649 } else {
650 /* create an object representing the directory */
651 buffer::list bl;
31f18b77 652 string dir_name = parent->format_child_name(name, true);
7c673cae
FG
653
654 /* need valid S3 name (characters, length <= 1024, etc) */
655 rc = valid_fs_object_name(dir_name);
656 if (rc != 0) {
657 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
658 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
659 RGWFileHandle::FHCache::FLAG_LOCK);
660 rgw_fh->mtx.unlock();
661 unref(rgw_fh);
662 get<0>(mkr) = nullptr;
663 get<1>(mkr) = rc;
664 return mkr;
665 }
666
20effc67 667 RGWPutObjRequest req(get_context(), user->clone(), parent->bucket_name(), dir_name, bl);
7c673cae
FG
668
669 /* save attrs */
670 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
671 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
672
1e59de90 673 rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
674 rc2 = req.get_ret();
675 }
676
677 if (! ((rc == 0) &&
678 (rc2 == 0))) {
679 /* op failed */
680 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
681 rgw_fh->mtx.unlock(); /* !LOCKED */
682 unref(rgw_fh);
683 get<0>(mkr) = nullptr;
684 /* fixup rc */
685 if (!rc)
686 rc = rc2;
687 } else {
688 real_time t = real_clock::now();
689 parent->set_mtime(real_clock::to_timespec(t));
690 parent->set_ctime(real_clock::to_timespec(t));
691 rgw_fh->mtx.unlock(); /* !LOCKED */
692 }
693
694 get<1>(mkr) = rc;
695
696 return mkr;
697 } /* RGWLibFS::mkdir */
698
699 MkObjResult RGWLibFS::create(RGWFileHandle* parent, const char *name,
700 struct stat *st, uint32_t mask, uint32_t flags)
701 {
702 int rc, rc2;
703
704 using std::get;
705
706 rgw_file_handle *lfh;
707 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
eafe8130 708 nullptr /* st */, 0 /* mask */,
7c673cae
FG
709 RGW_LOOKUP_FLAG_NONE);
710 if (! rc) {
711 /* conflict! */
712 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
f67539c2 713 // ignore return code
7c673cae
FG
714 return MkObjResult{nullptr, -EEXIST};
715 }
716
717 /* expand and check name */
31f18b77 718 std::string obj_name = parent->format_child_name(name, false);
7c673cae
FG
719 rc = valid_fs_object_name(obj_name);
720 if (rc != 0) {
721 return MkObjResult{nullptr, rc};
722 }
723
724 /* create it */
725 buffer::list bl;
20effc67 726 RGWPutObjRequest req(cct, user->clone(), parent->bucket_name(), obj_name, bl);
7c673cae
FG
727 MkObjResult mkr{nullptr, -EINVAL};
728
1e59de90 729 rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
730 rc2 = req.get_ret();
731
732 if ((rc == 0) &&
733 (rc2 == 0)) {
734 /* XXX atomicity */
735 LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_CREATE |
736 RGWFileHandle::FLAG_LOCK);
737 RGWFileHandle* rgw_fh = get<0>(fhr);
738 if (rgw_fh) {
739 if (get<1>(fhr) & RGWFileHandle::FLAG_CREATE) {
740 /* fill in stat data */
741 real_time t = real_clock::now();
742 rgw_fh->create_stat(st, mask);
743 rgw_fh->set_times(t);
744
745 parent->set_mtime(real_clock::to_timespec(t));
746 parent->set_ctime(real_clock::to_timespec(t));
747 }
748 if (st)
494da23a 749 (void) rgw_fh->stat(st, RGWFileHandle::FLAG_LOCKED);
81eedcae
TL
750
751 rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
752 rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
753
7c673cae 754 get<0>(mkr) = rgw_fh;
1d09f67e 755 rgw_fh->file_ondisk_version = 0; // inital version
7c673cae
FG
756 rgw_fh->mtx.unlock();
757 } else
758 rc = -EIO;
759 }
760
761 get<1>(mkr) = rc;
11fdf7f2
TL
762
763 /* case like : quota exceed will be considered as fail too*/
764 if(rc2 < 0)
765 get<1>(mkr) = rc2;
7c673cae
FG
766
767 return mkr;
768 } /* RGWLibFS::create */
769
11fdf7f2
TL
770 MkObjResult RGWLibFS::symlink(RGWFileHandle* parent, const char *name,
771 const char* link_path, struct stat *st, uint32_t mask, uint32_t flags)
772 {
773 int rc, rc2;
774
775 using std::get;
776
777 rgw_file_handle *lfh;
eafe8130
TL
778 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
779 nullptr /* st */, 0 /* mask */,
11fdf7f2
TL
780 RGW_LOOKUP_FLAG_NONE);
781 if (! rc) {
782 /* conflict! */
783 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
f67539c2 784 // ignore return code
11fdf7f2
TL
785 return MkObjResult{nullptr, -EEXIST};
786 }
787
788 MkObjResult mkr{nullptr, -EINVAL};
789 LookupFHResult fhr;
790 RGWFileHandle* rgw_fh = nullptr;
791 buffer::list ux_key, ux_attrs;
792
793 fhr = lookup_fh(parent, name,
794 RGWFileHandle::FLAG_CREATE|
795 RGWFileHandle::FLAG_SYMBOLIC_LINK|
796 RGWFileHandle::FLAG_LOCK);
797 rgw_fh = get<0>(fhr);
798 if (rgw_fh) {
799 rgw_fh->create_stat(st, mask);
800 rgw_fh->set_times(real_clock::now());
801 /* save attrs */
802 rgw_fh->encode_attrs(ux_key, ux_attrs);
803 if (st)
804 rgw_fh->stat(st);
805 get<0>(mkr) = rgw_fh;
806 } else {
807 get<1>(mkr) = -EIO;
808 return mkr;
809 }
810
811 /* need valid S3 name (characters, length <= 1024, etc) */
812 rc = valid_fs_object_name(name);
813 if (rc != 0) {
814 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
815 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
816 RGWFileHandle::FHCache::FLAG_LOCK);
817 rgw_fh->mtx.unlock();
818 unref(rgw_fh);
819 get<0>(mkr) = nullptr;
820 get<1>(mkr) = rc;
821 return mkr;
822 }
823
824 string obj_name = std::string(name);
825 /* create an object representing the directory */
826 buffer::list bl;
827
828 /* XXXX */
829#if 0
830 bl.push_back(
831 buffer::create_static(len, static_cast<char*>(buffer)));
832#else
833
834 bl.push_back(
835 buffer::copy(link_path, strlen(link_path)));
836#endif
837
20effc67 838 RGWPutObjRequest req(get_context(), user->clone(), parent->bucket_name(), obj_name, bl);
11fdf7f2
TL
839
840 /* save attrs */
841 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
842 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
843
1e59de90 844 rc = g_rgwlib->get_fe()->execute_req(&req);
11fdf7f2
TL
845 rc2 = req.get_ret();
846 if (! ((rc == 0) &&
847 (rc2 == 0))) {
848 /* op failed */
849 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
850 rgw_fh->mtx.unlock(); /* !LOCKED */
851 unref(rgw_fh);
852 get<0>(mkr) = nullptr;
853 /* fixup rc */
854 if (!rc)
855 rc = rc2;
856 } else {
857 real_time t = real_clock::now();
858 parent->set_mtime(real_clock::to_timespec(t));
859 parent->set_ctime(real_clock::to_timespec(t));
860 rgw_fh->mtx.unlock(); /* !LOCKED */
861 }
862
863 get<1>(mkr) = rc;
864
865 return mkr;
866 } /* RGWLibFS::symlink */
867
7c673cae
FG
868 int RGWLibFS::getattr(RGWFileHandle* rgw_fh, struct stat* st)
869 {
870 switch(rgw_fh->fh.fh_type) {
871 case RGW_FS_TYPE_FILE:
872 {
873 if (rgw_fh->deleted())
874 return -ESTALE;
875 }
876 break;
877 default:
878 break;
879 };
494da23a 880 /* if rgw_fh is a directory, mtime will be advanced */
7c673cae
FG
881 return rgw_fh->stat(st);
882 } /* RGWLibFS::getattr */
883
884 int RGWLibFS::setattr(RGWFileHandle* rgw_fh, struct stat* st, uint32_t mask,
885 uint32_t flags)
886 {
887 int rc, rc2;
888 buffer::list ux_key, ux_attrs;
81eedcae
TL
889 buffer::list etag = rgw_fh->get_etag();
890 buffer::list acls = rgw_fh->get_acls();
7c673cae
FG
891
892 lock_guard guard(rgw_fh->mtx);
893
894 switch(rgw_fh->fh.fh_type) {
895 case RGW_FS_TYPE_FILE:
896 {
897 if (rgw_fh->deleted())
898 return -ESTALE;
899 }
900 break;
901 default:
902 break;
903 };
904
905 string obj_name{rgw_fh->relative_object_name()};
906
31f18b77
FG
907 if (rgw_fh->is_dir() &&
908 (likely(! rgw_fh->is_bucket()))) {
7c673cae
FG
909 obj_name += "/";
910 }
911
20effc67 912 RGWSetAttrsRequest req(cct, user->clone(), rgw_fh->bucket_name(), obj_name);
7c673cae
FG
913
914 rgw_fh->create_stat(st, mask);
915 rgw_fh->encode_attrs(ux_key, ux_attrs);
916
917 /* save attrs */
918 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
919 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
81eedcae
TL
920 req.emplace_attr(RGW_ATTR_ETAG, std::move(etag));
921 req.emplace_attr(RGW_ATTR_ACL, std::move(acls));
7c673cae 922
1e59de90 923 rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
924 rc2 = req.get_ret();
925
926 if (rc == -ENOENT) {
927 /* special case: materialize placeholder dir */
928 buffer::list bl;
20effc67 929 RGWPutObjRequest req(get_context(), user->clone(), rgw_fh->bucket_name(), obj_name, bl);
7c673cae
FG
930
931 rgw_fh->encode_attrs(ux_key, ux_attrs); /* because std::moved */
932
933 /* save attrs */
934 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
935 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
936
1e59de90 937 rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
938 rc2 = req.get_ret();
939 }
940
941 if ((rc != 0) || (rc2 != 0)) {
942 return -EIO;
943 }
944
945 rgw_fh->set_ctime(real_clock::to_timespec(real_clock::now()));
946
947 return 0;
948 } /* RGWLibFS::setattr */
949
f67539c2
TL
950 static inline std::string prefix_xattr_keystr(const rgw_xattrstr& key) {
951 std::string keystr;
952 keystr.reserve(sizeof(RGW_ATTR_META_PREFIX) + key.len);
522d829b 953 keystr += string{RGW_ATTR_META_PREFIX};
f67539c2
TL
954 keystr += string{key.val, key.len};
955 return keystr;
956 }
957
958 static inline std::string_view unprefix_xattr_keystr(const std::string& key)
959 {
960 std::string_view svk{key};
961 auto pos = svk.find(RGW_ATTR_META_PREFIX);
962 if (pos == std::string_view::npos) {
963 return std::string_view{""};
964 } else if (pos == 0) {
965 svk.remove_prefix(sizeof(RGW_ATTR_META_PREFIX)-1);
966 }
967 return svk;
968 }
969
970 int RGWLibFS::getxattrs(RGWFileHandle* rgw_fh, rgw_xattrlist *attrs,
971 rgw_getxattr_cb cb, void *cb_arg,
972 uint32_t flags)
973 {
974 /* cannot store on fs_root, should not on buckets? */
975 if ((rgw_fh->is_bucket()) ||
976 (rgw_fh->is_root())) {
977 return -EINVAL;
978 }
979
980 int rc, rc2, rc3;
981 string obj_name{rgw_fh->relative_object_name2()};
982
20effc67 983 RGWGetAttrsRequest req(cct, user->clone(), rgw_fh->bucket_name(), obj_name);
f67539c2
TL
984
985 for (uint32_t ix = 0; ix < attrs->xattr_cnt; ++ix) {
986 auto& xattr = attrs->xattrs[ix];
987
988 /* pass exposed attr keys as given, else prefix */
989 std::string k = is_exposed_attr(xattr.key)
990 ? std::string{xattr.key.val, xattr.key.len}
991 : prefix_xattr_keystr(xattr.key);
992
993 req.emplace_key(std::move(k));
994 }
995
996 if (ldlog_p1(get_context(), ceph_subsys_rgw, 15)) {
997 lsubdout(get_context(), rgw, 15)
998 << __func__
999 << " get keys for: "
1000 << rgw_fh->object_name()
1001 << " keys:"
1002 << dendl;
1003 for (const auto& attr: req.get_attrs()) {
1004 lsubdout(get_context(), rgw, 15)
1005 << "\tkey: " << attr.first << dendl;
1006 }
1007 }
1008
1e59de90 1009 rc = g_rgwlib->get_fe()->execute_req(&req);
f67539c2
TL
1010 rc2 = req.get_ret();
1011 rc3 = ((rc == 0) && (rc2 == 0)) ? 0 : -EIO;
1012
1013 /* call back w/xattr data */
1014 if (rc3 == 0) {
1015 const auto& attrs = req.get_attrs();
1016 for (const auto& attr : attrs) {
1017
1018 if (!attr.second.has_value())
1019 continue;
1020
1021 const auto& k = attr.first;
1022 const auto& v = attr.second.value();
1023
1024 /* return exposed attr keys as given, else unprefix --
1025 * yes, we could have memoized the exposed check, but
1026 * to be efficient it would need to be saved with
1027 * RGWGetAttrs::attrs, I think */
1028 std::string_view svk =
1029 is_exposed_attr(rgw_xattrstr{const_cast<char*>(k.c_str()),
1030 uint32_t(k.length())})
1031 ? k
1032 : unprefix_xattr_keystr(k);
1033
1034 /* skip entries not matching prefix */
1035 if (svk.empty())
1036 continue;
1037
1038 rgw_xattrstr xattr_k = { const_cast<char*>(svk.data()),
1039 uint32_t(svk.length())};
1040 rgw_xattrstr xattr_v =
1041 {const_cast<char*>(const_cast<buffer::list&>(v).c_str()),
1042 uint32_t(v.length())};
1043 rgw_xattr xattr = { xattr_k, xattr_v };
1044 rgw_xattrlist xattrlist = { &xattr, 1 };
1045
1046 cb(&xattrlist, cb_arg, RGW_GETXATTR_FLAG_NONE);
1047 }
1048 }
1049
1050 return rc3;
1051 } /* RGWLibFS::getxattrs */
1052
1053 int RGWLibFS::lsxattrs(
1054 RGWFileHandle* rgw_fh, rgw_xattrstr *filter_prefix, rgw_getxattr_cb cb,
1055 void *cb_arg, uint32_t flags)
1056 {
1057 /* cannot store on fs_root, should not on buckets? */
1058 if ((rgw_fh->is_bucket()) ||
1059 (rgw_fh->is_root())) {
1060 return -EINVAL;
1061 }
1062
1063 int rc, rc2, rc3;
1064 string obj_name{rgw_fh->relative_object_name2()};
1065
20effc67 1066 RGWGetAttrsRequest req(cct, user->clone(), rgw_fh->bucket_name(), obj_name);
f67539c2 1067
1e59de90 1068 rc = g_rgwlib->get_fe()->execute_req(&req);
f67539c2
TL
1069 rc2 = req.get_ret();
1070 rc3 = ((rc == 0) && (rc2 == 0)) ? 0 : -EIO;
1071
1072 /* call back w/xattr data--check for eof */
1073 if (rc3 == 0) {
1074 const auto& keys = req.get_attrs();
1075 for (const auto& k : keys) {
1076
1077 /* return exposed attr keys as given, else unprefix */
1078 std::string_view svk =
1079 is_exposed_attr(rgw_xattrstr{const_cast<char*>(k.first.c_str()),
1080 uint32_t(k.first.length())})
1081 ? k.first
1082 : unprefix_xattr_keystr(k.first);
1083
1084 /* skip entries not matching prefix */
1085 if (svk.empty())
1086 continue;
1087
1088 rgw_xattrstr xattr_k = { const_cast<char*>(svk.data()),
1089 uint32_t(svk.length())};
1090 rgw_xattrstr xattr_v = { nullptr, 0 };
1091 rgw_xattr xattr = { xattr_k, xattr_v };
1092 rgw_xattrlist xattrlist = { &xattr, 1 };
1093
1094 auto cbr = cb(&xattrlist, cb_arg, RGW_LSXATTR_FLAG_NONE);
1095 if (cbr & RGW_LSXATTR_FLAG_STOP)
1096 break;
1097 }
1098 }
1099
1100 return rc3;
1101 } /* RGWLibFS::lsxattrs */
1102
1103 int RGWLibFS::setxattrs(RGWFileHandle* rgw_fh, rgw_xattrlist *attrs,
1104 uint32_t flags)
1105 {
1106 /* cannot store on fs_root, should not on buckets? */
1107 if ((rgw_fh->is_bucket()) ||
1108 (rgw_fh->is_root())) {
1109 return -EINVAL;
1110 }
1111
1112 int rc, rc2;
1113 string obj_name{rgw_fh->relative_object_name2()};
1114
20effc67 1115 RGWSetAttrsRequest req(cct, user->clone(), rgw_fh->bucket_name(), obj_name);
f67539c2
TL
1116
1117 for (uint32_t ix = 0; ix < attrs->xattr_cnt; ++ix) {
1118 auto& xattr = attrs->xattrs[ix];
1119 buffer::list attr_bl;
1120 /* don't allow storing at RGW_ATTR_META_PREFIX */
1121 if (! (xattr.key.len > 0))
1122 continue;
1123
1124 /* reject lexical match with any exposed attr */
1125 if (is_exposed_attr(xattr.key))
1126 continue;
1127
1128 string k = prefix_xattr_keystr(xattr.key);
1129 attr_bl.append(xattr.val.val, xattr.val.len);
1130 req.emplace_attr(k.c_str(), std::move(attr_bl));
1131 }
1132
1133 /* don't send null requests */
1134 if (! (req.get_attrs().size() > 0)) {
1135 return -EINVAL;
1136 }
1137
1e59de90 1138 rc = g_rgwlib->get_fe()->execute_req(&req);
f67539c2
TL
1139 rc2 = req.get_ret();
1140
1141 return (((rc == 0) && (rc2 == 0)) ? 0 : -EIO);
1142
1143 } /* RGWLibFS::setxattrs */
1144
1145 int RGWLibFS::rmxattrs(RGWFileHandle* rgw_fh, rgw_xattrlist* attrs,
1146 uint32_t flags)
1147 {
1148 /* cannot store on fs_root, should not on buckets? */
1149 if ((rgw_fh->is_bucket()) ||
1150 (rgw_fh->is_root())) {
1151 return -EINVAL;
1152 }
1153
1154 int rc, rc2;
1155 string obj_name{rgw_fh->relative_object_name2()};
1156
20effc67 1157 RGWRMAttrsRequest req(cct, user->clone(), rgw_fh->bucket_name(), obj_name);
f67539c2
TL
1158
1159 for (uint32_t ix = 0; ix < attrs->xattr_cnt; ++ix) {
1160 auto& xattr = attrs->xattrs[ix];
1161 /* don't allow storing at RGW_ATTR_META_PREFIX */
1162 if (! (xattr.key.len > 0)) {
1163 continue;
1164 }
1165 string k = prefix_xattr_keystr(xattr.key);
1166 req.emplace_key(std::move(k));
1167 }
1168
1169 /* don't send null requests */
1170 if (! (req.get_attrs().size() > 0)) {
1171 return -EINVAL;
1172 }
1173
1e59de90 1174 rc = g_rgwlib->get_fe()->execute_req(&req);
f67539c2
TL
1175 rc2 = req.get_ret();
1176
1177 return (((rc == 0) && (rc2 == 0)) ? 0 : -EIO);
1178
1179 } /* RGWLibFS::rmxattrs */
1180
1181 /* called with rgw_fh->mtx held */
3efd9988 1182 void RGWLibFS::update_fh(RGWFileHandle *rgw_fh)
224ce89b
WB
1183 {
1184 int rc, rc2;
1185 string obj_name{rgw_fh->relative_object_name()};
1186 buffer::list ux_key, ux_attrs;
1187
1188 if (rgw_fh->is_dir() &&
1189 (likely(! rgw_fh->is_bucket()))) {
1190 obj_name += "/";
1191 }
1192
1193 lsubdout(get_context(), rgw, 17)
1194 << __func__
3efd9988 1195 << " update old versioned fh : " << obj_name
224ce89b
WB
1196 << dendl;
1197
20effc67 1198 RGWSetAttrsRequest req(cct, user->clone(), rgw_fh->bucket_name(), obj_name);
224ce89b 1199
1d09f67e 1200 rgw_fh->encode_attrs(ux_key, ux_attrs, false);
224ce89b 1201
224ce89b 1202 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
3efd9988 1203 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
224ce89b 1204
1e59de90 1205 rc = g_rgwlib->get_fe()->execute_req(&req);
224ce89b
WB
1206 rc2 = req.get_ret();
1207
1208 if ((rc != 0) || (rc2 != 0)) {
1209 lsubdout(get_context(), rgw, 17)
1210 << __func__
3efd9988 1211 << " update fh failed : " << obj_name
224ce89b
WB
1212 << dendl;
1213 }
3efd9988 1214 } /* RGWLibFS::update_fh */
224ce89b 1215
7c673cae
FG
1216 void RGWLibFS::close()
1217 {
1218 state.flags |= FLAG_CLOSED;
1219
1220 class ObjUnref
1221 {
1222 RGWLibFS* fs;
1223 public:
11fdf7f2 1224 explicit ObjUnref(RGWLibFS* _fs) : fs(_fs) {}
7c673cae
FG
1225 void operator()(RGWFileHandle* fh) const {
1226 lsubdout(fs->get_context(), rgw, 5)
f67539c2 1227 << __PRETTY_FUNCTION__
7c673cae
FG
1228 << fh->name
1229 << " before ObjUnref refs=" << fh->get_refcnt()
1230 << dendl;
31f18b77 1231 fs->unref(fh);
7c673cae
FG
1232 }
1233 };
1234
1235 /* force cache drain, forces objects to evict */
1236 fh_cache.drain(ObjUnref(this),
1237 RGWFileHandle::FHCache::FLAG_LOCK);
1e59de90 1238 g_rgwlib->get_fe()->get_process()->unregister_fs(this);
7c673cae
FG
1239 rele();
1240 } /* RGWLibFS::close */
1241
494da23a
TL
1242 inline std::ostream& operator<<(std::ostream &os, fh_key const &fhk) {
1243 os << "<fh_key: bucket=";
1244 os << fhk.fh_hk.bucket;
1245 os << "; object=";
1246 os << fhk.fh_hk.object;
1247 os << ">";
1248 return os;
1249 }
1250
7c673cae
FG
/* Print a timespec as "<timespec: tv_sec=S; tv_nsec=N>". */
inline std::ostream& operator<<(std::ostream &os, struct timespec const &ts) {
  return os << "<timespec: tv_sec=" << ts.tv_sec
            << "; tv_nsec=" << ts.tv_nsec
            << ">";
}
1259
1260 std::ostream& operator<<(std::ostream &os, RGWLibFS::event const &ev) {
1261 os << "<event:";
1262 switch (ev.t) {
1263 case RGWLibFS::event::type::READDIR:
1264 os << "type=READDIR;";
1265 break;
1266 default:
1267 os << "type=UNKNOWN;";
1268 break;
1269 };
1270 os << "fid=" << ev.fhk.fh_hk.bucket << ":" << ev.fhk.fh_hk.object
1271 << ";ts=" << ev.ts << ">";
1272 return os;
1273 }
1274
1275 void RGWLibFS::gc()
1276 {
1277 using std::get;
1278 using directory = RGWFileHandle::directory;
1279
1280 /* dirent invalidate timeout--basically, the upper-bound on
1281 * inconsistency with the S3 namespace */
1282 auto expire_s
1283 = get_context()->_conf->rgw_nfs_namespace_expire_secs;
1284
1285 /* max events to gc in one cycle */
c07f9fc5 1286 uint32_t max_ev = get_context()->_conf->rgw_nfs_max_gc;
7c673cae
FG
1287
1288 struct timespec now, expire_ts;
1289 event_vector ve;
1290 bool stop = false;
1291 std::deque<event> &events = state.events;
1292
1293 do {
1294 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
1295 lsubdout(get_context(), rgw, 15)
1296 << "GC: top of expire loop"
1297 << " now=" << now
1298 << " expire_s=" << expire_s
1299 << dendl;
1300 {
1301 lock_guard guard(state.mtx); /* LOCKED */
494da23a
TL
1302 lsubdout(get_context(), rgw, 15)
1303 << "GC: processing"
1304 << " count=" << events.size()
1305 << " events"
1306 << dendl;
1307 /* just return if no events */
7c673cae
FG
1308 if (events.empty()) {
1309 return;
1310 }
1311 uint32_t _max_ev =
1312 (events.size() < 500) ? max_ev : (events.size() / 4);
1313 for (uint32_t ix = 0; (ix < _max_ev) && (events.size() > 0); ++ix) {
1314 event& ev = events.front();
1315 expire_ts = ev.ts;
1316 expire_ts.tv_sec += expire_s;
1317 if (expire_ts > now) {
1318 stop = true;
1319 break;
1320 }
1321 ve.push_back(ev);
1322 events.pop_front();
1323 }
1324 } /* anon */
1325 /* !LOCKED */
1326 for (auto& ev : ve) {
1327 lsubdout(get_context(), rgw, 15)
1328 << "try-expire ev: " << ev << dendl;
1329 if (likely(ev.t == event::type::READDIR)) {
1330 RGWFileHandle* rgw_fh = lookup_handle(ev.fhk.fh_hk);
1331 lsubdout(get_context(), rgw, 15)
1332 << "ev rgw_fh: " << rgw_fh << dendl;
1333 if (rgw_fh) {
1334 RGWFileHandle::directory* d;
1335 if (unlikely(! rgw_fh->is_dir())) {
1336 lsubdout(get_context(), rgw, 0)
1337 << __func__
1338 << " BUG non-directory found with READDIR event "
1339 << "(" << rgw_fh->bucket_name() << ","
1340 << rgw_fh->object_name() << ")"
1341 << dendl;
1342 goto rele;
1343 }
1344 /* maybe clear state */
1345 d = get<directory>(&rgw_fh->variant_type);
1346 if (d) {
1347 struct timespec ev_ts = ev.ts;
1348 lock_guard guard(rgw_fh->mtx);
1349 struct timespec d_last_readdir = d->last_readdir;
1350 if (unlikely(ev_ts < d_last_readdir)) {
1351 /* readdir cycle in progress, don't invalidate */
1352 lsubdout(get_context(), rgw, 15)
1353 << "GC: delay expiration for "
1354 << rgw_fh->object_name()
1355 << " ev.ts=" << ev_ts
1356 << " last_readdir=" << d_last_readdir
1357 << dendl;
1358 continue;
1359 } else {
1360 lsubdout(get_context(), rgw, 15)
1361 << "GC: expiring "
1362 << rgw_fh->object_name()
1363 << dendl;
1364 rgw_fh->clear_state();
1365 rgw_fh->invalidate();
1366 }
1367 }
1368 rele:
1369 unref(rgw_fh);
1370 } /* rgw_fh */
1371 } /* event::type::READDIR */
1372 } /* ev */
1373 ve.clear();
1374 } while (! (stop || shutdown));
1375 } /* RGWLibFS::gc */
1376
1377 std::ostream& operator<<(std::ostream &os,
1378 RGWFileHandle const &rgw_fh)
1379 {
1380 const auto& fhk = rgw_fh.get_key();
1381 const auto& fh = const_cast<RGWFileHandle&>(rgw_fh).get_fh();
1382 os << "<RGWFileHandle:";
1383 os << "addr=" << &rgw_fh << ";";
1384 switch (fh->fh_type) {
1385 case RGW_FS_TYPE_DIRECTORY:
1386 os << "type=DIRECTORY;";
1387 break;
1388 case RGW_FS_TYPE_FILE:
1389 os << "type=FILE;";
1390 break;
1391 default:
1392 os << "type=UNKNOWN;";
1393 break;
1394 };
1395 os << "fid=" << fhk.fh_hk.bucket << ":" << fhk.fh_hk.object << ";";
1396 os << "name=" << rgw_fh.object_name() << ";";
1397 os << "refcnt=" << rgw_fh.get_refcnt() << ";";
1398 os << ">";
1399 return os;
1400 }
1401
1402 RGWFileHandle::~RGWFileHandle() {
28e407b8
AA
1403 /* !recycle case, handle may STILL be in handle table, BUT
1404 * the partition lock is not held in this path */
1405 if (fh_hook.is_linked()) {
1406 fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_LOCK);
1407 }
7c673cae 1408 /* cond-unref parent */
3efd9988 1409 if (parent && (! parent->is_mount())) {
7c673cae
FG
1410 /* safe because if parent->unref causes its deletion,
1411 * there are a) by refcnt, no other objects/paths pointing
1412 * to it and b) by the semantics of valid iteration of
1413 * fh_lru (observed, e.g., by cohort_lru<T,...>::drain())
1414 * no unsafe iterators reaching it either--n.b., this constraint
1415 * is binding oncode which may in future attempt to e.g.,
1416 * cause the eviction of objects in LRU order */
31f18b77 1417 (void) get_fs()->unref(parent);
7c673cae
FG
1418 }
1419 }
1420
494da23a
TL
1421 fh_key RGWFileHandle::make_fhk(const std::string& name)
1422 {
1423 std::string tenant = get_fs()->get_user()->user_id.to_str();
1424 if (depth == 0) {
1425 /* S3 bucket -- assert mount-at-bucket case reaches here */
1426 return fh_key(name, name, tenant);
1427 } else {
1428 std::string key_name = make_key_name(name.c_str());
1429 return fh_key(fhk.fh_hk.bucket, key_name.c_str(), tenant);
1430 }
1431 }
1432
7c673cae 1433 void RGWFileHandle::encode_attrs(ceph::buffer::list& ux_key1,
1d09f67e
TL
1434 ceph::buffer::list& ux_attrs1,
1435 bool inc_ov)
7c673cae 1436 {
11fdf7f2 1437 using ceph::encode;
7c673cae 1438 fh_key fhk(this->fh.fh_hk);
11fdf7f2 1439 encode(fhk, ux_key1);
1d09f67e
TL
1440 bool need_ondisk_version =
1441 (fh.fh_type == RGW_FS_TYPE_FILE ||
1442 fh.fh_type == RGW_FS_TYPE_SYMBOLIC_LINK);
1443 if (need_ondisk_version &&
1444 file_ondisk_version < 0) {
1445 file_ondisk_version = 0;
1446 }
11fdf7f2 1447 encode(*this, ux_attrs1);
1d09f67e
TL
1448 if (need_ondisk_version && inc_ov) {
1449 file_ondisk_version++;
1450 }
7c673cae
FG
1451 } /* RGWFileHandle::encode_attrs */
1452
3efd9988
FG
1453 DecodeAttrsResult RGWFileHandle::decode_attrs(const ceph::buffer::list* ux_key1,
1454 const ceph::buffer::list* ux_attrs1)
7c673cae 1455 {
11fdf7f2 1456 using ceph::decode;
3efd9988 1457 DecodeAttrsResult dar { false, false };
7c673cae 1458 fh_key fhk;
11fdf7f2
TL
1459 auto bl_iter_key1 = ux_key1->cbegin();
1460 decode(fhk, bl_iter_key1);
494da23a 1461 get<0>(dar) = true;
7c673cae 1462
1d09f67e
TL
1463 // decode to a temporary file handle which may not be
1464 // copied to the current file handle if its file_ondisk_version
1465 // is not newer
1466 RGWFileHandle tmp_fh(fs);
1467 tmp_fh.fh.fh_type = fh.fh_type;
11fdf7f2 1468 auto bl_iter_unix1 = ux_attrs1->cbegin();
1d09f67e
TL
1469 decode(tmp_fh, bl_iter_unix1);
1470
1471 fh.fh_type = tmp_fh.fh.fh_type;
1472 // for file handles that represent files and whose file_ondisk_version
1473 // is newer, no updates are need, otherwise, go updating the current
1474 // file handle
1475 if (!((fh.fh_type == RGW_FS_TYPE_FILE ||
1476 fh.fh_type == RGW_FS_TYPE_SYMBOLIC_LINK) &&
1477 file_ondisk_version >= tmp_fh.file_ondisk_version)) {
1478 // make sure the following "encode" always encode a greater version
1479 file_ondisk_version = tmp_fh.file_ondisk_version + 1;
1480 state.dev = tmp_fh.state.dev;
1481 state.size = tmp_fh.state.size;
1482 state.nlink = tmp_fh.state.nlink;
1483 state.owner_uid = tmp_fh.state.owner_uid;
1484 state.owner_gid = tmp_fh.state.owner_gid;
1485 state.unix_mode = tmp_fh.state.unix_mode;
1486 state.ctime = tmp_fh.state.ctime;
1487 state.mtime = tmp_fh.state.mtime;
1488 state.atime = tmp_fh.state.atime;
1489 state.version = tmp_fh.state.version;
1490 }
1491
3efd9988
FG
1492 if (this->state.version < 2) {
1493 get<1>(dar) = true;
1494 }
224ce89b 1495
3efd9988 1496 return dar;
7c673cae
FG
1497 } /* RGWFileHandle::decode_attrs */
1498
f91f0fd5 1499 bool RGWFileHandle::reclaim(const cohort::lru::ObjectFactory* newobj_fac) {
7c673cae
FG
1500 lsubdout(fs->get_context(), rgw, 17)
1501 << __func__ << " " << *this
1502 << dendl;
f91f0fd5
TL
1503 auto factory = dynamic_cast<const RGWFileHandle::Factory*>(newobj_fac);
1504 if (factory == nullptr) {
1505 return false;
1506 }
1507 /* make sure the reclaiming object is the same partiton with newobject factory,
1508 * then we can recycle the object, and replace with newobject */
1509 if (!fs->fh_cache.is_same_partition(factory->fhk.fh_hk.object, fh.fh_hk.object)) {
1510 return false;
1511 }
b32b8144 1512 /* in the non-delete case, handle may still be in handle table */
7c673cae 1513 if (fh_hook.is_linked()) {
b32b8144
FG
1514 /* in this case, we are being called from a context which holds
1515 * the partition lock */
1516 fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_NONE);
7c673cae
FG
1517 }
1518 return true;
1519 } /* RGWFileHandle::reclaim */
1520
1521 bool RGWFileHandle::has_children() const
1522 {
1523 if (unlikely(! is_dir()))
1524 return false;
1525
f67539c2 1526 RGWRMdirCheck req(fs->get_context(),
1e59de90 1527 g_rgwlib->get_driver()->get_user(fs->get_user()->user_id),
f67539c2 1528 this);
1e59de90 1529 int rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
1530 if (! rc) {
1531 return req.valid && req.has_children;
1532 }
1533
1534 return false;
1535 }
1536
3efd9988
FG
1537 std::ostream& operator<<(std::ostream &os,
1538 RGWFileHandle::readdir_offset const &offset)
1539 {
1540 using boost::get;
1541 if (unlikely(!! get<uint64_t*>(&offset))) {
1542 uint64_t* ioff = get<uint64_t*>(offset);
1543 os << *ioff;
1544 }
1545 else
1546 os << get<const char*>(offset);
1547 return os;
1548 }
1549
1550 int RGWFileHandle::readdir(rgw_readdir_cb rcb, void *cb_arg,
1551 readdir_offset offset,
7c673cae
FG
1552 bool *eof, uint32_t flags)
1553 {
1554 using event = RGWLibFS::event;
3efd9988 1555 using boost::get;
7c673cae
FG
1556 int rc = 0;
1557 struct timespec now;
1558 CephContext* cct = fs->get_context();
1559
494da23a
TL
1560 lsubdout(cct, rgw, 10)
1561 << __func__ << " readdir called on "
1562 << object_name()
1563 << dendl;
1564
7c673cae
FG
1565 directory* d = get<directory>(&variant_type);
1566 if (d) {
1567 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1568 lock_guard guard(mtx);
1569 d->last_readdir = now;
1570 }
1571
3efd9988 1572 bool initial_off;
494da23a
TL
1573 char* mk{nullptr};
1574
3efd9988 1575 if (likely(!! get<const char*>(&offset))) {
494da23a
TL
1576 mk = const_cast<char*>(get<const char*>(offset));
1577 initial_off = !mk;
3efd9988
FG
1578 } else {
1579 initial_off = (*get<uint64_t*>(offset) == 0);
1580 }
1581
7c673cae 1582 if (is_root()) {
1e59de90 1583 RGWListBucketsRequest req(cct, g_rgwlib->get_driver()->get_user(fs->get_user()->user_id),
f67539c2 1584 this, rcb, cb_arg, offset);
1e59de90 1585 rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
1586 if (! rc) {
1587 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1588 lock_guard guard(mtx);
1589 state.atime = now;
3efd9988 1590 if (initial_off)
7c673cae
FG
1591 set_nlink(2);
1592 inc_nlink(req.d_count);
1593 *eof = req.eof();
7c673cae
FG
1594 }
1595 } else {
1e59de90 1596 RGWReaddirRequest req(cct, g_rgwlib->get_driver()->get_user(fs->get_user()->user_id),
f67539c2 1597 this, rcb, cb_arg, offset);
1e59de90 1598 rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
1599 if (! rc) {
1600 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1601 lock_guard guard(mtx);
1602 state.atime = now;
3efd9988 1603 if (initial_off)
7c673cae
FG
1604 set_nlink(2);
1605 inc_nlink(req.d_count);
1606 *eof = req.eof();
7c673cae
FG
1607 }
1608 }
1609
494da23a
TL
1610 event ev(event::type::READDIR, get_key(), state.atime);
1611 lock_guard sguard(fs->state.mtx);
1612 fs->state.push_event(ev);
1613
7c673cae
FG
1614 lsubdout(fs->get_context(), rgw, 15)
1615 << __func__
1616 << " final link count=" << state.nlink
1617 << dendl;
1618
1619 return rc;
1620 } /* RGWFileHandle::readdir */
1621
1622 int RGWFileHandle::write(uint64_t off, size_t len, size_t *bytes_written,
1623 void *buffer)
1624 {
1625 using std::get;
1626 using WriteCompletion = RGWLibFS::WriteCompletion;
1627
1628 lock_guard guard(mtx);
1629
1630 int rc = 0;
1631
1632 file* f = get<file>(&variant_type);
1633 if (! f)
1634 return -EISDIR;
1635
1636 if (deleted()) {
1637 lsubdout(fs->get_context(), rgw, 5)
1638 << __func__
1639 << " write attempted on deleted object "
1640 << this->object_name()
1641 << dendl;
1642 /* zap write transaction, if any */
1643 if (f->write_req) {
1644 delete f->write_req;
1645 f->write_req = nullptr;
1646 }
1647 return -ESTALE;
1648 }
1649
1650 if (! f->write_req) {
1651 /* guard--we do not support (e.g., COW-backed) partial writes */
1652 if (off != 0) {
1653 lsubdout(fs->get_context(), rgw, 5)
1654 << __func__
1655 << " " << object_name()
1656 << " non-0 initial write position " << off
11fdf7f2 1657 << " (mounting with -o sync required)"
7c673cae
FG
1658 << dendl;
1659 return -EIO;
1660 }
1661
1e59de90
TL
1662 const RGWProcessEnv& penv = g_rgwlib->get_fe()->get_process()->get_env();
1663
7c673cae
FG
1664 /* start */
1665 std::string object_name = relative_object_name();
1666 f->write_req =
1e59de90
TL
1667 new RGWWriteRequest(g_rgwlib->get_driver(), penv,
1668 g_rgwlib->get_driver()->get_user(fs->get_user()->user_id),
f67539c2 1669 this, bucket_name(), object_name);
1e59de90 1670 rc = g_rgwlib->get_fe()->start_req(f->write_req);
7c673cae
FG
1671 if (rc < 0) {
1672 lsubdout(fs->get_context(), rgw, 5)
1673 << __func__
1674 << this->object_name()
1675 << " write start failed " << off
1676 << " (" << rc << ")"
1677 << dendl;
1678 /* zap failed write transaction */
1679 delete f->write_req;
1680 f->write_req = nullptr;
1681 return -EIO;
1682 } else {
1683 if (stateless_open()) {
1684 /* start write timer */
1685 f->write_req->timer_id =
1686 RGWLibFS::write_timer.add_event(
1687 std::chrono::seconds(RGWLibFS::write_completion_interval_s),
1688 WriteCompletion(*this));
1689 }
1690 }
1691 }
1692
3efd9988
FG
1693 int overlap = 0;
1694 if ((static_cast<off_t>(off) < f->write_req->real_ofs) &&
1695 ((f->write_req->real_ofs - off) <= len)) {
1696 overlap = f->write_req->real_ofs - off;
1697 off = f->write_req->real_ofs;
1698 buffer = static_cast<char*>(buffer) + overlap;
1699 len -= overlap;
1700 }
1701
7c673cae
FG
1702 buffer::list bl;
1703 /* XXXX */
1704#if 0
1705 bl.push_back(
1706 buffer::create_static(len, static_cast<char*>(buffer)));
1707#else
1708 bl.push_back(
1709 buffer::copy(static_cast<char*>(buffer), len));
1710#endif
1711
1712 f->write_req->put_data(off, bl);
1713 rc = f->write_req->exec_continue();
1714
1715 if (rc == 0) {
1716 size_t min_size = off + len;
1717 if (min_size > get_size())
1718 set_size(min_size);
1719 if (stateless_open()) {
1720 /* bump write timer */
1721 RGWLibFS::write_timer.adjust_event(
1722 f->write_req->timer_id, std::chrono::seconds(10));
1723 }
1724 } else {
1725 /* continuation failed (e.g., non-contiguous write position) */
1726 lsubdout(fs->get_context(), rgw, 5)
1727 << __func__
1728 << object_name()
1729 << " failed write at position " << off
1730 << " (fails write transaction) "
1731 << dendl;
1732 /* zap failed write transaction */
1733 delete f->write_req;
1734 f->write_req = nullptr;
1735 rc = -EIO;
1736 }
1737
3efd9988 1738 *bytes_written = (rc == 0) ? (len + overlap) : 0;
7c673cae
FG
1739 return rc;
1740 } /* RGWFileHandle::write */
1741
1742 int RGWFileHandle::write_finish(uint32_t flags)
1743 {
1744 unique_lock guard{mtx, std::defer_lock};
1745 int rc = 0;
1746
1747 if (! (flags & FLAG_LOCKED)) {
1748 guard.lock();
1749 }
1750
1751 file* f = get<file>(&variant_type);
1752 if (f && (f->write_req)) {
1753 lsubdout(fs->get_context(), rgw, 10)
1754 << __func__
1755 << " finishing write trans on " << object_name()
1756 << dendl;
1e59de90 1757 rc = g_rgwlib->get_fe()->finish_req(f->write_req);
7c673cae
FG
1758 if (! rc) {
1759 rc = f->write_req->get_ret();
1760 }
1761 delete f->write_req;
1762 f->write_req = nullptr;
1763 }
1764
1765 return rc;
1766 } /* RGWFileHandle::write_finish */
1767
1768 int RGWFileHandle::close()
1769 {
1770 lock_guard guard(mtx);
1771
1772 int rc = write_finish(FLAG_LOCKED);
1773
1774 flags &= ~FLAG_OPEN;
31f18b77
FG
1775 flags &= ~FLAG_STATELESS_OPEN;
1776
7c673cae
FG
1777 return rc;
1778 } /* RGWFileHandle::close */
1779
1780 RGWFileHandle::file::~file()
1781 {
1782 delete write_req;
1783 }
1784
1785 void RGWFileHandle::clear_state()
1786 {
1787 directory* d = get<directory>(&variant_type);
1788 if (d) {
1789 state.nlink = 2;
1790 d->last_marker = rgw_obj_key{};
1791 }
1792 }
1793
494da23a
TL
1794 void RGWFileHandle::advance_mtime(uint32_t flags) {
1795 /* intended for use on directories, fast-forward mtime so as to
1796 * ensure a new, higher value for the change attribute */
1797 unique_lock uniq(mtx, std::defer_lock);
1798 if (likely(! (flags & RGWFileHandle::FLAG_LOCKED))) {
1799 uniq.lock();
1800 }
1801
1802 /* advance mtime only if stored mtime is older than the
1803 * configured namespace expiration */
1804 auto now = real_clock::now();
1805 auto cmptime = state.mtime;
1806 cmptime.tv_sec +=
1807 fs->get_context()->_conf->rgw_nfs_namespace_expire_secs;
1808 if (cmptime < real_clock::to_timespec(now)) {
1809 /* sets ctime as well as mtime, to avoid masking updates should
1810 * ctime inexplicably hold a higher value */
1811 set_times(now);
1812 }
1813 }
1814
7c673cae
FG
1815 void RGWFileHandle::invalidate() {
1816 RGWLibFS *fs = get_fs();
1817 if (fs->invalidate_cb) {
1818 fs->invalidate_cb(fs->invalidate_arg, get_key().fh_hk);
1819 }
1820 }
1821
1822 int RGWWriteRequest::exec_start() {
1e59de90 1823 req_state* state = get_state();
f67539c2
TL
1824
1825 /* Object needs a bucket from this point */
1826 state->object->set_bucket(state->bucket.get());
7c673cae 1827
224ce89b 1828 auto compression_type =
1e59de90 1829 get_driver()->get_compression_type(state->bucket->get_placement_rule());
224ce89b 1830
7c673cae 1831 /* not obviously supportable */
11fdf7f2
TL
1832 ceph_assert(! dlo_manifest);
1833 ceph_assert(! slo_info);
7c673cae
FG
1834
1835 perfcounter->inc(l_rgw_put);
1836 op_ret = -EINVAL;
1837
f67539c2
TL
1838 if (state->object->empty()) {
1839 ldout(state->cct, 0) << __func__ << " called on empty object" << dendl;
7c673cae
FG
1840 goto done;
1841 }
1842
f67539c2 1843 op_ret = get_params(null_yield);
7c673cae
FG
1844 if (op_ret < 0)
1845 goto done;
1846
f67539c2 1847 op_ret = get_system_versioning_params(state, &olh_epoch, &version_id);
7c673cae
FG
1848 if (op_ret < 0) {
1849 goto done;
1850 }
1851
1852 /* user-supplied MD5 check skipped (not supplied) */
1853 /* early quota check skipped--we don't have size yet */
1854 /* skipping user-supplied etag--we might have one in future, but
1855 * like data it and other attrs would arrive after open */
11fdf7f2 1856
f67539c2 1857 aio.emplace(state->cct->_conf->rgw_put_obj_min_window_size);
11fdf7f2 1858
f67539c2 1859 if (state->bucket->versioning_enabled()) {
11fdf7f2 1860 if (!version_id.empty()) {
f67539c2 1861 state->object->set_instance(version_id);
11fdf7f2 1862 } else {
f67539c2
TL
1863 state->object->gen_rand_obj_instance_name();
1864 version_id = state->object->get_instance();
11fdf7f2
TL
1865 }
1866 }
1e59de90
TL
1867 processor = get_driver()->get_atomic_writer(this, state->yield, state->object.get(),
1868 state->bucket_owner.get_id(),
20effc67 1869 &state->dest_placement, 0, state->req_id);
11fdf7f2 1870
f67539c2 1871 op_ret = processor->prepare(state->yield);
224ce89b 1872 if (op_ret < 0) {
f67539c2 1873 ldout(state->cct, 20) << "processor->prepare() returned ret=" << op_ret
224ce89b
WB
1874 << dendl;
1875 goto done;
1876 }
11fdf7f2 1877 filter = &*processor;
224ce89b 1878 if (compression_type != "none") {
f67539c2 1879 plugin = Compressor::create(state->cct, compression_type);
11fdf7f2 1880 if (! plugin) {
f67539c2 1881 ldout(state->cct, 1) << "Cannot load plugin for rgw_compression_type "
11fdf7f2
TL
1882 << compression_type << dendl;
1883 } else {
f67539c2 1884 compressor.emplace(state->cct, plugin, filter);
11fdf7f2
TL
1885 filter = &*compressor;
1886 }
224ce89b 1887 }
7c673cae
FG
1888
1889 done:
1890 return op_ret;
1891 } /* exec_start */
1892
1893 int RGWWriteRequest::exec_continue()
1894 {
1e59de90 1895 req_state* state = get_state();
7c673cae
FG
1896 op_ret = 0;
1897
1898 /* check guards (e.g., contig write) */
11fdf7f2 1899 if (eio) {
f67539c2 1900 ldout(state->cct, 5)
11fdf7f2
TL
1901 << " chunks arrived in wrong order"
1902 << " (mounting with -o sync required)"
1903 << dendl;
1904 return -EIO;
1905 }
1906
1e59de90 1907 op_ret = state->bucket->check_quota(this, quota, real_ofs, null_yield, true);
11fdf7f2
TL
1908 /* max_size exceed */
1909 if (op_ret < 0)
7c673cae
FG
1910 return -EIO;
1911
1912 size_t len = data.length();
1913 if (! len)
1914 return 0;
1915
11fdf7f2
TL
1916 hash.Update((const unsigned char *)data.c_str(), data.length());
1917 op_ret = filter->process(std::move(data), ofs);
7c673cae 1918 if (op_ret < 0) {
11fdf7f2 1919 goto done;
7c673cae
FG
1920 }
1921 bytes_written += len;
1922
1923 done:
1924 return op_ret;
1925 } /* exec_continue */
1926
1927 int RGWWriteRequest::exec_finish()
1928 {
1929 buffer::list bl, aclbl, ux_key, ux_attrs;
1930 map<string, string>::iterator iter;
1931 char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
1932 unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
1e59de90 1933 req_state* state = get_state();
7c673cae
FG
1934
1935 size_t osize = rgw_fh->get_size();
1936 struct timespec octime = rgw_fh->get_ctime();
1937 struct timespec omtime = rgw_fh->get_mtime();
1938 real_time appx_t = real_clock::now();
1939
f67539c2
TL
1940 state->obj_size = bytes_written;
1941 perfcounter->inc(l_rgw_put_b, state->obj_size);
7c673cae 1942
11fdf7f2 1943 // flush data in filters
f67539c2 1944 op_ret = filter->process({}, state->obj_size);
11fdf7f2
TL
1945 if (op_ret < 0) {
1946 goto done;
1947 }
1948
1e59de90 1949 op_ret = state->bucket->check_quota(this, quota, state->obj_size, null_yield, true);
11fdf7f2 1950 /* max_size exceed */
7c673cae
FG
1951 if (op_ret < 0) {
1952 goto done;
1953 }
1954
1955 hash.Final(m);
1956
224ce89b
WB
1957 if (compressor && compressor->is_compressed()) {
1958 bufferlist tmp;
1959 RGWCompressionInfo cs_info;
1960 cs_info.compression_type = plugin->get_type_name();
f67539c2 1961 cs_info.orig_size = state->obj_size;
224ce89b 1962 cs_info.blocks = std::move(compressor->get_compression_blocks());
11fdf7f2 1963 encode(cs_info, tmp);
224ce89b 1964 attrs[RGW_ATTR_COMPRESSION] = tmp;
b3b6e05e 1965 ldpp_dout(this, 20) << "storing " << RGW_ATTR_COMPRESSION
224ce89b
WB
1966 << " with type=" << cs_info.compression_type
1967 << ", orig_size=" << cs_info.orig_size
1968 << ", blocks=" << cs_info.blocks.size() << dendl;
1969 }
1970
7c673cae
FG
1971 buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
1972 etag = calc_md5;
1973
1974 bl.append(etag.c_str(), etag.size() + 1);
1975 emplace_attr(RGW_ATTR_ETAG, std::move(bl));
1976
1977 policy.encode(aclbl);
1978 emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
1979
1980 /* unix attrs */
1981 rgw_fh->set_mtime(real_clock::to_timespec(appx_t));
1982 rgw_fh->set_ctime(real_clock::to_timespec(appx_t));
1983 rgw_fh->set_size(bytes_written);
1984 rgw_fh->encode_attrs(ux_key, ux_attrs);
1985
1986 emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
1987 emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
1988
f67539c2 1989 for (iter = state->generic_attrs.begin(); iter != state->generic_attrs.end();
7c673cae
FG
1990 ++iter) {
1991 buffer::list& attrbl = attrs[iter->first];
1992 const string& val = iter->second;
1993 attrbl.append(val.c_str(), val.size() + 1);
1994 }
1995
b3b6e05e 1996 op_ret = rgw_get_request_metadata(this, state->cct, state->info, attrs);
3efd9988
FG
1997 if (op_ret < 0) {
1998 goto done;
1999 }
7c673cae
FG
2000 encode_delete_at_attr(delete_at, attrs);
2001
2002 /* Add a custom metadata to expose the information whether an object
2003 * is an SLO or not. Appending the attribute must be performed AFTER
2004 * processing any input from user in order to prohibit overwriting. */
2005 if (unlikely(!! slo_info)) {
2006 buffer::list slo_userindicator_bl;
11fdf7f2
TL
2007 using ceph::encode;
2008 encode("True", slo_userindicator_bl);
7c673cae
FG
2009 emplace_attr(RGW_ATTR_SLO_UINDICATOR, std::move(slo_userindicator_bl));
2010 }
2011
f67539c2 2012 op_ret = processor->complete(state->obj_size, etag, &mtime, real_time(), attrs,
7c673cae 2013 (delete_at ? *delete_at : real_time()),
9f95a23c 2014 if_match, if_nomatch, nullptr, nullptr, nullptr,
f67539c2 2015 state->yield);
7c673cae
FG
2016 if (op_ret != 0) {
2017 /* revert attr updates */
2018 rgw_fh->set_mtime(omtime);
2019 rgw_fh->set_ctime(octime);
2020 rgw_fh->set_size(osize);
2021 }
2022
2023 done:
f67539c2 2024 perfcounter->tinc(l_rgw_put_lat, state->time_elapsed());
7c673cae
FG
2025 return op_ret;
2026 } /* exec_finish */
2027
2028} /* namespace rgw */
2029
2030/* librgw */
2031extern "C" {
2032
2033void rgwfile_version(int *major, int *minor, int *extra)
2034{
2035 if (major)
2036 *major = LIBRGW_FILE_VER_MAJOR;
2037 if (minor)
2038 *minor = LIBRGW_FILE_VER_MINOR;
2039 if (extra)
2040 *extra = LIBRGW_FILE_VER_EXTRA;
2041}
2042
2043/*
2044 attach rgw namespace
2045*/
2046 int rgw_mount(librgw_t rgw, const char *uid, const char *acc_key,
2047 const char *sec_key, struct rgw_fs **rgw_fs,
2048 uint32_t flags)
2049{
2050 int rc = 0;
2051
2052 /* stash access data for "mount" */
2053 RGWLibFS* new_fs = new RGWLibFS(static_cast<CephContext*>(rgw), uid, acc_key,
3efd9988 2054 sec_key, "/");
11fdf7f2 2055 ceph_assert(new_fs);
3efd9988 2056
1e59de90
TL
2057 const DoutPrefix dp(g_rgwlib->get_driver()->ctx(), dout_subsys, "rgw mount: ");
2058 rc = new_fs->authorize(&dp, g_rgwlib->get_driver());
3efd9988
FG
2059 if (rc != 0) {
2060 delete new_fs;
2061 return -EINVAL;
2062 }
2063
2064 /* register fs for shared gc */
1e59de90 2065 g_rgwlib->get_fe()->get_process()->register_fs(new_fs);
3efd9988
FG
2066
2067 struct rgw_fs *fs = new_fs->get_fs();
2068 fs->rgw = rgw;
2069
2070 /* XXX we no longer assume "/" is unique, but we aren't tracking the
2071 * roots atm */
2072
2073 *rgw_fs = fs;
2074
2075 return 0;
2076}
2077
2078int rgw_mount2(librgw_t rgw, const char *uid, const char *acc_key,
2079 const char *sec_key, const char *root, struct rgw_fs **rgw_fs,
2080 uint32_t flags)
2081{
2082 int rc = 0;
2083
20effc67
TL
2084 /* if the config has no value for path/root, choose "/" */
2085 RGWLibFS* new_fs{nullptr};
2086 if(root &&
2087 (!strcmp(root, ""))) {
2088 /* stash access data for "mount" */
2089 new_fs = new RGWLibFS(
2090 static_cast<CephContext*>(rgw), uid, acc_key, sec_key, "/");
2091 }
2092 else {
2093 /* stash access data for "mount" */
2094 new_fs = new RGWLibFS(
2095 static_cast<CephContext*>(rgw), uid, acc_key, sec_key, root);
2096 }
2097
2098 ceph_assert(new_fs); /* should we be using ceph_assert? */
7c673cae 2099
1e59de90
TL
2100 const DoutPrefix dp(g_rgwlib->get_driver()->ctx(), dout_subsys, "rgw mount2: ");
2101 rc = new_fs->authorize(&dp, g_rgwlib->get_driver());
7c673cae
FG
2102 if (rc != 0) {
2103 delete new_fs;
2104 return -EINVAL;
2105 }
2106
2107 /* register fs for shared gc */
1e59de90 2108 g_rgwlib->get_fe()->get_process()->register_fs(new_fs);
7c673cae
FG
2109
2110 struct rgw_fs *fs = new_fs->get_fs();
2111 fs->rgw = rgw;
2112
2113 /* XXX we no longer assume "/" is unique, but we aren't tracking the
2114 * roots atm */
2115
2116 *rgw_fs = fs;
2117
2118 return 0;
2119}
2120
2121/*
2122 register invalidate callbacks
2123*/
2124int rgw_register_invalidate(struct rgw_fs *rgw_fs, rgw_fh_callback_t cb,
2125 void *arg, uint32_t flags)
2126
2127{
2128 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2129 return fs->register_invalidate(cb, arg, flags);
2130}
2131
2132/*
2133 detach rgw namespace
2134*/
2135int rgw_umount(struct rgw_fs *rgw_fs, uint32_t flags)
2136{
2137 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2138 fs->close();
7c673cae
FG
2139 return 0;
2140}
2141
2142/*
2143 get filesystem attributes
2144*/
2145int rgw_statfs(struct rgw_fs *rgw_fs,
2146 struct rgw_file_handle *parent_fh,
2147 struct rgw_statvfs *vfs_st, uint32_t flags)
2148{
2149 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
28e407b8
AA
2150 struct rados_cluster_stat_t stats;
2151
f67539c2 2152 RGWGetClusterStatReq req(fs->get_context(),
1e59de90 2153 g_rgwlib->get_driver()->get_user(fs->get_user()->user_id),
f67539c2 2154 stats);
1e59de90 2155 int rc = g_rgwlib->get_fe()->execute_req(&req);
28e407b8
AA
2156 if (rc < 0) {
2157 lderr(fs->get_context()) << "ERROR: getting total cluster usage"
2158 << cpp_strerror(-rc) << dendl;
2159 return rc;
2160 }
7c673cae 2161
28e407b8
AA
2162 //Set block size to 1M.
2163 constexpr uint32_t CEPH_BLOCK_SHIFT = 20;
2164 vfs_st->f_bsize = 1 << CEPH_BLOCK_SHIFT;
2165 vfs_st->f_frsize = 1 << CEPH_BLOCK_SHIFT;
2166 vfs_st->f_blocks = stats.kb >> (CEPH_BLOCK_SHIFT - 10);
2167 vfs_st->f_bfree = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
2168 vfs_st->f_bavail = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
2169 vfs_st->f_files = stats.num_objects;
2170 vfs_st->f_ffree = -1;
3efd9988
FG
2171 vfs_st->f_fsid[0] = fs->get_fsid();
2172 vfs_st->f_fsid[1] = fs->get_fsid();
7c673cae
FG
2173 vfs_st->f_flag = 0;
2174 vfs_st->f_namemax = 4096;
2175 return 0;
2176}
2177
2178/*
2179 generic create -- create an empty regular file
2180*/
2181int rgw_create(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
2182 const char *name, struct stat *st, uint32_t mask,
2183 struct rgw_file_handle **fh, uint32_t posix_flags,
2184 uint32_t flags)
2185{
2186 using std::get;
2187
2188 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2189 RGWFileHandle* parent = get_rgwfh(parent_fh);
2190
2191 if ((! parent) ||
2192 (parent->is_root()) ||
2193 (parent->is_file())) {
2194 /* bad parent */
2195 return -EINVAL;
2196 }
2197
2198 MkObjResult fhr = fs->create(parent, name, st, mask, flags);
2199 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
2200
2201 if (nfh)
2202 *fh = nfh->get_fh();
2203
2204 return get<1>(fhr);
2205} /* rgw_create */
2206
11fdf7f2
TL
2207/*
2208 create a symbolic link
2209 */
2210int rgw_symlink(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
2211 const char *name, const char *link_path, struct stat *st, uint32_t mask,
2212 struct rgw_file_handle **fh, uint32_t posix_flags,
2213 uint32_t flags)
2214{
2215 using std::get;
2216
2217 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2218 RGWFileHandle* parent = get_rgwfh(parent_fh);
2219
2220 if ((! parent) ||
2221 (parent->is_root()) ||
2222 (parent->is_file())) {
2223 /* bad parent */
2224 return -EINVAL;
2225 }
2226
2227 MkObjResult fhr = fs->symlink(parent, name, link_path, st, mask, flags);
2228 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
2229
2230 if (nfh)
2231 *fh = nfh->get_fh();
2232
2233 return get<1>(fhr);
2234} /* rgw_symlink */
2235
7c673cae
FG
2236/*
2237 create a new directory
2238*/
2239int rgw_mkdir(struct rgw_fs *rgw_fs,
2240 struct rgw_file_handle *parent_fh,
2241 const char *name, struct stat *st, uint32_t mask,
2242 struct rgw_file_handle **fh, uint32_t flags)
2243{
2244 using std::get;
2245
2246 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2247 RGWFileHandle* parent = get_rgwfh(parent_fh);
2248
2249 if (! parent) {
2250 /* bad parent */
2251 return -EINVAL;
2252 }
2253
2254 MkObjResult fhr = fs->mkdir(parent, name, st, mask, flags);
2255 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
2256
2257 if (nfh)
2258 *fh = nfh->get_fh();
2259
2260 return get<1>(fhr);
2261} /* rgw_mkdir */
2262
2263/*
2264 rename object
2265*/
2266int rgw_rename(struct rgw_fs *rgw_fs,
2267 struct rgw_file_handle *src, const char* src_name,
2268 struct rgw_file_handle *dst, const char* dst_name,
2269 uint32_t flags)
2270{
2271 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2272
2273 RGWFileHandle* src_fh = get_rgwfh(src);
2274 RGWFileHandle* dst_fh = get_rgwfh(dst);
2275
2276 return fs->rename(src_fh, dst_fh, src_name, dst_name);
2277}
2278
2279/*
2280 remove file or directory
2281*/
2282int rgw_unlink(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
2283 const char *name, uint32_t flags)
2284{
2285 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2286 RGWFileHandle* parent = get_rgwfh(parent_fh);
2287
2288 return fs->unlink(parent, name);
2289}
2290
2291/*
2292 lookup object by name (POSIX style)
2293*/
2294int rgw_lookup(struct rgw_fs *rgw_fs,
2295 struct rgw_file_handle *parent_fh, const char* path,
eafe8130
TL
2296 struct rgw_file_handle **fh,
2297 struct stat *st, uint32_t mask, uint32_t flags)
7c673cae
FG
2298{
2299 //CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2300 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2301
2302 RGWFileHandle* parent = get_rgwfh(parent_fh);
2303 if ((! parent) ||
2304 (! parent->is_dir())) {
2305 /* bad parent */
2306 return -EINVAL;
2307 }
2308
2309 RGWFileHandle* rgw_fh;
2310 LookupFHResult fhr;
2311
2312 if (parent->is_root()) {
2313 /* special: parent lookup--note lack of ref()! */
2314 if (unlikely((strcmp(path, "..") == 0) ||
2315 (strcmp(path, "/") == 0))) {
2316 rgw_fh = parent;
2317 } else {
31f18b77
FG
2318 RGWLibFS::BucketStats bstat;
2319 fhr = fs->stat_bucket(parent, path, bstat, RGWFileHandle::FLAG_NONE);
7c673cae
FG
2320 rgw_fh = get<0>(fhr);
2321 if (! rgw_fh)
2322 return -ENOENT;
2323 }
2324 } else {
224ce89b
WB
2325 /* special: after readdir--note extra ref()! */
2326 if (unlikely((strcmp(path, "..") == 0))) {
2327 rgw_fh = parent;
2328 lsubdout(fs->get_context(), rgw, 17)
11fdf7f2 2329 << __func__ << " BANG"<< *rgw_fh
224ce89b
WB
2330 << dendl;
2331 fs->ref(rgw_fh);
2332 } else {
224ce89b
WB
2333 enum rgw_fh_type fh_type = fh_type_of(flags);
2334
2335 uint32_t sl_flags = (flags & RGW_LOOKUP_FLAG_RCB)
f67539c2 2336 ? RGWFileHandle::FLAG_IN_CB
224ce89b
WB
2337 : RGWFileHandle::FLAG_EXACT_MATCH;
2338
eafe8130
TL
2339 bool fast_attrs= fs->get_context()->_conf->rgw_nfs_s3_fast_attrs;
2340
2341 if ((flags & RGW_LOOKUP_FLAG_RCB) && fast_attrs) {
2342 /* FAKE STAT--this should mean, interpolate special
2343 * owner, group, and perms masks */
2344 fhr = fs->fake_leaf(parent, path, fh_type, st, mask, sl_flags);
2345 } else {
2346 if ((fh_type == RGW_FS_TYPE_DIRECTORY) && fast_attrs) {
2347 /* trust cached dir, if present */
2348 fhr = fs->lookup_fh(parent, path, RGWFileHandle::FLAG_DIRECTORY);
2349 if (get<0>(fhr)) {
2350 rgw_fh = get<0>(fhr);
2351 goto done;
2352 }
2353 }
2354 fhr = fs->stat_leaf(parent, path, fh_type, sl_flags);
2355 }
224ce89b
WB
2356 if (! get<0>(fhr)) {
2357 if (! (flags & RGW_LOOKUP_FLAG_CREATE))
2358 return -ENOENT;
2359 else
2360 fhr = fs->lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
2361 }
2362 rgw_fh = get<0>(fhr);
7c673cae 2363 }
7c673cae
FG
2364 } /* !root */
2365
eafe8130 2366done:
7c673cae
FG
2367 struct rgw_file_handle *rfh = rgw_fh->get_fh();
2368 *fh = rfh;
2369
2370 return 0;
2371} /* rgw_lookup */
2372
2373/*
2374 lookup object by handle (NFS style)
2375*/
2376int rgw_lookup_handle(struct rgw_fs *rgw_fs, struct rgw_fh_hk *fh_hk,
2377 struct rgw_file_handle **fh, uint32_t flags)
2378{
2379 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2380
2381 RGWFileHandle* rgw_fh = fs->lookup_handle(*fh_hk);
2382 if (! rgw_fh) {
2383 /* not found */
2384 return -ENOENT;
2385 }
2386
2387 struct rgw_file_handle *rfh = rgw_fh->get_fh();
2388 *fh = rfh;
2389
2390 return 0;
2391}
2392
2393/*
2394 * release file handle
2395 */
2396int rgw_fh_rele(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2397 uint32_t flags)
2398{
2399 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2400 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2401
2402 lsubdout(fs->get_context(), rgw, 17)
2403 << __func__ << " " << *rgw_fh
2404 << dendl;
2405
2406 fs->unref(rgw_fh);
2407 return 0;
2408}
2409
2410/*
2411 get unix attributes for object
2412*/
2413int rgw_getattr(struct rgw_fs *rgw_fs,
2414 struct rgw_file_handle *fh, struct stat *st, uint32_t flags)
2415{
2416 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2417 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2418
2419 return fs->getattr(rgw_fh, st);
2420}
2421
2422/*
2423 set unix attributes for object
2424*/
2425int rgw_setattr(struct rgw_fs *rgw_fs,
2426 struct rgw_file_handle *fh, struct stat *st,
2427 uint32_t mask, uint32_t flags)
2428{
2429 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2430 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2431
2432 return fs->setattr(rgw_fh, st, mask, flags);
2433}
2434
/*
  truncate file

  Currently a no-op: none of the arguments are examined and success
  (0) is reported unconditionally.
*/
int rgw_truncate(struct rgw_fs *rgw_fs,
                 struct rgw_file_handle *fh, uint64_t size, uint32_t flags)
{
  /* nothing is resized */
  return 0;
}
2443
2444/*
2445 open file
2446*/
2447int rgw_open(struct rgw_fs *rgw_fs,
2448 struct rgw_file_handle *fh, uint32_t posix_flags, uint32_t flags)
2449{
2450 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2451
28e407b8 2452 /* XXX
7c673cae
FG
2453 * need to track specific opens--at least read opens and
2454 * a write open; we need to know when a write open is returned,
2455 * that closes a write transaction
2456 *
2457 * for now, we will support single-open only, it's preferable to
2458 * anything we can otherwise do without access to the NFS state
2459 */
2460 if (! rgw_fh->is_file())
2461 return -EISDIR;
2462
2463 return rgw_fh->open(flags);
2464}
2465
2466/*
2467 close file
2468*/
2469int rgw_close(struct rgw_fs *rgw_fs,
2470 struct rgw_file_handle *fh, uint32_t flags)
2471{
2472 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2473 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2474 int rc = rgw_fh->close(/* XXX */);
2475
2476 if (flags & RGW_CLOSE_FLAG_RELE)
2477 fs->unref(rgw_fh);
2478
2479 return rc;
2480}
2481
2482int rgw_readdir(struct rgw_fs *rgw_fs,
2483 struct rgw_file_handle *parent_fh, uint64_t *offset,
2484 rgw_readdir_cb rcb, void *cb_arg, bool *eof,
2485 uint32_t flags)
2486{
2487 RGWFileHandle* parent = get_rgwfh(parent_fh);
2488 if (! parent) {
2489 /* bad parent */
2490 return -EINVAL;
2491 }
3efd9988
FG
2492
2493 lsubdout(parent->get_fs()->get_context(), rgw, 15)
2494 << __func__
2495 << " offset=" << *offset
2496 << dendl;
2497
2498 if ((*offset == 0) &&
2499 (flags & RGW_READDIR_FLAG_DOTDOT)) {
2500 /* send '.' and '..' with their NFS-defined offsets */
eafe8130
TL
2501 rcb(".", cb_arg, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
2502 rcb("..", cb_arg, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
3efd9988
FG
2503 }
2504
7c673cae
FG
2505 int rc = parent->readdir(rcb, cb_arg, offset, eof, flags);
2506 return rc;
3efd9988
FG
2507} /* rgw_readdir */
2508
2509/* enumeration continuing from name */
2510int rgw_readdir2(struct rgw_fs *rgw_fs,
2511 struct rgw_file_handle *parent_fh, const char *name,
2512 rgw_readdir_cb rcb, void *cb_arg, bool *eof,
2513 uint32_t flags)
2514{
2515 RGWFileHandle* parent = get_rgwfh(parent_fh);
2516 if (! parent) {
2517 /* bad parent */
2518 return -EINVAL;
2519 }
2520
2521 lsubdout(parent->get_fs()->get_context(), rgw, 15)
2522 << __func__
94b18763 2523 << " offset=" << ((name) ? name : "(nil)")
3efd9988
FG
2524 << dendl;
2525
2526 if ((! name) &&
2527 (flags & RGW_READDIR_FLAG_DOTDOT)) {
2528 /* send '.' and '..' with their NFS-defined offsets */
eafe8130
TL
2529 rcb(".", cb_arg, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
2530 rcb("..", cb_arg, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
3efd9988
FG
2531 }
2532
2533 int rc = parent->readdir(rcb, cb_arg, name, eof, flags);
2534 return rc;
2535} /* rgw_readdir2 */
7c673cae 2536
c07f9fc5
FG
2537/* project offset of dirent name */
2538int rgw_dirent_offset(struct rgw_fs *rgw_fs,
2539 struct rgw_file_handle *parent_fh,
2540 const char *name, int64_t *offset,
2541 uint32_t flags)
2542{
2543 RGWFileHandle* parent = get_rgwfh(parent_fh);
2544 if ((! parent)) {
2545 /* bad parent */
2546 return -EINVAL;
2547 }
2548 std::string sname{name};
2549 int rc = parent->offset_of(sname, offset, flags);
2550 return rc;
2551}
2552
7c673cae
FG
2553/*
2554 read data from file
2555*/
2556int rgw_read(struct rgw_fs *rgw_fs,
2557 struct rgw_file_handle *fh, uint64_t offset,
2558 size_t length, size_t *bytes_read, void *buffer,
2559 uint32_t flags)
2560{
2561 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2562 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2563
2564 return fs->read(rgw_fh, offset, length, bytes_read, buffer, flags);
2565}
2566
11fdf7f2
TL
2567/*
2568 read symbolic link
2569*/
2570int rgw_readlink(struct rgw_fs *rgw_fs,
2571 struct rgw_file_handle *fh, uint64_t offset,
2572 size_t length, size_t *bytes_read, void *buffer,
2573 uint32_t flags)
2574{
2575 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2576 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2577
2578 return fs->readlink(rgw_fh, offset, length, bytes_read, buffer, flags);
2579}
2580
7c673cae
FG
2581/*
2582 write data to file
2583*/
2584int rgw_write(struct rgw_fs *rgw_fs,
2585 struct rgw_file_handle *fh, uint64_t offset,
2586 size_t length, size_t *bytes_written, void *buffer,
2587 uint32_t flags)
2588{
2589 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2590 int rc;
2591
2592 *bytes_written = 0;
2593
2594 if (! rgw_fh->is_file())
2595 return -EISDIR;
2596
3efd9988
FG
2597 if (! rgw_fh->is_open()) {
2598 if (flags & RGW_OPEN_FLAG_V3) {
2599 rc = rgw_fh->open(flags);
2600 if (!! rc)
2601 return rc;
2602 } else
2603 return -EPERM;
2604 }
7c673cae
FG
2605
2606 rc = rgw_fh->write(offset, length, bytes_written, buffer);
2607
2608 return rc;
2609}
2610
2611/*
2612 read data from file (vector)
2613*/
2614class RGWReadV
2615{
2616 buffer::list bl;
2617 struct rgw_vio* vio;
2618
2619public:
2620 RGWReadV(buffer::list& _bl, rgw_vio* _vio) : vio(_vio) {
f67539c2 2621 bl = std::move(_bl);
7c673cae
FG
2622 }
2623
2624 struct rgw_vio* get_vio() { return vio; }
2625
11fdf7f2 2626 const auto& buffers() { return bl.buffers(); }
7c673cae
FG
2627
2628 unsigned /* XXX */ length() { return bl.length(); }
2629
2630};
2631
2632void rgw_readv_rele(struct rgw_uio *uio, uint32_t flags)
2633{
2634 RGWReadV* rdv = static_cast<RGWReadV*>(uio->uio_p1);
2635 rdv->~RGWReadV();
2636 ::operator delete(rdv);
2637}
2638
2639int rgw_readv(struct rgw_fs *rgw_fs,
2640 struct rgw_file_handle *fh, rgw_uio *uio, uint32_t flags)
2641{
2642#if 0 /* XXX */
2643 CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2644 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2645 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2646
2647 if (! rgw_fh->is_file())
2648 return -EINVAL;
2649
2650 int rc = 0;
2651
2652 buffer::list bl;
2653 RGWGetObjRequest req(cct, fs->get_user(), rgw_fh->bucket_name(),
2654 rgw_fh->object_name(), uio->uio_offset, uio->uio_resid,
2655 bl);
2656 req.do_hexdump = false;
2657
1e59de90 2658 rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
2659
2660 if (! rc) {
2661 RGWReadV* rdv = static_cast<RGWReadV*>(
2662 ::operator new(sizeof(RGWReadV) +
2663 (bl.buffers().size() * sizeof(struct rgw_vio))));
2664
2665 (void) new (rdv)
2666 RGWReadV(bl, reinterpret_cast<rgw_vio*>(rdv+sizeof(RGWReadV)));
2667
2668 uio->uio_p1 = rdv;
2669 uio->uio_cnt = rdv->buffers().size();
2670 uio->uio_resid = rdv->length();
2671 uio->uio_vio = rdv->get_vio();
2672 uio->uio_rele = rgw_readv_rele;
2673
2674 int ix = 0;
2675 auto& buffers = rdv->buffers();
2676 for (auto& bp : buffers) {
2677 rgw_vio *vio = &(uio->uio_vio[ix]);
2678 vio->vio_base = const_cast<char*>(bp.c_str());
2679 vio->vio_len = bp.length();
2680 vio->vio_u1 = nullptr;
2681 vio->vio_p1 = nullptr;
2682 ++ix;
2683 }
2684 }
2685
2686 return rc;
2687#else
2688 return 0;
2689#endif
2690}
2691
2692/*
2693 write data to file (vector)
2694*/
2695int rgw_writev(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2696 rgw_uio *uio, uint32_t flags)
2697{
2698
f67539c2 2699 // not supported - rest of function is ignored
7c673cae
FG
2700 return -ENOTSUP;
2701
2702 CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2703 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2704 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2705
2706 if (! rgw_fh->is_file())
2707 return -EINVAL;
2708
2709 buffer::list bl;
2710 for (unsigned int ix = 0; ix < uio->uio_cnt; ++ix) {
2711 rgw_vio *vio = &(uio->uio_vio[ix]);
2712 bl.push_back(
2713 buffer::create_static(vio->vio_len,
2714 static_cast<char*>(vio->vio_base)));
2715 }
2716
2717 std::string oname = rgw_fh->relative_object_name();
1e59de90 2718 RGWPutObjRequest req(cct, g_rgwlib->get_driver()->get_user(fs->get_user()->user_id),
f67539c2 2719 rgw_fh->bucket_name(), oname, bl);
7c673cae 2720
1e59de90 2721 int rc = g_rgwlib->get_fe()->execute_req(&req);
7c673cae
FG
2722
2723 /* XXX update size (in request) */
2724
2725 return rc;
2726}
2727
/*
  sync written data

  Currently a no-op: none of the arguments are examined and success
  (0) is reported unconditionally.
*/
int rgw_fsync(struct rgw_fs *rgw_fs, struct rgw_file_handle *handle,
              uint32_t flags)
{
  /* nothing is flushed here */
  return 0;
}
2736
2737int rgw_commit(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2738 uint64_t offset, uint64_t length, uint32_t flags)
2739{
2740 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2741
2742 return rgw_fh->commit(offset, length, RGWFileHandle::FLAG_NONE);
2743}
2744
f67539c2
TL
2745/*
2746 extended attributes
2747 */
2748
2749int rgw_getxattrs(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2750 rgw_xattrlist *attrs, rgw_getxattr_cb cb, void *cb_arg,
2751 uint32_t flags)
2752{
2753 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2754 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2755
2756 return fs->getxattrs(rgw_fh, attrs, cb, cb_arg, flags);
2757}
2758
2759int rgw_lsxattrs(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2760 rgw_xattrstr *filter_prefix /* ignored */,
2761 rgw_getxattr_cb cb, void *cb_arg, uint32_t flags)
2762{
2763 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2764 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2765
2766 return fs->lsxattrs(rgw_fh, filter_prefix, cb, cb_arg, flags);
2767}
2768
2769int rgw_setxattrs(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2770 rgw_xattrlist *attrs, uint32_t flags)
2771{
2772 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2773 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2774
2775 return fs->setxattrs(rgw_fh, attrs, flags);
2776}
2777
2778int rgw_rmxattrs(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2779 rgw_xattrlist *attrs, uint32_t flags)
2780{
2781 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2782 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2783
2784 return fs->rmxattrs(rgw_fh, attrs, flags);
2785}
2786
7c673cae 2787} /* extern "C" */