]> git.proxmox.com Git - ceph.git/blame - ceph/src/rgw/rgw_file.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / rgw / rgw_file.cc
CommitLineData
7c673cae 1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
9f95a23c 2// vim: ts=8 sw=2 smarttab ft=cpp
7c673cae
FG
3
4#include "include/compat.h"
5#include "include/rados/rgw_file.h"
6
7#include <sys/types.h>
8#include <sys/stat.h>
9
10#include "rgw_lib.h"
11#include "rgw_rados.h"
12#include "rgw_resolve.h"
13#include "rgw_op.h"
14#include "rgw_rest.h"
15#include "rgw_acl.h"
16#include "rgw_acl_s3.h"
17#include "rgw_frontend.h"
18#include "rgw_request.h"
19#include "rgw_process.h"
20#include "rgw_rest_user.h"
21#include "rgw_rest_s3.h"
22#include "rgw_os_lib.h"
23#include "rgw_auth_s3.h"
24#include "rgw_user.h"
25#include "rgw_bucket.h"
11fdf7f2 26#include "rgw_zone.h"
7c673cae
FG
27#include "rgw_file.h"
28#include "rgw_lib_frontend.h"
11fdf7f2 29#include "rgw_perf_counters.h"
28e407b8 30#include "common/errno.h"
7c673cae 31
9f95a23c
TL
32#include "services/svc_zone.h"
33
7c673cae
FG
34#include <atomic>
35
36#define dout_subsys ceph_subsys_rgw
37
38using namespace rgw;
39
40namespace rgw {
41
42 extern RGWLib rgwlib;
43
44 const string RGWFileHandle::root_name = "/";
45
46 std::atomic<uint32_t> RGWLibFS::fs_inst_counter;
47
48 uint32_t RGWLibFS::write_completion_interval_s = 10;
49
50 ceph::timer<ceph::mono_clock> RGWLibFS::write_timer{
51 ceph::construct_suspended};
52
53 inline int valid_fs_bucket_name(const string& name) {
54 int rc = valid_s3_bucket_name(name, false /* relaxed */);
55 if (rc != 0) {
56 if (name.size() > 255)
57 return -ENAMETOOLONG;
58 return -EINVAL;
59 }
60 return 0;
61 }
62
63 inline int valid_fs_object_name(const string& name) {
64 int rc = valid_s3_object_name(name);
65 if (rc != 0) {
66 if (name.size() > 1024)
67 return -ENAMETOOLONG;
68 return -EINVAL;
69 }
70 return 0;
71 }
72
31f18b77
FG
73 LookupFHResult RGWLibFS::stat_bucket(RGWFileHandle* parent, const char *path,
74 RGWLibFS::BucketStats& bs,
75 uint32_t flags)
7c673cae
FG
76 {
77 LookupFHResult fhr{nullptr, 0};
78 std::string bucket_name{path};
9f95a23c
TL
79 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
80 RGWStatBucketRequest req(cct, &ruser, bucket_name, bs);
7c673cae
FG
81
82 int rc = rgwlib.get_fe()->execute_req(&req);
83 if ((rc == 0) &&
84 (req.get_ret() == 0) &&
85 (req.matched())) {
86 fhr = lookup_fh(parent, path,
31f18b77 87 (flags & RGWFileHandle::FLAG_LOCKED)|
7c673cae
FG
88 RGWFileHandle::FLAG_CREATE|
89 RGWFileHandle::FLAG_BUCKET);
90 if (get<0>(fhr)) {
91 RGWFileHandle* rgw_fh = get<0>(fhr);
31f18b77
FG
92 if (! (flags & RGWFileHandle::FLAG_LOCKED)) {
93 rgw_fh->mtx.lock();
94 }
7c673cae
FG
95 rgw_fh->set_times(req.get_ctime());
96 /* restore attributes */
97 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
98 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
99 if (ux_key && ux_attrs) {
3efd9988
FG
100 DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
101 if (get<0>(dar) || get<1>(dar)) {
102 update_fh(rgw_fh);
103 }
7c673cae 104 }
31f18b77
FG
105 if (! (flags & RGWFileHandle::FLAG_LOCKED)) {
106 rgw_fh->mtx.unlock();
107 }
7c673cae
FG
108 }
109 }
110 return fhr;
111 }
112
eafe8130
TL
113 LookupFHResult RGWLibFS::fake_leaf(RGWFileHandle* parent,
114 const char *path,
115 enum rgw_fh_type type,
116 struct stat *st, uint32_t st_mask,
117 uint32_t flags)
118 {
119 /* synthesize a minimal handle from parent, path, type, and st */
120 using std::get;
121
122 flags |= RGWFileHandle::FLAG_CREATE;
123
124 switch (type) {
125 case RGW_FS_TYPE_DIRECTORY:
126 flags |= RGWFileHandle::FLAG_DIRECTORY;
127 break;
128 default:
129 /* file */
130 break;
131 };
132
133 LookupFHResult fhr = lookup_fh(parent, path, flags);
134 if (get<0>(fhr)) {
135 RGWFileHandle* rgw_fh = get<0>(fhr);
136 if (st) {
137 lock_guard guard(rgw_fh->mtx);
138 if (st_mask & RGW_SETATTR_SIZE) {
139 rgw_fh->set_size(st->st_size);
140 }
141 if (st_mask & RGW_SETATTR_MTIME) {
142 rgw_fh->set_times(st->st_mtim);
143 }
144 } /* st */
145 } /* rgw_fh */
146 return fhr;
147 } /* RGWLibFS::fake_leaf */
148
7c673cae
FG
149 LookupFHResult RGWLibFS::stat_leaf(RGWFileHandle* parent,
150 const char *path,
151 enum rgw_fh_type type,
152 uint32_t flags)
153 {
154 /* find either-of <object_name>, <object_name/>, only one of
155 * which should exist; atomicity? */
156 using std::get;
157
158 LookupFHResult fhr{nullptr, 0};
159
160 /* XXX the need for two round-trip operations to identify file or
161 * directory leaf objects is unecessary--the current proposed
162 * mechanism to avoid this is to store leaf object names with an
163 * object locator w/o trailing slash */
164
31f18b77 165 std::string obj_path = parent->format_child_name(path, false);
9f95a23c 166 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
7c673cae
FG
167
168 for (auto ix : { 0, 1, 2 }) {
169 switch (ix) {
170 case 0:
171 {
172 /* type hint */
173 if (type == RGW_FS_TYPE_DIRECTORY)
174 continue;
175
9f95a23c 176 RGWStatObjRequest req(cct, &ruser,
7c673cae
FG
177 parent->bucket_name(), obj_path,
178 RGWStatObjRequest::FLAG_NONE);
179 int rc = rgwlib.get_fe()->execute_req(&req);
180 if ((rc == 0) &&
181 (req.get_ret() == 0)) {
182 fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
183 if (get<0>(fhr)) {
184 RGWFileHandle* rgw_fh = get<0>(fhr);
185 lock_guard guard(rgw_fh->mtx);
186 rgw_fh->set_size(req.get_size());
187 rgw_fh->set_times(req.get_mtime());
188 /* restore attributes */
189 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
190 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
81eedcae
TL
191 rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
192 rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
7c673cae 193 if (ux_key && ux_attrs) {
3efd9988
FG
194 DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
195 if (get<0>(dar) || get<1>(dar)) {
196 update_fh(rgw_fh);
197 }
7c673cae
FG
198 }
199 }
200 goto done;
201 }
202 }
203 break;
204 case 1:
205 {
206 /* try dir form */
207 /* type hint */
208 if (type == RGW_FS_TYPE_FILE)
209 continue;
210
211 obj_path += "/";
9f95a23c 212 RGWStatObjRequest req(cct, &ruser,
7c673cae
FG
213 parent->bucket_name(), obj_path,
214 RGWStatObjRequest::FLAG_NONE);
215 int rc = rgwlib.get_fe()->execute_req(&req);
216 if ((rc == 0) &&
217 (req.get_ret() == 0)) {
218 fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_DIRECTORY);
219 if (get<0>(fhr)) {
220 RGWFileHandle* rgw_fh = get<0>(fhr);
221 lock_guard guard(rgw_fh->mtx);
222 rgw_fh->set_size(req.get_size());
223 rgw_fh->set_times(req.get_mtime());
224 /* restore attributes */
225 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
226 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
81eedcae
TL
227 rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
228 rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
7c673cae 229 if (ux_key && ux_attrs) {
3efd9988
FG
230 DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
231 if (get<0>(dar) || get<1>(dar)) {
232 update_fh(rgw_fh);
233 }
7c673cae
FG
234 }
235 }
236 goto done;
237 }
238 }
239 break;
240 case 2:
241 {
242 std::string object_name{path};
9f95a23c 243 RGWStatLeafRequest req(cct, &ruser, parent, object_name);
7c673cae
FG
244 int rc = rgwlib.get_fe()->execute_req(&req);
245 if ((rc == 0) &&
246 (req.get_ret() == 0)) {
247 if (req.matched) {
248 /* we need rgw object's key name equal to file name, if
249 * not return NULL */
250 if ((flags & RGWFileHandle::FLAG_EXACT_MATCH) &&
251 !req.exact_matched) {
252 lsubdout(get_context(), rgw, 15)
253 << __func__
254 << ": stat leaf not exact match file name = "
255 << path << dendl;
256 goto done;
257 }
258 fhr = lookup_fh(parent, path,
259 RGWFileHandle::FLAG_CREATE|
260 ((req.is_dir) ?
261 RGWFileHandle::FLAG_DIRECTORY :
262 RGWFileHandle::FLAG_NONE));
263 /* XXX we don't have an object--in general, there need not
264 * be one (just a path segment in some other object). In
265 * actual leaf an object exists, but we'd need another round
266 * trip to get attrs */
267 if (get<0>(fhr)) {
268 /* for now use the parent object's mtime */
269 RGWFileHandle* rgw_fh = get<0>(fhr);
270 lock_guard guard(rgw_fh->mtx);
271 rgw_fh->set_mtime(parent->get_mtime());
272 }
273 }
274 }
275 }
276 break;
277 default:
278 /* not reached */
279 break;
280 }
281 }
282 done:
283 return fhr;
284 } /* RGWLibFS::stat_leaf */
285
286 int RGWLibFS::read(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
287 size_t* bytes_read, void* buffer, uint32_t flags)
288 {
289 if (! rgw_fh->is_file())
290 return -EINVAL;
291
292 if (rgw_fh->deleted())
293 return -ESTALE;
294
9f95a23c
TL
295 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
296 RGWReadRequest req(get_context(), &ruser, rgw_fh, offset, length,
7c673cae
FG
297 buffer);
298
299 int rc = rgwlib.get_fe()->execute_req(&req);
300 if ((rc == 0) &&
301 (req.get_ret() == 0)) {
11fdf7f2
TL
302 lock_guard guard(rgw_fh->mtx);
303 rgw_fh->set_atime(real_clock::to_timespec(real_clock::now()));
304 *bytes_read = req.nread;
305 }
306
307 return rc;
308 }
309
310 int RGWLibFS::readlink(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
311 size_t* bytes_read, void* buffer, uint32_t flags)
312 {
313 if (! rgw_fh->is_link())
314 return -EINVAL;
315
316 if (rgw_fh->deleted())
317 return -ESTALE;
318
9f95a23c
TL
319 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
320 RGWReadRequest req(get_context(), &ruser, rgw_fh, offset, length,
11fdf7f2
TL
321 buffer);
322
323 int rc = rgwlib.get_fe()->execute_req(&req);
324 if ((rc == 0) &&
325 (req.get_ret() == 0)) {
7c673cae
FG
326 lock_guard(rgw_fh->mtx);
327 rgw_fh->set_atime(real_clock::to_timespec(real_clock::now()));
328 *bytes_read = req.nread;
329 }
330
331 return rc;
332 }
333
334 int RGWLibFS::unlink(RGWFileHandle* rgw_fh, const char* name, uint32_t flags)
335 {
336 int rc = 0;
31f18b77 337 BucketStats bs;
7c673cae 338 RGWFileHandle* parent = nullptr;
31f18b77 339 RGWFileHandle* bkt_fh = nullptr;
7c673cae
FG
340
341 if (unlikely(flags & RGWFileHandle::FLAG_UNLINK_THIS)) {
342 /* LOCKED */
343 parent = rgw_fh->get_parent();
344 } else {
345 /* atomicity */
346 parent = rgw_fh;
347 LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_LOCK);
348 rgw_fh = get<0>(fhr);
349 /* LOCKED */
350 }
351
9f95a23c 352 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
7c673cae 353 if (parent->is_root()) {
31f18b77
FG
354 /* a bucket may have an object storing Unix attributes, check
355 * for and delete it */
356 LookupFHResult fhr;
357 fhr = stat_bucket(parent, name, bs, (rgw_fh) ?
358 RGWFileHandle::FLAG_LOCKED :
359 RGWFileHandle::FLAG_NONE);
360 bkt_fh = get<0>(fhr);
361 if (unlikely(! bkt_fh)) {
362 /* implies !rgw_fh, so also !LOCKED */
363 return -ENOENT;
364 }
365
366 if (bs.num_entries > 1) {
367 unref(bkt_fh); /* return stat_bucket ref */
368 if (likely(!! rgw_fh)) { /* return lock and ref from
369 * lookup_fh (or caller in the
370 * special case of
371 * RGWFileHandle::FLAG_UNLINK_THIS) */
372 rgw_fh->mtx.unlock();
373 unref(rgw_fh);
374 }
375 return -ENOTEMPTY;
376 } else {
377 /* delete object w/key "<bucket>/" (uxattrs), if any */
378 string oname{"/"};
9f95a23c 379 RGWDeleteObjRequest req(cct, &ruser, bkt_fh->bucket_name(), oname);
31f18b77
FG
380 rc = rgwlib.get_fe()->execute_req(&req);
381 /* don't care if ENOENT */
382 unref(bkt_fh);
383 }
384
385 string bname{name};
9f95a23c 386 RGWDeleteBucketRequest req(cct, &ruser, bname);
7c673cae
FG
387 rc = rgwlib.get_fe()->execute_req(&req);
388 if (! rc) {
389 rc = req.get_ret();
390 }
391 } else {
392 /*
393 * leaf object
394 */
395 if (! rgw_fh) {
396 /* XXX for now, peform a hard lookup to deduce the type of
397 * object to be deleted ("foo" vs. "foo/")--also, ensures
398 * atomicity at this endpoint */
399 struct rgw_file_handle *fh;
400 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &fh,
eafe8130 401 nullptr /* st */, 0 /* mask */,
7c673cae
FG
402 RGW_LOOKUP_FLAG_NONE);
403 if (!! rc)
404 return rc;
405
406 /* rgw_fh ref+ */
407 rgw_fh = get_rgwfh(fh);
408 rgw_fh->mtx.lock(); /* LOCKED */
409 }
410
411 std::string oname = rgw_fh->relative_object_name();
412 if (rgw_fh->is_dir()) {
413 /* for the duration of our cache timer, trust positive
414 * child cache */
415 if (rgw_fh->has_children()) {
416 rgw_fh->mtx.unlock();
417 unref(rgw_fh);
418 return(-ENOTEMPTY);
419 }
420 oname += "/";
421 }
9f95a23c 422 RGWDeleteObjRequest req(cct, &ruser, parent->bucket_name(),
7c673cae
FG
423 oname);
424 rc = rgwlib.get_fe()->execute_req(&req);
425 if (! rc) {
426 rc = req.get_ret();
427 }
428 }
429
31f18b77
FG
430 /* ENOENT when raced with other s3 gateway */
431 if (! rc || rc == -ENOENT) {
432 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
433 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
434 RGWFileHandle::FHCache::FLAG_LOCK);
435 }
7c673cae
FG
436
437 if (! rc) {
438 real_time t = real_clock::now();
439 parent->set_mtime(real_clock::to_timespec(t));
440 parent->set_ctime(real_clock::to_timespec(t));
441 }
442
443 rgw_fh->mtx.unlock();
444 unref(rgw_fh);
445
446 return rc;
447 } /* RGWLibFS::unlink */
448
449 int RGWLibFS::rename(RGWFileHandle* src_fh, RGWFileHandle* dst_fh,
450 const char *_src_name, const char *_dst_name)
451
452 {
453 /* XXX initial implementation: try-copy, and delete if copy
454 * succeeds */
455 int rc = -EINVAL;
9f95a23c 456 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
7c673cae
FG
457
458 real_time t;
459
460 std::string src_name{_src_name};
461 std::string dst_name{_dst_name};
462
463 /* atomicity */
464 LookupFHResult fhr = lookup_fh(src_fh, _src_name, RGWFileHandle::FLAG_LOCK);
465 RGWFileHandle* rgw_fh = get<0>(fhr);
466
467 /* should not happen */
468 if (! rgw_fh) {
469 ldout(get_context(), 0) << __func__
470 << " BUG no such src renaming path="
471 << src_name
472 << dendl;
473 goto out;
474 }
475
476 /* forbid renaming of directories (unreasonable at scale) */
477 if (rgw_fh->is_dir()) {
478 ldout(get_context(), 12) << __func__
479 << " rejecting attempt to rename directory path="
480 << rgw_fh->full_object_name()
481 << dendl;
482 rc = -EPERM;
483 goto unlock;
484 }
485
486 /* forbid renaming open files (violates intent, for now) */
487 if (rgw_fh->is_open()) {
488 ldout(get_context(), 12) << __func__
489 << " rejecting attempt to rename open file path="
490 << rgw_fh->full_object_name()
491 << dendl;
492 rc = -EPERM;
493 goto unlock;
494 }
495
496 t = real_clock::now();
497
498 for (int ix : {0, 1}) {
499 switch (ix) {
500 case 0:
501 {
9f95a23c 502 RGWCopyObjRequest req(cct, &ruser, src_fh, dst_fh, src_name,
7c673cae
FG
503 dst_name);
504 int rc = rgwlib.get_fe()->execute_req(&req);
505 if ((rc != 0) ||
506 ((rc = req.get_ret()) != 0)) {
507 ldout(get_context(), 1)
508 << __func__
509 << " rename step 0 failed src="
510 << src_fh->full_object_name() << " " << src_name
511 << " dst=" << dst_fh->full_object_name()
512 << " " << dst_name
513 << "rc " << rc
514 << dendl;
515 goto unlock;
516 }
517 ldout(get_context(), 12)
518 << __func__
519 << " rename step 0 success src="
520 << src_fh->full_object_name() << " " << src_name
521 << " dst=" << dst_fh->full_object_name()
522 << " " << dst_name
523 << " rc " << rc
524 << dendl;
525 /* update dst change id */
526 dst_fh->set_times(t);
527 }
528 break;
529 case 1:
530 {
531 rc = this->unlink(rgw_fh /* LOCKED */, _src_name,
532 RGWFileHandle::FLAG_UNLINK_THIS);
533 /* !LOCKED, -ref */
534 if (! rc) {
535 ldout(get_context(), 12)
536 << __func__
537 << " rename step 1 success src="
538 << src_fh->full_object_name() << " " << src_name
539 << " dst=" << dst_fh->full_object_name()
540 << " " << dst_name
541 << " rc " << rc
542 << dendl;
543 /* update src change id */
544 src_fh->set_times(t);
545 } else {
546 ldout(get_context(), 1)
547 << __func__
548 << " rename step 1 failed src="
549 << src_fh->full_object_name() << " " << src_name
550 << " dst=" << dst_fh->full_object_name()
551 << " " << dst_name
552 << " rc " << rc
553 << dendl;
554 }
555 }
556 goto out;
557 default:
11fdf7f2 558 ceph_abort();
7c673cae
FG
559 } /* switch */
560 } /* ix */
561 unlock:
562 rgw_fh->mtx.unlock(); /* !LOCKED */
563 unref(rgw_fh); /* -ref */
564
565 out:
566 return rc;
567 } /* RGWLibFS::rename */
568
569 MkObjResult RGWLibFS::mkdir(RGWFileHandle* parent, const char *name,
570 struct stat *st, uint32_t mask, uint32_t flags)
571 {
7c673cae 572 int rc, rc2;
31f18b77 573 rgw_file_handle *lfh;
9f95a23c 574 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
31f18b77
FG
575
576 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
eafe8130 577 nullptr /* st */, 0 /* mask */,
31f18b77
FG
578 RGW_LOOKUP_FLAG_NONE);
579 if (! rc) {
580 /* conflict! */
581 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
582 return MkObjResult{nullptr, -EEXIST};
583 }
7c673cae 584
31f18b77 585 MkObjResult mkr{nullptr, -EINVAL};
7c673cae
FG
586 LookupFHResult fhr;
587 RGWFileHandle* rgw_fh = nullptr;
588 buffer::list ux_key, ux_attrs;
589
590 fhr = lookup_fh(parent, name,
591 RGWFileHandle::FLAG_CREATE|
592 RGWFileHandle::FLAG_DIRECTORY|
593 RGWFileHandle::FLAG_LOCK);
594 rgw_fh = get<0>(fhr);
595 if (rgw_fh) {
596 rgw_fh->create_stat(st, mask);
597 rgw_fh->set_times(real_clock::now());
598 /* save attrs */
599 rgw_fh->encode_attrs(ux_key, ux_attrs);
600 if (st)
494da23a 601 rgw_fh->stat(st, RGWFileHandle::FLAG_LOCKED);
7c673cae
FG
602 get<0>(mkr) = rgw_fh;
603 } else {
604 get<1>(mkr) = -EIO;
605 return mkr;
606 }
607
608 if (parent->is_root()) {
609 /* bucket */
610 string bname{name};
611 /* enforce S3 name restrictions */
612 rc = valid_fs_bucket_name(bname);
613 if (rc != 0) {
614 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
615 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
616 RGWFileHandle::FHCache::FLAG_LOCK);
617 rgw_fh->mtx.unlock();
618 unref(rgw_fh);
619 get<0>(mkr) = nullptr;
620 get<1>(mkr) = rc;
621 return mkr;
622 }
623
9f95a23c 624 RGWCreateBucketRequest req(get_context(), &ruser, bname);
7c673cae
FG
625
626 /* save attrs */
627 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
628 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
629
630 rc = rgwlib.get_fe()->execute_req(&req);
631 rc2 = req.get_ret();
632 } else {
633 /* create an object representing the directory */
634 buffer::list bl;
31f18b77 635 string dir_name = parent->format_child_name(name, true);
7c673cae
FG
636
637 /* need valid S3 name (characters, length <= 1024, etc) */
638 rc = valid_fs_object_name(dir_name);
639 if (rc != 0) {
640 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
641 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
642 RGWFileHandle::FHCache::FLAG_LOCK);
643 rgw_fh->mtx.unlock();
644 unref(rgw_fh);
645 get<0>(mkr) = nullptr;
646 get<1>(mkr) = rc;
647 return mkr;
648 }
649
9f95a23c 650 RGWPutObjRequest req(get_context(), &ruser, parent->bucket_name(),
7c673cae
FG
651 dir_name, bl);
652
653 /* save attrs */
654 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
655 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
656
657 rc = rgwlib.get_fe()->execute_req(&req);
658 rc2 = req.get_ret();
659 }
660
661 if (! ((rc == 0) &&
662 (rc2 == 0))) {
663 /* op failed */
664 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
665 rgw_fh->mtx.unlock(); /* !LOCKED */
666 unref(rgw_fh);
667 get<0>(mkr) = nullptr;
668 /* fixup rc */
669 if (!rc)
670 rc = rc2;
671 } else {
672 real_time t = real_clock::now();
673 parent->set_mtime(real_clock::to_timespec(t));
674 parent->set_ctime(real_clock::to_timespec(t));
675 rgw_fh->mtx.unlock(); /* !LOCKED */
676 }
677
678 get<1>(mkr) = rc;
679
680 return mkr;
681 } /* RGWLibFS::mkdir */
682
683 MkObjResult RGWLibFS::create(RGWFileHandle* parent, const char *name,
684 struct stat *st, uint32_t mask, uint32_t flags)
685 {
686 int rc, rc2;
687
688 using std::get;
689
690 rgw_file_handle *lfh;
691 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
eafe8130 692 nullptr /* st */, 0 /* mask */,
7c673cae
FG
693 RGW_LOOKUP_FLAG_NONE);
694 if (! rc) {
695 /* conflict! */
696 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
697 return MkObjResult{nullptr, -EEXIST};
698 }
699
700 /* expand and check name */
31f18b77 701 std::string obj_name = parent->format_child_name(name, false);
7c673cae
FG
702 rc = valid_fs_object_name(obj_name);
703 if (rc != 0) {
704 return MkObjResult{nullptr, rc};
705 }
706
707 /* create it */
708 buffer::list bl;
9f95a23c
TL
709 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
710 RGWPutObjRequest req(cct, &ruser, parent->bucket_name(), obj_name, bl);
7c673cae
FG
711 MkObjResult mkr{nullptr, -EINVAL};
712
713 rc = rgwlib.get_fe()->execute_req(&req);
714 rc2 = req.get_ret();
715
716 if ((rc == 0) &&
717 (rc2 == 0)) {
718 /* XXX atomicity */
719 LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_CREATE |
720 RGWFileHandle::FLAG_LOCK);
721 RGWFileHandle* rgw_fh = get<0>(fhr);
722 if (rgw_fh) {
723 if (get<1>(fhr) & RGWFileHandle::FLAG_CREATE) {
724 /* fill in stat data */
725 real_time t = real_clock::now();
726 rgw_fh->create_stat(st, mask);
727 rgw_fh->set_times(t);
728
729 parent->set_mtime(real_clock::to_timespec(t));
730 parent->set_ctime(real_clock::to_timespec(t));
731 }
732 if (st)
494da23a 733 (void) rgw_fh->stat(st, RGWFileHandle::FLAG_LOCKED);
81eedcae
TL
734
735 rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
736 rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
737
7c673cae
FG
738 get<0>(mkr) = rgw_fh;
739 rgw_fh->mtx.unlock();
740 } else
741 rc = -EIO;
742 }
743
744 get<1>(mkr) = rc;
11fdf7f2
TL
745
746 /* case like : quota exceed will be considered as fail too*/
747 if(rc2 < 0)
748 get<1>(mkr) = rc2;
7c673cae
FG
749
750 return mkr;
751 } /* RGWLibFS::create */
752
11fdf7f2
TL
753 MkObjResult RGWLibFS::symlink(RGWFileHandle* parent, const char *name,
754 const char* link_path, struct stat *st, uint32_t mask, uint32_t flags)
755 {
756 int rc, rc2;
757
758 using std::get;
759
760 rgw_file_handle *lfh;
eafe8130
TL
761 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
762 nullptr /* st */, 0 /* mask */,
11fdf7f2
TL
763 RGW_LOOKUP_FLAG_NONE);
764 if (! rc) {
765 /* conflict! */
766 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
767 return MkObjResult{nullptr, -EEXIST};
768 }
769
770 MkObjResult mkr{nullptr, -EINVAL};
771 LookupFHResult fhr;
772 RGWFileHandle* rgw_fh = nullptr;
773 buffer::list ux_key, ux_attrs;
774
775 fhr = lookup_fh(parent, name,
776 RGWFileHandle::FLAG_CREATE|
777 RGWFileHandle::FLAG_SYMBOLIC_LINK|
778 RGWFileHandle::FLAG_LOCK);
779 rgw_fh = get<0>(fhr);
780 if (rgw_fh) {
781 rgw_fh->create_stat(st, mask);
782 rgw_fh->set_times(real_clock::now());
783 /* save attrs */
784 rgw_fh->encode_attrs(ux_key, ux_attrs);
785 if (st)
786 rgw_fh->stat(st);
787 get<0>(mkr) = rgw_fh;
788 } else {
789 get<1>(mkr) = -EIO;
790 return mkr;
791 }
792
793 /* need valid S3 name (characters, length <= 1024, etc) */
794 rc = valid_fs_object_name(name);
795 if (rc != 0) {
796 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
797 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
798 RGWFileHandle::FHCache::FLAG_LOCK);
799 rgw_fh->mtx.unlock();
800 unref(rgw_fh);
801 get<0>(mkr) = nullptr;
802 get<1>(mkr) = rc;
803 return mkr;
804 }
805
806 string obj_name = std::string(name);
807 /* create an object representing the directory */
808 buffer::list bl;
809
810 /* XXXX */
811#if 0
812 bl.push_back(
813 buffer::create_static(len, static_cast<char*>(buffer)));
814#else
815
816 bl.push_back(
817 buffer::copy(link_path, strlen(link_path)));
818#endif
819
9f95a23c
TL
820 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
821 RGWPutObjRequest req(get_context(), &ruser, parent->bucket_name(),
11fdf7f2
TL
822 obj_name, bl);
823
824 /* save attrs */
825 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
826 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
827
828 rc = rgwlib.get_fe()->execute_req(&req);
829 rc2 = req.get_ret();
830 if (! ((rc == 0) &&
831 (rc2 == 0))) {
832 /* op failed */
833 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
834 rgw_fh->mtx.unlock(); /* !LOCKED */
835 unref(rgw_fh);
836 get<0>(mkr) = nullptr;
837 /* fixup rc */
838 if (!rc)
839 rc = rc2;
840 } else {
841 real_time t = real_clock::now();
842 parent->set_mtime(real_clock::to_timespec(t));
843 parent->set_ctime(real_clock::to_timespec(t));
844 rgw_fh->mtx.unlock(); /* !LOCKED */
845 }
846
847 get<1>(mkr) = rc;
848
849 return mkr;
850 } /* RGWLibFS::symlink */
851
7c673cae
FG
852 int RGWLibFS::getattr(RGWFileHandle* rgw_fh, struct stat* st)
853 {
854 switch(rgw_fh->fh.fh_type) {
855 case RGW_FS_TYPE_FILE:
856 {
857 if (rgw_fh->deleted())
858 return -ESTALE;
859 }
860 break;
861 default:
862 break;
863 };
494da23a 864 /* if rgw_fh is a directory, mtime will be advanced */
7c673cae
FG
865 return rgw_fh->stat(st);
866 } /* RGWLibFS::getattr */
867
868 int RGWLibFS::setattr(RGWFileHandle* rgw_fh, struct stat* st, uint32_t mask,
869 uint32_t flags)
870 {
871 int rc, rc2;
872 buffer::list ux_key, ux_attrs;
81eedcae
TL
873 buffer::list etag = rgw_fh->get_etag();
874 buffer::list acls = rgw_fh->get_acls();
7c673cae
FG
875
876 lock_guard guard(rgw_fh->mtx);
877
878 switch(rgw_fh->fh.fh_type) {
879 case RGW_FS_TYPE_FILE:
880 {
881 if (rgw_fh->deleted())
882 return -ESTALE;
883 }
884 break;
885 default:
886 break;
887 };
888
889 string obj_name{rgw_fh->relative_object_name()};
890
31f18b77
FG
891 if (rgw_fh->is_dir() &&
892 (likely(! rgw_fh->is_bucket()))) {
7c673cae
FG
893 obj_name += "/";
894 }
895
9f95a23c
TL
896 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
897 RGWSetAttrsRequest req(cct, &ruser, rgw_fh->bucket_name(), obj_name);
7c673cae
FG
898
899 rgw_fh->create_stat(st, mask);
900 rgw_fh->encode_attrs(ux_key, ux_attrs);
901
902 /* save attrs */
903 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
904 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
81eedcae
TL
905 req.emplace_attr(RGW_ATTR_ETAG, std::move(etag));
906 req.emplace_attr(RGW_ATTR_ACL, std::move(acls));
7c673cae
FG
907
908 rc = rgwlib.get_fe()->execute_req(&req);
909 rc2 = req.get_ret();
910
911 if (rc == -ENOENT) {
912 /* special case: materialize placeholder dir */
913 buffer::list bl;
9f95a23c 914 RGWPutObjRequest req(get_context(), &ruser, rgw_fh->bucket_name(),
7c673cae
FG
915 obj_name, bl);
916
917 rgw_fh->encode_attrs(ux_key, ux_attrs); /* because std::moved */
918
919 /* save attrs */
920 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
921 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
922
923 rc = rgwlib.get_fe()->execute_req(&req);
924 rc2 = req.get_ret();
925 }
926
927 if ((rc != 0) || (rc2 != 0)) {
928 return -EIO;
929 }
930
931 rgw_fh->set_ctime(real_clock::to_timespec(real_clock::now()));
932
933 return 0;
934 } /* RGWLibFS::setattr */
935
224ce89b 936 /* called under rgw_fh->mtx held */
3efd9988 937 void RGWLibFS::update_fh(RGWFileHandle *rgw_fh)
224ce89b
WB
938 {
939 int rc, rc2;
940 string obj_name{rgw_fh->relative_object_name()};
941 buffer::list ux_key, ux_attrs;
942
943 if (rgw_fh->is_dir() &&
944 (likely(! rgw_fh->is_bucket()))) {
945 obj_name += "/";
946 }
947
948 lsubdout(get_context(), rgw, 17)
949 << __func__
3efd9988 950 << " update old versioned fh : " << obj_name
224ce89b
WB
951 << dendl;
952
9f95a23c
TL
953 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), user);
954 RGWSetAttrsRequest req(cct, &ruser, rgw_fh->bucket_name(), obj_name);
224ce89b
WB
955
956 rgw_fh->encode_attrs(ux_key, ux_attrs);
957
224ce89b 958 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
3efd9988 959 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
224ce89b
WB
960
961 rc = rgwlib.get_fe()->execute_req(&req);
962 rc2 = req.get_ret();
963
964 if ((rc != 0) || (rc2 != 0)) {
965 lsubdout(get_context(), rgw, 17)
966 << __func__
3efd9988 967 << " update fh failed : " << obj_name
224ce89b
WB
968 << dendl;
969 }
3efd9988 970 } /* RGWLibFS::update_fh */
224ce89b 971
7c673cae
FG
972 void RGWLibFS::close()
973 {
974 state.flags |= FLAG_CLOSED;
975
976 class ObjUnref
977 {
978 RGWLibFS* fs;
979 public:
11fdf7f2 980 explicit ObjUnref(RGWLibFS* _fs) : fs(_fs) {}
7c673cae
FG
981 void operator()(RGWFileHandle* fh) const {
982 lsubdout(fs->get_context(), rgw, 5)
983 << __func__
984 << fh->name
985 << " before ObjUnref refs=" << fh->get_refcnt()
986 << dendl;
31f18b77 987 fs->unref(fh);
7c673cae
FG
988 }
989 };
990
991 /* force cache drain, forces objects to evict */
992 fh_cache.drain(ObjUnref(this),
993 RGWFileHandle::FHCache::FLAG_LOCK);
994 rgwlib.get_fe()->get_process()->unregister_fs(this);
995 rele();
996 } /* RGWLibFS::close */
997
494da23a
TL
998 inline std::ostream& operator<<(std::ostream &os, fh_key const &fhk) {
999 os << "<fh_key: bucket=";
1000 os << fhk.fh_hk.bucket;
1001 os << "; object=";
1002 os << fhk.fh_hk.object;
1003 os << ">";
1004 return os;
1005 }
1006
7c673cae
FG
/* Stream a timespec as "<timespec: tv_sec=S; tv_nsec=N>". */
inline std::ostream& operator<<(std::ostream &os, struct timespec const &ts) {
  return os << "<timespec: tv_sec=" << ts.tv_sec
            << "; tv_nsec=" << ts.tv_nsec
            << ">";
}
1015
1016 std::ostream& operator<<(std::ostream &os, RGWLibFS::event const &ev) {
1017 os << "<event:";
1018 switch (ev.t) {
1019 case RGWLibFS::event::type::READDIR:
1020 os << "type=READDIR;";
1021 break;
1022 default:
1023 os << "type=UNKNOWN;";
1024 break;
1025 };
1026 os << "fid=" << ev.fhk.fh_hk.bucket << ":" << ev.fhk.fh_hk.object
1027 << ";ts=" << ev.ts << ">";
1028 return os;
1029 }
1030
1031 void RGWLibFS::gc()
1032 {
1033 using std::get;
1034 using directory = RGWFileHandle::directory;
1035
1036 /* dirent invalidate timeout--basically, the upper-bound on
1037 * inconsistency with the S3 namespace */
1038 auto expire_s
1039 = get_context()->_conf->rgw_nfs_namespace_expire_secs;
1040
1041 /* max events to gc in one cycle */
c07f9fc5 1042 uint32_t max_ev = get_context()->_conf->rgw_nfs_max_gc;
7c673cae
FG
1043
1044 struct timespec now, expire_ts;
1045 event_vector ve;
1046 bool stop = false;
1047 std::deque<event> &events = state.events;
1048
1049 do {
1050 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
1051 lsubdout(get_context(), rgw, 15)
1052 << "GC: top of expire loop"
1053 << " now=" << now
1054 << " expire_s=" << expire_s
1055 << dendl;
1056 {
1057 lock_guard guard(state.mtx); /* LOCKED */
494da23a
TL
1058 lsubdout(get_context(), rgw, 15)
1059 << "GC: processing"
1060 << " count=" << events.size()
1061 << " events"
1062 << dendl;
1063 /* just return if no events */
7c673cae
FG
1064 if (events.empty()) {
1065 return;
1066 }
1067 uint32_t _max_ev =
1068 (events.size() < 500) ? max_ev : (events.size() / 4);
1069 for (uint32_t ix = 0; (ix < _max_ev) && (events.size() > 0); ++ix) {
1070 event& ev = events.front();
1071 expire_ts = ev.ts;
1072 expire_ts.tv_sec += expire_s;
1073 if (expire_ts > now) {
1074 stop = true;
1075 break;
1076 }
1077 ve.push_back(ev);
1078 events.pop_front();
1079 }
1080 } /* anon */
1081 /* !LOCKED */
1082 for (auto& ev : ve) {
1083 lsubdout(get_context(), rgw, 15)
1084 << "try-expire ev: " << ev << dendl;
1085 if (likely(ev.t == event::type::READDIR)) {
1086 RGWFileHandle* rgw_fh = lookup_handle(ev.fhk.fh_hk);
1087 lsubdout(get_context(), rgw, 15)
1088 << "ev rgw_fh: " << rgw_fh << dendl;
1089 if (rgw_fh) {
1090 RGWFileHandle::directory* d;
1091 if (unlikely(! rgw_fh->is_dir())) {
1092 lsubdout(get_context(), rgw, 0)
1093 << __func__
1094 << " BUG non-directory found with READDIR event "
1095 << "(" << rgw_fh->bucket_name() << ","
1096 << rgw_fh->object_name() << ")"
1097 << dendl;
1098 goto rele;
1099 }
1100 /* maybe clear state */
1101 d = get<directory>(&rgw_fh->variant_type);
1102 if (d) {
1103 struct timespec ev_ts = ev.ts;
1104 lock_guard guard(rgw_fh->mtx);
1105 struct timespec d_last_readdir = d->last_readdir;
1106 if (unlikely(ev_ts < d_last_readdir)) {
1107 /* readdir cycle in progress, don't invalidate */
1108 lsubdout(get_context(), rgw, 15)
1109 << "GC: delay expiration for "
1110 << rgw_fh->object_name()
1111 << " ev.ts=" << ev_ts
1112 << " last_readdir=" << d_last_readdir
1113 << dendl;
1114 continue;
1115 } else {
1116 lsubdout(get_context(), rgw, 15)
1117 << "GC: expiring "
1118 << rgw_fh->object_name()
1119 << dendl;
1120 rgw_fh->clear_state();
1121 rgw_fh->invalidate();
1122 }
1123 }
1124 rele:
1125 unref(rgw_fh);
1126 } /* rgw_fh */
1127 } /* event::type::READDIR */
1128 } /* ev */
1129 ve.clear();
1130 } while (! (stop || shutdown));
1131 } /* RGWLibFS::gc */
1132
1133 std::ostream& operator<<(std::ostream &os,
1134 RGWFileHandle const &rgw_fh)
1135 {
1136 const auto& fhk = rgw_fh.get_key();
1137 const auto& fh = const_cast<RGWFileHandle&>(rgw_fh).get_fh();
1138 os << "<RGWFileHandle:";
1139 os << "addr=" << &rgw_fh << ";";
1140 switch (fh->fh_type) {
1141 case RGW_FS_TYPE_DIRECTORY:
1142 os << "type=DIRECTORY;";
1143 break;
1144 case RGW_FS_TYPE_FILE:
1145 os << "type=FILE;";
1146 break;
1147 default:
1148 os << "type=UNKNOWN;";
1149 break;
1150 };
1151 os << "fid=" << fhk.fh_hk.bucket << ":" << fhk.fh_hk.object << ";";
1152 os << "name=" << rgw_fh.object_name() << ";";
1153 os << "refcnt=" << rgw_fh.get_refcnt() << ";";
1154 os << ">";
1155 return os;
1156 }
1157
1158 RGWFileHandle::~RGWFileHandle() {
28e407b8
AA
1159 /* !recycle case, handle may STILL be in handle table, BUT
1160 * the partition lock is not held in this path */
1161 if (fh_hook.is_linked()) {
1162 fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_LOCK);
1163 }
7c673cae 1164 /* cond-unref parent */
3efd9988 1165 if (parent && (! parent->is_mount())) {
7c673cae
FG
1166 /* safe because if parent->unref causes its deletion,
1167 * there are a) by refcnt, no other objects/paths pointing
1168 * to it and b) by the semantics of valid iteration of
1169 * fh_lru (observed, e.g., by cohort_lru<T,...>::drain())
1170 * no unsafe iterators reaching it either--n.b., this constraint
1171 * is binding oncode which may in future attempt to e.g.,
1172 * cause the eviction of objects in LRU order */
31f18b77 1173 (void) get_fs()->unref(parent);
7c673cae
FG
1174 }
1175 }
1176
494da23a
TL
1177 fh_key RGWFileHandle::make_fhk(const std::string& name)
1178 {
1179 std::string tenant = get_fs()->get_user()->user_id.to_str();
1180 if (depth == 0) {
1181 /* S3 bucket -- assert mount-at-bucket case reaches here */
1182 return fh_key(name, name, tenant);
1183 } else {
1184 std::string key_name = make_key_name(name.c_str());
1185 return fh_key(fhk.fh_hk.bucket, key_name.c_str(), tenant);
1186 }
1187 }
1188
7c673cae
FG
1189 void RGWFileHandle::encode_attrs(ceph::buffer::list& ux_key1,
1190 ceph::buffer::list& ux_attrs1)
1191 {
11fdf7f2 1192 using ceph::encode;
7c673cae 1193 fh_key fhk(this->fh.fh_hk);
11fdf7f2
TL
1194 encode(fhk, ux_key1);
1195 encode(*this, ux_attrs1);
7c673cae
FG
1196 } /* RGWFileHandle::encode_attrs */
1197
3efd9988
FG
1198 DecodeAttrsResult RGWFileHandle::decode_attrs(const ceph::buffer::list* ux_key1,
1199 const ceph::buffer::list* ux_attrs1)
7c673cae 1200 {
11fdf7f2 1201 using ceph::decode;
3efd9988 1202 DecodeAttrsResult dar { false, false };
7c673cae 1203 fh_key fhk;
11fdf7f2
TL
1204 auto bl_iter_key1 = ux_key1->cbegin();
1205 decode(fhk, bl_iter_key1);
494da23a 1206 get<0>(dar) = true;
7c673cae 1207
11fdf7f2
TL
1208 auto bl_iter_unix1 = ux_attrs1->cbegin();
1209 decode(*this, bl_iter_unix1);
3efd9988
FG
1210 if (this->state.version < 2) {
1211 get<1>(dar) = true;
1212 }
224ce89b 1213
3efd9988 1214 return dar;
7c673cae
FG
1215 } /* RGWFileHandle::decode_attrs */
1216
1217 bool RGWFileHandle::reclaim() {
1218 lsubdout(fs->get_context(), rgw, 17)
1219 << __func__ << " " << *this
1220 << dendl;
b32b8144 1221 /* in the non-delete case, handle may still be in handle table */
7c673cae 1222 if (fh_hook.is_linked()) {
b32b8144
FG
1223 /* in this case, we are being called from a context which holds
1224 * the partition lock */
1225 fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_NONE);
7c673cae
FG
1226 }
1227 return true;
1228 } /* RGWFileHandle::reclaim */
1229
1230 bool RGWFileHandle::has_children() const
1231 {
9f95a23c 1232 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), *fs->get_user());
7c673cae
FG
1233 if (unlikely(! is_dir()))
1234 return false;
1235
9f95a23c 1236 RGWRMdirCheck req(fs->get_context(), &ruser, this);
7c673cae
FG
1237 int rc = rgwlib.get_fe()->execute_req(&req);
1238 if (! rc) {
1239 return req.valid && req.has_children;
1240 }
1241
1242 return false;
1243 }
1244
3efd9988
FG
1245 std::ostream& operator<<(std::ostream &os,
1246 RGWFileHandle::readdir_offset const &offset)
1247 {
1248 using boost::get;
1249 if (unlikely(!! get<uint64_t*>(&offset))) {
1250 uint64_t* ioff = get<uint64_t*>(offset);
1251 os << *ioff;
1252 }
1253 else
1254 os << get<const char*>(offset);
1255 return os;
1256 }
1257
1258 int RGWFileHandle::readdir(rgw_readdir_cb rcb, void *cb_arg,
1259 readdir_offset offset,
7c673cae
FG
1260 bool *eof, uint32_t flags)
1261 {
1262 using event = RGWLibFS::event;
3efd9988 1263 using boost::get;
7c673cae
FG
1264 int rc = 0;
1265 struct timespec now;
1266 CephContext* cct = fs->get_context();
9f95a23c 1267 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), *fs->get_user());
7c673cae 1268
494da23a
TL
1269 lsubdout(cct, rgw, 10)
1270 << __func__ << " readdir called on "
1271 << object_name()
1272 << dendl;
1273
7c673cae
FG
1274 directory* d = get<directory>(&variant_type);
1275 if (d) {
1276 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1277 lock_guard guard(mtx);
1278 d->last_readdir = now;
1279 }
1280
3efd9988 1281 bool initial_off;
494da23a
TL
1282 char* mk{nullptr};
1283
3efd9988 1284 if (likely(!! get<const char*>(&offset))) {
494da23a
TL
1285 mk = const_cast<char*>(get<const char*>(offset));
1286 initial_off = !mk;
3efd9988
FG
1287 } else {
1288 initial_off = (*get<uint64_t*>(offset) == 0);
1289 }
1290
7c673cae 1291 if (is_root()) {
9f95a23c 1292 RGWListBucketsRequest req(cct, &ruser, this, rcb, cb_arg,
7c673cae
FG
1293 offset);
1294 rc = rgwlib.get_fe()->execute_req(&req);
1295 if (! rc) {
1296 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1297 lock_guard guard(mtx);
1298 state.atime = now;
3efd9988 1299 if (initial_off)
7c673cae
FG
1300 set_nlink(2);
1301 inc_nlink(req.d_count);
1302 *eof = req.eof();
7c673cae
FG
1303 }
1304 } else {
9f95a23c 1305 RGWReaddirRequest req(cct, &ruser, this, rcb, cb_arg, offset);
7c673cae
FG
1306 rc = rgwlib.get_fe()->execute_req(&req);
1307 if (! rc) {
1308 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1309 lock_guard guard(mtx);
1310 state.atime = now;
3efd9988 1311 if (initial_off)
7c673cae
FG
1312 set_nlink(2);
1313 inc_nlink(req.d_count);
1314 *eof = req.eof();
7c673cae
FG
1315 }
1316 }
1317
494da23a
TL
1318 event ev(event::type::READDIR, get_key(), state.atime);
1319 lock_guard sguard(fs->state.mtx);
1320 fs->state.push_event(ev);
1321
7c673cae
FG
1322 lsubdout(fs->get_context(), rgw, 15)
1323 << __func__
1324 << " final link count=" << state.nlink
1325 << dendl;
1326
1327 return rc;
1328 } /* RGWFileHandle::readdir */
1329
1330 int RGWFileHandle::write(uint64_t off, size_t len, size_t *bytes_written,
1331 void *buffer)
1332 {
1333 using std::get;
1334 using WriteCompletion = RGWLibFS::WriteCompletion;
9f95a23c 1335 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), *fs->get_user());
7c673cae
FG
1336
1337 lock_guard guard(mtx);
1338
1339 int rc = 0;
1340
1341 file* f = get<file>(&variant_type);
1342 if (! f)
1343 return -EISDIR;
1344
1345 if (deleted()) {
1346 lsubdout(fs->get_context(), rgw, 5)
1347 << __func__
1348 << " write attempted on deleted object "
1349 << this->object_name()
1350 << dendl;
1351 /* zap write transaction, if any */
1352 if (f->write_req) {
1353 delete f->write_req;
1354 f->write_req = nullptr;
1355 }
1356 return -ESTALE;
1357 }
1358
1359 if (! f->write_req) {
1360 /* guard--we do not support (e.g., COW-backed) partial writes */
1361 if (off != 0) {
1362 lsubdout(fs->get_context(), rgw, 5)
1363 << __func__
1364 << " " << object_name()
1365 << " non-0 initial write position " << off
11fdf7f2 1366 << " (mounting with -o sync required)"
7c673cae
FG
1367 << dendl;
1368 return -EIO;
1369 }
1370
1371 /* start */
1372 std::string object_name = relative_object_name();
1373 f->write_req =
9f95a23c 1374 new RGWWriteRequest(fs->get_context(), &ruser, this,
7c673cae
FG
1375 bucket_name(), object_name);
1376 rc = rgwlib.get_fe()->start_req(f->write_req);
1377 if (rc < 0) {
1378 lsubdout(fs->get_context(), rgw, 5)
1379 << __func__
1380 << this->object_name()
1381 << " write start failed " << off
1382 << " (" << rc << ")"
1383 << dendl;
1384 /* zap failed write transaction */
1385 delete f->write_req;
1386 f->write_req = nullptr;
1387 return -EIO;
1388 } else {
1389 if (stateless_open()) {
1390 /* start write timer */
1391 f->write_req->timer_id =
1392 RGWLibFS::write_timer.add_event(
1393 std::chrono::seconds(RGWLibFS::write_completion_interval_s),
1394 WriteCompletion(*this));
1395 }
1396 }
1397 }
1398
3efd9988
FG
1399 int overlap = 0;
1400 if ((static_cast<off_t>(off) < f->write_req->real_ofs) &&
1401 ((f->write_req->real_ofs - off) <= len)) {
1402 overlap = f->write_req->real_ofs - off;
1403 off = f->write_req->real_ofs;
1404 buffer = static_cast<char*>(buffer) + overlap;
1405 len -= overlap;
1406 }
1407
7c673cae
FG
1408 buffer::list bl;
1409 /* XXXX */
1410#if 0
1411 bl.push_back(
1412 buffer::create_static(len, static_cast<char*>(buffer)));
1413#else
1414 bl.push_back(
1415 buffer::copy(static_cast<char*>(buffer), len));
1416#endif
1417
1418 f->write_req->put_data(off, bl);
1419 rc = f->write_req->exec_continue();
1420
1421 if (rc == 0) {
1422 size_t min_size = off + len;
1423 if (min_size > get_size())
1424 set_size(min_size);
1425 if (stateless_open()) {
1426 /* bump write timer */
1427 RGWLibFS::write_timer.adjust_event(
1428 f->write_req->timer_id, std::chrono::seconds(10));
1429 }
1430 } else {
1431 /* continuation failed (e.g., non-contiguous write position) */
1432 lsubdout(fs->get_context(), rgw, 5)
1433 << __func__
1434 << object_name()
1435 << " failed write at position " << off
1436 << " (fails write transaction) "
1437 << dendl;
1438 /* zap failed write transaction */
1439 delete f->write_req;
1440 f->write_req = nullptr;
1441 rc = -EIO;
1442 }
1443
3efd9988 1444 *bytes_written = (rc == 0) ? (len + overlap) : 0;
7c673cae
FG
1445 return rc;
1446 } /* RGWFileHandle::write */
1447
1448 int RGWFileHandle::write_finish(uint32_t flags)
1449 {
1450 unique_lock guard{mtx, std::defer_lock};
1451 int rc = 0;
1452
1453 if (! (flags & FLAG_LOCKED)) {
1454 guard.lock();
1455 }
1456
1457 file* f = get<file>(&variant_type);
1458 if (f && (f->write_req)) {
1459 lsubdout(fs->get_context(), rgw, 10)
1460 << __func__
1461 << " finishing write trans on " << object_name()
1462 << dendl;
1463 rc = rgwlib.get_fe()->finish_req(f->write_req);
1464 if (! rc) {
1465 rc = f->write_req->get_ret();
1466 }
1467 delete f->write_req;
1468 f->write_req = nullptr;
1469 }
1470
1471 return rc;
1472 } /* RGWFileHandle::write_finish */
1473
1474 int RGWFileHandle::close()
1475 {
1476 lock_guard guard(mtx);
1477
1478 int rc = write_finish(FLAG_LOCKED);
1479
1480 flags &= ~FLAG_OPEN;
31f18b77
FG
1481 flags &= ~FLAG_STATELESS_OPEN;
1482
7c673cae
FG
1483 return rc;
1484 } /* RGWFileHandle::close */
1485
1486 RGWFileHandle::file::~file()
1487 {
1488 delete write_req;
1489 }
1490
1491 void RGWFileHandle::clear_state()
1492 {
1493 directory* d = get<directory>(&variant_type);
1494 if (d) {
1495 state.nlink = 2;
1496 d->last_marker = rgw_obj_key{};
1497 }
1498 }
1499
494da23a
TL
1500 void RGWFileHandle::advance_mtime(uint32_t flags) {
1501 /* intended for use on directories, fast-forward mtime so as to
1502 * ensure a new, higher value for the change attribute */
1503 unique_lock uniq(mtx, std::defer_lock);
1504 if (likely(! (flags & RGWFileHandle::FLAG_LOCKED))) {
1505 uniq.lock();
1506 }
1507
1508 /* advance mtime only if stored mtime is older than the
1509 * configured namespace expiration */
1510 auto now = real_clock::now();
1511 auto cmptime = state.mtime;
1512 cmptime.tv_sec +=
1513 fs->get_context()->_conf->rgw_nfs_namespace_expire_secs;
1514 if (cmptime < real_clock::to_timespec(now)) {
1515 /* sets ctime as well as mtime, to avoid masking updates should
1516 * ctime inexplicably hold a higher value */
1517 set_times(now);
1518 }
1519 }
1520
7c673cae
FG
1521 void RGWFileHandle::invalidate() {
1522 RGWLibFS *fs = get_fs();
1523 if (fs->invalidate_cb) {
1524 fs->invalidate_cb(fs->invalidate_arg, get_key().fh_hk);
1525 }
1526 }
1527
1528 int RGWWriteRequest::exec_start() {
1529 struct req_state* s = get_state();
1530
224ce89b 1531 auto compression_type =
9f95a23c 1532 get_store()->svc()->zone->get_zone_params().get_compression_type(
224ce89b
WB
1533 s->bucket_info.placement_rule);
1534
7c673cae 1535 /* not obviously supportable */
11fdf7f2
TL
1536 ceph_assert(! dlo_manifest);
1537 ceph_assert(! slo_info);
7c673cae
FG
1538
1539 perfcounter->inc(l_rgw_put);
1540 op_ret = -EINVAL;
11fdf7f2 1541 rgw_obj obj{s->bucket, s->object};
7c673cae
FG
1542
1543 if (s->object.empty()) {
1544 ldout(s->cct, 0) << __func__ << " called on empty object" << dendl;
1545 goto done;
1546 }
1547
1548 op_ret = get_params();
1549 if (op_ret < 0)
1550 goto done;
1551
1552 op_ret = get_system_versioning_params(s, &olh_epoch, &version_id);
1553 if (op_ret < 0) {
1554 goto done;
1555 }
1556
1557 /* user-supplied MD5 check skipped (not supplied) */
1558 /* early quota check skipped--we don't have size yet */
1559 /* skipping user-supplied etag--we might have one in future, but
1560 * like data it and other attrs would arrive after open */
11fdf7f2
TL
1561
1562 aio.emplace(s->cct->_conf->rgw_put_obj_min_window_size);
1563
1564 if (s->bucket_info.versioning_enabled()) {
1565 if (!version_id.empty()) {
1566 obj.key.set_instance(version_id);
1567 } else {
9f95a23c 1568 get_store()->getRados()->gen_rand_obj_instance_name(&obj);
11fdf7f2
TL
1569 version_id = obj.key.instance;
1570 }
1571 }
1572 processor.emplace(&*aio, get_store(), s->bucket_info,
1573 &s->dest_placement,
1574 s->bucket_owner.get_id(),
1575 *static_cast<RGWObjectCtx *>(s->obj_ctx),
9f95a23c 1576 obj, olh_epoch, s->req_id, this, s->yield);
11fdf7f2 1577
9f95a23c 1578 op_ret = processor->prepare(s->yield);
224ce89b
WB
1579 if (op_ret < 0) {
1580 ldout(s->cct, 20) << "processor->prepare() returned ret=" << op_ret
1581 << dendl;
1582 goto done;
1583 }
11fdf7f2 1584 filter = &*processor;
224ce89b
WB
1585 if (compression_type != "none") {
1586 plugin = Compressor::create(s->cct, compression_type);
11fdf7f2
TL
1587 if (! plugin) {
1588 ldout(s->cct, 1) << "Cannot load plugin for rgw_compression_type "
1589 << compression_type << dendl;
1590 } else {
1591 compressor.emplace(s->cct, plugin, filter);
1592 filter = &*compressor;
1593 }
224ce89b 1594 }
7c673cae
FG
1595
1596 done:
1597 return op_ret;
1598 } /* exec_start */
1599
1600 int RGWWriteRequest::exec_continue()
1601 {
1602 struct req_state* s = get_state();
1603 op_ret = 0;
1604
1605 /* check guards (e.g., contig write) */
11fdf7f2
TL
1606 if (eio) {
1607 ldout(s->cct, 5)
1608 << " chunks arrived in wrong order"
1609 << " (mounting with -o sync required)"
1610 << dendl;
1611 return -EIO;
1612 }
1613
9f95a23c 1614 op_ret = get_store()->getRados()->check_quota(s->bucket_owner.get_id(), s->bucket,
11fdf7f2
TL
1615 user_quota, bucket_quota, real_ofs, true);
1616 /* max_size exceed */
1617 if (op_ret < 0)
7c673cae
FG
1618 return -EIO;
1619
1620 size_t len = data.length();
1621 if (! len)
1622 return 0;
1623
11fdf7f2
TL
1624 hash.Update((const unsigned char *)data.c_str(), data.length());
1625 op_ret = filter->process(std::move(data), ofs);
7c673cae 1626 if (op_ret < 0) {
11fdf7f2 1627 goto done;
7c673cae
FG
1628 }
1629 bytes_written += len;
1630
1631 done:
1632 return op_ret;
1633 } /* exec_continue */
1634
1635 int RGWWriteRequest::exec_finish()
1636 {
1637 buffer::list bl, aclbl, ux_key, ux_attrs;
1638 map<string, string>::iterator iter;
1639 char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
1640 unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
1641 struct req_state* s = get_state();
1642
1643 size_t osize = rgw_fh->get_size();
1644 struct timespec octime = rgw_fh->get_ctime();
1645 struct timespec omtime = rgw_fh->get_mtime();
1646 real_time appx_t = real_clock::now();
1647
3efd9988 1648 s->obj_size = bytes_written;
7c673cae
FG
1649 perfcounter->inc(l_rgw_put_b, s->obj_size);
1650
11fdf7f2
TL
1651 // flush data in filters
1652 op_ret = filter->process({}, s->obj_size);
1653 if (op_ret < 0) {
1654 goto done;
1655 }
1656
9f95a23c 1657 op_ret = get_store()->getRados()->check_quota(s->bucket_owner.get_id(), s->bucket,
11fdf7f2
TL
1658 user_quota, bucket_quota, s->obj_size, true);
1659 /* max_size exceed */
7c673cae
FG
1660 if (op_ret < 0) {
1661 goto done;
1662 }
1663
1664 hash.Final(m);
1665
224ce89b
WB
1666 if (compressor && compressor->is_compressed()) {
1667 bufferlist tmp;
1668 RGWCompressionInfo cs_info;
1669 cs_info.compression_type = plugin->get_type_name();
1670 cs_info.orig_size = s->obj_size;
1671 cs_info.blocks = std::move(compressor->get_compression_blocks());
11fdf7f2 1672 encode(cs_info, tmp);
224ce89b
WB
1673 attrs[RGW_ATTR_COMPRESSION] = tmp;
1674 ldout(s->cct, 20) << "storing " << RGW_ATTR_COMPRESSION
1675 << " with type=" << cs_info.compression_type
1676 << ", orig_size=" << cs_info.orig_size
1677 << ", blocks=" << cs_info.blocks.size() << dendl;
1678 }
1679
7c673cae
FG
1680 buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
1681 etag = calc_md5;
1682
1683 bl.append(etag.c_str(), etag.size() + 1);
1684 emplace_attr(RGW_ATTR_ETAG, std::move(bl));
1685
1686 policy.encode(aclbl);
1687 emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
1688
1689 /* unix attrs */
1690 rgw_fh->set_mtime(real_clock::to_timespec(appx_t));
1691 rgw_fh->set_ctime(real_clock::to_timespec(appx_t));
1692 rgw_fh->set_size(bytes_written);
1693 rgw_fh->encode_attrs(ux_key, ux_attrs);
1694
1695 emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
1696 emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
1697
1698 for (iter = s->generic_attrs.begin(); iter != s->generic_attrs.end();
1699 ++iter) {
1700 buffer::list& attrbl = attrs[iter->first];
1701 const string& val = iter->second;
1702 attrbl.append(val.c_str(), val.size() + 1);
1703 }
1704
3efd9988
FG
1705 op_ret = rgw_get_request_metadata(s->cct, s->info, attrs);
1706 if (op_ret < 0) {
1707 goto done;
1708 }
7c673cae
FG
1709 encode_delete_at_attr(delete_at, attrs);
1710
1711 /* Add a custom metadata to expose the information whether an object
1712 * is an SLO or not. Appending the attribute must be performed AFTER
1713 * processing any input from user in order to prohibit overwriting. */
1714 if (unlikely(!! slo_info)) {
1715 buffer::list slo_userindicator_bl;
11fdf7f2
TL
1716 using ceph::encode;
1717 encode("True", slo_userindicator_bl);
7c673cae
FG
1718 emplace_attr(RGW_ATTR_SLO_UINDICATOR, std::move(slo_userindicator_bl));
1719 }
1720
1721 op_ret = processor->complete(s->obj_size, etag, &mtime, real_time(), attrs,
1722 (delete_at ? *delete_at : real_time()),
9f95a23c
TL
1723 if_match, if_nomatch, nullptr, nullptr, nullptr,
1724 s->yield);
7c673cae
FG
1725 if (op_ret != 0) {
1726 /* revert attr updates */
1727 rgw_fh->set_mtime(omtime);
1728 rgw_fh->set_ctime(octime);
1729 rgw_fh->set_size(osize);
1730 }
1731
1732 done:
11fdf7f2 1733 perfcounter->tinc(l_rgw_put_lat, s->time_elapsed());
7c673cae
FG
1734 return op_ret;
1735 } /* exec_finish */
1736
1737} /* namespace rgw */
1738
1739/* librgw */
1740extern "C" {
1741
1742void rgwfile_version(int *major, int *minor, int *extra)
1743{
1744 if (major)
1745 *major = LIBRGW_FILE_VER_MAJOR;
1746 if (minor)
1747 *minor = LIBRGW_FILE_VER_MINOR;
1748 if (extra)
1749 *extra = LIBRGW_FILE_VER_EXTRA;
1750}
1751
1752/*
1753 attach rgw namespace
1754*/
1755 int rgw_mount(librgw_t rgw, const char *uid, const char *acc_key,
1756 const char *sec_key, struct rgw_fs **rgw_fs,
1757 uint32_t flags)
1758{
1759 int rc = 0;
1760
1761 /* stash access data for "mount" */
1762 RGWLibFS* new_fs = new RGWLibFS(static_cast<CephContext*>(rgw), uid, acc_key,
3efd9988 1763 sec_key, "/");
11fdf7f2 1764 ceph_assert(new_fs);
3efd9988
FG
1765
1766 rc = new_fs->authorize(rgwlib.get_store());
1767 if (rc != 0) {
1768 delete new_fs;
1769 return -EINVAL;
1770 }
1771
1772 /* register fs for shared gc */
1773 rgwlib.get_fe()->get_process()->register_fs(new_fs);
1774
1775 struct rgw_fs *fs = new_fs->get_fs();
1776 fs->rgw = rgw;
1777
1778 /* XXX we no longer assume "/" is unique, but we aren't tracking the
1779 * roots atm */
1780
1781 *rgw_fs = fs;
1782
1783 return 0;
1784}
1785
1786int rgw_mount2(librgw_t rgw, const char *uid, const char *acc_key,
1787 const char *sec_key, const char *root, struct rgw_fs **rgw_fs,
1788 uint32_t flags)
1789{
1790 int rc = 0;
1791
1792 /* stash access data for "mount" */
1793 RGWLibFS* new_fs = new RGWLibFS(static_cast<CephContext*>(rgw), uid, acc_key,
1794 sec_key, root);
11fdf7f2 1795 ceph_assert(new_fs);
7c673cae
FG
1796
1797 rc = new_fs->authorize(rgwlib.get_store());
1798 if (rc != 0) {
1799 delete new_fs;
1800 return -EINVAL;
1801 }
1802
1803 /* register fs for shared gc */
1804 rgwlib.get_fe()->get_process()->register_fs(new_fs);
1805
1806 struct rgw_fs *fs = new_fs->get_fs();
1807 fs->rgw = rgw;
1808
1809 /* XXX we no longer assume "/" is unique, but we aren't tracking the
1810 * roots atm */
1811
1812 *rgw_fs = fs;
1813
1814 return 0;
1815}
1816
1817/*
1818 register invalidate callbacks
1819*/
1820int rgw_register_invalidate(struct rgw_fs *rgw_fs, rgw_fh_callback_t cb,
1821 void *arg, uint32_t flags)
1822
1823{
1824 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1825 return fs->register_invalidate(cb, arg, flags);
1826}
1827
1828/*
1829 detach rgw namespace
1830*/
1831int rgw_umount(struct rgw_fs *rgw_fs, uint32_t flags)
1832{
1833 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1834 fs->close();
7c673cae
FG
1835 return 0;
1836}
1837
1838/*
1839 get filesystem attributes
1840*/
1841int rgw_statfs(struct rgw_fs *rgw_fs,
1842 struct rgw_file_handle *parent_fh,
1843 struct rgw_statvfs *vfs_st, uint32_t flags)
1844{
1845 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
28e407b8 1846 struct rados_cluster_stat_t stats;
9f95a23c 1847 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), *fs->get_user());
28e407b8 1848
9f95a23c 1849 RGWGetClusterStatReq req(fs->get_context(), &ruser, stats);
28e407b8
AA
1850 int rc = rgwlib.get_fe()->execute_req(&req);
1851 if (rc < 0) {
1852 lderr(fs->get_context()) << "ERROR: getting total cluster usage"
1853 << cpp_strerror(-rc) << dendl;
1854 return rc;
1855 }
7c673cae 1856
28e407b8
AA
1857 //Set block size to 1M.
1858 constexpr uint32_t CEPH_BLOCK_SHIFT = 20;
1859 vfs_st->f_bsize = 1 << CEPH_BLOCK_SHIFT;
1860 vfs_st->f_frsize = 1 << CEPH_BLOCK_SHIFT;
1861 vfs_st->f_blocks = stats.kb >> (CEPH_BLOCK_SHIFT - 10);
1862 vfs_st->f_bfree = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
1863 vfs_st->f_bavail = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
1864 vfs_st->f_files = stats.num_objects;
1865 vfs_st->f_ffree = -1;
3efd9988
FG
1866 vfs_st->f_fsid[0] = fs->get_fsid();
1867 vfs_st->f_fsid[1] = fs->get_fsid();
7c673cae
FG
1868 vfs_st->f_flag = 0;
1869 vfs_st->f_namemax = 4096;
1870 return 0;
1871}
1872
1873/*
1874 generic create -- create an empty regular file
1875*/
1876int rgw_create(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1877 const char *name, struct stat *st, uint32_t mask,
1878 struct rgw_file_handle **fh, uint32_t posix_flags,
1879 uint32_t flags)
1880{
1881 using std::get;
1882
1883 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1884 RGWFileHandle* parent = get_rgwfh(parent_fh);
1885
1886 if ((! parent) ||
1887 (parent->is_root()) ||
1888 (parent->is_file())) {
1889 /* bad parent */
1890 return -EINVAL;
1891 }
1892
1893 MkObjResult fhr = fs->create(parent, name, st, mask, flags);
1894 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1895
1896 if (nfh)
1897 *fh = nfh->get_fh();
1898
1899 return get<1>(fhr);
1900} /* rgw_create */
1901
11fdf7f2
TL
1902/*
1903 create a symbolic link
1904 */
1905int rgw_symlink(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1906 const char *name, const char *link_path, struct stat *st, uint32_t mask,
1907 struct rgw_file_handle **fh, uint32_t posix_flags,
1908 uint32_t flags)
1909{
1910 using std::get;
1911
1912 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1913 RGWFileHandle* parent = get_rgwfh(parent_fh);
1914
1915 if ((! parent) ||
1916 (parent->is_root()) ||
1917 (parent->is_file())) {
1918 /* bad parent */
1919 return -EINVAL;
1920 }
1921
1922 MkObjResult fhr = fs->symlink(parent, name, link_path, st, mask, flags);
1923 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1924
1925 if (nfh)
1926 *fh = nfh->get_fh();
1927
1928 return get<1>(fhr);
1929} /* rgw_symlink */
1930
7c673cae
FG
1931/*
1932 create a new directory
1933*/
1934int rgw_mkdir(struct rgw_fs *rgw_fs,
1935 struct rgw_file_handle *parent_fh,
1936 const char *name, struct stat *st, uint32_t mask,
1937 struct rgw_file_handle **fh, uint32_t flags)
1938{
1939 using std::get;
1940
1941 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1942 RGWFileHandle* parent = get_rgwfh(parent_fh);
1943
1944 if (! parent) {
1945 /* bad parent */
1946 return -EINVAL;
1947 }
1948
1949 MkObjResult fhr = fs->mkdir(parent, name, st, mask, flags);
1950 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1951
1952 if (nfh)
1953 *fh = nfh->get_fh();
1954
1955 return get<1>(fhr);
1956} /* rgw_mkdir */
1957
1958/*
1959 rename object
1960*/
1961int rgw_rename(struct rgw_fs *rgw_fs,
1962 struct rgw_file_handle *src, const char* src_name,
1963 struct rgw_file_handle *dst, const char* dst_name,
1964 uint32_t flags)
1965{
1966 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1967
1968 RGWFileHandle* src_fh = get_rgwfh(src);
1969 RGWFileHandle* dst_fh = get_rgwfh(dst);
1970
1971 return fs->rename(src_fh, dst_fh, src_name, dst_name);
1972}
1973
1974/*
1975 remove file or directory
1976*/
1977int rgw_unlink(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1978 const char *name, uint32_t flags)
1979{
1980 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1981 RGWFileHandle* parent = get_rgwfh(parent_fh);
1982
1983 return fs->unlink(parent, name);
1984}
1985
1986/*
1987 lookup object by name (POSIX style)
1988*/
1989int rgw_lookup(struct rgw_fs *rgw_fs,
1990 struct rgw_file_handle *parent_fh, const char* path,
eafe8130
TL
1991 struct rgw_file_handle **fh,
1992 struct stat *st, uint32_t mask, uint32_t flags)
7c673cae
FG
1993{
1994 //CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
1995 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1996
1997 RGWFileHandle* parent = get_rgwfh(parent_fh);
1998 if ((! parent) ||
1999 (! parent->is_dir())) {
2000 /* bad parent */
2001 return -EINVAL;
2002 }
2003
2004 RGWFileHandle* rgw_fh;
2005 LookupFHResult fhr;
2006
2007 if (parent->is_root()) {
2008 /* special: parent lookup--note lack of ref()! */
2009 if (unlikely((strcmp(path, "..") == 0) ||
2010 (strcmp(path, "/") == 0))) {
2011 rgw_fh = parent;
2012 } else {
31f18b77
FG
2013 RGWLibFS::BucketStats bstat;
2014 fhr = fs->stat_bucket(parent, path, bstat, RGWFileHandle::FLAG_NONE);
7c673cae
FG
2015 rgw_fh = get<0>(fhr);
2016 if (! rgw_fh)
2017 return -ENOENT;
2018 }
2019 } else {
224ce89b
WB
2020 /* special: after readdir--note extra ref()! */
2021 if (unlikely((strcmp(path, "..") == 0))) {
2022 rgw_fh = parent;
2023 lsubdout(fs->get_context(), rgw, 17)
11fdf7f2 2024 << __func__ << " BANG"<< *rgw_fh
224ce89b
WB
2025 << dendl;
2026 fs->ref(rgw_fh);
2027 } else {
224ce89b
WB
2028 enum rgw_fh_type fh_type = fh_type_of(flags);
2029
2030 uint32_t sl_flags = (flags & RGW_LOOKUP_FLAG_RCB)
2031 ? RGWFileHandle::FLAG_NONE
2032 : RGWFileHandle::FLAG_EXACT_MATCH;
2033
eafe8130
TL
2034 bool fast_attrs= fs->get_context()->_conf->rgw_nfs_s3_fast_attrs;
2035
2036 if ((flags & RGW_LOOKUP_FLAG_RCB) && fast_attrs) {
2037 /* FAKE STAT--this should mean, interpolate special
2038 * owner, group, and perms masks */
2039 fhr = fs->fake_leaf(parent, path, fh_type, st, mask, sl_flags);
2040 } else {
2041 if ((fh_type == RGW_FS_TYPE_DIRECTORY) && fast_attrs) {
2042 /* trust cached dir, if present */
2043 fhr = fs->lookup_fh(parent, path, RGWFileHandle::FLAG_DIRECTORY);
2044 if (get<0>(fhr)) {
2045 rgw_fh = get<0>(fhr);
2046 goto done;
2047 }
2048 }
2049 fhr = fs->stat_leaf(parent, path, fh_type, sl_flags);
2050 }
224ce89b
WB
2051 if (! get<0>(fhr)) {
2052 if (! (flags & RGW_LOOKUP_FLAG_CREATE))
2053 return -ENOENT;
2054 else
2055 fhr = fs->lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
2056 }
2057 rgw_fh = get<0>(fhr);
7c673cae 2058 }
7c673cae
FG
2059 } /* !root */
2060
eafe8130 2061done:
7c673cae
FG
2062 struct rgw_file_handle *rfh = rgw_fh->get_fh();
2063 *fh = rfh;
2064
2065 return 0;
2066} /* rgw_lookup */
2067
2068/*
2069 lookup object by handle (NFS style)
2070*/
2071int rgw_lookup_handle(struct rgw_fs *rgw_fs, struct rgw_fh_hk *fh_hk,
2072 struct rgw_file_handle **fh, uint32_t flags)
2073{
2074 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2075
2076 RGWFileHandle* rgw_fh = fs->lookup_handle(*fh_hk);
2077 if (! rgw_fh) {
2078 /* not found */
2079 return -ENOENT;
2080 }
2081
2082 struct rgw_file_handle *rfh = rgw_fh->get_fh();
2083 *fh = rfh;
2084
2085 return 0;
2086}
2087
2088/*
2089 * release file handle
2090 */
2091int rgw_fh_rele(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2092 uint32_t flags)
2093{
2094 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2095 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2096
2097 lsubdout(fs->get_context(), rgw, 17)
2098 << __func__ << " " << *rgw_fh
2099 << dendl;
2100
2101 fs->unref(rgw_fh);
2102 return 0;
2103}
2104
2105/*
2106 get unix attributes for object
2107*/
2108int rgw_getattr(struct rgw_fs *rgw_fs,
2109 struct rgw_file_handle *fh, struct stat *st, uint32_t flags)
2110{
2111 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2112 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2113
2114 return fs->getattr(rgw_fh, st);
2115}
2116
2117/*
2118 set unix attributes for object
2119*/
2120int rgw_setattr(struct rgw_fs *rgw_fs,
2121 struct rgw_file_handle *fh, struct stat *st,
2122 uint32_t mask, uint32_t flags)
2123{
2124 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2125 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2126
2127 return fs->setattr(rgw_fh, st, mask, flags);
2128}
2129
2130/*
2131 truncate file
2132*/
/* Truncate file.  This is a no-op: none of the arguments are examined
 * and success (0) is always reported. */
int rgw_truncate(struct rgw_fs *rgw_fs,
                 struct rgw_file_handle *fh, uint64_t size, uint32_t flags)
{
  constexpr int rc_success = 0;
  return rc_success;
}
2138
2139/*
2140 open file
2141*/
2142int rgw_open(struct rgw_fs *rgw_fs,
2143 struct rgw_file_handle *fh, uint32_t posix_flags, uint32_t flags)
2144{
2145 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2146
28e407b8 2147 /* XXX
7c673cae
FG
2148 * need to track specific opens--at least read opens and
2149 * a write open; we need to know when a write open is returned,
2150 * that closes a write transaction
2151 *
2152 * for now, we will support single-open only, it's preferable to
2153 * anything we can otherwise do without access to the NFS state
2154 */
2155 if (! rgw_fh->is_file())
2156 return -EISDIR;
2157
2158 return rgw_fh->open(flags);
2159}
2160
2161/*
2162 close file
2163*/
2164int rgw_close(struct rgw_fs *rgw_fs,
2165 struct rgw_file_handle *fh, uint32_t flags)
2166{
2167 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2168 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2169 int rc = rgw_fh->close(/* XXX */);
2170
2171 if (flags & RGW_CLOSE_FLAG_RELE)
2172 fs->unref(rgw_fh);
2173
2174 return rc;
2175}
2176
2177int rgw_readdir(struct rgw_fs *rgw_fs,
2178 struct rgw_file_handle *parent_fh, uint64_t *offset,
2179 rgw_readdir_cb rcb, void *cb_arg, bool *eof,
2180 uint32_t flags)
2181{
2182 RGWFileHandle* parent = get_rgwfh(parent_fh);
2183 if (! parent) {
2184 /* bad parent */
2185 return -EINVAL;
2186 }
3efd9988
FG
2187
2188 lsubdout(parent->get_fs()->get_context(), rgw, 15)
2189 << __func__
2190 << " offset=" << *offset
2191 << dendl;
2192
2193 if ((*offset == 0) &&
2194 (flags & RGW_READDIR_FLAG_DOTDOT)) {
2195 /* send '.' and '..' with their NFS-defined offsets */
eafe8130
TL
2196 rcb(".", cb_arg, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
2197 rcb("..", cb_arg, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
3efd9988
FG
2198 }
2199
7c673cae
FG
2200 int rc = parent->readdir(rcb, cb_arg, offset, eof, flags);
2201 return rc;
3efd9988
FG
2202} /* rgw_readdir */
2203
2204/* enumeration continuing from name */
2205int rgw_readdir2(struct rgw_fs *rgw_fs,
2206 struct rgw_file_handle *parent_fh, const char *name,
2207 rgw_readdir_cb rcb, void *cb_arg, bool *eof,
2208 uint32_t flags)
2209{
2210 RGWFileHandle* parent = get_rgwfh(parent_fh);
2211 if (! parent) {
2212 /* bad parent */
2213 return -EINVAL;
2214 }
2215
2216 lsubdout(parent->get_fs()->get_context(), rgw, 15)
2217 << __func__
94b18763 2218 << " offset=" << ((name) ? name : "(nil)")
3efd9988
FG
2219 << dendl;
2220
2221 if ((! name) &&
2222 (flags & RGW_READDIR_FLAG_DOTDOT)) {
2223 /* send '.' and '..' with their NFS-defined offsets */
eafe8130
TL
2224 rcb(".", cb_arg, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
2225 rcb("..", cb_arg, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
3efd9988
FG
2226 }
2227
2228 int rc = parent->readdir(rcb, cb_arg, name, eof, flags);
2229 return rc;
2230} /* rgw_readdir2 */
7c673cae 2231
c07f9fc5
FG
2232/* project offset of dirent name */
2233int rgw_dirent_offset(struct rgw_fs *rgw_fs,
2234 struct rgw_file_handle *parent_fh,
2235 const char *name, int64_t *offset,
2236 uint32_t flags)
2237{
2238 RGWFileHandle* parent = get_rgwfh(parent_fh);
2239 if ((! parent)) {
2240 /* bad parent */
2241 return -EINVAL;
2242 }
2243 std::string sname{name};
2244 int rc = parent->offset_of(sname, offset, flags);
2245 return rc;
2246}
2247
7c673cae
FG
2248/*
2249 read data from file
2250*/
2251int rgw_read(struct rgw_fs *rgw_fs,
2252 struct rgw_file_handle *fh, uint64_t offset,
2253 size_t length, size_t *bytes_read, void *buffer,
2254 uint32_t flags)
2255{
2256 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2257 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2258
2259 return fs->read(rgw_fh, offset, length, bytes_read, buffer, flags);
2260}
2261
11fdf7f2
TL
2262/*
2263 read symbolic link
2264*/
2265int rgw_readlink(struct rgw_fs *rgw_fs,
2266 struct rgw_file_handle *fh, uint64_t offset,
2267 size_t length, size_t *bytes_read, void *buffer,
2268 uint32_t flags)
2269{
2270 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2271 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2272
2273 return fs->readlink(rgw_fh, offset, length, bytes_read, buffer, flags);
2274}
2275
7c673cae
FG
2276/*
2277 write data to file
2278*/
2279int rgw_write(struct rgw_fs *rgw_fs,
2280 struct rgw_file_handle *fh, uint64_t offset,
2281 size_t length, size_t *bytes_written, void *buffer,
2282 uint32_t flags)
2283{
2284 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2285 int rc;
2286
2287 *bytes_written = 0;
2288
2289 if (! rgw_fh->is_file())
2290 return -EISDIR;
2291
3efd9988
FG
2292 if (! rgw_fh->is_open()) {
2293 if (flags & RGW_OPEN_FLAG_V3) {
2294 rc = rgw_fh->open(flags);
2295 if (!! rc)
2296 return rc;
2297 } else
2298 return -EPERM;
2299 }
7c673cae
FG
2300
2301 rc = rgw_fh->write(offset, length, bytes_written, buffer);
2302
2303 return rc;
2304}
2305
2306/*
2307 read data from file (vector)
2308*/
2309class RGWReadV
2310{
2311 buffer::list bl;
2312 struct rgw_vio* vio;
2313
2314public:
2315 RGWReadV(buffer::list& _bl, rgw_vio* _vio) : vio(_vio) {
2316 bl.claim(_bl);
2317 }
2318
2319 struct rgw_vio* get_vio() { return vio; }
2320
11fdf7f2 2321 const auto& buffers() { return bl.buffers(); }
7c673cae
FG
2322
2323 unsigned /* XXX */ length() { return bl.length(); }
2324
2325};
2326
2327void rgw_readv_rele(struct rgw_uio *uio, uint32_t flags)
2328{
2329 RGWReadV* rdv = static_cast<RGWReadV*>(uio->uio_p1);
2330 rdv->~RGWReadV();
2331 ::operator delete(rdv);
2332}
2333
2334int rgw_readv(struct rgw_fs *rgw_fs,
2335 struct rgw_file_handle *fh, rgw_uio *uio, uint32_t flags)
2336{
2337#if 0 /* XXX */
2338 CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2339 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2340 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2341
2342 if (! rgw_fh->is_file())
2343 return -EINVAL;
2344
2345 int rc = 0;
2346
2347 buffer::list bl;
2348 RGWGetObjRequest req(cct, fs->get_user(), rgw_fh->bucket_name(),
2349 rgw_fh->object_name(), uio->uio_offset, uio->uio_resid,
2350 bl);
2351 req.do_hexdump = false;
2352
2353 rc = rgwlib.get_fe()->execute_req(&req);
2354
2355 if (! rc) {
2356 RGWReadV* rdv = static_cast<RGWReadV*>(
2357 ::operator new(sizeof(RGWReadV) +
2358 (bl.buffers().size() * sizeof(struct rgw_vio))));
2359
2360 (void) new (rdv)
2361 RGWReadV(bl, reinterpret_cast<rgw_vio*>(rdv+sizeof(RGWReadV)));
2362
2363 uio->uio_p1 = rdv;
2364 uio->uio_cnt = rdv->buffers().size();
2365 uio->uio_resid = rdv->length();
2366 uio->uio_vio = rdv->get_vio();
2367 uio->uio_rele = rgw_readv_rele;
2368
2369 int ix = 0;
2370 auto& buffers = rdv->buffers();
2371 for (auto& bp : buffers) {
2372 rgw_vio *vio = &(uio->uio_vio[ix]);
2373 vio->vio_base = const_cast<char*>(bp.c_str());
2374 vio->vio_len = bp.length();
2375 vio->vio_u1 = nullptr;
2376 vio->vio_p1 = nullptr;
2377 ++ix;
2378 }
2379 }
2380
2381 return rc;
2382#else
2383 return 0;
2384#endif
2385}
2386
2387/*
2388 write data to file (vector)
2389*/
2390int rgw_writev(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2391 rgw_uio *uio, uint32_t flags)
2392{
2393
2394 return -ENOTSUP;
2395
2396 CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2397 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2398 RGWFileHandle* rgw_fh = get_rgwfh(fh);
9f95a23c 2399 rgw::sal::RGWRadosUser ruser(rgwlib.get_store(), *fs->get_user());
7c673cae
FG
2400
2401 if (! rgw_fh->is_file())
2402 return -EINVAL;
2403
2404 buffer::list bl;
2405 for (unsigned int ix = 0; ix < uio->uio_cnt; ++ix) {
2406 rgw_vio *vio = &(uio->uio_vio[ix]);
2407 bl.push_back(
2408 buffer::create_static(vio->vio_len,
2409 static_cast<char*>(vio->vio_base)));
2410 }
2411
2412 std::string oname = rgw_fh->relative_object_name();
9f95a23c 2413 RGWPutObjRequest req(cct, &ruser, rgw_fh->bucket_name(),
7c673cae
FG
2414 oname, bl);
2415
2416 int rc = rgwlib.get_fe()->execute_req(&req);
2417
2418 /* XXX update size (in request) */
2419
2420 return rc;
2421}
2422
2423/*
2424 sync written data
2425*/
2426int rgw_fsync(struct rgw_fs *rgw_fs, struct rgw_file_handle *handle,
2427 uint32_t flags)
2428{
2429 return 0;
2430}
2431
2432int rgw_commit(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2433 uint64_t offset, uint64_t length, uint32_t flags)
2434{
2435 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2436
2437 return rgw_fh->commit(offset, length, RGWFileHandle::FLAG_NONE);
2438}
2439
2440} /* extern "C" */