]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_file.cc
import ceph 14.2.5
[ceph.git] / ceph / src / rgw / rgw_file.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "include/compat.h"
5 #include "include/rados/rgw_file.h"
6
7 #include <sys/types.h>
8 #include <sys/stat.h>
9
10 #include "rgw_lib.h"
11 #include "rgw_rados.h"
12 #include "rgw_resolve.h"
13 #include "rgw_op.h"
14 #include "rgw_rest.h"
15 #include "rgw_acl.h"
16 #include "rgw_acl_s3.h"
17 #include "rgw_frontend.h"
18 #include "rgw_request.h"
19 #include "rgw_process.h"
20 #include "rgw_rest_user.h"
21 #include "rgw_rest_s3.h"
22 #include "rgw_os_lib.h"
23 #include "rgw_auth_s3.h"
24 #include "rgw_user.h"
25 #include "rgw_bucket.h"
26 #include "rgw_zone.h"
27 #include "rgw_file.h"
28 #include "rgw_lib_frontend.h"
29 #include "rgw_perf_counters.h"
30 #include "common/errno.h"
31
32 #include <atomic>
33
34 #define dout_subsys ceph_subsys_rgw
35
36 using namespace rgw;
37
38 namespace rgw {
39
/* librgw instance; declared here, defined in another translation unit */
extern RGWLib rgwlib;

/* definition of the static RGWFileHandle::root_name member */
const string RGWFileHandle::root_name = "/";

/* definition of the static RGWLibFS::fs_inst_counter member */
std::atomic<uint32_t> RGWLibFS::fs_inst_counter;

/* NOTE(review): the _s suffix suggests the unit is seconds -- confirm
 * against the users of this value */
uint32_t RGWLibFS::write_completion_interval_s = 10;

/* shared timer, constructed with the construct_suspended tag
 * (presumably not running until explicitly resumed -- confirm) */
ceph::timer<ceph::mono_clock> RGWLibFS::write_timer{
  ceph::construct_suspended};
50
51 inline int valid_fs_bucket_name(const string& name) {
52 int rc = valid_s3_bucket_name(name, false /* relaxed */);
53 if (rc != 0) {
54 if (name.size() > 255)
55 return -ENAMETOOLONG;
56 return -EINVAL;
57 }
58 return 0;
59 }
60
61 inline int valid_fs_object_name(const string& name) {
62 int rc = valid_s3_object_name(name);
63 if (rc != 0) {
64 if (name.size() > 1024)
65 return -ENAMETOOLONG;
66 return -EINVAL;
67 }
68 return 0;
69 }
70
71 LookupFHResult RGWLibFS::stat_bucket(RGWFileHandle* parent, const char *path,
72 RGWLibFS::BucketStats& bs,
73 uint32_t flags)
74 {
75 LookupFHResult fhr{nullptr, 0};
76 std::string bucket_name{path};
77 RGWStatBucketRequest req(cct, get_user(), bucket_name, bs);
78
79 int rc = rgwlib.get_fe()->execute_req(&req);
80 if ((rc == 0) &&
81 (req.get_ret() == 0) &&
82 (req.matched())) {
83 fhr = lookup_fh(parent, path,
84 (flags & RGWFileHandle::FLAG_LOCKED)|
85 RGWFileHandle::FLAG_CREATE|
86 RGWFileHandle::FLAG_BUCKET);
87 if (get<0>(fhr)) {
88 RGWFileHandle* rgw_fh = get<0>(fhr);
89 if (! (flags & RGWFileHandle::FLAG_LOCKED)) {
90 rgw_fh->mtx.lock();
91 }
92 rgw_fh->set_times(req.get_ctime());
93 /* restore attributes */
94 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
95 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
96 if (ux_key && ux_attrs) {
97 DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
98 if (get<0>(dar) || get<1>(dar)) {
99 update_fh(rgw_fh);
100 }
101 }
102 if (! (flags & RGWFileHandle::FLAG_LOCKED)) {
103 rgw_fh->mtx.unlock();
104 }
105 }
106 }
107 return fhr;
108 }
109
110 LookupFHResult RGWLibFS::fake_leaf(RGWFileHandle* parent,
111 const char *path,
112 enum rgw_fh_type type,
113 struct stat *st, uint32_t st_mask,
114 uint32_t flags)
115 {
116 /* synthesize a minimal handle from parent, path, type, and st */
117 using std::get;
118
119 flags |= RGWFileHandle::FLAG_CREATE;
120
121 switch (type) {
122 case RGW_FS_TYPE_DIRECTORY:
123 flags |= RGWFileHandle::FLAG_DIRECTORY;
124 break;
125 default:
126 /* file */
127 break;
128 };
129
130 LookupFHResult fhr = lookup_fh(parent, path, flags);
131 if (get<0>(fhr)) {
132 RGWFileHandle* rgw_fh = get<0>(fhr);
133 if (st) {
134 lock_guard guard(rgw_fh->mtx);
135 if (st_mask & RGW_SETATTR_SIZE) {
136 rgw_fh->set_size(st->st_size);
137 }
138 if (st_mask & RGW_SETATTR_MTIME) {
139 rgw_fh->set_times(st->st_mtim);
140 }
141 } /* st */
142 } /* rgw_fh */
143 return fhr;
144 } /* RGWLibFS::fake_leaf */
145
146 LookupFHResult RGWLibFS::stat_leaf(RGWFileHandle* parent,
147 const char *path,
148 enum rgw_fh_type type,
149 uint32_t flags)
150 {
151 /* find either-of <object_name>, <object_name/>, only one of
152 * which should exist; atomicity? */
153 using std::get;
154
155 LookupFHResult fhr{nullptr, 0};
156
157 /* XXX the need for two round-trip operations to identify file or
158 * directory leaf objects is unecessary--the current proposed
159 * mechanism to avoid this is to store leaf object names with an
160 * object locator w/o trailing slash */
161
162 std::string obj_path = parent->format_child_name(path, false);
163
164 for (auto ix : { 0, 1, 2 }) {
165 switch (ix) {
166 case 0:
167 {
168 /* type hint */
169 if (type == RGW_FS_TYPE_DIRECTORY)
170 continue;
171
172 RGWStatObjRequest req(cct, get_user(),
173 parent->bucket_name(), obj_path,
174 RGWStatObjRequest::FLAG_NONE);
175 int rc = rgwlib.get_fe()->execute_req(&req);
176 if ((rc == 0) &&
177 (req.get_ret() == 0)) {
178 fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
179 if (get<0>(fhr)) {
180 RGWFileHandle* rgw_fh = get<0>(fhr);
181 lock_guard guard(rgw_fh->mtx);
182 rgw_fh->set_size(req.get_size());
183 rgw_fh->set_times(req.get_mtime());
184 /* restore attributes */
185 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
186 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
187 rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
188 rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
189 if (ux_key && ux_attrs) {
190 DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
191 if (get<0>(dar) || get<1>(dar)) {
192 update_fh(rgw_fh);
193 }
194 }
195 }
196 goto done;
197 }
198 }
199 break;
200 case 1:
201 {
202 /* try dir form */
203 /* type hint */
204 if (type == RGW_FS_TYPE_FILE)
205 continue;
206
207 obj_path += "/";
208 RGWStatObjRequest req(cct, get_user(),
209 parent->bucket_name(), obj_path,
210 RGWStatObjRequest::FLAG_NONE);
211 int rc = rgwlib.get_fe()->execute_req(&req);
212 if ((rc == 0) &&
213 (req.get_ret() == 0)) {
214 fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_DIRECTORY);
215 if (get<0>(fhr)) {
216 RGWFileHandle* rgw_fh = get<0>(fhr);
217 lock_guard guard(rgw_fh->mtx);
218 rgw_fh->set_size(req.get_size());
219 rgw_fh->set_times(req.get_mtime());
220 /* restore attributes */
221 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
222 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
223 rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
224 rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
225 if (ux_key && ux_attrs) {
226 DecodeAttrsResult dar = rgw_fh->decode_attrs(ux_key, ux_attrs);
227 if (get<0>(dar) || get<1>(dar)) {
228 update_fh(rgw_fh);
229 }
230 }
231 }
232 goto done;
233 }
234 }
235 break;
236 case 2:
237 {
238 std::string object_name{path};
239 RGWStatLeafRequest req(cct, get_user(), parent, object_name);
240 int rc = rgwlib.get_fe()->execute_req(&req);
241 if ((rc == 0) &&
242 (req.get_ret() == 0)) {
243 if (req.matched) {
244 /* we need rgw object's key name equal to file name, if
245 * not return NULL */
246 if ((flags & RGWFileHandle::FLAG_EXACT_MATCH) &&
247 !req.exact_matched) {
248 lsubdout(get_context(), rgw, 15)
249 << __func__
250 << ": stat leaf not exact match file name = "
251 << path << dendl;
252 goto done;
253 }
254 fhr = lookup_fh(parent, path,
255 RGWFileHandle::FLAG_CREATE|
256 ((req.is_dir) ?
257 RGWFileHandle::FLAG_DIRECTORY :
258 RGWFileHandle::FLAG_NONE));
259 /* XXX we don't have an object--in general, there need not
260 * be one (just a path segment in some other object). In
261 * actual leaf an object exists, but we'd need another round
262 * trip to get attrs */
263 if (get<0>(fhr)) {
264 /* for now use the parent object's mtime */
265 RGWFileHandle* rgw_fh = get<0>(fhr);
266 lock_guard guard(rgw_fh->mtx);
267 rgw_fh->set_mtime(parent->get_mtime());
268 }
269 }
270 }
271 }
272 break;
273 default:
274 /* not reached */
275 break;
276 }
277 }
278 done:
279 return fhr;
280 } /* RGWLibFS::stat_leaf */
281
282 int RGWLibFS::read(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
283 size_t* bytes_read, void* buffer, uint32_t flags)
284 {
285 if (! rgw_fh->is_file())
286 return -EINVAL;
287
288 if (rgw_fh->deleted())
289 return -ESTALE;
290
291 RGWReadRequest req(get_context(), get_user(), rgw_fh, offset, length,
292 buffer);
293
294 int rc = rgwlib.get_fe()->execute_req(&req);
295 if ((rc == 0) &&
296 (req.get_ret() == 0)) {
297 lock_guard guard(rgw_fh->mtx);
298 rgw_fh->set_atime(real_clock::to_timespec(real_clock::now()));
299 *bytes_read = req.nread;
300 }
301
302 return rc;
303 }
304
305 int RGWLibFS::readlink(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
306 size_t* bytes_read, void* buffer, uint32_t flags)
307 {
308 if (! rgw_fh->is_link())
309 return -EINVAL;
310
311 if (rgw_fh->deleted())
312 return -ESTALE;
313
314 RGWReadRequest req(get_context(), get_user(), rgw_fh, offset, length,
315 buffer);
316
317 int rc = rgwlib.get_fe()->execute_req(&req);
318 if ((rc == 0) &&
319 (req.get_ret() == 0)) {
320 lock_guard(rgw_fh->mtx);
321 rgw_fh->set_atime(real_clock::to_timespec(real_clock::now()));
322 *bytes_read = req.nread;
323 }
324
325 return rc;
326 }
327
328 int RGWLibFS::unlink(RGWFileHandle* rgw_fh, const char* name, uint32_t flags)
329 {
330 int rc = 0;
331 BucketStats bs;
332 RGWFileHandle* parent = nullptr;
333 RGWFileHandle* bkt_fh = nullptr;
334
335 if (unlikely(flags & RGWFileHandle::FLAG_UNLINK_THIS)) {
336 /* LOCKED */
337 parent = rgw_fh->get_parent();
338 } else {
339 /* atomicity */
340 parent = rgw_fh;
341 LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_LOCK);
342 rgw_fh = get<0>(fhr);
343 /* LOCKED */
344 }
345
346 if (parent->is_root()) {
347 /* a bucket may have an object storing Unix attributes, check
348 * for and delete it */
349 LookupFHResult fhr;
350 fhr = stat_bucket(parent, name, bs, (rgw_fh) ?
351 RGWFileHandle::FLAG_LOCKED :
352 RGWFileHandle::FLAG_NONE);
353 bkt_fh = get<0>(fhr);
354 if (unlikely(! bkt_fh)) {
355 /* implies !rgw_fh, so also !LOCKED */
356 return -ENOENT;
357 }
358
359 if (bs.num_entries > 1) {
360 unref(bkt_fh); /* return stat_bucket ref */
361 if (likely(!! rgw_fh)) { /* return lock and ref from
362 * lookup_fh (or caller in the
363 * special case of
364 * RGWFileHandle::FLAG_UNLINK_THIS) */
365 rgw_fh->mtx.unlock();
366 unref(rgw_fh);
367 }
368 return -ENOTEMPTY;
369 } else {
370 /* delete object w/key "<bucket>/" (uxattrs), if any */
371 string oname{"/"};
372 RGWDeleteObjRequest req(cct, get_user(), bkt_fh->bucket_name(), oname);
373 rc = rgwlib.get_fe()->execute_req(&req);
374 /* don't care if ENOENT */
375 unref(bkt_fh);
376 }
377
378 string bname{name};
379 RGWDeleteBucketRequest req(cct, get_user(), bname);
380 rc = rgwlib.get_fe()->execute_req(&req);
381 if (! rc) {
382 rc = req.get_ret();
383 }
384 } else {
385 /*
386 * leaf object
387 */
388 if (! rgw_fh) {
389 /* XXX for now, peform a hard lookup to deduce the type of
390 * object to be deleted ("foo" vs. "foo/")--also, ensures
391 * atomicity at this endpoint */
392 struct rgw_file_handle *fh;
393 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &fh,
394 nullptr /* st */, 0 /* mask */,
395 RGW_LOOKUP_FLAG_NONE);
396 if (!! rc)
397 return rc;
398
399 /* rgw_fh ref+ */
400 rgw_fh = get_rgwfh(fh);
401 rgw_fh->mtx.lock(); /* LOCKED */
402 }
403
404 std::string oname = rgw_fh->relative_object_name();
405 if (rgw_fh->is_dir()) {
406 /* for the duration of our cache timer, trust positive
407 * child cache */
408 if (rgw_fh->has_children()) {
409 rgw_fh->mtx.unlock();
410 unref(rgw_fh);
411 return(-ENOTEMPTY);
412 }
413 oname += "/";
414 }
415 RGWDeleteObjRequest req(cct, get_user(), parent->bucket_name(),
416 oname);
417 rc = rgwlib.get_fe()->execute_req(&req);
418 if (! rc) {
419 rc = req.get_ret();
420 }
421 }
422
423 /* ENOENT when raced with other s3 gateway */
424 if (! rc || rc == -ENOENT) {
425 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
426 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
427 RGWFileHandle::FHCache::FLAG_LOCK);
428 }
429
430 if (! rc) {
431 real_time t = real_clock::now();
432 parent->set_mtime(real_clock::to_timespec(t));
433 parent->set_ctime(real_clock::to_timespec(t));
434 }
435
436 rgw_fh->mtx.unlock();
437 unref(rgw_fh);
438
439 return rc;
440 } /* RGWLibFS::unlink */
441
442 int RGWLibFS::rename(RGWFileHandle* src_fh, RGWFileHandle* dst_fh,
443 const char *_src_name, const char *_dst_name)
444
445 {
446 /* XXX initial implementation: try-copy, and delete if copy
447 * succeeds */
448 int rc = -EINVAL;
449
450 real_time t;
451
452 std::string src_name{_src_name};
453 std::string dst_name{_dst_name};
454
455 /* atomicity */
456 LookupFHResult fhr = lookup_fh(src_fh, _src_name, RGWFileHandle::FLAG_LOCK);
457 RGWFileHandle* rgw_fh = get<0>(fhr);
458
459 /* should not happen */
460 if (! rgw_fh) {
461 ldout(get_context(), 0) << __func__
462 << " BUG no such src renaming path="
463 << src_name
464 << dendl;
465 goto out;
466 }
467
468 /* forbid renaming of directories (unreasonable at scale) */
469 if (rgw_fh->is_dir()) {
470 ldout(get_context(), 12) << __func__
471 << " rejecting attempt to rename directory path="
472 << rgw_fh->full_object_name()
473 << dendl;
474 rc = -EPERM;
475 goto unlock;
476 }
477
478 /* forbid renaming open files (violates intent, for now) */
479 if (rgw_fh->is_open()) {
480 ldout(get_context(), 12) << __func__
481 << " rejecting attempt to rename open file path="
482 << rgw_fh->full_object_name()
483 << dendl;
484 rc = -EPERM;
485 goto unlock;
486 }
487
488 t = real_clock::now();
489
490 for (int ix : {0, 1}) {
491 switch (ix) {
492 case 0:
493 {
494 RGWCopyObjRequest req(cct, get_user(), src_fh, dst_fh, src_name,
495 dst_name);
496 int rc = rgwlib.get_fe()->execute_req(&req);
497 if ((rc != 0) ||
498 ((rc = req.get_ret()) != 0)) {
499 ldout(get_context(), 1)
500 << __func__
501 << " rename step 0 failed src="
502 << src_fh->full_object_name() << " " << src_name
503 << " dst=" << dst_fh->full_object_name()
504 << " " << dst_name
505 << "rc " << rc
506 << dendl;
507 goto unlock;
508 }
509 ldout(get_context(), 12)
510 << __func__
511 << " rename step 0 success src="
512 << src_fh->full_object_name() << " " << src_name
513 << " dst=" << dst_fh->full_object_name()
514 << " " << dst_name
515 << " rc " << rc
516 << dendl;
517 /* update dst change id */
518 dst_fh->set_times(t);
519 }
520 break;
521 case 1:
522 {
523 rc = this->unlink(rgw_fh /* LOCKED */, _src_name,
524 RGWFileHandle::FLAG_UNLINK_THIS);
525 /* !LOCKED, -ref */
526 if (! rc) {
527 ldout(get_context(), 12)
528 << __func__
529 << " rename step 1 success src="
530 << src_fh->full_object_name() << " " << src_name
531 << " dst=" << dst_fh->full_object_name()
532 << " " << dst_name
533 << " rc " << rc
534 << dendl;
535 /* update src change id */
536 src_fh->set_times(t);
537 } else {
538 ldout(get_context(), 1)
539 << __func__
540 << " rename step 1 failed src="
541 << src_fh->full_object_name() << " " << src_name
542 << " dst=" << dst_fh->full_object_name()
543 << " " << dst_name
544 << " rc " << rc
545 << dendl;
546 }
547 }
548 goto out;
549 default:
550 ceph_abort();
551 } /* switch */
552 } /* ix */
553 unlock:
554 rgw_fh->mtx.unlock(); /* !LOCKED */
555 unref(rgw_fh); /* -ref */
556
557 out:
558 return rc;
559 } /* RGWLibFS::rename */
560
561 MkObjResult RGWLibFS::mkdir(RGWFileHandle* parent, const char *name,
562 struct stat *st, uint32_t mask, uint32_t flags)
563 {
564 int rc, rc2;
565 rgw_file_handle *lfh;
566
567 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
568 nullptr /* st */, 0 /* mask */,
569 RGW_LOOKUP_FLAG_NONE);
570 if (! rc) {
571 /* conflict! */
572 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
573 return MkObjResult{nullptr, -EEXIST};
574 }
575
576 MkObjResult mkr{nullptr, -EINVAL};
577 LookupFHResult fhr;
578 RGWFileHandle* rgw_fh = nullptr;
579 buffer::list ux_key, ux_attrs;
580
581 fhr = lookup_fh(parent, name,
582 RGWFileHandle::FLAG_CREATE|
583 RGWFileHandle::FLAG_DIRECTORY|
584 RGWFileHandle::FLAG_LOCK);
585 rgw_fh = get<0>(fhr);
586 if (rgw_fh) {
587 rgw_fh->create_stat(st, mask);
588 rgw_fh->set_times(real_clock::now());
589 /* save attrs */
590 rgw_fh->encode_attrs(ux_key, ux_attrs);
591 if (st)
592 rgw_fh->stat(st, RGWFileHandle::FLAG_LOCKED);
593 get<0>(mkr) = rgw_fh;
594 } else {
595 get<1>(mkr) = -EIO;
596 return mkr;
597 }
598
599 if (parent->is_root()) {
600 /* bucket */
601 string bname{name};
602 /* enforce S3 name restrictions */
603 rc = valid_fs_bucket_name(bname);
604 if (rc != 0) {
605 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
606 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
607 RGWFileHandle::FHCache::FLAG_LOCK);
608 rgw_fh->mtx.unlock();
609 unref(rgw_fh);
610 get<0>(mkr) = nullptr;
611 get<1>(mkr) = rc;
612 return mkr;
613 }
614
615 RGWCreateBucketRequest req(get_context(), get_user(), bname);
616
617 /* save attrs */
618 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
619 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
620
621 rc = rgwlib.get_fe()->execute_req(&req);
622 rc2 = req.get_ret();
623 } else {
624 /* create an object representing the directory */
625 buffer::list bl;
626 string dir_name = parent->format_child_name(name, true);
627
628 /* need valid S3 name (characters, length <= 1024, etc) */
629 rc = valid_fs_object_name(dir_name);
630 if (rc != 0) {
631 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
632 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
633 RGWFileHandle::FHCache::FLAG_LOCK);
634 rgw_fh->mtx.unlock();
635 unref(rgw_fh);
636 get<0>(mkr) = nullptr;
637 get<1>(mkr) = rc;
638 return mkr;
639 }
640
641 RGWPutObjRequest req(get_context(), get_user(), parent->bucket_name(),
642 dir_name, bl);
643
644 /* save attrs */
645 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
646 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
647
648 rc = rgwlib.get_fe()->execute_req(&req);
649 rc2 = req.get_ret();
650 }
651
652 if (! ((rc == 0) &&
653 (rc2 == 0))) {
654 /* op failed */
655 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
656 rgw_fh->mtx.unlock(); /* !LOCKED */
657 unref(rgw_fh);
658 get<0>(mkr) = nullptr;
659 /* fixup rc */
660 if (!rc)
661 rc = rc2;
662 } else {
663 real_time t = real_clock::now();
664 parent->set_mtime(real_clock::to_timespec(t));
665 parent->set_ctime(real_clock::to_timespec(t));
666 rgw_fh->mtx.unlock(); /* !LOCKED */
667 }
668
669 get<1>(mkr) = rc;
670
671 return mkr;
672 } /* RGWLibFS::mkdir */
673
674 MkObjResult RGWLibFS::create(RGWFileHandle* parent, const char *name,
675 struct stat *st, uint32_t mask, uint32_t flags)
676 {
677 int rc, rc2;
678
679 using std::get;
680
681 rgw_file_handle *lfh;
682 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
683 nullptr /* st */, 0 /* mask */,
684 RGW_LOOKUP_FLAG_NONE);
685 if (! rc) {
686 /* conflict! */
687 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
688 return MkObjResult{nullptr, -EEXIST};
689 }
690
691 /* expand and check name */
692 std::string obj_name = parent->format_child_name(name, false);
693 rc = valid_fs_object_name(obj_name);
694 if (rc != 0) {
695 return MkObjResult{nullptr, rc};
696 }
697
698 /* create it */
699 buffer::list bl;
700 RGWPutObjRequest req(cct, get_user(), parent->bucket_name(), obj_name, bl);
701 MkObjResult mkr{nullptr, -EINVAL};
702
703 rc = rgwlib.get_fe()->execute_req(&req);
704 rc2 = req.get_ret();
705
706 if ((rc == 0) &&
707 (rc2 == 0)) {
708 /* XXX atomicity */
709 LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_CREATE |
710 RGWFileHandle::FLAG_LOCK);
711 RGWFileHandle* rgw_fh = get<0>(fhr);
712 if (rgw_fh) {
713 if (get<1>(fhr) & RGWFileHandle::FLAG_CREATE) {
714 /* fill in stat data */
715 real_time t = real_clock::now();
716 rgw_fh->create_stat(st, mask);
717 rgw_fh->set_times(t);
718
719 parent->set_mtime(real_clock::to_timespec(t));
720 parent->set_ctime(real_clock::to_timespec(t));
721 }
722 if (st)
723 (void) rgw_fh->stat(st, RGWFileHandle::FLAG_LOCKED);
724
725 rgw_fh->set_etag(*(req.get_attr(RGW_ATTR_ETAG)));
726 rgw_fh->set_acls(*(req.get_attr(RGW_ATTR_ACL)));
727
728 get<0>(mkr) = rgw_fh;
729 rgw_fh->mtx.unlock();
730 } else
731 rc = -EIO;
732 }
733
734 get<1>(mkr) = rc;
735
736 /* case like : quota exceed will be considered as fail too*/
737 if(rc2 < 0)
738 get<1>(mkr) = rc2;
739
740 return mkr;
741 } /* RGWLibFS::create */
742
743 MkObjResult RGWLibFS::symlink(RGWFileHandle* parent, const char *name,
744 const char* link_path, struct stat *st, uint32_t mask, uint32_t flags)
745 {
746 int rc, rc2;
747
748 using std::get;
749
750 rgw_file_handle *lfh;
751 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
752 nullptr /* st */, 0 /* mask */,
753 RGW_LOOKUP_FLAG_NONE);
754 if (! rc) {
755 /* conflict! */
756 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
757 return MkObjResult{nullptr, -EEXIST};
758 }
759
760 MkObjResult mkr{nullptr, -EINVAL};
761 LookupFHResult fhr;
762 RGWFileHandle* rgw_fh = nullptr;
763 buffer::list ux_key, ux_attrs;
764
765 fhr = lookup_fh(parent, name,
766 RGWFileHandle::FLAG_CREATE|
767 RGWFileHandle::FLAG_SYMBOLIC_LINK|
768 RGWFileHandle::FLAG_LOCK);
769 rgw_fh = get<0>(fhr);
770 if (rgw_fh) {
771 rgw_fh->create_stat(st, mask);
772 rgw_fh->set_times(real_clock::now());
773 /* save attrs */
774 rgw_fh->encode_attrs(ux_key, ux_attrs);
775 if (st)
776 rgw_fh->stat(st);
777 get<0>(mkr) = rgw_fh;
778 } else {
779 get<1>(mkr) = -EIO;
780 return mkr;
781 }
782
783 /* need valid S3 name (characters, length <= 1024, etc) */
784 rc = valid_fs_object_name(name);
785 if (rc != 0) {
786 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
787 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
788 RGWFileHandle::FHCache::FLAG_LOCK);
789 rgw_fh->mtx.unlock();
790 unref(rgw_fh);
791 get<0>(mkr) = nullptr;
792 get<1>(mkr) = rc;
793 return mkr;
794 }
795
796 string obj_name = std::string(name);
797 /* create an object representing the directory */
798 buffer::list bl;
799
800 /* XXXX */
801 #if 0
802 bl.push_back(
803 buffer::create_static(len, static_cast<char*>(buffer)));
804 #else
805
806 bl.push_back(
807 buffer::copy(link_path, strlen(link_path)));
808 #endif
809
810 RGWPutObjRequest req(get_context(), get_user(), parent->bucket_name(),
811 obj_name, bl);
812
813 /* save attrs */
814 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
815 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
816
817 rc = rgwlib.get_fe()->execute_req(&req);
818 rc2 = req.get_ret();
819 if (! ((rc == 0) &&
820 (rc2 == 0))) {
821 /* op failed */
822 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
823 rgw_fh->mtx.unlock(); /* !LOCKED */
824 unref(rgw_fh);
825 get<0>(mkr) = nullptr;
826 /* fixup rc */
827 if (!rc)
828 rc = rc2;
829 } else {
830 real_time t = real_clock::now();
831 parent->set_mtime(real_clock::to_timespec(t));
832 parent->set_ctime(real_clock::to_timespec(t));
833 rgw_fh->mtx.unlock(); /* !LOCKED */
834 }
835
836 get<1>(mkr) = rc;
837
838 return mkr;
839 } /* RGWLibFS::symlink */
840
841 int RGWLibFS::getattr(RGWFileHandle* rgw_fh, struct stat* st)
842 {
843 switch(rgw_fh->fh.fh_type) {
844 case RGW_FS_TYPE_FILE:
845 {
846 if (rgw_fh->deleted())
847 return -ESTALE;
848 }
849 break;
850 default:
851 break;
852 };
853 /* if rgw_fh is a directory, mtime will be advanced */
854 return rgw_fh->stat(st);
855 } /* RGWLibFS::getattr */
856
857 int RGWLibFS::setattr(RGWFileHandle* rgw_fh, struct stat* st, uint32_t mask,
858 uint32_t flags)
859 {
860 int rc, rc2;
861 buffer::list ux_key, ux_attrs;
862 buffer::list etag = rgw_fh->get_etag();
863 buffer::list acls = rgw_fh->get_acls();
864
865 lock_guard guard(rgw_fh->mtx);
866
867 switch(rgw_fh->fh.fh_type) {
868 case RGW_FS_TYPE_FILE:
869 {
870 if (rgw_fh->deleted())
871 return -ESTALE;
872 }
873 break;
874 default:
875 break;
876 };
877
878 string obj_name{rgw_fh->relative_object_name()};
879
880 if (rgw_fh->is_dir() &&
881 (likely(! rgw_fh->is_bucket()))) {
882 obj_name += "/";
883 }
884
885 RGWSetAttrsRequest req(cct, get_user(), rgw_fh->bucket_name(), obj_name);
886
887 rgw_fh->create_stat(st, mask);
888 rgw_fh->encode_attrs(ux_key, ux_attrs);
889
890 /* save attrs */
891 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
892 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
893 req.emplace_attr(RGW_ATTR_ETAG, std::move(etag));
894 req.emplace_attr(RGW_ATTR_ACL, std::move(acls));
895
896 rc = rgwlib.get_fe()->execute_req(&req);
897 rc2 = req.get_ret();
898
899 if (rc == -ENOENT) {
900 /* special case: materialize placeholder dir */
901 buffer::list bl;
902 RGWPutObjRequest req(get_context(), get_user(), rgw_fh->bucket_name(),
903 obj_name, bl);
904
905 rgw_fh->encode_attrs(ux_key, ux_attrs); /* because std::moved */
906
907 /* save attrs */
908 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
909 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
910
911 rc = rgwlib.get_fe()->execute_req(&req);
912 rc2 = req.get_ret();
913 }
914
915 if ((rc != 0) || (rc2 != 0)) {
916 return -EIO;
917 }
918
919 rgw_fh->set_ctime(real_clock::to_timespec(real_clock::now()));
920
921 return 0;
922 } /* RGWLibFS::setattr */
923
924 /* called under rgw_fh->mtx held */
925 void RGWLibFS::update_fh(RGWFileHandle *rgw_fh)
926 {
927 int rc, rc2;
928 string obj_name{rgw_fh->relative_object_name()};
929 buffer::list ux_key, ux_attrs;
930
931 if (rgw_fh->is_dir() &&
932 (likely(! rgw_fh->is_bucket()))) {
933 obj_name += "/";
934 }
935
936 lsubdout(get_context(), rgw, 17)
937 << __func__
938 << " update old versioned fh : " << obj_name
939 << dendl;
940
941 RGWSetAttrsRequest req(cct, get_user(), rgw_fh->bucket_name(), obj_name);
942
943 rgw_fh->encode_attrs(ux_key, ux_attrs);
944
945 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
946 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
947
948 rc = rgwlib.get_fe()->execute_req(&req);
949 rc2 = req.get_ret();
950
951 if ((rc != 0) || (rc2 != 0)) {
952 lsubdout(get_context(), rgw, 17)
953 << __func__
954 << " update fh failed : " << obj_name
955 << dendl;
956 }
957 } /* RGWLibFS::update_fh */
958
959 void RGWLibFS::close()
960 {
961 state.flags |= FLAG_CLOSED;
962
963 class ObjUnref
964 {
965 RGWLibFS* fs;
966 public:
967 explicit ObjUnref(RGWLibFS* _fs) : fs(_fs) {}
968 void operator()(RGWFileHandle* fh) const {
969 lsubdout(fs->get_context(), rgw, 5)
970 << __func__
971 << fh->name
972 << " before ObjUnref refs=" << fh->get_refcnt()
973 << dendl;
974 fs->unref(fh);
975 }
976 };
977
978 /* force cache drain, forces objects to evict */
979 fh_cache.drain(ObjUnref(this),
980 RGWFileHandle::FHCache::FLAG_LOCK);
981 rgwlib.get_fe()->get_process()->unregister_fs(this);
982 rele();
983 } /* RGWLibFS::close */
984
985 inline std::ostream& operator<<(std::ostream &os, fh_key const &fhk) {
986 os << "<fh_key: bucket=";
987 os << fhk.fh_hk.bucket;
988 os << "; object=";
989 os << fhk.fh_hk.object;
990 os << ">";
991 return os;
992 }
993
/* Stream a timespec as "<timespec: tv_sec=S; tv_nsec=N>". */
inline std::ostream& operator<<(std::ostream &os, struct timespec const &ts) {
  return os << "<timespec: tv_sec=" << ts.tv_sec
            << "; tv_nsec=" << ts.tv_nsec
            << ">";
}
1002
1003 std::ostream& operator<<(std::ostream &os, RGWLibFS::event const &ev) {
1004 os << "<event:";
1005 switch (ev.t) {
1006 case RGWLibFS::event::type::READDIR:
1007 os << "type=READDIR;";
1008 break;
1009 default:
1010 os << "type=UNKNOWN;";
1011 break;
1012 };
1013 os << "fid=" << ev.fhk.fh_hk.bucket << ":" << ev.fhk.fh_hk.object
1014 << ";ts=" << ev.ts << ">";
1015 return os;
1016 }
1017
1018 void RGWLibFS::gc()
1019 {
1020 using std::get;
1021 using directory = RGWFileHandle::directory;
1022
1023 /* dirent invalidate timeout--basically, the upper-bound on
1024 * inconsistency with the S3 namespace */
1025 auto expire_s
1026 = get_context()->_conf->rgw_nfs_namespace_expire_secs;
1027
1028 /* max events to gc in one cycle */
1029 uint32_t max_ev = get_context()->_conf->rgw_nfs_max_gc;
1030
1031 struct timespec now, expire_ts;
1032 event_vector ve;
1033 bool stop = false;
1034 std::deque<event> &events = state.events;
1035
1036 do {
1037 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
1038 lsubdout(get_context(), rgw, 15)
1039 << "GC: top of expire loop"
1040 << " now=" << now
1041 << " expire_s=" << expire_s
1042 << dendl;
1043 {
1044 lock_guard guard(state.mtx); /* LOCKED */
1045 lsubdout(get_context(), rgw, 15)
1046 << "GC: processing"
1047 << " count=" << events.size()
1048 << " events"
1049 << dendl;
1050 /* just return if no events */
1051 if (events.empty()) {
1052 return;
1053 }
1054 uint32_t _max_ev =
1055 (events.size() < 500) ? max_ev : (events.size() / 4);
1056 for (uint32_t ix = 0; (ix < _max_ev) && (events.size() > 0); ++ix) {
1057 event& ev = events.front();
1058 expire_ts = ev.ts;
1059 expire_ts.tv_sec += expire_s;
1060 if (expire_ts > now) {
1061 stop = true;
1062 break;
1063 }
1064 ve.push_back(ev);
1065 events.pop_front();
1066 }
1067 } /* anon */
1068 /* !LOCKED */
1069 for (auto& ev : ve) {
1070 lsubdout(get_context(), rgw, 15)
1071 << "try-expire ev: " << ev << dendl;
1072 if (likely(ev.t == event::type::READDIR)) {
1073 RGWFileHandle* rgw_fh = lookup_handle(ev.fhk.fh_hk);
1074 lsubdout(get_context(), rgw, 15)
1075 << "ev rgw_fh: " << rgw_fh << dendl;
1076 if (rgw_fh) {
1077 RGWFileHandle::directory* d;
1078 if (unlikely(! rgw_fh->is_dir())) {
1079 lsubdout(get_context(), rgw, 0)
1080 << __func__
1081 << " BUG non-directory found with READDIR event "
1082 << "(" << rgw_fh->bucket_name() << ","
1083 << rgw_fh->object_name() << ")"
1084 << dendl;
1085 goto rele;
1086 }
1087 /* maybe clear state */
1088 d = get<directory>(&rgw_fh->variant_type);
1089 if (d) {
1090 struct timespec ev_ts = ev.ts;
1091 lock_guard guard(rgw_fh->mtx);
1092 struct timespec d_last_readdir = d->last_readdir;
1093 if (unlikely(ev_ts < d_last_readdir)) {
1094 /* readdir cycle in progress, don't invalidate */
1095 lsubdout(get_context(), rgw, 15)
1096 << "GC: delay expiration for "
1097 << rgw_fh->object_name()
1098 << " ev.ts=" << ev_ts
1099 << " last_readdir=" << d_last_readdir
1100 << dendl;
1101 continue;
1102 } else {
1103 lsubdout(get_context(), rgw, 15)
1104 << "GC: expiring "
1105 << rgw_fh->object_name()
1106 << dendl;
1107 rgw_fh->clear_state();
1108 rgw_fh->invalidate();
1109 }
1110 }
1111 rele:
1112 unref(rgw_fh);
1113 } /* rgw_fh */
1114 } /* event::type::READDIR */
1115 } /* ev */
1116 ve.clear();
1117 } while (! (stop || shutdown));
1118 } /* RGWLibFS::gc */
1119
1120 std::ostream& operator<<(std::ostream &os,
1121 RGWFileHandle const &rgw_fh)
1122 {
1123 const auto& fhk = rgw_fh.get_key();
1124 const auto& fh = const_cast<RGWFileHandle&>(rgw_fh).get_fh();
1125 os << "<RGWFileHandle:";
1126 os << "addr=" << &rgw_fh << ";";
1127 switch (fh->fh_type) {
1128 case RGW_FS_TYPE_DIRECTORY:
1129 os << "type=DIRECTORY;";
1130 break;
1131 case RGW_FS_TYPE_FILE:
1132 os << "type=FILE;";
1133 break;
1134 default:
1135 os << "type=UNKNOWN;";
1136 break;
1137 };
1138 os << "fid=" << fhk.fh_hk.bucket << ":" << fhk.fh_hk.object << ";";
1139 os << "name=" << rgw_fh.object_name() << ";";
1140 os << "refcnt=" << rgw_fh.get_refcnt() << ";";
1141 os << ">";
1142 return os;
1143 }
1144
1145 RGWFileHandle::~RGWFileHandle() {
1146 /* !recycle case, handle may STILL be in handle table, BUT
1147 * the partition lock is not held in this path */
1148 if (fh_hook.is_linked()) {
1149 fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_LOCK);
1150 }
1151 /* cond-unref parent */
1152 if (parent && (! parent->is_mount())) {
1153 /* safe because if parent->unref causes its deletion,
1154 * there are a) by refcnt, no other objects/paths pointing
1155 * to it and b) by the semantics of valid iteration of
1156 * fh_lru (observed, e.g., by cohort_lru<T,...>::drain())
1157 * no unsafe iterators reaching it either--n.b., this constraint
1158 * is binding oncode which may in future attempt to e.g.,
1159 * cause the eviction of objects in LRU order */
1160 (void) get_fs()->unref(parent);
1161 }
1162 }
1163
1164 fh_key RGWFileHandle::make_fhk(const std::string& name)
1165 {
1166 std::string tenant = get_fs()->get_user()->user_id.to_str();
1167 if (depth == 0) {
1168 /* S3 bucket -- assert mount-at-bucket case reaches here */
1169 return fh_key(name, name, tenant);
1170 } else {
1171 std::string key_name = make_key_name(name.c_str());
1172 return fh_key(fhk.fh_hk.bucket, key_name.c_str(), tenant);
1173 }
1174 }
1175
1176 void RGWFileHandle::encode_attrs(ceph::buffer::list& ux_key1,
1177 ceph::buffer::list& ux_attrs1)
1178 {
1179 using ceph::encode;
1180 fh_key fhk(this->fh.fh_hk);
1181 encode(fhk, ux_key1);
1182 encode(*this, ux_attrs1);
1183 } /* RGWFileHandle::encode_attrs */
1184
1185 DecodeAttrsResult RGWFileHandle::decode_attrs(const ceph::buffer::list* ux_key1,
1186 const ceph::buffer::list* ux_attrs1)
1187 {
1188 using ceph::decode;
1189 DecodeAttrsResult dar { false, false };
1190 fh_key fhk;
1191 auto bl_iter_key1 = ux_key1->cbegin();
1192 decode(fhk, bl_iter_key1);
1193 get<0>(dar) = true;
1194
1195 auto bl_iter_unix1 = ux_attrs1->cbegin();
1196 decode(*this, bl_iter_unix1);
1197 if (this->state.version < 2) {
1198 get<1>(dar) = true;
1199 }
1200
1201 return dar;
1202 } /* RGWFileHandle::decode_attrs */
1203
1204 bool RGWFileHandle::reclaim() {
1205 lsubdout(fs->get_context(), rgw, 17)
1206 << __func__ << " " << *this
1207 << dendl;
1208 /* in the non-delete case, handle may still be in handle table */
1209 if (fh_hook.is_linked()) {
1210 /* in this case, we are being called from a context which holds
1211 * the partition lock */
1212 fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_NONE);
1213 }
1214 return true;
1215 } /* RGWFileHandle::reclaim */
1216
1217 bool RGWFileHandle::has_children() const
1218 {
1219 if (unlikely(! is_dir()))
1220 return false;
1221
1222 RGWRMdirCheck req(fs->get_context(), fs->get_user(), this);
1223 int rc = rgwlib.get_fe()->execute_req(&req);
1224 if (! rc) {
1225 return req.valid && req.has_children;
1226 }
1227
1228 return false;
1229 }
1230
1231 std::ostream& operator<<(std::ostream &os,
1232 RGWFileHandle::readdir_offset const &offset)
1233 {
1234 using boost::get;
1235 if (unlikely(!! get<uint64_t*>(&offset))) {
1236 uint64_t* ioff = get<uint64_t*>(offset);
1237 os << *ioff;
1238 }
1239 else
1240 os << get<const char*>(offset);
1241 return os;
1242 }
1243
1244 int RGWFileHandle::readdir(rgw_readdir_cb rcb, void *cb_arg,
1245 readdir_offset offset,
1246 bool *eof, uint32_t flags)
1247 {
1248 using event = RGWLibFS::event;
1249 using boost::get;
1250 int rc = 0;
1251 struct timespec now;
1252 CephContext* cct = fs->get_context();
1253
1254 lsubdout(cct, rgw, 10)
1255 << __func__ << " readdir called on "
1256 << object_name()
1257 << dendl;
1258
1259 directory* d = get<directory>(&variant_type);
1260 if (d) {
1261 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1262 lock_guard guard(mtx);
1263 d->last_readdir = now;
1264 }
1265
1266 bool initial_off;
1267 char* mk{nullptr};
1268
1269 if (likely(!! get<const char*>(&offset))) {
1270 mk = const_cast<char*>(get<const char*>(offset));
1271 initial_off = !mk;
1272 } else {
1273 initial_off = (*get<uint64_t*>(offset) == 0);
1274 }
1275
1276 if (is_root()) {
1277 RGWListBucketsRequest req(cct, fs->get_user(), this, rcb, cb_arg,
1278 offset);
1279 rc = rgwlib.get_fe()->execute_req(&req);
1280 if (! rc) {
1281 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1282 lock_guard guard(mtx);
1283 state.atime = now;
1284 if (initial_off)
1285 set_nlink(2);
1286 inc_nlink(req.d_count);
1287 *eof = req.eof();
1288 }
1289 } else {
1290 RGWReaddirRequest req(cct, fs->get_user(), this, rcb, cb_arg, offset);
1291 rc = rgwlib.get_fe()->execute_req(&req);
1292 if (! rc) {
1293 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1294 lock_guard guard(mtx);
1295 state.atime = now;
1296 if (initial_off)
1297 set_nlink(2);
1298 inc_nlink(req.d_count);
1299 *eof = req.eof();
1300 }
1301 }
1302
1303 event ev(event::type::READDIR, get_key(), state.atime);
1304 lock_guard sguard(fs->state.mtx);
1305 fs->state.push_event(ev);
1306
1307 lsubdout(fs->get_context(), rgw, 15)
1308 << __func__
1309 << " final link count=" << state.nlink
1310 << dendl;
1311
1312 return rc;
1313 } /* RGWFileHandle::readdir */
1314
1315 int RGWFileHandle::write(uint64_t off, size_t len, size_t *bytes_written,
1316 void *buffer)
1317 {
1318 using std::get;
1319 using WriteCompletion = RGWLibFS::WriteCompletion;
1320
1321 lock_guard guard(mtx);
1322
1323 int rc = 0;
1324
1325 file* f = get<file>(&variant_type);
1326 if (! f)
1327 return -EISDIR;
1328
1329 if (deleted()) {
1330 lsubdout(fs->get_context(), rgw, 5)
1331 << __func__
1332 << " write attempted on deleted object "
1333 << this->object_name()
1334 << dendl;
1335 /* zap write transaction, if any */
1336 if (f->write_req) {
1337 delete f->write_req;
1338 f->write_req = nullptr;
1339 }
1340 return -ESTALE;
1341 }
1342
1343 if (! f->write_req) {
1344 /* guard--we do not support (e.g., COW-backed) partial writes */
1345 if (off != 0) {
1346 lsubdout(fs->get_context(), rgw, 5)
1347 << __func__
1348 << " " << object_name()
1349 << " non-0 initial write position " << off
1350 << " (mounting with -o sync required)"
1351 << dendl;
1352 return -EIO;
1353 }
1354
1355 /* start */
1356 std::string object_name = relative_object_name();
1357 f->write_req =
1358 new RGWWriteRequest(fs->get_context(), fs->get_user(), this,
1359 bucket_name(), object_name);
1360 rc = rgwlib.get_fe()->start_req(f->write_req);
1361 if (rc < 0) {
1362 lsubdout(fs->get_context(), rgw, 5)
1363 << __func__
1364 << this->object_name()
1365 << " write start failed " << off
1366 << " (" << rc << ")"
1367 << dendl;
1368 /* zap failed write transaction */
1369 delete f->write_req;
1370 f->write_req = nullptr;
1371 return -EIO;
1372 } else {
1373 if (stateless_open()) {
1374 /* start write timer */
1375 f->write_req->timer_id =
1376 RGWLibFS::write_timer.add_event(
1377 std::chrono::seconds(RGWLibFS::write_completion_interval_s),
1378 WriteCompletion(*this));
1379 }
1380 }
1381 }
1382
1383 int overlap = 0;
1384 if ((static_cast<off_t>(off) < f->write_req->real_ofs) &&
1385 ((f->write_req->real_ofs - off) <= len)) {
1386 overlap = f->write_req->real_ofs - off;
1387 off = f->write_req->real_ofs;
1388 buffer = static_cast<char*>(buffer) + overlap;
1389 len -= overlap;
1390 }
1391
1392 buffer::list bl;
1393 /* XXXX */
1394 #if 0
1395 bl.push_back(
1396 buffer::create_static(len, static_cast<char*>(buffer)));
1397 #else
1398 bl.push_back(
1399 buffer::copy(static_cast<char*>(buffer), len));
1400 #endif
1401
1402 f->write_req->put_data(off, bl);
1403 rc = f->write_req->exec_continue();
1404
1405 if (rc == 0) {
1406 size_t min_size = off + len;
1407 if (min_size > get_size())
1408 set_size(min_size);
1409 if (stateless_open()) {
1410 /* bump write timer */
1411 RGWLibFS::write_timer.adjust_event(
1412 f->write_req->timer_id, std::chrono::seconds(10));
1413 }
1414 } else {
1415 /* continuation failed (e.g., non-contiguous write position) */
1416 lsubdout(fs->get_context(), rgw, 5)
1417 << __func__
1418 << object_name()
1419 << " failed write at position " << off
1420 << " (fails write transaction) "
1421 << dendl;
1422 /* zap failed write transaction */
1423 delete f->write_req;
1424 f->write_req = nullptr;
1425 rc = -EIO;
1426 }
1427
1428 *bytes_written = (rc == 0) ? (len + overlap) : 0;
1429 return rc;
1430 } /* RGWFileHandle::write */
1431
1432 int RGWFileHandle::write_finish(uint32_t flags)
1433 {
1434 unique_lock guard{mtx, std::defer_lock};
1435 int rc = 0;
1436
1437 if (! (flags & FLAG_LOCKED)) {
1438 guard.lock();
1439 }
1440
1441 file* f = get<file>(&variant_type);
1442 if (f && (f->write_req)) {
1443 lsubdout(fs->get_context(), rgw, 10)
1444 << __func__
1445 << " finishing write trans on " << object_name()
1446 << dendl;
1447 rc = rgwlib.get_fe()->finish_req(f->write_req);
1448 if (! rc) {
1449 rc = f->write_req->get_ret();
1450 }
1451 delete f->write_req;
1452 f->write_req = nullptr;
1453 }
1454
1455 return rc;
1456 } /* RGWFileHandle::write_finish */
1457
1458 int RGWFileHandle::close()
1459 {
1460 lock_guard guard(mtx);
1461
1462 int rc = write_finish(FLAG_LOCKED);
1463
1464 flags &= ~FLAG_OPEN;
1465 flags &= ~FLAG_STATELESS_OPEN;
1466
1467 return rc;
1468 } /* RGWFileHandle::close */
1469
1470 RGWFileHandle::file::~file()
1471 {
1472 delete write_req;
1473 }
1474
1475 void RGWFileHandle::clear_state()
1476 {
1477 directory* d = get<directory>(&variant_type);
1478 if (d) {
1479 state.nlink = 2;
1480 d->last_marker = rgw_obj_key{};
1481 }
1482 }
1483
1484 void RGWFileHandle::advance_mtime(uint32_t flags) {
1485 /* intended for use on directories, fast-forward mtime so as to
1486 * ensure a new, higher value for the change attribute */
1487 unique_lock uniq(mtx, std::defer_lock);
1488 if (likely(! (flags & RGWFileHandle::FLAG_LOCKED))) {
1489 uniq.lock();
1490 }
1491
1492 /* advance mtime only if stored mtime is older than the
1493 * configured namespace expiration */
1494 auto now = real_clock::now();
1495 auto cmptime = state.mtime;
1496 cmptime.tv_sec +=
1497 fs->get_context()->_conf->rgw_nfs_namespace_expire_secs;
1498 if (cmptime < real_clock::to_timespec(now)) {
1499 /* sets ctime as well as mtime, to avoid masking updates should
1500 * ctime inexplicably hold a higher value */
1501 set_times(now);
1502 }
1503 }
1504
1505 void RGWFileHandle::invalidate() {
1506 RGWLibFS *fs = get_fs();
1507 if (fs->invalidate_cb) {
1508 fs->invalidate_cb(fs->invalidate_arg, get_key().fh_hk);
1509 }
1510 }
1511
1512 int RGWWriteRequest::exec_start() {
1513 struct req_state* s = get_state();
1514
1515 auto compression_type =
1516 get_store()->svc.zone->get_zone_params().get_compression_type(
1517 s->bucket_info.placement_rule);
1518
1519 /* not obviously supportable */
1520 ceph_assert(! dlo_manifest);
1521 ceph_assert(! slo_info);
1522
1523 perfcounter->inc(l_rgw_put);
1524 op_ret = -EINVAL;
1525 rgw_obj obj{s->bucket, s->object};
1526
1527 if (s->object.empty()) {
1528 ldout(s->cct, 0) << __func__ << " called on empty object" << dendl;
1529 goto done;
1530 }
1531
1532 op_ret = get_params();
1533 if (op_ret < 0)
1534 goto done;
1535
1536 op_ret = get_system_versioning_params(s, &olh_epoch, &version_id);
1537 if (op_ret < 0) {
1538 goto done;
1539 }
1540
1541 /* user-supplied MD5 check skipped (not supplied) */
1542 /* early quota check skipped--we don't have size yet */
1543 /* skipping user-supplied etag--we might have one in future, but
1544 * like data it and other attrs would arrive after open */
1545
1546 aio.emplace(s->cct->_conf->rgw_put_obj_min_window_size);
1547
1548 if (s->bucket_info.versioning_enabled()) {
1549 if (!version_id.empty()) {
1550 obj.key.set_instance(version_id);
1551 } else {
1552 get_store()->gen_rand_obj_instance_name(&obj);
1553 version_id = obj.key.instance;
1554 }
1555 }
1556 processor.emplace(&*aio, get_store(), s->bucket_info,
1557 &s->dest_placement,
1558 s->bucket_owner.get_id(),
1559 *static_cast<RGWObjectCtx *>(s->obj_ctx),
1560 obj, olh_epoch, s->req_id);
1561
1562 op_ret = processor->prepare();
1563 if (op_ret < 0) {
1564 ldout(s->cct, 20) << "processor->prepare() returned ret=" << op_ret
1565 << dendl;
1566 goto done;
1567 }
1568 filter = &*processor;
1569 if (compression_type != "none") {
1570 plugin = Compressor::create(s->cct, compression_type);
1571 if (! plugin) {
1572 ldout(s->cct, 1) << "Cannot load plugin for rgw_compression_type "
1573 << compression_type << dendl;
1574 } else {
1575 compressor.emplace(s->cct, plugin, filter);
1576 filter = &*compressor;
1577 }
1578 }
1579
1580 done:
1581 return op_ret;
1582 } /* exec_start */
1583
1584 int RGWWriteRequest::exec_continue()
1585 {
1586 struct req_state* s = get_state();
1587 op_ret = 0;
1588
1589 /* check guards (e.g., contig write) */
1590 if (eio) {
1591 ldout(s->cct, 5)
1592 << " chunks arrived in wrong order"
1593 << " (mounting with -o sync required)"
1594 << dendl;
1595 return -EIO;
1596 }
1597
1598 op_ret = get_store()->check_quota(s->bucket_owner.get_id(), s->bucket,
1599 user_quota, bucket_quota, real_ofs, true);
1600 /* max_size exceed */
1601 if (op_ret < 0)
1602 return -EIO;
1603
1604 size_t len = data.length();
1605 if (! len)
1606 return 0;
1607
1608 hash.Update((const unsigned char *)data.c_str(), data.length());
1609 op_ret = filter->process(std::move(data), ofs);
1610 if (op_ret < 0) {
1611 goto done;
1612 }
1613 bytes_written += len;
1614
1615 done:
1616 return op_ret;
1617 } /* exec_continue */
1618
1619 int RGWWriteRequest::exec_finish()
1620 {
1621 buffer::list bl, aclbl, ux_key, ux_attrs;
1622 map<string, string>::iterator iter;
1623 char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
1624 unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
1625 struct req_state* s = get_state();
1626
1627 size_t osize = rgw_fh->get_size();
1628 struct timespec octime = rgw_fh->get_ctime();
1629 struct timespec omtime = rgw_fh->get_mtime();
1630 real_time appx_t = real_clock::now();
1631
1632 s->obj_size = bytes_written;
1633 perfcounter->inc(l_rgw_put_b, s->obj_size);
1634
1635 // flush data in filters
1636 op_ret = filter->process({}, s->obj_size);
1637 if (op_ret < 0) {
1638 goto done;
1639 }
1640
1641 op_ret = get_store()->check_quota(s->bucket_owner.get_id(), s->bucket,
1642 user_quota, bucket_quota, s->obj_size, true);
1643 /* max_size exceed */
1644 if (op_ret < 0) {
1645 goto done;
1646 }
1647
1648 op_ret = get_store()->check_bucket_shards(s->bucket_info, s->bucket,
1649 bucket_quota);
1650 if (op_ret < 0) {
1651 goto done;
1652 }
1653
1654 hash.Final(m);
1655
1656 if (compressor && compressor->is_compressed()) {
1657 bufferlist tmp;
1658 RGWCompressionInfo cs_info;
1659 cs_info.compression_type = plugin->get_type_name();
1660 cs_info.orig_size = s->obj_size;
1661 cs_info.blocks = std::move(compressor->get_compression_blocks());
1662 encode(cs_info, tmp);
1663 attrs[RGW_ATTR_COMPRESSION] = tmp;
1664 ldout(s->cct, 20) << "storing " << RGW_ATTR_COMPRESSION
1665 << " with type=" << cs_info.compression_type
1666 << ", orig_size=" << cs_info.orig_size
1667 << ", blocks=" << cs_info.blocks.size() << dendl;
1668 }
1669
1670 buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
1671 etag = calc_md5;
1672
1673 bl.append(etag.c_str(), etag.size() + 1);
1674 emplace_attr(RGW_ATTR_ETAG, std::move(bl));
1675
1676 policy.encode(aclbl);
1677 emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
1678
1679 /* unix attrs */
1680 rgw_fh->set_mtime(real_clock::to_timespec(appx_t));
1681 rgw_fh->set_ctime(real_clock::to_timespec(appx_t));
1682 rgw_fh->set_size(bytes_written);
1683 rgw_fh->encode_attrs(ux_key, ux_attrs);
1684
1685 emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
1686 emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
1687
1688 for (iter = s->generic_attrs.begin(); iter != s->generic_attrs.end();
1689 ++iter) {
1690 buffer::list& attrbl = attrs[iter->first];
1691 const string& val = iter->second;
1692 attrbl.append(val.c_str(), val.size() + 1);
1693 }
1694
1695 op_ret = rgw_get_request_metadata(s->cct, s->info, attrs);
1696 if (op_ret < 0) {
1697 goto done;
1698 }
1699 encode_delete_at_attr(delete_at, attrs);
1700
1701 /* Add a custom metadata to expose the information whether an object
1702 * is an SLO or not. Appending the attribute must be performed AFTER
1703 * processing any input from user in order to prohibit overwriting. */
1704 if (unlikely(!! slo_info)) {
1705 buffer::list slo_userindicator_bl;
1706 using ceph::encode;
1707 encode("True", slo_userindicator_bl);
1708 emplace_attr(RGW_ATTR_SLO_UINDICATOR, std::move(slo_userindicator_bl));
1709 }
1710
1711 op_ret = processor->complete(s->obj_size, etag, &mtime, real_time(), attrs,
1712 (delete_at ? *delete_at : real_time()),
1713 if_match, if_nomatch, nullptr, nullptr, nullptr);
1714 if (op_ret != 0) {
1715 /* revert attr updates */
1716 rgw_fh->set_mtime(omtime);
1717 rgw_fh->set_ctime(octime);
1718 rgw_fh->set_size(osize);
1719 }
1720
1721 done:
1722 perfcounter->tinc(l_rgw_put_lat, s->time_elapsed());
1723 return op_ret;
1724 } /* exec_finish */
1725
1726 } /* namespace rgw */
1727
1728 /* librgw */
1729 extern "C" {
1730
1731 void rgwfile_version(int *major, int *minor, int *extra)
1732 {
1733 if (major)
1734 *major = LIBRGW_FILE_VER_MAJOR;
1735 if (minor)
1736 *minor = LIBRGW_FILE_VER_MINOR;
1737 if (extra)
1738 *extra = LIBRGW_FILE_VER_EXTRA;
1739 }
1740
1741 /*
1742 attach rgw namespace
1743 */
1744 int rgw_mount(librgw_t rgw, const char *uid, const char *acc_key,
1745 const char *sec_key, struct rgw_fs **rgw_fs,
1746 uint32_t flags)
1747 {
1748 int rc = 0;
1749
1750 /* stash access data for "mount" */
1751 RGWLibFS* new_fs = new RGWLibFS(static_cast<CephContext*>(rgw), uid, acc_key,
1752 sec_key, "/");
1753 ceph_assert(new_fs);
1754
1755 rc = new_fs->authorize(rgwlib.get_store());
1756 if (rc != 0) {
1757 delete new_fs;
1758 return -EINVAL;
1759 }
1760
1761 /* register fs for shared gc */
1762 rgwlib.get_fe()->get_process()->register_fs(new_fs);
1763
1764 struct rgw_fs *fs = new_fs->get_fs();
1765 fs->rgw = rgw;
1766
1767 /* XXX we no longer assume "/" is unique, but we aren't tracking the
1768 * roots atm */
1769
1770 *rgw_fs = fs;
1771
1772 return 0;
1773 }
1774
1775 int rgw_mount2(librgw_t rgw, const char *uid, const char *acc_key,
1776 const char *sec_key, const char *root, struct rgw_fs **rgw_fs,
1777 uint32_t flags)
1778 {
1779 int rc = 0;
1780
1781 /* stash access data for "mount" */
1782 RGWLibFS* new_fs = new RGWLibFS(static_cast<CephContext*>(rgw), uid, acc_key,
1783 sec_key, root);
1784 ceph_assert(new_fs);
1785
1786 rc = new_fs->authorize(rgwlib.get_store());
1787 if (rc != 0) {
1788 delete new_fs;
1789 return -EINVAL;
1790 }
1791
1792 /* register fs for shared gc */
1793 rgwlib.get_fe()->get_process()->register_fs(new_fs);
1794
1795 struct rgw_fs *fs = new_fs->get_fs();
1796 fs->rgw = rgw;
1797
1798 /* XXX we no longer assume "/" is unique, but we aren't tracking the
1799 * roots atm */
1800
1801 *rgw_fs = fs;
1802
1803 return 0;
1804 }
1805
1806 /*
1807 register invalidate callbacks
1808 */
1809 int rgw_register_invalidate(struct rgw_fs *rgw_fs, rgw_fh_callback_t cb,
1810 void *arg, uint32_t flags)
1811
1812 {
1813 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1814 return fs->register_invalidate(cb, arg, flags);
1815 }
1816
1817 /*
1818 detach rgw namespace
1819 */
1820 int rgw_umount(struct rgw_fs *rgw_fs, uint32_t flags)
1821 {
1822 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1823 fs->close();
1824 return 0;
1825 }
1826
1827 /*
1828 get filesystem attributes
1829 */
1830 int rgw_statfs(struct rgw_fs *rgw_fs,
1831 struct rgw_file_handle *parent_fh,
1832 struct rgw_statvfs *vfs_st, uint32_t flags)
1833 {
1834 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1835 struct rados_cluster_stat_t stats;
1836
1837 RGWGetClusterStatReq req(fs->get_context(), fs->get_user(), stats);
1838 int rc = rgwlib.get_fe()->execute_req(&req);
1839 if (rc < 0) {
1840 lderr(fs->get_context()) << "ERROR: getting total cluster usage"
1841 << cpp_strerror(-rc) << dendl;
1842 return rc;
1843 }
1844
1845 //Set block size to 1M.
1846 constexpr uint32_t CEPH_BLOCK_SHIFT = 20;
1847 vfs_st->f_bsize = 1 << CEPH_BLOCK_SHIFT;
1848 vfs_st->f_frsize = 1 << CEPH_BLOCK_SHIFT;
1849 vfs_st->f_blocks = stats.kb >> (CEPH_BLOCK_SHIFT - 10);
1850 vfs_st->f_bfree = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
1851 vfs_st->f_bavail = stats.kb_avail >> (CEPH_BLOCK_SHIFT - 10);
1852 vfs_st->f_files = stats.num_objects;
1853 vfs_st->f_ffree = -1;
1854 vfs_st->f_fsid[0] = fs->get_fsid();
1855 vfs_st->f_fsid[1] = fs->get_fsid();
1856 vfs_st->f_flag = 0;
1857 vfs_st->f_namemax = 4096;
1858 return 0;
1859 }
1860
1861 /*
1862 generic create -- create an empty regular file
1863 */
1864 int rgw_create(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1865 const char *name, struct stat *st, uint32_t mask,
1866 struct rgw_file_handle **fh, uint32_t posix_flags,
1867 uint32_t flags)
1868 {
1869 using std::get;
1870
1871 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1872 RGWFileHandle* parent = get_rgwfh(parent_fh);
1873
1874 if ((! parent) ||
1875 (parent->is_root()) ||
1876 (parent->is_file())) {
1877 /* bad parent */
1878 return -EINVAL;
1879 }
1880
1881 MkObjResult fhr = fs->create(parent, name, st, mask, flags);
1882 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1883
1884 if (nfh)
1885 *fh = nfh->get_fh();
1886
1887 return get<1>(fhr);
1888 } /* rgw_create */
1889
1890 /*
1891 create a symbolic link
1892 */
1893 int rgw_symlink(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1894 const char *name, const char *link_path, struct stat *st, uint32_t mask,
1895 struct rgw_file_handle **fh, uint32_t posix_flags,
1896 uint32_t flags)
1897 {
1898 using std::get;
1899
1900 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1901 RGWFileHandle* parent = get_rgwfh(parent_fh);
1902
1903 if ((! parent) ||
1904 (parent->is_root()) ||
1905 (parent->is_file())) {
1906 /* bad parent */
1907 return -EINVAL;
1908 }
1909
1910 MkObjResult fhr = fs->symlink(parent, name, link_path, st, mask, flags);
1911 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1912
1913 if (nfh)
1914 *fh = nfh->get_fh();
1915
1916 return get<1>(fhr);
1917 } /* rgw_symlink */
1918
1919 /*
1920 create a new directory
1921 */
1922 int rgw_mkdir(struct rgw_fs *rgw_fs,
1923 struct rgw_file_handle *parent_fh,
1924 const char *name, struct stat *st, uint32_t mask,
1925 struct rgw_file_handle **fh, uint32_t flags)
1926 {
1927 using std::get;
1928
1929 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1930 RGWFileHandle* parent = get_rgwfh(parent_fh);
1931
1932 if (! parent) {
1933 /* bad parent */
1934 return -EINVAL;
1935 }
1936
1937 MkObjResult fhr = fs->mkdir(parent, name, st, mask, flags);
1938 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1939
1940 if (nfh)
1941 *fh = nfh->get_fh();
1942
1943 return get<1>(fhr);
1944 } /* rgw_mkdir */
1945
1946 /*
1947 rename object
1948 */
1949 int rgw_rename(struct rgw_fs *rgw_fs,
1950 struct rgw_file_handle *src, const char* src_name,
1951 struct rgw_file_handle *dst, const char* dst_name,
1952 uint32_t flags)
1953 {
1954 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1955
1956 RGWFileHandle* src_fh = get_rgwfh(src);
1957 RGWFileHandle* dst_fh = get_rgwfh(dst);
1958
1959 return fs->rename(src_fh, dst_fh, src_name, dst_name);
1960 }
1961
1962 /*
1963 remove file or directory
1964 */
1965 int rgw_unlink(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1966 const char *name, uint32_t flags)
1967 {
1968 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1969 RGWFileHandle* parent = get_rgwfh(parent_fh);
1970
1971 return fs->unlink(parent, name);
1972 }
1973
1974 /*
1975 lookup object by name (POSIX style)
1976 */
1977 int rgw_lookup(struct rgw_fs *rgw_fs,
1978 struct rgw_file_handle *parent_fh, const char* path,
1979 struct rgw_file_handle **fh,
1980 struct stat *st, uint32_t mask, uint32_t flags)
1981 {
1982 //CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
1983 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1984
1985 RGWFileHandle* parent = get_rgwfh(parent_fh);
1986 if ((! parent) ||
1987 (! parent->is_dir())) {
1988 /* bad parent */
1989 return -EINVAL;
1990 }
1991
1992 RGWFileHandle* rgw_fh;
1993 LookupFHResult fhr;
1994
1995 if (parent->is_root()) {
1996 /* special: parent lookup--note lack of ref()! */
1997 if (unlikely((strcmp(path, "..") == 0) ||
1998 (strcmp(path, "/") == 0))) {
1999 rgw_fh = parent;
2000 } else {
2001 RGWLibFS::BucketStats bstat;
2002 fhr = fs->stat_bucket(parent, path, bstat, RGWFileHandle::FLAG_NONE);
2003 rgw_fh = get<0>(fhr);
2004 if (! rgw_fh)
2005 return -ENOENT;
2006 }
2007 } else {
2008 /* special: after readdir--note extra ref()! */
2009 if (unlikely((strcmp(path, "..") == 0))) {
2010 rgw_fh = parent;
2011 lsubdout(fs->get_context(), rgw, 17)
2012 << __func__ << " BANG"<< *rgw_fh
2013 << dendl;
2014 fs->ref(rgw_fh);
2015 } else {
2016 enum rgw_fh_type fh_type = fh_type_of(flags);
2017
2018 uint32_t sl_flags = (flags & RGW_LOOKUP_FLAG_RCB)
2019 ? RGWFileHandle::FLAG_NONE
2020 : RGWFileHandle::FLAG_EXACT_MATCH;
2021
2022 bool fast_attrs= fs->get_context()->_conf->rgw_nfs_s3_fast_attrs;
2023
2024 if ((flags & RGW_LOOKUP_FLAG_RCB) && fast_attrs) {
2025 /* FAKE STAT--this should mean, interpolate special
2026 * owner, group, and perms masks */
2027 fhr = fs->fake_leaf(parent, path, fh_type, st, mask, sl_flags);
2028 } else {
2029 if ((fh_type == RGW_FS_TYPE_DIRECTORY) && fast_attrs) {
2030 /* trust cached dir, if present */
2031 fhr = fs->lookup_fh(parent, path, RGWFileHandle::FLAG_DIRECTORY);
2032 if (get<0>(fhr)) {
2033 rgw_fh = get<0>(fhr);
2034 goto done;
2035 }
2036 }
2037 fhr = fs->stat_leaf(parent, path, fh_type, sl_flags);
2038 }
2039 if (! get<0>(fhr)) {
2040 if (! (flags & RGW_LOOKUP_FLAG_CREATE))
2041 return -ENOENT;
2042 else
2043 fhr = fs->lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
2044 }
2045 rgw_fh = get<0>(fhr);
2046 }
2047 } /* !root */
2048
2049 done:
2050 struct rgw_file_handle *rfh = rgw_fh->get_fh();
2051 *fh = rfh;
2052
2053 return 0;
2054 } /* rgw_lookup */
2055
2056 /*
2057 lookup object by handle (NFS style)
2058 */
2059 int rgw_lookup_handle(struct rgw_fs *rgw_fs, struct rgw_fh_hk *fh_hk,
2060 struct rgw_file_handle **fh, uint32_t flags)
2061 {
2062 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2063
2064 RGWFileHandle* rgw_fh = fs->lookup_handle(*fh_hk);
2065 if (! rgw_fh) {
2066 /* not found */
2067 return -ENOENT;
2068 }
2069
2070 struct rgw_file_handle *rfh = rgw_fh->get_fh();
2071 *fh = rfh;
2072
2073 return 0;
2074 }
2075
2076 /*
2077 * release file handle
2078 */
2079 int rgw_fh_rele(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2080 uint32_t flags)
2081 {
2082 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2083 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2084
2085 lsubdout(fs->get_context(), rgw, 17)
2086 << __func__ << " " << *rgw_fh
2087 << dendl;
2088
2089 fs->unref(rgw_fh);
2090 return 0;
2091 }
2092
2093 /*
2094 get unix attributes for object
2095 */
2096 int rgw_getattr(struct rgw_fs *rgw_fs,
2097 struct rgw_file_handle *fh, struct stat *st, uint32_t flags)
2098 {
2099 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2100 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2101
2102 return fs->getattr(rgw_fh, st);
2103 }
2104
2105 /*
2106 set unix attributes for object
2107 */
2108 int rgw_setattr(struct rgw_fs *rgw_fs,
2109 struct rgw_file_handle *fh, struct stat *st,
2110 uint32_t mask, uint32_t flags)
2111 {
2112 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2113 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2114
2115 return fs->setattr(rgw_fh, st, mask, flags);
2116 }
2117
2118 /*
2119 truncate file
2120 */
/* Truncate file.  Currently a no-op: no argument is examined and 0
 * is always returned. */
int rgw_truncate(struct rgw_fs *rgw_fs,
                 struct rgw_file_handle *fh, uint64_t size, uint32_t flags)
{
  return 0;
}
2126
2127 /*
2128 open file
2129 */
2130 int rgw_open(struct rgw_fs *rgw_fs,
2131 struct rgw_file_handle *fh, uint32_t posix_flags, uint32_t flags)
2132 {
2133 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2134
2135 /* XXX
2136 * need to track specific opens--at least read opens and
2137 * a write open; we need to know when a write open is returned,
2138 * that closes a write transaction
2139 *
2140 * for now, we will support single-open only, it's preferable to
2141 * anything we can otherwise do without access to the NFS state
2142 */
2143 if (! rgw_fh->is_file())
2144 return -EISDIR;
2145
2146 return rgw_fh->open(flags);
2147 }
2148
2149 /*
2150 close file
2151 */
2152 int rgw_close(struct rgw_fs *rgw_fs,
2153 struct rgw_file_handle *fh, uint32_t flags)
2154 {
2155 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2156 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2157 int rc = rgw_fh->close(/* XXX */);
2158
2159 if (flags & RGW_CLOSE_FLAG_RELE)
2160 fs->unref(rgw_fh);
2161
2162 return rc;
2163 }
2164
2165 int rgw_readdir(struct rgw_fs *rgw_fs,
2166 struct rgw_file_handle *parent_fh, uint64_t *offset,
2167 rgw_readdir_cb rcb, void *cb_arg, bool *eof,
2168 uint32_t flags)
2169 {
2170 RGWFileHandle* parent = get_rgwfh(parent_fh);
2171 if (! parent) {
2172 /* bad parent */
2173 return -EINVAL;
2174 }
2175
2176 lsubdout(parent->get_fs()->get_context(), rgw, 15)
2177 << __func__
2178 << " offset=" << *offset
2179 << dendl;
2180
2181 if ((*offset == 0) &&
2182 (flags & RGW_READDIR_FLAG_DOTDOT)) {
2183 /* send '.' and '..' with their NFS-defined offsets */
2184 rcb(".", cb_arg, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
2185 rcb("..", cb_arg, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
2186 }
2187
2188 int rc = parent->readdir(rcb, cb_arg, offset, eof, flags);
2189 return rc;
2190 } /* rgw_readdir */
2191
2192 /* enumeration continuing from name */
2193 int rgw_readdir2(struct rgw_fs *rgw_fs,
2194 struct rgw_file_handle *parent_fh, const char *name,
2195 rgw_readdir_cb rcb, void *cb_arg, bool *eof,
2196 uint32_t flags)
2197 {
2198 RGWFileHandle* parent = get_rgwfh(parent_fh);
2199 if (! parent) {
2200 /* bad parent */
2201 return -EINVAL;
2202 }
2203
2204 lsubdout(parent->get_fs()->get_context(), rgw, 15)
2205 << __func__
2206 << " offset=" << ((name) ? name : "(nil)")
2207 << dendl;
2208
2209 if ((! name) &&
2210 (flags & RGW_READDIR_FLAG_DOTDOT)) {
2211 /* send '.' and '..' with their NFS-defined offsets */
2212 rcb(".", cb_arg, 1, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
2213 rcb("..", cb_arg, 2, nullptr, 0, RGW_LOOKUP_FLAG_DIR);
2214 }
2215
2216 int rc = parent->readdir(rcb, cb_arg, name, eof, flags);
2217 return rc;
2218 } /* rgw_readdir2 */
2219
2220 /* project offset of dirent name */
2221 int rgw_dirent_offset(struct rgw_fs *rgw_fs,
2222 struct rgw_file_handle *parent_fh,
2223 const char *name, int64_t *offset,
2224 uint32_t flags)
2225 {
2226 RGWFileHandle* parent = get_rgwfh(parent_fh);
2227 if ((! parent)) {
2228 /* bad parent */
2229 return -EINVAL;
2230 }
2231 std::string sname{name};
2232 int rc = parent->offset_of(sname, offset, flags);
2233 return rc;
2234 }
2235
2236 /*
2237 read data from file
2238 */
2239 int rgw_read(struct rgw_fs *rgw_fs,
2240 struct rgw_file_handle *fh, uint64_t offset,
2241 size_t length, size_t *bytes_read, void *buffer,
2242 uint32_t flags)
2243 {
2244 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2245 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2246
2247 return fs->read(rgw_fh, offset, length, bytes_read, buffer, flags);
2248 }
2249
2250 /*
2251 read symbolic link
2252 */
2253 int rgw_readlink(struct rgw_fs *rgw_fs,
2254 struct rgw_file_handle *fh, uint64_t offset,
2255 size_t length, size_t *bytes_read, void *buffer,
2256 uint32_t flags)
2257 {
2258 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2259 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2260
2261 return fs->readlink(rgw_fh, offset, length, bytes_read, buffer, flags);
2262 }
2263
2264 /*
2265 write data to file
2266 */
2267 int rgw_write(struct rgw_fs *rgw_fs,
2268 struct rgw_file_handle *fh, uint64_t offset,
2269 size_t length, size_t *bytes_written, void *buffer,
2270 uint32_t flags)
2271 {
2272 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2273 int rc;
2274
2275 *bytes_written = 0;
2276
2277 if (! rgw_fh->is_file())
2278 return -EISDIR;
2279
2280 if (! rgw_fh->is_open()) {
2281 if (flags & RGW_OPEN_FLAG_V3) {
2282 rc = rgw_fh->open(flags);
2283 if (!! rc)
2284 return rc;
2285 } else
2286 return -EPERM;
2287 }
2288
2289 rc = rgw_fh->write(offset, length, bytes_written, buffer);
2290
2291 return rc;
2292 }
2293
2294 /*
2295 read data from file (vector)
2296 */
2297 class RGWReadV
2298 {
2299 buffer::list bl;
2300 struct rgw_vio* vio;
2301
2302 public:
2303 RGWReadV(buffer::list& _bl, rgw_vio* _vio) : vio(_vio) {
2304 bl.claim(_bl);
2305 }
2306
2307 struct rgw_vio* get_vio() { return vio; }
2308
2309 const auto& buffers() { return bl.buffers(); }
2310
2311 unsigned /* XXX */ length() { return bl.length(); }
2312
2313 };
2314
2315 void rgw_readv_rele(struct rgw_uio *uio, uint32_t flags)
2316 {
2317 RGWReadV* rdv = static_cast<RGWReadV*>(uio->uio_p1);
2318 rdv->~RGWReadV();
2319 ::operator delete(rdv);
2320 }
2321
2322 int rgw_readv(struct rgw_fs *rgw_fs,
2323 struct rgw_file_handle *fh, rgw_uio *uio, uint32_t flags)
2324 {
2325 #if 0 /* XXX */
2326 CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2327 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2328 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2329
2330 if (! rgw_fh->is_file())
2331 return -EINVAL;
2332
2333 int rc = 0;
2334
2335 buffer::list bl;
2336 RGWGetObjRequest req(cct, fs->get_user(), rgw_fh->bucket_name(),
2337 rgw_fh->object_name(), uio->uio_offset, uio->uio_resid,
2338 bl);
2339 req.do_hexdump = false;
2340
2341 rc = rgwlib.get_fe()->execute_req(&req);
2342
2343 if (! rc) {
2344 RGWReadV* rdv = static_cast<RGWReadV*>(
2345 ::operator new(sizeof(RGWReadV) +
2346 (bl.buffers().size() * sizeof(struct rgw_vio))));
2347
2348 (void) new (rdv)
2349 RGWReadV(bl, reinterpret_cast<rgw_vio*>(rdv+sizeof(RGWReadV)));
2350
2351 uio->uio_p1 = rdv;
2352 uio->uio_cnt = rdv->buffers().size();
2353 uio->uio_resid = rdv->length();
2354 uio->uio_vio = rdv->get_vio();
2355 uio->uio_rele = rgw_readv_rele;
2356
2357 int ix = 0;
2358 auto& buffers = rdv->buffers();
2359 for (auto& bp : buffers) {
2360 rgw_vio *vio = &(uio->uio_vio[ix]);
2361 vio->vio_base = const_cast<char*>(bp.c_str());
2362 vio->vio_len = bp.length();
2363 vio->vio_u1 = nullptr;
2364 vio->vio_p1 = nullptr;
2365 ++ix;
2366 }
2367 }
2368
2369 return rc;
2370 #else
2371 return 0;
2372 #endif
2373 }
2374
2375 /*
2376 write data to file (vector)
2377 */
2378 int rgw_writev(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2379 rgw_uio *uio, uint32_t flags)
2380 {
2381
2382 return -ENOTSUP;
2383
2384 CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
2385 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
2386 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2387
2388 if (! rgw_fh->is_file())
2389 return -EINVAL;
2390
2391 buffer::list bl;
2392 for (unsigned int ix = 0; ix < uio->uio_cnt; ++ix) {
2393 rgw_vio *vio = &(uio->uio_vio[ix]);
2394 bl.push_back(
2395 buffer::create_static(vio->vio_len,
2396 static_cast<char*>(vio->vio_base)));
2397 }
2398
2399 std::string oname = rgw_fh->relative_object_name();
2400 RGWPutObjRequest req(cct, fs->get_user(), rgw_fh->bucket_name(),
2401 oname, bl);
2402
2403 int rc = rgwlib.get_fe()->execute_req(&req);
2404
2405 /* XXX update size (in request) */
2406
2407 return rc;
2408 }
2409
/*
  sync written data

  Performs no work: every argument is ignored and 0 is returned
  unconditionally.
*/
int rgw_fsync(struct rgw_fs *rgw_fs, struct rgw_file_handle *handle,
              uint32_t flags)
{
  constexpr int fsync_result = 0;
  return fsync_result;
}
2418
2419 int rgw_commit(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
2420 uint64_t offset, uint64_t length, uint32_t flags)
2421 {
2422 RGWFileHandle* rgw_fh = get_rgwfh(fh);
2423
2424 return rgw_fh->commit(offset, length, RGWFileHandle::FLAG_NONE);
2425 }
2426
2427 } /* extern "C" */