]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_file.cc
e5032d0a5104a8221cf49848fb3b45622ab5568a
[ceph.git] / ceph / src / rgw / rgw_file.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "include/compat.h"
5 #include "include/rados/rgw_file.h"
6
7 #include <sys/types.h>
8 #include <sys/stat.h>
9
10 #include "rgw_lib.h"
11 #include "rgw_rados.h"
12 #include "rgw_resolve.h"
13 #include "rgw_op.h"
14 #include "rgw_rest.h"
15 #include "rgw_acl.h"
16 #include "rgw_acl_s3.h"
17 #include "rgw_frontend.h"
18 #include "rgw_request.h"
19 #include "rgw_process.h"
20 #include "rgw_rest_user.h"
21 #include "rgw_rest_s3.h"
22 #include "rgw_os_lib.h"
23 #include "rgw_auth_s3.h"
24 #include "rgw_user.h"
25 #include "rgw_bucket.h"
26
27 #include "rgw_file.h"
28 #include "rgw_lib_frontend.h"
29
30 #include <atomic>
31
32 #define dout_subsys ceph_subsys_rgw
33
34 using namespace rgw;
35
36 namespace rgw {
37
38 extern RGWLib rgwlib;
39
40 const string RGWFileHandle::root_name = "/";
41
42 std::atomic<uint32_t> RGWLibFS::fs_inst_counter;
43
44 uint32_t RGWLibFS::write_completion_interval_s = 10;
45
46 ceph::timer<ceph::mono_clock> RGWLibFS::write_timer{
47 ceph::construct_suspended};
48
49 inline int valid_fs_bucket_name(const string& name) {
50 int rc = valid_s3_bucket_name(name, false /* relaxed */);
51 if (rc != 0) {
52 if (name.size() > 255)
53 return -ENAMETOOLONG;
54 return -EINVAL;
55 }
56 return 0;
57 }
58
59 inline int valid_fs_object_name(const string& name) {
60 int rc = valid_s3_object_name(name);
61 if (rc != 0) {
62 if (name.size() > 1024)
63 return -ENAMETOOLONG;
64 return -EINVAL;
65 }
66 return 0;
67 }
68
69 LookupFHResult RGWLibFS::stat_bucket(RGWFileHandle* parent, const char *path,
70 RGWLibFS::BucketStats& bs,
71 uint32_t flags)
72 {
73 LookupFHResult fhr{nullptr, 0};
74 std::string bucket_name{path};
75 RGWStatBucketRequest req(cct, get_user(), bucket_name, bs);
76
77 int rc = rgwlib.get_fe()->execute_req(&req);
78 if ((rc == 0) &&
79 (req.get_ret() == 0) &&
80 (req.matched())) {
81 fhr = lookup_fh(parent, path,
82 (flags & RGWFileHandle::FLAG_LOCKED)|
83 RGWFileHandle::FLAG_CREATE|
84 RGWFileHandle::FLAG_BUCKET);
85 if (get<0>(fhr)) {
86 RGWFileHandle* rgw_fh = get<0>(fhr);
87 if (! (flags & RGWFileHandle::FLAG_LOCKED)) {
88 rgw_fh->mtx.lock();
89 }
90 rgw_fh->set_times(req.get_ctime());
91 /* restore attributes */
92 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
93 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
94 if (ux_key && ux_attrs) {
95 rgw_fh->decode_attrs(ux_key, ux_attrs);
96 }
97 if (! (flags & RGWFileHandle::FLAG_LOCKED)) {
98 rgw_fh->mtx.unlock();
99 }
100 }
101 }
102 return fhr;
103 }
104
105 LookupFHResult RGWLibFS::stat_leaf(RGWFileHandle* parent,
106 const char *path,
107 enum rgw_fh_type type,
108 uint32_t flags)
109 {
110 /* find either-of <object_name>, <object_name/>, only one of
111 * which should exist; atomicity? */
112 using std::get;
113
114 LookupFHResult fhr{nullptr, 0};
115
116 /* XXX the need for two round-trip operations to identify file or
117 * directory leaf objects is unecessary--the current proposed
118 * mechanism to avoid this is to store leaf object names with an
119 * object locator w/o trailing slash */
120
121 std::string obj_path = parent->format_child_name(path, false);
122
123 for (auto ix : { 0, 1, 2 }) {
124 switch (ix) {
125 case 0:
126 {
127 /* type hint */
128 if (type == RGW_FS_TYPE_DIRECTORY)
129 continue;
130
131 RGWStatObjRequest req(cct, get_user(),
132 parent->bucket_name(), obj_path,
133 RGWStatObjRequest::FLAG_NONE);
134 int rc = rgwlib.get_fe()->execute_req(&req);
135 if ((rc == 0) &&
136 (req.get_ret() == 0)) {
137 fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
138 if (get<0>(fhr)) {
139 RGWFileHandle* rgw_fh = get<0>(fhr);
140 lock_guard guard(rgw_fh->mtx);
141 rgw_fh->set_size(req.get_size());
142 rgw_fh->set_times(req.get_mtime());
143 /* restore attributes */
144 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
145 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
146 if (ux_key && ux_attrs) {
147 rgw_fh->decode_attrs(ux_key, ux_attrs);
148 }
149 }
150 goto done;
151 }
152 }
153 break;
154 case 1:
155 {
156 /* try dir form */
157 /* type hint */
158 if (type == RGW_FS_TYPE_FILE)
159 continue;
160
161 obj_path += "/";
162 RGWStatObjRequest req(cct, get_user(),
163 parent->bucket_name(), obj_path,
164 RGWStatObjRequest::FLAG_NONE);
165 int rc = rgwlib.get_fe()->execute_req(&req);
166 if ((rc == 0) &&
167 (req.get_ret() == 0)) {
168 fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_DIRECTORY);
169 if (get<0>(fhr)) {
170 RGWFileHandle* rgw_fh = get<0>(fhr);
171 lock_guard guard(rgw_fh->mtx);
172 rgw_fh->set_size(req.get_size());
173 rgw_fh->set_times(req.get_mtime());
174 /* restore attributes */
175 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
176 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
177 if (ux_key && ux_attrs) {
178 rgw_fh->decode_attrs(ux_key, ux_attrs);
179 }
180 }
181 goto done;
182 }
183 }
184 break;
185 case 2:
186 {
187 std::string object_name{path};
188 RGWStatLeafRequest req(cct, get_user(), parent, object_name);
189 int rc = rgwlib.get_fe()->execute_req(&req);
190 if ((rc == 0) &&
191 (req.get_ret() == 0)) {
192 if (req.matched) {
193 /* we need rgw object's key name equal to file name, if
194 * not return NULL */
195 if ((flags & RGWFileHandle::FLAG_EXACT_MATCH) &&
196 !req.exact_matched) {
197 lsubdout(get_context(), rgw, 15)
198 << __func__
199 << ": stat leaf not exact match file name = "
200 << path << dendl;
201 goto done;
202 }
203 fhr = lookup_fh(parent, path,
204 RGWFileHandle::FLAG_CREATE|
205 ((req.is_dir) ?
206 RGWFileHandle::FLAG_DIRECTORY :
207 RGWFileHandle::FLAG_NONE));
208 /* XXX we don't have an object--in general, there need not
209 * be one (just a path segment in some other object). In
210 * actual leaf an object exists, but we'd need another round
211 * trip to get attrs */
212 if (get<0>(fhr)) {
213 /* for now use the parent object's mtime */
214 RGWFileHandle* rgw_fh = get<0>(fhr);
215 lock_guard guard(rgw_fh->mtx);
216 rgw_fh->set_mtime(parent->get_mtime());
217 }
218 }
219 }
220 }
221 break;
222 default:
223 /* not reached */
224 break;
225 }
226 }
227 done:
228 return fhr;
229 } /* RGWLibFS::stat_leaf */
230
231 int RGWLibFS::read(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
232 size_t* bytes_read, void* buffer, uint32_t flags)
233 {
234 if (! rgw_fh->is_file())
235 return -EINVAL;
236
237 if (rgw_fh->deleted())
238 return -ESTALE;
239
240 RGWReadRequest req(get_context(), get_user(), rgw_fh, offset, length,
241 buffer);
242
243 int rc = rgwlib.get_fe()->execute_req(&req);
244 if ((rc == 0) &&
245 (req.get_ret() == 0)) {
246 lock_guard(rgw_fh->mtx);
247 rgw_fh->set_atime(real_clock::to_timespec(real_clock::now()));
248 *bytes_read = req.nread;
249 }
250
251 return rc;
252 }
253
254 int RGWLibFS::unlink(RGWFileHandle* rgw_fh, const char* name, uint32_t flags)
255 {
256 int rc = 0;
257 BucketStats bs;
258 RGWFileHandle* parent = nullptr;
259 RGWFileHandle* bkt_fh = nullptr;
260
261 if (unlikely(flags & RGWFileHandle::FLAG_UNLINK_THIS)) {
262 /* LOCKED */
263 parent = rgw_fh->get_parent();
264 } else {
265 /* atomicity */
266 parent = rgw_fh;
267 LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_LOCK);
268 rgw_fh = get<0>(fhr);
269 /* LOCKED */
270 }
271
272 if (parent->is_root()) {
273 /* a bucket may have an object storing Unix attributes, check
274 * for and delete it */
275 LookupFHResult fhr;
276 fhr = stat_bucket(parent, name, bs, (rgw_fh) ?
277 RGWFileHandle::FLAG_LOCKED :
278 RGWFileHandle::FLAG_NONE);
279 bkt_fh = get<0>(fhr);
280 if (unlikely(! bkt_fh)) {
281 /* implies !rgw_fh, so also !LOCKED */
282 return -ENOENT;
283 }
284
285 if (bs.num_entries > 1) {
286 unref(bkt_fh); /* return stat_bucket ref */
287 if (likely(!! rgw_fh)) { /* return lock and ref from
288 * lookup_fh (or caller in the
289 * special case of
290 * RGWFileHandle::FLAG_UNLINK_THIS) */
291 rgw_fh->mtx.unlock();
292 unref(rgw_fh);
293 }
294 return -ENOTEMPTY;
295 } else {
296 /* delete object w/key "<bucket>/" (uxattrs), if any */
297 string oname{"/"};
298 RGWDeleteObjRequest req(cct, get_user(), bkt_fh->bucket_name(), oname);
299 rc = rgwlib.get_fe()->execute_req(&req);
300 /* don't care if ENOENT */
301 unref(bkt_fh);
302 }
303
304 string bname{name};
305 RGWDeleteBucketRequest req(cct, get_user(), bname);
306 rc = rgwlib.get_fe()->execute_req(&req);
307 if (! rc) {
308 rc = req.get_ret();
309 }
310 } else {
311 /*
312 * leaf object
313 */
314 if (! rgw_fh) {
315 /* XXX for now, peform a hard lookup to deduce the type of
316 * object to be deleted ("foo" vs. "foo/")--also, ensures
317 * atomicity at this endpoint */
318 struct rgw_file_handle *fh;
319 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &fh,
320 RGW_LOOKUP_FLAG_NONE);
321 if (!! rc)
322 return rc;
323
324 /* rgw_fh ref+ */
325 rgw_fh = get_rgwfh(fh);
326 rgw_fh->mtx.lock(); /* LOCKED */
327 }
328
329 std::string oname = rgw_fh->relative_object_name();
330 if (rgw_fh->is_dir()) {
331 /* for the duration of our cache timer, trust positive
332 * child cache */
333 if (rgw_fh->has_children()) {
334 rgw_fh->mtx.unlock();
335 unref(rgw_fh);
336 return(-ENOTEMPTY);
337 }
338 oname += "/";
339 }
340 RGWDeleteObjRequest req(cct, get_user(), parent->bucket_name(),
341 oname);
342 rc = rgwlib.get_fe()->execute_req(&req);
343 if (! rc) {
344 rc = req.get_ret();
345 }
346 }
347
348 /* ENOENT when raced with other s3 gateway */
349 if (! rc || rc == -ENOENT) {
350 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
351 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
352 RGWFileHandle::FHCache::FLAG_LOCK);
353 }
354
355 if (! rc) {
356 real_time t = real_clock::now();
357 parent->set_mtime(real_clock::to_timespec(t));
358 parent->set_ctime(real_clock::to_timespec(t));
359 }
360
361 rgw_fh->mtx.unlock();
362 unref(rgw_fh);
363
364 return rc;
365 } /* RGWLibFS::unlink */
366
367 int RGWLibFS::rename(RGWFileHandle* src_fh, RGWFileHandle* dst_fh,
368 const char *_src_name, const char *_dst_name)
369
370 {
371 /* XXX initial implementation: try-copy, and delete if copy
372 * succeeds */
373 int rc = -EINVAL;
374
375 real_time t;
376
377 std::string src_name{_src_name};
378 std::string dst_name{_dst_name};
379
380 /* atomicity */
381 LookupFHResult fhr = lookup_fh(src_fh, _src_name, RGWFileHandle::FLAG_LOCK);
382 RGWFileHandle* rgw_fh = get<0>(fhr);
383
384 /* should not happen */
385 if (! rgw_fh) {
386 ldout(get_context(), 0) << __func__
387 << " BUG no such src renaming path="
388 << src_name
389 << dendl;
390 goto out;
391 }
392
393 /* forbid renaming of directories (unreasonable at scale) */
394 if (rgw_fh->is_dir()) {
395 ldout(get_context(), 12) << __func__
396 << " rejecting attempt to rename directory path="
397 << rgw_fh->full_object_name()
398 << dendl;
399 rc = -EPERM;
400 goto unlock;
401 }
402
403 /* forbid renaming open files (violates intent, for now) */
404 if (rgw_fh->is_open()) {
405 ldout(get_context(), 12) << __func__
406 << " rejecting attempt to rename open file path="
407 << rgw_fh->full_object_name()
408 << dendl;
409 rc = -EPERM;
410 goto unlock;
411 }
412
413 t = real_clock::now();
414
415 for (int ix : {0, 1}) {
416 switch (ix) {
417 case 0:
418 {
419 RGWCopyObjRequest req(cct, get_user(), src_fh, dst_fh, src_name,
420 dst_name);
421 int rc = rgwlib.get_fe()->execute_req(&req);
422 if ((rc != 0) ||
423 ((rc = req.get_ret()) != 0)) {
424 ldout(get_context(), 1)
425 << __func__
426 << " rename step 0 failed src="
427 << src_fh->full_object_name() << " " << src_name
428 << " dst=" << dst_fh->full_object_name()
429 << " " << dst_name
430 << "rc " << rc
431 << dendl;
432 goto unlock;
433 }
434 ldout(get_context(), 12)
435 << __func__
436 << " rename step 0 success src="
437 << src_fh->full_object_name() << " " << src_name
438 << " dst=" << dst_fh->full_object_name()
439 << " " << dst_name
440 << " rc " << rc
441 << dendl;
442 /* update dst change id */
443 dst_fh->set_times(t);
444 }
445 break;
446 case 1:
447 {
448 rc = this->unlink(rgw_fh /* LOCKED */, _src_name,
449 RGWFileHandle::FLAG_UNLINK_THIS);
450 /* !LOCKED, -ref */
451 if (! rc) {
452 ldout(get_context(), 12)
453 << __func__
454 << " rename step 1 success src="
455 << src_fh->full_object_name() << " " << src_name
456 << " dst=" << dst_fh->full_object_name()
457 << " " << dst_name
458 << " rc " << rc
459 << dendl;
460 /* update src change id */
461 src_fh->set_times(t);
462 } else {
463 ldout(get_context(), 1)
464 << __func__
465 << " rename step 1 failed src="
466 << src_fh->full_object_name() << " " << src_name
467 << " dst=" << dst_fh->full_object_name()
468 << " " << dst_name
469 << " rc " << rc
470 << dendl;
471 }
472 }
473 goto out;
474 default:
475 abort();
476 } /* switch */
477 } /* ix */
478 unlock:
479 rgw_fh->mtx.unlock(); /* !LOCKED */
480 unref(rgw_fh); /* -ref */
481
482 out:
483 return rc;
484 } /* RGWLibFS::rename */
485
486 MkObjResult RGWLibFS::mkdir(RGWFileHandle* parent, const char *name,
487 struct stat *st, uint32_t mask, uint32_t flags)
488 {
489 int rc, rc2;
490 rgw_file_handle *lfh;
491
492 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
493 RGW_LOOKUP_FLAG_NONE);
494 if (! rc) {
495 /* conflict! */
496 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
497 return MkObjResult{nullptr, -EEXIST};
498 }
499
500 MkObjResult mkr{nullptr, -EINVAL};
501 LookupFHResult fhr;
502 RGWFileHandle* rgw_fh = nullptr;
503 buffer::list ux_key, ux_attrs;
504
505 fhr = lookup_fh(parent, name,
506 RGWFileHandle::FLAG_CREATE|
507 RGWFileHandle::FLAG_DIRECTORY|
508 RGWFileHandle::FLAG_LOCK);
509 rgw_fh = get<0>(fhr);
510 if (rgw_fh) {
511 rgw_fh->create_stat(st, mask);
512 rgw_fh->set_times(real_clock::now());
513 /* save attrs */
514 rgw_fh->encode_attrs(ux_key, ux_attrs);
515 if (st)
516 rgw_fh->stat(st);
517 get<0>(mkr) = rgw_fh;
518 } else {
519 get<1>(mkr) = -EIO;
520 return mkr;
521 }
522
523 if (parent->is_root()) {
524 /* bucket */
525 string bname{name};
526 /* enforce S3 name restrictions */
527 rc = valid_fs_bucket_name(bname);
528 if (rc != 0) {
529 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
530 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
531 RGWFileHandle::FHCache::FLAG_LOCK);
532 rgw_fh->mtx.unlock();
533 unref(rgw_fh);
534 get<0>(mkr) = nullptr;
535 get<1>(mkr) = rc;
536 return mkr;
537 }
538
539 RGWCreateBucketRequest req(get_context(), get_user(), bname);
540
541 /* save attrs */
542 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
543 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
544
545 rc = rgwlib.get_fe()->execute_req(&req);
546 rc2 = req.get_ret();
547 } else {
548 /* create an object representing the directory */
549 buffer::list bl;
550 string dir_name = parent->format_child_name(name, true);
551
552 /* need valid S3 name (characters, length <= 1024, etc) */
553 rc = valid_fs_object_name(dir_name);
554 if (rc != 0) {
555 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
556 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
557 RGWFileHandle::FHCache::FLAG_LOCK);
558 rgw_fh->mtx.unlock();
559 unref(rgw_fh);
560 get<0>(mkr) = nullptr;
561 get<1>(mkr) = rc;
562 return mkr;
563 }
564
565 RGWPutObjRequest req(get_context(), get_user(), parent->bucket_name(),
566 dir_name, bl);
567
568 /* save attrs */
569 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
570 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
571
572 rc = rgwlib.get_fe()->execute_req(&req);
573 rc2 = req.get_ret();
574 }
575
576 if (! ((rc == 0) &&
577 (rc2 == 0))) {
578 /* op failed */
579 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
580 rgw_fh->mtx.unlock(); /* !LOCKED */
581 unref(rgw_fh);
582 get<0>(mkr) = nullptr;
583 /* fixup rc */
584 if (!rc)
585 rc = rc2;
586 } else {
587 real_time t = real_clock::now();
588 parent->set_mtime(real_clock::to_timespec(t));
589 parent->set_ctime(real_clock::to_timespec(t));
590 rgw_fh->mtx.unlock(); /* !LOCKED */
591 }
592
593 get<1>(mkr) = rc;
594
595 return mkr;
596 } /* RGWLibFS::mkdir */
597
598 MkObjResult RGWLibFS::create(RGWFileHandle* parent, const char *name,
599 struct stat *st, uint32_t mask, uint32_t flags)
600 {
601 int rc, rc2;
602
603 using std::get;
604
605 rgw_file_handle *lfh;
606 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
607 RGW_LOOKUP_FLAG_NONE);
608 if (! rc) {
609 /* conflict! */
610 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
611 return MkObjResult{nullptr, -EEXIST};
612 }
613
614 /* expand and check name */
615 std::string obj_name = parent->format_child_name(name, false);
616 rc = valid_fs_object_name(obj_name);
617 if (rc != 0) {
618 return MkObjResult{nullptr, rc};
619 }
620
621 /* create it */
622 buffer::list bl;
623 RGWPutObjRequest req(cct, get_user(), parent->bucket_name(), obj_name, bl);
624 MkObjResult mkr{nullptr, -EINVAL};
625
626 rc = rgwlib.get_fe()->execute_req(&req);
627 rc2 = req.get_ret();
628
629 if ((rc == 0) &&
630 (rc2 == 0)) {
631 /* XXX atomicity */
632 LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_CREATE |
633 RGWFileHandle::FLAG_LOCK);
634 RGWFileHandle* rgw_fh = get<0>(fhr);
635 if (rgw_fh) {
636 if (get<1>(fhr) & RGWFileHandle::FLAG_CREATE) {
637 /* fill in stat data */
638 real_time t = real_clock::now();
639 rgw_fh->create_stat(st, mask);
640 rgw_fh->set_times(t);
641
642 parent->set_mtime(real_clock::to_timespec(t));
643 parent->set_ctime(real_clock::to_timespec(t));
644 }
645 if (st)
646 (void) rgw_fh->stat(st);
647 get<0>(mkr) = rgw_fh;
648 rgw_fh->mtx.unlock();
649 } else
650 rc = -EIO;
651 }
652
653 get<1>(mkr) = rc;
654
655 return mkr;
656 } /* RGWLibFS::create */
657
658 int RGWLibFS::getattr(RGWFileHandle* rgw_fh, struct stat* st)
659 {
660 switch(rgw_fh->fh.fh_type) {
661 case RGW_FS_TYPE_FILE:
662 {
663 if (rgw_fh->deleted())
664 return -ESTALE;
665 }
666 break;
667 default:
668 break;
669 };
670
671 return rgw_fh->stat(st);
672 } /* RGWLibFS::getattr */
673
674 int RGWLibFS::setattr(RGWFileHandle* rgw_fh, struct stat* st, uint32_t mask,
675 uint32_t flags)
676 {
677 int rc, rc2;
678 buffer::list ux_key, ux_attrs;
679
680 lock_guard guard(rgw_fh->mtx);
681
682 switch(rgw_fh->fh.fh_type) {
683 case RGW_FS_TYPE_FILE:
684 {
685 if (rgw_fh->deleted())
686 return -ESTALE;
687 }
688 break;
689 default:
690 break;
691 };
692
693 string obj_name{rgw_fh->relative_object_name()};
694
695 if (rgw_fh->is_dir() &&
696 (likely(! rgw_fh->is_bucket()))) {
697 obj_name += "/";
698 }
699
700 RGWSetAttrsRequest req(cct, get_user(), rgw_fh->bucket_name(), obj_name);
701
702 rgw_fh->create_stat(st, mask);
703 rgw_fh->encode_attrs(ux_key, ux_attrs);
704
705 /* save attrs */
706 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
707 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
708
709 rc = rgwlib.get_fe()->execute_req(&req);
710 rc2 = req.get_ret();
711
712 if (rc == -ENOENT) {
713 /* special case: materialize placeholder dir */
714 buffer::list bl;
715 RGWPutObjRequest req(get_context(), get_user(), rgw_fh->bucket_name(),
716 obj_name, bl);
717
718 rgw_fh->encode_attrs(ux_key, ux_attrs); /* because std::moved */
719
720 /* save attrs */
721 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
722 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
723
724 rc = rgwlib.get_fe()->execute_req(&req);
725 rc2 = req.get_ret();
726 }
727
728 if ((rc != 0) || (rc2 != 0)) {
729 return -EIO;
730 }
731
732 rgw_fh->set_ctime(real_clock::to_timespec(real_clock::now()));
733
734 return 0;
735 } /* RGWLibFS::setattr */
736
737 void RGWLibFS::close()
738 {
739 state.flags |= FLAG_CLOSED;
740
741 class ObjUnref
742 {
743 RGWLibFS* fs;
744 public:
745 ObjUnref(RGWLibFS* fs) : fs(fs) {}
746 void operator()(RGWFileHandle* fh) const {
747 lsubdout(fs->get_context(), rgw, 5)
748 << __func__
749 << fh->name
750 << " before ObjUnref refs=" << fh->get_refcnt()
751 << dendl;
752 fs->unref(fh);
753 }
754 };
755
756 /* force cache drain, forces objects to evict */
757 fh_cache.drain(ObjUnref(this),
758 RGWFileHandle::FHCache::FLAG_LOCK);
759 rgwlib.get_fe()->get_process()->unregister_fs(this);
760 rele();
761 } /* RGWLibFS::close */
762
/* Stream a timespec as "<timespec: tv_sec=S; tv_nsec=N>". */
inline std::ostream& operator<<(std::ostream &os, struct timespec const &ts) {
  return os << "<timespec: tv_sec=" << ts.tv_sec
            << "; tv_nsec=" << ts.tv_nsec
            << ">";
}
771
772 std::ostream& operator<<(std::ostream &os, RGWLibFS::event const &ev) {
773 os << "<event:";
774 switch (ev.t) {
775 case RGWLibFS::event::type::READDIR:
776 os << "type=READDIR;";
777 break;
778 default:
779 os << "type=UNKNOWN;";
780 break;
781 };
782 os << "fid=" << ev.fhk.fh_hk.bucket << ":" << ev.fhk.fh_hk.object
783 << ";ts=" << ev.ts << ">";
784 return os;
785 }
786
787 void RGWLibFS::gc()
788 {
789 using std::get;
790 using directory = RGWFileHandle::directory;
791
792 /* dirent invalidate timeout--basically, the upper-bound on
793 * inconsistency with the S3 namespace */
794 auto expire_s
795 = get_context()->_conf->rgw_nfs_namespace_expire_secs;
796
797 /* max events to gc in one cycle */
798 uint32_t max_ev =
799 std::max(1, get_context()->_conf->rgw_nfs_max_gc);
800
801 struct timespec now, expire_ts;
802 event_vector ve;
803 bool stop = false;
804 std::deque<event> &events = state.events;
805
806 do {
807 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
808 lsubdout(get_context(), rgw, 15)
809 << "GC: top of expire loop"
810 << " now=" << now
811 << " expire_s=" << expire_s
812 << dendl;
813 {
814 lock_guard guard(state.mtx); /* LOCKED */
815 /* just return if no events */
816 if (events.empty()) {
817 return;
818 }
819 uint32_t _max_ev =
820 (events.size() < 500) ? max_ev : (events.size() / 4);
821 for (uint32_t ix = 0; (ix < _max_ev) && (events.size() > 0); ++ix) {
822 event& ev = events.front();
823 expire_ts = ev.ts;
824 expire_ts.tv_sec += expire_s;
825 if (expire_ts > now) {
826 stop = true;
827 break;
828 }
829 ve.push_back(ev);
830 events.pop_front();
831 }
832 } /* anon */
833 /* !LOCKED */
834 for (auto& ev : ve) {
835 lsubdout(get_context(), rgw, 15)
836 << "try-expire ev: " << ev << dendl;
837 if (likely(ev.t == event::type::READDIR)) {
838 RGWFileHandle* rgw_fh = lookup_handle(ev.fhk.fh_hk);
839 lsubdout(get_context(), rgw, 15)
840 << "ev rgw_fh: " << rgw_fh << dendl;
841 if (rgw_fh) {
842 RGWFileHandle::directory* d;
843 if (unlikely(! rgw_fh->is_dir())) {
844 lsubdout(get_context(), rgw, 0)
845 << __func__
846 << " BUG non-directory found with READDIR event "
847 << "(" << rgw_fh->bucket_name() << ","
848 << rgw_fh->object_name() << ")"
849 << dendl;
850 goto rele;
851 }
852 /* maybe clear state */
853 d = get<directory>(&rgw_fh->variant_type);
854 if (d) {
855 struct timespec ev_ts = ev.ts;
856 lock_guard guard(rgw_fh->mtx);
857 struct timespec d_last_readdir = d->last_readdir;
858 if (unlikely(ev_ts < d_last_readdir)) {
859 /* readdir cycle in progress, don't invalidate */
860 lsubdout(get_context(), rgw, 15)
861 << "GC: delay expiration for "
862 << rgw_fh->object_name()
863 << " ev.ts=" << ev_ts
864 << " last_readdir=" << d_last_readdir
865 << dendl;
866 continue;
867 } else {
868 lsubdout(get_context(), rgw, 15)
869 << "GC: expiring "
870 << rgw_fh->object_name()
871 << dendl;
872 rgw_fh->clear_state();
873 rgw_fh->invalidate();
874 }
875 }
876 rele:
877 unref(rgw_fh);
878 } /* rgw_fh */
879 } /* event::type::READDIR */
880 } /* ev */
881 ve.clear();
882 } while (! (stop || shutdown));
883 } /* RGWLibFS::gc */
884
885 std::ostream& operator<<(std::ostream &os,
886 RGWFileHandle const &rgw_fh)
887 {
888 const auto& fhk = rgw_fh.get_key();
889 const auto& fh = const_cast<RGWFileHandle&>(rgw_fh).get_fh();
890 os << "<RGWFileHandle:";
891 os << "addr=" << &rgw_fh << ";";
892 switch (fh->fh_type) {
893 case RGW_FS_TYPE_DIRECTORY:
894 os << "type=DIRECTORY;";
895 break;
896 case RGW_FS_TYPE_FILE:
897 os << "type=FILE;";
898 break;
899 default:
900 os << "type=UNKNOWN;";
901 break;
902 };
903 os << "fid=" << fhk.fh_hk.bucket << ":" << fhk.fh_hk.object << ";";
904 os << "name=" << rgw_fh.object_name() << ";";
905 os << "refcnt=" << rgw_fh.get_refcnt() << ";";
906 os << ">";
907 return os;
908 }
909
910 RGWFileHandle::~RGWFileHandle() {
911 /* in the non-delete case, handle may still be in handle table */
912 if (fh_hook.is_linked()) {
913 fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_LOCK);
914 }
915 /* cond-unref parent */
916 if (parent && (! parent->is_root())) {
917 /* safe because if parent->unref causes its deletion,
918 * there are a) by refcnt, no other objects/paths pointing
919 * to it and b) by the semantics of valid iteration of
920 * fh_lru (observed, e.g., by cohort_lru<T,...>::drain())
921 * no unsafe iterators reaching it either--n.b., this constraint
922 * is binding oncode which may in future attempt to e.g.,
923 * cause the eviction of objects in LRU order */
924 (void) get_fs()->unref(parent);
925 }
926 }
927
928 void RGWFileHandle::encode_attrs(ceph::buffer::list& ux_key1,
929 ceph::buffer::list& ux_attrs1)
930 {
931 fh_key fhk(this->fh.fh_hk);
932 rgw::encode(fhk, ux_key1);
933 rgw::encode(*this, ux_attrs1);
934 } /* RGWFileHandle::encode_attrs */
935
936 void RGWFileHandle::decode_attrs(const ceph::buffer::list* ux_key1,
937 const ceph::buffer::list* ux_attrs1)
938 {
939 fh_key fhk;
940 auto bl_iter_key1 = const_cast<buffer::list*>(ux_key1)->begin();
941 rgw::decode(fhk, bl_iter_key1);
942 assert(this->fh.fh_hk == fhk.fh_hk);
943
944 auto bl_iter_unix1 = const_cast<buffer::list*>(ux_attrs1)->begin();
945 rgw::decode(*this, bl_iter_unix1);
946 } /* RGWFileHandle::decode_attrs */
947
948 bool RGWFileHandle::reclaim() {
949 lsubdout(fs->get_context(), rgw, 17)
950 << __func__ << " " << *this
951 << dendl;
952 /* remove if still in fh_cache */
953 if (fh_hook.is_linked()) {
954 fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_LOCK);
955 }
956 return true;
957 } /* RGWFileHandle::reclaim */
958
959 bool RGWFileHandle::has_children() const
960 {
961 if (unlikely(! is_dir()))
962 return false;
963
964 RGWRMdirCheck req(fs->get_context(), fs->get_user(), this);
965 int rc = rgwlib.get_fe()->execute_req(&req);
966 if (! rc) {
967 return req.valid && req.has_children;
968 }
969
970 return false;
971 }
972
973 int RGWFileHandle::readdir(rgw_readdir_cb rcb, void *cb_arg, uint64_t *offset,
974 bool *eof, uint32_t flags)
975 {
976 using event = RGWLibFS::event;
977 int rc = 0;
978 struct timespec now;
979 CephContext* cct = fs->get_context();
980
981 if ((*offset == 0) &&
982 (flags & RGW_READDIR_FLAG_DOTDOT)) {
983 /* send '.' and '..' with their NFS-defined offsets */
984 rcb(".", cb_arg, 1, RGW_LOOKUP_FLAG_DIR);
985 rcb("..", cb_arg, 2, RGW_LOOKUP_FLAG_DIR);
986 }
987
988 lsubdout(fs->get_context(), rgw, 15)
989 << __func__
990 << " offset=" << *offset
991 << dendl;
992
993 directory* d = get<directory>(&variant_type);
994 if (d) {
995 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
996 lock_guard guard(mtx);
997 d->last_readdir = now;
998 }
999
1000 if (is_root()) {
1001 RGWListBucketsRequest req(cct, fs->get_user(), this, rcb, cb_arg,
1002 offset);
1003 rc = rgwlib.get_fe()->execute_req(&req);
1004 if (! rc) {
1005 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1006 lock_guard guard(mtx);
1007 state.atime = now;
1008 if (*offset == 0)
1009 set_nlink(2);
1010 inc_nlink(req.d_count);
1011 *eof = req.eof();
1012 event ev(event::type::READDIR, get_key(), state.atime);
1013 lock_guard sguard(fs->state.mtx);
1014 fs->state.push_event(ev);
1015 }
1016 } else {
1017 RGWReaddirRequest req(cct, fs->get_user(), this, rcb, cb_arg, offset);
1018 rc = rgwlib.get_fe()->execute_req(&req);
1019 if (! rc) {
1020 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
1021 lock_guard guard(mtx);
1022 state.atime = now;
1023 if (*offset == 0)
1024 set_nlink(2);
1025 inc_nlink(req.d_count);
1026 *eof = req.eof();
1027 event ev(event::type::READDIR, get_key(), state.atime);
1028 lock_guard sguard(fs->state.mtx);
1029 fs->state.push_event(ev);
1030 }
1031 }
1032
1033 lsubdout(fs->get_context(), rgw, 15)
1034 << __func__
1035 << " final link count=" << state.nlink
1036 << dendl;
1037
1038 return rc;
1039 } /* RGWFileHandle::readdir */
1040
1041 int RGWFileHandle::write(uint64_t off, size_t len, size_t *bytes_written,
1042 void *buffer)
1043 {
1044 using std::get;
1045 using WriteCompletion = RGWLibFS::WriteCompletion;
1046
1047 lock_guard guard(mtx);
1048
1049 int rc = 0;
1050
1051 file* f = get<file>(&variant_type);
1052 if (! f)
1053 return -EISDIR;
1054
1055 if (deleted()) {
1056 lsubdout(fs->get_context(), rgw, 5)
1057 << __func__
1058 << " write attempted on deleted object "
1059 << this->object_name()
1060 << dendl;
1061 /* zap write transaction, if any */
1062 if (f->write_req) {
1063 delete f->write_req;
1064 f->write_req = nullptr;
1065 }
1066 return -ESTALE;
1067 }
1068
1069 if (! f->write_req) {
1070 /* guard--we do not support (e.g., COW-backed) partial writes */
1071 if (off != 0) {
1072 lsubdout(fs->get_context(), rgw, 5)
1073 << __func__
1074 << " " << object_name()
1075 << " non-0 initial write position " << off
1076 << dendl;
1077 return -EIO;
1078 }
1079
1080 /* start */
1081 std::string object_name = relative_object_name();
1082 f->write_req =
1083 new RGWWriteRequest(fs->get_context(), fs->get_user(), this,
1084 bucket_name(), object_name);
1085 rc = rgwlib.get_fe()->start_req(f->write_req);
1086 if (rc < 0) {
1087 lsubdout(fs->get_context(), rgw, 5)
1088 << __func__
1089 << this->object_name()
1090 << " write start failed " << off
1091 << " (" << rc << ")"
1092 << dendl;
1093 /* zap failed write transaction */
1094 delete f->write_req;
1095 f->write_req = nullptr;
1096 return -EIO;
1097 } else {
1098 if (stateless_open()) {
1099 /* start write timer */
1100 f->write_req->timer_id =
1101 RGWLibFS::write_timer.add_event(
1102 std::chrono::seconds(RGWLibFS::write_completion_interval_s),
1103 WriteCompletion(*this));
1104 }
1105 }
1106 }
1107
1108 buffer::list bl;
1109 /* XXXX */
1110 #if 0
1111 bl.push_back(
1112 buffer::create_static(len, static_cast<char*>(buffer)));
1113 #else
1114 bl.push_back(
1115 buffer::copy(static_cast<char*>(buffer), len));
1116 #endif
1117
1118 f->write_req->put_data(off, bl);
1119 rc = f->write_req->exec_continue();
1120
1121 if (rc == 0) {
1122 size_t min_size = off + len;
1123 if (min_size > get_size())
1124 set_size(min_size);
1125 if (stateless_open()) {
1126 /* bump write timer */
1127 RGWLibFS::write_timer.adjust_event(
1128 f->write_req->timer_id, std::chrono::seconds(10));
1129 }
1130 } else {
1131 /* continuation failed (e.g., non-contiguous write position) */
1132 lsubdout(fs->get_context(), rgw, 5)
1133 << __func__
1134 << object_name()
1135 << " failed write at position " << off
1136 << " (fails write transaction) "
1137 << dendl;
1138 /* zap failed write transaction */
1139 delete f->write_req;
1140 f->write_req = nullptr;
1141 rc = -EIO;
1142 }
1143
1144 *bytes_written = (rc == 0) ? len : 0;
1145 return rc;
1146 } /* RGWFileHandle::write */
1147
1148 int RGWFileHandle::write_finish(uint32_t flags)
1149 {
1150 unique_lock guard{mtx, std::defer_lock};
1151 int rc = 0;
1152
1153 if (! (flags & FLAG_LOCKED)) {
1154 guard.lock();
1155 }
1156
1157 file* f = get<file>(&variant_type);
1158 if (f && (f->write_req)) {
1159 lsubdout(fs->get_context(), rgw, 10)
1160 << __func__
1161 << " finishing write trans on " << object_name()
1162 << dendl;
1163 rc = rgwlib.get_fe()->finish_req(f->write_req);
1164 if (! rc) {
1165 rc = f->write_req->get_ret();
1166 }
1167 delete f->write_req;
1168 f->write_req = nullptr;
1169 }
1170
1171 return rc;
1172 } /* RGWFileHandle::write_finish */
1173
1174 int RGWFileHandle::close()
1175 {
1176 lock_guard guard(mtx);
1177
1178 int rc = write_finish(FLAG_LOCKED);
1179
1180 flags &= ~FLAG_OPEN;
1181 flags &= ~FLAG_STATELESS_OPEN;
1182
1183 return rc;
1184 } /* RGWFileHandle::close */
1185
1186 RGWFileHandle::file::~file()
1187 {
1188 delete write_req;
1189 }
1190
1191 void RGWFileHandle::clear_state()
1192 {
1193 directory* d = get<directory>(&variant_type);
1194 if (d) {
1195 state.nlink = 2;
1196 d->last_marker = rgw_obj_key{};
1197 }
1198 }
1199
1200 void RGWFileHandle::invalidate() {
1201 RGWLibFS *fs = get_fs();
1202 if (fs->invalidate_cb) {
1203 fs->invalidate_cb(fs->invalidate_arg, get_key().fh_hk);
1204 }
1205 }
1206
1207 int RGWWriteRequest::exec_start() {
1208 struct req_state* s = get_state();
1209
1210 /* not obviously supportable */
1211 assert(! dlo_manifest);
1212 assert(! slo_info);
1213
1214 perfcounter->inc(l_rgw_put);
1215 op_ret = -EINVAL;
1216
1217 if (s->object.empty()) {
1218 ldout(s->cct, 0) << __func__ << " called on empty object" << dendl;
1219 goto done;
1220 }
1221
1222 op_ret = get_params();
1223 if (op_ret < 0)
1224 goto done;
1225
1226 op_ret = get_system_versioning_params(s, &olh_epoch, &version_id);
1227 if (op_ret < 0) {
1228 goto done;
1229 }
1230
1231 /* user-supplied MD5 check skipped (not supplied) */
1232 /* early quota check skipped--we don't have size yet */
1233 /* skipping user-supplied etag--we might have one in future, but
1234 * like data it and other attrs would arrive after open */
1235 processor = select_processor(*static_cast<RGWObjectCtx *>(s->obj_ctx),
1236 &multipart);
1237 op_ret = processor->prepare(get_store(), NULL);
1238
1239 done:
1240 return op_ret;
1241 } /* exec_start */
1242
1243 int RGWWriteRequest::exec_continue()
1244 {
1245 struct req_state* s = get_state();
1246 op_ret = 0;
1247
1248 /* check guards (e.g., contig write) */
1249 if (eio)
1250 return -EIO;
1251
1252 size_t len = data.length();
1253 if (! len)
1254 return 0;
1255
1256 /* XXX we are currently synchronous--supplied data buffers cannot
1257 * be used after the caller returns */
1258 bool need_to_wait = true;
1259 bufferlist orig_data;
1260
1261 if (need_to_wait) {
1262 orig_data = data;
1263 }
1264 hash.Update((const byte *)data.c_str(), data.length());
1265 op_ret = put_data_and_throttle(processor, data, ofs,
1266 need_to_wait);
1267 if (op_ret < 0) {
1268 if (!need_to_wait || op_ret != -EEXIST) {
1269 ldout(s->cct, 20) << "processor->thottle_data() returned ret="
1270 << op_ret << dendl;
1271 goto done;
1272 }
1273
1274 ldout(s->cct, 5) << "NOTICE: processor->throttle_data() returned -EEXIST, need to restart write" << dendl;
1275
1276 /* restore original data */
1277 data.swap(orig_data);
1278
1279 /* restart processing with different oid suffix */
1280 dispose_processor(processor);
1281 processor = select_processor(*static_cast<RGWObjectCtx *>(s->obj_ctx),
1282 &multipart);
1283
1284 string oid_rand;
1285 char buf[33];
1286 gen_rand_alphanumeric(get_store()->ctx(), buf, sizeof(buf) - 1);
1287 oid_rand.append(buf);
1288
1289 op_ret = processor->prepare(get_store(), &oid_rand);
1290 if (op_ret < 0) {
1291 ldout(s->cct, 0) << "ERROR: processor->prepare() returned "
1292 << op_ret << dendl;
1293 goto done;
1294 }
1295
1296 op_ret = put_data_and_throttle(processor, data, ofs, false);
1297 if (op_ret < 0) {
1298 goto done;
1299 }
1300 }
1301 bytes_written += len;
1302
1303 done:
1304 return op_ret;
1305 } /* exec_continue */
1306
1307 int RGWWriteRequest::exec_finish()
1308 {
1309 buffer::list bl, aclbl, ux_key, ux_attrs;
1310 map<string, string>::iterator iter;
1311 char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
1312 unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
1313 struct req_state* s = get_state();
1314
1315 size_t osize = rgw_fh->get_size();
1316 struct timespec octime = rgw_fh->get_ctime();
1317 struct timespec omtime = rgw_fh->get_mtime();
1318 real_time appx_t = real_clock::now();
1319
1320 s->obj_size = ofs; // XXX check ofs
1321 perfcounter->inc(l_rgw_put_b, s->obj_size);
1322
1323 op_ret = get_store()->check_quota(s->bucket_owner.get_id(), s->bucket,
1324 user_quota, bucket_quota, s->obj_size);
1325 if (op_ret < 0) {
1326 goto done;
1327 }
1328
1329 op_ret = get_store()->check_bucket_shards(s->bucket_info, s->bucket, bucket_quota);
1330 if (op_ret < 0) {
1331 goto done;
1332 }
1333
1334 hash.Final(m);
1335
1336 buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
1337 etag = calc_md5;
1338
1339 bl.append(etag.c_str(), etag.size() + 1);
1340 emplace_attr(RGW_ATTR_ETAG, std::move(bl));
1341
1342 policy.encode(aclbl);
1343 emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
1344
1345 /* unix attrs */
1346 rgw_fh->set_mtime(real_clock::to_timespec(appx_t));
1347 rgw_fh->set_ctime(real_clock::to_timespec(appx_t));
1348 rgw_fh->set_size(bytes_written);
1349 rgw_fh->encode_attrs(ux_key, ux_attrs);
1350
1351 emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
1352 emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
1353
1354 for (iter = s->generic_attrs.begin(); iter != s->generic_attrs.end();
1355 ++iter) {
1356 buffer::list& attrbl = attrs[iter->first];
1357 const string& val = iter->second;
1358 attrbl.append(val.c_str(), val.size() + 1);
1359 }
1360
1361 rgw_get_request_metadata(s->cct, s->info, attrs);
1362 encode_delete_at_attr(delete_at, attrs);
1363
1364 /* Add a custom metadata to expose the information whether an object
1365 * is an SLO or not. Appending the attribute must be performed AFTER
1366 * processing any input from user in order to prohibit overwriting. */
1367 if (unlikely(!! slo_info)) {
1368 buffer::list slo_userindicator_bl;
1369 ::encode("True", slo_userindicator_bl);
1370 emplace_attr(RGW_ATTR_SLO_UINDICATOR, std::move(slo_userindicator_bl));
1371 }
1372
1373 op_ret = processor->complete(s->obj_size, etag, &mtime, real_time(), attrs,
1374 (delete_at ? *delete_at : real_time()),
1375 if_match, if_nomatch);
1376 if (op_ret != 0) {
1377 /* revert attr updates */
1378 rgw_fh->set_mtime(omtime);
1379 rgw_fh->set_ctime(octime);
1380 rgw_fh->set_size(osize);
1381 }
1382
1383 done:
1384 dispose_processor(processor);
1385 perfcounter->tinc(l_rgw_put_lat,
1386 (ceph_clock_now() - s->time));
1387 return op_ret;
1388 } /* exec_finish */
1389
1390 } /* namespace rgw */
1391
1392 /* librgw */
1393 extern "C" {
1394
1395 void rgwfile_version(int *major, int *minor, int *extra)
1396 {
1397 if (major)
1398 *major = LIBRGW_FILE_VER_MAJOR;
1399 if (minor)
1400 *minor = LIBRGW_FILE_VER_MINOR;
1401 if (extra)
1402 *extra = LIBRGW_FILE_VER_EXTRA;
1403 }
1404
1405 /*
1406 attach rgw namespace
1407 */
1408 int rgw_mount(librgw_t rgw, const char *uid, const char *acc_key,
1409 const char *sec_key, struct rgw_fs **rgw_fs,
1410 uint32_t flags)
1411 {
1412 int rc = 0;
1413
1414 /* stash access data for "mount" */
1415 RGWLibFS* new_fs = new RGWLibFS(static_cast<CephContext*>(rgw), uid, acc_key,
1416 sec_key);
1417 assert(new_fs);
1418
1419 rc = new_fs->authorize(rgwlib.get_store());
1420 if (rc != 0) {
1421 delete new_fs;
1422 return -EINVAL;
1423 }
1424
1425 /* register fs for shared gc */
1426 rgwlib.get_fe()->get_process()->register_fs(new_fs);
1427
1428 struct rgw_fs *fs = new_fs->get_fs();
1429 fs->rgw = rgw;
1430
1431 /* XXX we no longer assume "/" is unique, but we aren't tracking the
1432 * roots atm */
1433
1434 *rgw_fs = fs;
1435
1436 return 0;
1437 }
1438
1439 /*
1440 register invalidate callbacks
1441 */
1442 int rgw_register_invalidate(struct rgw_fs *rgw_fs, rgw_fh_callback_t cb,
1443 void *arg, uint32_t flags)
1444
1445 {
1446 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1447 return fs->register_invalidate(cb, arg, flags);
1448 }
1449
1450 /*
1451 detach rgw namespace
1452 */
1453 int rgw_umount(struct rgw_fs *rgw_fs, uint32_t flags)
1454 {
1455 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1456 fs->close();
1457 return 0;
1458 }
1459
1460 /*
1461 get filesystem attributes
1462 */
1463 int rgw_statfs(struct rgw_fs *rgw_fs,
1464 struct rgw_file_handle *parent_fh,
1465 struct rgw_statvfs *vfs_st, uint32_t flags)
1466 {
1467 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1468
1469 /* XXX for now, just publish a huge capacity and
1470 * limited utiliztion */
1471 vfs_st->f_bsize = 1024*1024 /* 1M */;
1472 vfs_st->f_frsize = 1024; /* minimal allocation unit (who cares) */
1473 vfs_st->f_blocks = UINT64_MAX;
1474 vfs_st->f_bfree = UINT64_MAX;
1475 vfs_st->f_bavail = UINT64_MAX;
1476 vfs_st->f_files = 1024; /* object count, do we have an est? */
1477 vfs_st->f_ffree = UINT64_MAX;
1478 vfs_st->f_fsid[0] = fs->get_inst();
1479 vfs_st->f_fsid[1] = fs->get_inst();
1480 vfs_st->f_flag = 0;
1481 vfs_st->f_namemax = 4096;
1482 return 0;
1483 }
1484
1485 /*
1486 generic create -- create an empty regular file
1487 */
1488 int rgw_create(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1489 const char *name, struct stat *st, uint32_t mask,
1490 struct rgw_file_handle **fh, uint32_t posix_flags,
1491 uint32_t flags)
1492 {
1493 using std::get;
1494
1495 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1496 RGWFileHandle* parent = get_rgwfh(parent_fh);
1497
1498 if ((! parent) ||
1499 (parent->is_root()) ||
1500 (parent->is_file())) {
1501 /* bad parent */
1502 return -EINVAL;
1503 }
1504
1505 MkObjResult fhr = fs->create(parent, name, st, mask, flags);
1506 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1507
1508 if (nfh)
1509 *fh = nfh->get_fh();
1510
1511 return get<1>(fhr);
1512 } /* rgw_create */
1513
1514 /*
1515 create a new directory
1516 */
1517 int rgw_mkdir(struct rgw_fs *rgw_fs,
1518 struct rgw_file_handle *parent_fh,
1519 const char *name, struct stat *st, uint32_t mask,
1520 struct rgw_file_handle **fh, uint32_t flags)
1521 {
1522 using std::get;
1523
1524 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1525 RGWFileHandle* parent = get_rgwfh(parent_fh);
1526
1527 if (! parent) {
1528 /* bad parent */
1529 return -EINVAL;
1530 }
1531
1532 MkObjResult fhr = fs->mkdir(parent, name, st, mask, flags);
1533 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1534
1535 if (nfh)
1536 *fh = nfh->get_fh();
1537
1538 return get<1>(fhr);
1539 } /* rgw_mkdir */
1540
1541 /*
1542 rename object
1543 */
1544 int rgw_rename(struct rgw_fs *rgw_fs,
1545 struct rgw_file_handle *src, const char* src_name,
1546 struct rgw_file_handle *dst, const char* dst_name,
1547 uint32_t flags)
1548 {
1549 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1550
1551 RGWFileHandle* src_fh = get_rgwfh(src);
1552 RGWFileHandle* dst_fh = get_rgwfh(dst);
1553
1554 return fs->rename(src_fh, dst_fh, src_name, dst_name);
1555 }
1556
1557 /*
1558 remove file or directory
1559 */
1560 int rgw_unlink(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1561 const char *name, uint32_t flags)
1562 {
1563 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1564 RGWFileHandle* parent = get_rgwfh(parent_fh);
1565
1566 return fs->unlink(parent, name);
1567 }
1568
1569 /*
1570 lookup object by name (POSIX style)
1571 */
1572 int rgw_lookup(struct rgw_fs *rgw_fs,
1573 struct rgw_file_handle *parent_fh, const char* path,
1574 struct rgw_file_handle **fh, uint32_t flags)
1575 {
1576 //CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
1577 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1578
1579 RGWFileHandle* parent = get_rgwfh(parent_fh);
1580 if ((! parent) ||
1581 (! parent->is_dir())) {
1582 /* bad parent */
1583 return -EINVAL;
1584 }
1585
1586 RGWFileHandle* rgw_fh;
1587 LookupFHResult fhr;
1588
1589 if (parent->is_root()) {
1590 /* special: parent lookup--note lack of ref()! */
1591 if (unlikely((strcmp(path, "..") == 0) ||
1592 (strcmp(path, "/") == 0))) {
1593 rgw_fh = parent;
1594 } else {
1595 RGWLibFS::BucketStats bstat;
1596 fhr = fs->stat_bucket(parent, path, bstat, RGWFileHandle::FLAG_NONE);
1597 rgw_fh = get<0>(fhr);
1598 if (! rgw_fh)
1599 return -ENOENT;
1600 }
1601 } else {
1602 /* lookup in a readdir callback */
1603 enum rgw_fh_type fh_type = fh_type_of(flags);
1604
1605 uint32_t sl_flags = (flags & RGW_LOOKUP_FLAG_RCB)
1606 ? RGWFileHandle::FLAG_NONE
1607 : RGWFileHandle::FLAG_EXACT_MATCH;
1608
1609 fhr = fs->stat_leaf(parent, path, fh_type, sl_flags);
1610 if (! get<0>(fhr)) {
1611 if (! (flags & RGW_LOOKUP_FLAG_CREATE))
1612 return -ENOENT;
1613 else
1614 fhr = fs->lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
1615 }
1616 rgw_fh = get<0>(fhr);
1617 } /* !root */
1618
1619 struct rgw_file_handle *rfh = rgw_fh->get_fh();
1620 *fh = rfh;
1621
1622 return 0;
1623 } /* rgw_lookup */
1624
1625 /*
1626 lookup object by handle (NFS style)
1627 */
1628 int rgw_lookup_handle(struct rgw_fs *rgw_fs, struct rgw_fh_hk *fh_hk,
1629 struct rgw_file_handle **fh, uint32_t flags)
1630 {
1631 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1632
1633 RGWFileHandle* rgw_fh = fs->lookup_handle(*fh_hk);
1634 if (! rgw_fh) {
1635 /* not found */
1636 return -ENOENT;
1637 }
1638
1639 struct rgw_file_handle *rfh = rgw_fh->get_fh();
1640 *fh = rfh;
1641
1642 return 0;
1643 }
1644
1645 /*
1646 * release file handle
1647 */
1648 int rgw_fh_rele(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
1649 uint32_t flags)
1650 {
1651 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1652 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1653
1654 lsubdout(fs->get_context(), rgw, 17)
1655 << __func__ << " " << *rgw_fh
1656 << dendl;
1657
1658 fs->unref(rgw_fh);
1659 return 0;
1660 }
1661
1662 /*
1663 get unix attributes for object
1664 */
1665 int rgw_getattr(struct rgw_fs *rgw_fs,
1666 struct rgw_file_handle *fh, struct stat *st, uint32_t flags)
1667 {
1668 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1669 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1670
1671 return fs->getattr(rgw_fh, st);
1672 }
1673
1674 /*
1675 set unix attributes for object
1676 */
1677 int rgw_setattr(struct rgw_fs *rgw_fs,
1678 struct rgw_file_handle *fh, struct stat *st,
1679 uint32_t mask, uint32_t flags)
1680 {
1681 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1682 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1683
1684 return fs->setattr(rgw_fh, st, mask, flags);
1685 }
1686
/*
 truncate file

 Currently a stub: no argument is examined and 0 is returned
 unconditionally.
*/
int rgw_truncate(struct rgw_fs *rgw_fs,
                 struct rgw_file_handle *fh, uint64_t size, uint32_t flags)
{
  /* mark the arguments as deliberately unused */
  static_cast<void>(rgw_fs);
  static_cast<void>(fh);
  static_cast<void>(size);
  static_cast<void>(flags);

  return 0;
}
1695
1696 /*
1697 open file
1698 */
1699 int rgw_open(struct rgw_fs *rgw_fs,
1700 struct rgw_file_handle *fh, uint32_t posix_flags, uint32_t flags)
1701 {
1702 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1703
1704 /* XXX
1705 * need to track specific opens--at least read opens and
1706 * a write open; we need to know when a write open is returned,
1707 * that closes a write transaction
1708 *
1709 * for now, we will support single-open only, it's preferable to
1710 * anything we can otherwise do without access to the NFS state
1711 */
1712 if (! rgw_fh->is_file())
1713 return -EISDIR;
1714
1715 return rgw_fh->open(flags);
1716 }
1717
1718 /*
1719 close file
1720 */
1721 int rgw_close(struct rgw_fs *rgw_fs,
1722 struct rgw_file_handle *fh, uint32_t flags)
1723 {
1724 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1725 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1726 int rc = rgw_fh->close(/* XXX */);
1727
1728 if (flags & RGW_CLOSE_FLAG_RELE)
1729 fs->unref(rgw_fh);
1730
1731 return rc;
1732 }
1733
1734 int rgw_readdir(struct rgw_fs *rgw_fs,
1735 struct rgw_file_handle *parent_fh, uint64_t *offset,
1736 rgw_readdir_cb rcb, void *cb_arg, bool *eof,
1737 uint32_t flags)
1738 {
1739 RGWFileHandle* parent = get_rgwfh(parent_fh);
1740 if (! parent) {
1741 /* bad parent */
1742 return -EINVAL;
1743 }
1744 int rc = parent->readdir(rcb, cb_arg, offset, eof, flags);
1745 return rc;
1746 }
1747
1748 /*
1749 read data from file
1750 */
1751 int rgw_read(struct rgw_fs *rgw_fs,
1752 struct rgw_file_handle *fh, uint64_t offset,
1753 size_t length, size_t *bytes_read, void *buffer,
1754 uint32_t flags)
1755 {
1756 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1757 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1758
1759 return fs->read(rgw_fh, offset, length, bytes_read, buffer, flags);
1760 }
1761
1762 /*
1763 write data to file
1764 */
1765 int rgw_write(struct rgw_fs *rgw_fs,
1766 struct rgw_file_handle *fh, uint64_t offset,
1767 size_t length, size_t *bytes_written, void *buffer,
1768 uint32_t flags)
1769 {
1770 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1771 int rc;
1772
1773 *bytes_written = 0;
1774
1775 if (! rgw_fh->is_file())
1776 return -EISDIR;
1777
1778 if (! rgw_fh->is_open())
1779 return -EPERM;
1780
1781 rc = rgw_fh->write(offset, length, bytes_written, buffer);
1782
1783 return rc;
1784 }
1785
1786 /*
1787 read data from file (vector)
1788 */
1789 class RGWReadV
1790 {
1791 buffer::list bl;
1792 struct rgw_vio* vio;
1793
1794 public:
1795 RGWReadV(buffer::list& _bl, rgw_vio* _vio) : vio(_vio) {
1796 bl.claim(_bl);
1797 }
1798
1799 struct rgw_vio* get_vio() { return vio; }
1800
1801 const std::list<buffer::ptr>& buffers() { return bl.buffers(); }
1802
1803 unsigned /* XXX */ length() { return bl.length(); }
1804
1805 };
1806
1807 void rgw_readv_rele(struct rgw_uio *uio, uint32_t flags)
1808 {
1809 RGWReadV* rdv = static_cast<RGWReadV*>(uio->uio_p1);
1810 rdv->~RGWReadV();
1811 ::operator delete(rdv);
1812 }
1813
1814 int rgw_readv(struct rgw_fs *rgw_fs,
1815 struct rgw_file_handle *fh, rgw_uio *uio, uint32_t flags)
1816 {
1817 #if 0 /* XXX */
1818 CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
1819 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1820 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1821
1822 if (! rgw_fh->is_file())
1823 return -EINVAL;
1824
1825 int rc = 0;
1826
1827 buffer::list bl;
1828 RGWGetObjRequest req(cct, fs->get_user(), rgw_fh->bucket_name(),
1829 rgw_fh->object_name(), uio->uio_offset, uio->uio_resid,
1830 bl);
1831 req.do_hexdump = false;
1832
1833 rc = rgwlib.get_fe()->execute_req(&req);
1834
1835 if (! rc) {
1836 RGWReadV* rdv = static_cast<RGWReadV*>(
1837 ::operator new(sizeof(RGWReadV) +
1838 (bl.buffers().size() * sizeof(struct rgw_vio))));
1839
1840 (void) new (rdv)
1841 RGWReadV(bl, reinterpret_cast<rgw_vio*>(rdv+sizeof(RGWReadV)));
1842
1843 uio->uio_p1 = rdv;
1844 uio->uio_cnt = rdv->buffers().size();
1845 uio->uio_resid = rdv->length();
1846 uio->uio_vio = rdv->get_vio();
1847 uio->uio_rele = rgw_readv_rele;
1848
1849 int ix = 0;
1850 auto& buffers = rdv->buffers();
1851 for (auto& bp : buffers) {
1852 rgw_vio *vio = &(uio->uio_vio[ix]);
1853 vio->vio_base = const_cast<char*>(bp.c_str());
1854 vio->vio_len = bp.length();
1855 vio->vio_u1 = nullptr;
1856 vio->vio_p1 = nullptr;
1857 ++ix;
1858 }
1859 }
1860
1861 return rc;
1862 #else
1863 return 0;
1864 #endif
1865 }
1866
1867 /*
1868 write data to file (vector)
1869 */
1870 int rgw_writev(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
1871 rgw_uio *uio, uint32_t flags)
1872 {
1873
1874 return -ENOTSUP;
1875
1876 CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
1877 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1878 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1879
1880 if (! rgw_fh->is_file())
1881 return -EINVAL;
1882
1883 buffer::list bl;
1884 for (unsigned int ix = 0; ix < uio->uio_cnt; ++ix) {
1885 rgw_vio *vio = &(uio->uio_vio[ix]);
1886 bl.push_back(
1887 buffer::create_static(vio->vio_len,
1888 static_cast<char*>(vio->vio_base)));
1889 }
1890
1891 std::string oname = rgw_fh->relative_object_name();
1892 RGWPutObjRequest req(cct, fs->get_user(), rgw_fh->bucket_name(),
1893 oname, bl);
1894
1895 int rc = rgwlib.get_fe()->execute_req(&req);
1896
1897 /* XXX update size (in request) */
1898
1899 return rc;
1900 }
1901
/*
 sync written data

 Currently a stub: no argument is examined and 0 is returned
 unconditionally.
*/
int rgw_fsync(struct rgw_fs *rgw_fs, struct rgw_file_handle *handle,
              uint32_t flags)
{
  /* mark the arguments as deliberately unused */
  static_cast<void>(rgw_fs);
  static_cast<void>(handle);
  static_cast<void>(flags);

  return 0;
}
1910
1911 int rgw_commit(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
1912 uint64_t offset, uint64_t length, uint32_t flags)
1913 {
1914 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1915
1916 return rgw_fh->commit(offset, length, RGWFileHandle::FLAG_NONE);
1917 }
1918
1919 } /* extern "C" */