]> git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_file.cc
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / rgw / rgw_file.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include "include/compat.h"
5 #include "include/rados/rgw_file.h"
6
7 #include <sys/types.h>
8 #include <sys/stat.h>
9
10 #include "rgw_lib.h"
11 #include "rgw_rados.h"
12 #include "rgw_resolve.h"
13 #include "rgw_op.h"
14 #include "rgw_rest.h"
15 #include "rgw_acl.h"
16 #include "rgw_acl_s3.h"
17 #include "rgw_frontend.h"
18 #include "rgw_request.h"
19 #include "rgw_process.h"
20 #include "rgw_rest_user.h"
21 #include "rgw_rest_s3.h"
22 #include "rgw_os_lib.h"
23 #include "rgw_auth_s3.h"
24 #include "rgw_user.h"
25 #include "rgw_bucket.h"
26
27 #include "rgw_file.h"
28 #include "rgw_lib_frontend.h"
29
30 #include <atomic>
31
32 #define dout_subsys ceph_subsys_rgw
33
34 using namespace rgw;
35
36 namespace rgw {
37
38 extern RGWLib rgwlib;
39
40 const string RGWFileHandle::root_name = "/";
41
42 std::atomic<uint32_t> RGWLibFS::fs_inst_counter;
43
44 uint32_t RGWLibFS::write_completion_interval_s = 10;
45
46 ceph::timer<ceph::mono_clock> RGWLibFS::write_timer{
47 ceph::construct_suspended};
48
49 inline int valid_fs_bucket_name(const string& name) {
50 int rc = valid_s3_bucket_name(name, false /* relaxed */);
51 if (rc != 0) {
52 if (name.size() > 255)
53 return -ENAMETOOLONG;
54 return -EINVAL;
55 }
56 return 0;
57 }
58
59 inline int valid_fs_object_name(const string& name) {
60 int rc = valid_s3_object_name(name);
61 if (rc != 0) {
62 if (name.size() > 1024)
63 return -ENAMETOOLONG;
64 return -EINVAL;
65 }
66 return 0;
67 }
68
69 LookupFHResult RGWLibFS::stat_bucket(RGWFileHandle* parent,
70 const char *path, uint32_t flags)
71 {
72 LookupFHResult fhr{nullptr, 0};
73 std::string bucket_name{path};
74 RGWStatBucketRequest req(cct, get_user(), bucket_name);
75
76 int rc = rgwlib.get_fe()->execute_req(&req);
77 if ((rc == 0) &&
78 (req.get_ret() == 0) &&
79 (req.matched())) {
80 fhr = lookup_fh(parent, path,
81 RGWFileHandle::FLAG_CREATE|
82 RGWFileHandle::FLAG_BUCKET);
83 if (get<0>(fhr)) {
84 RGWFileHandle* rgw_fh = get<0>(fhr);
85 lock_guard guard(rgw_fh->mtx);
86 rgw_fh->set_times(req.get_ctime());
87 /* restore attributes */
88 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
89 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
90 if (ux_key && ux_attrs) {
91 rgw_fh->decode_attrs(ux_key, ux_attrs);
92 }
93 }
94 }
95 return fhr;
96 }
97
98 LookupFHResult RGWLibFS::stat_leaf(RGWFileHandle* parent,
99 const char *path,
100 enum rgw_fh_type type,
101 uint32_t flags)
102 {
103 /* find either-of <object_name>, <object_name/>, only one of
104 * which should exist; atomicity? */
105 using std::get;
106
107 LookupFHResult fhr{nullptr, 0};
108
109 /* XXX the need for two round-trip operations to identify file or
110 * directory leaf objects is unecessary--the current proposed
111 * mechanism to avoid this is to store leaf object names with an
112 * object locator w/o trailing slash */
113
114 /* mutating path */
115 std::string obj_path{parent->relative_object_name()};
116 if ((obj_path.length() > 0) &&
117 (obj_path.back() != '/'))
118 obj_path += "/";
119 obj_path += path;
120
121 for (auto ix : { 0, 1, 2 }) {
122 switch (ix) {
123 case 0:
124 {
125 /* type hint */
126 if (type == RGW_FS_TYPE_DIRECTORY)
127 continue;
128
129 RGWStatObjRequest req(cct, get_user(),
130 parent->bucket_name(), obj_path,
131 RGWStatObjRequest::FLAG_NONE);
132 int rc = rgwlib.get_fe()->execute_req(&req);
133 if ((rc == 0) &&
134 (req.get_ret() == 0)) {
135 fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
136 if (get<0>(fhr)) {
137 RGWFileHandle* rgw_fh = get<0>(fhr);
138 lock_guard guard(rgw_fh->mtx);
139 rgw_fh->set_size(req.get_size());
140 rgw_fh->set_times(req.get_mtime());
141 /* restore attributes */
142 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
143 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
144 if (ux_key && ux_attrs) {
145 rgw_fh->decode_attrs(ux_key, ux_attrs);
146 }
147 }
148 goto done;
149 }
150 }
151 break;
152 case 1:
153 {
154 /* try dir form */
155 /* type hint */
156 if (type == RGW_FS_TYPE_FILE)
157 continue;
158
159 obj_path += "/";
160 RGWStatObjRequest req(cct, get_user(),
161 parent->bucket_name(), obj_path,
162 RGWStatObjRequest::FLAG_NONE);
163 int rc = rgwlib.get_fe()->execute_req(&req);
164 if ((rc == 0) &&
165 (req.get_ret() == 0)) {
166 fhr = lookup_fh(parent, path, RGWFileHandle::FLAG_DIRECTORY);
167 if (get<0>(fhr)) {
168 RGWFileHandle* rgw_fh = get<0>(fhr);
169 lock_guard guard(rgw_fh->mtx);
170 rgw_fh->set_size(req.get_size());
171 rgw_fh->set_times(req.get_mtime());
172 /* restore attributes */
173 auto ux_key = req.get_attr(RGW_ATTR_UNIX_KEY1);
174 auto ux_attrs = req.get_attr(RGW_ATTR_UNIX1);
175 if (ux_key && ux_attrs) {
176 rgw_fh->decode_attrs(ux_key, ux_attrs);
177 }
178 }
179 goto done;
180 }
181 }
182 break;
183 case 2:
184 {
185 std::string object_name{path};
186 RGWStatLeafRequest req(cct, get_user(), parent, object_name);
187 int rc = rgwlib.get_fe()->execute_req(&req);
188 if ((rc == 0) &&
189 (req.get_ret() == 0)) {
190 if (req.matched) {
191 /* we need rgw object's key name equal to file name, if
192 * not return NULL */
193 if ((flags & RGWFileHandle::FLAG_EXACT_MATCH) &&
194 !req.exact_matched) {
195 lsubdout(get_context(), rgw, 15)
196 << __func__
197 << ": stat leaf not exact match file name = "
198 << path << dendl;
199 goto done;
200 }
201 fhr = lookup_fh(parent, path,
202 RGWFileHandle::FLAG_CREATE|
203 ((req.is_dir) ?
204 RGWFileHandle::FLAG_DIRECTORY :
205 RGWFileHandle::FLAG_NONE));
206 /* XXX we don't have an object--in general, there need not
207 * be one (just a path segment in some other object). In
208 * actual leaf an object exists, but we'd need another round
209 * trip to get attrs */
210 if (get<0>(fhr)) {
211 /* for now use the parent object's mtime */
212 RGWFileHandle* rgw_fh = get<0>(fhr);
213 lock_guard guard(rgw_fh->mtx);
214 rgw_fh->set_mtime(parent->get_mtime());
215 }
216 }
217 }
218 }
219 break;
220 default:
221 /* not reached */
222 break;
223 }
224 }
225 done:
226 return fhr;
227 } /* RGWLibFS::stat_leaf */
228
229 int RGWLibFS::read(RGWFileHandle* rgw_fh, uint64_t offset, size_t length,
230 size_t* bytes_read, void* buffer, uint32_t flags)
231 {
232 if (! rgw_fh->is_file())
233 return -EINVAL;
234
235 if (rgw_fh->deleted())
236 return -ESTALE;
237
238 RGWReadRequest req(get_context(), get_user(), rgw_fh, offset, length,
239 buffer);
240
241 int rc = rgwlib.get_fe()->execute_req(&req);
242 if ((rc == 0) &&
243 (req.get_ret() == 0)) {
244 lock_guard(rgw_fh->mtx);
245 rgw_fh->set_atime(real_clock::to_timespec(real_clock::now()));
246 *bytes_read = req.nread;
247 }
248
249 return rc;
250 }
251
252 int RGWLibFS::unlink(RGWFileHandle* rgw_fh, const char* name, uint32_t flags)
253 {
254 int rc = 0;
255 RGWFileHandle* parent = nullptr;
256
257 if (unlikely(flags & RGWFileHandle::FLAG_UNLINK_THIS)) {
258 /* LOCKED */
259 parent = rgw_fh->get_parent();
260 } else {
261 /* atomicity */
262 parent = rgw_fh;
263 LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_LOCK);
264 rgw_fh = get<0>(fhr);
265 /* LOCKED */
266 }
267
268 if (parent->is_root()) {
269 /* XXXX remove uri and deal with bucket and object names */
270 string uri = "/";
271 uri += name;
272 RGWDeleteBucketRequest req(cct, get_user(), uri);
273 rc = rgwlib.get_fe()->execute_req(&req);
274 if (! rc) {
275 rc = req.get_ret();
276 }
277 } else {
278 /*
279 * leaf object
280 */
281 if (! rgw_fh) {
282 /* XXX for now, peform a hard lookup to deduce the type of
283 * object to be deleted ("foo" vs. "foo/")--also, ensures
284 * atomicity at this endpoint */
285 struct rgw_file_handle *fh;
286 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &fh,
287 RGW_LOOKUP_FLAG_NONE);
288 if (!! rc)
289 return rc;
290
291 /* rgw_fh ref+ */
292 rgw_fh = get_rgwfh(fh);
293 rgw_fh->mtx.lock(); /* LOCKED */
294 }
295
296 std::string oname = rgw_fh->relative_object_name();
297 if (rgw_fh->is_dir()) {
298 /* for the duration of our cache timer, trust positive
299 * child cache */
300 if (rgw_fh->has_children()) {
301 rgw_fh->mtx.unlock();
302 unref(rgw_fh);
303 return(-ENOTEMPTY);
304 }
305 oname += "/";
306 }
307 RGWDeleteObjRequest req(cct, get_user(), parent->bucket_name(),
308 oname);
309 rc = rgwlib.get_fe()->execute_req(&req);
310 if (! rc) {
311 rc = req.get_ret();
312 }
313 }
314
315 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
316 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
317 RGWFileHandle::FHCache::FLAG_LOCK);
318
319 #if 1 /* XXX verify clear cache */
320 fh_key fhk(rgw_fh->fh.fh_hk);
321 LookupFHResult tfhr = lookup_fh(fhk, RGWFileHandle::FLAG_LOCKED);
322 RGWFileHandle* nfh = get<0>(tfhr);
323 assert(!nfh);
324 #endif
325
326 if (! rc) {
327 real_time t = real_clock::now();
328 parent->set_mtime(real_clock::to_timespec(t));
329 parent->set_ctime(real_clock::to_timespec(t));
330 }
331
332 rgw_fh->mtx.unlock();
333 unref(rgw_fh);
334
335 return rc;
336 } /* RGWLibFS::unlink */
337
338 int RGWLibFS::rename(RGWFileHandle* src_fh, RGWFileHandle* dst_fh,
339 const char *_src_name, const char *_dst_name)
340
341 {
342 /* XXX initial implementation: try-copy, and delete if copy
343 * succeeds */
344 int rc = -EINVAL;
345
346 real_time t;
347
348 std::string src_name{_src_name};
349 std::string dst_name{_dst_name};
350
351 /* atomicity */
352 LookupFHResult fhr = lookup_fh(src_fh, _src_name, RGWFileHandle::FLAG_LOCK);
353 RGWFileHandle* rgw_fh = get<0>(fhr);
354
355 /* should not happen */
356 if (! rgw_fh) {
357 ldout(get_context(), 0) << __func__
358 << " BUG no such src renaming path="
359 << src_name
360 << dendl;
361 goto out;
362 }
363
364 /* forbid renaming of directories (unreasonable at scale) */
365 if (rgw_fh->is_dir()) {
366 ldout(get_context(), 12) << __func__
367 << " rejecting attempt to rename directory path="
368 << rgw_fh->full_object_name()
369 << dendl;
370 rc = -EPERM;
371 goto unlock;
372 }
373
374 /* forbid renaming open files (violates intent, for now) */
375 if (rgw_fh->is_open()) {
376 ldout(get_context(), 12) << __func__
377 << " rejecting attempt to rename open file path="
378 << rgw_fh->full_object_name()
379 << dendl;
380 rc = -EPERM;
381 goto unlock;
382 }
383
384 t = real_clock::now();
385
386 for (int ix : {0, 1}) {
387 switch (ix) {
388 case 0:
389 {
390 RGWCopyObjRequest req(cct, get_user(), src_fh, dst_fh, src_name,
391 dst_name);
392 int rc = rgwlib.get_fe()->execute_req(&req);
393 if ((rc != 0) ||
394 ((rc = req.get_ret()) != 0)) {
395 ldout(get_context(), 1)
396 << __func__
397 << " rename step 0 failed src="
398 << src_fh->full_object_name() << " " << src_name
399 << " dst=" << dst_fh->full_object_name()
400 << " " << dst_name
401 << "rc " << rc
402 << dendl;
403 goto unlock;
404 }
405 ldout(get_context(), 12)
406 << __func__
407 << " rename step 0 success src="
408 << src_fh->full_object_name() << " " << src_name
409 << " dst=" << dst_fh->full_object_name()
410 << " " << dst_name
411 << " rc " << rc
412 << dendl;
413 /* update dst change id */
414 dst_fh->set_times(t);
415 }
416 break;
417 case 1:
418 {
419 rc = this->unlink(rgw_fh /* LOCKED */, _src_name,
420 RGWFileHandle::FLAG_UNLINK_THIS);
421 /* !LOCKED, -ref */
422 if (! rc) {
423 ldout(get_context(), 12)
424 << __func__
425 << " rename step 1 success src="
426 << src_fh->full_object_name() << " " << src_name
427 << " dst=" << dst_fh->full_object_name()
428 << " " << dst_name
429 << " rc " << rc
430 << dendl;
431 /* update src change id */
432 src_fh->set_times(t);
433 } else {
434 ldout(get_context(), 1)
435 << __func__
436 << " rename step 1 failed src="
437 << src_fh->full_object_name() << " " << src_name
438 << " dst=" << dst_fh->full_object_name()
439 << " " << dst_name
440 << " rc " << rc
441 << dendl;
442 }
443 }
444 goto out;
445 default:
446 abort();
447 } /* switch */
448 } /* ix */
449 unlock:
450 rgw_fh->mtx.unlock(); /* !LOCKED */
451 unref(rgw_fh); /* -ref */
452
453 out:
454 return rc;
455 } /* RGWLibFS::rename */
456
457 MkObjResult RGWLibFS::mkdir(RGWFileHandle* parent, const char *name,
458 struct stat *st, uint32_t mask, uint32_t flags)
459 {
460 MkObjResult mkr{nullptr, -EINVAL};
461 int rc, rc2;
462
463 LookupFHResult fhr;
464 RGWFileHandle* rgw_fh = nullptr;
465 buffer::list ux_key, ux_attrs;
466
467 fhr = lookup_fh(parent, name,
468 RGWFileHandle::FLAG_CREATE|
469 RGWFileHandle::FLAG_DIRECTORY|
470 RGWFileHandle::FLAG_LOCK);
471 rgw_fh = get<0>(fhr);
472 if (rgw_fh) {
473 rgw_fh->create_stat(st, mask);
474 rgw_fh->set_times(real_clock::now());
475 /* save attrs */
476 rgw_fh->encode_attrs(ux_key, ux_attrs);
477 if (st)
478 rgw_fh->stat(st);
479 get<0>(mkr) = rgw_fh;
480 } else {
481 get<1>(mkr) = -EIO;
482 return mkr;
483 }
484
485 if (parent->is_root()) {
486 /* bucket */
487 string bname{name};
488 /* enforce S3 name restrictions */
489 rc = valid_fs_bucket_name(bname);
490 if (rc != 0) {
491 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
492 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
493 RGWFileHandle::FHCache::FLAG_LOCK);
494 rgw_fh->mtx.unlock();
495 unref(rgw_fh);
496 get<0>(mkr) = nullptr;
497 get<1>(mkr) = rc;
498 return mkr;
499 }
500
501 string uri = "/" + bname; /* XXX get rid of URI some day soon */
502 RGWCreateBucketRequest req(get_context(), get_user(), uri);
503
504 /* save attrs */
505 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
506 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
507
508 rc = rgwlib.get_fe()->execute_req(&req);
509 rc2 = req.get_ret();
510 } else {
511 /* create an object representing the directory */
512 buffer::list bl;
513 string dir_name = /* XXX get rid of this some day soon, too */
514 parent->relative_object_name();
515
516 /* creating objects w/leading '/' makes a mess */
517 if ((dir_name.size() > 0) &&
518 (dir_name.back() != '/'))
519 dir_name += "/";
520 dir_name += name;
521 dir_name += "/";
522
523 /* need valid S3 name (characters, length <= 1024, etc) */
524 rc = valid_fs_object_name(dir_name);
525 if (rc != 0) {
526 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
527 fh_cache.remove(rgw_fh->fh.fh_hk.object, rgw_fh,
528 RGWFileHandle::FHCache::FLAG_LOCK);
529 rgw_fh->mtx.unlock();
530 unref(rgw_fh);
531 get<0>(mkr) = nullptr;
532 get<1>(mkr) = rc;
533 return mkr;
534 }
535
536 RGWPutObjRequest req(get_context(), get_user(), parent->bucket_name(),
537 dir_name, bl);
538
539 /* save attrs */
540 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
541 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
542
543 rc = rgwlib.get_fe()->execute_req(&req);
544 rc2 = req.get_ret();
545 }
546
547 if (! ((rc == 0) &&
548 (rc2 == 0))) {
549 /* op failed */
550 rgw_fh->flags |= RGWFileHandle::FLAG_DELETED;
551 rgw_fh->mtx.unlock(); /* !LOCKED */
552 unref(rgw_fh);
553 get<0>(mkr) = nullptr;
554 /* fixup rc */
555 if (!rc)
556 rc = rc2;
557 } else {
558 real_time t = real_clock::now();
559 parent->set_mtime(real_clock::to_timespec(t));
560 parent->set_ctime(real_clock::to_timespec(t));
561 rgw_fh->mtx.unlock(); /* !LOCKED */
562 }
563
564 get<1>(mkr) = rc;
565
566 return mkr;
567 } /* RGWLibFS::mkdir */
568
569 MkObjResult RGWLibFS::create(RGWFileHandle* parent, const char *name,
570 struct stat *st, uint32_t mask, uint32_t flags)
571 {
572 int rc, rc2;
573
574 using std::get;
575
576 rgw_file_handle *lfh;
577 rc = rgw_lookup(get_fs(), parent->get_fh(), name, &lfh,
578 RGW_LOOKUP_FLAG_NONE);
579 if (! rc) {
580 /* conflict! */
581 rc = rgw_fh_rele(get_fs(), lfh, RGW_FH_RELE_FLAG_NONE);
582 return MkObjResult{nullptr, -EEXIST};
583 }
584
585 /* expand and check name */
586 std::string obj_name{parent->relative_object_name()};
587 if ((obj_name.size() > 0) &&
588 (obj_name.back() != '/'))
589 obj_name += "/";
590 obj_name += name;
591 rc = valid_fs_object_name(obj_name);
592 if (rc != 0) {
593 return MkObjResult{nullptr, rc};
594 }
595
596 /* create it */
597 buffer::list bl;
598 RGWPutObjRequest req(cct, get_user(), parent->bucket_name(), obj_name, bl);
599 MkObjResult mkr{nullptr, -EINVAL};
600
601 rc = rgwlib.get_fe()->execute_req(&req);
602 rc2 = req.get_ret();
603
604 if ((rc == 0) &&
605 (rc2 == 0)) {
606 /* XXX atomicity */
607 LookupFHResult fhr = lookup_fh(parent, name, RGWFileHandle::FLAG_CREATE |
608 RGWFileHandle::FLAG_LOCK);
609 RGWFileHandle* rgw_fh = get<0>(fhr);
610 if (rgw_fh) {
611 if (get<1>(fhr) & RGWFileHandle::FLAG_CREATE) {
612 /* fill in stat data */
613 real_time t = real_clock::now();
614 rgw_fh->create_stat(st, mask);
615 rgw_fh->set_times(t);
616
617 parent->set_mtime(real_clock::to_timespec(t));
618 parent->set_ctime(real_clock::to_timespec(t));
619 }
620 if (st)
621 (void) rgw_fh->stat(st);
622 get<0>(mkr) = rgw_fh;
623 rgw_fh->mtx.unlock();
624 } else
625 rc = -EIO;
626 }
627
628 get<1>(mkr) = rc;
629
630 return mkr;
631 } /* RGWLibFS::create */
632
633 int RGWLibFS::getattr(RGWFileHandle* rgw_fh, struct stat* st)
634 {
635 switch(rgw_fh->fh.fh_type) {
636 case RGW_FS_TYPE_FILE:
637 {
638 if (rgw_fh->deleted())
639 return -ESTALE;
640 }
641 break;
642 default:
643 break;
644 };
645
646 return rgw_fh->stat(st);
647 } /* RGWLibFS::getattr */
648
649 int RGWLibFS::setattr(RGWFileHandle* rgw_fh, struct stat* st, uint32_t mask,
650 uint32_t flags)
651 {
652 int rc, rc2;
653 buffer::list ux_key, ux_attrs;
654
655 lock_guard guard(rgw_fh->mtx);
656
657 switch(rgw_fh->fh.fh_type) {
658 case RGW_FS_TYPE_FILE:
659 {
660 if (rgw_fh->deleted())
661 return -ESTALE;
662 }
663 break;
664 default:
665 break;
666 };
667
668 string obj_name{rgw_fh->relative_object_name()};
669
670 if (rgw_fh->is_dir()) {
671 obj_name += "/";
672 }
673
674 RGWSetAttrsRequest req(cct, get_user(), rgw_fh->bucket_name(), obj_name);
675
676 rgw_fh->create_stat(st, mask);
677 rgw_fh->encode_attrs(ux_key, ux_attrs);
678
679 /* save attrs */
680 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
681 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
682
683 rc = rgwlib.get_fe()->execute_req(&req);
684 rc2 = req.get_ret();
685
686 if (rc == -ENOENT) {
687 /* special case: materialize placeholder dir */
688 buffer::list bl;
689 RGWPutObjRequest req(get_context(), get_user(), rgw_fh->bucket_name(),
690 obj_name, bl);
691
692 rgw_fh->encode_attrs(ux_key, ux_attrs); /* because std::moved */
693
694 /* save attrs */
695 req.emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
696 req.emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
697
698 rc = rgwlib.get_fe()->execute_req(&req);
699 rc2 = req.get_ret();
700 }
701
702 if ((rc != 0) || (rc2 != 0)) {
703 return -EIO;
704 }
705
706 rgw_fh->set_ctime(real_clock::to_timespec(real_clock::now()));
707
708 return 0;
709 } /* RGWLibFS::setattr */
710
711 void RGWLibFS::close()
712 {
713 state.flags |= FLAG_CLOSED;
714
715 class ObjUnref
716 {
717 RGWLibFS* fs;
718 public:
719 ObjUnref(RGWLibFS* fs) : fs(fs) {}
720 void operator()(RGWFileHandle* fh) const {
721 lsubdout(fs->get_context(), rgw, 5)
722 << __func__
723 << fh->name
724 << " before ObjUnref refs=" << fh->get_refcnt()
725 << dendl;
726 fs->fh_lru.unref(fh, cohort::lru::FLAG_NONE);
727 }
728 };
729
730 /* force cache drain, forces objects to evict */
731 fh_cache.drain(ObjUnref(this),
732 RGWFileHandle::FHCache::FLAG_LOCK);
733 rgwlib.get_fe()->get_process()->unregister_fs(this);
734 rele();
735 } /* RGWLibFS::close */
736
/* Stream a timespec as "<timespec: tv_sec=S; tv_nsec=N>". */
inline std::ostream& operator<<(std::ostream &os, struct timespec const &ts) {
  return os << "<timespec: tv_sec=" << ts.tv_sec
            << "; tv_nsec=" << ts.tv_nsec
            << ">";
}
745
746 std::ostream& operator<<(std::ostream &os, RGWLibFS::event const &ev) {
747 os << "<event:";
748 switch (ev.t) {
749 case RGWLibFS::event::type::READDIR:
750 os << "type=READDIR;";
751 break;
752 default:
753 os << "type=UNKNOWN;";
754 break;
755 };
756 os << "fid=" << ev.fhk.fh_hk.bucket << ":" << ev.fhk.fh_hk.object
757 << ";ts=" << ev.ts << ">";
758 return os;
759 }
760
761 void RGWLibFS::gc()
762 {
763 using std::get;
764 using directory = RGWFileHandle::directory;
765
766 /* dirent invalidate timeout--basically, the upper-bound on
767 * inconsistency with the S3 namespace */
768 auto expire_s
769 = get_context()->_conf->rgw_nfs_namespace_expire_secs;
770
771 /* max events to gc in one cycle */
772 uint32_t max_ev =
773 std::max(1, get_context()->_conf->rgw_nfs_max_gc);
774
775 struct timespec now, expire_ts;
776 event_vector ve;
777 bool stop = false;
778 std::deque<event> &events = state.events;
779
780 do {
781 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
782 lsubdout(get_context(), rgw, 15)
783 << "GC: top of expire loop"
784 << " now=" << now
785 << " expire_s=" << expire_s
786 << dendl;
787 {
788 lock_guard guard(state.mtx); /* LOCKED */
789 /* just return if no events */
790 if (events.empty()) {
791 return;
792 }
793 uint32_t _max_ev =
794 (events.size() < 500) ? max_ev : (events.size() / 4);
795 for (uint32_t ix = 0; (ix < _max_ev) && (events.size() > 0); ++ix) {
796 event& ev = events.front();
797 expire_ts = ev.ts;
798 expire_ts.tv_sec += expire_s;
799 if (expire_ts > now) {
800 stop = true;
801 break;
802 }
803 ve.push_back(ev);
804 events.pop_front();
805 }
806 } /* anon */
807 /* !LOCKED */
808 for (auto& ev : ve) {
809 lsubdout(get_context(), rgw, 15)
810 << "try-expire ev: " << ev << dendl;
811 if (likely(ev.t == event::type::READDIR)) {
812 RGWFileHandle* rgw_fh = lookup_handle(ev.fhk.fh_hk);
813 lsubdout(get_context(), rgw, 15)
814 << "ev rgw_fh: " << rgw_fh << dendl;
815 if (rgw_fh) {
816 RGWFileHandle::directory* d;
817 if (unlikely(! rgw_fh->is_dir())) {
818 lsubdout(get_context(), rgw, 0)
819 << __func__
820 << " BUG non-directory found with READDIR event "
821 << "(" << rgw_fh->bucket_name() << ","
822 << rgw_fh->object_name() << ")"
823 << dendl;
824 goto rele;
825 }
826 /* maybe clear state */
827 d = get<directory>(&rgw_fh->variant_type);
828 if (d) {
829 struct timespec ev_ts = ev.ts;
830 lock_guard guard(rgw_fh->mtx);
831 struct timespec d_last_readdir = d->last_readdir;
832 if (unlikely(ev_ts < d_last_readdir)) {
833 /* readdir cycle in progress, don't invalidate */
834 lsubdout(get_context(), rgw, 15)
835 << "GC: delay expiration for "
836 << rgw_fh->object_name()
837 << " ev.ts=" << ev_ts
838 << " last_readdir=" << d_last_readdir
839 << dendl;
840 continue;
841 } else {
842 lsubdout(get_context(), rgw, 15)
843 << "GC: expiring "
844 << rgw_fh->object_name()
845 << dendl;
846 rgw_fh->clear_state();
847 rgw_fh->invalidate();
848 }
849 }
850 rele:
851 unref(rgw_fh);
852 } /* rgw_fh */
853 } /* event::type::READDIR */
854 } /* ev */
855 ve.clear();
856 } while (! (stop || shutdown));
857 } /* RGWLibFS::gc */
858
859 std::ostream& operator<<(std::ostream &os,
860 RGWFileHandle const &rgw_fh)
861 {
862 const auto& fhk = rgw_fh.get_key();
863 const auto& fh = const_cast<RGWFileHandle&>(rgw_fh).get_fh();
864 os << "<RGWFileHandle:";
865 os << "addr=" << &rgw_fh << ";";
866 switch (fh->fh_type) {
867 case RGW_FS_TYPE_DIRECTORY:
868 os << "type=DIRECTORY;";
869 break;
870 case RGW_FS_TYPE_FILE:
871 os << "type=FILE;";
872 break;
873 default:
874 os << "type=UNKNOWN;";
875 break;
876 };
877 os << "fid=" << fhk.fh_hk.bucket << ":" << fhk.fh_hk.object << ";";
878 os << "name=" << rgw_fh.object_name() << ";";
879 os << "refcnt=" << rgw_fh.get_refcnt() << ";";
880 os << ">";
881 return os;
882 }
883
884 RGWFileHandle::~RGWFileHandle() {
885 /* in the non-delete case, handle may still be in handle table */
886 if (fh_hook.is_linked()) {
887 fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_LOCK);
888 }
889 /* cond-unref parent */
890 if (parent && (! parent->is_root())) {
891 /* safe because if parent->unref causes its deletion,
892 * there are a) by refcnt, no other objects/paths pointing
893 * to it and b) by the semantics of valid iteration of
894 * fh_lru (observed, e.g., by cohort_lru<T,...>::drain())
895 * no unsafe iterators reaching it either--n.b., this constraint
896 * is binding oncode which may in future attempt to e.g.,
897 * cause the eviction of objects in LRU order */
898 (void) get_fs()->fh_lru.unref(parent, cohort::lru::FLAG_NONE);
899 }
900 }
901
902 void RGWFileHandle::encode_attrs(ceph::buffer::list& ux_key1,
903 ceph::buffer::list& ux_attrs1)
904 {
905 fh_key fhk(this->fh.fh_hk);
906 rgw::encode(fhk, ux_key1);
907 rgw::encode(*this, ux_attrs1);
908 } /* RGWFileHandle::encode_attrs */
909
910 void RGWFileHandle::decode_attrs(const ceph::buffer::list* ux_key1,
911 const ceph::buffer::list* ux_attrs1)
912 {
913 fh_key fhk;
914 auto bl_iter_key1 = const_cast<buffer::list*>(ux_key1)->begin();
915 rgw::decode(fhk, bl_iter_key1);
916 assert(this->fh.fh_hk == fhk.fh_hk);
917
918 auto bl_iter_unix1 = const_cast<buffer::list*>(ux_attrs1)->begin();
919 rgw::decode(*this, bl_iter_unix1);
920 } /* RGWFileHandle::decode_attrs */
921
922 bool RGWFileHandle::reclaim() {
923 lsubdout(fs->get_context(), rgw, 17)
924 << __func__ << " " << *this
925 << dendl;
926 /* remove if still in fh_cache */
927 if (fh_hook.is_linked()) {
928 fs->fh_cache.remove(fh.fh_hk.object, this, FHCache::FLAG_LOCK);
929 }
930 return true;
931 } /* RGWFileHandle::reclaim */
932
933 bool RGWFileHandle::has_children() const
934 {
935 if (unlikely(! is_dir()))
936 return false;
937
938 RGWRMdirCheck req(fs->get_context(), fs->get_user(), this);
939 int rc = rgwlib.get_fe()->execute_req(&req);
940 if (! rc) {
941 return req.valid && req.has_children;
942 }
943
944 return false;
945 }
946
947 int RGWFileHandle::readdir(rgw_readdir_cb rcb, void *cb_arg, uint64_t *offset,
948 bool *eof, uint32_t flags)
949 {
950 using event = RGWLibFS::event;
951 int rc = 0;
952 struct timespec now;
953 CephContext* cct = fs->get_context();
954
955 if ((*offset == 0) &&
956 (flags & RGW_READDIR_FLAG_DOTDOT)) {
957 /* send '.' and '..' with their NFS-defined offsets */
958 rcb(".", cb_arg, 1, RGW_LOOKUP_FLAG_DIR);
959 rcb("..", cb_arg, 2, RGW_LOOKUP_FLAG_DIR);
960 }
961
962 lsubdout(fs->get_context(), rgw, 15)
963 << __func__
964 << " offset=" << *offset
965 << dendl;
966
967 directory* d = get<directory>(&variant_type);
968 if (d) {
969 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
970 lock_guard guard(mtx);
971 d->last_readdir = now;
972 }
973
974 if (is_root()) {
975 RGWListBucketsRequest req(cct, fs->get_user(), this, rcb, cb_arg,
976 offset);
977 rc = rgwlib.get_fe()->execute_req(&req);
978 if (! rc) {
979 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
980 lock_guard guard(mtx);
981 state.atime = now;
982 if (*offset == 0)
983 set_nlink(2);
984 inc_nlink(req.d_count);
985 *eof = req.eof();
986 event ev(event::type::READDIR, get_key(), state.atime);
987 fs->state.push_event(ev);
988 }
989 } else {
990 RGWReaddirRequest req(cct, fs->get_user(), this, rcb, cb_arg, offset);
991 rc = rgwlib.get_fe()->execute_req(&req);
992 if (! rc) {
993 (void) clock_gettime(CLOCK_MONOTONIC_COARSE, &now); /* !LOCKED */
994 lock_guard guard(mtx);
995 state.atime = now;
996 if (*offset == 0)
997 set_nlink(2);
998 inc_nlink(req.d_count);
999 *eof = req.eof();
1000 event ev(event::type::READDIR, get_key(), state.atime);
1001 fs->state.push_event(ev);
1002 }
1003 }
1004
1005 lsubdout(fs->get_context(), rgw, 15)
1006 << __func__
1007 << " final link count=" << state.nlink
1008 << dendl;
1009
1010 return rc;
1011 } /* RGWFileHandle::readdir */
1012
1013 int RGWFileHandle::write(uint64_t off, size_t len, size_t *bytes_written,
1014 void *buffer)
1015 {
1016 using std::get;
1017 using WriteCompletion = RGWLibFS::WriteCompletion;
1018
1019 lock_guard guard(mtx);
1020
1021 int rc = 0;
1022
1023 file* f = get<file>(&variant_type);
1024 if (! f)
1025 return -EISDIR;
1026
1027 if (deleted()) {
1028 lsubdout(fs->get_context(), rgw, 5)
1029 << __func__
1030 << " write attempted on deleted object "
1031 << this->object_name()
1032 << dendl;
1033 /* zap write transaction, if any */
1034 if (f->write_req) {
1035 delete f->write_req;
1036 f->write_req = nullptr;
1037 }
1038 return -ESTALE;
1039 }
1040
1041 if (! f->write_req) {
1042 /* guard--we do not support (e.g., COW-backed) partial writes */
1043 if (off != 0) {
1044 lsubdout(fs->get_context(), rgw, 5)
1045 << __func__
1046 << " " << object_name()
1047 << " non-0 initial write position " << off
1048 << dendl;
1049 return -EIO;
1050 }
1051
1052 /* start */
1053 std::string object_name = relative_object_name();
1054 f->write_req =
1055 new RGWWriteRequest(fs->get_context(), fs->get_user(), this,
1056 bucket_name(), object_name);
1057 rc = rgwlib.get_fe()->start_req(f->write_req);
1058 if (rc < 0) {
1059 lsubdout(fs->get_context(), rgw, 5)
1060 << __func__
1061 << this->object_name()
1062 << " write start failed " << off
1063 << " (" << rc << ")"
1064 << dendl;
1065 /* zap failed write transaction */
1066 delete f->write_req;
1067 f->write_req = nullptr;
1068 return -EIO;
1069 } else {
1070 if (stateless_open()) {
1071 /* start write timer */
1072 f->write_req->timer_id =
1073 RGWLibFS::write_timer.add_event(
1074 std::chrono::seconds(RGWLibFS::write_completion_interval_s),
1075 WriteCompletion(*this));
1076 }
1077 }
1078 }
1079
1080 buffer::list bl;
1081 /* XXXX */
1082 #if 0
1083 bl.push_back(
1084 buffer::create_static(len, static_cast<char*>(buffer)));
1085 #else
1086 bl.push_back(
1087 buffer::copy(static_cast<char*>(buffer), len));
1088 #endif
1089
1090 f->write_req->put_data(off, bl);
1091 rc = f->write_req->exec_continue();
1092
1093 if (rc == 0) {
1094 size_t min_size = off + len;
1095 if (min_size > get_size())
1096 set_size(min_size);
1097 if (stateless_open()) {
1098 /* bump write timer */
1099 RGWLibFS::write_timer.adjust_event(
1100 f->write_req->timer_id, std::chrono::seconds(10));
1101 }
1102 } else {
1103 /* continuation failed (e.g., non-contiguous write position) */
1104 lsubdout(fs->get_context(), rgw, 5)
1105 << __func__
1106 << object_name()
1107 << " failed write at position " << off
1108 << " (fails write transaction) "
1109 << dendl;
1110 /* zap failed write transaction */
1111 delete f->write_req;
1112 f->write_req = nullptr;
1113 rc = -EIO;
1114 }
1115
1116 *bytes_written = (rc == 0) ? len : 0;
1117 return rc;
1118 } /* RGWFileHandle::write */
1119
1120 int RGWFileHandle::write_finish(uint32_t flags)
1121 {
1122 unique_lock guard{mtx, std::defer_lock};
1123 int rc = 0;
1124
1125 if (! (flags & FLAG_LOCKED)) {
1126 guard.lock();
1127 }
1128
1129 file* f = get<file>(&variant_type);
1130 if (f && (f->write_req)) {
1131 lsubdout(fs->get_context(), rgw, 10)
1132 << __func__
1133 << " finishing write trans on " << object_name()
1134 << dendl;
1135 rc = rgwlib.get_fe()->finish_req(f->write_req);
1136 if (! rc) {
1137 rc = f->write_req->get_ret();
1138 }
1139 delete f->write_req;
1140 f->write_req = nullptr;
1141 }
1142
1143 return rc;
1144 } /* RGWFileHandle::write_finish */
1145
1146 int RGWFileHandle::close()
1147 {
1148 lock_guard guard(mtx);
1149
1150 int rc = write_finish(FLAG_LOCKED);
1151
1152 flags &= ~FLAG_OPEN;
1153 return rc;
1154 } /* RGWFileHandle::close */
1155
1156 RGWFileHandle::file::~file()
1157 {
1158 delete write_req;
1159 }
1160
1161 void RGWFileHandle::clear_state()
1162 {
1163 directory* d = get<directory>(&variant_type);
1164 if (d) {
1165 state.nlink = 2;
1166 d->last_marker = rgw_obj_key{};
1167 }
1168 }
1169
1170 void RGWFileHandle::invalidate() {
1171 RGWLibFS *fs = get_fs();
1172 if (fs->invalidate_cb) {
1173 fs->invalidate_cb(fs->invalidate_arg, get_key().fh_hk);
1174 }
1175 }
1176
1177 int RGWWriteRequest::exec_start() {
1178 struct req_state* s = get_state();
1179
1180 /* not obviously supportable */
1181 assert(! dlo_manifest);
1182 assert(! slo_info);
1183
1184 perfcounter->inc(l_rgw_put);
1185 op_ret = -EINVAL;
1186
1187 if (s->object.empty()) {
1188 ldout(s->cct, 0) << __func__ << " called on empty object" << dendl;
1189 goto done;
1190 }
1191
1192 op_ret = get_params();
1193 if (op_ret < 0)
1194 goto done;
1195
1196 op_ret = get_system_versioning_params(s, &olh_epoch, &version_id);
1197 if (op_ret < 0) {
1198 goto done;
1199 }
1200
1201 /* user-supplied MD5 check skipped (not supplied) */
1202 /* early quota check skipped--we don't have size yet */
1203 /* skipping user-supplied etag--we might have one in future, but
1204 * like data it and other attrs would arrive after open */
1205 processor = select_processor(*static_cast<RGWObjectCtx *>(s->obj_ctx),
1206 &multipart);
1207 op_ret = processor->prepare(get_store(), NULL);
1208
1209 done:
1210 return op_ret;
1211 } /* exec_start */
1212
1213 int RGWWriteRequest::exec_continue()
1214 {
1215 struct req_state* s = get_state();
1216 op_ret = 0;
1217
1218 /* check guards (e.g., contig write) */
1219 if (eio)
1220 return -EIO;
1221
1222 size_t len = data.length();
1223 if (! len)
1224 return 0;
1225
1226 /* XXX we are currently synchronous--supplied data buffers cannot
1227 * be used after the caller returns */
1228 bool need_to_wait = true;
1229 bufferlist orig_data;
1230
1231 if (need_to_wait) {
1232 orig_data = data;
1233 }
1234 hash.Update((const byte *)data.c_str(), data.length());
1235 op_ret = put_data_and_throttle(processor, data, ofs,
1236 need_to_wait);
1237 if (op_ret < 0) {
1238 if (!need_to_wait || op_ret != -EEXIST) {
1239 ldout(s->cct, 20) << "processor->thottle_data() returned ret="
1240 << op_ret << dendl;
1241 goto done;
1242 }
1243
1244 ldout(s->cct, 5) << "NOTICE: processor->throttle_data() returned -EEXIST, need to restart write" << dendl;
1245
1246 /* restore original data */
1247 data.swap(orig_data);
1248
1249 /* restart processing with different oid suffix */
1250 dispose_processor(processor);
1251 processor = select_processor(*static_cast<RGWObjectCtx *>(s->obj_ctx),
1252 &multipart);
1253
1254 string oid_rand;
1255 char buf[33];
1256 gen_rand_alphanumeric(get_store()->ctx(), buf, sizeof(buf) - 1);
1257 oid_rand.append(buf);
1258
1259 op_ret = processor->prepare(get_store(), &oid_rand);
1260 if (op_ret < 0) {
1261 ldout(s->cct, 0) << "ERROR: processor->prepare() returned "
1262 << op_ret << dendl;
1263 goto done;
1264 }
1265
1266 op_ret = put_data_and_throttle(processor, data, ofs, false);
1267 if (op_ret < 0) {
1268 goto done;
1269 }
1270 }
1271 bytes_written += len;
1272
1273 done:
1274 return op_ret;
1275 } /* exec_continue */
1276
1277 int RGWWriteRequest::exec_finish()
1278 {
1279 buffer::list bl, aclbl, ux_key, ux_attrs;
1280 map<string, string>::iterator iter;
1281 char calc_md5[CEPH_CRYPTO_MD5_DIGESTSIZE * 2 + 1];
1282 unsigned char m[CEPH_CRYPTO_MD5_DIGESTSIZE];
1283 struct req_state* s = get_state();
1284
1285 size_t osize = rgw_fh->get_size();
1286 struct timespec octime = rgw_fh->get_ctime();
1287 struct timespec omtime = rgw_fh->get_mtime();
1288 real_time appx_t = real_clock::now();
1289
1290 s->obj_size = ofs; // XXX check ofs
1291 perfcounter->inc(l_rgw_put_b, s->obj_size);
1292
1293 op_ret = get_store()->check_quota(s->bucket_owner.get_id(), s->bucket,
1294 user_quota, bucket_quota, s->obj_size);
1295 if (op_ret < 0) {
1296 goto done;
1297 }
1298
1299 hash.Final(m);
1300
1301 buf_to_hex(m, CEPH_CRYPTO_MD5_DIGESTSIZE, calc_md5);
1302 etag = calc_md5;
1303
1304 bl.append(etag.c_str(), etag.size() + 1);
1305 emplace_attr(RGW_ATTR_ETAG, std::move(bl));
1306
1307 policy.encode(aclbl);
1308 emplace_attr(RGW_ATTR_ACL, std::move(aclbl));
1309
1310 /* unix attrs */
1311 rgw_fh->set_mtime(real_clock::to_timespec(appx_t));
1312 rgw_fh->set_ctime(real_clock::to_timespec(appx_t));
1313 rgw_fh->set_size(bytes_written);
1314 rgw_fh->encode_attrs(ux_key, ux_attrs);
1315
1316 emplace_attr(RGW_ATTR_UNIX_KEY1, std::move(ux_key));
1317 emplace_attr(RGW_ATTR_UNIX1, std::move(ux_attrs));
1318
1319 for (iter = s->generic_attrs.begin(); iter != s->generic_attrs.end();
1320 ++iter) {
1321 buffer::list& attrbl = attrs[iter->first];
1322 const string& val = iter->second;
1323 attrbl.append(val.c_str(), val.size() + 1);
1324 }
1325
1326 rgw_get_request_metadata(s->cct, s->info, attrs);
1327 encode_delete_at_attr(delete_at, attrs);
1328
1329 /* Add a custom metadata to expose the information whether an object
1330 * is an SLO or not. Appending the attribute must be performed AFTER
1331 * processing any input from user in order to prohibit overwriting. */
1332 if (unlikely(!! slo_info)) {
1333 buffer::list slo_userindicator_bl;
1334 ::encode("True", slo_userindicator_bl);
1335 emplace_attr(RGW_ATTR_SLO_UINDICATOR, std::move(slo_userindicator_bl));
1336 }
1337
1338 op_ret = processor->complete(s->obj_size, etag, &mtime, real_time(), attrs,
1339 (delete_at ? *delete_at : real_time()),
1340 if_match, if_nomatch);
1341 if (op_ret != 0) {
1342 /* revert attr updates */
1343 rgw_fh->set_mtime(omtime);
1344 rgw_fh->set_ctime(octime);
1345 rgw_fh->set_size(osize);
1346 }
1347
1348 done:
1349 dispose_processor(processor);
1350 perfcounter->tinc(l_rgw_put_lat,
1351 (ceph_clock_now() - s->time));
1352 return op_ret;
1353 } /* exec_finish */
1354
1355 } /* namespace rgw */
1356
1357 /* librgw */
1358 extern "C" {
1359
1360 void rgwfile_version(int *major, int *minor, int *extra)
1361 {
1362 if (major)
1363 *major = LIBRGW_FILE_VER_MAJOR;
1364 if (minor)
1365 *minor = LIBRGW_FILE_VER_MINOR;
1366 if (extra)
1367 *extra = LIBRGW_FILE_VER_EXTRA;
1368 }
1369
1370 /*
1371 attach rgw namespace
1372 */
1373 int rgw_mount(librgw_t rgw, const char *uid, const char *acc_key,
1374 const char *sec_key, struct rgw_fs **rgw_fs,
1375 uint32_t flags)
1376 {
1377 int rc = 0;
1378
1379 /* stash access data for "mount" */
1380 RGWLibFS* new_fs = new RGWLibFS(static_cast<CephContext*>(rgw), uid, acc_key,
1381 sec_key);
1382 assert(new_fs);
1383
1384 rc = new_fs->authorize(rgwlib.get_store());
1385 if (rc != 0) {
1386 delete new_fs;
1387 return -EINVAL;
1388 }
1389
1390 /* register fs for shared gc */
1391 rgwlib.get_fe()->get_process()->register_fs(new_fs);
1392
1393 struct rgw_fs *fs = new_fs->get_fs();
1394 fs->rgw = rgw;
1395
1396 /* XXX we no longer assume "/" is unique, but we aren't tracking the
1397 * roots atm */
1398
1399 *rgw_fs = fs;
1400
1401 return 0;
1402 }
1403
1404 /*
1405 register invalidate callbacks
1406 */
1407 int rgw_register_invalidate(struct rgw_fs *rgw_fs, rgw_fh_callback_t cb,
1408 void *arg, uint32_t flags)
1409
1410 {
1411 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1412 return fs->register_invalidate(cb, arg, flags);
1413 }
1414
1415 /*
1416 detach rgw namespace
1417 */
1418 int rgw_umount(struct rgw_fs *rgw_fs, uint32_t flags)
1419 {
1420 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1421 fs->close();
1422 fs->rele();
1423 return 0;
1424 }
1425
1426 /*
1427 get filesystem attributes
1428 */
1429 int rgw_statfs(struct rgw_fs *rgw_fs,
1430 struct rgw_file_handle *parent_fh,
1431 struct rgw_statvfs *vfs_st, uint32_t flags)
1432 {
1433 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1434
1435 /* XXX for now, just publish a huge capacity and
1436 * limited utiliztion */
1437 vfs_st->f_bsize = 1024*1024 /* 1M */;
1438 vfs_st->f_frsize = 1024; /* minimal allocation unit (who cares) */
1439 vfs_st->f_blocks = UINT64_MAX;
1440 vfs_st->f_bfree = UINT64_MAX;
1441 vfs_st->f_bavail = UINT64_MAX;
1442 vfs_st->f_files = 1024; /* object count, do we have an est? */
1443 vfs_st->f_ffree = UINT64_MAX;
1444 vfs_st->f_fsid[0] = fs->get_inst();
1445 vfs_st->f_fsid[1] = fs->get_inst();
1446 vfs_st->f_flag = 0;
1447 vfs_st->f_namemax = 4096;
1448 return 0;
1449 }
1450
1451 /*
1452 generic create -- create an empty regular file
1453 */
1454 int rgw_create(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1455 const char *name, struct stat *st, uint32_t mask,
1456 struct rgw_file_handle **fh, uint32_t posix_flags,
1457 uint32_t flags)
1458 {
1459 using std::get;
1460
1461 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1462 RGWFileHandle* parent = get_rgwfh(parent_fh);
1463
1464 if ((! parent) ||
1465 (parent->is_root()) ||
1466 (parent->is_file())) {
1467 /* bad parent */
1468 return -EINVAL;
1469 }
1470
1471 MkObjResult fhr = fs->create(parent, name, st, mask, flags);
1472 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1473
1474 if (nfh)
1475 *fh = nfh->get_fh();
1476
1477 return get<1>(fhr);
1478 } /* rgw_create */
1479
1480 /*
1481 create a new directory
1482 */
1483 int rgw_mkdir(struct rgw_fs *rgw_fs,
1484 struct rgw_file_handle *parent_fh,
1485 const char *name, struct stat *st, uint32_t mask,
1486 struct rgw_file_handle **fh, uint32_t flags)
1487 {
1488 using std::get;
1489
1490 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1491 RGWFileHandle* parent = get_rgwfh(parent_fh);
1492
1493 if (! parent) {
1494 /* bad parent */
1495 return -EINVAL;
1496 }
1497
1498 MkObjResult fhr = fs->mkdir(parent, name, st, mask, flags);
1499 RGWFileHandle *nfh = get<0>(fhr); // nullptr if !success
1500
1501 if (nfh)
1502 *fh = nfh->get_fh();
1503
1504 return get<1>(fhr);
1505 } /* rgw_mkdir */
1506
1507 /*
1508 rename object
1509 */
1510 int rgw_rename(struct rgw_fs *rgw_fs,
1511 struct rgw_file_handle *src, const char* src_name,
1512 struct rgw_file_handle *dst, const char* dst_name,
1513 uint32_t flags)
1514 {
1515 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1516
1517 RGWFileHandle* src_fh = get_rgwfh(src);
1518 RGWFileHandle* dst_fh = get_rgwfh(dst);
1519
1520 return fs->rename(src_fh, dst_fh, src_name, dst_name);
1521 }
1522
1523 /*
1524 remove file or directory
1525 */
1526 int rgw_unlink(struct rgw_fs *rgw_fs, struct rgw_file_handle *parent_fh,
1527 const char *name, uint32_t flags)
1528 {
1529 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1530 RGWFileHandle* parent = get_rgwfh(parent_fh);
1531
1532 return fs->unlink(parent, name);
1533 }
1534
1535 /*
1536 lookup object by name (POSIX style)
1537 */
1538 int rgw_lookup(struct rgw_fs *rgw_fs,
1539 struct rgw_file_handle *parent_fh, const char* path,
1540 struct rgw_file_handle **fh, uint32_t flags)
1541 {
1542 //CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
1543 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1544
1545 RGWFileHandle* parent = get_rgwfh(parent_fh);
1546 if ((! parent) ||
1547 (! parent->is_dir())) {
1548 /* bad parent */
1549 return -EINVAL;
1550 }
1551
1552 RGWFileHandle* rgw_fh;
1553 LookupFHResult fhr;
1554
1555 if (parent->is_root()) {
1556 /* special: parent lookup--note lack of ref()! */
1557 if (unlikely((strcmp(path, "..") == 0) ||
1558 (strcmp(path, "/") == 0))) {
1559 rgw_fh = parent;
1560 } else {
1561 fhr = fs->stat_bucket(parent, path, RGWFileHandle::FLAG_NONE);
1562 rgw_fh = get<0>(fhr);
1563 if (! rgw_fh)
1564 return -ENOENT;
1565 }
1566 } else {
1567 /* lookup in a readdir callback */
1568 enum rgw_fh_type fh_type = fh_type_of(flags);
1569
1570 uint32_t sl_flags = (flags & RGW_LOOKUP_FLAG_RCB)
1571 ? RGWFileHandle::FLAG_NONE
1572 : RGWFileHandle::FLAG_EXACT_MATCH;
1573
1574 fhr = fs->stat_leaf(parent, path, fh_type, sl_flags);
1575 if (! get<0>(fhr)) {
1576 if (! (flags & RGW_LOOKUP_FLAG_CREATE))
1577 return -ENOENT;
1578 else
1579 fhr = fs->lookup_fh(parent, path, RGWFileHandle::FLAG_CREATE);
1580 }
1581 rgw_fh = get<0>(fhr);
1582 } /* !root */
1583
1584 struct rgw_file_handle *rfh = rgw_fh->get_fh();
1585 *fh = rfh;
1586
1587 return 0;
1588 } /* rgw_lookup */
1589
1590 /*
1591 lookup object by handle (NFS style)
1592 */
1593 int rgw_lookup_handle(struct rgw_fs *rgw_fs, struct rgw_fh_hk *fh_hk,
1594 struct rgw_file_handle **fh, uint32_t flags)
1595 {
1596 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1597
1598 RGWFileHandle* rgw_fh = fs->lookup_handle(*fh_hk);
1599 if (! rgw_fh) {
1600 /* not found */
1601 return -ENOENT;
1602 }
1603
1604 struct rgw_file_handle *rfh = rgw_fh->get_fh();
1605 *fh = rfh;
1606
1607 return 0;
1608 }
1609
1610 /*
1611 * release file handle
1612 */
1613 int rgw_fh_rele(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
1614 uint32_t flags)
1615 {
1616 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1617 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1618
1619 lsubdout(fs->get_context(), rgw, 17)
1620 << __func__ << " " << *rgw_fh
1621 << dendl;
1622
1623 fs->unref(rgw_fh);
1624 return 0;
1625 }
1626
1627 /*
1628 get unix attributes for object
1629 */
1630 int rgw_getattr(struct rgw_fs *rgw_fs,
1631 struct rgw_file_handle *fh, struct stat *st, uint32_t flags)
1632 {
1633 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1634 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1635
1636 return fs->getattr(rgw_fh, st);
1637 }
1638
1639 /*
1640 set unix attributes for object
1641 */
1642 int rgw_setattr(struct rgw_fs *rgw_fs,
1643 struct rgw_file_handle *fh, struct stat *st,
1644 uint32_t mask, uint32_t flags)
1645 {
1646 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1647 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1648
1649 return fs->setattr(rgw_fh, st, mask, flags);
1650 }
1651
/*
  truncate file

  Stub: no argument is examined and nothing is modified.  Always
  returns 0.
*/
int rgw_truncate(struct rgw_fs *rgw_fs,
                 struct rgw_file_handle *fh, uint64_t size, uint32_t flags)
{
  /* intentionally unused */
  (void) rgw_fs;
  (void) fh;
  (void) size;
  (void) flags;

  return 0;
}
1660
1661 /*
1662 open file
1663 */
1664 int rgw_open(struct rgw_fs *rgw_fs,
1665 struct rgw_file_handle *fh, uint32_t posix_flags, uint32_t flags)
1666 {
1667 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1668
1669 /* XXX
1670 * need to track specific opens--at least read opens and
1671 * a write open; we need to know when a write open is returned,
1672 * that closes a write transaction
1673 *
1674 * for now, we will support single-open only, it's preferable to
1675 * anything we can otherwise do without access to the NFS state
1676 */
1677 if (! rgw_fh->is_file())
1678 return -EISDIR;
1679
1680 return rgw_fh->open(flags);
1681 }
1682
1683 /*
1684 close file
1685 */
1686 int rgw_close(struct rgw_fs *rgw_fs,
1687 struct rgw_file_handle *fh, uint32_t flags)
1688 {
1689 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1690 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1691 int rc = rgw_fh->close(/* XXX */);
1692
1693 if (flags & RGW_CLOSE_FLAG_RELE)
1694 fs->unref(rgw_fh);
1695
1696 return rc;
1697 }
1698
1699 int rgw_readdir(struct rgw_fs *rgw_fs,
1700 struct rgw_file_handle *parent_fh, uint64_t *offset,
1701 rgw_readdir_cb rcb, void *cb_arg, bool *eof,
1702 uint32_t flags)
1703 {
1704 RGWFileHandle* parent = get_rgwfh(parent_fh);
1705 if (! parent) {
1706 /* bad parent */
1707 return -EINVAL;
1708 }
1709 int rc = parent->readdir(rcb, cb_arg, offset, eof, flags);
1710 return rc;
1711 }
1712
1713 /*
1714 read data from file
1715 */
1716 int rgw_read(struct rgw_fs *rgw_fs,
1717 struct rgw_file_handle *fh, uint64_t offset,
1718 size_t length, size_t *bytes_read, void *buffer,
1719 uint32_t flags)
1720 {
1721 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1722 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1723
1724 return fs->read(rgw_fh, offset, length, bytes_read, buffer, flags);
1725 }
1726
1727 /*
1728 write data to file
1729 */
1730 int rgw_write(struct rgw_fs *rgw_fs,
1731 struct rgw_file_handle *fh, uint64_t offset,
1732 size_t length, size_t *bytes_written, void *buffer,
1733 uint32_t flags)
1734 {
1735 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1736 int rc;
1737
1738 *bytes_written = 0;
1739
1740 if (! rgw_fh->is_file())
1741 return -EISDIR;
1742
1743 if (! rgw_fh->is_open())
1744 return -EPERM;
1745
1746 rc = rgw_fh->write(offset, length, bytes_written, buffer);
1747
1748 return rc;
1749 }
1750
1751 /*
1752 read data from file (vector)
1753 */
1754 class RGWReadV
1755 {
1756 buffer::list bl;
1757 struct rgw_vio* vio;
1758
1759 public:
1760 RGWReadV(buffer::list& _bl, rgw_vio* _vio) : vio(_vio) {
1761 bl.claim(_bl);
1762 }
1763
1764 struct rgw_vio* get_vio() { return vio; }
1765
1766 const std::list<buffer::ptr>& buffers() { return bl.buffers(); }
1767
1768 unsigned /* XXX */ length() { return bl.length(); }
1769
1770 };
1771
1772 void rgw_readv_rele(struct rgw_uio *uio, uint32_t flags)
1773 {
1774 RGWReadV* rdv = static_cast<RGWReadV*>(uio->uio_p1);
1775 rdv->~RGWReadV();
1776 ::operator delete(rdv);
1777 }
1778
1779 int rgw_readv(struct rgw_fs *rgw_fs,
1780 struct rgw_file_handle *fh, rgw_uio *uio, uint32_t flags)
1781 {
1782 #if 0 /* XXX */
1783 CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
1784 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1785 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1786
1787 if (! rgw_fh->is_file())
1788 return -EINVAL;
1789
1790 int rc = 0;
1791
1792 buffer::list bl;
1793 RGWGetObjRequest req(cct, fs->get_user(), rgw_fh->bucket_name(),
1794 rgw_fh->object_name(), uio->uio_offset, uio->uio_resid,
1795 bl);
1796 req.do_hexdump = false;
1797
1798 rc = rgwlib.get_fe()->execute_req(&req);
1799
1800 if (! rc) {
1801 RGWReadV* rdv = static_cast<RGWReadV*>(
1802 ::operator new(sizeof(RGWReadV) +
1803 (bl.buffers().size() * sizeof(struct rgw_vio))));
1804
1805 (void) new (rdv)
1806 RGWReadV(bl, reinterpret_cast<rgw_vio*>(rdv+sizeof(RGWReadV)));
1807
1808 uio->uio_p1 = rdv;
1809 uio->uio_cnt = rdv->buffers().size();
1810 uio->uio_resid = rdv->length();
1811 uio->uio_vio = rdv->get_vio();
1812 uio->uio_rele = rgw_readv_rele;
1813
1814 int ix = 0;
1815 auto& buffers = rdv->buffers();
1816 for (auto& bp : buffers) {
1817 rgw_vio *vio = &(uio->uio_vio[ix]);
1818 vio->vio_base = const_cast<char*>(bp.c_str());
1819 vio->vio_len = bp.length();
1820 vio->vio_u1 = nullptr;
1821 vio->vio_p1 = nullptr;
1822 ++ix;
1823 }
1824 }
1825
1826 return rc;
1827 #else
1828 return 0;
1829 #endif
1830 }
1831
1832 /*
1833 write data to file (vector)
1834 */
1835 int rgw_writev(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
1836 rgw_uio *uio, uint32_t flags)
1837 {
1838
1839 return -ENOTSUP;
1840
1841 CephContext* cct = static_cast<CephContext*>(rgw_fs->rgw);
1842 RGWLibFS *fs = static_cast<RGWLibFS*>(rgw_fs->fs_private);
1843 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1844
1845 if (! rgw_fh->is_file())
1846 return -EINVAL;
1847
1848 buffer::list bl;
1849 for (unsigned int ix = 0; ix < uio->uio_cnt; ++ix) {
1850 rgw_vio *vio = &(uio->uio_vio[ix]);
1851 bl.push_back(
1852 buffer::create_static(vio->vio_len,
1853 static_cast<char*>(vio->vio_base)));
1854 }
1855
1856 std::string oname = rgw_fh->relative_object_name();
1857 RGWPutObjRequest req(cct, fs->get_user(), rgw_fh->bucket_name(),
1858 oname, bl);
1859
1860 int rc = rgwlib.get_fe()->execute_req(&req);
1861
1862 /* XXX update size (in request) */
1863
1864 return rc;
1865 }
1866
/*
  sync written data

  Stub: no argument is examined and nothing is flushed here.  Always
  returns 0.
*/
int rgw_fsync(struct rgw_fs *rgw_fs, struct rgw_file_handle *handle,
              uint32_t flags)
{
  /* intentionally unused */
  (void) rgw_fs;
  (void) handle;
  (void) flags;

  return 0;
}
1875
1876 int rgw_commit(struct rgw_fs *rgw_fs, struct rgw_file_handle *fh,
1877 uint64_t offset, uint64_t length, uint32_t flags)
1878 {
1879 RGWFileHandle* rgw_fh = get_rgwfh(fh);
1880
1881 return rgw_fh->commit(offset, length, RGWFileHandle::FLAG_NONE);
1882 }
1883
1884 } /* extern "C" */