git.proxmox.com Git - ceph.git/blob - ceph/src/rgw/rgw_tools.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / rgw / rgw_tools.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab ft=cpp
3
4 #include <errno.h>
5
6 #include "common/errno.h"
7 #include "common/safe_io.h"
8 #include "librados/librados_asio.h"
9 #include "common/async/yield_context.h"
10
11 #include "include/types.h"
12 #include "include/stringify.h"
13
14 #include "rgw_common.h"
15 #include "rgw_rados.h"
16 #include "rgw_tools.h"
17 #include "rgw_acl_s3.h"
18 #include "rgw_op.h"
19 #include "rgw_putobj_processor.h"
20 #include "rgw_aio_throttle.h"
21 #include "rgw_compression.h"
22 #include "rgw_zone.h"
23 #include "osd/osd_types.h"
24
25 #include "services/svc_sys_obj.h"
26 #include "services/svc_zone.h"
27 #include "services/svc_zone_utils.h"
28
// Debug-output settings for this translation unit (presumably consumed by the
// dout()/ldout() logging macros, which are defined outside this file).
#define dout_subsys ceph_subsys_rgw
#define dout_context g_ceph_context

// Initial request length, in bytes, used by rgw_get_system_obj().
#define READ_CHUNK_LEN (512 * 1024)

// File extension -> MIME type table. Allocated by rgw_tools_init(), filled by
// parse_mime_map_line() and released by rgw_tools_cleanup().
static std::map<std::string, std::string>* ext_mime_map = nullptr;
35
36 int rgw_init_ioctx(librados::Rados *rados, const rgw_pool& pool,
37 librados::IoCtx& ioctx, bool create,
38 bool mostly_omap)
39 {
40 int r = rados->ioctx_create(pool.name.c_str(), ioctx);
41 if (r == -ENOENT && create) {
42 r = rados->pool_create(pool.name.c_str());
43 if (r == -ERANGE) {
44 dout(0)
45 << __func__
46 << " ERROR: librados::Rados::pool_create returned " << cpp_strerror(-r)
47 << " (this can be due to a pool or placement group misconfiguration, e.g."
48 << " pg_num < pgp_num or mon_max_pg_per_osd exceeded)"
49 << dendl;
50 }
51 if (r < 0 && r != -EEXIST) {
52 return r;
53 }
54
55 r = rados->ioctx_create(pool.name.c_str(), ioctx);
56 if (r < 0) {
57 return r;
58 }
59
60 r = ioctx.application_enable(pg_pool_t::APPLICATION_NAME_RGW, false);
61 if (r < 0 && r != -EOPNOTSUPP) {
62 return r;
63 }
64
65 if (mostly_omap) {
66 // set pg_autoscale_bias
67 bufferlist inbl;
68 float bias = g_conf().get_val<double>("rgw_rados_pool_autoscale_bias");
69 int r = rados->mon_command(
70 "{\"prefix\": \"osd pool set\", \"pool\": \"" +
71 pool.name + "\", \"var\": \"pg_autoscale_bias\", \"val\": \"" +
72 stringify(bias) + "\"}",
73 inbl, NULL, NULL);
74 if (r < 0) {
75 dout(10) << __func__ << " warning: failed to set pg_autoscale_bias on "
76 << pool.name << dendl;
77 }
78 // set pg_num_min
79 int min = g_conf().get_val<uint64_t>("rgw_rados_pool_pg_num_min");
80 r = rados->mon_command(
81 "{\"prefix\": \"osd pool set\", \"pool\": \"" +
82 pool.name + "\", \"var\": \"pg_num_min\", \"val\": \"" +
83 stringify(min) + "\"}",
84 inbl, NULL, NULL);
85 if (r < 0) {
86 dout(10) << __func__ << " warning: failed to set pg_num_min on "
87 << pool.name << dendl;
88 }
89 // set recovery_priority
90 int p = g_conf().get_val<uint64_t>("rgw_rados_pool_recovery_priority");
91 r = rados->mon_command(
92 "{\"prefix\": \"osd pool set\", \"pool\": \"" +
93 pool.name + "\", \"var\": \"recovery_priority\": \"" +
94 stringify(p) + "\"}",
95 inbl, NULL, NULL);
96 if (r < 0) {
97 dout(10) << __func__ << " warning: failed to set recovery_priority on "
98 << pool.name << dendl;
99 }
100 }
101 } else if (r < 0) {
102 return r;
103 }
104 if (!pool.ns.empty()) {
105 ioctx.set_namespace(pool.ns);
106 }
107 return 0;
108 }
109
110 void rgw_shard_name(const string& prefix, unsigned max_shards, const string& key, string& name, int *shard_id)
111 {
112 uint32_t val = ceph_str_hash_linux(key.c_str(), key.size());
113 char buf[16];
114 if (shard_id) {
115 *shard_id = val % max_shards;
116 }
117 snprintf(buf, sizeof(buf), "%u", (unsigned)(val % max_shards));
118 name = prefix + buf;
119 }
120
121 void rgw_shard_name(const string& prefix, unsigned max_shards, const string& section, const string& key, string& name)
122 {
123 uint32_t val = ceph_str_hash_linux(key.c_str(), key.size());
124 val ^= ceph_str_hash_linux(section.c_str(), section.size());
125 char buf[16];
126 snprintf(buf, sizeof(buf), "%u", (unsigned)(val % max_shards));
127 name = prefix + buf;
128 }
129
/*
 * Build the shard object name for an already-known shard number:
 * name = prefix + decimal shard_id.
 */
void rgw_shard_name(const string& prefix, unsigned shard_id, string& name)
{
  name = prefix + std::to_string(shard_id);
}
136
137 int rgw_parse_list_of_flags(struct rgw_name_to_flag *mapping,
138 const string& str, uint32_t *perm)
139 {
140 list<string> strs;
141 get_str_list(str, strs);
142 list<string>::iterator iter;
143 uint32_t v = 0;
144 for (iter = strs.begin(); iter != strs.end(); ++iter) {
145 string& s = *iter;
146 for (int i = 0; mapping[i].type_name; i++) {
147 if (s.compare(mapping[i].type_name) == 0)
148 v |= mapping[i].flag;
149 }
150 }
151
152 *perm = v;
153 return 0;
154 }
155
156 int rgw_put_system_obj(RGWSysObjectCtx& obj_ctx, const rgw_pool& pool, const string& oid, bufferlist& data, bool exclusive,
157 RGWObjVersionTracker *objv_tracker, real_time set_mtime, optional_yield y, map<string, bufferlist> *pattrs)
158 {
159 map<string,bufferlist> no_attrs;
160 if (!pattrs) {
161 pattrs = &no_attrs;
162 }
163
164 rgw_raw_obj obj(pool, oid);
165
166 auto sysobj = obj_ctx.get_obj(obj);
167 int ret = sysobj.wop()
168 .set_objv_tracker(objv_tracker)
169 .set_exclusive(exclusive)
170 .set_mtime(set_mtime)
171 .set_attrs(*pattrs)
172 .write(data, y);
173
174 return ret;
175 }
176
177 int rgw_put_system_obj(RGWSysObjectCtx& obj_ctx, const rgw_pool& pool, const string& oid, bufferlist& data, bool exclusive,
178 RGWObjVersionTracker *objv_tracker, real_time set_mtime, map<string, bufferlist> *pattrs)
179 {
180 return rgw_put_system_obj(obj_ctx, pool, oid, data, exclusive,
181 objv_tracker, set_mtime, null_yield, pattrs);
182 }
183
184 int rgw_get_system_obj(RGWSysObjectCtx& obj_ctx, const rgw_pool& pool, const string& key, bufferlist& bl,
185 RGWObjVersionTracker *objv_tracker, real_time *pmtime, optional_yield y, map<string, bufferlist> *pattrs,
186 rgw_cache_entry_info *cache_info,
187 boost::optional<obj_version> refresh_version)
188 {
189 bufferlist::iterator iter;
190 int request_len = READ_CHUNK_LEN;
191 rgw_raw_obj obj(pool, key);
192
193 obj_version original_readv;
194 if (objv_tracker && !objv_tracker->read_version.empty()) {
195 original_readv = objv_tracker->read_version;
196 }
197
198 do {
199 auto sysobj = obj_ctx.get_obj(obj);
200 auto rop = sysobj.rop();
201
202 int ret = rop.set_attrs(pattrs)
203 .set_last_mod(pmtime)
204 .set_objv_tracker(objv_tracker)
205 .stat(y);
206 if (ret < 0)
207 return ret;
208
209 ret = rop.set_cache_info(cache_info)
210 .set_refresh_version(refresh_version)
211 .read(&bl, y);
212 if (ret == -ECANCELED) {
213 /* raced, restart */
214 if (!original_readv.empty()) {
215 /* we were asked to read a specific obj_version, failed */
216 return ret;
217 }
218 if (objv_tracker) {
219 objv_tracker->read_version.clear();
220 }
221 sysobj.invalidate();
222 continue;
223 }
224 if (ret < 0)
225 return ret;
226
227 if (ret < request_len)
228 break;
229 bl.clear();
230 request_len *= 2;
231 } while (true);
232
233 return 0;
234 }
235
236 int rgw_delete_system_obj(RGWSI_SysObj *sysobj_svc, const rgw_pool& pool, const string& oid,
237 RGWObjVersionTracker *objv_tracker)
238 {
239 auto obj_ctx = sysobj_svc->init_obj_ctx();
240 auto sysobj = obj_ctx.get_obj(rgw_raw_obj{pool, oid});
241 rgw_raw_obj obj(pool, oid);
242 return sysobj.wop()
243 .set_objv_tracker(objv_tracker)
244 .remove(null_yield);
245 }
246
// Per-thread flag, false by default. When it is true, the blocking fallbacks
// in rgw_rados_operate()/rgw_rados_notify() log a warning before calling into
// librados synchronously.
thread_local bool is_asio_thread{false};
248
249 int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid,
250 librados::ObjectReadOperation *op, bufferlist* pbl,
251 optional_yield y)
252 {
253 #ifdef HAVE_BOOST_CONTEXT
254 // given a yield_context, call async_operate() to yield the coroutine instead
255 // of blocking
256 if (y) {
257 auto& context = y.get_io_context();
258 auto& yield = y.get_yield_context();
259 boost::system::error_code ec;
260 auto bl = librados::async_operate(context, ioctx, oid, op, 0, yield[ec]);
261 if (pbl) {
262 *pbl = std::move(bl);
263 }
264 return -ec.value();
265 }
266 // work on asio threads should be asynchronous, so warn when they block
267 if (is_asio_thread) {
268 dout(20) << "WARNING: blocking librados call" << dendl;
269 }
270 #endif
271 return ioctx.operate(oid, op, nullptr);
272 }
273
274 int rgw_rados_operate(librados::IoCtx& ioctx, const std::string& oid,
275 librados::ObjectWriteOperation *op, optional_yield y)
276 {
277 #ifdef HAVE_BOOST_CONTEXT
278 if (y) {
279 auto& context = y.get_io_context();
280 auto& yield = y.get_yield_context();
281 boost::system::error_code ec;
282 librados::async_operate(context, ioctx, oid, op, 0, yield[ec]);
283 return -ec.value();
284 }
285 if (is_asio_thread) {
286 dout(20) << "WARNING: blocking librados call" << dendl;
287 }
288 #endif
289 return ioctx.operate(oid, op);
290 }
291
292 int rgw_rados_notify(librados::IoCtx& ioctx, const std::string& oid,
293 bufferlist& bl, uint64_t timeout_ms, bufferlist* pbl,
294 optional_yield y)
295 {
296 #ifdef HAVE_BOOST_CONTEXT
297 if (y) {
298 auto& context = y.get_io_context();
299 auto& yield = y.get_yield_context();
300 boost::system::error_code ec;
301 auto reply = librados::async_notify(context, ioctx, oid,
302 bl, timeout_ms, yield[ec]);
303 if (pbl) {
304 *pbl = std::move(reply);
305 }
306 return -ec.value();
307 }
308 if (is_asio_thread) {
309 dout(20) << "WARNING: blocking librados call" << dendl;
310 }
311 #endif
312 return ioctx.notify2(oid, bl, timeout_ms, pbl);
313 }
314
315 void parse_mime_map_line(const char *start, const char *end)
316 {
317 char line[end - start + 1];
318 strncpy(line, start, end - start);
319 line[end - start] = '\0';
320 char *l = line;
321 #define DELIMS " \t\n\r"
322
323 while (isspace(*l))
324 l++;
325
326 char *mime = strsep(&l, DELIMS);
327 if (!mime)
328 return;
329
330 char *ext;
331 do {
332 ext = strsep(&l, DELIMS);
333 if (ext && *ext) {
334 (*ext_mime_map)[ext] = mime;
335 }
336 } while (ext);
337 }
338
339
340 void parse_mime_map(const char *buf)
341 {
342 const char *start = buf, *end = buf;
343 while (*end) {
344 while (*end && *end != '\n') {
345 end++;
346 }
347 parse_mime_map_line(start, end);
348 end++;
349 start = end;
350 }
351 }
352
353 static int ext_mime_map_init(CephContext *cct, const char *ext_map)
354 {
355 int fd = open(ext_map, O_RDONLY);
356 char *buf = NULL;
357 int ret;
358 if (fd < 0) {
359 ret = -errno;
360 ldout(cct, 0) << __func__ << " failed to open file=" << ext_map
361 << " : " << cpp_strerror(-ret) << dendl;
362 return ret;
363 }
364
365 struct stat st;
366 ret = fstat(fd, &st);
367 if (ret < 0) {
368 ret = -errno;
369 ldout(cct, 0) << __func__ << " failed to stat file=" << ext_map
370 << " : " << cpp_strerror(-ret) << dendl;
371 goto done;
372 }
373
374 buf = (char *)malloc(st.st_size + 1);
375 if (!buf) {
376 ret = -ENOMEM;
377 ldout(cct, 0) << __func__ << " failed to allocate buf" << dendl;
378 goto done;
379 }
380
381 ret = safe_read(fd, buf, st.st_size + 1);
382 if (ret != st.st_size) {
383 // huh? file size has changed?
384 ldout(cct, 0) << __func__ << " raced! will retry.." << dendl;
385 free(buf);
386 close(fd);
387 return ext_mime_map_init(cct, ext_map);
388 }
389 buf[st.st_size] = '\0';
390
391 parse_mime_map(buf);
392 ret = 0;
393 done:
394 free(buf);
395 close(fd);
396 return ret;
397 }
398
399 const char *rgw_find_mime_by_ext(string& ext)
400 {
401 map<string, string>::iterator iter = ext_mime_map->find(ext);
402 if (iter == ext_mime_map->end())
403 return NULL;
404
405 return iter->second.c_str();
406 }
407
408 void rgw_filter_attrset(map<string, bufferlist>& unfiltered_attrset, const string& check_prefix,
409 map<string, bufferlist> *attrset)
410 {
411 attrset->clear();
412 map<string, bufferlist>::iterator iter;
413 for (iter = unfiltered_attrset.lower_bound(check_prefix);
414 iter != unfiltered_attrset.end(); ++iter) {
415 if (!boost::algorithm::starts_with(iter->first, check_prefix))
416 break;
417 (*attrset)[iter->first] = iter->second;
418 }
419 }
420
421 RGWDataAccess::RGWDataAccess(rgw::sal::RGWRadosStore *_store) : store(_store)
422 {
423 sysobj_ctx = std::make_unique<RGWSysObjectCtx>(store->svc()->sysobj->init_obj_ctx());
424 }
425
426
427 int RGWDataAccess::Bucket::finish_init()
428 {
429 auto iter = attrs.find(RGW_ATTR_ACL);
430 if (iter == attrs.end()) {
431 return 0;
432 }
433
434 bufferlist::const_iterator bliter = iter->second.begin();
435 try {
436 policy.decode(bliter);
437 } catch (buffer::error& err) {
438 return -EIO;
439 }
440
441 return 0;
442 }
443
444 int RGWDataAccess::Bucket::init()
445 {
446 int ret = sd->store->getRados()->get_bucket_info(sd->store->svc(),
447 tenant, name,
448 bucket_info,
449 &mtime,
450 null_yield,
451 &attrs);
452 if (ret < 0) {
453 return ret;
454 }
455
456 return finish_init();
457 }
458
459 int RGWDataAccess::Bucket::init(const RGWBucketInfo& _bucket_info,
460 const map<string, bufferlist>& _attrs)
461 {
462 bucket_info = _bucket_info;
463 attrs = _attrs;
464
465 return finish_init();
466 }
467
468 int RGWDataAccess::Bucket::get_object(const rgw_obj_key& key,
469 ObjectRef *obj) {
470 obj->reset(new Object(sd, shared_from_this(), key));
471 return 0;
472 }
473
474 int RGWDataAccess::Object::put(bufferlist& data,
475 map<string, bufferlist>& attrs,
476 const DoutPrefixProvider *dpp,
477 optional_yield y)
478 {
479 rgw::sal::RGWRadosStore *store = sd->store;
480 CephContext *cct = store->ctx();
481
482 string tag;
483 append_rand_alpha(cct, tag, tag, 32);
484
485 RGWBucketInfo& bucket_info = bucket->bucket_info;
486
487 rgw::BlockingAioThrottle aio(store->ctx()->_conf->rgw_put_obj_min_window_size);
488
489 RGWObjectCtx obj_ctx(store);
490 rgw_obj obj(bucket_info.bucket, key);
491
492 auto& owner = bucket->policy.get_owner();
493
494 string req_id = store->svc()->zone_utils->unique_id(store->getRados()->get_new_req_id());
495
496 using namespace rgw::putobj;
497 AtomicObjectProcessor processor(&aio, store, bucket_info, nullptr,
498 owner.get_id(), obj_ctx, obj, olh_epoch,
499 req_id, dpp, y);
500
501 int ret = processor.prepare(y);
502 if (ret < 0)
503 return ret;
504
505 DataProcessor *filter = &processor;
506
507 CompressorRef plugin;
508 boost::optional<RGWPutObj_Compress> compressor;
509
510 const auto& compression_type = store->svc()->zone->get_zone_params().get_compression_type(bucket_info.placement_rule);
511 if (compression_type != "none") {
512 plugin = Compressor::create(store->ctx(), compression_type);
513 if (!plugin) {
514 ldout(store->ctx(), 1) << "Cannot load plugin for compression type "
515 << compression_type << dendl;
516 } else {
517 compressor.emplace(store->ctx(), plugin, filter);
518 filter = &*compressor;
519 }
520 }
521
522 off_t ofs = 0;
523 auto obj_size = data.length();
524
525 RGWMD5Etag etag_calc;
526
527 do {
528 size_t read_len = std::min(data.length(), (unsigned int)cct->_conf->rgw_max_chunk_size);
529
530 bufferlist bl;
531
532 data.splice(0, read_len, &bl);
533 etag_calc.update(bl);
534
535 ret = filter->process(std::move(bl), ofs);
536 if (ret < 0)
537 return ret;
538
539 ofs += read_len;
540 } while (data.length() > 0);
541
542 ret = filter->process({}, ofs);
543 if (ret < 0) {
544 return ret;
545 }
546 bool has_etag_attr = false;
547 auto iter = attrs.find(RGW_ATTR_ETAG);
548 if (iter != attrs.end()) {
549 bufferlist& bl = iter->second;
550 etag = bl.to_str();
551 has_etag_attr = true;
552 }
553
554 if (!aclbl) {
555 RGWAccessControlPolicy_S3 policy(cct);
556
557 policy.create_canned(bucket->policy.get_owner(), bucket->policy.get_owner(), string()); /* default private policy */
558
559 policy.encode(aclbl.emplace());
560 }
561
562 if (etag.empty()) {
563 etag_calc.finish(&etag);
564 }
565
566 if (!has_etag_attr) {
567 bufferlist etagbl;
568 etagbl.append(etag);
569 attrs[RGW_ATTR_ETAG] = etagbl;
570 }
571 attrs[RGW_ATTR_ACL] = *aclbl;
572
573 string *puser_data = nullptr;
574 if (user_data) {
575 puser_data = &(*user_data);
576 }
577
578 return processor.complete(obj_size, etag,
579 &mtime, mtime,
580 attrs, delete_at,
581 nullptr, nullptr,
582 puser_data,
583 nullptr, nullptr, y);
584 }
585
586 void RGWDataAccess::Object::set_policy(const RGWAccessControlPolicy& policy)
587 {
588 policy.encode(aclbl.emplace());
589 }
590
591 int rgw_tools_init(CephContext *cct)
592 {
593 ext_mime_map = new std::map<std::string, std::string>;
594 ext_mime_map_init(cct, cct->_conf->rgw_mime_types_file.c_str());
595 // ignore errors; missing mime.types is not fatal
596 return 0;
597 }
598
599 void rgw_tools_cleanup()
600 {
601 delete ext_mime_map;
602 ext_mime_map = nullptr;
603 }