]> git.proxmox.com Git - ceph.git/blob - ceph/src/test/librados/tier_cxx.cc
import ceph quincy 17.2.1
[ceph.git] / ceph / src / test / librados / tier_cxx.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 #include "gtest/gtest.h"
4
5 #include "mds/mdstypes.h"
6 #include "include/buffer.h"
7 #include "include/rbd_types.h"
8 #include "include/rados/librados.hpp"
9 #include "include/stringify.h"
10 #include "include/types.h"
11 #include "global/global_context.h"
12 #include "common/Cond.h"
13 #include "test/librados/test_cxx.h"
14 #include "test/librados/testcase_cxx.h"
15 #include "json_spirit/json_spirit.h"
16 #include "cls/cas/cls_cas_ops.h"
17 #include "cls/cas/cls_cas_internal.h"
18
19 #include "osd/HitSet.h"
20
21 #include <errno.h>
22 #include <map>
23 #include <sstream>
24 #include <string>
25
26 #include "cls/cas/cls_cas_client.h"
27 #include "cls/cas/cls_cas_internal.h"
28
29 using namespace std;
30 using namespace librados;
31
32 typedef RadosTestPP LibRadosTierPP;
33 typedef RadosTestECPP LibRadosTierECPP;
34
35 void flush_evict_all(librados::Rados& cluster, librados::IoCtx& cache_ioctx)
36 {
37 bufferlist inbl;
38 cache_ioctx.set_namespace(all_nspaces);
39 for (NObjectIterator it = cache_ioctx.nobjects_begin();
40 it != cache_ioctx.nobjects_end(); ++it) {
41 cache_ioctx.locator_set_key(it->get_locator());
42 cache_ioctx.set_namespace(it->get_nspace());
43 {
44 ObjectReadOperation op;
45 op.cache_flush();
46 librados::AioCompletion *completion = cluster.aio_create_completion();
47 cache_ioctx.aio_operate(
48 it->get_oid(), completion, &op,
49 librados::OPERATION_IGNORE_OVERLAY, NULL);
50 completion->wait_for_complete();
51 completion->get_return_value();
52 completion->release();
53 }
54 {
55 ObjectReadOperation op;
56 op.cache_evict();
57 librados::AioCompletion *completion = cluster.aio_create_completion();
58 cache_ioctx.aio_operate(
59 it->get_oid(), completion, &op,
60 librados::OPERATION_IGNORE_OVERLAY, NULL);
61 completion->wait_for_complete();
62 completion->get_return_value();
63 completion->release();
64 }
65 }
66 }
67
68 static string _get_required_osd_release(Rados& cluster)
69 {
70 bufferlist inbl;
71 string cmd = string("{\"prefix\": \"osd dump\",\"format\":\"json\"}");
72 bufferlist outbl;
73 int r = cluster.mon_command(cmd, inbl, &outbl, NULL);
74 ceph_assert(r >= 0);
75 string outstr(outbl.c_str(), outbl.length());
76 json_spirit::Value v;
77 if (!json_spirit::read(outstr, v)) {
78 cerr <<" unable to parse json " << outstr << std::endl;
79 return "";
80 }
81
82 json_spirit::Object& o = v.get_obj();
83 for (json_spirit::Object::size_type i=0; i<o.size(); i++) {
84 json_spirit::Pair& p = o[i];
85 if (p.name_ == "require_osd_release") {
86 cout << "require_osd_release = " << p.value_.get_str() << std::endl;
87 return p.value_.get_str();
88 }
89 }
90 cerr << "didn't find require_osd_release in " << outstr << std::endl;
91 return "";
92 }
93
94 void manifest_set_chunk(Rados& cluster, librados::IoCtx& src_ioctx,
95 librados::IoCtx& tgt_ioctx,
96 uint64_t src_offset, uint64_t length,
97 std::string src_oid, std::string tgt_oid)
98 {
99 ObjectReadOperation op;
100 op.set_chunk(src_offset, length, src_ioctx, src_oid, 0,
101 CEPH_OSD_OP_FLAG_WITH_REFERENCE);
102 librados::AioCompletion *completion = cluster.aio_create_completion();
103 ASSERT_EQ(0, tgt_ioctx.aio_operate(tgt_oid, completion, &op,
104 librados::OPERATION_IGNORE_CACHE, NULL));
105 completion->wait_for_complete();
106 ASSERT_EQ(0, completion->get_return_value());
107 completion->release();
108 }
109
110 #include "common/ceph_crypto.h"
111 using ceph::crypto::SHA1;
112 #include "rgw/rgw_common.h"
113
114 void check_fp_oid_refcount(librados::IoCtx& ioctx, std::string foid, uint64_t count,
115 std::string fp_algo = NULL)
116 {
117 bufferlist t;
118 int size = foid.length();
119 if (fp_algo == "sha1") {
120 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
121 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
122 SHA1 sha1_gen;
123 sha1_gen.Update((const unsigned char *)foid.c_str(), size);
124 sha1_gen.Final(fingerprint);
125 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
126 ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
127 } else if (fp_algo.empty()) {
128 ioctx.getxattr(foid, CHUNK_REFCOUNT_ATTR, t);
129 } else if (!fp_algo.empty()) {
130 ceph_assert(0 == "unrecognized fingerprint algorithm");
131 }
132
133 chunk_refs_t refs;
134 try {
135 auto iter = t.cbegin();
136 decode(refs, iter);
137 } catch (buffer::error& err) {
138 ASSERT_TRUE(0);
139 }
140 ASSERT_LE(count, refs.count());
141 }
142
143 string get_fp_oid(string oid, std::string fp_algo = NULL)
144 {
145 if (fp_algo == "sha1") {
146 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
147 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
148 SHA1 sha1_gen;
149 int size = oid.length();
150 sha1_gen.Update((const unsigned char *)oid.c_str(), size);
151 sha1_gen.Final(fingerprint);
152 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
153 return string(p_str);
154 }
155
156 return string();
157 }
158
159 void is_intended_refcount_state(librados::IoCtx& src_ioctx,
160 std::string src_oid,
161 librados::IoCtx& dst_ioctx,
162 std::string dst_oid,
163 int expected_refcount)
164 {
165 int src_refcount = 0, dst_refcount = 0;
166 bufferlist t;
167 int r = dst_ioctx.getxattr(dst_oid, CHUNK_REFCOUNT_ATTR, t);
168 if (r == -ENOENT) {
169 dst_refcount = 0;
170 } else {
171 chunk_refs_t refs;
172 try {
173 auto iter = t.cbegin();
174 decode(refs, iter);
175 } catch (buffer::error& err) {
176 ceph_assert(0);
177 }
178 dst_refcount = refs.count();
179 }
180 int tries = 0;
181 for (; tries < 30; ++tries) {
182 r = cls_cas_references_chunk(src_ioctx, src_oid, dst_oid);
183 if (r == -ENOENT || r == -ENOLINK) {
184 src_refcount = 0;
185 } else if (r == -EBUSY) {
186 sleep(20);
187 continue;
188 } else {
189 src_refcount = r;
190 }
191 break;
192 }
193 ASSERT_TRUE(tries < 30);
194 ASSERT_TRUE(src_refcount >= 0);
195 ASSERT_TRUE(src_refcount == expected_refcount);
196 ASSERT_TRUE(src_refcount <= dst_refcount);
197 }
198
199 class LibRadosTwoPoolsPP : public RadosTestPP
200 {
201 public:
202 LibRadosTwoPoolsPP() {};
203 ~LibRadosTwoPoolsPP() override {};
204 protected:
205 static void SetUpTestCase() {
206 pool_name = get_temp_pool_name();
207 ASSERT_EQ("", create_one_pool_pp(pool_name, s_cluster));
208 }
209 static void TearDownTestCase() {
210 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, s_cluster));
211 }
212 static std::string cache_pool_name;
213
214 void SetUp() override {
215 cache_pool_name = get_temp_pool_name();
216 ASSERT_EQ(0, s_cluster.pool_create(cache_pool_name.c_str()));
217 RadosTestPP::SetUp();
218
219 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
220 cache_ioctx.application_enable("rados", true);
221 cache_ioctx.set_namespace(nspace);
222 }
223 void TearDown() override {
224 // flush + evict cache
225 flush_evict_all(cluster, cache_ioctx);
226
227 bufferlist inbl;
228 // tear down tiers
229 ASSERT_EQ(0, cluster.mon_command(
230 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
231 "\"}",
232 inbl, NULL, NULL));
233 ASSERT_EQ(0, cluster.mon_command(
234 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
235 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
236 inbl, NULL, NULL));
237
238 // wait for maps to settle before next test
239 cluster.wait_for_latest_osdmap();
240
241 RadosTestPP::TearDown();
242
243 cleanup_default_namespace(cache_ioctx);
244 cleanup_namespace(cache_ioctx, nspace);
245
246 cache_ioctx.close();
247 ASSERT_EQ(0, s_cluster.pool_delete(cache_pool_name.c_str()));
248 }
249 librados::IoCtx cache_ioctx;
250 };
251
252 class Completions
253 {
254 public:
255 Completions() = default;
256 librados::AioCompletion* getCompletion() {
257 librados::AioCompletion* comp = librados::Rados::aio_create_completion();
258 m_completions.push_back(comp);
259 return comp;
260 }
261
262 ~Completions() {
263 for (auto& comp : m_completions) {
264 comp->release();
265 }
266 }
267
268 private:
269 vector<librados::AioCompletion *> m_completions;
270 };
271
// File-scope Completions instance; every completion handed out by its
// getCompletion() is released by ~Completions().
Completions completions;

// Out-of-class definition of the fixture's static cache pool name
// (assigned per test in LibRadosTwoPoolsPP::SetUp()).
std::string LibRadosTwoPoolsPP::cache_pool_name;
275
276 TEST_F(LibRadosTierPP, Dirty) {
277 {
278 ObjectWriteOperation op;
279 op.undirty();
280 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still get 0 if it dne
281 }
282 {
283 ObjectWriteOperation op;
284 op.create(true);
285 ASSERT_EQ(0, ioctx.operate("foo", &op));
286 }
287 {
288 bool dirty = false;
289 int r = -1;
290 ObjectReadOperation op;
291 op.is_dirty(&dirty, &r);
292 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
293 ASSERT_TRUE(dirty);
294 ASSERT_EQ(0, r);
295 }
296 {
297 ObjectWriteOperation op;
298 op.undirty();
299 ASSERT_EQ(0, ioctx.operate("foo", &op));
300 }
301 {
302 ObjectWriteOperation op;
303 op.undirty();
304 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still 0 if already clean
305 }
306 {
307 bool dirty = false;
308 int r = -1;
309 ObjectReadOperation op;
310 op.is_dirty(&dirty, &r);
311 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
312 ASSERT_FALSE(dirty);
313 ASSERT_EQ(0, r);
314 }
315 {
316 ObjectWriteOperation op;
317 op.truncate(0); // still a write even tho it is a no-op
318 ASSERT_EQ(0, ioctx.operate("foo", &op));
319 }
320 {
321 bool dirty = false;
322 int r = -1;
323 ObjectReadOperation op;
324 op.is_dirty(&dirty, &r);
325 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
326 ASSERT_TRUE(dirty);
327 ASSERT_EQ(0, r);
328 }
329 }
330
331 TEST_F(LibRadosTwoPoolsPP, Overlay) {
332 // create objects
333 {
334 bufferlist bl;
335 bl.append("base");
336 ObjectWriteOperation op;
337 op.write_full(bl);
338 ASSERT_EQ(0, ioctx.operate("foo", &op));
339 }
340 {
341 bufferlist bl;
342 bl.append("cache");
343 ObjectWriteOperation op;
344 op.write_full(bl);
345 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
346 }
347
348 // configure cache
349 bufferlist inbl;
350 ASSERT_EQ(0, cluster.mon_command(
351 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
352 "\", \"tierpool\": \"" + cache_pool_name +
353 "\", \"force_nonempty\": \"--force-nonempty\" }",
354 inbl, NULL, NULL));
355 ASSERT_EQ(0, cluster.mon_command(
356 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
357 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
358 inbl, NULL, NULL));
359
360 // wait for maps to settle
361 cluster.wait_for_latest_osdmap();
362
363 // by default, the overlay sends us to cache pool
364 {
365 bufferlist bl;
366 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
367 ASSERT_EQ('c', bl[0]);
368 }
369 {
370 bufferlist bl;
371 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
372 ASSERT_EQ('c', bl[0]);
373 }
374
375 // unless we say otherwise
376 {
377 bufferlist bl;
378 ObjectReadOperation op;
379 op.read(0, 1, &bl, NULL);
380 librados::AioCompletion *completion = cluster.aio_create_completion();
381 ASSERT_EQ(0, ioctx.aio_operate(
382 "foo", completion, &op,
383 librados::OPERATION_IGNORE_OVERLAY, NULL));
384 completion->wait_for_complete();
385 ASSERT_EQ(0, completion->get_return_value());
386 completion->release();
387 ASSERT_EQ('b', bl[0]);
388 }
389 }
390
391 TEST_F(LibRadosTwoPoolsPP, Promote) {
392 // create object
393 {
394 bufferlist bl;
395 bl.append("hi there");
396 ObjectWriteOperation op;
397 op.write_full(bl);
398 ASSERT_EQ(0, ioctx.operate("foo", &op));
399 }
400
401 // configure cache
402 bufferlist inbl;
403 ASSERT_EQ(0, cluster.mon_command(
404 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
405 "\", \"tierpool\": \"" + cache_pool_name +
406 "\", \"force_nonempty\": \"--force-nonempty\" }",
407 inbl, NULL, NULL));
408 ASSERT_EQ(0, cluster.mon_command(
409 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
410 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
411 inbl, NULL, NULL));
412 ASSERT_EQ(0, cluster.mon_command(
413 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
414 "\", \"mode\": \"writeback\"}",
415 inbl, NULL, NULL));
416
417 // wait for maps to settle
418 cluster.wait_for_latest_osdmap();
419
420 // read, trigger a promote
421 {
422 bufferlist bl;
423 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
424 }
425
426 // read, trigger a whiteout
427 {
428 bufferlist bl;
429 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
430 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
431 }
432
433 // verify the object is present in the cache tier
434 {
435 NObjectIterator it = cache_ioctx.nobjects_begin();
436 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
437 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
438 ++it;
439 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
440 ++it;
441 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
442 }
443 }
444
445 TEST_F(LibRadosTwoPoolsPP, PromoteSnap) {
446 // create object
447 {
448 bufferlist bl;
449 bl.append("hi there");
450 ObjectWriteOperation op;
451 op.write_full(bl);
452 ASSERT_EQ(0, ioctx.operate("foo", &op));
453 }
454 {
455 bufferlist bl;
456 bl.append("hi there");
457 ObjectWriteOperation op;
458 op.write_full(bl);
459 ASSERT_EQ(0, ioctx.operate("bar", &op));
460 }
461 {
462 bufferlist bl;
463 bl.append("hi there");
464 ObjectWriteOperation op;
465 op.write_full(bl);
466 ASSERT_EQ(0, ioctx.operate("baz", &op));
467 }
468 {
469 bufferlist bl;
470 bl.append("hi there");
471 ObjectWriteOperation op;
472 op.write_full(bl);
473 ASSERT_EQ(0, ioctx.operate("bam", &op));
474 }
475
476 // create a snapshot, clone
477 vector<uint64_t> my_snaps(1);
478 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
479 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
480 my_snaps));
481 {
482 bufferlist bl;
483 bl.append("ciao!");
484 ObjectWriteOperation op;
485 op.write_full(bl);
486 ASSERT_EQ(0, ioctx.operate("foo", &op));
487 }
488 {
489 bufferlist bl;
490 bl.append("ciao!");
491 ObjectWriteOperation op;
492 op.write_full(bl);
493 ASSERT_EQ(0, ioctx.operate("bar", &op));
494 }
495 {
496 ObjectWriteOperation op;
497 op.remove();
498 ASSERT_EQ(0, ioctx.operate("baz", &op));
499 }
500 {
501 bufferlist bl;
502 bl.append("ciao!");
503 ObjectWriteOperation op;
504 op.write_full(bl);
505 ASSERT_EQ(0, ioctx.operate("bam", &op));
506 }
507
508 // configure cache
509 bufferlist inbl;
510 ASSERT_EQ(0, cluster.mon_command(
511 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
512 "\", \"tierpool\": \"" + cache_pool_name +
513 "\", \"force_nonempty\": \"--force-nonempty\" }",
514 inbl, NULL, NULL));
515 ASSERT_EQ(0, cluster.mon_command(
516 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
517 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
518 inbl, NULL, NULL));
519 ASSERT_EQ(0, cluster.mon_command(
520 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
521 "\", \"mode\": \"writeback\"}",
522 inbl, NULL, NULL));
523
524 // wait for maps to settle
525 cluster.wait_for_latest_osdmap();
526
527 // read, trigger a promote on the head
528 {
529 bufferlist bl;
530 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
531 ASSERT_EQ('c', bl[0]);
532 }
533 {
534 bufferlist bl;
535 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
536 ASSERT_EQ('c', bl[0]);
537 }
538
539 ioctx.snap_set_read(my_snaps[0]);
540
541 // read foo snap
542 {
543 bufferlist bl;
544 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
545 ASSERT_EQ('h', bl[0]);
546 }
547
548 // read bar snap
549 {
550 bufferlist bl;
551 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
552 ASSERT_EQ('h', bl[0]);
553 }
554
555 // read baz snap
556 {
557 bufferlist bl;
558 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
559 ASSERT_EQ('h', bl[0]);
560 }
561
562 ioctx.snap_set_read(librados::SNAP_HEAD);
563
564 // read foo
565 {
566 bufferlist bl;
567 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
568 ASSERT_EQ('c', bl[0]);
569 }
570
571 // read bar
572 {
573 bufferlist bl;
574 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
575 ASSERT_EQ('c', bl[0]);
576 }
577
578 // read baz
579 {
580 bufferlist bl;
581 ASSERT_EQ(-ENOENT, ioctx.read("baz", bl, 1, 0));
582 }
583
584 // cleanup
585 ioctx.selfmanaged_snap_remove(my_snaps[0]);
586 }
587
588 TEST_F(LibRadosTwoPoolsPP, PromoteSnapScrub) {
589 int num = 100;
590
591 // create objects
592 for (int i=0; i<num; ++i) {
593 bufferlist bl;
594 bl.append("hi there");
595 ObjectWriteOperation op;
596 op.write_full(bl);
597 ASSERT_EQ(0, ioctx.operate(string("foo") + stringify(i), &op));
598 }
599
600 vector<uint64_t> my_snaps;
601 for (int snap=0; snap<4; ++snap) {
602 // create a snapshot, clone
603 vector<uint64_t> ns(1);
604 ns.insert(ns.end(), my_snaps.begin(), my_snaps.end());
605 my_snaps.swap(ns);
606 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
607 cout << "my_snaps " << my_snaps << std::endl;
608 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
609 my_snaps));
610 for (int i=0; i<num; ++i) {
611 bufferlist bl;
612 bl.append(string("ciao! snap") + stringify(snap));
613 ObjectWriteOperation op;
614 op.write_full(bl);
615 ASSERT_EQ(0, ioctx.operate(string("foo") + stringify(i), &op));
616 }
617 }
618
619 // configure cache
620 bufferlist inbl;
621 ASSERT_EQ(0, cluster.mon_command(
622 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
623 "\", \"tierpool\": \"" + cache_pool_name +
624 "\", \"force_nonempty\": \"--force-nonempty\" }",
625 inbl, NULL, NULL));
626 ASSERT_EQ(0, cluster.mon_command(
627 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
628 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
629 inbl, NULL, NULL));
630 ASSERT_EQ(0, cluster.mon_command(
631 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
632 "\", \"mode\": \"writeback\"}",
633 inbl, NULL, NULL));
634
635 // wait for maps to settle
636 cluster.wait_for_latest_osdmap();
637
638 // read, trigger a promote on _some_ heads to make sure we handle cases
639 // where snaps are present and where they are not.
640 cout << "promoting some heads" << std::endl;
641 for (int i=0; i<num; ++i) {
642 if (i % 5 == 0 || i > num - 3) {
643 bufferlist bl;
644 ASSERT_EQ(1, ioctx.read(string("foo") + stringify(i), bl, 1, 0));
645 ASSERT_EQ('c', bl[0]);
646 }
647 }
648
649 for (unsigned snap = 0; snap < my_snaps.size(); ++snap) {
650 cout << "promoting from clones for snap " << my_snaps[snap] << std::endl;
651 ioctx.snap_set_read(my_snaps[snap]);
652
653 // read some snaps, semi-randomly
654 for (int i=0; i<50; ++i) {
655 bufferlist bl;
656 string o = string("foo") + stringify((snap * i * 137) % 80);
657 //cout << o << std::endl;
658 ASSERT_EQ(1, ioctx.read(o, bl, 1, 0));
659 }
660 }
661
662 // ok, stop and scrub this pool (to make sure scrub can handle
663 // missing clones in the cache tier).
664 {
665 IoCtx cache_ioctx;
666 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
667 for (int i=0; i<10; ++i) {
668 do {
669 ostringstream ss;
670 ss << "{\"prefix\": \"pg scrub\", \"pgid\": \""
671 << cache_ioctx.get_id() << "." << i
672 << "\"}";
673 int r = cluster.mon_command(ss.str(), inbl, NULL, NULL);
674 if (r == -ENOENT || // in case mgr osdmap is stale
675 r == -EAGAIN) {
676 sleep(5);
677 continue;
678 }
679 } while (false);
680 }
681
682 // give it a few seconds to go. this is sloppy but is usually enough time
683 cout << "waiting for scrubs..." << std::endl;
684 sleep(30);
685 cout << "done waiting" << std::endl;
686 }
687
688 ioctx.snap_set_read(librados::SNAP_HEAD);
689
690 //cleanup
691 for (unsigned snap = 0; snap < my_snaps.size(); ++snap) {
692 ioctx.selfmanaged_snap_remove(my_snaps[snap]);
693 }
694 }
695
696 TEST_F(LibRadosTwoPoolsPP, PromoteSnapTrimRace) {
697 // create object
698 {
699 bufferlist bl;
700 bl.append("hi there");
701 ObjectWriteOperation op;
702 op.write_full(bl);
703 ASSERT_EQ(0, ioctx.operate("foo", &op));
704 }
705
706 // create a snapshot, clone
707 vector<uint64_t> my_snaps(1);
708 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
709 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
710 my_snaps));
711 {
712 bufferlist bl;
713 bl.append("ciao!");
714 ObjectWriteOperation op;
715 op.write_full(bl);
716 ASSERT_EQ(0, ioctx.operate("foo", &op));
717 }
718
719 // configure cache
720 bufferlist inbl;
721 ASSERT_EQ(0, cluster.mon_command(
722 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
723 "\", \"tierpool\": \"" + cache_pool_name +
724 "\", \"force_nonempty\": \"--force-nonempty\" }",
725 inbl, NULL, NULL));
726 ASSERT_EQ(0, cluster.mon_command(
727 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
728 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
729 inbl, NULL, NULL));
730 ASSERT_EQ(0, cluster.mon_command(
731 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
732 "\", \"mode\": \"writeback\"}",
733 inbl, NULL, NULL));
734
735 // wait for maps to settle
736 cluster.wait_for_latest_osdmap();
737
738 // delete the snap
739 ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(my_snaps[0]));
740
741 ioctx.snap_set_read(my_snaps[0]);
742
743 // read foo snap. the OSD may or may not realize that this snap has
744 // been logically deleted; either response is valid.
745 {
746 bufferlist bl;
747 int r = ioctx.read("foo", bl, 1, 0);
748 ASSERT_TRUE(r == 1 || r == -ENOENT);
749 }
750
751 // cleanup
752 ioctx.selfmanaged_snap_remove(my_snaps[0]);
753 }
754
755 TEST_F(LibRadosTwoPoolsPP, Whiteout) {
756 // create object
757 {
758 bufferlist bl;
759 bl.append("hi there");
760 ObjectWriteOperation op;
761 op.write_full(bl);
762 ASSERT_EQ(0, ioctx.operate("foo", &op));
763 }
764
765 // configure cache
766 bufferlist inbl;
767 ASSERT_EQ(0, cluster.mon_command(
768 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
769 "\", \"tierpool\": \"" + cache_pool_name +
770 "\", \"force_nonempty\": \"--force-nonempty\" }",
771 inbl, NULL, NULL));
772 ASSERT_EQ(0, cluster.mon_command(
773 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
774 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
775 inbl, NULL, NULL));
776 ASSERT_EQ(0, cluster.mon_command(
777 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
778 "\", \"mode\": \"writeback\"}",
779 inbl, NULL, NULL));
780
781 // wait for maps to settle
782 cluster.wait_for_latest_osdmap();
783
784 // create some whiteouts, verify they behave
785 {
786 ObjectWriteOperation op;
787 op.assert_exists();
788 op.remove();
789 ASSERT_EQ(0, ioctx.operate("foo", &op));
790 }
791
792 {
793 ObjectWriteOperation op;
794 op.assert_exists();
795 op.remove();
796 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
797 }
798 {
799 ObjectWriteOperation op;
800 op.assert_exists();
801 op.remove();
802 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
803 }
804
805 // verify the whiteouts are there in the cache tier
806 {
807 NObjectIterator it = cache_ioctx.nobjects_begin();
808 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
809 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
810 ++it;
811 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
812 ++it;
813 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
814 }
815
816 // delete a whiteout and verify it goes away
817 ASSERT_EQ(-ENOENT, ioctx.remove("foo"));
818 {
819 ObjectWriteOperation op;
820 op.remove();
821 librados::AioCompletion *completion = cluster.aio_create_completion();
822 ASSERT_EQ(0, ioctx.aio_operate("bar", completion, &op,
823 librados::OPERATION_IGNORE_CACHE));
824 completion->wait_for_complete();
825 ASSERT_EQ(0, completion->get_return_value());
826 completion->release();
827
828 NObjectIterator it = cache_ioctx.nobjects_begin();
829 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
830 ASSERT_TRUE(it->get_oid() == string("foo"));
831 ++it;
832 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
833 }
834
835 // recreate an object and verify we can read it
836 {
837 bufferlist bl;
838 bl.append("hi there");
839 ObjectWriteOperation op;
840 op.write_full(bl);
841 ASSERT_EQ(0, ioctx.operate("foo", &op));
842 }
843 {
844 bufferlist bl;
845 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
846 ASSERT_EQ('h', bl[0]);
847 }
848 }
849
850 TEST_F(LibRadosTwoPoolsPP, WhiteoutDeleteCreate) {
851 // configure cache
852 bufferlist inbl;
853 ASSERT_EQ(0, cluster.mon_command(
854 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
855 "\", \"tierpool\": \"" + cache_pool_name +
856 "\", \"force_nonempty\": \"--force-nonempty\" }",
857 inbl, NULL, NULL));
858 ASSERT_EQ(0, cluster.mon_command(
859 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
860 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
861 inbl, NULL, NULL));
862 ASSERT_EQ(0, cluster.mon_command(
863 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
864 "\", \"mode\": \"writeback\"}",
865 inbl, NULL, NULL));
866
867 // wait for maps to settle
868 cluster.wait_for_latest_osdmap();
869
870 // create an object
871 {
872 bufferlist bl;
873 bl.append("foo");
874 ASSERT_EQ(0, ioctx.write_full("foo", bl));
875 }
876
877 // do delete + create operation
878 {
879 ObjectWriteOperation op;
880 op.remove();
881 bufferlist bl;
882 bl.append("bar");
883 op.write_full(bl);
884 ASSERT_EQ(0, ioctx.operate("foo", &op));
885 }
886
887 // verify it still "exists" (w/ new content)
888 {
889 bufferlist bl;
890 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
891 ASSERT_EQ('b', bl[0]);
892 }
893 }
894
895 TEST_F(LibRadosTwoPoolsPP, Evict) {
896 // create object
897 {
898 bufferlist bl;
899 bl.append("hi there");
900 ObjectWriteOperation op;
901 op.write_full(bl);
902 ASSERT_EQ(0, ioctx.operate("foo", &op));
903 }
904
905 // configure cache
906 bufferlist inbl;
907 ASSERT_EQ(0, cluster.mon_command(
908 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
909 "\", \"tierpool\": \"" + cache_pool_name +
910 "\", \"force_nonempty\": \"--force-nonempty\" }",
911 inbl, NULL, NULL));
912 ASSERT_EQ(0, cluster.mon_command(
913 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
914 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
915 inbl, NULL, NULL));
916 ASSERT_EQ(0, cluster.mon_command(
917 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
918 "\", \"mode\": \"writeback\"}",
919 inbl, NULL, NULL));
920
921 // wait for maps to settle
922 cluster.wait_for_latest_osdmap();
923
924 // read, trigger a promote
925 {
926 bufferlist bl;
927 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
928 }
929
930 // read, trigger a whiteout, and a dirty object
931 {
932 bufferlist bl;
933 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
934 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
935 ASSERT_EQ(0, ioctx.write("bar", bl, bl.length(), 0));
936 }
937
938 // verify the object is present in the cache tier
939 {
940 NObjectIterator it = cache_ioctx.nobjects_begin();
941 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
942 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
943 ++it;
944 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
945 ++it;
946 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
947 }
948
949 // pin
950 {
951 ObjectWriteOperation op;
952 op.cache_pin();
953 librados::AioCompletion *completion = cluster.aio_create_completion();
954 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
955 completion->wait_for_complete();
956 ASSERT_EQ(0, completion->get_return_value());
957 completion->release();
958 }
959
960 // evict the pinned object with -EPERM
961 {
962 ObjectReadOperation op;
963 op.cache_evict();
964 librados::AioCompletion *completion = cluster.aio_create_completion();
965 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
966 librados::OPERATION_IGNORE_CACHE,
967 NULL));
968 completion->wait_for_complete();
969 ASSERT_EQ(-EPERM, completion->get_return_value());
970 completion->release();
971 }
972
973 // unpin
974 {
975 ObjectWriteOperation op;
976 op.cache_unpin();
977 librados::AioCompletion *completion = cluster.aio_create_completion();
978 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
979 completion->wait_for_complete();
980 ASSERT_EQ(0, completion->get_return_value());
981 completion->release();
982 }
983
984 // flush
985 {
986 ObjectReadOperation op;
987 op.cache_flush();
988 librados::AioCompletion *completion = cluster.aio_create_completion();
989 ASSERT_EQ(0, cache_ioctx.aio_operate(
990 "foo", completion, &op,
991 librados::OPERATION_IGNORE_OVERLAY, NULL));
992 completion->wait_for_complete();
993 ASSERT_EQ(0, completion->get_return_value());
994 completion->release();
995 }
996
997 // verify clean
998 {
999 bool dirty = false;
1000 int r = -1;
1001 ObjectReadOperation op;
1002 op.is_dirty(&dirty, &r);
1003 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1004 ASSERT_FALSE(dirty);
1005 ASSERT_EQ(0, r);
1006 }
1007
1008 // evict
1009 {
1010 ObjectReadOperation op;
1011 op.cache_evict();
1012 librados::AioCompletion *completion = cluster.aio_create_completion();
1013 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
1014 librados::OPERATION_IGNORE_CACHE,
1015 NULL));
1016 completion->wait_for_complete();
1017 ASSERT_EQ(0, completion->get_return_value());
1018 completion->release();
1019 }
1020 {
1021 ObjectReadOperation op;
1022 op.cache_evict();
1023 librados::AioCompletion *completion = cluster.aio_create_completion();
1024 ASSERT_EQ(0, cache_ioctx.aio_operate(
1025 "foo", completion, &op,
1026 librados::OPERATION_IGNORE_CACHE, NULL));
1027 completion->wait_for_complete();
1028 ASSERT_EQ(0, completion->get_return_value());
1029 completion->release();
1030 }
1031 {
1032 ObjectReadOperation op;
1033 op.cache_evict();
1034 librados::AioCompletion *completion = cluster.aio_create_completion();
1035 ASSERT_EQ(0, cache_ioctx.aio_operate(
1036 "bar", completion, &op,
1037 librados::OPERATION_IGNORE_CACHE, NULL));
1038 completion->wait_for_complete();
1039 ASSERT_EQ(-EBUSY, completion->get_return_value());
1040 completion->release();
1041 }
1042 }
1043
1044 TEST_F(LibRadosTwoPoolsPP, EvictSnap) {
1045 // create object
1046 {
1047 bufferlist bl;
1048 bl.append("hi there");
1049 ObjectWriteOperation op;
1050 op.write_full(bl);
1051 ASSERT_EQ(0, ioctx.operate("foo", &op));
1052 }
1053 {
1054 bufferlist bl;
1055 bl.append("hi there");
1056 ObjectWriteOperation op;
1057 op.write_full(bl);
1058 ASSERT_EQ(0, ioctx.operate("bar", &op));
1059 }
1060 {
1061 bufferlist bl;
1062 bl.append("hi there");
1063 ObjectWriteOperation op;
1064 op.write_full(bl);
1065 ASSERT_EQ(0, ioctx.operate("baz", &op));
1066 }
1067 {
1068 bufferlist bl;
1069 bl.append("hi there");
1070 ObjectWriteOperation op;
1071 op.write_full(bl);
1072 ASSERT_EQ(0, ioctx.operate("bam", &op));
1073 }
1074
1075 // create a snapshot, clone
1076 vector<uint64_t> my_snaps(1);
1077 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1078 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1079 my_snaps));
1080 {
1081 bufferlist bl;
1082 bl.append("ciao!");
1083 ObjectWriteOperation op;
1084 op.write_full(bl);
1085 ASSERT_EQ(0, ioctx.operate("foo", &op));
1086 }
1087 {
1088 bufferlist bl;
1089 bl.append("ciao!");
1090 ObjectWriteOperation op;
1091 op.write_full(bl);
1092 ASSERT_EQ(0, ioctx.operate("bar", &op));
1093 }
1094 {
1095 ObjectWriteOperation op;
1096 op.remove();
1097 ASSERT_EQ(0, ioctx.operate("baz", &op));
1098 }
1099 {
1100 bufferlist bl;
1101 bl.append("ciao!");
1102 ObjectWriteOperation op;
1103 op.write_full(bl);
1104 ASSERT_EQ(0, ioctx.operate("bam", &op));
1105 }
1106
1107 // configure cache
1108 bufferlist inbl;
1109 ASSERT_EQ(0, cluster.mon_command(
1110 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1111 "\", \"tierpool\": \"" + cache_pool_name +
1112 "\", \"force_nonempty\": \"--force-nonempty\" }",
1113 inbl, NULL, NULL));
1114 ASSERT_EQ(0, cluster.mon_command(
1115 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1116 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1117 inbl, NULL, NULL));
1118 ASSERT_EQ(0, cluster.mon_command(
1119 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1120 "\", \"mode\": \"writeback\"}",
1121 inbl, NULL, NULL));
1122
1123 // wait for maps to settle
1124 cluster.wait_for_latest_osdmap();
1125
1126 // read, trigger a promote on the head
1127 {
1128 bufferlist bl;
1129 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1130 ASSERT_EQ('c', bl[0]);
1131 }
1132 {
1133 bufferlist bl;
1134 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
1135 ASSERT_EQ('c', bl[0]);
1136 }
1137
1138 // evict bam
1139 {
1140 ObjectReadOperation op;
1141 op.cache_evict();
1142 librados::AioCompletion *completion = cluster.aio_create_completion();
1143 ASSERT_EQ(0, cache_ioctx.aio_operate(
1144 "bam", completion, &op,
1145 librados::OPERATION_IGNORE_CACHE, NULL));
1146 completion->wait_for_complete();
1147 ASSERT_EQ(0, completion->get_return_value());
1148 completion->release();
1149 }
1150 {
1151 bufferlist bl;
1152 ObjectReadOperation op;
1153 op.read(1, 0, &bl, NULL);
1154 librados::AioCompletion *completion = cluster.aio_create_completion();
1155 ASSERT_EQ(0, cache_ioctx.aio_operate(
1156 "bam", completion, &op,
1157 librados::OPERATION_IGNORE_CACHE, NULL));
1158 completion->wait_for_complete();
1159 ASSERT_EQ(-ENOENT, completion->get_return_value());
1160 completion->release();
1161 }
1162
1163 // read foo snap
1164 ioctx.snap_set_read(my_snaps[0]);
1165 {
1166 bufferlist bl;
1167 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1168 ASSERT_EQ('h', bl[0]);
1169 }
1170
1171 // evict foo snap
1172 {
1173 ObjectReadOperation op;
1174 op.cache_evict();
1175 librados::AioCompletion *completion = cluster.aio_create_completion();
1176 ASSERT_EQ(0, ioctx.aio_operate(
1177 "foo", completion, &op,
1178 librados::OPERATION_IGNORE_CACHE, NULL));
1179 completion->wait_for_complete();
1180 ASSERT_EQ(0, completion->get_return_value());
1181 completion->release();
1182 }
1183 // snap is gone...
1184 {
1185 bufferlist bl;
1186 ObjectReadOperation op;
1187 op.read(1, 0, &bl, NULL);
1188 librados::AioCompletion *completion = cluster.aio_create_completion();
1189 ASSERT_EQ(0, ioctx.aio_operate(
1190 "foo", completion, &op,
1191 librados::OPERATION_IGNORE_CACHE, NULL));
1192 completion->wait_for_complete();
1193 ASSERT_EQ(-ENOENT, completion->get_return_value());
1194 completion->release();
1195 }
1196 // head is still there...
1197 ioctx.snap_set_read(librados::SNAP_HEAD);
1198 {
1199 bufferlist bl;
1200 ObjectReadOperation op;
1201 op.read(1, 0, &bl, NULL);
1202 librados::AioCompletion *completion = cluster.aio_create_completion();
1203 ASSERT_EQ(0, ioctx.aio_operate(
1204 "foo", completion, &op,
1205 librados::OPERATION_IGNORE_CACHE, NULL));
1206 completion->wait_for_complete();
1207 ASSERT_EQ(0, completion->get_return_value());
1208 completion->release();
1209 }
1210
1211 // promote head + snap of bar
1212 ioctx.snap_set_read(librados::SNAP_HEAD);
1213 {
1214 bufferlist bl;
1215 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
1216 ASSERT_EQ('c', bl[0]);
1217 }
1218 ioctx.snap_set_read(my_snaps[0]);
1219 {
1220 bufferlist bl;
1221 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
1222 ASSERT_EQ('h', bl[0]);
1223 }
1224
1225 // evict bar head (fail)
1226 ioctx.snap_set_read(librados::SNAP_HEAD);
1227 {
1228 ObjectReadOperation op;
1229 op.cache_evict();
1230 librados::AioCompletion *completion = cluster.aio_create_completion();
1231 ASSERT_EQ(0, ioctx.aio_operate(
1232 "bar", completion, &op,
1233 librados::OPERATION_IGNORE_CACHE, NULL));
1234 completion->wait_for_complete();
1235 ASSERT_EQ(-EBUSY, completion->get_return_value());
1236 completion->release();
1237 }
1238
1239 // evict bar snap
1240 ioctx.snap_set_read(my_snaps[0]);
1241 {
1242 ObjectReadOperation op;
1243 op.cache_evict();
1244 librados::AioCompletion *completion = cluster.aio_create_completion();
1245 ASSERT_EQ(0, ioctx.aio_operate(
1246 "bar", completion, &op,
1247 librados::OPERATION_IGNORE_CACHE, NULL));
1248 completion->wait_for_complete();
1249 ASSERT_EQ(0, completion->get_return_value());
1250 completion->release();
1251 }
1252 // ...and then head
1253 ioctx.snap_set_read(librados::SNAP_HEAD);
1254 {
1255 bufferlist bl;
1256 ObjectReadOperation op;
1257 op.read(1, 0, &bl, NULL);
1258 librados::AioCompletion *completion = cluster.aio_create_completion();
1259 ASSERT_EQ(0, ioctx.aio_operate(
1260 "bar", completion, &op,
1261 librados::OPERATION_IGNORE_CACHE, NULL));
1262 completion->wait_for_complete();
1263 ASSERT_EQ(0, completion->get_return_value());
1264 completion->release();
1265 }
1266 {
1267 ObjectReadOperation op;
1268 op.cache_evict();
1269 librados::AioCompletion *completion = cluster.aio_create_completion();
1270 ASSERT_EQ(0, ioctx.aio_operate(
1271 "bar", completion, &op,
1272 librados::OPERATION_IGNORE_CACHE, NULL));
1273 completion->wait_for_complete();
1274 ASSERT_EQ(0, completion->get_return_value());
1275 completion->release();
1276 }
1277
1278 // cleanup
1279 ioctx.selfmanaged_snap_remove(my_snaps[0]);
1280 }
1281
1282 // this test case reproduces http://tracker.ceph.com/issues/8629
1283 TEST_F(LibRadosTwoPoolsPP, EvictSnap2) {
1284 // create object
1285 {
1286 bufferlist bl;
1287 bl.append("hi there");
1288 ObjectWriteOperation op;
1289 op.write_full(bl);
1290 ASSERT_EQ(0, ioctx.operate("foo", &op));
1291 }
1292 // create a snapshot, clone
1293 vector<uint64_t> my_snaps(1);
1294 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1295 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1296 my_snaps));
1297 {
1298 bufferlist bl;
1299 bl.append("ciao!");
1300 ObjectWriteOperation op;
1301 op.write_full(bl);
1302 ASSERT_EQ(0, ioctx.operate("foo", &op));
1303 }
1304 // configure cache
1305 bufferlist inbl;
1306 ASSERT_EQ(0, cluster.mon_command(
1307 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1308 "\", \"tierpool\": \"" + cache_pool_name +
1309 "\", \"force_nonempty\": \"--force-nonempty\" }",
1310 inbl, NULL, NULL));
1311 ASSERT_EQ(0, cluster.mon_command(
1312 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1313 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1314 inbl, NULL, NULL));
1315 ASSERT_EQ(0, cluster.mon_command(
1316 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1317 "\", \"mode\": \"writeback\"}",
1318 inbl, NULL, NULL));
1319
1320 // wait for maps to settle
1321 cluster.wait_for_latest_osdmap();
1322
1323 // read, trigger a promote on the head
1324 {
1325 bufferlist bl;
1326 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1327 ASSERT_EQ('c', bl[0]);
1328 }
1329
1330 // evict
1331 {
1332 ObjectReadOperation op;
1333 op.cache_evict();
1334 librados::AioCompletion *completion = cluster.aio_create_completion();
1335 ASSERT_EQ(0, cache_ioctx.aio_operate(
1336 "foo", completion, &op,
1337 librados::OPERATION_IGNORE_CACHE, NULL));
1338 completion->wait_for_complete();
1339 ASSERT_EQ(0, completion->get_return_value());
1340 completion->release();
1341 }
1342
1343 // verify the snapdir is not present in the cache pool
1344 {
1345 ObjectReadOperation op;
1346 librados::snap_set_t snapset;
1347 op.list_snaps(&snapset, NULL);
1348 ioctx.snap_set_read(librados::SNAP_DIR);
1349 librados::AioCompletion *completion = cluster.aio_create_completion();
1350 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op,
1351 librados::OPERATION_IGNORE_CACHE, NULL));
1352 completion->wait_for_complete();
1353 ASSERT_EQ(-ENOENT, completion->get_return_value());
1354 completion->release();
1355 }
1356 }
1357
1358 //This test case reproduces http://tracker.ceph.com/issues/17445
1359 TEST_F(LibRadosTwoPoolsPP, ListSnap){
1360 // Create object
1361 {
1362 bufferlist bl;
1363 bl.append("hi there");
1364 ObjectWriteOperation op;
1365 op.write_full(bl);
1366 ASSERT_EQ(0, ioctx.operate("foo", &op));
1367 }
1368 {
1369 bufferlist bl;
1370 bl.append("hi there");
1371 ObjectWriteOperation op;
1372 op.write_full(bl);
1373 ASSERT_EQ(0, ioctx.operate("bar", &op));
1374 }
1375 {
1376 bufferlist bl;
1377 bl.append("hi there");
1378 ObjectWriteOperation op;
1379 op.write_full(bl);
1380 ASSERT_EQ(0, ioctx.operate("baz", &op));
1381 }
1382 {
1383 bufferlist bl;
1384 bl.append("hi there");
1385 ObjectWriteOperation op;
1386 op.write_full(bl);
1387 ASSERT_EQ(0, ioctx.operate("bam", &op));
1388 }
1389
1390 // Create a snapshot, clone
1391 vector<uint64_t> my_snaps(1);
1392 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1393 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1394 my_snaps));
1395 {
1396 bufferlist bl;
1397 bl.append("ciao!");
1398 ObjectWriteOperation op;
1399 op.write_full(bl);
1400 ASSERT_EQ(0, ioctx.operate("foo", &op));
1401 }
1402 {
1403 bufferlist bl;
1404 bl.append("ciao!");
1405 ObjectWriteOperation op;
1406 op.write_full(bl);
1407 ASSERT_EQ(0, ioctx.operate("bar", &op));
1408 }
1409 {
1410 ObjectWriteOperation op;
1411 op.remove();
1412 ASSERT_EQ(0, ioctx.operate("baz", &op));
1413 }
1414 {
1415 bufferlist bl;
1416 bl.append("ciao!");
1417 ObjectWriteOperation op;
1418 op.write_full(bl);
1419 ASSERT_EQ(0, ioctx.operate("bam", &op));
1420 }
1421
1422 // Configure cache
1423 bufferlist inbl;
1424 ASSERT_EQ(0, cluster.mon_command(
1425 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1426 "\", \"tierpool\": \"" + cache_pool_name +
1427 "\", \"force_nonempty\": \"--force-nonempty\" }",
1428 inbl, NULL, NULL));
1429 ASSERT_EQ(0, cluster.mon_command(
1430 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1431 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1432 inbl, NULL, NULL));
1433 ASSERT_EQ(0, cluster.mon_command(
1434 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1435 "\", \"mode\": \"writeback\"}",
1436 inbl, NULL, NULL));
1437
1438 // Wait for maps to settle
1439 cluster.wait_for_latest_osdmap();
1440
1441 // Read, trigger a promote on the head
1442 {
1443 bufferlist bl;
1444 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1445 ASSERT_EQ('c', bl[0]);
1446 }
1447
1448 // Read foo snap
1449 ioctx.snap_set_read(my_snaps[0]);
1450 {
1451 bufferlist bl;
1452 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1453 ASSERT_EQ('h', bl[0]);
1454 }
1455
1456 // Evict foo snap
1457 {
1458 ObjectReadOperation op;
1459 op.cache_evict();
1460 librados::AioCompletion *completion = cluster.aio_create_completion();
1461 ASSERT_EQ(0, ioctx.aio_operate(
1462 "foo", completion, &op,
1463 librados::OPERATION_IGNORE_CACHE, NULL));
1464 completion->wait_for_complete();
1465 ASSERT_EQ(0, completion->get_return_value());
1466 completion->release();
1467 }
1468 // Snap is gone...
1469 {
1470 bufferlist bl;
1471 ObjectReadOperation op;
1472 op.read(1, 0, &bl, NULL);
1473 librados::AioCompletion *completion = cluster.aio_create_completion();
1474 ASSERT_EQ(0, ioctx.aio_operate(
1475 "foo", completion, &op,
1476 librados::OPERATION_IGNORE_CACHE, NULL));
1477 completion->wait_for_complete();
1478 ASSERT_EQ(-ENOENT, completion->get_return_value());
1479 completion->release();
1480 }
1481
1482 // Do list-snaps
1483 ioctx.snap_set_read(CEPH_SNAPDIR);
1484 {
1485 snap_set_t snap_set;
1486 int snap_ret;
1487 ObjectReadOperation op;
1488 op.list_snaps(&snap_set, &snap_ret);
1489 librados::AioCompletion *completion = cluster.aio_create_completion();
1490 ASSERT_EQ(0, ioctx.aio_operate(
1491 "foo", completion, &op,
1492 0, NULL));
1493 completion->wait_for_complete();
1494 ASSERT_EQ(0, snap_ret);
1495 ASSERT_LT(0u, snap_set.clones.size());
1496 for (vector<librados::clone_info_t>::const_iterator r = snap_set.clones.begin();
1497 r != snap_set.clones.end();
1498 ++r) {
1499 if (r->cloneid != librados::SNAP_HEAD) {
1500 ASSERT_LT(0u, r->snaps.size());
1501 }
1502 }
1503 }
1504
1505 // Cleanup
1506 ioctx.selfmanaged_snap_remove(my_snaps[0]);
1507 }
1508
1509 // This test case reproduces https://tracker.ceph.com/issues/49409
1510 TEST_F(LibRadosTwoPoolsPP, EvictSnapRollbackReadRace) {
1511 // create object
1512 {
1513 bufferlist bl;
1514 int len = string("hi there").length() * 2;
1515 // append more chrunk data make sure the second promote
1516 // op coming before the first promote op finished
1517 for (int i=0; i<4*1024*1024/len; ++i)
1518 bl.append("hi therehi there");
1519 ObjectWriteOperation op;
1520 op.write_full(bl);
1521 ASSERT_EQ(0, ioctx.operate("foo", &op));
1522 }
1523
1524 // create two snapshot, a clone
1525 vector<uint64_t> my_snaps(2);
1526 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[1]));
1527 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1528 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1529 my_snaps));
1530 {
1531 bufferlist bl;
1532 bl.append("ciao!");
1533 ObjectWriteOperation op;
1534 op.write_full(bl);
1535 ASSERT_EQ(0, ioctx.operate("foo", &op));
1536 }
1537
1538 // configure cache
1539 bufferlist inbl;
1540 ASSERT_EQ(0, cluster.mon_command(
1541 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1542 "\", \"tierpool\": \"" + cache_pool_name +
1543 "\", \"force_nonempty\": \"--force-nonempty\" }",
1544 inbl, NULL, NULL));
1545 ASSERT_EQ(0, cluster.mon_command(
1546 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1547 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1548 inbl, NULL, NULL));
1549 ASSERT_EQ(0, cluster.mon_command(
1550 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1551 "\", \"mode\": \"writeback\"}",
1552 inbl, NULL, NULL));
1553
1554 // wait for maps to settle
1555 cluster.wait_for_latest_osdmap();
1556
1557 // read, trigger a promote on the head
1558 {
1559 bufferlist bl;
1560 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1561 ASSERT_EQ('c', bl[0]);
1562 }
1563
1564 // try more times
1565 int retries = 50;
1566 for (int i=0; i<retries; ++i)
1567 {
1568 {
1569 librados::AioCompletion * completion = cluster.aio_create_completion();
1570 librados::AioCompletion * completion1 = cluster.aio_create_completion();
1571
1572 // send a snap rollback op and a snap read op parallel
1573 // trigger two promote(copy) to the same snap clone obj
1574 // the second snap read op is read-ordered make sure
1575 // op not wait for objects_blocked_on_snap_promotion
1576 ObjectWriteOperation op;
1577 op.selfmanaged_snap_rollback(my_snaps[0]);
1578 ASSERT_EQ(0, ioctx.aio_operate(
1579 "foo", completion, &op));
1580
1581 ioctx.snap_set_read(my_snaps[1]);
1582 std::map<uint64_t, uint64_t> extents;
1583 bufferlist read_bl;
1584 int rval = -1;
1585 ObjectReadOperation op1;
1586 op1.sparse_read(0, 8, &extents, &read_bl, &rval);
1587 ASSERT_EQ(0, ioctx.aio_operate("foo", completion1, &op1, &read_bl));
1588 ioctx.snap_set_read(librados::SNAP_HEAD);
1589
1590 completion->wait_for_complete();
1591 ASSERT_EQ(0, completion->get_return_value());
1592 completion->release();
1593
1594 completion1->wait_for_complete();
1595 ASSERT_EQ(0, completion1->get_return_value());
1596 completion1->release();
1597 }
1598
1599 // evict foo snap
1600 ioctx.snap_set_read(my_snaps[0]);
1601 {
1602 ObjectReadOperation op;
1603 op.cache_evict();
1604 librados::AioCompletion *completion = cluster.aio_create_completion();
1605 ASSERT_EQ(0, ioctx.aio_operate(
1606 "foo", completion, &op,
1607 librados::OPERATION_IGNORE_CACHE, NULL));
1608 completion->wait_for_complete();
1609 ASSERT_EQ(0, completion->get_return_value());
1610 completion->release();
1611 }
1612 ioctx.snap_set_read(librados::SNAP_HEAD);
1613 }
1614
1615 // cleanup
1616 ioctx.selfmanaged_snap_remove(my_snaps[0]);
1617 ioctx.selfmanaged_snap_remove(my_snaps[1]);
1618 }
1619
1620 TEST_F(LibRadosTwoPoolsPP, TryFlush) {
1621 // configure cache
1622 bufferlist inbl;
1623 ASSERT_EQ(0, cluster.mon_command(
1624 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1625 "\", \"tierpool\": \"" + cache_pool_name +
1626 "\", \"force_nonempty\": \"--force-nonempty\" }",
1627 inbl, NULL, NULL));
1628 ASSERT_EQ(0, cluster.mon_command(
1629 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1630 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1631 inbl, NULL, NULL));
1632 ASSERT_EQ(0, cluster.mon_command(
1633 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1634 "\", \"mode\": \"writeback\"}",
1635 inbl, NULL, NULL));
1636
1637 // wait for maps to settle
1638 cluster.wait_for_latest_osdmap();
1639
1640 // create object
1641 {
1642 bufferlist bl;
1643 bl.append("hi there");
1644 ObjectWriteOperation op;
1645 op.write_full(bl);
1646 ASSERT_EQ(0, ioctx.operate("foo", &op));
1647 }
1648
1649 // verify the object is present in the cache tier
1650 {
1651 NObjectIterator it = cache_ioctx.nobjects_begin();
1652 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
1653 ASSERT_TRUE(it->get_oid() == string("foo"));
1654 ++it;
1655 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1656 }
1657
1658 // verify the object is NOT present in the base tier
1659 {
1660 NObjectIterator it = ioctx.nobjects_begin();
1661 ASSERT_TRUE(it == ioctx.nobjects_end());
1662 }
1663
1664 // verify dirty
1665 {
1666 bool dirty = false;
1667 int r = -1;
1668 ObjectReadOperation op;
1669 op.is_dirty(&dirty, &r);
1670 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1671 ASSERT_TRUE(dirty);
1672 ASSERT_EQ(0, r);
1673 }
1674
1675 // pin
1676 {
1677 ObjectWriteOperation op;
1678 op.cache_pin();
1679 librados::AioCompletion *completion = cluster.aio_create_completion();
1680 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1681 completion->wait_for_complete();
1682 ASSERT_EQ(0, completion->get_return_value());
1683 completion->release();
1684 }
1685
1686 // flush the pinned object with -EPERM
1687 {
1688 ObjectReadOperation op;
1689 op.cache_try_flush();
1690 librados::AioCompletion *completion = cluster.aio_create_completion();
1691 ASSERT_EQ(0, cache_ioctx.aio_operate(
1692 "foo", completion, &op,
1693 librados::OPERATION_IGNORE_OVERLAY |
1694 librados::OPERATION_SKIPRWLOCKS, NULL));
1695 completion->wait_for_complete();
1696 ASSERT_EQ(-EPERM, completion->get_return_value());
1697 completion->release();
1698 }
1699
1700 // unpin
1701 {
1702 ObjectWriteOperation op;
1703 op.cache_unpin();
1704 librados::AioCompletion *completion = cluster.aio_create_completion();
1705 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1706 completion->wait_for_complete();
1707 ASSERT_EQ(0, completion->get_return_value());
1708 completion->release();
1709 }
1710
1711 // flush
1712 {
1713 ObjectReadOperation op;
1714 op.cache_try_flush();
1715 librados::AioCompletion *completion = cluster.aio_create_completion();
1716 ASSERT_EQ(0, cache_ioctx.aio_operate(
1717 "foo", completion, &op,
1718 librados::OPERATION_IGNORE_OVERLAY |
1719 librados::OPERATION_SKIPRWLOCKS, NULL));
1720 completion->wait_for_complete();
1721 ASSERT_EQ(0, completion->get_return_value());
1722 completion->release();
1723 }
1724
1725 // verify clean
1726 {
1727 bool dirty = false;
1728 int r = -1;
1729 ObjectReadOperation op;
1730 op.is_dirty(&dirty, &r);
1731 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1732 ASSERT_FALSE(dirty);
1733 ASSERT_EQ(0, r);
1734 }
1735
1736 // verify in base tier
1737 {
1738 NObjectIterator it = ioctx.nobjects_begin();
1739 ASSERT_TRUE(it != ioctx.nobjects_end());
1740 ASSERT_TRUE(it->get_oid() == string("foo"));
1741 ++it;
1742 ASSERT_TRUE(it == ioctx.nobjects_end());
1743 }
1744
1745 // evict it
1746 {
1747 ObjectReadOperation op;
1748 op.cache_evict();
1749 librados::AioCompletion *completion = cluster.aio_create_completion();
1750 ASSERT_EQ(0, cache_ioctx.aio_operate(
1751 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
1752 completion->wait_for_complete();
1753 ASSERT_EQ(0, completion->get_return_value());
1754 completion->release();
1755 }
1756
1757 // verify no longer in cache tier
1758 {
1759 NObjectIterator it = cache_ioctx.nobjects_begin();
1760 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1761 }
1762 }
1763
1764 TEST_F(LibRadosTwoPoolsPP, Flush) {
1765 // configure cache
1766 bufferlist inbl;
1767 ASSERT_EQ(0, cluster.mon_command(
1768 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1769 "\", \"tierpool\": \"" + cache_pool_name +
1770 "\", \"force_nonempty\": \"--force-nonempty\" }",
1771 inbl, NULL, NULL));
1772 ASSERT_EQ(0, cluster.mon_command(
1773 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1774 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1775 inbl, NULL, NULL));
1776 ASSERT_EQ(0, cluster.mon_command(
1777 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1778 "\", \"mode\": \"writeback\"}",
1779 inbl, NULL, NULL));
1780
1781 // wait for maps to settle
1782 cluster.wait_for_latest_osdmap();
1783
1784 uint64_t user_version = 0;
1785
1786 // create object
1787 {
1788 bufferlist bl;
1789 bl.append("hi there");
1790 ObjectWriteOperation op;
1791 op.write_full(bl);
1792 ASSERT_EQ(0, ioctx.operate("foo", &op));
1793 }
1794
1795 // verify the object is present in the cache tier
1796 {
1797 NObjectIterator it = cache_ioctx.nobjects_begin();
1798 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
1799 ASSERT_TRUE(it->get_oid() == string("foo"));
1800 ++it;
1801 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1802 }
1803
1804 // verify the object is NOT present in the base tier
1805 {
1806 NObjectIterator it = ioctx.nobjects_begin();
1807 ASSERT_TRUE(it == ioctx.nobjects_end());
1808 }
1809
1810 // verify dirty
1811 {
1812 bool dirty = false;
1813 int r = -1;
1814 ObjectReadOperation op;
1815 op.is_dirty(&dirty, &r);
1816 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1817 ASSERT_TRUE(dirty);
1818 ASSERT_EQ(0, r);
1819 user_version = cache_ioctx.get_last_version();
1820 }
1821
1822 // pin
1823 {
1824 ObjectWriteOperation op;
1825 op.cache_pin();
1826 librados::AioCompletion *completion = cluster.aio_create_completion();
1827 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1828 completion->wait_for_complete();
1829 ASSERT_EQ(0, completion->get_return_value());
1830 completion->release();
1831 }
1832
1833 // flush the pinned object with -EPERM
1834 {
1835 ObjectReadOperation op;
1836 op.cache_try_flush();
1837 librados::AioCompletion *completion = cluster.aio_create_completion();
1838 ASSERT_EQ(0, cache_ioctx.aio_operate(
1839 "foo", completion, &op,
1840 librados::OPERATION_IGNORE_OVERLAY |
1841 librados::OPERATION_SKIPRWLOCKS, NULL));
1842 completion->wait_for_complete();
1843 ASSERT_EQ(-EPERM, completion->get_return_value());
1844 completion->release();
1845 }
1846
1847 // unpin
1848 {
1849 ObjectWriteOperation op;
1850 op.cache_unpin();
1851 librados::AioCompletion *completion = cluster.aio_create_completion();
1852 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1853 completion->wait_for_complete();
1854 ASSERT_EQ(0, completion->get_return_value());
1855 completion->release();
1856 }
1857
1858 // flush
1859 {
1860 ObjectReadOperation op;
1861 op.cache_flush();
1862 librados::AioCompletion *completion = cluster.aio_create_completion();
1863 ASSERT_EQ(0, cache_ioctx.aio_operate(
1864 "foo", completion, &op,
1865 librados::OPERATION_IGNORE_OVERLAY, NULL));
1866 completion->wait_for_complete();
1867 ASSERT_EQ(0, completion->get_return_value());
1868 completion->release();
1869 }
1870
1871 // verify clean
1872 {
1873 bool dirty = false;
1874 int r = -1;
1875 ObjectReadOperation op;
1876 op.is_dirty(&dirty, &r);
1877 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1878 ASSERT_FALSE(dirty);
1879 ASSERT_EQ(0, r);
1880 }
1881
1882 // verify in base tier
1883 {
1884 NObjectIterator it = ioctx.nobjects_begin();
1885 ASSERT_TRUE(it != ioctx.nobjects_end());
1886 ASSERT_TRUE(it->get_oid() == string("foo"));
1887 ++it;
1888 ASSERT_TRUE(it == ioctx.nobjects_end());
1889 }
1890
1891 // evict it
1892 {
1893 ObjectReadOperation op;
1894 op.cache_evict();
1895 librados::AioCompletion *completion = cluster.aio_create_completion();
1896 ASSERT_EQ(0, cache_ioctx.aio_operate(
1897 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
1898 completion->wait_for_complete();
1899 ASSERT_EQ(0, completion->get_return_value());
1900 completion->release();
1901 }
1902
1903 // verify no longer in cache tier
1904 {
1905 NObjectIterator it = cache_ioctx.nobjects_begin();
1906 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1907 }
1908
1909 // read it again and verify the version is consistent
1910 {
1911 bufferlist bl;
1912 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
1913 ASSERT_EQ(user_version, cache_ioctx.get_last_version());
1914 }
1915
1916 // erase it
1917 {
1918 ObjectWriteOperation op;
1919 op.remove();
1920 ASSERT_EQ(0, ioctx.operate("foo", &op));
1921 }
1922
1923 // flush whiteout
1924 {
1925 ObjectReadOperation op;
1926 op.cache_flush();
1927 librados::AioCompletion *completion = cluster.aio_create_completion();
1928 ASSERT_EQ(0, cache_ioctx.aio_operate(
1929 "foo", completion, &op,
1930 librados::OPERATION_IGNORE_OVERLAY, NULL));
1931 completion->wait_for_complete();
1932 ASSERT_EQ(0, completion->get_return_value());
1933 completion->release();
1934 }
1935
1936 // evict
1937 {
1938 ObjectReadOperation op;
1939 op.cache_evict();
1940 librados::AioCompletion *completion = cluster.aio_create_completion();
1941 ASSERT_EQ(0, cache_ioctx.aio_operate(
1942 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
1943 completion->wait_for_complete();
1944 ASSERT_EQ(0, completion->get_return_value());
1945 completion->release();
1946 }
1947
1948 // verify no longer in cache tier
1949 {
1950 NObjectIterator it = cache_ioctx.nobjects_begin();
1951 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1952 }
1953 // or base tier
1954 {
1955 NObjectIterator it = ioctx.nobjects_begin();
1956 ASSERT_TRUE(it == ioctx.nobjects_end());
1957 }
1958 }
1959
1960 TEST_F(LibRadosTwoPoolsPP, FlushSnap) {
1961 // configure cache
1962 bufferlist inbl;
1963 ASSERT_EQ(0, cluster.mon_command(
1964 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1965 "\", \"tierpool\": \"" + cache_pool_name +
1966 "\", \"force_nonempty\": \"--force-nonempty\" }",
1967 inbl, NULL, NULL));
1968 ASSERT_EQ(0, cluster.mon_command(
1969 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1970 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1971 inbl, NULL, NULL));
1972 ASSERT_EQ(0, cluster.mon_command(
1973 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1974 "\", \"mode\": \"writeback\"}",
1975 inbl, NULL, NULL));
1976
1977 // wait for maps to settle
1978 cluster.wait_for_latest_osdmap();
1979
1980 // create object
1981 {
1982 bufferlist bl;
1983 bl.append("a");
1984 ObjectWriteOperation op;
1985 op.write_full(bl);
1986 ASSERT_EQ(0, ioctx.operate("foo", &op));
1987 }
1988
1989 // create a snapshot, clone
1990 vector<uint64_t> my_snaps(1);
1991 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1992 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1993 my_snaps));
1994 {
1995 bufferlist bl;
1996 bl.append("b");
1997 ObjectWriteOperation op;
1998 op.write_full(bl);
1999 ASSERT_EQ(0, ioctx.operate("foo", &op));
2000 }
2001
2002 // and another
2003 my_snaps.resize(2);
2004 my_snaps[1] = my_snaps[0];
2005 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
2006 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
2007 my_snaps));
2008 {
2009 bufferlist bl;
2010 bl.append("c");
2011 ObjectWriteOperation op;
2012 op.write_full(bl);
2013 ASSERT_EQ(0, ioctx.operate("foo", &op));
2014 }
2015
2016 // verify the object is present in the cache tier
2017 {
2018 NObjectIterator it = cache_ioctx.nobjects_begin();
2019 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
2020 ASSERT_TRUE(it->get_oid() == string("foo"));
2021 ++it;
2022 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
2023 }
2024
2025 // verify the object is NOT present in the base tier
2026 {
2027 NObjectIterator it = ioctx.nobjects_begin();
2028 ASSERT_TRUE(it == ioctx.nobjects_end());
2029 }
2030
2031 // flush on head (should fail)
2032 ioctx.snap_set_read(librados::SNAP_HEAD);
2033 {
2034 ObjectReadOperation op;
2035 op.cache_flush();
2036 librados::AioCompletion *completion = cluster.aio_create_completion();
2037 ASSERT_EQ(0, ioctx.aio_operate(
2038 "foo", completion, &op,
2039 librados::OPERATION_IGNORE_CACHE, NULL));
2040 completion->wait_for_complete();
2041 ASSERT_EQ(-EBUSY, completion->get_return_value());
2042 completion->release();
2043 }
2044 // flush on recent snap (should fail)
2045 ioctx.snap_set_read(my_snaps[0]);
2046 {
2047 ObjectReadOperation op;
2048 op.cache_flush();
2049 librados::AioCompletion *completion = cluster.aio_create_completion();
2050 ASSERT_EQ(0, ioctx.aio_operate(
2051 "foo", completion, &op,
2052 librados::OPERATION_IGNORE_CACHE, NULL));
2053 completion->wait_for_complete();
2054 ASSERT_EQ(-EBUSY, completion->get_return_value());
2055 completion->release();
2056 }
2057 // flush on oldest snap
2058 ioctx.snap_set_read(my_snaps[1]);
2059 {
2060 ObjectReadOperation op;
2061 op.cache_flush();
2062 librados::AioCompletion *completion = cluster.aio_create_completion();
2063 ASSERT_EQ(0, ioctx.aio_operate(
2064 "foo", completion, &op,
2065 librados::OPERATION_IGNORE_CACHE, NULL));
2066 completion->wait_for_complete();
2067 ASSERT_EQ(0, completion->get_return_value());
2068 completion->release();
2069 }
2070 // flush on next oldest snap
2071 ioctx.snap_set_read(my_snaps[0]);
2072 {
2073 ObjectReadOperation op;
2074 op.cache_flush();
2075 librados::AioCompletion *completion = cluster.aio_create_completion();
2076 ASSERT_EQ(0, ioctx.aio_operate(
2077 "foo", completion, &op,
2078 librados::OPERATION_IGNORE_CACHE, NULL));
2079 completion->wait_for_complete();
2080 ASSERT_EQ(0, completion->get_return_value());
2081 completion->release();
2082 }
2083 // flush on head
2084 ioctx.snap_set_read(librados::SNAP_HEAD);
2085 {
2086 ObjectReadOperation op;
2087 op.cache_flush();
2088 librados::AioCompletion *completion = cluster.aio_create_completion();
2089 ASSERT_EQ(0, ioctx.aio_operate(
2090 "foo", completion, &op,
2091 librados::OPERATION_IGNORE_CACHE, NULL));
2092 completion->wait_for_complete();
2093 ASSERT_EQ(0, completion->get_return_value());
2094 completion->release();
2095 }
2096
2097 // verify i can read the snaps from the cache pool
2098 ioctx.snap_set_read(librados::SNAP_HEAD);
2099 {
2100 bufferlist bl;
2101 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2102 ASSERT_EQ('c', bl[0]);
2103 }
2104 ioctx.snap_set_read(my_snaps[0]);
2105 {
2106 bufferlist bl;
2107 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2108 ASSERT_EQ('b', bl[0]);
2109 }
2110 ioctx.snap_set_read(my_snaps[1]);
2111 {
2112 bufferlist bl;
2113 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2114 ASSERT_EQ('a', bl[0]);
2115 }
2116
2117 // remove overlay
2118 ASSERT_EQ(0, cluster.mon_command(
2119 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2120 "\"}",
2121 inbl, NULL, NULL));
2122
2123 // wait for maps to settle
2124 cluster.wait_for_latest_osdmap();
2125
2126 // verify i can read the snaps from the base pool
2127 ioctx.snap_set_read(librados::SNAP_HEAD);
2128 {
2129 bufferlist bl;
2130 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2131 ASSERT_EQ('c', bl[0]);
2132 }
2133 ioctx.snap_set_read(my_snaps[0]);
2134 {
2135 bufferlist bl;
2136 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2137 ASSERT_EQ('b', bl[0]);
2138 }
2139 ioctx.snap_set_read(my_snaps[1]);
2140 {
2141 bufferlist bl;
2142 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2143 ASSERT_EQ('a', bl[0]);
2144 }
2145
2146 ASSERT_EQ(0, cluster.mon_command(
2147 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2148 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2149 inbl, NULL, NULL));
2150
2151 // cleanup
2152 ioctx.selfmanaged_snap_remove(my_snaps[0]);
2153 }
2154
2155 TEST_F(LibRadosTierPP, FlushWriteRaces) {
2156 Rados cluster;
2157 std::string pool_name = get_temp_pool_name();
2158 std::string cache_pool_name = pool_name + "-cache";
2159 ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
2160 ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str()));
2161 IoCtx cache_ioctx;
2162 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
2163 cache_ioctx.application_enable("rados", true);
2164 IoCtx ioctx;
2165 ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
2166
2167 // configure cache
2168 bufferlist inbl;
2169 ASSERT_EQ(0, cluster.mon_command(
2170 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2171 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2172 inbl, NULL, NULL));
2173 ASSERT_EQ(0, cluster.mon_command(
2174 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2175 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2176 inbl, NULL, NULL));
2177 ASSERT_EQ(0, cluster.mon_command(
2178 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2179 "\", \"mode\": \"writeback\"}",
2180 inbl, NULL, NULL));
2181
2182 // wait for maps to settle
2183 cluster.wait_for_latest_osdmap();
2184
2185 // create/dirty object
2186 bufferlist bl;
2187 bl.append("hi there");
2188 {
2189 ObjectWriteOperation op;
2190 op.write_full(bl);
2191 ASSERT_EQ(0, ioctx.operate("foo", &op));
2192 }
2193
2194 // flush + write
2195 {
2196 ObjectReadOperation op;
2197 op.cache_flush();
2198 librados::AioCompletion *completion = cluster.aio_create_completion();
2199 ASSERT_EQ(0, cache_ioctx.aio_operate(
2200 "foo", completion, &op,
2201 librados::OPERATION_IGNORE_OVERLAY, NULL));
2202
2203 ObjectWriteOperation op2;
2204 op2.write_full(bl);
2205 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2206 ASSERT_EQ(0, ioctx.aio_operate(
2207 "foo", completion2, &op2, 0));
2208
2209 completion->wait_for_complete();
2210 completion2->wait_for_complete();
2211 ASSERT_EQ(0, completion->get_return_value());
2212 ASSERT_EQ(0, completion2->get_return_value());
2213 completion->release();
2214 completion2->release();
2215 }
2216
2217 int tries = 1000;
2218 do {
2219 // create/dirty object
2220 {
2221 bufferlist bl;
2222 bl.append("hi there");
2223 ObjectWriteOperation op;
2224 op.write_full(bl);
2225 ASSERT_EQ(0, ioctx.operate("foo", &op));
2226 }
2227
2228 // try-flush + write
2229 {
2230 ObjectReadOperation op;
2231 op.cache_try_flush();
2232 librados::AioCompletion *completion = cluster.aio_create_completion();
2233 ASSERT_EQ(0, cache_ioctx.aio_operate(
2234 "foo", completion, &op,
2235 librados::OPERATION_IGNORE_OVERLAY |
2236 librados::OPERATION_SKIPRWLOCKS, NULL));
2237
2238 ObjectWriteOperation op2;
2239 op2.write_full(bl);
2240 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2241 ASSERT_EQ(0, ioctx.aio_operate("foo", completion2, &op2, 0));
2242
2243 completion->wait_for_complete();
2244 completion2->wait_for_complete();
2245 int r = completion->get_return_value();
2246 ASSERT_TRUE(r == -EBUSY || r == 0);
2247 ASSERT_EQ(0, completion2->get_return_value());
2248 completion->release();
2249 completion2->release();
2250 if (r == -EBUSY)
2251 break;
2252 cout << "didn't get EBUSY, trying again" << std::endl;
2253 }
2254 ASSERT_TRUE(--tries);
2255 } while (true);
2256
2257 // tear down tiers
2258 ASSERT_EQ(0, cluster.mon_command(
2259 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2260 "\"}",
2261 inbl, NULL, NULL));
2262 ASSERT_EQ(0, cluster.mon_command(
2263 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2264 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2265 inbl, NULL, NULL));
2266
2267 // wait for maps to settle before next test
2268 cluster.wait_for_latest_osdmap();
2269
2270 ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str()));
2271 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
2272 }
2273
2274 TEST_F(LibRadosTwoPoolsPP, FlushTryFlushRaces) {
2275 // configure cache
2276 bufferlist inbl;
2277 ASSERT_EQ(0, cluster.mon_command(
2278 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2279 "\", \"tierpool\": \"" + cache_pool_name +
2280 "\", \"force_nonempty\": \"--force-nonempty\" }",
2281 inbl, NULL, NULL));
2282 ASSERT_EQ(0, cluster.mon_command(
2283 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2284 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2285 inbl, NULL, NULL));
2286 ASSERT_EQ(0, cluster.mon_command(
2287 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2288 "\", \"mode\": \"writeback\"}",
2289 inbl, NULL, NULL));
2290
2291 // wait for maps to settle
2292 cluster.wait_for_latest_osdmap();
2293
2294 // create/dirty object
2295 {
2296 bufferlist bl;
2297 bl.append("hi there");
2298 ObjectWriteOperation op;
2299 op.write_full(bl);
2300 ASSERT_EQ(0, ioctx.operate("foo", &op));
2301 }
2302
2303 // flush + flush
2304 {
2305 ObjectReadOperation op;
2306 op.cache_flush();
2307 librados::AioCompletion *completion = cluster.aio_create_completion();
2308 ASSERT_EQ(0, cache_ioctx.aio_operate(
2309 "foo", completion, &op,
2310 librados::OPERATION_IGNORE_OVERLAY, NULL));
2311
2312 ObjectReadOperation op2;
2313 op2.cache_flush();
2314 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2315 ASSERT_EQ(0, cache_ioctx.aio_operate(
2316 "foo", completion2, &op2,
2317 librados::OPERATION_IGNORE_OVERLAY, NULL));
2318
2319 completion->wait_for_complete();
2320 completion2->wait_for_complete();
2321 ASSERT_EQ(0, completion->get_return_value());
2322 ASSERT_EQ(0, completion2->get_return_value());
2323 completion->release();
2324 completion2->release();
2325 }
2326
2327 // create/dirty object
2328 {
2329 bufferlist bl;
2330 bl.append("hi there");
2331 ObjectWriteOperation op;
2332 op.write_full(bl);
2333 ASSERT_EQ(0, ioctx.operate("foo", &op));
2334 }
2335
2336 // flush + try-flush
2337 {
2338 ObjectReadOperation op;
2339 op.cache_flush();
2340 librados::AioCompletion *completion = cluster.aio_create_completion();
2341 ASSERT_EQ(0, cache_ioctx.aio_operate(
2342 "foo", completion, &op,
2343 librados::OPERATION_IGNORE_OVERLAY, NULL));
2344
2345 ObjectReadOperation op2;
2346 op2.cache_try_flush();
2347 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2348 ASSERT_EQ(0, cache_ioctx.aio_operate(
2349 "foo", completion2, &op2,
2350 librados::OPERATION_IGNORE_OVERLAY |
2351 librados::OPERATION_SKIPRWLOCKS, NULL));
2352
2353 completion->wait_for_complete();
2354 completion2->wait_for_complete();
2355 ASSERT_EQ(0, completion->get_return_value());
2356 ASSERT_EQ(0, completion2->get_return_value());
2357 completion->release();
2358 completion2->release();
2359 }
2360
2361 // create/dirty object
2362 int tries = 1000;
2363 do {
2364 {
2365 bufferlist bl;
2366 bl.append("hi there");
2367 ObjectWriteOperation op;
2368 op.write_full(bl);
2369 ASSERT_EQ(0, ioctx.operate("foo", &op));
2370 }
2371
2372 // try-flush + flush
2373 // (flush will not piggyback on try-flush)
2374 {
2375 ObjectReadOperation op;
2376 op.cache_try_flush();
2377 librados::AioCompletion *completion = cluster.aio_create_completion();
2378 ASSERT_EQ(0, cache_ioctx.aio_operate(
2379 "foo", completion, &op,
2380 librados::OPERATION_IGNORE_OVERLAY |
2381 librados::OPERATION_SKIPRWLOCKS, NULL));
2382
2383 ObjectReadOperation op2;
2384 op2.cache_flush();
2385 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2386 ASSERT_EQ(0, cache_ioctx.aio_operate(
2387 "foo", completion2, &op2,
2388 librados::OPERATION_IGNORE_OVERLAY, NULL));
2389
2390 completion->wait_for_complete();
2391 completion2->wait_for_complete();
2392 int r = completion->get_return_value();
2393 ASSERT_TRUE(r == -EBUSY || r == 0);
2394 ASSERT_EQ(0, completion2->get_return_value());
2395 completion->release();
2396 completion2->release();
2397 if (r == -EBUSY)
2398 break;
2399 cout << "didn't get EBUSY, trying again" << std::endl;
2400 }
2401 ASSERT_TRUE(--tries);
2402 } while (true);
2403
2404 // create/dirty object
2405 {
2406 bufferlist bl;
2407 bl.append("hi there");
2408 ObjectWriteOperation op;
2409 op.write_full(bl);
2410 ASSERT_EQ(0, ioctx.operate("foo", &op));
2411 }
2412
2413 // try-flush + try-flush
2414 {
2415 ObjectReadOperation op;
2416 op.cache_try_flush();
2417 librados::AioCompletion *completion = cluster.aio_create_completion();
2418 ASSERT_EQ(0, cache_ioctx.aio_operate(
2419 "foo", completion, &op,
2420 librados::OPERATION_IGNORE_OVERLAY |
2421 librados::OPERATION_SKIPRWLOCKS, NULL));
2422
2423 ObjectReadOperation op2;
2424 op2.cache_try_flush();
2425 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2426 ASSERT_EQ(0, cache_ioctx.aio_operate(
2427 "foo", completion2, &op2,
2428 librados::OPERATION_IGNORE_OVERLAY |
2429 librados::OPERATION_SKIPRWLOCKS, NULL));
2430
2431 completion->wait_for_complete();
2432 completion2->wait_for_complete();
2433 ASSERT_EQ(0, completion->get_return_value());
2434 ASSERT_EQ(0, completion2->get_return_value());
2435 completion->release();
2436 completion2->release();
2437 }
2438 }
2439
2440
2441 IoCtx *read_ioctx = 0;
2442 ceph::mutex test_lock = ceph::make_mutex("FlushReadRaces::lock");
2443 ceph::condition_variable cond;
2444 int max_reads = 100;
2445 int num_reads = 0; // in progress
2446
2447 void flush_read_race_cb(completion_t cb, void *arg);
2448
2449 void start_flush_read()
2450 {
2451 //cout << " starting read" << std::endl;
2452 ObjectReadOperation op;
2453 op.stat(NULL, NULL, NULL);
2454 librados::AioCompletion *completion = completions.getCompletion();
2455 completion->set_complete_callback(0, flush_read_race_cb);
2456 read_ioctx->aio_operate("foo", completion, &op, NULL);
2457 }
2458
2459 void flush_read_race_cb(completion_t cb, void *arg)
2460 {
2461 //cout << " finished read" << std::endl;
2462 std::lock_guard l{test_lock};
2463 if (num_reads > max_reads) {
2464 num_reads--;
2465 cond.notify_all();
2466 } else {
2467 start_flush_read();
2468 }
2469 }
2470
2471 TEST_F(LibRadosTwoPoolsPP, TryFlushReadRace) {
2472 // configure cache
2473 bufferlist inbl;
2474 ASSERT_EQ(0, cluster.mon_command(
2475 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2476 "\", \"tierpool\": \"" + cache_pool_name +
2477 "\", \"force_nonempty\": \"--force-nonempty\" }",
2478 inbl, NULL, NULL));
2479 ASSERT_EQ(0, cluster.mon_command(
2480 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2481 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2482 inbl, NULL, NULL));
2483 ASSERT_EQ(0, cluster.mon_command(
2484 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2485 "\", \"mode\": \"writeback\"}",
2486 inbl, NULL, NULL));
2487
2488 // wait for maps to settle
2489 cluster.wait_for_latest_osdmap();
2490
2491 // create/dirty object
2492 {
2493 bufferlist bl;
2494 bl.append("hi there");
2495 bufferptr bp(4000000); // make it big!
2496 bp.zero();
2497 bl.append(bp);
2498 ObjectWriteOperation op;
2499 op.write_full(bl);
2500 ASSERT_EQ(0, ioctx.operate("foo", &op));
2501 }
2502
2503 // start a continuous stream of reads
2504 read_ioctx = &ioctx;
2505 test_lock.lock();
2506 for (int i = 0; i < max_reads; ++i) {
2507 start_flush_read();
2508 num_reads++;
2509 }
2510 test_lock.unlock();
2511
2512 // try-flush
2513 ObjectReadOperation op;
2514 op.cache_try_flush();
2515 librados::AioCompletion *completion = cluster.aio_create_completion();
2516 ASSERT_EQ(0, cache_ioctx.aio_operate(
2517 "foo", completion, &op,
2518 librados::OPERATION_IGNORE_OVERLAY |
2519 librados::OPERATION_SKIPRWLOCKS, NULL));
2520
2521 completion->wait_for_complete();
2522 ASSERT_EQ(0, completion->get_return_value());
2523 completion->release();
2524
2525 // stop reads
2526 std::unique_lock locker{test_lock};
2527 max_reads = 0;
2528 cond.wait(locker, [] { return num_reads == 0;});
2529 }
2530
2531 TEST_F(LibRadosTierPP, HitSetNone) {
2532 {
2533 list< pair<time_t,time_t> > ls;
2534 AioCompletion *c = librados::Rados::aio_create_completion();
2535 ASSERT_EQ(0, ioctx.hit_set_list(123, c, &ls));
2536 c->wait_for_complete();
2537 ASSERT_EQ(0, c->get_return_value());
2538 ASSERT_TRUE(ls.empty());
2539 c->release();
2540 }
2541 {
2542 bufferlist bl;
2543 AioCompletion *c = librados::Rados::aio_create_completion();
2544 ASSERT_EQ(0, ioctx.hit_set_get(123, c, 12345, &bl));
2545 c->wait_for_complete();
2546 ASSERT_EQ(-ENOENT, c->get_return_value());
2547 c->release();
2548 }
2549 }
2550
// Builds the JSON text of an "osd pool set" mon command that sets `var`
// to the string `val` on `pool`.  The arguments are inserted verbatim
// (no JSON escaping).
string set_pool_str(string pool, string var, string val)
{
  string cmd = "{\"prefix\": \"osd pool set\",\"pool\":\"";
  cmd += pool;
  cmd += "\",\"var\": \"";
  cmd += var;
  cmd += "\",\"val\": \"";
  cmd += val;
  cmd += "\"}";
  return cmd;
}
2557
2558 string set_pool_str(string pool, string var, int val)
2559 {
2560 return string("{\"prefix\": \"osd pool set\",\"pool\":\"") + pool
2561 + string("\",\"var\": \"") + var + string("\",\"val\": \"")
2562 + stringify(val) + string("\"}");
2563 }
2564
2565 TEST_F(LibRadosTwoPoolsPP, HitSetRead) {
2566 // make it a tier
2567 bufferlist inbl;
2568 ASSERT_EQ(0, cluster.mon_command(
2569 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2570 "\", \"tierpool\": \"" + cache_pool_name +
2571 "\", \"force_nonempty\": \"--force-nonempty\" }",
2572 inbl, NULL, NULL));
2573
2574 // enable hitset tracking for this pool
2575 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", 2),
2576 inbl, NULL, NULL));
2577 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", 600),
2578 inbl, NULL, NULL));
2579 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type",
2580 "explicit_object"),
2581 inbl, NULL, NULL));
2582
2583 // wait for maps to settle
2584 cluster.wait_for_latest_osdmap();
2585
2586 cache_ioctx.set_namespace("");
2587
2588 // keep reading until we see our object appear in the HitSet
2589 utime_t start = ceph_clock_now();
2590 utime_t hard_stop = start + utime_t(600, 0);
2591
2592 while (true) {
2593 utime_t now = ceph_clock_now();
2594 ASSERT_TRUE(now < hard_stop);
2595
2596 string name = "foo";
2597 uint32_t hash;
2598 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
2599 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash,
2600 cluster.pool_lookup(cache_pool_name.c_str()), "");
2601
2602 bufferlist bl;
2603 ASSERT_EQ(-ENOENT, cache_ioctx.read("foo", bl, 1, 0));
2604
2605 bufferlist hbl;
2606 AioCompletion *c = librados::Rados::aio_create_completion();
2607 ASSERT_EQ(0, cache_ioctx.hit_set_get(hash, c, now.sec(), &hbl));
2608 c->wait_for_complete();
2609 c->release();
2610
2611 if (hbl.length()) {
2612 auto p = hbl.cbegin();
2613 HitSet hs;
2614 decode(hs, p);
2615 if (hs.contains(oid)) {
2616 cout << "ok, hit_set contains " << oid << std::endl;
2617 break;
2618 }
2619 cout << "hmm, not in HitSet yet" << std::endl;
2620 } else {
2621 cout << "hmm, no HitSet yet" << std::endl;
2622 }
2623
2624 sleep(1);
2625 }
2626 }
2627
2628 static int _get_pg_num(Rados& cluster, string pool_name)
2629 {
2630 bufferlist inbl;
2631 string cmd = string("{\"prefix\": \"osd pool get\",\"pool\":\"")
2632 + pool_name
2633 + string("\",\"var\": \"pg_num\",\"format\": \"json\"}");
2634 bufferlist outbl;
2635 int r = cluster.mon_command(cmd, inbl, &outbl, NULL);
2636 ceph_assert(r >= 0);
2637 string outstr(outbl.c_str(), outbl.length());
2638 json_spirit::Value v;
2639 if (!json_spirit::read(outstr, v)) {
2640 cerr <<" unable to parse json " << outstr << std::endl;
2641 return -1;
2642 }
2643
2644 json_spirit::Object& o = v.get_obj();
2645 for (json_spirit::Object::size_type i=0; i<o.size(); i++) {
2646 json_spirit::Pair& p = o[i];
2647 if (p.name_ == "pg_num") {
2648 cout << "pg_num = " << p.value_.get_int() << std::endl;
2649 return p.value_.get_int();
2650 }
2651 }
2652 cerr << "didn't find pg_num in " << outstr << std::endl;
2653 return -1;
2654 }
2655
2656 int make_hitset(Rados& cluster, librados::IoCtx& cache_ioctx, int num_pg,
2657 int num, std::map<int, HitSet>& hitsets, std::string& cache_pool_name)
2658 {
2659 int pg = num_pg;
2660 // do a bunch of writes
2661 for (int i=0; i<num; ++i) {
2662 bufferlist bl;
2663 bl.append("a");
2664 ceph_assert(0 == cache_ioctx.write(stringify(i), bl, 1, 0));
2665 }
2666
2667 // get HitSets
2668 for (int i=0; i<pg; ++i) {
2669 list< pair<time_t,time_t> > ls;
2670 AioCompletion *c = librados::Rados::aio_create_completion();
2671 ceph_assert(0 == cache_ioctx.hit_set_list(i, c, &ls));
2672 c->wait_for_complete();
2673 c->release();
2674 std::cout << "pg " << i << " ls " << ls << std::endl;
2675 ceph_assert(!ls.empty());
2676
2677 // get the latest
2678 c = librados::Rados::aio_create_completion();
2679 bufferlist bl;
2680 ceph_assert(0 == cache_ioctx.hit_set_get(i, c, ls.back().first, &bl));
2681 c->wait_for_complete();
2682 c->release();
2683
2684 try {
2685 auto p = bl.cbegin();
2686 decode(hitsets[i], p);
2687 }
2688 catch (buffer::error& e) {
2689 std::cout << "failed to decode hit set; bl len is " << bl.length() << "\n";
2690 bl.hexdump(std::cout);
2691 std::cout << std::endl;
2692 throw e;
2693 }
2694
2695 // cope with racing splits by refreshing pg_num
2696 if (i == pg - 1)
2697 pg = _get_pg_num(cluster, cache_pool_name);
2698 }
2699 return pg;
2700 }
2701
2702 TEST_F(LibRadosTwoPoolsPP, HitSetWrite) {
2703 int num_pg = _get_pg_num(cluster, pool_name);
2704 ceph_assert(num_pg > 0);
2705
2706 // make it a tier
2707 bufferlist inbl;
2708 ASSERT_EQ(0, cluster.mon_command(
2709 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2710 "\", \"tierpool\": \"" + cache_pool_name +
2711 "\", \"force_nonempty\": \"--force-nonempty\" }",
2712 inbl, NULL, NULL));
2713
2714 // enable hitset tracking for this pool
2715 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", 8),
2716 inbl, NULL, NULL));
2717 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", 600),
2718 inbl, NULL, NULL));
2719 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type",
2720 "explicit_hash"),
2721 inbl, NULL, NULL));
2722
2723 // wait for maps to settle
2724 cluster.wait_for_latest_osdmap();
2725
2726 cache_ioctx.set_namespace("");
2727
2728 int num = 200;
2729
2730 std::map<int,HitSet> hitsets;
2731
2732 num_pg = make_hitset(cluster, cache_ioctx, num_pg, num, hitsets, cache_pool_name);
2733
2734 int retry = 0;
2735
2736 for (int i=0; i<num; ++i) {
2737 string n = stringify(i);
2738 uint32_t hash;
2739 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(n, &hash));
2740 hobject_t oid(sobject_t(n, CEPH_NOSNAP), "", hash,
2741 cluster.pool_lookup(cache_pool_name.c_str()), "");
2742 std::cout << "checking for " << oid << std::endl;
2743 bool found = false;
2744 for (int p=0; p<num_pg; ++p) {
2745 if (hitsets[p].contains(oid)) {
2746 found = true;
2747 break;
2748 }
2749 }
2750 if (!found && retry < 5) {
2751 num_pg = make_hitset(cluster, cache_ioctx, num_pg, num, hitsets, cache_pool_name);
2752 i--;
2753 retry++;
2754 continue;
2755 }
2756 ASSERT_TRUE(found);
2757 }
2758 }
2759
2760 TEST_F(LibRadosTwoPoolsPP, HitSetTrim) {
2761 unsigned count = 3;
2762 unsigned period = 3;
2763
2764 // make it a tier
2765 bufferlist inbl;
2766 ASSERT_EQ(0, cluster.mon_command(
2767 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2768 "\", \"tierpool\": \"" + cache_pool_name +
2769 "\", \"force_nonempty\": \"--force-nonempty\" }",
2770 inbl, NULL, NULL));
2771
2772 // enable hitset tracking for this pool
2773 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", count),
2774 inbl, NULL, NULL));
2775 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", period),
2776 inbl, NULL, NULL));
2777 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
2778 inbl, NULL, NULL));
2779 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_fpp", ".01"),
2780 inbl, NULL, NULL));
2781
2782 // wait for maps to settle
2783 cluster.wait_for_latest_osdmap();
2784
2785 cache_ioctx.set_namespace("");
2786
2787 // do a bunch of writes and make sure the hitsets rotate
2788 utime_t start = ceph_clock_now();
2789 utime_t hard_stop = start + utime_t(count * period * 50, 0);
2790
2791 time_t first = 0;
2792 while (true) {
2793 string name = "foo";
2794 uint32_t hash;
2795 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
2796 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash, -1, "");
2797
2798 bufferlist bl;
2799 bl.append("f");
2800 ASSERT_EQ(0, cache_ioctx.write("foo", bl, 1, 0));
2801
2802 list<pair<time_t, time_t> > ls;
2803 AioCompletion *c = librados::Rados::aio_create_completion();
2804 ASSERT_EQ(0, cache_ioctx.hit_set_list(hash, c, &ls));
2805 c->wait_for_complete();
2806 c->release();
2807
2808 cout << " got ls " << ls << std::endl;
2809 if (!ls.empty()) {
2810 if (!first) {
2811 first = ls.front().first;
2812 cout << "first is " << first << std::endl;
2813 } else {
2814 if (ls.front().first != first) {
2815 cout << "first now " << ls.front().first << ", trimmed" << std::endl;
2816 break;
2817 }
2818 }
2819 }
2820
2821 utime_t now = ceph_clock_now();
2822 ASSERT_TRUE(now < hard_stop);
2823
2824 sleep(1);
2825 }
2826 }
2827
2828 TEST_F(LibRadosTwoPoolsPP, PromoteOn2ndRead) {
2829 // create object
2830 for (int i=0; i<20; ++i) {
2831 bufferlist bl;
2832 bl.append("hi there");
2833 ObjectWriteOperation op;
2834 op.write_full(bl);
2835 ASSERT_EQ(0, ioctx.operate("foo" + stringify(i), &op));
2836 }
2837
2838 // configure cache
2839 bufferlist inbl;
2840 ASSERT_EQ(0, cluster.mon_command(
2841 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2842 "\", \"tierpool\": \"" + cache_pool_name +
2843 "\", \"force_nonempty\": \"--force-nonempty\" }",
2844 inbl, NULL, NULL));
2845 ASSERT_EQ(0, cluster.mon_command(
2846 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2847 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2848 inbl, NULL, NULL));
2849 ASSERT_EQ(0, cluster.mon_command(
2850 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2851 "\", \"mode\": \"writeback\"}",
2852 inbl, NULL, NULL));
2853
2854 // enable hitset tracking for this pool
2855 ASSERT_EQ(0, cluster.mon_command(
2856 set_pool_str(cache_pool_name, "hit_set_count", 2),
2857 inbl, NULL, NULL));
2858 ASSERT_EQ(0, cluster.mon_command(
2859 set_pool_str(cache_pool_name, "hit_set_period", 600),
2860 inbl, NULL, NULL));
2861 ASSERT_EQ(0, cluster.mon_command(
2862 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
2863 inbl, NULL, NULL));
2864 ASSERT_EQ(0, cluster.mon_command(
2865 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
2866 inbl, NULL, NULL));
2867 ASSERT_EQ(0, cluster.mon_command(
2868 set_pool_str(cache_pool_name, "hit_set_grade_decay_rate", 20),
2869 inbl, NULL, NULL));
2870 ASSERT_EQ(0, cluster.mon_command(
2871 set_pool_str(cache_pool_name, "hit_set_search_last_n", 1),
2872 inbl, NULL, NULL));
2873
2874 // wait for maps to settle
2875 cluster.wait_for_latest_osdmap();
2876
2877 int fake = 0; // set this to non-zero to test spurious promotion,
2878 // e.g. from thrashing
2879 int attempt = 0;
2880 string obj;
2881 while (true) {
2882 // 1st read, don't trigger a promote
2883 obj = "foo" + stringify(attempt);
2884 cout << obj << std::endl;
2885 {
2886 bufferlist bl;
2887 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
2888 if (--fake >= 0) {
2889 sleep(1);
2890 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
2891 sleep(1);
2892 }
2893 }
2894
2895 // verify the object is NOT present in the cache tier
2896 {
2897 bool found = false;
2898 NObjectIterator it = cache_ioctx.nobjects_begin();
2899 while (it != cache_ioctx.nobjects_end()) {
2900 cout << " see " << it->get_oid() << std::endl;
2901 if (it->get_oid() == string(obj.c_str())) {
2902 found = true;
2903 break;
2904 }
2905 ++it;
2906 }
2907 if (!found)
2908 break;
2909 }
2910
2911 ++attempt;
2912 ASSERT_LE(attempt, 20);
2913 cout << "hrm, object is present in cache on attempt " << attempt
2914 << ", retrying" << std::endl;
2915 }
2916
2917 // Read until the object is present in the cache tier
2918 cout << "verifying " << obj << " is eventually promoted" << std::endl;
2919 while (true) {
2920 bufferlist bl;
2921 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
2922
2923 bool there = false;
2924 NObjectIterator it = cache_ioctx.nobjects_begin();
2925 while (it != cache_ioctx.nobjects_end()) {
2926 if (it->get_oid() == string(obj.c_str())) {
2927 there = true;
2928 break;
2929 }
2930 ++it;
2931 }
2932 if (there)
2933 break;
2934
2935 sleep(1);
2936 }
2937
2938 // tear down tiers
2939 ASSERT_EQ(0, cluster.mon_command(
2940 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2941 "\"}",
2942 inbl, NULL, NULL));
2943 ASSERT_EQ(0, cluster.mon_command(
2944 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2945 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2946 inbl, NULL, NULL));
2947
2948 // wait for maps to settle before next test
2949 cluster.wait_for_latest_osdmap();
2950 }
2951
2952 TEST_F(LibRadosTwoPoolsPP, ProxyRead) {
2953 // create object
2954 {
2955 bufferlist bl;
2956 bl.append("hi there");
2957 ObjectWriteOperation op;
2958 op.write_full(bl);
2959 ASSERT_EQ(0, ioctx.operate("foo", &op));
2960 }
2961
2962 // configure cache
2963 bufferlist inbl;
2964 ASSERT_EQ(0, cluster.mon_command(
2965 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2966 "\", \"tierpool\": \"" + cache_pool_name +
2967 "\", \"force_nonempty\": \"--force-nonempty\" }",
2968 inbl, NULL, NULL));
2969 ASSERT_EQ(0, cluster.mon_command(
2970 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2971 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2972 inbl, NULL, NULL));
2973 ASSERT_EQ(0, cluster.mon_command(
2974 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2975 "\", \"mode\": \"readproxy\"}",
2976 inbl, NULL, NULL));
2977
2978 // wait for maps to settle
2979 cluster.wait_for_latest_osdmap();
2980
2981 // read and verify the object
2982 {
2983 bufferlist bl;
2984 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2985 ASSERT_EQ('h', bl[0]);
2986 }
2987
2988 // Verify 10 times the object is NOT present in the cache tier
2989 uint32_t i = 0;
2990 while (i++ < 10) {
2991 NObjectIterator it = cache_ioctx.nobjects_begin();
2992 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
2993 sleep(1);
2994 }
2995
2996 // tear down tiers
2997 ASSERT_EQ(0, cluster.mon_command(
2998 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2999 "\"}",
3000 inbl, NULL, NULL));
3001 ASSERT_EQ(0, cluster.mon_command(
3002 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
3003 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
3004 inbl, NULL, NULL));
3005
3006 // wait for maps to settle before next test
3007 cluster.wait_for_latest_osdmap();
3008 }
3009
3010 TEST_F(LibRadosTwoPoolsPP, CachePin) {
3011 // create object
3012 {
3013 bufferlist bl;
3014 bl.append("hi there");
3015 ObjectWriteOperation op;
3016 op.write_full(bl);
3017 ASSERT_EQ(0, ioctx.operate("foo", &op));
3018 }
3019 {
3020 bufferlist bl;
3021 bl.append("hi there");
3022 ObjectWriteOperation op;
3023 op.write_full(bl);
3024 ASSERT_EQ(0, ioctx.operate("bar", &op));
3025 }
3026 {
3027 bufferlist bl;
3028 bl.append("hi there");
3029 ObjectWriteOperation op;
3030 op.write_full(bl);
3031 ASSERT_EQ(0, ioctx.operate("baz", &op));
3032 }
3033 {
3034 bufferlist bl;
3035 bl.append("hi there");
3036 ObjectWriteOperation op;
3037 op.write_full(bl);
3038 ASSERT_EQ(0, ioctx.operate("bam", &op));
3039 }
3040
3041 // configure cache
3042 bufferlist inbl;
3043 ASSERT_EQ(0, cluster.mon_command(
3044 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3045 "\", \"tierpool\": \"" + cache_pool_name +
3046 "\", \"force_nonempty\": \"--force-nonempty\" }",
3047 inbl, NULL, NULL));
3048 ASSERT_EQ(0, cluster.mon_command(
3049 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3050 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3051 inbl, NULL, NULL));
3052 ASSERT_EQ(0, cluster.mon_command(
3053 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3054 "\", \"mode\": \"writeback\"}",
3055 inbl, NULL, NULL));
3056
3057 // wait for maps to settle
3058 cluster.wait_for_latest_osdmap();
3059
3060 // read, trigger promote
3061 {
3062 bufferlist bl;
3063 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3064 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
3065 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
3066 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
3067 }
3068
3069 // verify the objects are present in the cache tier
3070 {
3071 NObjectIterator it = cache_ioctx.nobjects_begin();
3072 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3073 for (uint32_t i = 0; i < 4; i++) {
3074 ASSERT_TRUE(it->get_oid() == string("foo") ||
3075 it->get_oid() == string("bar") ||
3076 it->get_oid() == string("baz") ||
3077 it->get_oid() == string("bam"));
3078 ++it;
3079 }
3080 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3081 }
3082
3083 // pin objects
3084 {
3085 ObjectWriteOperation op;
3086 op.cache_pin();
3087 librados::AioCompletion *completion = cluster.aio_create_completion();
3088 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
3089 completion->wait_for_complete();
3090 ASSERT_EQ(0, completion->get_return_value());
3091 completion->release();
3092 }
3093 {
3094 ObjectWriteOperation op;
3095 op.cache_pin();
3096 librados::AioCompletion *completion = cluster.aio_create_completion();
3097 ASSERT_EQ(0, cache_ioctx.aio_operate("baz", completion, &op));
3098 completion->wait_for_complete();
3099 ASSERT_EQ(0, completion->get_return_value());
3100 completion->release();
3101 }
3102
3103 // enable agent
3104 ASSERT_EQ(0, cluster.mon_command(
3105 set_pool_str(cache_pool_name, "hit_set_count", 2),
3106 inbl, NULL, NULL));
3107 ASSERT_EQ(0, cluster.mon_command(
3108 set_pool_str(cache_pool_name, "hit_set_period", 600),
3109 inbl, NULL, NULL));
3110 ASSERT_EQ(0, cluster.mon_command(
3111 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
3112 inbl, NULL, NULL));
3113 ASSERT_EQ(0, cluster.mon_command(
3114 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
3115 inbl, NULL, NULL));
3116 ASSERT_EQ(0, cluster.mon_command(
3117 set_pool_str(cache_pool_name, "target_max_objects", 1),
3118 inbl, NULL, NULL));
3119
3120 sleep(10);
3121
3122 // Verify the pinned object 'foo' is not flushed/evicted
3123 uint32_t count = 0;
3124 while (true) {
3125 bufferlist bl;
3126 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
3127
3128 count = 0;
3129 NObjectIterator it = cache_ioctx.nobjects_begin();
3130 while (it != cache_ioctx.nobjects_end()) {
3131 ASSERT_TRUE(it->get_oid() == string("foo") ||
3132 it->get_oid() == string("bar") ||
3133 it->get_oid() == string("baz") ||
3134 it->get_oid() == string("bam"));
3135 ++count;
3136 ++it;
3137 }
3138 if (count == 2) {
3139 ASSERT_TRUE(it->get_oid() == string("foo") ||
3140 it->get_oid() == string("baz"));
3141 break;
3142 }
3143
3144 sleep(1);
3145 }
3146
3147 // tear down tiers
3148 ASSERT_EQ(0, cluster.mon_command(
3149 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
3150 "\"}",
3151 inbl, NULL, NULL));
3152 ASSERT_EQ(0, cluster.mon_command(
3153 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
3154 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
3155 inbl, NULL, NULL));
3156
3157 // wait for maps to settle before next test
3158 cluster.wait_for_latest_osdmap();
3159 }
3160
3161 TEST_F(LibRadosTwoPoolsPP, SetRedirectRead) {
3162 // create object
3163 {
3164 bufferlist bl;
3165 bl.append("hi there");
3166 ObjectWriteOperation op;
3167 op.write_full(bl);
3168 ASSERT_EQ(0, ioctx.operate("foo", &op));
3169 }
3170 {
3171 bufferlist bl;
3172 bl.append("there");
3173 ObjectWriteOperation op;
3174 op.write_full(bl);
3175 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
3176 }
3177
3178 // wait for maps to settle
3179 cluster.wait_for_latest_osdmap();
3180
3181 {
3182 ObjectWriteOperation op;
3183 op.set_redirect("bar", cache_ioctx, 0);
3184 librados::AioCompletion *completion = cluster.aio_create_completion();
3185 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3186 completion->wait_for_complete();
3187 ASSERT_EQ(0, completion->get_return_value());
3188 completion->release();
3189 }
3190 // read and verify the object
3191 {
3192 bufferlist bl;
3193 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3194 ASSERT_EQ('t', bl[0]);
3195 }
3196
3197 // wait for maps to settle before next test
3198 cluster.wait_for_latest_osdmap();
3199 }
3200
3201 TEST_F(LibRadosTwoPoolsPP, ManifestPromoteRead) {
3202 // skip test if not yet mimic
3203 if (_get_required_osd_release(cluster) < "mimic") {
3204 GTEST_SKIP() << "cluster is not yet mimic, skipping test";
3205 }
3206
3207 // create object
3208 {
3209 bufferlist bl;
3210 bl.append("hi there");
3211 ObjectWriteOperation op;
3212 op.write_full(bl);
3213 ASSERT_EQ(0, ioctx.operate("foo", &op));
3214 }
3215 {
3216 bufferlist bl;
3217 bl.append("base chunk");
3218 ObjectWriteOperation op;
3219 op.write_full(bl);
3220 ASSERT_EQ(0, ioctx.operate("foo-chunk", &op));
3221 }
3222 {
3223 bufferlist bl;
3224 bl.append("there");
3225 ObjectWriteOperation op;
3226 op.write_full(bl);
3227 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
3228 }
3229 {
3230 bufferlist bl;
3231 bl.append("CHUNK");
3232 ObjectWriteOperation op;
3233 op.write_full(bl);
3234 ASSERT_EQ(0, cache_ioctx.operate("bar-chunk", &op));
3235 }
3236
3237 // wait for maps to settle
3238 cluster.wait_for_latest_osdmap();
3239
3240 // set-redirect
3241 {
3242 ObjectWriteOperation op;
3243 op.set_redirect("bar", cache_ioctx, 0);
3244 librados::AioCompletion *completion = cluster.aio_create_completion();
3245 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3246 completion->wait_for_complete();
3247 ASSERT_EQ(0, completion->get_return_value());
3248 completion->release();
3249 }
3250 // set-chunk
3251 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 2, "bar-chunk", "foo-chunk");
3252
3253 // promote
3254 {
3255 ObjectWriteOperation op;
3256 op.tier_promote();
3257 librados::AioCompletion *completion = cluster.aio_create_completion();
3258 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3259 completion->wait_for_complete();
3260 ASSERT_EQ(0, completion->get_return_value());
3261 completion->release();
3262 }
3263 // read and verify the object (redirect)
3264 {
3265 bufferlist bl;
3266 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3267 ASSERT_EQ('t', bl[0]);
3268 }
3269 // promote
3270 {
3271 ObjectWriteOperation op;
3272 op.tier_promote();
3273 librados::AioCompletion *completion = cluster.aio_create_completion();
3274 ASSERT_EQ(0, ioctx.aio_operate("foo-chunk", completion, &op));
3275 completion->wait_for_complete();
3276 ASSERT_EQ(0, completion->get_return_value());
3277 completion->release();
3278 }
3279 // read and verify the object
3280 {
3281 bufferlist bl;
3282 ASSERT_EQ(1, ioctx.read("foo-chunk", bl, 1, 0));
3283 ASSERT_EQ('C', bl[0]);
3284 }
3285
3286 // wait for maps to settle before next test
3287 cluster.wait_for_latest_osdmap();
3288 }
3289
3290 TEST_F(LibRadosTwoPoolsPP, ManifestRefRead) {
3291 // note: require >= mimic
3292
3293 // create object
3294 {
3295 bufferlist bl;
3296 bl.append("hi there");
3297 ObjectWriteOperation op;
3298 op.write_full(bl);
3299 ASSERT_EQ(0, ioctx.operate("foo", &op));
3300 }
3301 {
3302 bufferlist bl;
3303 bl.append("base chunk");
3304 ObjectWriteOperation op;
3305 op.write_full(bl);
3306 ASSERT_EQ(0, ioctx.operate("foo-chunk", &op));
3307 }
3308 {
3309 bufferlist bl;
3310 bl.append("there");
3311 ObjectWriteOperation op;
3312 op.write_full(bl);
3313 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
3314 }
3315 {
3316 bufferlist bl;
3317 bl.append("CHUNK");
3318 ObjectWriteOperation op;
3319 op.write_full(bl);
3320 ASSERT_EQ(0, cache_ioctx.operate("bar-chunk", &op));
3321 }
3322
3323 // wait for maps to settle
3324 cluster.wait_for_latest_osdmap();
3325
3326 // set-redirect
3327 {
3328 ObjectWriteOperation op;
3329 op.set_redirect("bar", cache_ioctx, 0, CEPH_OSD_OP_FLAG_WITH_REFERENCE);
3330 librados::AioCompletion *completion = cluster.aio_create_completion();
3331 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3332 completion->wait_for_complete();
3333 ASSERT_EQ(0, completion->get_return_value());
3334 completion->release();
3335 }
3336 // set-chunk
3337 {
3338 ObjectReadOperation op;
3339 op.set_chunk(0, 2, cache_ioctx, "bar-chunk", 0, CEPH_OSD_OP_FLAG_WITH_REFERENCE);
3340 librados::AioCompletion *completion = cluster.aio_create_completion();
3341 ASSERT_EQ(0, ioctx.aio_operate("foo-chunk", completion, &op,
3342 librados::OPERATION_IGNORE_CACHE, NULL));
3343 completion->wait_for_complete();
3344 ASSERT_EQ(0, completion->get_return_value());
3345 completion->release();
3346 }
3347 // redirect's refcount
3348 {
3349 bufferlist t;
3350 cache_ioctx.getxattr("bar", CHUNK_REFCOUNT_ATTR, t);
3351 chunk_refs_t refs;
3352 try {
3353 auto iter = t.cbegin();
3354 decode(refs, iter);
3355 } catch (buffer::error& err) {
3356 ASSERT_TRUE(0);
3357 }
3358 ASSERT_EQ(1U, refs.count());
3359 }
3360 // chunk's refcount
3361 {
3362 bufferlist t;
3363 cache_ioctx.getxattr("bar-chunk", CHUNK_REFCOUNT_ATTR, t);
3364 chunk_refs_t refs;
3365 try {
3366 auto iter = t.cbegin();
3367 decode(refs, iter);
3368 } catch (buffer::error& err) {
3369 ASSERT_TRUE(0);
3370 }
3371 ASSERT_EQ(1u, refs.count());
3372 }
3373
3374 // wait for maps to settle before next test
3375 cluster.wait_for_latest_osdmap();
3376 }
3377
3378 TEST_F(LibRadosTwoPoolsPP, ManifestUnset) {
3379 // skip test if not yet nautilus
3380 if (_get_required_osd_release(cluster) < "nautilus") {
3381 GTEST_SKIP() << "cluster is not yet nautilus, skipping test";
3382 }
3383
3384 // create object
3385 {
3386 bufferlist bl;
3387 bl.append("hi there");
3388 ObjectWriteOperation op;
3389 op.write_full(bl);
3390 ASSERT_EQ(0, ioctx.operate("foo", &op));
3391 }
3392 {
3393 bufferlist bl;
3394 bl.append("base chunk");
3395 ObjectWriteOperation op;
3396 op.write_full(bl);
3397 ASSERT_EQ(0, ioctx.operate("foo-chunk", &op));
3398 }
3399 {
3400 bufferlist bl;
3401 bl.append("there");
3402 ObjectWriteOperation op;
3403 op.write_full(bl);
3404 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
3405 }
3406 {
3407 bufferlist bl;
3408 bl.append("CHUNK");
3409 ObjectWriteOperation op;
3410 op.write_full(bl);
3411 ASSERT_EQ(0, cache_ioctx.operate("bar-chunk", &op));
3412 }
3413
3414 // wait for maps to settle
3415 cluster.wait_for_latest_osdmap();
3416
3417 // set-redirect
3418 {
3419 ObjectWriteOperation op;
3420 op.set_redirect("bar", cache_ioctx, 0, CEPH_OSD_OP_FLAG_WITH_REFERENCE);
3421 librados::AioCompletion *completion = cluster.aio_create_completion();
3422 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3423 completion->wait_for_complete();
3424 ASSERT_EQ(0, completion->get_return_value());
3425 completion->release();
3426 }
3427 // set-chunk
3428 {
3429 ObjectReadOperation op;
3430 op.set_chunk(0, 2, cache_ioctx, "bar-chunk", 0, CEPH_OSD_OP_FLAG_WITH_REFERENCE);
3431 librados::AioCompletion *completion = cluster.aio_create_completion();
3432 ASSERT_EQ(0, ioctx.aio_operate("foo-chunk", completion, &op,
3433 librados::OPERATION_IGNORE_CACHE, NULL));
3434 completion->wait_for_complete();
3435 ASSERT_EQ(0, completion->get_return_value());
3436 completion->release();
3437 }
3438 // redirect's refcount
3439 {
3440 bufferlist t;
3441 cache_ioctx.getxattr("bar", CHUNK_REFCOUNT_ATTR, t);
3442 chunk_refs_t refs;
3443 try {
3444 auto iter = t.cbegin();
3445 decode(refs, iter);
3446 } catch (buffer::error& err) {
3447 ASSERT_TRUE(0);
3448 }
3449 ASSERT_EQ(1u, refs.count());
3450 }
3451 // chunk's refcount
3452 {
3453 bufferlist t;
3454 cache_ioctx.getxattr("bar-chunk", CHUNK_REFCOUNT_ATTR, t);
3455 chunk_refs_t refs;
3456 try {
3457 auto iter = t.cbegin();
3458 decode(refs, iter);
3459 } catch (buffer::error& err) {
3460 ASSERT_TRUE(0);
3461 }
3462 ASSERT_EQ(1u, refs.count());
3463 }
3464
3465 // unset-manifest for set-redirect
3466 {
3467 ObjectWriteOperation op;
3468 op.unset_manifest();
3469 librados::AioCompletion *completion = cluster.aio_create_completion();
3470 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3471 completion->wait_for_complete();
3472 ASSERT_EQ(0, completion->get_return_value());
3473 completion->release();
3474 }
3475
3476 // unset-manifest for set-chunk
3477 {
3478 ObjectWriteOperation op;
3479 op.unset_manifest();
3480 librados::AioCompletion *completion = cluster.aio_create_completion();
3481 ASSERT_EQ(0, ioctx.aio_operate("foo-chunk", completion, &op));
3482 completion->wait_for_complete();
3483 ASSERT_EQ(0, completion->get_return_value());
3484 completion->release();
3485 }
3486 // redirect's refcount
3487 {
3488 bufferlist t;
3489 cache_ioctx.getxattr("bar-chunk", CHUNK_REFCOUNT_ATTR, t);
3490 if (t.length() != 0U) {
3491 ObjectWriteOperation op;
3492 op.unset_manifest();
3493 librados::AioCompletion *completion = cluster.aio_create_completion();
3494 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3495 completion->wait_for_complete();
3496 ASSERT_EQ(-EOPNOTSUPP, completion->get_return_value());
3497 completion->release();
3498 }
3499 }
3500 // chunk's refcount
3501 {
3502 bufferlist t;
3503 cache_ioctx.getxattr("bar-chunk", CHUNK_REFCOUNT_ATTR, t);
3504 if (t.length() != 0U) {
3505 ObjectWriteOperation op;
3506 op.unset_manifest();
3507 librados::AioCompletion *completion = cluster.aio_create_completion();
3508 ASSERT_EQ(0, ioctx.aio_operate("foo-chunk", completion, &op));
3509 completion->wait_for_complete();
3510 ASSERT_EQ(-EOPNOTSUPP, completion->get_return_value());
3511 completion->release();
3512 }
3513 }
3514
3515 // wait for maps to settle before next test
3516 cluster.wait_for_latest_osdmap();
3517 }
3518
3519 #include "common/ceph_crypto.h"
3520 using ceph::crypto::SHA1;
3521 #include "rgw/rgw_common.h"
3522 TEST_F(LibRadosTwoPoolsPP, ManifestDedupRefRead) {
3523 // skip test if not yet nautilus
3524 if (_get_required_osd_release(cluster) < "nautilus") {
3525 GTEST_SKIP() << "cluster is not yet nautilus, skipping test";
3526 }
3527
3528 bufferlist inbl;
3529 ASSERT_EQ(0, cluster.mon_command(
3530 set_pool_str(pool_name, "fingerprint_algorithm", "sha1"),
3531 inbl, NULL, NULL));
3532 cluster.wait_for_latest_osdmap();
3533 string tgt_oid;
3534
3535 // get fp_oid
3536 tgt_oid = get_fp_oid("There hi", "sha1");
3537
3538 // create object
3539 {
3540 bufferlist bl;
3541 bl.append("There hi");
3542 ObjectWriteOperation op;
3543 op.write_full(bl);
3544 ASSERT_EQ(0, ioctx.operate("foo", &op));
3545 }
3546 {
3547 bufferlist bl;
3548 bl.append("There hi");
3549 ObjectWriteOperation op;
3550 op.write_full(bl);
3551 ASSERT_EQ(0, ioctx.operate("foo-dedup", &op));
3552 }
3553
3554 // write
3555 {
3556 ObjectWriteOperation op;
3557 bufferlist bl;
3558 bl.append("There hi");
3559 op.write_full(bl);
3560 ASSERT_EQ(0, cache_ioctx.operate(tgt_oid, &op));
3561 }
3562
3563 // set-chunk (dedup)
3564 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 8, tgt_oid, "foo-dedup");
3565 // set-chunk (dedup)
3566 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 8, tgt_oid, "foo");
3567 // chunk's refcount
3568 {
3569 bufferlist t;
3570 cache_ioctx.getxattr(tgt_oid, CHUNK_REFCOUNT_ATTR, t);
3571 chunk_refs_t refs;
3572 try {
3573 auto iter = t.cbegin();
3574 decode(refs, iter);
3575 } catch (buffer::error& err) {
3576 ASSERT_TRUE(0);
3577 }
3578 ASSERT_LE(2u, refs.count());
3579 }
3580
3581 // wait for maps to settle before next test
3582 cluster.wait_for_latest_osdmap();
3583 }
3584
3585 TEST_F(LibRadosTwoPoolsPP, ManifestSnapRefcount) {
3586 // skip test if not yet octopus
3587 if (_get_required_osd_release(cluster) < "octopus") {
3588 cout << "cluster is not yet octopus, skipping test" << std::endl;
3589 return;
3590 }
3591
3592 bufferlist inbl;
3593 ASSERT_EQ(0, cluster.mon_command(
3594 set_pool_str(pool_name, "fingerprint_algorithm", "sha1"),
3595 inbl, NULL, NULL));
3596 cluster.wait_for_latest_osdmap();
3597
3598 // create object
3599 {
3600 bufferlist bl;
3601 bl.append("there hi");
3602 ObjectWriteOperation op;
3603 op.write_full(bl);
3604 ASSERT_EQ(0, ioctx.operate("foo", &op));
3605 }
3606 {
3607 bufferlist bl;
3608 bl.append("there hi");
3609 ObjectWriteOperation op;
3610 op.write_full(bl);
3611 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
3612 }
3613
3614 // wait for maps to settle
3615 cluster.wait_for_latest_osdmap();
3616
3617 string er_fp_oid, hi_fp_oid, bb_fp_oid;
3618
3619 // get fp_oid
3620 er_fp_oid = get_fp_oid("er", "sha1");
3621 hi_fp_oid = get_fp_oid("hi", "sha1");
3622 bb_fp_oid = get_fp_oid("bb", "sha1");
3623
3624 // write
3625 {
3626 ObjectWriteOperation op;
3627 bufferlist bl;
3628 bl.append("er");
3629 op.write_full(bl);
3630 ASSERT_EQ(0, cache_ioctx.operate(er_fp_oid, &op));
3631 }
3632 // write
3633 {
3634 ObjectWriteOperation op;
3635 bufferlist bl;
3636 bl.append("hi");
3637 op.write_full(bl);
3638 ASSERT_EQ(0, cache_ioctx.operate(hi_fp_oid, &op));
3639 }
3640 // write
3641 {
3642 ObjectWriteOperation op;
3643 bufferlist bl;
3644 bl.append("bb");
3645 op.write_full(bl);
3646 ASSERT_EQ(0, cache_ioctx.operate(bb_fp_oid, &op));
3647 }
3648
3649 // set-chunk (dedup)
3650 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, er_fp_oid, "foo");
3651 // set-chunk (dedup)
3652 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, hi_fp_oid, "foo");
3653
3654 // make all chunks dirty --> flush
3655 // foo: [er] [hi]
3656
3657 // check chunk's refcount
3658 {
3659 bufferlist t;
3660 SHA1 sha1_gen;
3661 int size = strlen("er");
3662 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3663 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3664 sha1_gen.Update((const unsigned char *)"er", size);
3665 sha1_gen.Final(fingerprint);
3666 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3667 cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
3668 chunk_refs_t refs;
3669 try {
3670 auto iter = t.cbegin();
3671 decode(refs, iter);
3672 } catch (buffer::error& err) {
3673 ASSERT_TRUE(0);
3674 }
3675 ASSERT_LE(1u, refs.count());
3676 }
3677
3678 // create a snapshot, clone
3679 vector<uint64_t> my_snaps(1);
3680 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3681 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3682 my_snaps));
3683
3684 // foo: [bb] [hi]
3685 // create a clone
3686 {
3687 bufferlist bl;
3688 bl.append("Thbbe");
3689 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3690 }
3691 // make clean
3692 {
3693 bufferlist bl;
3694 bl.append("Thbbe");
3695 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3696 }
3697 // set-chunk (dedup)
3698 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, bb_fp_oid, "foo");
3699
3700 // and another
3701 my_snaps.resize(2);
3702 my_snaps[1] = my_snaps[0];
3703 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3704 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3705 my_snaps));
3706
3707 // foo: [er] [hi]
3708 // create a clone
3709 {
3710 bufferlist bl;
3711 bl.append("There");
3712 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3713 }
3714 // make clean
3715 {
3716 bufferlist bl;
3717 bl.append("There");
3718 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3719 }
3720 // set-chunk (dedup)
3721 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, er_fp_oid, "foo");
3722
3723 // check chunk's refcount
3724 {
3725 bufferlist t;
3726 SHA1 sha1_gen;
3727 int size = strlen("er");
3728 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3729 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3730 sha1_gen.Update((const unsigned char *)"er", size);
3731 sha1_gen.Final(fingerprint);
3732 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3733 cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
3734 chunk_refs_t refs;
3735 try {
3736 auto iter = t.cbegin();
3737 decode(refs, iter);
3738 } catch (buffer::error& err) {
3739 ASSERT_TRUE(0);
3740 }
3741 ASSERT_LE(2u, refs.count());
3742 }
3743
3744 // and another
3745 my_snaps.resize(3);
3746 my_snaps[2] = my_snaps[1];
3747 my_snaps[1] = my_snaps[0];
3748 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3749 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3750 my_snaps));
3751
3752 // foo: [bb] [hi]
3753 // create a clone
3754 {
3755 bufferlist bl;
3756 bl.append("Thbbe");
3757 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3758 }
3759 // make clean
3760 {
3761 bufferlist bl;
3762 bl.append("Thbbe");
3763 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3764 }
3765 // set-chunk (dedup)
3766 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, bb_fp_oid, "foo");
3767
3768 /*
3769 * snap[2]: [er] [hi]
3770 * snap[1]: [bb] [hi]
3771 * snap[0]: [er] [hi]
3772 * head: [bb] [hi]
3773 */
3774
3775 // check chunk's refcount
3776 {
3777 bufferlist t;
3778 SHA1 sha1_gen;
3779 int size = strlen("hi");
3780 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3781 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3782 sha1_gen.Update((const unsigned char *)"hi", size);
3783 sha1_gen.Final(fingerprint);
3784 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3785 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 1);
3786 }
3787
3788 // check chunk's refcount
3789 {
3790 bufferlist t;
3791 SHA1 sha1_gen;
3792 int size = strlen("er");
3793 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3794 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3795 sha1_gen.Update((const unsigned char *)"er", size);
3796 sha1_gen.Final(fingerprint);
3797 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3798 cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
3799 chunk_refs_t refs;
3800 try {
3801 auto iter = t.cbegin();
3802 decode(refs, iter);
3803 } catch (buffer::error& err) {
3804 ASSERT_TRUE(0);
3805 }
3806 ASSERT_LE(2u, refs.count());
3807 }
3808
3809 // remove snap
3810 ioctx.selfmanaged_snap_remove(my_snaps[2]);
3811
3812 /*
3813 * snap[1]: [bb] [hi]
3814 * snap[0]: [er] [hi]
3815 * head: [bb] [hi]
3816 */
3817
3818 sleep(10);
3819
3820 // check chunk's refcount
3821 {
3822 bufferlist t;
3823 SHA1 sha1_gen;
3824 int size = strlen("hi");
3825 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3826 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3827 sha1_gen.Update((const unsigned char *)"hi", size);
3828 sha1_gen.Final(fingerprint);
3829 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3830 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 1);
3831 }
3832
3833 // remove snap
3834 ioctx.selfmanaged_snap_remove(my_snaps[0]);
3835
3836 /*
3837 * snap[1]: [bb] [hi]
3838 * head: [bb] [hi]
3839 */
3840
3841 sleep(10);
3842
3843 // check chunk's refcount
3844 {
3845 bufferlist t;
3846 SHA1 sha1_gen;
3847 int size = strlen("bb");
3848 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3849 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3850 sha1_gen.Update((const unsigned char *)"bb", size);
3851 sha1_gen.Final(fingerprint);
3852 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3853 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 1);
3854 }
3855
3856 // remove snap
3857 ioctx.selfmanaged_snap_remove(my_snaps[1]);
3858
3859 /*
3860 * snap[1]: [bb] [hi]
3861 */
3862
3863 sleep(10);
3864
3865 // check chunk's refcount
3866 {
3867 bufferlist t;
3868 SHA1 sha1_gen;
3869 int size = strlen("bb");
3870 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3871 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3872 sha1_gen.Update((const unsigned char *)"bb", size);
3873 sha1_gen.Final(fingerprint);
3874 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3875 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 1);
3876 }
3877
3878 // check chunk's refcount
3879 {
3880 bufferlist t;
3881 SHA1 sha1_gen;
3882 int size = strlen("hi");
3883 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3884 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3885 sha1_gen.Update((const unsigned char *)"hi", size);
3886 sha1_gen.Final(fingerprint);
3887 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3888 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 1);
3889 }
3890 }
3891
3892 TEST_F(LibRadosTwoPoolsPP, ManifestSnapRefcount2) {
3893 // skip test if not yet octopus
3894 if (_get_required_osd_release(cluster) < "octopus") {
3895 cout << "cluster is not yet octopus, skipping test" << std::endl;
3896 return;
3897 }
3898
3899 bufferlist inbl;
3900 ASSERT_EQ(0, cluster.mon_command(
3901 set_pool_str(pool_name, "fingerprint_algorithm", "sha1"),
3902 inbl, NULL, NULL));
3903 cluster.wait_for_latest_osdmap();
3904
3905 // create object
3906 {
3907 bufferlist bl;
3908 bl.append("Thabe cdHI");
3909 ObjectWriteOperation op;
3910 op.write_full(bl);
3911 ASSERT_EQ(0, ioctx.operate("foo", &op));
3912 }
3913 {
3914 bufferlist bl;
3915 bl.append("there hiHI");
3916 ObjectWriteOperation op;
3917 op.write_full(bl);
3918 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
3919 }
3920
3921 string ab_fp_oid, cd_fp_oid, ef_fp_oid, BB_fp_oid;
3922
3923 // get fp_oid
3924 ab_fp_oid = get_fp_oid("ab", "sha1");
3925 cd_fp_oid = get_fp_oid("cd", "sha1");
3926 ef_fp_oid = get_fp_oid("ef", "sha1");
3927 BB_fp_oid = get_fp_oid("BB", "sha1");
3928
3929 // write
3930 {
3931 ObjectWriteOperation op;
3932 bufferlist bl;
3933 bl.append("ab");
3934 op.write_full(bl);
3935 ASSERT_EQ(0, cache_ioctx.operate(ab_fp_oid, &op));
3936 }
3937 // write
3938 {
3939 ObjectWriteOperation op;
3940 bufferlist bl;
3941 bl.append("cd");
3942 op.write_full(bl);
3943 ASSERT_EQ(0, cache_ioctx.operate(cd_fp_oid, &op));
3944 }
3945 // write
3946 {
3947 ObjectWriteOperation op;
3948 bufferlist bl;
3949 bl.append("ef");
3950 op.write_full(bl);
3951 ASSERT_EQ(0, cache_ioctx.operate(ef_fp_oid, &op));
3952 }
3953 // write
3954 {
3955 ObjectWriteOperation op;
3956 bufferlist bl;
3957 bl.append("BB");
3958 op.write_full(bl);
3959 ASSERT_EQ(0, cache_ioctx.operate(BB_fp_oid, &op));
3960 }
3961
3962 // set-chunk (dedup)
3963 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, ab_fp_oid, "foo");
3964 // set-chunk (dedup)
3965 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, cd_fp_oid, "foo");
3966 // set-chunk (dedup)
3967 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, ef_fp_oid, "foo");
3968
3969
3970 // make all chunks dirty --> flush
3971 // foo: [ab] [cd] [ef]
3972
3973 // create a snapshot, clone
3974 vector<uint64_t> my_snaps(1);
3975 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3976 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3977 my_snaps));
3978
3979 // foo: [BB] [BB] [ef]
3980 // create a clone
3981 {
3982 bufferlist bl;
3983 bl.append("ThBBe BB");
3984 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3985 }
3986 // make clean
3987 {
3988 bufferlist bl;
3989 bl.append("ThBBe BB");
3990 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3991 }
3992 // set-chunk (dedup)
3993 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, BB_fp_oid, "foo");
3994 // set-chunk (dedup)
3995 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, BB_fp_oid, "foo");
3996
3997 // and another
3998 my_snaps.resize(2);
3999 my_snaps[1] = my_snaps[0];
4000 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4001 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4002 my_snaps));
4003
4004 // foo: [ab] [cd] [ef]
4005 // create a clone
4006 {
4007 bufferlist bl;
4008 bl.append("Thabe cd");
4009 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4010 }
4011 // make clean
4012 {
4013 bufferlist bl;
4014 bl.append("Thabe cd");
4015 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4016 }
4017 // set-chunk (dedup)
4018 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, ab_fp_oid, "foo");
4019 // set-chunk (dedup)
4020 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, cd_fp_oid, "foo");
4021
4022 /*
4023 * snap[1]: [ab] [cd] [ef]
4024 * snap[0]: [BB] [BB] [ef]
4025 * head: [ab] [cd] [ef]
4026 */
4027
4028 // check chunk's refcount
4029 {
4030 bufferlist t;
4031 SHA1 sha1_gen;
4032 int size = strlen("ab");
4033 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
4034 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
4035 sha1_gen.Update((const unsigned char *)"ab", size);
4036 sha1_gen.Final(fingerprint);
4037 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
4038 cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
4039 chunk_refs_t refs;
4040 try {
4041 auto iter = t.cbegin();
4042 decode(refs, iter);
4043 } catch (buffer::error& err) {
4044 ASSERT_TRUE(0);
4045 }
4046 ASSERT_LE(2u, refs.count());
4047 }
4048
4049 // check chunk's refcount
4050 {
4051 bufferlist t;
4052 SHA1 sha1_gen;
4053 int size = strlen("cd");
4054 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
4055 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
4056 sha1_gen.Update((const unsigned char *)"cd", size);
4057 sha1_gen.Final(fingerprint);
4058 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
4059 cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
4060 chunk_refs_t refs;
4061 try {
4062 auto iter = t.cbegin();
4063 decode(refs, iter);
4064 } catch (buffer::error& err) {
4065 ASSERT_TRUE(0);
4066 }
4067 ASSERT_LE(2u, refs.count());
4068 }
4069
4070 // check chunk's refcount
4071 {
4072 bufferlist t;
4073 SHA1 sha1_gen;
4074 int size = strlen("BB");
4075 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
4076 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
4077 sha1_gen.Update((const unsigned char *)"BB", size);
4078 sha1_gen.Final(fingerprint);
4079 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
4080 cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
4081 chunk_refs_t refs;
4082 try {
4083 auto iter = t.cbegin();
4084 decode(refs, iter);
4085 } catch (buffer::error& err) {
4086 ASSERT_TRUE(0);
4087 }
4088 ASSERT_LE(2u, refs.count());
4089 }
4090
4091 // remove snap
4092 ioctx.selfmanaged_snap_remove(my_snaps[0]);
4093
4094 /*
4095 * snap[1]: [ab] [cd] [ef]
4096 * head: [ab] [cd] [ef]
4097 */
4098
4099 sleep(10);
4100
4101 // check chunk's refcount
4102 {
4103 bufferlist t;
4104 SHA1 sha1_gen;
4105 int size = strlen("BB");
4106 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
4107 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
4108 sha1_gen.Update((const unsigned char *)"BB", size);
4109 sha1_gen.Final(fingerprint);
4110 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
4111 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 0);
4112 }
4113 }
4114
4115 TEST_F(LibRadosTwoPoolsPP, ManifestTestSnapCreate) {
4116 // skip test if not yet octopus
4117 if (_get_required_osd_release(cluster) < "octopus") {
4118 GTEST_SKIP() << "cluster is not yet octopus, skipping test";
4119 }
4120
4121 // create object
4122 {
4123 bufferlist bl;
4124 bl.append("base chunk");
4125 ObjectWriteOperation op;
4126 op.write_full(bl);
4127 ASSERT_EQ(0, ioctx.operate("foo", &op));
4128 }
4129 {
4130 bufferlist bl;
4131 bl.append("CHUNKS CHUNKS");
4132 ObjectWriteOperation op;
4133 op.write_full(bl);
4134 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
4135 }
4136
4137 string ba_fp_oid, se_fp_oid, ch_fp_oid;
4138
4139 // get fp_oid
4140 ba_fp_oid = get_fp_oid("ba", "sha1");
4141 se_fp_oid = get_fp_oid("se", "sha1");
4142 ch_fp_oid = get_fp_oid("ch", "sha1");
4143
4144 // write
4145 {
4146 ObjectWriteOperation op;
4147 bufferlist bl;
4148 bl.append("ba");
4149 op.write_full(bl);
4150 ASSERT_EQ(0, cache_ioctx.operate(ba_fp_oid, &op));
4151 }
4152 // write
4153 {
4154 ObjectWriteOperation op;
4155 bufferlist bl;
4156 bl.append("se");
4157 op.write_full(bl);
4158 ASSERT_EQ(0, cache_ioctx.operate(se_fp_oid, &op));
4159 }
4160 // write
4161 {
4162 ObjectWriteOperation op;
4163 bufferlist bl;
4164 bl.append("ch");
4165 op.write_full(bl);
4166 ASSERT_EQ(0, cache_ioctx.operate(ch_fp_oid, &op));
4167 }
4168
4169 // set-chunk (dedup)
4170 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 2, ba_fp_oid, "foo");
4171
4172 // try to create a snapshot, clone
4173 vector<uint64_t> my_snaps(1);
4174 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4175 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4176 my_snaps));
4177
4178 // set-chunk (dedup)
4179 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, se_fp_oid, "foo");
4180
4181 // check whether clone is created
4182 ioctx.snap_set_read(librados::SNAP_DIR);
4183 {
4184 snap_set_t snap_set;
4185 int snap_ret;
4186 ObjectReadOperation op;
4187 op.list_snaps(&snap_set, &snap_ret);
4188 librados::AioCompletion *completion = cluster.aio_create_completion();
4189 ASSERT_EQ(0, ioctx.aio_operate(
4190 "foo", completion, &op,
4191 0, NULL));
4192 completion->wait_for_complete();
4193 ASSERT_EQ(0, snap_ret);
4194 ASSERT_LT(0u, snap_set.clones.size());
4195 ASSERT_EQ(1, snap_set.clones.size());
4196 }
4197
4198 // create a clone
4199 ioctx.snap_set_read(librados::SNAP_HEAD);
4200 {
4201 bufferlist bl;
4202 bl.append("B");
4203 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 0));
4204 }
4205
4206 ioctx.snap_set_read(my_snaps[0]);
4207 // set-chunk to clone
4208 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, ch_fp_oid, "foo");
4209 }
4210
4211 TEST_F(LibRadosTwoPoolsPP, ManifestRedirectAfterPromote) {
4212 // skip test if not yet octopus
4213 if (_get_required_osd_release(cluster) < "octopus") {
4214 GTEST_SKIP() << "cluster is not yet octopus, skipping test";
4215 }
4216
4217 // create object
4218 {
4219 bufferlist bl;
4220 bl.append("base chunk");
4221 ObjectWriteOperation op;
4222 op.write_full(bl);
4223 ASSERT_EQ(0, ioctx.operate("foo", &op));
4224 }
4225 {
4226 bufferlist bl;
4227 bl.append("BASE CHUNK");
4228 ObjectWriteOperation op;
4229 op.write_full(bl);
4230 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
4231 }
4232
4233 // set-redirect
4234 {
4235 ObjectWriteOperation op;
4236 op.set_redirect("bar", cache_ioctx, 0);
4237 librados::AioCompletion *completion = cluster.aio_create_completion();
4238 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
4239 completion->wait_for_complete();
4240 ASSERT_EQ(0, completion->get_return_value());
4241 completion->release();
4242 }
4243
4244 // promote
4245 {
4246 ObjectWriteOperation op;
4247 op.tier_promote();
4248 librados::AioCompletion *completion = cluster.aio_create_completion();
4249 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
4250 completion->wait_for_complete();
4251 ASSERT_EQ(0, completion->get_return_value());
4252 completion->release();
4253 }
4254
4255 // write
4256 {
4257 bufferlist bl;
4258 bl.append("a");
4259 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 0));
4260 }
4261
4262 // read and verify the object (redirect)
4263 {
4264 bufferlist bl;
4265 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4266 ASSERT_EQ('a', bl[0]);
4267 }
4268
4269 // read and verify the object (redirect)
4270 {
4271 bufferlist bl;
4272 ASSERT_EQ(1, cache_ioctx.read("bar", bl, 1, 0));
4273 ASSERT_EQ('B', bl[0]);
4274 }
4275 }
4276
4277 TEST_F(LibRadosTwoPoolsPP, ManifestCheckRefcountWhenModification) {
4278 // skip test if not yet octopus
4279 if (_get_required_osd_release(cluster) < "octopus") {
4280 GTEST_SKIP() << "cluster is not yet octopus, skipping test";
4281 }
4282
4283 bufferlist inbl;
4284 ASSERT_EQ(0, cluster.mon_command(
4285 set_pool_str(pool_name, "fingerprint_algorithm", "sha1"),
4286 inbl, NULL, NULL));
4287 cluster.wait_for_latest_osdmap();
4288
4289 // create object
4290 {
4291 bufferlist bl;
4292 bl.append("there hiHI");
4293 ObjectWriteOperation op;
4294 op.write_full(bl);
4295 ASSERT_EQ(0, ioctx.operate("foo", &op));
4296 }
4297
4298 string er_fp_oid, hi_fp_oid, HI_fp_oid, ai_fp_oid, bi_fp_oid,
4299 Er_fp_oid, Hi_fp_oid, Si_fp_oid;
4300
4301 // get fp_oid
4302 er_fp_oid = get_fp_oid("er", "sha1");
4303 hi_fp_oid = get_fp_oid("hi", "sha1");
4304 HI_fp_oid = get_fp_oid("HI", "sha1");
4305 ai_fp_oid = get_fp_oid("ai", "sha1");
4306 bi_fp_oid = get_fp_oid("bi", "sha1");
4307 Er_fp_oid = get_fp_oid("Er", "sha1");
4308 Hi_fp_oid = get_fp_oid("Hi", "sha1");
4309 Si_fp_oid = get_fp_oid("Si", "sha1");
4310
4311 // write
4312 {
4313 ObjectWriteOperation op;
4314 bufferlist bl;
4315 bl.append("er");
4316 op.write_full(bl);
4317 ASSERT_EQ(0, cache_ioctx.operate(er_fp_oid, &op));
4318 }
4319 // write
4320 {
4321 ObjectWriteOperation op;
4322 bufferlist bl;
4323 bl.append("hi");
4324 op.write_full(bl);
4325 ASSERT_EQ(0, cache_ioctx.operate(hi_fp_oid, &op));
4326 }
4327 // write
4328 {
4329 ObjectWriteOperation op;
4330 bufferlist bl;
4331 bl.append("HI");
4332 op.write_full(bl);
4333 ASSERT_EQ(0, cache_ioctx.operate(HI_fp_oid, &op));
4334 }
4335 // write
4336 {
4337 ObjectWriteOperation op;
4338 bufferlist bl;
4339 bl.append("ai");
4340 op.write_full(bl);
4341 ASSERT_EQ(0, cache_ioctx.operate(ai_fp_oid, &op));
4342 }
4343 // write
4344 {
4345 ObjectWriteOperation op;
4346 bufferlist bl;
4347 bl.append("bi");
4348 op.write_full(bl);
4349 ASSERT_EQ(0, cache_ioctx.operate(bi_fp_oid, &op));
4350 }
4351 // write
4352 {
4353 ObjectWriteOperation op;
4354 bufferlist bl;
4355 bl.append("Er");
4356 op.write_full(bl);
4357 ASSERT_EQ(0, cache_ioctx.operate(Er_fp_oid, &op));
4358 }
4359 // write
4360 {
4361 ObjectWriteOperation op;
4362 bufferlist bl;
4363 bl.append("Hi");
4364 op.write_full(bl);
4365 ASSERT_EQ(0, cache_ioctx.operate(Hi_fp_oid, &op));
4366 }
4367 // write
4368 {
4369 ObjectWriteOperation op;
4370 bufferlist bl;
4371 bl.append("Si");
4372 op.write_full(bl);
4373 ASSERT_EQ(0, cache_ioctx.operate(Si_fp_oid, &op));
4374 }
4375
4376 // set-chunk (dedup)
4377 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, er_fp_oid, "foo");
4378 // set-chunk (dedup)
4379 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, hi_fp_oid, "foo");
4380 // set-chunk (dedup)
4381 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, HI_fp_oid, "foo");
4382
4383 // foo head: [er] [hi] [HI]
4384
4385 // create a snapshot, clone
4386 vector<uint64_t> my_snaps(1);
4387 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4388 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4389 my_snaps));
4390
4391
4392 // foo snap[0]: [er] [hi] [HI]
4393 // foo head : [er] [ai] [HI]
4394 // create a clone
4395 {
4396 bufferlist bl;
4397 bl.append("a");
4398 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
4399 }
4400 // write
4401 {
4402 bufferlist bl;
4403 bl.append("a");
4404 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
4405 }
4406
4407 // set-chunk (dedup)
4408 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, ai_fp_oid, "foo");
4409
4410 // foo snap[0]: [er] [hi] [HI]
4411 // foo head : [er] [bi] [HI]
4412 // create a clone
4413 {
4414 bufferlist bl;
4415 bl.append("b");
4416 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
4417 }
4418 // write
4419 {
4420 bufferlist bl;
4421 bl.append("b");
4422 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
4423 }
4424
4425 // set-chunk (dedup)
4426 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, bi_fp_oid, "foo");
4427
4428 sleep(10);
4429
4430 // check chunk's refcount
4431 // [ai]'s refcount should be 0
4432 {
4433 bufferlist t;
4434 SHA1 sha1_gen;
4435 int size = strlen("ai");
4436 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
4437 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
4438 sha1_gen.Update((const unsigned char *)"ai", size);
4439 sha1_gen.Final(fingerprint);
4440 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
4441 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 0);
4442 }
4443
4444 // foo snap[0]: [er] [hi] [HI]
4445 // foo head : [Er] [Hi] [Si]
4446 // create a clone
4447 {
4448 bufferlist bl;
4449 bl.append("thEre HiSi");
4450 ObjectWriteOperation op;
4451 op.write_full(bl);
4452 ASSERT_EQ(0, ioctx.operate("foo", &op));
4453 }
4454 // write
4455 {
4456 bufferlist bl;
4457 bl.append("thEre HiSi");
4458 ObjectWriteOperation op;
4459 op.write_full(bl);
4460 ASSERT_EQ(0, ioctx.operate("foo", &op));
4461 }
4462
4463 // set-chunk (dedup)
4464 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, Er_fp_oid, "foo");
4465 // set-chunk (dedup)
4466 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, Hi_fp_oid, "foo");
4467 // set-chunk (dedup)
4468 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, Si_fp_oid, "foo");
4469
4470 // foo snap[0]: [er] [hi] [HI]
4471 // foo head : [ER] [HI] [SI]
4472 // write
4473 {
4474 bufferlist bl;
4475 bl.append("thERe HISI");
4476 ObjectWriteOperation op;
4477 op.write_full(bl);
4478 ASSERT_EQ(0, ioctx.operate("foo", &op));
4479 }
4480
4481 sleep(10);
4482
4483 // check chunk's refcount
4484 // [Er]'s refcount should be 0
4485 {
4486 bufferlist t;
4487 SHA1 sha1_gen;
4488 int size = strlen("Er");
4489 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
4490 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
4491 sha1_gen.Update((const unsigned char *)"Er", size);
4492 sha1_gen.Final(fingerprint);
4493 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
4494 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 0);
4495 }
4496 }
4497
4498 TEST_F(LibRadosTwoPoolsPP, ManifestSnapIncCount) {
4499 // skip test if not yet octopus
4500 if (_get_required_osd_release(cluster) < "octopus") {
4501 cout << "cluster is not yet octopus, skipping test" << std::endl;
4502 return;
4503 }
4504
4505 // create object
4506 {
4507 bufferlist bl;
4508 bl.append("there hiHI");
4509 ObjectWriteOperation op;
4510 op.write_full(bl);
4511 ASSERT_EQ(0, ioctx.operate("foo", &op));
4512 }
4513 {
4514 bufferlist bl;
4515 bl.append("there hiHI");
4516 ObjectWriteOperation op;
4517 op.write_full(bl);
4518 ASSERT_EQ(0, cache_ioctx.operate("chunk1", &op));
4519 }
4520 {
4521 bufferlist bl;
4522 bl.append("there hiHI");
4523 ObjectWriteOperation op;
4524 op.write_full(bl);
4525 ASSERT_EQ(0, cache_ioctx.operate("chunk2", &op));
4526 }
4527 {
4528 bufferlist bl;
4529 bl.append("there hiHI");
4530 ObjectWriteOperation op;
4531 op.write_full(bl);
4532 ASSERT_EQ(0, cache_ioctx.operate("chunk3", &op));
4533 }
4534 {
4535 bufferlist bl;
4536 bl.append("there hiHI");
4537 ObjectWriteOperation op;
4538 op.write_full(bl);
4539 ASSERT_EQ(0, cache_ioctx.operate("chunk4", &op));
4540 }
4541
4542 // wait for maps to settle
4543 cluster.wait_for_latest_osdmap();
4544
4545 // create a snapshot, clone
4546 vector<uint64_t> my_snaps(1);
4547 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4548 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4549 my_snaps));
4550
4551 {
4552 bufferlist bl;
4553 bl.append("there hiHI");
4554 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4555 }
4556
4557 my_snaps.resize(2);
4558 my_snaps[1] = my_snaps[0];
4559 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4560 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4561 my_snaps));
4562
4563 {
4564 bufferlist bl;
4565 bl.append("there hiHI");
4566 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4567 }
4568
4569 // set-chunk
4570 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk1", "foo");
4571 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk4", "foo");
4572 // foo snap[1]:
4573 // foo snap[0]:
4574 // foo head : [chunk1] [chunk4]
4575
4576 ioctx.snap_set_read(my_snaps[1]);
4577 // set-chunk
4578 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, "chunk2", "foo");
4579 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk4", "foo");
4580 // foo snap[1]: [chunk2] [chunk4]
4581 // foo snap[0]:
4582 // foo head : [chunk1] [chunk4]
4583
4584 ioctx.snap_set_read(my_snaps[0]);
4585 // set-chunk
4586 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, "chunk2", "foo");
4587 // foo snap[1]: [chunk2] [chunk4]
4588 // foo snap[0]: [chunk2]
4589 // foo head : [chunk1] [chunk4]
4590
4591 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk3", "foo");
4592 // foo snap[1]: [chunk2] [chunk4]
4593 // foo snap[0]: [chunk3] [chunk2]
4594 // foo head : [chunk1] [chunk4]
4595 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk4", "foo");
4596 // foo snap[1]: [chunk2] [chunk4]
4597 // foo snap[0]: [chunk3] [chunk2] [chunk4]
4598 // foo head : [chunk1] [chunk4]
4599
4600 // check chunk's refcount
4601 check_fp_oid_refcount(cache_ioctx, "chunk1", 1u, "");
4602
4603 // check chunk's refcount
4604 check_fp_oid_refcount(cache_ioctx, "chunk2", 1u, "");
4605
4606 // check chunk's refcount
4607 check_fp_oid_refcount(cache_ioctx, "chunk3", 1u, "");
4608 sleep(10);
4609
4610 // check chunk's refcount
4611 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk4", 1);
4612 }
4613
4614 TEST_F(LibRadosTwoPoolsPP, ManifestEvict) {
4615 // skip test if not yet octopus
4616 if (_get_required_osd_release(cluster) < "octopus") {
4617 cout << "cluster is not yet octopus, skipping test" << std::endl;
4618 return;
4619 }
4620
4621 // create object
4622 {
4623 bufferlist bl;
4624 bl.append("there hiHI");
4625 ObjectWriteOperation op;
4626 op.write_full(bl);
4627 ASSERT_EQ(0, ioctx.operate("foo", &op));
4628 }
4629 {
4630 bufferlist bl;
4631 bl.append("there hiHI");
4632 ObjectWriteOperation op;
4633 op.write_full(bl);
4634 ASSERT_EQ(0, cache_ioctx.operate("chunk1", &op));
4635 }
4636 {
4637 bufferlist bl;
4638 bl.append("there hiHI");
4639 ObjectWriteOperation op;
4640 op.write_full(bl);
4641 ASSERT_EQ(0, cache_ioctx.operate("chunk2", &op));
4642 }
4643 {
4644 bufferlist bl;
4645 bl.append("there hiHI");
4646 ObjectWriteOperation op;
4647 op.write_full(bl);
4648 ASSERT_EQ(0, cache_ioctx.operate("chunk3", &op));
4649 }
4650 {
4651 bufferlist bl;
4652 bl.append("there hiHI");
4653 ObjectWriteOperation op;
4654 op.write_full(bl);
4655 ASSERT_EQ(0, cache_ioctx.operate("chunk4", &op));
4656 }
4657
4658 // wait for maps to settle
4659 cluster.wait_for_latest_osdmap();
4660
4661 // create a snapshot, clone
4662 vector<uint64_t> my_snaps(1);
4663 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4664 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4665 my_snaps));
4666
4667 {
4668 bufferlist bl;
4669 bl.append("there hiHI");
4670 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4671 }
4672
4673 my_snaps.resize(2);
4674 my_snaps[1] = my_snaps[0];
4675 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4676 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4677 my_snaps));
4678
4679 {
4680 bufferlist bl;
4681 bl.append("there hiHI");
4682 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4683 }
4684
4685 // set-chunk
4686 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk1", "foo");
4687 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk4", "foo");
4688 // foo snap[1]:
4689 // foo snap[0]:
4690 // foo head : [chunk1] [chunk4]
4691
4692 ioctx.snap_set_read(my_snaps[1]);
4693 // set-chunk
4694 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 10, "chunk2", "foo");
4695 // foo snap[1]: [ chunk2 ]
4696 // foo snap[0]:
4697 // foo head : [chunk1] [chunk4]
4698
4699 ioctx.snap_set_read(my_snaps[0]);
4700 // set-chunk
4701 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, "chunk2", "foo");
4702 // foo snap[1]: [ chunk2 ]
4703 // foo snap[0]: [chunk2]
4704 // foo head : [chunk1] [chunk4]
4705
4706 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk3", "foo");
4707 // foo snap[1]: [ chunk2 ]
4708 // foo snap[0]: [chunk3] [chunk2]
4709 // foo head : [chunk1] [chunk4]
4710 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk4", "foo");
4711 // foo snap[1]: [ chunk2 ]
4712 // foo snap[0]: [chunk3] [chunk2] [chunk4]
4713 // foo head : [chunk1] [chunk4]
4714 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 2, "chunk4", "foo");
4715 // foo snap[1]: [ chunk2 ]
4716 // foo snap[0]: [chunk4] [chunk3] [chunk2] [chunk4]
4717 // foo head : [chunk1] [chunk4]
4718 manifest_set_chunk(cluster, cache_ioctx, ioctx, 4, 2, "chunk1", "foo");
4719 // foo snap[1]: [ chunk2 ]
4720 // foo snap[0]: [chunk4] [chunk3] [chunk1] [chunk2] [chunk4]
4721 // foo head : [chunk1] [chunk4]
4722
4723 {
4724 ObjectReadOperation op, stat_op;
4725 uint64_t size;
4726 op.tier_evict();
4727 librados::AioCompletion *completion = cluster.aio_create_completion();
4728 ASSERT_EQ(0, ioctx.aio_operate(
4729 "foo", completion, &op,
4730 librados::OPERATION_IGNORE_OVERLAY, NULL));
4731 completion->wait_for_complete();
4732 ASSERT_EQ(0, completion->get_return_value());
4733
4734 stat_op.stat(&size, NULL, NULL);
4735 ASSERT_EQ(0, ioctx.operate("foo", &stat_op, NULL));
4736 ASSERT_EQ(10, size);
4737 }
4738
4739 ioctx.snap_set_read(librados::SNAP_HEAD);
4740 {
4741 ObjectReadOperation op, stat_op;
4742 uint64_t size;
4743 op.tier_evict();
4744 librados::AioCompletion *completion = cluster.aio_create_completion();
4745 ASSERT_EQ(0, ioctx.aio_operate(
4746 "foo", completion, &op,
4747 librados::OPERATION_IGNORE_OVERLAY, NULL));
4748 completion->wait_for_complete();
4749 ASSERT_EQ(0, completion->get_return_value());
4750
4751 stat_op.stat(&size, NULL, NULL);
4752 ASSERT_EQ(0, ioctx.operate("foo", &stat_op, NULL));
4753 ASSERT_EQ(strlen("there hiHI"), size);
4754 }
4755
4756 }
4757
4758 TEST_F(LibRadosTwoPoolsPP, ManifestEvictPromote) {
4759 // skip test if not yet octopus
4760 if (_get_required_osd_release(cluster) < "octopus") {
4761 cout << "cluster is not yet octopus, skipping test" << std::endl;
4762 return;
4763 }
4764
4765 // create object
4766 {
4767 bufferlist bl;
4768 bl.append("there hiHI");
4769 ObjectWriteOperation op;
4770 op.write_full(bl);
4771 ASSERT_EQ(0, ioctx.operate("foo", &op));
4772 }
4773 {
4774 bufferlist bl;
4775 bl.append("EREHT hiHI");
4776 ObjectWriteOperation op;
4777 op.write_full(bl);
4778 ASSERT_EQ(0, cache_ioctx.operate("chunk1", &op));
4779 }
4780 {
4781 bufferlist bl;
4782 bl.append("there hiHI");
4783 ObjectWriteOperation op;
4784 op.write_full(bl);
4785 ASSERT_EQ(0, cache_ioctx.operate("chunk2", &op));
4786 }
4787 {
4788 bufferlist bl;
4789 bl.append("THERE HIHI");
4790 ObjectWriteOperation op;
4791 op.write_full(bl);
4792 ASSERT_EQ(0, cache_ioctx.operate("chunk3", &op));
4793 }
4794
4795 // wait for maps to settle
4796 cluster.wait_for_latest_osdmap();
4797
4798 // create a snapshot, clone
4799 vector<uint64_t> my_snaps(1);
4800 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4801 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4802 my_snaps));
4803
4804 {
4805 bufferlist bl;
4806 bl.append("there");
4807 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4808 }
4809
4810 // set-chunk
4811 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 2, "chunk1", "foo");
4812 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk2", "foo");
4813 // foo snap[0]:
4814 // foo head : [chunk1] [chunk2]
4815
4816 ioctx.snap_set_read(my_snaps[0]);
4817 // set-chunk
4818 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 10, "chunk3", "foo");
4819 // foo snap[0]: [ chunk3 ]
4820 // foo head : [chunk1] [chunk2]
4821
4822
4823 {
4824 ObjectReadOperation op, stat_op;
4825 uint64_t size;
4826 op.tier_evict();
4827 librados::AioCompletion *completion = cluster.aio_create_completion();
4828 ASSERT_EQ(0, ioctx.aio_operate(
4829 "foo", completion, &op,
4830 librados::OPERATION_IGNORE_OVERLAY, NULL));
4831 completion->wait_for_complete();
4832 ASSERT_EQ(0, completion->get_return_value());
4833
4834 stat_op.stat(&size, NULL, NULL);
4835 ASSERT_EQ(0, ioctx.operate("foo", &stat_op, NULL));
4836 ASSERT_EQ(10, size);
4837
4838 }
4839 {
4840 bufferlist bl;
4841 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4842 ASSERT_EQ('T', bl[0]);
4843 }
4844
4845 ioctx.snap_set_read(librados::SNAP_HEAD);
4846 {
4847 bufferlist bl;
4848 ASSERT_EQ(10, ioctx.read("foo", bl, 10, 0));
4849 ASSERT_EQ('H', bl[8]);
4850 }
4851 }
4852
4853
4854 TEST_F(LibRadosTwoPoolsPP, ManifestSnapSizeMismatch) {
4855 // skip test if not yet octopus
4856 if (_get_required_osd_release(cluster) < "octopus") {
4857 cout << "cluster is not yet octopus, skipping test" << std::endl;
4858 return;
4859 }
4860
4861 // create object
4862 {
4863 bufferlist bl;
4864 bl.append("there hiHI");
4865 ObjectWriteOperation op;
4866 op.write_full(bl);
4867 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
4868 }
4869 {
4870 bufferlist bl;
4871 bl.append("there hiHI");
4872 ObjectWriteOperation op;
4873 op.write_full(bl);
4874 ASSERT_EQ(0, ioctx.operate("chunk1", &op));
4875 }
4876 {
4877 bufferlist bl;
4878 bl.append("there HIHI");
4879 ObjectWriteOperation op;
4880 op.write_full(bl);
4881 ASSERT_EQ(0, ioctx.operate("chunk2", &op));
4882 }
4883
4884 // wait for maps to settle
4885 cluster.wait_for_latest_osdmap();
4886
4887 // create a snapshot, clone
4888 vector<uint64_t> my_snaps(1);
4889 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0]));
4890 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4891 my_snaps));
4892
4893 {
4894 bufferlist bl;
4895 bl.append("There hiHI");
4896 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
4897 }
4898
4899 my_snaps.resize(2);
4900 my_snaps[1] = my_snaps[0];
4901 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0]));
4902 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4903 my_snaps));
4904
4905 {
4906 bufferlist bl;
4907 bl.append("tHere hiHI");
4908 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
4909 }
4910
4911 // set-chunk
4912 manifest_set_chunk(cluster, ioctx, cache_ioctx, 0, 10, "chunk1", "foo");
4913
4914 cache_ioctx.snap_set_read(my_snaps[1]);
4915
4916 // set-chunk
4917 manifest_set_chunk(cluster, ioctx, cache_ioctx, 0, 10, "chunk2", "foo");
4918
4919 // evict
4920 {
4921 ObjectReadOperation op, stat_op;
4922 op.tier_evict();
4923 librados::AioCompletion *completion = cluster.aio_create_completion();
4924 ASSERT_EQ(0, cache_ioctx.aio_operate(
4925 "foo", completion, &op,
4926 librados::OPERATION_IGNORE_OVERLAY, NULL));
4927 completion->wait_for_complete();
4928 ASSERT_EQ(0, completion->get_return_value());
4929 }
4930
4931 uint32_t hash;
4932 ASSERT_EQ(0, cache_ioctx.get_object_pg_hash_position2("foo", &hash));
4933
4934 // scrub
4935 {
4936 for (int tries = 0; tries < 5; ++tries) {
4937 bufferlist inbl;
4938 ostringstream ss;
4939 ss << "{\"prefix\": \"pg deep-scrub\", \"pgid\": \""
4940 << cache_ioctx.get_id() << "."
4941 << std::hex << hash
4942 << "\"}";
4943 int r = cluster.mon_command(ss.str(), inbl, NULL, NULL);
4944 if (r == -ENOENT ||
4945 r == -EAGAIN) {
4946 sleep(5);
4947 continue;
4948 }
4949 ASSERT_EQ(0, r);
4950 break;
4951 }
4952 cout << "waiting for scrubs..." << std::endl;
4953 sleep(20);
4954 cout << "done waiting" << std::endl;
4955 }
4956
4957 {
4958 bufferlist bl;
4959 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
4960 ASSERT_EQ('t', bl[0]);
4961 }
4962 }
4963
4964 #include <common/CDC.h>
4965 TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) {
4966 // skip test if not yet octopus
4967 if (_get_required_osd_release(cluster) < "octopus") {
4968 GTEST_SKIP() << "cluster is not yet octopus, skipping test";
4969 }
4970
4971 bufferlist inbl;
4972 ASSERT_EQ(0, cluster.mon_command(
4973 set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"),
4974 inbl, NULL, NULL));
4975 ASSERT_EQ(0, cluster.mon_command(
4976 set_pool_str(cache_pool_name, "dedup_tier", pool_name),
4977 inbl, NULL, NULL));
4978 ASSERT_EQ(0, cluster.mon_command(
4979 set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
4980 inbl, NULL, NULL));
4981 ASSERT_EQ(0, cluster.mon_command(
4982 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
4983 inbl, NULL, NULL));
4984
4985 // wait for maps to settle
4986 cluster.wait_for_latest_osdmap();
4987
4988 // create object
4989 bufferlist gbl;
4990 {
4991 generate_buffer(1024*8, &gbl);
4992 ObjectWriteOperation op;
4993 op.write_full(gbl);
4994 ASSERT_EQ(0, cache_ioctx.operate("foo-chunk", &op));
4995 }
4996 {
4997 bufferlist bl;
4998 bl.append("DDse chunk");
4999 ObjectWriteOperation op;
5000 op.write_full(bl);
5001 ASSERT_EQ(0, ioctx.operate("bar-chunk", &op));
5002 }
5003
5004 // set-chunk to set manifest object
5005 {
5006 ObjectReadOperation op;
5007 op.set_chunk(0, 2, ioctx, "bar-chunk", 0,
5008 CEPH_OSD_OP_FLAG_WITH_REFERENCE);
5009 librados::AioCompletion *completion = cluster.aio_create_completion();
5010 ASSERT_EQ(0, cache_ioctx.aio_operate("foo-chunk", completion, &op,
5011 librados::OPERATION_IGNORE_CACHE, NULL));
5012 completion->wait_for_complete();
5013 ASSERT_EQ(0, completion->get_return_value());
5014 completion->release();
5015 }
5016 // flush
5017 {
5018 ObjectReadOperation op;
5019 op.tier_flush();
5020 librados::AioCompletion *completion = cluster.aio_create_completion();
5021 ASSERT_EQ(0, cache_ioctx.aio_operate(
5022 "foo-chunk", completion, &op,
5023 librados::OPERATION_IGNORE_CACHE, NULL));
5024 completion->wait_for_complete();
5025 ASSERT_EQ(0, completion->get_return_value());
5026 completion->release();
5027 }
5028
5029 std::unique_ptr<CDC> cdc = CDC::create("fastcdc", cbits(1024)-1);
5030 vector<pair<uint64_t, uint64_t>> chunks;
5031 bufferlist chunk;
5032 cdc->calc_chunks(gbl, &chunks);
5033 chunk.substr_of(gbl, chunks[1].first, chunks[1].second);
5034 string tgt_oid;
5035 {
5036 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5037 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5038 SHA1 sha1_gen;
5039 int size = chunk.length();
5040 sha1_gen.Update((const unsigned char *)chunk.c_str(), size);
5041 sha1_gen.Final(fingerprint);
5042 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5043 tgt_oid = string(p_str);
5044 }
5045
5046 // read and verify the chunked object
5047 {
5048 bufferlist test_bl;
5049 ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0));
5050 ASSERT_EQ(test_bl[1], chunk[1]);
5051 }
5052
5053 ASSERT_EQ(0, cluster.mon_command(
5054 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 512),
5055 inbl, NULL, NULL));
5056 cluster.wait_for_latest_osdmap();
5057
5058 // make a dirty chunks
5059 {
5060 bufferlist bl;
5061 bl.append("hi");
5062 ASSERT_EQ(0, cache_ioctx.write("foo-chunk", bl, bl.length(), 0));
5063 }
5064
5065 // flush
5066 {
5067 ObjectReadOperation op;
5068 op.tier_flush();
5069 librados::AioCompletion *completion = cluster.aio_create_completion();
5070 ASSERT_EQ(0, cache_ioctx.aio_operate(
5071 "foo-chunk", completion, &op,
5072 librados::OPERATION_IGNORE_CACHE, NULL));
5073 completion->wait_for_complete();
5074 ASSERT_EQ(0, completion->get_return_value());
5075 completion->release();
5076 }
5077
5078 cdc = CDC::create("fastcdc", cbits(512)-1);
5079 chunks.clear();
5080 cdc->calc_chunks(gbl, &chunks);
5081 bufferlist chunk_512;
5082 chunk_512.substr_of(gbl, chunks[3].first, chunks[3].second);
5083 {
5084 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5085 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5086 SHA1 sha1_gen;
5087 int size = chunk_512.length();
5088 sha1_gen.Update((const unsigned char *)chunk_512.c_str(), size);
5089 sha1_gen.Final(fingerprint);
5090 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5091 tgt_oid = string(p_str);
5092 }
5093
5094 // read and verify the chunked object
5095 {
5096 bufferlist test_bl;
5097 ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0));
5098 ASSERT_EQ(test_bl[1], chunk_512[1]);
5099 }
5100
5101 ASSERT_EQ(0, cluster.mon_command(
5102 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 16384),
5103 inbl, NULL, NULL));
5104 cluster.wait_for_latest_osdmap();
5105
5106 // make a dirty chunks
5107 {
5108 bufferlist bl;
5109 bl.append("hi");
5110 ASSERT_EQ(0, cache_ioctx.write("foo-chunk", bl, bl.length(), 0));
5111 gbl.begin(0).copy_in(bl.length(), bl);
5112 }
5113 // flush
5114 {
5115 ObjectReadOperation op;
5116 op.tier_flush();
5117 librados::AioCompletion *completion = cluster.aio_create_completion();
5118 ASSERT_EQ(0, cache_ioctx.aio_operate(
5119 "foo-chunk", completion, &op,
5120 librados::OPERATION_IGNORE_CACHE, NULL));
5121 completion->wait_for_complete();
5122 ASSERT_EQ(0, completion->get_return_value());
5123 completion->release();
5124 }
5125
5126 cdc = CDC::create("fastcdc", cbits(16384)-1);
5127 chunks.clear();
5128 cdc->calc_chunks(gbl, &chunks);
5129 bufferlist chunk_16384;
5130 chunk_16384.substr_of(gbl, chunks[0].first, chunks[0].second);
5131 {
5132 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5133 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5134 SHA1 sha1_gen;
5135 int size = chunk_16384.length();
5136 sha1_gen.Update((const unsigned char *)chunk_16384.c_str(), size);
5137 sha1_gen.Final(fingerprint);
5138 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5139 tgt_oid = string(p_str);
5140 }
5141 // read and verify the chunked object
5142 {
5143 bufferlist test_bl;
5144 ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0));
5145 ASSERT_EQ(test_bl[0], chunk_16384[0]);
5146 }
5147
5148 // less than object size
5149 ASSERT_EQ(0, cluster.mon_command(
5150 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
5151 inbl, NULL, NULL));
5152 cluster.wait_for_latest_osdmap();
5153
5154 // make a dirty chunks
5155 // a chunk_info is deleted by write, which converts the manifest object to non-manifest object
5156 {
5157 bufferlist bl;
5158 bl.append("hi");
5159 ASSERT_EQ(0, cache_ioctx.write("foo-chunk", bl, bl.length(), 0));
5160 }
5161
5162 // reset set-chunk
5163 {
5164 bufferlist bl;
5165 bl.append("DDse chunk");
5166 ObjectWriteOperation op;
5167 op.write_full(bl);
5168 ASSERT_EQ(0, ioctx.operate("bar-chunk", &op));
5169 }
5170 // set-chunk to set manifest object
5171 {
5172 ObjectReadOperation op;
5173 op.set_chunk(0, 2, ioctx, "bar-chunk", 0,
5174 CEPH_OSD_OP_FLAG_WITH_REFERENCE);
5175 librados::AioCompletion *completion = cluster.aio_create_completion();
5176 ASSERT_EQ(0, cache_ioctx.aio_operate("foo-chunk", completion, &op,
5177 librados::OPERATION_IGNORE_CACHE, NULL));
5178 completion->wait_for_complete();
5179 ASSERT_EQ(0, completion->get_return_value());
5180 completion->release();
5181 }
5182 // flush
5183 {
5184 ObjectReadOperation op;
5185 op.tier_flush();
5186 librados::AioCompletion *completion = cluster.aio_create_completion();
5187 ASSERT_EQ(0, cache_ioctx.aio_operate(
5188 "foo-chunk", completion, &op,
5189 librados::OPERATION_IGNORE_CACHE, NULL));
5190 completion->wait_for_complete();
5191 ASSERT_EQ(0, completion->get_return_value());
5192 completion->release();
5193 }
5194
5195 cdc = CDC::create("fastcdc", cbits(1024)-1);
5196 chunks.clear();
5197 cdc->calc_chunks(gbl, &chunks);
5198 bufferlist small_chunk;
5199 small_chunk.substr_of(gbl, chunks[1].first, chunks[1].second);
5200 {
5201 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5202 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5203 SHA1 sha1_gen;
5204 int size = small_chunk.length();
5205 sha1_gen.Update((const unsigned char *)small_chunk.c_str(), size);
5206 sha1_gen.Final(fingerprint);
5207 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5208 tgt_oid = string(p_str);
5209 }
5210 // read and verify the chunked object
5211 {
5212 bufferlist test_bl;
5213 ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0));
5214 ASSERT_EQ(test_bl[0], small_chunk[0]);
5215 }
5216
5217 }
5218
5219 TEST_F(LibRadosTwoPoolsPP, ManifestFlushSnap) {
5220 // skip test if not yet octopus
5221 if (_get_required_osd_release(cluster) < "octopus") {
5222 cout << "cluster is not yet octopus, skipping test" << std::endl;
5223 return;
5224 }
5225
5226 bufferlist inbl;
5227 ASSERT_EQ(0, cluster.mon_command(
5228 set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"),
5229 inbl, NULL, NULL));
5230 ASSERT_EQ(0, cluster.mon_command(
5231 set_pool_str(cache_pool_name, "dedup_tier", pool_name),
5232 inbl, NULL, NULL));
5233 ASSERT_EQ(0, cluster.mon_command(
5234 set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
5235 inbl, NULL, NULL));
5236 ASSERT_EQ(0, cluster.mon_command(
5237 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
5238 inbl, NULL, NULL));
5239
5240 // wait for maps to settle
5241 cluster.wait_for_latest_osdmap();
5242
5243 // create object
5244 bufferlist gbl;
5245 {
5246 //bufferlist bl;
5247 //bl.append("there hi");
5248 generate_buffer(1024*8, &gbl);
5249 ObjectWriteOperation op;
5250 op.write_full(gbl);
5251 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
5252 }
5253 {
5254 bufferlist bl;
5255 bl.append("there hi");
5256 ObjectWriteOperation op;
5257 op.write_full(bl);
5258 ASSERT_EQ(0, ioctx.operate("bar", &op));
5259 }
5260
5261 // set-chunk (dedup)
5262 manifest_set_chunk(cluster, ioctx, cache_ioctx, 2, 2, "bar", "foo");
5263 manifest_set_chunk(cluster, ioctx, cache_ioctx, 6, 2, "bar", "foo");
5264
5265 // create a snapshot, clone
5266 vector<uint64_t> my_snaps(1);
5267 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0]));
5268 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5269 my_snaps));
5270
5271 // make a dirty chunks
5272 {
5273 bufferlist bl;
5274 bl.append("Thbbe");
5275 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
5276 }
5277
5278 // and another
5279 my_snaps.resize(2);
5280 my_snaps[1] = my_snaps[0];
5281 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0]));
5282 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5283 my_snaps));
5284
5285 // make a dirty chunks
5286 {
5287 bufferlist bl;
5288 bl.append("Thcce");
5289 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
5290 }
5291
5292 // flush on head (should fail)
5293 cache_ioctx.snap_set_read(librados::SNAP_HEAD);
5294 // flush
5295 {
5296 ObjectReadOperation op;
5297 op.tier_flush();
5298 librados::AioCompletion *completion = cluster.aio_create_completion();
5299 ASSERT_EQ(0, cache_ioctx.aio_operate(
5300 "foo", completion, &op,
5301 librados::OPERATION_IGNORE_CACHE, NULL));
5302 completion->wait_for_complete();
5303 ASSERT_EQ(-EBUSY, completion->get_return_value());
5304 completion->release();
5305 }
5306
5307 // flush on recent snap (should fail)
5308 cache_ioctx.snap_set_read(my_snaps[0]);
5309 {
5310 ObjectReadOperation op;
5311 op.tier_flush();
5312 librados::AioCompletion *completion = cluster.aio_create_completion();
5313 ASSERT_EQ(0, cache_ioctx.aio_operate(
5314 "foo", completion, &op,
5315 librados::OPERATION_IGNORE_CACHE, NULL));
5316 completion->wait_for_complete();
5317 ASSERT_EQ(-EBUSY, completion->get_return_value());
5318 completion->release();
5319 }
5320
5321 // flush on oldest snap
5322 cache_ioctx.snap_set_read(my_snaps[1]);
5323 {
5324 ObjectReadOperation op;
5325 op.tier_flush();
5326 librados::AioCompletion *completion = cluster.aio_create_completion();
5327 ASSERT_EQ(0, cache_ioctx.aio_operate(
5328 "foo", completion, &op,
5329 librados::OPERATION_IGNORE_CACHE, NULL));
5330 completion->wait_for_complete();
5331 ASSERT_EQ(0, completion->get_return_value());
5332 completion->release();
5333 }
5334
5335 // flush on oldest snap
5336 cache_ioctx.snap_set_read(my_snaps[0]);
5337 {
5338 ObjectReadOperation op;
5339 op.tier_flush();
5340 librados::AioCompletion *completion = cluster.aio_create_completion();
5341 ASSERT_EQ(0, cache_ioctx.aio_operate(
5342 "foo", completion, &op,
5343 librados::OPERATION_IGNORE_CACHE, NULL));
5344 completion->wait_for_complete();
5345 ASSERT_EQ(0, completion->get_return_value());
5346 completion->release();
5347 }
5348
5349 // flush on oldest snap
5350 cache_ioctx.snap_set_read(librados::SNAP_HEAD);
5351 {
5352 ObjectReadOperation op;
5353 op.tier_flush();
5354 librados::AioCompletion *completion = cluster.aio_create_completion();
5355 ASSERT_EQ(0, cache_ioctx.aio_operate(
5356 "foo", completion, &op,
5357 librados::OPERATION_IGNORE_CACHE, NULL));
5358 completion->wait_for_complete();
5359 ASSERT_EQ(0, completion->get_return_value());
5360 completion->release();
5361 }
5362
5363 // check chunk's refcount
5364 std::unique_ptr<CDC> cdc = CDC::create("fastcdc", cbits(1024)-1);
5365 vector<pair<uint64_t, uint64_t>> chunks;
5366 bufferlist chunk;
5367 cdc->calc_chunks(gbl, &chunks);
5368 chunk.substr_of(gbl, chunks[1].first, chunks[1].second);
5369 string tgt_oid;
5370 {
5371 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5372 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5373 SHA1 sha1_gen;
5374 int size = chunk.length();
5375 sha1_gen.Update((const unsigned char *)chunk.c_str(), size);
5376 sha1_gen.Final(fingerprint);
5377 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5378 tgt_oid = string(p_str);
5379 }
5380 // read and verify the chunked object
5381 {
5382 bufferlist test_bl;
5383 ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0));
5384 ASSERT_EQ(test_bl[1], chunk[1]);
5385 }
5386
5387 cache_ioctx.snap_set_read(librados::SNAP_HEAD);
5388 {
5389 bufferlist bl;
5390 ASSERT_EQ(4, cache_ioctx.read("foo", bl, 4, 0));
5391 ASSERT_EQ('c', bl[2]);
5392 }
5393
5394 cache_ioctx.snap_set_read(my_snaps[0]);
5395 {
5396 bufferlist bl;
5397 ASSERT_EQ(4, cache_ioctx.read("foo", bl, 4, 0));
5398 ASSERT_EQ('b', bl[2]);
5399 }
5400 }
5401
5402 TEST_F(LibRadosTwoPoolsPP, ManifestFlushDupCount) {
5403 // skip test if not yet octopus
5404 if (_get_required_osd_release(cluster) < "octopus") {
5405 cout << "cluster is not yet octopus, skipping test" << std::endl;
5406 return;
5407 }
5408
5409 bufferlist inbl;
5410 ASSERT_EQ(0, cluster.mon_command(
5411 set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"),
5412 inbl, NULL, NULL));
5413 ASSERT_EQ(0, cluster.mon_command(
5414 set_pool_str(cache_pool_name, "dedup_tier", pool_name),
5415 inbl, NULL, NULL));
5416 ASSERT_EQ(0, cluster.mon_command(
5417 set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
5418 inbl, NULL, NULL));
5419 ASSERT_EQ(0, cluster.mon_command(
5420 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
5421 inbl, NULL, NULL));
5422
5423 // create object
5424 bufferlist gbl;
5425 {
5426 //bufferlist bl;
5427 generate_buffer(1024*8, &gbl);
5428 ObjectWriteOperation op;
5429 op.write_full(gbl);
5430 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
5431 }
5432 {
5433 bufferlist bl;
5434 bl.append("there hiHI");
5435 ObjectWriteOperation op;
5436 op.write_full(bl);
5437 ASSERT_EQ(0, ioctx.operate("bar", &op));
5438 }
5439
5440 // wait for maps to settle
5441 cluster.wait_for_latest_osdmap();
5442
5443 // set-chunk to set manifest object
5444 {
5445 ObjectReadOperation op;
5446 op.set_chunk(0, 2, ioctx, "bar", 0,
5447 CEPH_OSD_OP_FLAG_WITH_REFERENCE);
5448 librados::AioCompletion *completion = cluster.aio_create_completion();
5449 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
5450 librados::OPERATION_IGNORE_CACHE, NULL));
5451 completion->wait_for_complete();
5452 ASSERT_EQ(0, completion->get_return_value());
5453 completion->release();
5454 }
5455
5456 // create a snapshot, clone
5457 vector<uint64_t> my_snaps(1);
5458 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0]));
5459 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5460 my_snaps));
5461
5462 // make a dirty chunks
5463 {
5464 bufferlist bl;
5465 bl.append("Thbbe hi");
5466 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
5467 }
5468
5469 // and another
5470 my_snaps.resize(2);
5471 my_snaps[1] = my_snaps[0];
5472 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0]));
5473 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5474 my_snaps));
5475
5476 // make a dirty chunks
5477 {
5478 bufferlist bl;
5479 bl.append("Thcce hi");
5480 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
5481 }
5482
5483 //flush on oldest snap
5484 cache_ioctx.snap_set_read(my_snaps[1]);
5485 // flush
5486 {
5487 ObjectReadOperation op;
5488 op.tier_flush();
5489 librados::AioCompletion *completion = cluster.aio_create_completion();
5490 ASSERT_EQ(0, cache_ioctx.aio_operate(
5491 "foo", completion, &op,
5492 librados::OPERATION_IGNORE_CACHE, NULL));
5493 completion->wait_for_complete();
5494 ASSERT_EQ(0, completion->get_return_value());
5495 completion->release();
5496 }
5497
5498 // flush on oldest snap
5499 cache_ioctx.snap_set_read(my_snaps[0]);
5500 // flush
5501 {
5502 ObjectReadOperation op;
5503 op.tier_flush();
5504 librados::AioCompletion *completion = cluster.aio_create_completion();
5505 ASSERT_EQ(0, cache_ioctx.aio_operate(
5506 "foo", completion, &op,
5507 librados::OPERATION_IGNORE_CACHE, NULL));
5508 completion->wait_for_complete();
5509 ASSERT_EQ(0, completion->get_return_value());
5510 completion->release();
5511 }
5512
5513 cache_ioctx.snap_set_read(librados::SNAP_HEAD);
5514 // flush
5515 {
5516 ObjectReadOperation op;
5517 op.tier_flush();
5518 librados::AioCompletion *completion = cluster.aio_create_completion();
5519 ASSERT_EQ(0, cache_ioctx.aio_operate(
5520 "foo", completion, &op,
5521 librados::OPERATION_IGNORE_CACHE, NULL));
5522 completion->wait_for_complete();
5523 ASSERT_EQ(0, completion->get_return_value());
5524 completion->release();
5525 }
5526
5527 std::unique_ptr<CDC> cdc = CDC::create("fastcdc", cbits(1024)-1);
5528 vector<pair<uint64_t, uint64_t>> chunks;
5529 bufferlist chunk;
5530 cdc->calc_chunks(gbl, &chunks);
5531 chunk.substr_of(gbl, chunks[1].first, chunks[1].second);
5532 string tgt_oid;
5533 // check chunk's refcount
5534 {
5535 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5536 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5537 bufferlist t;
5538 SHA1 sha1_gen;
5539 int size = chunk.length();
5540 sha1_gen.Update((const unsigned char *)chunk.c_str(), size);
5541 sha1_gen.Final(fingerprint);
5542 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5543 tgt_oid = string(p_str);
5544 ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
5545 chunk_refs_t refs;
5546 try {
5547 auto iter = t.cbegin();
5548 decode(refs, iter);
5549 } catch (buffer::error& err) {
5550 ASSERT_TRUE(0);
5551 }
5552 ASSERT_LE(1u, refs.count());
5553 }
5554
5555 bufferlist chunk2;
5556 chunk2.substr_of(gbl, chunks[0].first, chunks[0].second);
5557 // check chunk's refcount
5558 {
5559 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5560 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5561 bufferlist t;
5562 SHA1 sha1_gen;
5563 int size = chunk2.length();
5564 sha1_gen.Update((const unsigned char *)chunk2.c_str(), size);
5565 sha1_gen.Final(fingerprint);
5566 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5567 tgt_oid = string(p_str);
5568 ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
5569 chunk_refs_t refs;
5570 try {
5571 auto iter = t.cbegin();
5572 decode(refs, iter);
5573 } catch (buffer::error& err) {
5574 ASSERT_TRUE(0);
5575 }
5576 ASSERT_LE(1u, refs.count());
5577 }
5578
5579 // make a dirty chunks
5580 {
5581 bufferlist bl;
5582 bl.append("ThDDe hi");
5583 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
5584 }
5585
5586 // flush
5587 {
5588 ObjectReadOperation op;
5589 op.tier_flush();
5590 librados::AioCompletion *completion = cluster.aio_create_completion();
5591 ASSERT_EQ(0, cache_ioctx.aio_operate(
5592 "foo", completion, &op,
5593 librados::OPERATION_IGNORE_CACHE, NULL));
5594 completion->wait_for_complete();
5595 ASSERT_EQ(0, completion->get_return_value());
5596 completion->release();
5597 }
5598
5599 bufferlist tmp;
5600 tmp.append("Thcce hi");
5601 gbl.begin(0).copy_in(tmp.length(), tmp);
5602 bufferlist chunk3;
5603 cdc->calc_chunks(gbl, &chunks);
5604 chunk3.substr_of(gbl, chunks[0].first, chunks[0].second);
5605 // check chunk's refcount
5606 {
5607 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5608 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5609 bufferlist t;
5610 SHA1 sha1_gen;
5611 int size = chunk2.length();
5612 sha1_gen.Update((const unsigned char *)chunk2.c_str(), size);
5613 sha1_gen.Final(fingerprint);
5614 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5615 is_intended_refcount_state(cache_ioctx, "foo", ioctx, p_str, 0);
5616 }
5617 }
5618
5619 TEST_F(LibRadosTwoPoolsPP, TierFlushDuringFlush) {
5620 // skip test if not yet octopus
5621 if (_get_required_osd_release(cluster) < "octopus") {
5622 cout << "cluster is not yet octopus, skipping test" << std::endl;
5623 return;
5624 }
5625
5626 bufferlist inbl;
5627
5628 // create a new pool
5629 std::string temp_pool_name = get_temp_pool_name() + "-test-flush";
5630 ASSERT_EQ(0, cluster.pool_create(temp_pool_name.c_str()));
5631
5632 ASSERT_EQ(0, cluster.mon_command(
5633 set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"),
5634 inbl, NULL, NULL));
5635 ASSERT_EQ(0, cluster.mon_command(
5636 set_pool_str(cache_pool_name, "dedup_tier", temp_pool_name),
5637 inbl, NULL, NULL));
5638 ASSERT_EQ(0, cluster.mon_command(
5639 set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
5640 inbl, NULL, NULL));
5641 ASSERT_EQ(0, cluster.mon_command(
5642 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
5643 inbl, NULL, NULL));
5644
5645 // create object
5646 bufferlist gbl;
5647 {
5648 //bufferlist bl;
5649 generate_buffer(1024*8, &gbl);
5650 ObjectWriteOperation op;
5651 op.write_full(gbl);
5652 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
5653 }
5654 {
5655 bufferlist bl;
5656 bl.append("there hiHI");
5657 ObjectWriteOperation op;
5658 op.write_full(bl);
5659 ASSERT_EQ(0, ioctx.operate("bar", &op));
5660 }
5661
5662 // wait for maps to settle
5663 cluster.wait_for_latest_osdmap();
5664
5665 // set-chunk to set manifest object
5666 {
5667 ObjectReadOperation op;
5668 op.set_chunk(0, 2, ioctx, "bar", 0,
5669 CEPH_OSD_OP_FLAG_WITH_REFERENCE);
5670 librados::AioCompletion *completion = cluster.aio_create_completion();
5671 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
5672 librados::OPERATION_IGNORE_CACHE, NULL));
5673 completion->wait_for_complete();
5674 ASSERT_EQ(0, completion->get_return_value());
5675 completion->release();
5676 }
5677
5678 // delete temp pool, so flushing chunk will fail
5679 ASSERT_EQ(0, s_cluster.pool_delete(temp_pool_name.c_str()));
5680
5681 // flush to check if proper error is returned
5682 {
5683 ObjectReadOperation op;
5684 op.tier_flush();
5685 librados::AioCompletion *completion = cluster.aio_create_completion();
5686 ASSERT_EQ(0, cache_ioctx.aio_operate(
5687 "foo", completion, &op,
5688 librados::OPERATION_IGNORE_CACHE, NULL));
5689 completion->wait_for_complete();
5690 ASSERT_EQ(-ENOENT, completion->get_return_value());
5691 completion->release();
5692 }
5693
5694 }
5695
5696 TEST_F(LibRadosTwoPoolsPP, ManifestSnapHasChunk) {
5697 // skip test if not yet octopus
5698 if (_get_required_osd_release(cluster) < "octopus") {
5699 cout << "cluster is not yet octopus, skipping test" << std::endl;
5700 return;
5701 }
5702
5703 bufferlist inbl;
5704 ASSERT_EQ(0, cluster.mon_command(
5705 set_pool_str(pool_name, "fingerprint_algorithm", "sha1"),
5706 inbl, NULL, NULL));
5707 cluster.wait_for_latest_osdmap();
5708
5709 // create object
5710 {
5711 bufferlist bl;
5712 bl.append("there HIHI");
5713 ObjectWriteOperation op;
5714 op.write_full(bl);
5715 ASSERT_EQ(0, ioctx.operate("foo", &op));
5716 }
5717
5718 string er_fp_oid, hi_fp_oid, HI_fp_oid, ai_fp_oid, bi_fp_oid,
5719 Er_fp_oid, Hi_fp_oid, SI_fp_oid;
5720
5721 // get fp_oid
5722 er_fp_oid = get_fp_oid("er", "sha1");
5723 hi_fp_oid = get_fp_oid("hi", "sha1");
5724 HI_fp_oid = get_fp_oid("HI", "sha1");
5725 ai_fp_oid = get_fp_oid("ai", "sha1");
5726 bi_fp_oid = get_fp_oid("bi", "sha1");
5727 Er_fp_oid = get_fp_oid("Er", "sha1");
5728 Hi_fp_oid = get_fp_oid("Hi", "sha1");
5729 SI_fp_oid = get_fp_oid("SI", "sha1");
5730
5731 // write
5732 {
5733 ObjectWriteOperation op;
5734 bufferlist bl;
5735 bl.append("er");
5736 op.write_full(bl);
5737 ASSERT_EQ(0, cache_ioctx.operate(er_fp_oid, &op));
5738 }
5739 // write
5740 {
5741 ObjectWriteOperation op;
5742 bufferlist bl;
5743 bl.append("hi");
5744 op.write_full(bl);
5745 ASSERT_EQ(0, cache_ioctx.operate(hi_fp_oid, &op));
5746 }
5747 // write
5748 {
5749 ObjectWriteOperation op;
5750 bufferlist bl;
5751 bl.append("HI");
5752 op.write_full(bl);
5753 ASSERT_EQ(0, cache_ioctx.operate(HI_fp_oid, &op));
5754 }
5755 // write
5756 {
5757 ObjectWriteOperation op;
5758 bufferlist bl;
5759 bl.append("ai");
5760 op.write_full(bl);
5761 ASSERT_EQ(0, cache_ioctx.operate(ai_fp_oid, &op));
5762 }
5763 // write
5764 {
5765 ObjectWriteOperation op;
5766 bufferlist bl;
5767 bl.append("bi");
5768 op.write_full(bl);
5769 ASSERT_EQ(0, cache_ioctx.operate(bi_fp_oid, &op));
5770 }
5771 // write
5772 {
5773 ObjectWriteOperation op;
5774 bufferlist bl;
5775 bl.append("Er");
5776 op.write_full(bl);
5777 ASSERT_EQ(0, cache_ioctx.operate(Er_fp_oid, &op));
5778 }
5779 // write
5780 {
5781 ObjectWriteOperation op;
5782 bufferlist bl;
5783 bl.append("Hi");
5784 op.write_full(bl);
5785 ASSERT_EQ(0, cache_ioctx.operate(Hi_fp_oid, &op));
5786 }
5787 // write
5788 {
5789 ObjectWriteOperation op;
5790 bufferlist bl;
5791 bl.append("SI");
5792 op.write_full(bl);
5793 ASSERT_EQ(0, cache_ioctx.operate(SI_fp_oid, &op));
5794 }
5795
5796 // set-chunk (dedup)
5797 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, HI_fp_oid, "foo");
5798 // set-chunk (dedup)
5799 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, HI_fp_oid, "foo");
5800
5801 // foo head: [hi] [HI]
5802
5803 // create a snapshot, clone
5804 vector<uint64_t> my_snaps(1);
5805 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
5806 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5807 my_snaps));
5808
5809
5810 // create a clone
5811 {
5812 bufferlist bl;
5813 bl.append("a");
5814 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
5815 }
5816 // write
5817 {
5818 bufferlist bl;
5819 bl.append("a");
5820 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
5821 }
5822 // write
5823 {
5824 bufferlist bl;
5825 bl.append("S");
5826 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 8));
5827 }
5828
5829 // foo snap[0]: [hi] [HI]
5830 // foo head : [er] [ai] [SI]
5831
5832 // set-chunk (dedup)
5833 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, er_fp_oid, "foo");
5834 // set-chunk (dedup)
5835 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, ai_fp_oid, "foo");
5836 // set-chunk (dedup)
5837 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, SI_fp_oid, "foo");
5838
5839 my_snaps.resize(2);
5840 my_snaps[1] = my_snaps[0];
5841 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
5842 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5843 my_snaps));
5844
5845 // create a clone
5846 {
5847 bufferlist bl;
5848 bl.append("b");
5849 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
5850 }
5851 // write
5852 {
5853 bufferlist bl;
5854 bl.append("b");
5855 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
5856 }
5857
5858 // foo snap[1]: [HI] [HI]
5859 // foo snap[0]: [er] [ai] [SI]
5860 // foo head : [er] [bi] [SI]
5861
5862 // set-chunk (dedup)
5863 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, bi_fp_oid, "foo");
5864
5865 {
5866 ASSERT_EQ(1, cls_cas_references_chunk(ioctx, "foo", SI_fp_oid));
5867 ASSERT_EQ(1, cls_cas_references_chunk(ioctx, "foo", er_fp_oid));
5868 ASSERT_EQ(1, cls_cas_references_chunk(ioctx, "foo", ai_fp_oid));
5869 ASSERT_EQ(2, cls_cas_references_chunk(ioctx, "foo", HI_fp_oid));
5870 ASSERT_EQ(-ENOLINK, cls_cas_references_chunk(ioctx, "foo", Hi_fp_oid));
5871 }
5872 }
5873
5874 TEST_F(LibRadosTwoPoolsPP, ManifestRollback) {
5875 // skip test if not yet pacific
5876 if (_get_required_osd_release(cluster) < "pacific") {
5877 cout << "cluster is not yet pacific, skipping test" << std::endl;
5878 return;
5879 }
5880
5881 // create object
5882 {
5883 bufferlist bl;
5884 bl.append("CDere hiHI");
5885 ObjectWriteOperation op;
5886 op.write_full(bl);
5887 ASSERT_EQ(0, ioctx.operate("foo", &op));
5888 }
5889 {
5890 bufferlist bl;
5891 bl.append("ABere hiHI");
5892 ObjectWriteOperation op;
5893 op.write_full(bl);
5894 ASSERT_EQ(0, cache_ioctx.operate("chunk1", &op));
5895 }
5896 {
5897 bufferlist bl;
5898 bl.append("CDere hiHI");
5899 ObjectWriteOperation op;
5900 op.write_full(bl);
5901 ASSERT_EQ(0, cache_ioctx.operate("chunk2", &op));
5902 }
5903 {
5904 bufferlist bl;
5905 bl.append("EFere hiHI");
5906 ObjectWriteOperation op;
5907 op.write_full(bl);
5908 ASSERT_EQ(0, cache_ioctx.operate("chunk3", &op));
5909 }
5910
5911 // wait for maps to settle
5912 cluster.wait_for_latest_osdmap();
5913
5914 // create a snapshot, clone
5915 vector<uint64_t> my_snaps(1);
5916 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
5917 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5918 my_snaps));
5919
5920 {
5921 bufferlist bl;
5922 bl.append("there hiHI");
5923 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
5924 }
5925
5926 my_snaps.resize(2);
5927 my_snaps[1] = my_snaps[0];
5928 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
5929 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5930 my_snaps));
5931
5932 {
5933 bufferlist bl;
5934 bl.append("thABe hiEF");
5935 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
5936 }
5937
5938 // set-chunk
5939 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk1", "foo");
5940 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk3", "foo");
5941 // foo snap[1]:
5942 // foo snap[0]:
5943 // foo head : [chunk1] [chunk3]
5944
5945 ioctx.snap_set_read(my_snaps[1]);
5946 // set-chunk
5947 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 10, "chunk2", "foo");
5948 // foo snap[1]: [ chunk2 ]
5949 // foo snap[0]:
5950 // foo head : [chunk1] [chunk3]
5951
5952 // foo snap[1]: [ chunk2 ]
5953 // foo snap[0]:
5954 // foo head : [chunk1] [chunk3]
5955
5956 ASSERT_EQ(0, ioctx.selfmanaged_snap_rollback("foo", my_snaps[0]));
5957
5958 ioctx.snap_set_read(librados::SNAP_HEAD);
5959 {
5960 bufferlist bl;
5961 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
5962 ASSERT_EQ('t', bl[0]);
5963 }
5964
5965 ASSERT_EQ(0, ioctx.selfmanaged_snap_rollback("foo", my_snaps[1]));
5966
5967 {
5968 bufferlist bl;
5969 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
5970 ASSERT_EQ('C', bl[0]);
5971 }
5972
5973 }
5974
5975 TEST_F(LibRadosTwoPoolsPP, ManifestRollbackRefcount) {
5976 // skip test if not yet pacific
5977 if (_get_required_osd_release(cluster) < "pacific") {
5978 cout << "cluster is not yet pacific, skipping test" << std::endl;
5979 return;
5980 }
5981
5982 // create object
5983 {
5984 bufferlist bl;
5985 bl.append("CDere hiHI");
5986 ObjectWriteOperation op;
5987 op.write_full(bl);
5988 ASSERT_EQ(0, ioctx.operate("foo", &op));
5989 }
5990 {
5991 bufferlist bl;
5992 bl.append("ABere hiHI");
5993 ObjectWriteOperation op;
5994 op.write_full(bl);
5995 ASSERT_EQ(0, cache_ioctx.operate("chunk1", &op));
5996 }
5997 {
5998 bufferlist bl;
5999 bl.append("CDere hiHI");
6000 ObjectWriteOperation op;
6001 op.write_full(bl);
6002 ASSERT_EQ(0, cache_ioctx.operate("chunk2", &op));
6003 }
6004 {
6005 bufferlist bl;
6006 bl.append("EFere hiHI");
6007 ObjectWriteOperation op;
6008 op.write_full(bl);
6009 ASSERT_EQ(0, cache_ioctx.operate("chunk3", &op));
6010 }
6011 {
6012 bufferlist bl;
6013 bl.append("DDDDD hiHI");
6014 ObjectWriteOperation op;
6015 op.write_full(bl);
6016 ASSERT_EQ(0, cache_ioctx.operate("chunk4", &op));
6017 }
6018 {
6019 bufferlist bl;
6020 bl.append("EEEEE hiHI");
6021 ObjectWriteOperation op;
6022 op.write_full(bl);
6023 ASSERT_EQ(0, cache_ioctx.operate("chunk5", &op));
6024 }
6025
6026 // wait for maps to settle
6027 cluster.wait_for_latest_osdmap();
6028
6029 // create a snapshot, clone
6030 vector<uint64_t> my_snaps(1);
6031 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
6032 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
6033 my_snaps));
6034
6035 {
6036 bufferlist bl;
6037 bl.append("there hiHI");
6038 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
6039 }
6040
6041 my_snaps.resize(2);
6042 my_snaps[1] = my_snaps[0];
6043 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
6044 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
6045 my_snaps));
6046
6047 {
6048 bufferlist bl;
6049 bl.append("thABe hiEF");
6050 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
6051 }
6052
6053 // set-chunk
6054 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk1", "foo");
6055 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk3", "foo");
6056 // foo snap[1]:
6057 // foo snap[0]:
6058 // foo head : [chunk1] [chunk3]
6059
6060 ioctx.snap_set_read(my_snaps[1]);
6061 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk4", "foo");
6062 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, "chunk5", "foo");
6063 // foo snap[1]: [chunk4] [chunk5]
6064 // foo snap[0]:
6065 // foo head : [chunk1] [chunk3]
6066
6067 ioctx.snap_set_read(my_snaps[0]);
6068 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 10, "chunk2", "foo");
6069 // foo snap[1]: [chunk4] [chunk5]
6070 // foo snap[0]: [ chunk2 ]
6071 // foo head : [chunk1] [chunk3]
6072
6073 ASSERT_EQ(0, ioctx.selfmanaged_snap_rollback("foo", my_snaps[1]));
6074 // foo snap[1]: [chunk4] [chunk5]
6075 // foo snap[0]: [ chunk2 ]
6076 // foo head : [chunk4] [chunk5] <-- will contain these contents
6077
6078 sleep(10);
6079 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk1", 0);
6080 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk3", 0);
6081
6082 ioctx.selfmanaged_snap_remove(my_snaps[1]);
6083 sleep(10);
6084 // foo snap[1]:
6085 // foo snap[0]: [ chunk2 ]
6086 // foo head : [chunk4] [chunk5]
6087 ioctx.snap_set_read(librados::SNAP_HEAD);
6088 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk4", 1);
6089 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk5", 1);
6090
6091 {
6092 bufferlist bl;
6093 bl.append("thABe hiEF");
6094 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
6095 }
6096 // foo snap[1]:
6097 // foo snap[0]: [ chunk2 ]
6098 // foo head :
6099 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk1", 0);
6100 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk3", 0);
6101 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk4", 0);
6102 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk5", 0);
6103 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk2", 1);
6104 }
6105
6106 TEST_F(LibRadosTwoPoolsPP, ManifestEvictRollback) {
6107 // skip test if not yet pacific
6108 if (_get_required_osd_release(cluster) < "pacific") {
6109 cout << "cluster is not yet pacific, skipping test" << std::endl;
6110 return;
6111 }
6112
6113 // create object
6114 {
6115 bufferlist bl;
6116 bl.append("CDere hiHI");
6117 ObjectWriteOperation op;
6118 op.write_full(bl);
6119 ASSERT_EQ(0, ioctx.operate("foo", &op));
6120 }
6121 {
6122 bufferlist bl;
6123 bl.append("ABere hiHI");
6124 ObjectWriteOperation op;
6125 op.write_full(bl);
6126 ASSERT_EQ(0, cache_ioctx.operate("chunk1", &op));
6127 }
6128 {
6129 bufferlist bl;
6130 bl.append("CDere hiHI");
6131 ObjectWriteOperation op;
6132 op.write_full(bl);
6133 ASSERT_EQ(0, cache_ioctx.operate("chunk2", &op));
6134 }
6135 {
6136 bufferlist bl;
6137 bl.append("EFere hiHI");
6138 ObjectWriteOperation op;
6139 op.write_full(bl);
6140 ASSERT_EQ(0, cache_ioctx.operate("chunk3", &op));
6141 }
6142
6143 // wait for maps to settle
6144 cluster.wait_for_latest_osdmap();
6145
6146 // create a snapshot, clone
6147 vector<uint64_t> my_snaps(1);
6148 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
6149 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
6150 my_snaps));
6151
6152 {
6153 bufferlist bl;
6154 bl.append("there hiHI");
6155 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
6156 }
6157
6158
6159 // set-chunk
6160 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk1", "foo");
6161 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk3", "foo");
6162 // foo snap[0]:
6163 // foo head : [chunk1] [chunk3]
6164
6165 ioctx.snap_set_read(my_snaps[0]);
6166 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 10, "chunk2", "foo");
6167 // foo snap[0]: [ chunk2 ]
6168 // foo head : [chunk1] [chunk3]
6169
6170 sleep(10);
6171 ioctx.snap_set_read(librados::SNAP_HEAD);
6172 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk1", 1);
6173 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk3", 1);
6174
6175
6176 ioctx.snap_set_read(my_snaps[0]);
6177 // evict--this makes the chunk missing state
6178 {
6179 ObjectReadOperation op, stat_op;
6180 op.tier_evict();
6181 librados::AioCompletion *completion = cluster.aio_create_completion();
6182 ASSERT_EQ(0, ioctx.aio_operate(
6183 "foo", completion, &op,
6184 librados::OPERATION_IGNORE_OVERLAY, NULL));
6185 completion->wait_for_complete();
6186 ASSERT_EQ(0, completion->get_return_value());
6187 }
6188
6189 // rollback to my_snaps[0]
6190 ASSERT_EQ(0, ioctx.selfmanaged_snap_rollback("foo", my_snaps[0]));
6191
6192 ioctx.snap_set_read(librados::SNAP_HEAD);
6193 {
6194 bufferlist bl;
6195 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6196 ASSERT_EQ('C', bl[0]);
6197 }
6198
6199 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk2", 1);
6200 }
6201
6202 class LibRadosTwoPoolsECPP : public RadosTestECPP
6203 {
6204 public:
6205 LibRadosTwoPoolsECPP() {};
6206 ~LibRadosTwoPoolsECPP() override {};
6207 protected:
6208 static void SetUpTestCase() {
6209 pool_name = get_temp_pool_name();
6210 ASSERT_EQ("", create_one_ec_pool_pp(pool_name, s_cluster));
6211 }
6212 static void TearDownTestCase() {
6213 ASSERT_EQ(0, destroy_one_ec_pool_pp(pool_name, s_cluster));
6214 }
6215 static std::string cache_pool_name;
6216
6217 void SetUp() override {
6218 cache_pool_name = get_temp_pool_name();
6219 ASSERT_EQ(0, s_cluster.pool_create(cache_pool_name.c_str()));
6220 RadosTestECPP::SetUp();
6221
6222 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
6223 cache_ioctx.application_enable("rados", true);
6224 cache_ioctx.set_namespace(nspace);
6225 }
6226 void TearDown() override {
6227 // flush + evict cache
6228 flush_evict_all(cluster, cache_ioctx);
6229
6230 bufferlist inbl;
6231 // tear down tiers
6232 ASSERT_EQ(0, cluster.mon_command(
6233 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
6234 "\"}",
6235 inbl, NULL, NULL));
6236 ASSERT_EQ(0, cluster.mon_command(
6237 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
6238 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
6239 inbl, NULL, NULL));
6240
6241 // wait for maps to settle before next test
6242 cluster.wait_for_latest_osdmap();
6243
6244 RadosTestECPP::TearDown();
6245
6246 cleanup_default_namespace(cache_ioctx);
6247 cleanup_namespace(cache_ioctx, nspace);
6248
6249 cache_ioctx.close();
6250 ASSERT_EQ(0, s_cluster.pool_delete(cache_pool_name.c_str()));
6251 }
6252
6253 librados::IoCtx cache_ioctx;
6254 };
6255
6256 std::string LibRadosTwoPoolsECPP::cache_pool_name;
6257
6258 TEST_F(LibRadosTierECPP, Dirty) {
6259 {
6260 ObjectWriteOperation op;
6261 op.undirty();
6262 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still get 0 if it dne
6263 }
6264 {
6265 ObjectWriteOperation op;
6266 op.create(true);
6267 ASSERT_EQ(0, ioctx.operate("foo", &op));
6268 }
6269 {
6270 bool dirty = false;
6271 int r = -1;
6272 ObjectReadOperation op;
6273 op.is_dirty(&dirty, &r);
6274 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
6275 ASSERT_TRUE(dirty);
6276 ASSERT_EQ(0, r);
6277 }
6278 {
6279 ObjectWriteOperation op;
6280 op.undirty();
6281 ASSERT_EQ(0, ioctx.operate("foo", &op));
6282 }
6283 {
6284 ObjectWriteOperation op;
6285 op.undirty();
6286 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still 0 if already clean
6287 }
6288 {
6289 bool dirty = false;
6290 int r = -1;
6291 ObjectReadOperation op;
6292 op.is_dirty(&dirty, &r);
6293 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
6294 ASSERT_FALSE(dirty);
6295 ASSERT_EQ(0, r);
6296 }
6297 //{
6298 // ObjectWriteOperation op;
6299 // op.truncate(0); // still a write even tho it is a no-op
6300 // ASSERT_EQ(0, ioctx.operate("foo", &op));
6301 //}
6302 //{
6303 // bool dirty = false;
6304 // int r = -1;
6305 // ObjectReadOperation op;
6306 // op.is_dirty(&dirty, &r);
6307 // ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
6308 // ASSERT_TRUE(dirty);
6309 // ASSERT_EQ(0, r);
6310 //}
6311 }
6312
6313 TEST_F(LibRadosTwoPoolsECPP, Overlay) {
6314 // create objects
6315 {
6316 bufferlist bl;
6317 bl.append("base");
6318 ObjectWriteOperation op;
6319 op.write_full(bl);
6320 ASSERT_EQ(0, ioctx.operate("foo", &op));
6321 }
6322 {
6323 bufferlist bl;
6324 bl.append("cache");
6325 ObjectWriteOperation op;
6326 op.write_full(bl);
6327 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
6328 }
6329
6330 // configure cache
6331 bufferlist inbl;
6332 ASSERT_EQ(0, cluster.mon_command(
6333 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6334 "\", \"tierpool\": \"" + cache_pool_name +
6335 "\", \"force_nonempty\": \"--force-nonempty\" }",
6336 inbl, NULL, NULL));
6337 ASSERT_EQ(0, cluster.mon_command(
6338 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6339 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6340 inbl, NULL, NULL));
6341
6342 // wait for maps to settle
6343 cluster.wait_for_latest_osdmap();
6344
6345 // by default, the overlay sends us to cache pool
6346 {
6347 bufferlist bl;
6348 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6349 ASSERT_EQ('c', bl[0]);
6350 }
6351 {
6352 bufferlist bl;
6353 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
6354 ASSERT_EQ('c', bl[0]);
6355 }
6356
6357 // unless we say otherwise
6358 {
6359 bufferlist bl;
6360 ObjectReadOperation op;
6361 op.read(0, 1, &bl, NULL);
6362 librados::AioCompletion *completion = cluster.aio_create_completion();
6363 ASSERT_EQ(0, ioctx.aio_operate(
6364 "foo", completion, &op,
6365 librados::OPERATION_IGNORE_OVERLAY, NULL));
6366 completion->wait_for_complete();
6367 ASSERT_EQ(0, completion->get_return_value());
6368 completion->release();
6369 ASSERT_EQ('b', bl[0]);
6370 }
6371 }
6372
6373 TEST_F(LibRadosTwoPoolsECPP, Promote) {
6374 // create object
6375 {
6376 bufferlist bl;
6377 bl.append("hi there");
6378 ObjectWriteOperation op;
6379 op.write_full(bl);
6380 ASSERT_EQ(0, ioctx.operate("foo", &op));
6381 }
6382
6383 // configure cache
6384 bufferlist inbl;
6385 ASSERT_EQ(0, cluster.mon_command(
6386 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6387 "\", \"tierpool\": \"" + cache_pool_name +
6388 "\", \"force_nonempty\": \"--force-nonempty\" }",
6389 inbl, NULL, NULL));
6390 ASSERT_EQ(0, cluster.mon_command(
6391 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6392 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6393 inbl, NULL, NULL));
6394 ASSERT_EQ(0, cluster.mon_command(
6395 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
6396 "\", \"mode\": \"writeback\"}",
6397 inbl, NULL, NULL));
6398
6399 // wait for maps to settle
6400 cluster.wait_for_latest_osdmap();
6401
6402 // read, trigger a promote
6403 {
6404 bufferlist bl;
6405 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6406 }
6407
6408 // read, trigger a whiteout
6409 {
6410 bufferlist bl;
6411 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
6412 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
6413 }
6414
6415 // verify the object is present in the cache tier
6416 {
6417 NObjectIterator it = cache_ioctx.nobjects_begin();
6418 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
6419 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
6420 ++it;
6421 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
6422 ++it;
6423 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
6424 }
6425 }
6426
6427 TEST_F(LibRadosTwoPoolsECPP, PromoteSnap) {
6428 // create object
6429 {
6430 bufferlist bl;
6431 bl.append("hi there");
6432 ObjectWriteOperation op;
6433 op.write_full(bl);
6434 ASSERT_EQ(0, ioctx.operate("foo", &op));
6435 }
6436 {
6437 bufferlist bl;
6438 bl.append("hi there");
6439 ObjectWriteOperation op;
6440 op.write_full(bl);
6441 ASSERT_EQ(0, ioctx.operate("bar", &op));
6442 }
6443 {
6444 bufferlist bl;
6445 bl.append("hi there");
6446 ObjectWriteOperation op;
6447 op.write_full(bl);
6448 ASSERT_EQ(0, ioctx.operate("baz", &op));
6449 }
6450 {
6451 bufferlist bl;
6452 bl.append("hi there");
6453 ObjectWriteOperation op;
6454 op.write_full(bl);
6455 ASSERT_EQ(0, ioctx.operate("bam", &op));
6456 }
6457
6458 // create a snapshot, clone
6459 vector<uint64_t> my_snaps(1);
6460 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
6461 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
6462 my_snaps));
6463 {
6464 bufferlist bl;
6465 bl.append("ciao!");
6466 ObjectWriteOperation op;
6467 op.write_full(bl);
6468 ASSERT_EQ(0, ioctx.operate("foo", &op));
6469 }
6470 {
6471 bufferlist bl;
6472 bl.append("ciao!");
6473 ObjectWriteOperation op;
6474 op.write_full(bl);
6475 ASSERT_EQ(0, ioctx.operate("bar", &op));
6476 }
6477 {
6478 ObjectWriteOperation op;
6479 op.remove();
6480 ASSERT_EQ(0, ioctx.operate("baz", &op));
6481 }
6482 {
6483 bufferlist bl;
6484 bl.append("ciao!");
6485 ObjectWriteOperation op;
6486 op.write_full(bl);
6487 ASSERT_EQ(0, ioctx.operate("bam", &op));
6488 }
6489
6490 // configure cache
6491 bufferlist inbl;
6492 ASSERT_EQ(0, cluster.mon_command(
6493 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6494 "\", \"tierpool\": \"" + cache_pool_name +
6495 "\", \"force_nonempty\": \"--force-nonempty\" }",
6496 inbl, NULL, NULL));
6497 ASSERT_EQ(0, cluster.mon_command(
6498 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6499 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6500 inbl, NULL, NULL));
6501 ASSERT_EQ(0, cluster.mon_command(
6502 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
6503 "\", \"mode\": \"writeback\"}",
6504 inbl, NULL, NULL));
6505
6506 // wait for maps to settle
6507 cluster.wait_for_latest_osdmap();
6508
6509 // read, trigger a promote on the head
6510 {
6511 bufferlist bl;
6512 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6513 ASSERT_EQ('c', bl[0]);
6514 }
6515 {
6516 bufferlist bl;
6517 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
6518 ASSERT_EQ('c', bl[0]);
6519 }
6520
6521 ioctx.snap_set_read(my_snaps[0]);
6522
6523 // stop and scrub this pg (to make sure scrub can handle missing
6524 // clones in the cache tier)
6525 // This test requires cache tier and base tier to have the same pg_num/pgp_num
6526 {
6527 for (int tries = 0; tries < 5; ++tries) {
6528 IoCtx cache_ioctx;
6529 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
6530 uint32_t hash;
6531 ASSERT_EQ(0, ioctx.get_object_pg_hash_position2("foo", &hash));
6532 ostringstream ss;
6533 ss << "{\"prefix\": \"pg scrub\", \"pgid\": \""
6534 << cache_ioctx.get_id() << "."
6535 << hash
6536 << "\"}";
6537 int r = cluster.mon_command(ss.str(), inbl, NULL, NULL);
6538 if (r == -EAGAIN ||
6539 r == -ENOENT) { // in case mgr osdmap is a bit stale
6540 sleep(5);
6541 continue;
6542 }
6543 ASSERT_EQ(0, r);
6544 break;
6545 }
6546 // give it a few seconds to go. this is sloppy but is usually enough time
6547 cout << "waiting for scrub..." << std::endl;
6548 sleep(15);
6549 cout << "done waiting" << std::endl;
6550 }
6551
6552 // read foo snap
6553 {
6554 bufferlist bl;
6555 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6556 ASSERT_EQ('h', bl[0]);
6557 }
6558
6559 // read bar snap
6560 {
6561 bufferlist bl;
6562 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
6563 ASSERT_EQ('h', bl[0]);
6564 }
6565
6566 // read baz snap
6567 {
6568 bufferlist bl;
6569 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
6570 ASSERT_EQ('h', bl[0]);
6571 }
6572
6573 ioctx.snap_set_read(librados::SNAP_HEAD);
6574
6575 // read foo
6576 {
6577 bufferlist bl;
6578 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6579 ASSERT_EQ('c', bl[0]);
6580 }
6581
6582 // read bar
6583 {
6584 bufferlist bl;
6585 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
6586 ASSERT_EQ('c', bl[0]);
6587 }
6588
6589 // read baz
6590 {
6591 bufferlist bl;
6592 ASSERT_EQ(-ENOENT, ioctx.read("baz", bl, 1, 0));
6593 }
6594
6595 // cleanup
6596 ioctx.selfmanaged_snap_remove(my_snaps[0]);
6597 }
6598
6599 TEST_F(LibRadosTwoPoolsECPP, PromoteSnapTrimRace) {
6600 // create object
6601 {
6602 bufferlist bl;
6603 bl.append("hi there");
6604 ObjectWriteOperation op;
6605 op.write_full(bl);
6606 ASSERT_EQ(0, ioctx.operate("foo", &op));
6607 }
6608
6609 // create a snapshot, clone
6610 vector<uint64_t> my_snaps(1);
6611 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
6612 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
6613 my_snaps));
6614 {
6615 bufferlist bl;
6616 bl.append("ciao!");
6617 ObjectWriteOperation op;
6618 op.write_full(bl);
6619 ASSERT_EQ(0, ioctx.operate("foo", &op));
6620 }
6621
6622 // configure cache
6623 bufferlist inbl;
6624 ASSERT_EQ(0, cluster.mon_command(
6625 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6626 "\", \"tierpool\": \"" + cache_pool_name +
6627 "\", \"force_nonempty\": \"--force-nonempty\" }",
6628 inbl, NULL, NULL));
6629 ASSERT_EQ(0, cluster.mon_command(
6630 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6631 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6632 inbl, NULL, NULL));
6633 ASSERT_EQ(0, cluster.mon_command(
6634 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
6635 "\", \"mode\": \"writeback\"}",
6636 inbl, NULL, NULL));
6637
6638 // wait for maps to settle
6639 cluster.wait_for_latest_osdmap();
6640
6641 // delete the snap
6642 ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(my_snaps[0]));
6643
6644 ioctx.snap_set_read(my_snaps[0]);
6645
6646 // read foo snap. the OSD may or may not realize that this snap has
6647 // been logically deleted; either response is valid.
6648 {
6649 bufferlist bl;
6650 int r = ioctx.read("foo", bl, 1, 0);
6651 ASSERT_TRUE(r == 1 || r == -ENOENT);
6652 }
6653
6654 // cleanup
6655 ioctx.selfmanaged_snap_remove(my_snaps[0]);
6656 }
6657
6658 TEST_F(LibRadosTwoPoolsECPP, Whiteout) {
6659 // create object
6660 {
6661 bufferlist bl;
6662 bl.append("hi there");
6663 ObjectWriteOperation op;
6664 op.write_full(bl);
6665 ASSERT_EQ(0, ioctx.operate("foo", &op));
6666 }
6667
6668 // configure cache
6669 bufferlist inbl;
6670 ASSERT_EQ(0, cluster.mon_command(
6671 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6672 "\", \"tierpool\": \"" + cache_pool_name +
6673 "\", \"force_nonempty\": \"--force-nonempty\" }",
6674 inbl, NULL, NULL));
6675 ASSERT_EQ(0, cluster.mon_command(
6676 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6677 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6678 inbl, NULL, NULL));
6679 ASSERT_EQ(0, cluster.mon_command(
6680 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
6681 "\", \"mode\": \"writeback\"}",
6682 inbl, NULL, NULL));
6683
6684 // wait for maps to settle
6685 cluster.wait_for_latest_osdmap();
6686
6687 // create some whiteouts, verify they behave
6688 {
6689 ObjectWriteOperation op;
6690 op.assert_exists();
6691 op.remove();
6692 ASSERT_EQ(0, ioctx.operate("foo", &op));
6693 }
6694
6695 {
6696 ObjectWriteOperation op;
6697 op.assert_exists();
6698 op.remove();
6699 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
6700 }
6701 {
6702 ObjectWriteOperation op;
6703 op.assert_exists();
6704 op.remove();
6705 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
6706 }
6707
6708 // verify the whiteouts are there in the cache tier
6709 {
6710 NObjectIterator it = cache_ioctx.nobjects_begin();
6711 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
6712 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
6713 ++it;
6714 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
6715 ++it;
6716 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
6717 }
6718
6719 // delete a whiteout and verify it goes away
6720 ASSERT_EQ(-ENOENT, ioctx.remove("foo"));
6721 {
6722 ObjectWriteOperation op;
6723 op.remove();
6724 librados::AioCompletion *completion = cluster.aio_create_completion();
6725 ASSERT_EQ(0, ioctx.aio_operate("bar", completion, &op,
6726 librados::OPERATION_IGNORE_CACHE));
6727 completion->wait_for_complete();
6728 ASSERT_EQ(0, completion->get_return_value());
6729 completion->release();
6730
6731 NObjectIterator it = cache_ioctx.nobjects_begin();
6732 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
6733 ASSERT_TRUE(it->get_oid() == string("foo"));
6734 ++it;
6735 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
6736 }
6737
6738 // recreate an object and verify we can read it
6739 {
6740 bufferlist bl;
6741 bl.append("hi there");
6742 ObjectWriteOperation op;
6743 op.write_full(bl);
6744 ASSERT_EQ(0, ioctx.operate("foo", &op));
6745 }
6746 {
6747 bufferlist bl;
6748 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6749 ASSERT_EQ('h', bl[0]);
6750 }
6751 }
6752
6753 TEST_F(LibRadosTwoPoolsECPP, Evict) {
6754 // create object
6755 {
6756 bufferlist bl;
6757 bl.append("hi there");
6758 ObjectWriteOperation op;
6759 op.write_full(bl);
6760 ASSERT_EQ(0, ioctx.operate("foo", &op));
6761 }
6762
6763 // configure cache
6764 bufferlist inbl;
6765 ASSERT_EQ(0, cluster.mon_command(
6766 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6767 "\", \"tierpool\": \"" + cache_pool_name +
6768 "\", \"force_nonempty\": \"--force-nonempty\" }",
6769 inbl, NULL, NULL));
6770 ASSERT_EQ(0, cluster.mon_command(
6771 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6772 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6773 inbl, NULL, NULL));
6774 ASSERT_EQ(0, cluster.mon_command(
6775 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
6776 "\", \"mode\": \"writeback\"}",
6777 inbl, NULL, NULL));
6778
6779 // wait for maps to settle
6780 cluster.wait_for_latest_osdmap();
6781
6782 // read, trigger a promote
6783 {
6784 bufferlist bl;
6785 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6786 }
6787
6788 // read, trigger a whiteout, and a dirty object
6789 {
6790 bufferlist bl;
6791 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
6792 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
6793 ASSERT_EQ(0, ioctx.write("bar", bl, bl.length(), 0));
6794 }
6795
6796 // verify the object is present in the cache tier
6797 {
6798 NObjectIterator it = cache_ioctx.nobjects_begin();
6799 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
6800 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
6801 ++it;
6802 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
6803 ++it;
6804 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
6805 }
6806
6807 // pin
6808 {
6809 ObjectWriteOperation op;
6810 op.cache_pin();
6811 librados::AioCompletion *completion = cluster.aio_create_completion();
6812 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
6813 completion->wait_for_complete();
6814 ASSERT_EQ(0, completion->get_return_value());
6815 completion->release();
6816 }
6817
6818 // evict the pinned object with -EPERM
6819 {
6820 ObjectReadOperation op;
6821 op.cache_evict();
6822 librados::AioCompletion *completion = cluster.aio_create_completion();
6823 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
6824 librados::OPERATION_IGNORE_CACHE,
6825 NULL));
6826 completion->wait_for_complete();
6827 ASSERT_EQ(-EPERM, completion->get_return_value());
6828 completion->release();
6829 }
6830
6831 // unpin
6832 {
6833 ObjectWriteOperation op;
6834 op.cache_unpin();
6835 librados::AioCompletion *completion = cluster.aio_create_completion();
6836 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
6837 completion->wait_for_complete();
6838 ASSERT_EQ(0, completion->get_return_value());
6839 completion->release();
6840 }
6841
6842 // flush
6843 {
6844 ObjectReadOperation op;
6845 op.cache_flush();
6846 librados::AioCompletion *completion = cluster.aio_create_completion();
6847 ASSERT_EQ(0, cache_ioctx.aio_operate(
6848 "foo", completion, &op,
6849 librados::OPERATION_IGNORE_OVERLAY, NULL));
6850 completion->wait_for_complete();
6851 ASSERT_EQ(0, completion->get_return_value());
6852 completion->release();
6853 }
6854
6855 // verify clean
6856 {
6857 bool dirty = false;
6858 int r = -1;
6859 ObjectReadOperation op;
6860 op.is_dirty(&dirty, &r);
6861 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
6862 ASSERT_FALSE(dirty);
6863 ASSERT_EQ(0, r);
6864 }
6865
6866 // evict
6867 {
6868 ObjectReadOperation op;
6869 op.cache_evict();
6870 librados::AioCompletion *completion = cluster.aio_create_completion();
6871 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
6872 librados::OPERATION_IGNORE_CACHE,
6873 NULL));
6874 completion->wait_for_complete();
6875 ASSERT_EQ(0, completion->get_return_value());
6876 completion->release();
6877 }
6878 {
6879 ObjectReadOperation op;
6880 op.cache_evict();
6881 librados::AioCompletion *completion = cluster.aio_create_completion();
6882 ASSERT_EQ(0, cache_ioctx.aio_operate(
6883 "foo", completion, &op,
6884 librados::OPERATION_IGNORE_CACHE, NULL));
6885 completion->wait_for_complete();
6886 ASSERT_EQ(0, completion->get_return_value());
6887 completion->release();
6888 }
6889 {
6890 ObjectReadOperation op;
6891 op.cache_evict();
6892 librados::AioCompletion *completion = cluster.aio_create_completion();
6893 ASSERT_EQ(0, cache_ioctx.aio_operate(
6894 "bar", completion, &op,
6895 librados::OPERATION_IGNORE_CACHE, NULL));
6896 completion->wait_for_complete();
6897 ASSERT_EQ(-EBUSY, completion->get_return_value());
6898 completion->release();
6899 }
6900 }
6901
6902 TEST_F(LibRadosTwoPoolsECPP, EvictSnap) {
6903 // create object
6904 {
6905 bufferlist bl;
6906 bl.append("hi there");
6907 ObjectWriteOperation op;
6908 op.write_full(bl);
6909 ASSERT_EQ(0, ioctx.operate("foo", &op));
6910 }
6911 {
6912 bufferlist bl;
6913 bl.append("hi there");
6914 ObjectWriteOperation op;
6915 op.write_full(bl);
6916 ASSERT_EQ(0, ioctx.operate("bar", &op));
6917 }
6918 {
6919 bufferlist bl;
6920 bl.append("hi there");
6921 ObjectWriteOperation op;
6922 op.write_full(bl);
6923 ASSERT_EQ(0, ioctx.operate("baz", &op));
6924 }
6925 {
6926 bufferlist bl;
6927 bl.append("hi there");
6928 ObjectWriteOperation op;
6929 op.write_full(bl);
6930 ASSERT_EQ(0, ioctx.operate("bam", &op));
6931 }
6932
6933 // create a snapshot, clone
6934 vector<uint64_t> my_snaps(1);
6935 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
6936 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
6937 my_snaps));
6938 {
6939 bufferlist bl;
6940 bl.append("ciao!");
6941 ObjectWriteOperation op;
6942 op.write_full(bl);
6943 ASSERT_EQ(0, ioctx.operate("foo", &op));
6944 }
6945 {
6946 bufferlist bl;
6947 bl.append("ciao!");
6948 ObjectWriteOperation op;
6949 op.write_full(bl);
6950 ASSERT_EQ(0, ioctx.operate("bar", &op));
6951 }
6952 {
6953 ObjectWriteOperation op;
6954 op.remove();
6955 ASSERT_EQ(0, ioctx.operate("baz", &op));
6956 }
6957 {
6958 bufferlist bl;
6959 bl.append("ciao!");
6960 ObjectWriteOperation op;
6961 op.write_full(bl);
6962 ASSERT_EQ(0, ioctx.operate("bam", &op));
6963 }
6964
6965 // configure cache
6966 bufferlist inbl;
6967 ASSERT_EQ(0, cluster.mon_command(
6968 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6969 "\", \"tierpool\": \"" + cache_pool_name +
6970 "\", \"force_nonempty\": \"--force-nonempty\" }",
6971 inbl, NULL, NULL));
6972 ASSERT_EQ(0, cluster.mon_command(
6973 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6974 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6975 inbl, NULL, NULL));
6976 ASSERT_EQ(0, cluster.mon_command(
6977 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
6978 "\", \"mode\": \"writeback\"}",
6979 inbl, NULL, NULL));
6980
6981 // wait for maps to settle
6982 cluster.wait_for_latest_osdmap();
6983
6984 // read, trigger a promote on the head
6985 {
6986 bufferlist bl;
6987 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6988 ASSERT_EQ('c', bl[0]);
6989 }
6990 {
6991 bufferlist bl;
6992 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
6993 ASSERT_EQ('c', bl[0]);
6994 }
6995
6996 // evict bam
6997 {
6998 ObjectReadOperation op;
6999 op.cache_evict();
7000 librados::AioCompletion *completion = cluster.aio_create_completion();
7001 ASSERT_EQ(0, cache_ioctx.aio_operate(
7002 "bam", completion, &op,
7003 librados::OPERATION_IGNORE_CACHE, NULL));
7004 completion->wait_for_complete();
7005 ASSERT_EQ(0, completion->get_return_value());
7006 completion->release();
7007 }
7008 {
7009 bufferlist bl;
7010 ObjectReadOperation op;
7011 op.read(1, 0, &bl, NULL);
7012 librados::AioCompletion *completion = cluster.aio_create_completion();
7013 ASSERT_EQ(0, cache_ioctx.aio_operate(
7014 "bam", completion, &op,
7015 librados::OPERATION_IGNORE_CACHE, NULL));
7016 completion->wait_for_complete();
7017 ASSERT_EQ(-ENOENT, completion->get_return_value());
7018 completion->release();
7019 }
7020
7021 // read foo snap
7022 ioctx.snap_set_read(my_snaps[0]);
7023 {
7024 bufferlist bl;
7025 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7026 ASSERT_EQ('h', bl[0]);
7027 }
7028
7029 // evict foo snap
7030 {
7031 ObjectReadOperation op;
7032 op.cache_evict();
7033 librados::AioCompletion *completion = cluster.aio_create_completion();
7034 ASSERT_EQ(0, ioctx.aio_operate(
7035 "foo", completion, &op,
7036 librados::OPERATION_IGNORE_CACHE, NULL));
7037 completion->wait_for_complete();
7038 ASSERT_EQ(0, completion->get_return_value());
7039 completion->release();
7040 }
7041 // snap is gone...
7042 {
7043 bufferlist bl;
7044 ObjectReadOperation op;
7045 op.read(1, 0, &bl, NULL);
7046 librados::AioCompletion *completion = cluster.aio_create_completion();
7047 ASSERT_EQ(0, ioctx.aio_operate(
7048 "foo", completion, &op,
7049 librados::OPERATION_IGNORE_CACHE, NULL));
7050 completion->wait_for_complete();
7051 ASSERT_EQ(-ENOENT, completion->get_return_value());
7052 completion->release();
7053 }
7054 // head is still there...
7055 ioctx.snap_set_read(librados::SNAP_HEAD);
7056 {
7057 bufferlist bl;
7058 ObjectReadOperation op;
7059 op.read(1, 0, &bl, NULL);
7060 librados::AioCompletion *completion = cluster.aio_create_completion();
7061 ASSERT_EQ(0, ioctx.aio_operate(
7062 "foo", completion, &op,
7063 librados::OPERATION_IGNORE_CACHE, NULL));
7064 completion->wait_for_complete();
7065 ASSERT_EQ(0, completion->get_return_value());
7066 completion->release();
7067 }
7068
7069 // promote head + snap of bar
7070 ioctx.snap_set_read(librados::SNAP_HEAD);
7071 {
7072 bufferlist bl;
7073 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
7074 ASSERT_EQ('c', bl[0]);
7075 }
7076 ioctx.snap_set_read(my_snaps[0]);
7077 {
7078 bufferlist bl;
7079 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
7080 ASSERT_EQ('h', bl[0]);
7081 }
7082
7083 // evict bar head (fail)
7084 ioctx.snap_set_read(librados::SNAP_HEAD);
7085 {
7086 ObjectReadOperation op;
7087 op.cache_evict();
7088 librados::AioCompletion *completion = cluster.aio_create_completion();
7089 ASSERT_EQ(0, ioctx.aio_operate(
7090 "bar", completion, &op,
7091 librados::OPERATION_IGNORE_CACHE, NULL));
7092 completion->wait_for_complete();
7093 ASSERT_EQ(-EBUSY, completion->get_return_value());
7094 completion->release();
7095 }
7096
7097 // evict bar snap
7098 ioctx.snap_set_read(my_snaps[0]);
7099 {
7100 ObjectReadOperation op;
7101 op.cache_evict();
7102 librados::AioCompletion *completion = cluster.aio_create_completion();
7103 ASSERT_EQ(0, ioctx.aio_operate(
7104 "bar", completion, &op,
7105 librados::OPERATION_IGNORE_CACHE, NULL));
7106 completion->wait_for_complete();
7107 ASSERT_EQ(0, completion->get_return_value());
7108 completion->release();
7109 }
7110 // ...and then head
7111 ioctx.snap_set_read(librados::SNAP_HEAD);
7112 {
7113 bufferlist bl;
7114 ObjectReadOperation op;
7115 op.read(1, 0, &bl, NULL);
7116 librados::AioCompletion *completion = cluster.aio_create_completion();
7117 ASSERT_EQ(0, ioctx.aio_operate(
7118 "bar", completion, &op,
7119 librados::OPERATION_IGNORE_CACHE, NULL));
7120 completion->wait_for_complete();
7121 ASSERT_EQ(0, completion->get_return_value());
7122 completion->release();
7123 }
7124 {
7125 ObjectReadOperation op;
7126 op.cache_evict();
7127 librados::AioCompletion *completion = cluster.aio_create_completion();
7128 ASSERT_EQ(0, ioctx.aio_operate(
7129 "bar", completion, &op,
7130 librados::OPERATION_IGNORE_CACHE, NULL));
7131 completion->wait_for_complete();
7132 ASSERT_EQ(0, completion->get_return_value());
7133 completion->release();
7134 }
7135
7136 // cleanup
7137 ioctx.selfmanaged_snap_remove(my_snaps[0]);
7138 }
7139
7140 TEST_F(LibRadosTwoPoolsECPP, TryFlush) {
7141 // configure cache
7142 bufferlist inbl;
7143 ASSERT_EQ(0, cluster.mon_command(
7144 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
7145 "\", \"tierpool\": \"" + cache_pool_name +
7146 "\", \"force_nonempty\": \"--force-nonempty\" }",
7147 inbl, NULL, NULL));
7148 ASSERT_EQ(0, cluster.mon_command(
7149 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7150 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7151 inbl, NULL, NULL));
7152 ASSERT_EQ(0, cluster.mon_command(
7153 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
7154 "\", \"mode\": \"writeback\"}",
7155 inbl, NULL, NULL));
7156
7157 // wait for maps to settle
7158 cluster.wait_for_latest_osdmap();
7159
7160 // create object
7161 {
7162 bufferlist bl;
7163 bl.append("hi there");
7164 ObjectWriteOperation op;
7165 op.write_full(bl);
7166 ASSERT_EQ(0, ioctx.operate("foo", &op));
7167 }
7168
7169 // verify the object is present in the cache tier
7170 {
7171 NObjectIterator it = cache_ioctx.nobjects_begin();
7172 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
7173 ASSERT_TRUE(it->get_oid() == string("foo"));
7174 ++it;
7175 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7176 }
7177
7178 // verify the object is NOT present in the base tier
7179 {
7180 NObjectIterator it = ioctx.nobjects_begin();
7181 ASSERT_TRUE(it == ioctx.nobjects_end());
7182 }
7183
7184 // verify dirty
7185 {
7186 bool dirty = false;
7187 int r = -1;
7188 ObjectReadOperation op;
7189 op.is_dirty(&dirty, &r);
7190 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
7191 ASSERT_TRUE(dirty);
7192 ASSERT_EQ(0, r);
7193 }
7194
7195 // pin
7196 {
7197 ObjectWriteOperation op;
7198 op.cache_pin();
7199 librados::AioCompletion *completion = cluster.aio_create_completion();
7200 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
7201 completion->wait_for_complete();
7202 ASSERT_EQ(0, completion->get_return_value());
7203 completion->release();
7204 }
7205
7206 // flush the pinned object with -EPERM
7207 {
7208 ObjectReadOperation op;
7209 op.cache_try_flush();
7210 librados::AioCompletion *completion = cluster.aio_create_completion();
7211 ASSERT_EQ(0, cache_ioctx.aio_operate(
7212 "foo", completion, &op,
7213 librados::OPERATION_IGNORE_OVERLAY |
7214 librados::OPERATION_SKIPRWLOCKS, NULL));
7215 completion->wait_for_complete();
7216 ASSERT_EQ(-EPERM, completion->get_return_value());
7217 completion->release();
7218 }
7219
7220 // unpin
7221 {
7222 ObjectWriteOperation op;
7223 op.cache_unpin();
7224 librados::AioCompletion *completion = cluster.aio_create_completion();
7225 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
7226 completion->wait_for_complete();
7227 ASSERT_EQ(0, completion->get_return_value());
7228 completion->release();
7229 }
7230
7231 // flush
7232 {
7233 ObjectReadOperation op;
7234 op.cache_try_flush();
7235 librados::AioCompletion *completion = cluster.aio_create_completion();
7236 ASSERT_EQ(0, cache_ioctx.aio_operate(
7237 "foo", completion, &op,
7238 librados::OPERATION_IGNORE_OVERLAY |
7239 librados::OPERATION_SKIPRWLOCKS, NULL));
7240 completion->wait_for_complete();
7241 ASSERT_EQ(0, completion->get_return_value());
7242 completion->release();
7243 }
7244
7245 // verify clean
7246 {
7247 bool dirty = false;
7248 int r = -1;
7249 ObjectReadOperation op;
7250 op.is_dirty(&dirty, &r);
7251 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
7252 ASSERT_FALSE(dirty);
7253 ASSERT_EQ(0, r);
7254 }
7255
7256 // verify in base tier
7257 {
7258 NObjectIterator it = ioctx.nobjects_begin();
7259 ASSERT_TRUE(it != ioctx.nobjects_end());
7260 ASSERT_TRUE(it->get_oid() == string("foo"));
7261 ++it;
7262 ASSERT_TRUE(it == ioctx.nobjects_end());
7263 }
7264
7265 // evict it
7266 {
7267 ObjectReadOperation op;
7268 op.cache_evict();
7269 librados::AioCompletion *completion = cluster.aio_create_completion();
7270 ASSERT_EQ(0, cache_ioctx.aio_operate(
7271 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
7272 completion->wait_for_complete();
7273 ASSERT_EQ(0, completion->get_return_value());
7274 completion->release();
7275 }
7276
7277 // verify no longer in cache tier
7278 {
7279 NObjectIterator it = cache_ioctx.nobjects_begin();
7280 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7281 }
7282 }
7283
7284 TEST_F(LibRadosTwoPoolsECPP, FailedFlush) {
7285 // configure cache
7286 bufferlist inbl;
7287 ASSERT_EQ(0, cluster.mon_command(
7288 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
7289 "\", \"tierpool\": \"" + cache_pool_name +
7290 "\", \"force_nonempty\": \"--force-nonempty\" }",
7291 inbl, NULL, NULL));
7292 ASSERT_EQ(0, cluster.mon_command(
7293 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7294 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7295 inbl, NULL, NULL));
7296 ASSERT_EQ(0, cluster.mon_command(
7297 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
7298 "\", \"mode\": \"writeback\"}",
7299 inbl, NULL, NULL));
7300
7301 // wait for maps to settle
7302 cluster.wait_for_latest_osdmap();
7303
7304 // create object
7305 {
7306 bufferlist bl;
7307 bl.append("hi there");
7308 ObjectWriteOperation op;
7309 op.write_full(bl);
7310 ASSERT_EQ(0, ioctx.operate("foo", &op));
7311 }
7312
7313 // verify the object is present in the cache tier
7314 {
7315 NObjectIterator it = cache_ioctx.nobjects_begin();
7316 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
7317 ASSERT_TRUE(it->get_oid() == string("foo"));
7318 ++it;
7319 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7320 }
7321
7322 // verify the object is NOT present in the base tier
7323 {
7324 NObjectIterator it = ioctx.nobjects_begin();
7325 ASSERT_TRUE(it == ioctx.nobjects_end());
7326 }
7327
7328 // set omap
7329 {
7330 ObjectWriteOperation op;
7331 std::map<std::string, bufferlist> omap;
7332 omap["somekey"] = bufferlist();
7333 op.omap_set(omap);
7334 librados::AioCompletion *completion = cluster.aio_create_completion();
7335 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
7336 completion->wait_for_complete();
7337 ASSERT_EQ(0, completion->get_return_value());
7338 completion->release();
7339 }
7340
7341 // flush
7342 {
7343 ObjectReadOperation op;
7344 op.cache_flush();
7345 librados::AioCompletion *completion = cluster.aio_create_completion();
7346 ASSERT_EQ(0, cache_ioctx.aio_operate(
7347 "foo", completion, &op,
7348 librados::OPERATION_IGNORE_OVERLAY, NULL));
7349 completion->wait_for_complete();
7350 ASSERT_NE(0, completion->get_return_value());
7351 completion->release();
7352 }
7353
7354 // get omap
7355 {
7356 ObjectReadOperation op;
7357 bufferlist bl;
7358 int prval = 0;
7359 std::set<std::string> keys;
7360 keys.insert("somekey");
7361 std::map<std::string, bufferlist> map;
7362
7363 op.omap_get_vals_by_keys(keys, &map, &prval);
7364 librados::AioCompletion *completion = cluster.aio_create_completion();
7365 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op, &bl));
7366 sleep(5);
7367 bool completed = completion->is_complete();
7368 if( !completed ) {
7369 cache_ioctx.aio_cancel(completion);
7370 std::cerr << "Most probably test case will hang here, please reset manually" << std::endl;
7371 ASSERT_TRUE(completed); //in fact we are locked forever at test case shutdown unless fix for http://tracker.ceph.com/issues/14511 is applied. Seems there is no workaround for that
7372 }
7373 completion->release();
7374 }
7375 // verify still not in base tier
7376 {
7377 ASSERT_TRUE(ioctx.nobjects_begin() == ioctx.nobjects_end());
7378 }
7379 // erase it
7380 {
7381 ObjectWriteOperation op;
7382 op.remove();
7383 ASSERT_EQ(0, ioctx.operate("foo", &op));
7384 }
7385 // flush whiteout
7386 {
7387 ObjectReadOperation op;
7388 op.cache_flush();
7389 librados::AioCompletion *completion = cluster.aio_create_completion();
7390 ASSERT_EQ(0, cache_ioctx.aio_operate(
7391 "foo", completion, &op,
7392 librados::OPERATION_IGNORE_OVERLAY, NULL));
7393 completion->wait_for_complete();
7394 ASSERT_EQ(0, completion->get_return_value());
7395 completion->release();
7396 }
7397 // evict
7398 {
7399 ObjectReadOperation op;
7400 op.cache_evict();
7401 librados::AioCompletion *completion = cluster.aio_create_completion();
7402 ASSERT_EQ(0, cache_ioctx.aio_operate(
7403 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
7404 completion->wait_for_complete();
7405 ASSERT_EQ(0, completion->get_return_value());
7406 completion->release();
7407 }
7408
7409 // verify no longer in cache tier
7410 {
7411 NObjectIterator it = cache_ioctx.nobjects_begin();
7412 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7413 }
7414 // or base tier
7415 {
7416 NObjectIterator it = ioctx.nobjects_begin();
7417 ASSERT_TRUE(it == ioctx.nobjects_end());
7418 }
7419 }
7420
7421 TEST_F(LibRadosTwoPoolsECPP, Flush) {
7422 // configure cache
7423 bufferlist inbl;
7424 ASSERT_EQ(0, cluster.mon_command(
7425 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
7426 "\", \"tierpool\": \"" + cache_pool_name +
7427 "\", \"force_nonempty\": \"--force-nonempty\" }",
7428 inbl, NULL, NULL));
7429 ASSERT_EQ(0, cluster.mon_command(
7430 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7431 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7432 inbl, NULL, NULL));
7433 ASSERT_EQ(0, cluster.mon_command(
7434 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
7435 "\", \"mode\": \"writeback\"}",
7436 inbl, NULL, NULL));
7437
7438 // wait for maps to settle
7439 cluster.wait_for_latest_osdmap();
7440
7441 uint64_t user_version = 0;
7442
7443 // create object
7444 {
7445 bufferlist bl;
7446 bl.append("hi there");
7447 ObjectWriteOperation op;
7448 op.write_full(bl);
7449 ASSERT_EQ(0, ioctx.operate("foo", &op));
7450 }
7451
7452 // verify the object is present in the cache tier
7453 {
7454 NObjectIterator it = cache_ioctx.nobjects_begin();
7455 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
7456 ASSERT_TRUE(it->get_oid() == string("foo"));
7457 ++it;
7458 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7459 }
7460
7461 // verify the object is NOT present in the base tier
7462 {
7463 NObjectIterator it = ioctx.nobjects_begin();
7464 ASSERT_TRUE(it == ioctx.nobjects_end());
7465 }
7466
7467 // verify dirty
7468 {
7469 bool dirty = false;
7470 int r = -1;
7471 ObjectReadOperation op;
7472 op.is_dirty(&dirty, &r);
7473 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
7474 ASSERT_TRUE(dirty);
7475 ASSERT_EQ(0, r);
7476 user_version = cache_ioctx.get_last_version();
7477 }
7478
7479 // pin
7480 {
7481 ObjectWriteOperation op;
7482 op.cache_pin();
7483 librados::AioCompletion *completion = cluster.aio_create_completion();
7484 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
7485 completion->wait_for_complete();
7486 ASSERT_EQ(0, completion->get_return_value());
7487 completion->release();
7488 }
7489
7490 // flush the pinned object with -EPERM
7491 {
7492 ObjectReadOperation op;
7493 op.cache_try_flush();
7494 librados::AioCompletion *completion = cluster.aio_create_completion();
7495 ASSERT_EQ(0, cache_ioctx.aio_operate(
7496 "foo", completion, &op,
7497 librados::OPERATION_IGNORE_OVERLAY |
7498 librados::OPERATION_SKIPRWLOCKS, NULL));
7499 completion->wait_for_complete();
7500 ASSERT_EQ(-EPERM, completion->get_return_value());
7501 completion->release();
7502 }
7503
7504 // unpin
7505 {
7506 ObjectWriteOperation op;
7507 op.cache_unpin();
7508 librados::AioCompletion *completion = cluster.aio_create_completion();
7509 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
7510 completion->wait_for_complete();
7511 ASSERT_EQ(0, completion->get_return_value());
7512 completion->release();
7513 }
7514
7515 // flush
7516 {
7517 ObjectReadOperation op;
7518 op.cache_flush();
7519 librados::AioCompletion *completion = cluster.aio_create_completion();
7520 ASSERT_EQ(0, cache_ioctx.aio_operate(
7521 "foo", completion, &op,
7522 librados::OPERATION_IGNORE_OVERLAY, NULL));
7523 completion->wait_for_complete();
7524 ASSERT_EQ(0, completion->get_return_value());
7525 completion->release();
7526 }
7527
7528 // verify clean
7529 {
7530 bool dirty = false;
7531 int r = -1;
7532 ObjectReadOperation op;
7533 op.is_dirty(&dirty, &r);
7534 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
7535 ASSERT_FALSE(dirty);
7536 ASSERT_EQ(0, r);
7537 }
7538
7539 // verify in base tier
7540 {
7541 NObjectIterator it = ioctx.nobjects_begin();
7542 ASSERT_TRUE(it != ioctx.nobjects_end());
7543 ASSERT_TRUE(it->get_oid() == string("foo"));
7544 ++it;
7545 ASSERT_TRUE(it == ioctx.nobjects_end());
7546 }
7547
7548 // evict it
7549 {
7550 ObjectReadOperation op;
7551 op.cache_evict();
7552 librados::AioCompletion *completion = cluster.aio_create_completion();
7553 ASSERT_EQ(0, cache_ioctx.aio_operate(
7554 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
7555 completion->wait_for_complete();
7556 ASSERT_EQ(0, completion->get_return_value());
7557 completion->release();
7558 }
7559
7560 // verify no longer in cache tier
7561 {
7562 NObjectIterator it = cache_ioctx.nobjects_begin();
7563 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7564 }
7565
7566 // read it again and verify the version is consistent
7567 {
7568 bufferlist bl;
7569 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
7570 ASSERT_EQ(user_version, cache_ioctx.get_last_version());
7571 }
7572
7573 // erase it
7574 {
7575 ObjectWriteOperation op;
7576 op.remove();
7577 ASSERT_EQ(0, ioctx.operate("foo", &op));
7578 }
7579
7580 // flush whiteout
7581 {
7582 ObjectReadOperation op;
7583 op.cache_flush();
7584 librados::AioCompletion *completion = cluster.aio_create_completion();
7585 ASSERT_EQ(0, cache_ioctx.aio_operate(
7586 "foo", completion, &op,
7587 librados::OPERATION_IGNORE_OVERLAY, NULL));
7588 completion->wait_for_complete();
7589 ASSERT_EQ(0, completion->get_return_value());
7590 completion->release();
7591 }
7592
7593 // evict
7594 {
7595 ObjectReadOperation op;
7596 op.cache_evict();
7597 librados::AioCompletion *completion = cluster.aio_create_completion();
7598 ASSERT_EQ(0, cache_ioctx.aio_operate(
7599 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
7600 completion->wait_for_complete();
7601 ASSERT_EQ(0, completion->get_return_value());
7602 completion->release();
7603 }
7604
7605 // verify no longer in cache tier
7606 {
7607 NObjectIterator it = cache_ioctx.nobjects_begin();
7608 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7609 }
7610 // or base tier
7611 {
7612 NObjectIterator it = ioctx.nobjects_begin();
7613 ASSERT_TRUE(it == ioctx.nobjects_end());
7614 }
7615 }
7616
7617 TEST_F(LibRadosTwoPoolsECPP, FlushSnap) {
7618 // configure cache
7619 bufferlist inbl;
7620 ASSERT_EQ(0, cluster.mon_command(
7621 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
7622 "\", \"tierpool\": \"" + cache_pool_name +
7623 "\", \"force_nonempty\": \"--force-nonempty\" }",
7624 inbl, NULL, NULL));
7625 ASSERT_EQ(0, cluster.mon_command(
7626 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7627 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7628 inbl, NULL, NULL));
7629 ASSERT_EQ(0, cluster.mon_command(
7630 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
7631 "\", \"mode\": \"writeback\"}",
7632 inbl, NULL, NULL));
7633
7634 // wait for maps to settle
7635 cluster.wait_for_latest_osdmap();
7636
7637 // create object
7638 {
7639 bufferlist bl;
7640 bl.append("a");
7641 ObjectWriteOperation op;
7642 op.write_full(bl);
7643 ASSERT_EQ(0, ioctx.operate("foo", &op));
7644 }
7645
7646 // create a snapshot, clone
7647 vector<uint64_t> my_snaps(1);
7648 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
7649 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
7650 my_snaps));
7651 {
7652 bufferlist bl;
7653 bl.append("b");
7654 ObjectWriteOperation op;
7655 op.write_full(bl);
7656 ASSERT_EQ(0, ioctx.operate("foo", &op));
7657 }
7658
7659 // and another
7660 my_snaps.resize(2);
7661 my_snaps[1] = my_snaps[0];
7662 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
7663 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
7664 my_snaps));
7665 {
7666 bufferlist bl;
7667 bl.append("c");
7668 ObjectWriteOperation op;
7669 op.write_full(bl);
7670 ASSERT_EQ(0, ioctx.operate("foo", &op));
7671 }
7672
7673 // verify the object is present in the cache tier
7674 {
7675 NObjectIterator it = cache_ioctx.nobjects_begin();
7676 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
7677 ASSERT_TRUE(it->get_oid() == string("foo"));
7678 ++it;
7679 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7680 }
7681
7682 // verify the object is NOT present in the base tier
7683 {
7684 NObjectIterator it = ioctx.nobjects_begin();
7685 ASSERT_TRUE(it == ioctx.nobjects_end());
7686 }
7687
7688 // flush on head (should fail)
7689 ioctx.snap_set_read(librados::SNAP_HEAD);
7690 {
7691 ObjectReadOperation op;
7692 op.cache_flush();
7693 librados::AioCompletion *completion = cluster.aio_create_completion();
7694 ASSERT_EQ(0, ioctx.aio_operate(
7695 "foo", completion, &op,
7696 librados::OPERATION_IGNORE_CACHE, NULL));
7697 completion->wait_for_complete();
7698 ASSERT_EQ(-EBUSY, completion->get_return_value());
7699 completion->release();
7700 }
7701 // flush on recent snap (should fail)
7702 ioctx.snap_set_read(my_snaps[0]);
7703 {
7704 ObjectReadOperation op;
7705 op.cache_flush();
7706 librados::AioCompletion *completion = cluster.aio_create_completion();
7707 ASSERT_EQ(0, ioctx.aio_operate(
7708 "foo", completion, &op,
7709 librados::OPERATION_IGNORE_CACHE, NULL));
7710 completion->wait_for_complete();
7711 ASSERT_EQ(-EBUSY, completion->get_return_value());
7712 completion->release();
7713 }
7714 // flush on oldest snap
7715 ioctx.snap_set_read(my_snaps[1]);
7716 {
7717 ObjectReadOperation op;
7718 op.cache_flush();
7719 librados::AioCompletion *completion = cluster.aio_create_completion();
7720 ASSERT_EQ(0, ioctx.aio_operate(
7721 "foo", completion, &op,
7722 librados::OPERATION_IGNORE_CACHE, NULL));
7723 completion->wait_for_complete();
7724 ASSERT_EQ(0, completion->get_return_value());
7725 completion->release();
7726 }
7727 // flush on next oldest snap
7728 ioctx.snap_set_read(my_snaps[0]);
7729 {
7730 ObjectReadOperation op;
7731 op.cache_flush();
7732 librados::AioCompletion *completion = cluster.aio_create_completion();
7733 ASSERT_EQ(0, ioctx.aio_operate(
7734 "foo", completion, &op,
7735 librados::OPERATION_IGNORE_CACHE, NULL));
7736 completion->wait_for_complete();
7737 ASSERT_EQ(0, completion->get_return_value());
7738 completion->release();
7739 }
7740 // flush on head
7741 ioctx.snap_set_read(librados::SNAP_HEAD);
7742 {
7743 ObjectReadOperation op;
7744 op.cache_flush();
7745 librados::AioCompletion *completion = cluster.aio_create_completion();
7746 ASSERT_EQ(0, ioctx.aio_operate(
7747 "foo", completion, &op,
7748 librados::OPERATION_IGNORE_CACHE, NULL));
7749 completion->wait_for_complete();
7750 ASSERT_EQ(0, completion->get_return_value());
7751 completion->release();
7752 }
7753
7754 // verify i can read the snaps from the cache pool
7755 ioctx.snap_set_read(librados::SNAP_HEAD);
7756 {
7757 bufferlist bl;
7758 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7759 ASSERT_EQ('c', bl[0]);
7760 }
7761 ioctx.snap_set_read(my_snaps[0]);
7762 {
7763 bufferlist bl;
7764 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7765 ASSERT_EQ('b', bl[0]);
7766 }
7767 ioctx.snap_set_read(my_snaps[1]);
7768 {
7769 bufferlist bl;
7770 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7771 ASSERT_EQ('a', bl[0]);
7772 }
7773
7774 // tear down tiers
7775 ASSERT_EQ(0, cluster.mon_command(
7776 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
7777 "\"}",
7778 inbl, NULL, NULL));
7779
7780 // wait for maps to settle
7781 cluster.wait_for_latest_osdmap();
7782
7783 // verify i can read the snaps from the base pool
7784 ioctx.snap_set_read(librados::SNAP_HEAD);
7785 {
7786 bufferlist bl;
7787 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7788 ASSERT_EQ('c', bl[0]);
7789 }
7790 ioctx.snap_set_read(my_snaps[0]);
7791 {
7792 bufferlist bl;
7793 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7794 ASSERT_EQ('b', bl[0]);
7795 }
7796 ioctx.snap_set_read(my_snaps[1]);
7797 {
7798 bufferlist bl;
7799 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7800 ASSERT_EQ('a', bl[0]);
7801 }
7802
7803 ASSERT_EQ(0, cluster.mon_command(
7804 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7805 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7806 inbl, NULL, NULL));
7807 cluster.wait_for_latest_osdmap();
7808
7809 // cleanup
7810 ioctx.selfmanaged_snap_remove(my_snaps[0]);
7811 }
7812
7813 TEST_F(LibRadosTierECPP, FlushWriteRaces) {
7814 Rados cluster;
7815 std::string pool_name = get_temp_pool_name();
7816 std::string cache_pool_name = pool_name + "-cache";
7817 ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
7818 ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str()));
7819 IoCtx cache_ioctx;
7820 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
7821 cache_ioctx.application_enable("rados", true);
7822 IoCtx ioctx;
7823 ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
7824
7825 // configure cache
7826 bufferlist inbl;
7827 ASSERT_EQ(0, cluster.mon_command(
7828 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
7829 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
7830 inbl, NULL, NULL));
7831 ASSERT_EQ(0, cluster.mon_command(
7832 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7833 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7834 inbl, NULL, NULL));
7835 ASSERT_EQ(0, cluster.mon_command(
7836 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
7837 "\", \"mode\": \"writeback\"}",
7838 inbl, NULL, NULL));
7839
7840 // wait for maps to settle
7841 cluster.wait_for_latest_osdmap();
7842
7843 // create/dirty object
7844 bufferlist bl;
7845 bl.append("hi there");
7846 {
7847 ObjectWriteOperation op;
7848 op.write_full(bl);
7849 ASSERT_EQ(0, ioctx.operate("foo", &op));
7850 }
7851
7852 // flush + write
7853 {
7854 ObjectReadOperation op;
7855 op.cache_flush();
7856 librados::AioCompletion *completion = cluster.aio_create_completion();
7857 ASSERT_EQ(0, cache_ioctx.aio_operate(
7858 "foo", completion, &op,
7859 librados::OPERATION_IGNORE_OVERLAY, NULL));
7860
7861 ObjectWriteOperation op2;
7862 op2.write_full(bl);
7863 librados::AioCompletion *completion2 = cluster.aio_create_completion();
7864 ASSERT_EQ(0, ioctx.aio_operate(
7865 "foo", completion2, &op2, 0));
7866
7867 completion->wait_for_complete();
7868 completion2->wait_for_complete();
7869 ASSERT_EQ(0, completion->get_return_value());
7870 ASSERT_EQ(0, completion2->get_return_value());
7871 completion->release();
7872 completion2->release();
7873 }
7874
7875 int tries = 1000;
7876 do {
7877 // create/dirty object
7878 {
7879 bufferlist bl;
7880 bl.append("hi there");
7881 ObjectWriteOperation op;
7882 op.write_full(bl);
7883 ASSERT_EQ(0, ioctx.operate("foo", &op));
7884 }
7885
7886 // try-flush + write
7887 {
7888 ObjectReadOperation op;
7889 op.cache_try_flush();
7890 librados::AioCompletion *completion = cluster.aio_create_completion();
7891 ASSERT_EQ(0, cache_ioctx.aio_operate(
7892 "foo", completion, &op,
7893 librados::OPERATION_IGNORE_OVERLAY |
7894 librados::OPERATION_SKIPRWLOCKS, NULL));
7895
7896 ObjectWriteOperation op2;
7897 op2.write_full(bl);
7898 librados::AioCompletion *completion2 = cluster.aio_create_completion();
7899 ASSERT_EQ(0, ioctx.aio_operate("foo", completion2, &op2, 0));
7900
7901 completion->wait_for_complete();
7902 completion2->wait_for_complete();
7903 int r = completion->get_return_value();
7904 ASSERT_TRUE(r == -EBUSY || r == 0);
7905 ASSERT_EQ(0, completion2->get_return_value());
7906 completion->release();
7907 completion2->release();
7908 if (r == -EBUSY)
7909 break;
7910 cout << "didn't get EBUSY, trying again" << std::endl;
7911 }
7912 ASSERT_TRUE(--tries);
7913 } while (true);
7914
7915 // tear down tiers
7916 ASSERT_EQ(0, cluster.mon_command(
7917 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
7918 "\"}",
7919 inbl, NULL, NULL));
7920 ASSERT_EQ(0, cluster.mon_command(
7921 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
7922 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
7923 inbl, NULL, NULL));
7924
7925 // wait for maps to settle before next test
7926 cluster.wait_for_latest_osdmap();
7927
7928 ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str()));
7929 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
7930 }
7931
7932 TEST_F(LibRadosTwoPoolsECPP, FlushTryFlushRaces) {
7933 // configure cache
7934 bufferlist inbl;
7935 ASSERT_EQ(0, cluster.mon_command(
7936 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
7937 "\", \"tierpool\": \"" + cache_pool_name +
7938 "\", \"force_nonempty\": \"--force-nonempty\" }",
7939 inbl, NULL, NULL));
7940 ASSERT_EQ(0, cluster.mon_command(
7941 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7942 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7943 inbl, NULL, NULL));
7944 ASSERT_EQ(0, cluster.mon_command(
7945 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
7946 "\", \"mode\": \"writeback\"}",
7947 inbl, NULL, NULL));
7948
7949 // wait for maps to settle
7950 cluster.wait_for_latest_osdmap();
7951
7952 // create/dirty object
7953 {
7954 bufferlist bl;
7955 bl.append("hi there");
7956 ObjectWriteOperation op;
7957 op.write_full(bl);
7958 ASSERT_EQ(0, ioctx.operate("foo", &op));
7959 }
7960
7961 // flush + flush
7962 {
7963 ObjectReadOperation op;
7964 op.cache_flush();
7965 librados::AioCompletion *completion = cluster.aio_create_completion();
7966 ASSERT_EQ(0, cache_ioctx.aio_operate(
7967 "foo", completion, &op,
7968 librados::OPERATION_IGNORE_OVERLAY, NULL));
7969
7970 ObjectReadOperation op2;
7971 op2.cache_flush();
7972 librados::AioCompletion *completion2 = cluster.aio_create_completion();
7973 ASSERT_EQ(0, cache_ioctx.aio_operate(
7974 "foo", completion2, &op2,
7975 librados::OPERATION_IGNORE_OVERLAY, NULL));
7976
7977 completion->wait_for_complete();
7978 completion2->wait_for_complete();
7979 ASSERT_EQ(0, completion->get_return_value());
7980 ASSERT_EQ(0, completion2->get_return_value());
7981 completion->release();
7982 completion2->release();
7983 }
7984
7985 // create/dirty object
7986 {
7987 bufferlist bl;
7988 bl.append("hi there");
7989 ObjectWriteOperation op;
7990 op.write_full(bl);
7991 ASSERT_EQ(0, ioctx.operate("foo", &op));
7992 }
7993
7994 // flush + try-flush
7995 {
7996 ObjectReadOperation op;
7997 op.cache_flush();
7998 librados::AioCompletion *completion = cluster.aio_create_completion();
7999 ASSERT_EQ(0, cache_ioctx.aio_operate(
8000 "foo", completion, &op,
8001 librados::OPERATION_IGNORE_OVERLAY, NULL));
8002
8003 ObjectReadOperation op2;
8004 op2.cache_try_flush();
8005 librados::AioCompletion *completion2 = cluster.aio_create_completion();
8006 ASSERT_EQ(0, cache_ioctx.aio_operate(
8007 "foo", completion2, &op2,
8008 librados::OPERATION_IGNORE_OVERLAY |
8009 librados::OPERATION_SKIPRWLOCKS, NULL));
8010
8011 completion->wait_for_complete();
8012 completion2->wait_for_complete();
8013 ASSERT_EQ(0, completion->get_return_value());
8014 ASSERT_EQ(0, completion2->get_return_value());
8015 completion->release();
8016 completion2->release();
8017 }
8018
8019 // create/dirty object
8020 int tries = 1000;
8021 do {
8022 {
8023 bufferlist bl;
8024 bl.append("hi there");
8025 ObjectWriteOperation op;
8026 op.write_full(bl);
8027 ASSERT_EQ(0, ioctx.operate("foo", &op));
8028 }
8029
8030 // try-flush + flush
8031 // (flush will not piggyback on try-flush)
8032 {
8033 ObjectReadOperation op;
8034 op.cache_try_flush();
8035 librados::AioCompletion *completion = cluster.aio_create_completion();
8036 ASSERT_EQ(0, cache_ioctx.aio_operate(
8037 "foo", completion, &op,
8038 librados::OPERATION_IGNORE_OVERLAY |
8039 librados::OPERATION_SKIPRWLOCKS, NULL));
8040
8041 ObjectReadOperation op2;
8042 op2.cache_flush();
8043 librados::AioCompletion *completion2 = cluster.aio_create_completion();
8044 ASSERT_EQ(0, cache_ioctx.aio_operate(
8045 "foo", completion2, &op2,
8046 librados::OPERATION_IGNORE_OVERLAY, NULL));
8047
8048 completion->wait_for_complete();
8049 completion2->wait_for_complete();
8050 int r = completion->get_return_value();
8051 ASSERT_TRUE(r == -EBUSY || r == 0);
8052 ASSERT_EQ(0, completion2->get_return_value());
8053 completion->release();
8054 completion2->release();
8055 if (r == -EBUSY)
8056 break;
8057 cout << "didn't get EBUSY, trying again" << std::endl;
8058 }
8059 ASSERT_TRUE(--tries);
8060 } while (true);
8061
8062 // create/dirty object
8063 {
8064 bufferlist bl;
8065 bl.append("hi there");
8066 ObjectWriteOperation op;
8067 op.write_full(bl);
8068 ASSERT_EQ(0, ioctx.operate("foo", &op));
8069 }
8070
8071 // try-flush + try-flush
8072 {
8073 ObjectReadOperation op;
8074 op.cache_try_flush();
8075 librados::AioCompletion *completion = cluster.aio_create_completion();
8076 ASSERT_EQ(0, cache_ioctx.aio_operate(
8077 "foo", completion, &op,
8078 librados::OPERATION_IGNORE_OVERLAY |
8079 librados::OPERATION_SKIPRWLOCKS, NULL));
8080
8081 ObjectReadOperation op2;
8082 op2.cache_try_flush();
8083 librados::AioCompletion *completion2 = cluster.aio_create_completion();
8084 ASSERT_EQ(0, cache_ioctx.aio_operate(
8085 "foo", completion2, &op2,
8086 librados::OPERATION_IGNORE_OVERLAY |
8087 librados::OPERATION_SKIPRWLOCKS, NULL));
8088
8089 completion->wait_for_complete();
8090 completion2->wait_for_complete();
8091 ASSERT_EQ(0, completion->get_return_value());
8092 ASSERT_EQ(0, completion2->get_return_value());
8093 completion->release();
8094 completion2->release();
8095 }
8096 }
8097
8098 TEST_F(LibRadosTwoPoolsECPP, TryFlushReadRace) {
8099 // configure cache
8100 bufferlist inbl;
8101 ASSERT_EQ(0, cluster.mon_command(
8102 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8103 "\", \"tierpool\": \"" + cache_pool_name +
8104 "\", \"force_nonempty\": \"--force-nonempty\" }",
8105 inbl, NULL, NULL));
8106 ASSERT_EQ(0, cluster.mon_command(
8107 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
8108 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
8109 inbl, NULL, NULL));
8110 ASSERT_EQ(0, cluster.mon_command(
8111 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
8112 "\", \"mode\": \"writeback\"}",
8113 inbl, NULL, NULL));
8114
8115 // wait for maps to settle
8116 cluster.wait_for_latest_osdmap();
8117
8118 // create/dirty object
8119 {
8120 bufferlist bl;
8121 bl.append("hi there");
8122 bufferptr bp(4000000); // make it big!
8123 bp.zero();
8124 bl.append(bp);
8125 ObjectWriteOperation op;
8126 op.write_full(bl);
8127 ASSERT_EQ(0, ioctx.operate("foo", &op));
8128 }
8129
8130 // start a continuous stream of reads
8131 read_ioctx = &ioctx;
8132 test_lock.lock();
8133 for (int i = 0; i < max_reads; ++i) {
8134 start_flush_read();
8135 num_reads++;
8136 }
8137 test_lock.unlock();
8138
8139 // try-flush
8140 ObjectReadOperation op;
8141 op.cache_try_flush();
8142 librados::AioCompletion *completion = cluster.aio_create_completion();
8143 ASSERT_EQ(0, cache_ioctx.aio_operate(
8144 "foo", completion, &op,
8145 librados::OPERATION_IGNORE_OVERLAY |
8146 librados::OPERATION_SKIPRWLOCKS, NULL));
8147
8148 completion->wait_for_complete();
8149 ASSERT_EQ(0, completion->get_return_value());
8150 completion->release();
8151
8152 // stop reads
8153 std::unique_lock locker{test_lock};
8154 max_reads = 0;
8155 cond.wait(locker, [] { return num_reads == 0;});
8156 }
8157
8158 TEST_F(LibRadosTierECPP, CallForcesPromote) {
8159 Rados cluster;
8160 std::string pool_name = get_temp_pool_name();
8161 std::string cache_pool_name = pool_name + "-cache";
8162 ASSERT_EQ("", create_one_ec_pool_pp(pool_name, cluster));
8163 ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str()));
8164 IoCtx cache_ioctx;
8165 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
8166 cache_ioctx.application_enable("rados", true);
8167 IoCtx ioctx;
8168 ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
8169
8170 // configure cache
8171 bufferlist inbl;
8172 ASSERT_EQ(0, cluster.mon_command(
8173 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8174 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
8175 inbl, NULL, NULL));
8176 ASSERT_EQ(0, cluster.mon_command(
8177 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
8178 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
8179 inbl, NULL, NULL));
8180 ASSERT_EQ(0, cluster.mon_command(
8181 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
8182 "\", \"mode\": \"writeback\"}",
8183 inbl, NULL, NULL));
8184
8185 // set things up such that the op would normally be proxied
8186 ASSERT_EQ(0, cluster.mon_command(
8187 set_pool_str(cache_pool_name, "hit_set_count", 2),
8188 inbl, NULL, NULL));
8189 ASSERT_EQ(0, cluster.mon_command(
8190 set_pool_str(cache_pool_name, "hit_set_period", 600),
8191 inbl, NULL, NULL));
8192 ASSERT_EQ(0, cluster.mon_command(
8193 set_pool_str(cache_pool_name, "hit_set_type",
8194 "explicit_object"),
8195 inbl, NULL, NULL));
8196 ASSERT_EQ(0, cluster.mon_command(
8197 set_pool_str(cache_pool_name, "min_read_recency_for_promote",
8198 "4"),
8199 inbl, NULL, NULL));
8200
8201 // wait for maps to settle
8202 cluster.wait_for_latest_osdmap();
8203
8204 // create/dirty object
8205 bufferlist bl;
8206 bl.append("hi there");
8207 {
8208 ObjectWriteOperation op;
8209 op.write_full(bl);
8210 ASSERT_EQ(0, ioctx.operate("foo", &op));
8211 }
8212
8213 // flush
8214 {
8215 ObjectReadOperation op;
8216 op.cache_flush();
8217 librados::AioCompletion *completion = cluster.aio_create_completion();
8218 ASSERT_EQ(0, cache_ioctx.aio_operate(
8219 "foo", completion, &op,
8220 librados::OPERATION_IGNORE_OVERLAY, NULL));
8221 completion->wait_for_complete();
8222 ASSERT_EQ(0, completion->get_return_value());
8223 completion->release();
8224 }
8225
8226 // evict
8227 {
8228 ObjectReadOperation op;
8229 op.cache_evict();
8230 librados::AioCompletion *completion = cluster.aio_create_completion();
8231 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
8232 librados::OPERATION_IGNORE_CACHE,
8233 NULL));
8234 completion->wait_for_complete();
8235 ASSERT_EQ(0, completion->get_return_value());
8236 completion->release();
8237 }
8238
8239 // call
8240 {
8241 ObjectReadOperation op;
8242 bufferlist bl;
8243 op.exec("rbd", "get_id", bl);
8244 bufferlist out;
8245 // should get EIO (not an rbd object), not -EOPNOTSUPP (we didn't promote)
8246 ASSERT_EQ(-5, ioctx.operate("foo", &op, &out));
8247 }
8248
8249 // make sure foo is back in the cache tier
8250 {
8251 NObjectIterator it = cache_ioctx.nobjects_begin();
8252 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
8253 ASSERT_TRUE(it->get_oid() == string("foo"));
8254 ++it;
8255 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
8256 }
8257
8258 // tear down tiers
8259 ASSERT_EQ(0, cluster.mon_command(
8260 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
8261 "\"}",
8262 inbl, NULL, NULL));
8263 ASSERT_EQ(0, cluster.mon_command(
8264 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
8265 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
8266 inbl, NULL, NULL));
8267
8268 // wait for maps to settle before next test
8269 cluster.wait_for_latest_osdmap();
8270
8271 ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str()));
8272 ASSERT_EQ(0, destroy_one_ec_pool_pp(pool_name, cluster));
8273 }
8274
8275 TEST_F(LibRadosTierECPP, HitSetNone) {
8276 {
8277 list< pair<time_t,time_t> > ls;
8278 AioCompletion *c = librados::Rados::aio_create_completion();
8279 ASSERT_EQ(0, ioctx.hit_set_list(123, c, &ls));
8280 c->wait_for_complete();
8281 ASSERT_EQ(0, c->get_return_value());
8282 ASSERT_TRUE(ls.empty());
8283 c->release();
8284 }
8285 {
8286 bufferlist bl;
8287 AioCompletion *c = librados::Rados::aio_create_completion();
8288 ASSERT_EQ(0, ioctx.hit_set_get(123, c, 12345, &bl));
8289 c->wait_for_complete();
8290 ASSERT_EQ(-ENOENT, c->get_return_value());
8291 c->release();
8292 }
8293 }
8294
8295 TEST_F(LibRadosTwoPoolsECPP, HitSetRead) {
8296 // make it a tier
8297 bufferlist inbl;
8298 ASSERT_EQ(0, cluster.mon_command(
8299 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8300 "\", \"tierpool\": \"" + cache_pool_name +
8301 "\", \"force_nonempty\": \"--force-nonempty\" }",
8302 inbl, NULL, NULL));
8303
8304 // enable hitset tracking for this pool
8305 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", 2),
8306 inbl, NULL, NULL));
8307 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", 600),
8308 inbl, NULL, NULL));
8309 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type",
8310 "explicit_object"),
8311 inbl, NULL, NULL));
8312
8313 // wait for maps to settle
8314 cluster.wait_for_latest_osdmap();
8315
8316 cache_ioctx.set_namespace("");
8317
8318 // keep reading until we see our object appear in the HitSet
8319 utime_t start = ceph_clock_now();
8320 utime_t hard_stop = start + utime_t(600, 0);
8321
8322 while (true) {
8323 utime_t now = ceph_clock_now();
8324 ASSERT_TRUE(now < hard_stop);
8325
8326 string name = "foo";
8327 uint32_t hash;
8328 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
8329 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash,
8330 cluster.pool_lookup(cache_pool_name.c_str()), "");
8331
8332 bufferlist bl;
8333 ASSERT_EQ(-ENOENT, cache_ioctx.read("foo", bl, 1, 0));
8334
8335 bufferlist hbl;
8336 AioCompletion *c = librados::Rados::aio_create_completion();
8337 ASSERT_EQ(0, cache_ioctx.hit_set_get(hash, c, now.sec(), &hbl));
8338 c->wait_for_complete();
8339 c->release();
8340
8341 if (hbl.length()) {
8342 auto p = hbl.cbegin();
8343 HitSet hs;
8344 decode(hs, p);
8345 if (hs.contains(oid)) {
8346 cout << "ok, hit_set contains " << oid << std::endl;
8347 break;
8348 }
8349 cout << "hmm, not in HitSet yet" << std::endl;
8350 } else {
8351 cout << "hmm, no HitSet yet" << std::endl;
8352 }
8353
8354 sleep(1);
8355 }
8356 }
8357
// disable this test until hitset-get reliably works on EC pools
#if 0
// HitSetWrite: enables explicit_hash hit-set tracking on the pool, writes
// 1000 small objects, fetches the latest HitSet of every PG and checks that
// each written object is contained in at least one of them.
TEST_F(LibRadosTierECPP, HitSetWrite) {
  int num_pg = _get_pg_num(cluster, pool_name);
  ceph_assert(num_pg > 0);

  // enable hitset tracking for this pool
  bufferlist inbl;
  ASSERT_EQ(0, cluster.mon_command(
    set_pool_str(pool_name, "hit_set_count", 8),
    inbl, nullptr, nullptr));
  ASSERT_EQ(0, cluster.mon_command(
    set_pool_str(pool_name, "hit_set_period", 600),
    inbl, nullptr, nullptr));
  ASSERT_EQ(0, cluster.mon_command(
    set_pool_str(pool_name, "hit_set_type", "explicit_hash"),
    inbl, nullptr, nullptr));

  // wait for maps to settle
  cluster.wait_for_latest_osdmap();

  ioctx.set_namespace("");

  // do a bunch of writes
  for (int n = 0; n < 1000; ++n) {
    bufferlist payload;
    payload.append("a");
    ASSERT_EQ(0, ioctx.write(stringify(n), payload, 1, 0));
  }

  // get HitSets
  std::map<int,HitSet> hitsets;
  for (int pg = 0; pg < num_pg; ++pg) {
    list< pair<time_t,time_t> > intervals;
    AioCompletion *done = librados::Rados::aio_create_completion();
    ASSERT_EQ(0, ioctx.hit_set_list(pg, done, &intervals));
    done->wait_for_complete();
    done->release();
    std::cout << "pg " << pg << " ls " << intervals << std::endl;
    ASSERT_FALSE(intervals.empty());

    // get the latest
    done = librados::Rados::aio_create_completion();
    bufferlist encoded;
    ASSERT_EQ(0, ioctx.hit_set_get(pg, done, intervals.back().first, &encoded));
    done->wait_for_complete();
    done->release();

    //std::cout << "bl len is " << bl.length() << "\n";
    //bl.hexdump(std::cout);
    //std::cout << std::endl;

    auto p = encoded.cbegin();
    decode(hitsets[pg], p);

    // cope with racing splits by refreshing pg_num
    if (pg == num_pg - 1)
      num_pg = _get_pg_num(cluster, pool_name);
  }

  // every written object must show up in some PG's HitSet
  for (int n = 0; n < 1000; ++n) {
    string name = stringify(n);
    uint32_t hash = ioctx.get_object_hash_position(name);
    hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash,
                  cluster.pool_lookup(pool_name.c_str()), "");
    std::cout << "checking for " << oid << std::endl;
    bool found = false;
    for (int pg = 0; pg < num_pg && !found; ++pg)
      found = hitsets[pg].contains(oid);
    ASSERT_TRUE(found);
  }
}
#endif
8433
8434 TEST_F(LibRadosTwoPoolsECPP, HitSetTrim) {
8435 unsigned count = 3;
8436 unsigned period = 3;
8437
8438 // make it a tier
8439 bufferlist inbl;
8440 ASSERT_EQ(0, cluster.mon_command(
8441 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8442 "\", \"tierpool\": \"" + cache_pool_name +
8443 "\", \"force_nonempty\": \"--force-nonempty\" }",
8444 inbl, NULL, NULL));
8445
8446 // enable hitset tracking for this pool
8447 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", count),
8448 inbl, NULL, NULL));
8449 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", period),
8450 inbl, NULL, NULL));
8451 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
8452 inbl, NULL, NULL));
8453 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_fpp", ".01"),
8454 inbl, NULL, NULL));
8455
8456 // wait for maps to settle
8457 cluster.wait_for_latest_osdmap();
8458
8459 cache_ioctx.set_namespace("");
8460
8461 // do a bunch of writes and make sure the hitsets rotate
8462 utime_t start = ceph_clock_now();
8463 utime_t hard_stop = start + utime_t(count * period * 50, 0);
8464
8465 time_t first = 0;
8466 int bsize = alignment;
8467 char *buf = (char *)new char[bsize];
8468 memset(buf, 'f', bsize);
8469
8470 while (true) {
8471 string name = "foo";
8472 uint32_t hash;
8473 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
8474 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash, -1, "");
8475
8476 bufferlist bl;
8477 bl.append(buf, bsize);
8478 ASSERT_EQ(0, cache_ioctx.append("foo", bl, bsize));
8479
8480 list<pair<time_t, time_t> > ls;
8481 AioCompletion *c = librados::Rados::aio_create_completion();
8482 ASSERT_EQ(0, cache_ioctx.hit_set_list(hash, c, &ls));
8483 c->wait_for_complete();
8484 c->release();
8485
8486 cout << " got ls " << ls << std::endl;
8487 if (!ls.empty()) {
8488 if (!first) {
8489 first = ls.front().first;
8490 cout << "first is " << first << std::endl;
8491 } else {
8492 if (ls.front().first != first) {
8493 cout << "first now " << ls.front().first << ", trimmed" << std::endl;
8494 break;
8495 }
8496 }
8497 }
8498
8499 utime_t now = ceph_clock_now();
8500 ASSERT_TRUE(now < hard_stop);
8501
8502 sleep(1);
8503 }
8504 delete[] buf;
8505 }
8506
8507 TEST_F(LibRadosTwoPoolsECPP, PromoteOn2ndRead) {
8508 // create object
8509 for (int i=0; i<20; ++i) {
8510 bufferlist bl;
8511 bl.append("hi there");
8512 ObjectWriteOperation op;
8513 op.write_full(bl);
8514 ASSERT_EQ(0, ioctx.operate("foo" + stringify(i), &op));
8515 }
8516
8517 // configure cache
8518 bufferlist inbl;
8519 ASSERT_EQ(0, cluster.mon_command(
8520 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8521 "\", \"tierpool\": \"" + cache_pool_name +
8522 "\", \"force_nonempty\": \"--force-nonempty\" }",
8523 inbl, NULL, NULL));
8524 ASSERT_EQ(0, cluster.mon_command(
8525 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
8526 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
8527 inbl, NULL, NULL));
8528 ASSERT_EQ(0, cluster.mon_command(
8529 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
8530 "\", \"mode\": \"writeback\"}",
8531 inbl, NULL, NULL));
8532
8533 // enable hitset tracking for this pool
8534 ASSERT_EQ(0, cluster.mon_command(
8535 set_pool_str(cache_pool_name, "hit_set_count", 2),
8536 inbl, NULL, NULL));
8537 ASSERT_EQ(0, cluster.mon_command(
8538 set_pool_str(cache_pool_name, "hit_set_period", 600),
8539 inbl, NULL, NULL));
8540 ASSERT_EQ(0, cluster.mon_command(
8541 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
8542 inbl, NULL, NULL));
8543 ASSERT_EQ(0, cluster.mon_command(
8544 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
8545 inbl, NULL, NULL));
8546 ASSERT_EQ(0, cluster.mon_command(
8547 set_pool_str(cache_pool_name, "hit_set_grade_decay_rate", 20),
8548 inbl, NULL, NULL));
8549 ASSERT_EQ(0, cluster.mon_command(
8550 set_pool_str(cache_pool_name, "hit_set_search_last_n", 1),
8551 inbl, NULL, NULL));
8552
8553 // wait for maps to settle
8554 cluster.wait_for_latest_osdmap();
8555
8556 int fake = 0; // set this to non-zero to test spurious promotion,
8557 // e.g. from thrashing
8558 int attempt = 0;
8559 string obj;
8560 while (true) {
8561 // 1st read, don't trigger a promote
8562 obj = "foo" + stringify(attempt);
8563 cout << obj << std::endl;
8564 {
8565 bufferlist bl;
8566 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
8567 if (--fake >= 0) {
8568 sleep(1);
8569 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
8570 sleep(1);
8571 }
8572 }
8573
8574 // verify the object is NOT present in the cache tier
8575 {
8576 bool found = false;
8577 NObjectIterator it = cache_ioctx.nobjects_begin();
8578 while (it != cache_ioctx.nobjects_end()) {
8579 cout << " see " << it->get_oid() << std::endl;
8580 if (it->get_oid() == string(obj.c_str())) {
8581 found = true;
8582 break;
8583 }
8584 ++it;
8585 }
8586 if (!found)
8587 break;
8588 }
8589
8590 ++attempt;
8591 ASSERT_LE(attempt, 20);
8592 cout << "hrm, object is present in cache on attempt " << attempt
8593 << ", retrying" << std::endl;
8594 }
8595
8596 // Read until the object is present in the cache tier
8597 cout << "verifying " << obj << " is eventually promoted" << std::endl;
8598 while (true) {
8599 bufferlist bl;
8600 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
8601
8602 bool there = false;
8603 NObjectIterator it = cache_ioctx.nobjects_begin();
8604 while (it != cache_ioctx.nobjects_end()) {
8605 if (it->get_oid() == string(obj.c_str())) {
8606 there = true;
8607 break;
8608 }
8609 ++it;
8610 }
8611 if (there)
8612 break;
8613
8614 sleep(1);
8615 }
8616
8617 // tear down tiers
8618 ASSERT_EQ(0, cluster.mon_command(
8619 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
8620 "\"}",
8621 inbl, NULL, NULL));
8622 ASSERT_EQ(0, cluster.mon_command(
8623 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
8624 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
8625 inbl, NULL, NULL));
8626
8627 // wait for maps to settle before next test
8628 cluster.wait_for_latest_osdmap();
8629 }
8630
8631 TEST_F(LibRadosTwoPoolsECPP, ProxyRead) {
8632 // create object
8633 {
8634 bufferlist bl;
8635 bl.append("hi there");
8636 ObjectWriteOperation op;
8637 op.write_full(bl);
8638 ASSERT_EQ(0, ioctx.operate("foo", &op));
8639 }
8640
8641 // configure cache
8642 bufferlist inbl;
8643 ASSERT_EQ(0, cluster.mon_command(
8644 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8645 "\", \"tierpool\": \"" + cache_pool_name +
8646 "\", \"force_nonempty\": \"--force-nonempty\" }",
8647 inbl, NULL, NULL));
8648 ASSERT_EQ(0, cluster.mon_command(
8649 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
8650 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
8651 inbl, NULL, NULL));
8652 ASSERT_EQ(0, cluster.mon_command(
8653 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
8654 "\", \"mode\": \"readproxy\"}",
8655 inbl, NULL, NULL));
8656
8657 // wait for maps to settle
8658 cluster.wait_for_latest_osdmap();
8659
8660 // read and verify the object
8661 {
8662 bufferlist bl;
8663 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
8664 ASSERT_EQ('h', bl[0]);
8665 }
8666
8667 // Verify 10 times the object is NOT present in the cache tier
8668 uint32_t i = 0;
8669 while (i++ < 10) {
8670 NObjectIterator it = cache_ioctx.nobjects_begin();
8671 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
8672 sleep(1);
8673 }
8674
8675 // tear down tiers
8676 ASSERT_EQ(0, cluster.mon_command(
8677 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
8678 "\"}",
8679 inbl, NULL, NULL));
8680 ASSERT_EQ(0, cluster.mon_command(
8681 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
8682 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
8683 inbl, NULL, NULL));
8684
8685 // wait for maps to settle before next test
8686 cluster.wait_for_latest_osdmap();
8687 }
8688
8689 TEST_F(LibRadosTwoPoolsECPP, CachePin) {
8690 // create object
8691 {
8692 bufferlist bl;
8693 bl.append("hi there");
8694 ObjectWriteOperation op;
8695 op.write_full(bl);
8696 ASSERT_EQ(0, ioctx.operate("foo", &op));
8697 }
8698 {
8699 bufferlist bl;
8700 bl.append("hi there");
8701 ObjectWriteOperation op;
8702 op.write_full(bl);
8703 ASSERT_EQ(0, ioctx.operate("bar", &op));
8704 }
8705 {
8706 bufferlist bl;
8707 bl.append("hi there");
8708 ObjectWriteOperation op;
8709 op.write_full(bl);
8710 ASSERT_EQ(0, ioctx.operate("baz", &op));
8711 }
8712 {
8713 bufferlist bl;
8714 bl.append("hi there");
8715 ObjectWriteOperation op;
8716 op.write_full(bl);
8717 ASSERT_EQ(0, ioctx.operate("bam", &op));
8718 }
8719
8720 // configure cache
8721 bufferlist inbl;
8722 ASSERT_EQ(0, cluster.mon_command(
8723 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8724 "\", \"tierpool\": \"" + cache_pool_name +
8725 "\", \"force_nonempty\": \"--force-nonempty\" }",
8726 inbl, NULL, NULL));
8727 ASSERT_EQ(0, cluster.mon_command(
8728 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
8729 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
8730 inbl, NULL, NULL));
8731 ASSERT_EQ(0, cluster.mon_command(
8732 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
8733 "\", \"mode\": \"writeback\"}",
8734 inbl, NULL, NULL));
8735
8736 // wait for maps to settle
8737 cluster.wait_for_latest_osdmap();
8738
8739 // read, trigger promote
8740 {
8741 bufferlist bl;
8742 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
8743 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
8744 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
8745 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
8746 }
8747
8748 // verify the objects are present in the cache tier
8749 {
8750 NObjectIterator it = cache_ioctx.nobjects_begin();
8751 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
8752 for (uint32_t i = 0; i < 4; i++) {
8753 ASSERT_TRUE(it->get_oid() == string("foo") ||
8754 it->get_oid() == string("bar") ||
8755 it->get_oid() == string("baz") ||
8756 it->get_oid() == string("bam"));
8757 ++it;
8758 }
8759 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
8760 }
8761
8762 // pin objects
8763 {
8764 ObjectWriteOperation op;
8765 op.cache_pin();
8766 librados::AioCompletion *completion = cluster.aio_create_completion();
8767 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
8768 completion->wait_for_complete();
8769 ASSERT_EQ(0, completion->get_return_value());
8770 completion->release();
8771 }
8772 {
8773 ObjectWriteOperation op;
8774 op.cache_pin();
8775 librados::AioCompletion *completion = cluster.aio_create_completion();
8776 ASSERT_EQ(0, cache_ioctx.aio_operate("baz", completion, &op));
8777 completion->wait_for_complete();
8778 ASSERT_EQ(0, completion->get_return_value());
8779 completion->release();
8780 }
8781
8782 // enable agent
8783 ASSERT_EQ(0, cluster.mon_command(
8784 set_pool_str(cache_pool_name, "hit_set_count", 2),
8785 inbl, NULL, NULL));
8786 ASSERT_EQ(0, cluster.mon_command(
8787 set_pool_str(cache_pool_name, "hit_set_period", 600),
8788 inbl, NULL, NULL));
8789 ASSERT_EQ(0, cluster.mon_command(
8790 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
8791 inbl, NULL, NULL));
8792 ASSERT_EQ(0, cluster.mon_command(
8793 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
8794 inbl, NULL, NULL));
8795 ASSERT_EQ(0, cluster.mon_command(
8796 set_pool_str(cache_pool_name, "target_max_objects", 1),
8797 inbl, NULL, NULL));
8798
8799 sleep(10);
8800
8801 // Verify the pinned object 'foo' is not flushed/evicted
8802 uint32_t count = 0;
8803 while (true) {
8804 bufferlist bl;
8805 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
8806
8807 count = 0;
8808 NObjectIterator it = cache_ioctx.nobjects_begin();
8809 while (it != cache_ioctx.nobjects_end()) {
8810 ASSERT_TRUE(it->get_oid() == string("foo") ||
8811 it->get_oid() == string("bar") ||
8812 it->get_oid() == string("baz") ||
8813 it->get_oid() == string("bam"));
8814 ++count;
8815 ++it;
8816 }
8817 if (count == 2) {
8818 ASSERT_TRUE(it->get_oid() == string("foo") ||
8819 it->get_oid() == string("baz"));
8820 break;
8821 }
8822
8823 sleep(1);
8824 }
8825
8826 // tear down tiers
8827 ASSERT_EQ(0, cluster.mon_command(
8828 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
8829 "\"}",
8830 inbl, NULL, NULL));
8831 ASSERT_EQ(0, cluster.mon_command(
8832 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
8833 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
8834 inbl, NULL, NULL));
8835
8836 // wait for maps to settle before next test
8837 cluster.wait_for_latest_osdmap();
8838 }
8839 TEST_F(LibRadosTwoPoolsECPP, SetRedirectRead) {
8840 // create object
8841 {
8842 bufferlist bl;
8843 bl.append("hi there");
8844 ObjectWriteOperation op;
8845 op.write_full(bl);
8846 ASSERT_EQ(0, ioctx.operate("foo", &op));
8847 }
8848 {
8849 bufferlist bl;
8850 bl.append("there");
8851 ObjectWriteOperation op;
8852 op.write_full(bl);
8853 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
8854 }
8855
8856 // configure tier
8857 bufferlist inbl;
8858 ASSERT_EQ(0, cluster.mon_command(
8859 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8860 "\", \"tierpool\": \"" + cache_pool_name +
8861 "\", \"force_nonempty\": \"--force-nonempty\" }",
8862 inbl, NULL, NULL));
8863
8864 // wait for maps to settle
8865 cluster.wait_for_latest_osdmap();
8866
8867 {
8868 ObjectWriteOperation op;
8869 op.set_redirect("bar", cache_ioctx, 0);
8870 librados::AioCompletion *completion = cluster.aio_create_completion();
8871 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
8872 completion->wait_for_complete();
8873 ASSERT_EQ(0, completion->get_return_value());
8874 completion->release();
8875 }
8876 // read and verify the object
8877 {
8878 bufferlist bl;
8879 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
8880 ASSERT_EQ('t', bl[0]);
8881 }
8882
8883 ASSERT_EQ(0, cluster.mon_command(
8884 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
8885 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
8886 inbl, NULL, NULL));
8887
8888 // wait for maps to settle before next test
8889 cluster.wait_for_latest_osdmap();
8890 }
8891
8892 TEST_F(LibRadosTwoPoolsECPP, SetChunkRead) {
8893 // note: require >= mimic
8894
8895 {
8896 bufferlist bl;
8897 bl.append("there hi");
8898 ObjectWriteOperation op;
8899 op.write_full(bl);
8900 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
8901 }
8902
8903 {
8904 bufferlist bl;
8905 bl.append("There hi");
8906 ObjectWriteOperation op;
8907 op.write_full(bl);
8908 ASSERT_EQ(0, ioctx.operate("bar", &op));
8909 }
8910
8911 // wait for maps to settle
8912 cluster.wait_for_latest_osdmap();
8913
8914 // set_chunk
8915 manifest_set_chunk(cluster, ioctx, cache_ioctx, 0, 4, "bar", "foo");
8916
8917 // promote
8918 {
8919 ObjectWriteOperation op;
8920 op.tier_promote();
8921 librados::AioCompletion *completion = cluster.aio_create_completion();
8922 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
8923 completion->wait_for_complete();
8924 ASSERT_EQ(0, completion->get_return_value());
8925 completion->release();
8926 }
8927
8928 // read and verify the object
8929 {
8930 bufferlist bl;
8931 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
8932 ASSERT_EQ('T', bl[0]);
8933 }
8934
8935 // wait for maps to settle before next test
8936 cluster.wait_for_latest_osdmap();
8937 }
8938
8939 TEST_F(LibRadosTwoPoolsECPP, ManifestPromoteRead) {
8940 // note: require >= mimic
8941
8942 // create object
8943 {
8944 bufferlist bl;
8945 bl.append("hiaa there");
8946 ObjectWriteOperation op;
8947 op.write_full(bl);
8948 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
8949 }
8950 {
8951 bufferlist bl;
8952 bl.append("base chunk");
8953 ObjectWriteOperation op;
8954 op.write_full(bl);
8955 ASSERT_EQ(0, cache_ioctx.operate("foo-chunk", &op));
8956 }
8957 {
8958 bufferlist bl;
8959 bl.append("HIaa there");
8960 ObjectWriteOperation op;
8961 op.write_full(bl);
8962 ASSERT_EQ(0, ioctx.operate("bar", &op));
8963 }
8964 {
8965 bufferlist bl;
8966 bl.append("BASE CHUNK");
8967 ObjectWriteOperation op;
8968 op.write_full(bl);
8969 ASSERT_EQ(0, ioctx.operate("bar-chunk", &op));
8970 }
8971
8972 // set-redirect
8973 {
8974 ObjectWriteOperation op;
8975 op.set_redirect("bar", ioctx, 0);
8976 librados::AioCompletion *completion = cluster.aio_create_completion();
8977 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
8978 completion->wait_for_complete();
8979 ASSERT_EQ(0, completion->get_return_value());
8980 completion->release();
8981 }
8982 // set-chunk
8983 manifest_set_chunk(cluster, ioctx, cache_ioctx, 0, 10, "bar-chunk", "foo-chunk");
8984 // promote
8985 {
8986 ObjectWriteOperation op;
8987 op.tier_promote();
8988 librados::AioCompletion *completion = cluster.aio_create_completion();
8989 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
8990 completion->wait_for_complete();
8991 ASSERT_EQ(0, completion->get_return_value());
8992 completion->release();
8993 }
8994 // read and verify the object (redirect)
8995 {
8996 bufferlist bl;
8997 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
8998 ASSERT_EQ('H', bl[0]);
8999 }
9000 // promote
9001 {
9002 ObjectWriteOperation op;
9003 op.tier_promote();
9004 librados::AioCompletion *completion = cluster.aio_create_completion();
9005 ASSERT_EQ(0, cache_ioctx.aio_operate("foo-chunk", completion, &op));
9006 completion->wait_for_complete();
9007 ASSERT_EQ(0, completion->get_return_value());
9008 completion->release();
9009 }
9010 // read and verify the object
9011 {
9012 bufferlist bl;
9013 ASSERT_EQ(1, cache_ioctx.read("foo-chunk", bl, 1, 0));
9014 ASSERT_EQ('B', bl[0]);
9015 }
9016
9017 // wait for maps to settle before next test
9018 cluster.wait_for_latest_osdmap();
9019 }
9020
9021 TEST_F(LibRadosTwoPoolsECPP, TrySetDedupTier) {
9022 // note: require >= mimic
9023
9024 bufferlist inbl;
9025 ASSERT_EQ(-EOPNOTSUPP, cluster.mon_command(
9026 set_pool_str(pool_name, "dedup_tier", cache_pool_name),
9027 inbl, NULL, NULL));
9028 }
9029
9030 TEST_F(LibRadosTwoPoolsPP, PropagateBaseTierError) {
9031 // write object to base tier
9032 bufferlist omap_bl;
9033 encode(static_cast<uint32_t>(0U), omap_bl);
9034
9035 ObjectWriteOperation op1;
9036 op1.omap_set({{"somekey", omap_bl}});
9037 ASSERT_EQ(0, ioctx.operate("propagate-base-tier-error", &op1));
9038
9039 // configure cache
9040 bufferlist inbl;
9041 ASSERT_EQ(0, cluster.mon_command(
9042 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
9043 "\", \"tierpool\": \"" + cache_pool_name +
9044 "\", \"force_nonempty\": \"--force-nonempty\" }",
9045 inbl, NULL, NULL));
9046 ASSERT_EQ(0, cluster.mon_command(
9047 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
9048 "\", \"mode\": \"writeback\"}",
9049 inbl, NULL, NULL));
9050 ASSERT_EQ(0, cluster.mon_command(
9051 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
9052 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
9053 inbl, NULL, NULL));
9054
9055 ASSERT_EQ(0, cluster.mon_command(
9056 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
9057 inbl, NULL, NULL));
9058 ASSERT_EQ(0, cluster.mon_command(
9059 set_pool_str(cache_pool_name, "hit_set_count", 1),
9060 inbl, NULL, NULL));
9061 ASSERT_EQ(0, cluster.mon_command(
9062 set_pool_str(cache_pool_name, "hit_set_period", 600),
9063 inbl, NULL, NULL));
9064 ASSERT_EQ(0, cluster.mon_command(
9065 set_pool_str(cache_pool_name, "target_max_objects", 250),
9066 inbl, NULL, NULL));
9067
9068 // wait for maps to settle
9069 cluster.wait_for_latest_osdmap();
9070
9071 // guarded op should fail so expect error to propagate to cache tier
9072 bufferlist test_omap_bl;
9073 encode(static_cast<uint32_t>(1U), test_omap_bl);
9074
9075 ObjectWriteOperation op2;
9076 op2.omap_cmp({{"somekey", {test_omap_bl, CEPH_OSD_CMPXATTR_OP_EQ}}}, nullptr);
9077 op2.omap_set({{"somekey", test_omap_bl}});
9078
9079 ASSERT_EQ(-ECANCELED, ioctx.operate("propagate-base-tier-error", &op2));
9080 }
9081
9082 TEST_F(LibRadosTwoPoolsPP, HelloWriteReturn) {
9083 // configure cache
9084 bufferlist inbl;
9085 ASSERT_EQ(0, cluster.mon_command(
9086 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
9087 "\", \"tierpool\": \"" + cache_pool_name +
9088 "\", \"force_nonempty\": \"--force-nonempty\" }",
9089 inbl, NULL, NULL));
9090 ASSERT_EQ(0, cluster.mon_command(
9091 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
9092 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
9093 inbl, NULL, NULL));
9094 ASSERT_EQ(0, cluster.mon_command(
9095 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
9096 "\", \"mode\": \"writeback\"}",
9097 inbl, NULL, NULL));
9098
9099 // set things up such that the op would normally be proxied
9100 ASSERT_EQ(0, cluster.mon_command(
9101 set_pool_str(cache_pool_name, "hit_set_count", 2),
9102 inbl, NULL, NULL));
9103 ASSERT_EQ(0, cluster.mon_command(
9104 set_pool_str(cache_pool_name, "hit_set_period", 600),
9105 inbl, NULL, NULL));
9106 ASSERT_EQ(0, cluster.mon_command(
9107 set_pool_str(cache_pool_name, "hit_set_type",
9108 "explicit_object"),
9109 inbl, NULL, NULL));
9110 ASSERT_EQ(0, cluster.mon_command(
9111 set_pool_str(cache_pool_name, "min_read_recency_for_promote",
9112 "10000"),
9113 inbl, NULL, NULL));
9114
9115 // wait for maps to settle
9116 cluster.wait_for_latest_osdmap();
9117
9118 // this *will* return data due to the RETURNVEC flag
9119 {
9120 bufferlist in, out;
9121 int rval;
9122 ObjectWriteOperation o;
9123 o.exec("hello", "write_return_data", in, &out, &rval);
9124 librados::AioCompletion *completion = cluster.aio_create_completion();
9125 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &o,
9126 librados::OPERATION_RETURNVEC));
9127 completion->wait_for_complete();
9128 ASSERT_EQ(42, completion->get_return_value());
9129 ASSERT_EQ(42, rval);
9130 out.hexdump(std::cout);
9131 ASSERT_EQ("you might see this", std::string(out.c_str(), out.length()));
9132 }
9133
9134 // this will overflow because the return data is too big
9135 {
9136 bufferlist in, out;
9137 int rval;
9138 ObjectWriteOperation o;
9139 o.exec("hello", "write_too_much_return_data", in, &out, &rval);
9140 librados::AioCompletion *completion = cluster.aio_create_completion();
9141 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &o,
9142 librados::OPERATION_RETURNVEC));
9143 completion->wait_for_complete();
9144 ASSERT_EQ(-EOVERFLOW, completion->get_return_value());
9145 ASSERT_EQ(-EOVERFLOW, rval);
9146 ASSERT_EQ("", std::string(out.c_str(), out.length()));
9147 }
9148 }
9149
9150 TEST_F(LibRadosTwoPoolsPP, TierFlushDuringUnsetDedupTier) {
9151 // skip test if not yet octopus
9152 if (_get_required_osd_release(cluster) < "octopus") {
9153 cout << "cluster is not yet octopus, skipping test" << std::endl;
9154 return;
9155 }
9156
9157 bufferlist inbl;
9158
9159 // set dedup parameters without dedup_tier
9160 ASSERT_EQ(0, cluster.mon_command(
9161 set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"),
9162 inbl, NULL, NULL));
9163 ASSERT_EQ(0, cluster.mon_command(
9164 set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
9165 inbl, NULL, NULL));
9166 ASSERT_EQ(0, cluster.mon_command(
9167 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
9168 inbl, NULL, NULL));
9169
9170 // create object
9171 bufferlist gbl;
9172 {
9173 generate_buffer(1024*8, &gbl);
9174 ObjectWriteOperation op;
9175 op.write_full(gbl);
9176 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
9177 }
9178 {
9179 bufferlist bl;
9180 bl.append("there hiHI");
9181 ObjectWriteOperation op;
9182 op.write_full(bl);
9183 ASSERT_EQ(0, ioctx.operate("bar", &op));
9184 }
9185
9186 // wait for maps to settle
9187 cluster.wait_for_latest_osdmap();
9188
9189 // set-chunk to set manifest object
9190 {
9191 ObjectReadOperation op;
9192 op.set_chunk(0, 2, ioctx, "bar", 0, CEPH_OSD_OP_FLAG_WITH_REFERENCE);
9193 librados::AioCompletion *completion = cluster.aio_create_completion();
9194 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
9195 librados::OPERATION_IGNORE_CACHE, NULL));
9196 completion->wait_for_complete();
9197 ASSERT_EQ(0, completion->get_return_value());
9198 completion->release();
9199 }
9200
9201 // flush to check if proper error is returned
9202 {
9203 ObjectReadOperation op;
9204 op.tier_flush();
9205 librados::AioCompletion *completion = cluster.aio_create_completion();
9206 ASSERT_EQ(0, cache_ioctx.aio_operate(
9207 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
9208 completion->wait_for_complete();
9209 ASSERT_EQ(-EINVAL, completion->get_return_value());
9210 completion->release();
9211 }
9212 }
9213