]> git.proxmox.com Git - ceph.git/blame_incremental - ceph/src/test/librados/tier_cxx.cc
bump version to 19.2.0-pve1
[ceph.git] / ceph / src / test / librados / tier_cxx.cc
... / ...
CommitLineData
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#include "gtest/gtest.h"
4
5#include "mds/mdstypes.h"
6#include "include/buffer.h"
7#include "include/rbd_types.h"
8#include "include/rados/librados.hpp"
9#include "include/stringify.h"
10#include "include/types.h"
11#include "global/global_context.h"
12#include "common/Cond.h"
13#include "common/ceph_crypto.h"
14#include "test/librados/test_cxx.h"
15#include "test/librados/testcase_cxx.h"
16#include "json_spirit/json_spirit.h"
17#include "cls/cas/cls_cas_ops.h"
18#include "cls/cas/cls_cas_internal.h"
19
20#include "osd/HitSet.h"
21
22#include <errno.h>
23#include <map>
24#include <sstream>
25#include <string>
26
27#include "cls/cas/cls_cas_client.h"
28#include "cls/cas/cls_cas_internal.h"
29#include "crimson_utils.h"
30
31using namespace std;
32using namespace librados;
33using ceph::crypto::SHA1;
34
35typedef RadosTestPP LibRadosTierPP;
36typedef RadosTestECPP LibRadosTierECPP;
37
38void flush_evict_all(librados::Rados& cluster, librados::IoCtx& cache_ioctx)
39{
40 bufferlist inbl;
41 cache_ioctx.set_namespace(all_nspaces);
42 for (NObjectIterator it = cache_ioctx.nobjects_begin();
43 it != cache_ioctx.nobjects_end(); ++it) {
44 cache_ioctx.locator_set_key(it->get_locator());
45 cache_ioctx.set_namespace(it->get_nspace());
46 {
47 ObjectReadOperation op;
48 op.cache_flush();
49 librados::AioCompletion *completion = cluster.aio_create_completion();
50 cache_ioctx.aio_operate(
51 it->get_oid(), completion, &op,
52 librados::OPERATION_IGNORE_OVERLAY, NULL);
53 completion->wait_for_complete();
54 completion->get_return_value();
55 completion->release();
56 }
57 {
58 ObjectReadOperation op;
59 op.cache_evict();
60 librados::AioCompletion *completion = cluster.aio_create_completion();
61 cache_ioctx.aio_operate(
62 it->get_oid(), completion, &op,
63 librados::OPERATION_IGNORE_OVERLAY, NULL);
64 completion->wait_for_complete();
65 completion->get_return_value();
66 completion->release();
67 }
68 }
69}
70
71static string _get_required_osd_release(Rados& cluster)
72{
73 bufferlist inbl;
74 string cmd = string("{\"prefix\": \"osd dump\",\"format\":\"json\"}");
75 bufferlist outbl;
76 int r = cluster.mon_command(cmd, inbl, &outbl, NULL);
77 ceph_assert(r >= 0);
78 string outstr(outbl.c_str(), outbl.length());
79 json_spirit::Value v;
80 if (!json_spirit::read(outstr, v)) {
81 cerr <<" unable to parse json " << outstr << std::endl;
82 return "";
83 }
84
85 json_spirit::Object& o = v.get_obj();
86 for (json_spirit::Object::size_type i=0; i<o.size(); i++) {
87 json_spirit::Pair& p = o[i];
88 if (p.name_ == "require_osd_release") {
89 cout << "require_osd_release = " << p.value_.get_str() << std::endl;
90 return p.value_.get_str();
91 }
92 }
93 cerr << "didn't find require_osd_release in " << outstr << std::endl;
94 return "";
95}
96
97void manifest_set_chunk(Rados& cluster, librados::IoCtx& src_ioctx,
98 librados::IoCtx& tgt_ioctx,
99 uint64_t src_offset, uint64_t length,
100 std::string src_oid, std::string tgt_oid)
101{
102 ObjectReadOperation op;
103 op.set_chunk(src_offset, length, src_ioctx, src_oid, 0,
104 CEPH_OSD_OP_FLAG_WITH_REFERENCE);
105 librados::AioCompletion *completion = cluster.aio_create_completion();
106 ASSERT_EQ(0, tgt_ioctx.aio_operate(tgt_oid, completion, &op,
107 librados::OPERATION_IGNORE_CACHE, NULL));
108 completion->wait_for_complete();
109 ASSERT_EQ(0, completion->get_return_value());
110 completion->release();
111}
112
// Render the first `len` bytes of `buf` as lowercase hexadecimal into `str`.
// `str` must have room for 2 * len characters plus the terminating NUL.
// A non-positive `len` yields an empty string.
static inline void buf_to_hex(const unsigned char *buf, int len, char *str)
{
  static const char hex_digits[] = "0123456789abcdef";
  char *out = str;
  *out = '\0';
  for (int idx = 0; idx < len; ++idx) {
    const unsigned char byte = buf[idx];
    *out++ = hex_digits[byte >> 4];
    *out++ = hex_digits[byte & 0x0f];
    *out = '\0';  // keep the output terminated after every byte
  }
}
121
122void check_fp_oid_refcount(librados::IoCtx& ioctx, std::string foid, uint64_t count,
123 std::string fp_algo = NULL)
124{
125 bufferlist t;
126 int size = foid.length();
127 if (fp_algo == "sha1") {
128 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
129 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
130 SHA1 sha1_gen;
131 sha1_gen.Update((const unsigned char *)foid.c_str(), size);
132 sha1_gen.Final(fingerprint);
133 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
134 ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
135 } else if (fp_algo.empty()) {
136 ioctx.getxattr(foid, CHUNK_REFCOUNT_ATTR, t);
137 } else if (!fp_algo.empty()) {
138 ceph_assert(0 == "unrecognized fingerprint algorithm");
139 }
140
141 chunk_refs_t refs;
142 try {
143 auto iter = t.cbegin();
144 decode(refs, iter);
145 } catch (buffer::error& err) {
146 ASSERT_TRUE(0);
147 }
148 ASSERT_LE(count, refs.count());
149}
150
151string get_fp_oid(string oid, std::string fp_algo = NULL)
152{
153 if (fp_algo == "sha1") {
154 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
155 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
156 SHA1 sha1_gen;
157 int size = oid.length();
158 sha1_gen.Update((const unsigned char *)oid.c_str(), size);
159 sha1_gen.Final(fingerprint);
160 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
161 return string(p_str);
162 }
163
164 return string();
165}
166
167void is_intended_refcount_state(librados::IoCtx& src_ioctx,
168 std::string src_oid,
169 librados::IoCtx& dst_ioctx,
170 std::string dst_oid,
171 int expected_refcount)
172{
173 int src_refcount = 0, dst_refcount = 0;
174 bufferlist t;
175 int r = dst_ioctx.getxattr(dst_oid, CHUNK_REFCOUNT_ATTR, t);
176 if (r == -ENOENT) {
177 dst_refcount = 0;
178 } else {
179 chunk_refs_t refs;
180 try {
181 auto iter = t.cbegin();
182 decode(refs, iter);
183 } catch (buffer::error& err) {
184 ceph_assert(0);
185 }
186 dst_refcount = refs.count();
187 }
188 int tries = 0;
189 for (; tries < 30; ++tries) {
190 r = cls_cas_references_chunk(src_ioctx, src_oid, dst_oid);
191 if (r == -ENOENT || r == -ENOLINK) {
192 src_refcount = 0;
193 } else if (r == -EBUSY) {
194 sleep(20);
195 continue;
196 } else {
197 src_refcount = r;
198 }
199 break;
200 }
201 ASSERT_TRUE(tries < 30);
202 ASSERT_TRUE(src_refcount >= 0);
203 ASSERT_TRUE(src_refcount == expected_refcount);
204 ASSERT_TRUE(src_refcount <= dst_refcount);
205}
206
207class LibRadosTwoPoolsPP : public RadosTestPP
208{
209public:
210 LibRadosTwoPoolsPP() {};
211 ~LibRadosTwoPoolsPP() override {};
212protected:
213 static void SetUpTestCase() {
214 pool_name = get_temp_pool_name();
215 ASSERT_EQ("", create_one_pool_pp(pool_name, s_cluster));
216 }
217 static void TearDownTestCase() {
218 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, s_cluster));
219 }
220 static std::string cache_pool_name;
221
222 void SetUp() override {
223 SKIP_IF_CRIMSON();
224 cache_pool_name = get_temp_pool_name();
225 ASSERT_EQ(0, s_cluster.pool_create(cache_pool_name.c_str()));
226 RadosTestPP::SetUp();
227
228 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
229 cache_ioctx.application_enable("rados", true);
230 cache_ioctx.set_namespace(nspace);
231 }
232 void TearDown() override {
233 SKIP_IF_CRIMSON();
234 // flush + evict cache
235 flush_evict_all(cluster, cache_ioctx);
236
237 bufferlist inbl;
238 // tear down tiers
239 ASSERT_EQ(0, cluster.mon_command(
240 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
241 "\"}",
242 inbl, NULL, NULL));
243 ASSERT_EQ(0, cluster.mon_command(
244 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
245 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
246 inbl, NULL, NULL));
247
248 // wait for maps to settle before next test
249 cluster.wait_for_latest_osdmap();
250
251 RadosTestPP::TearDown();
252
253 cleanup_default_namespace(cache_ioctx);
254 cleanup_namespace(cache_ioctx, nspace);
255
256 cache_ioctx.close();
257 ASSERT_EQ(0, s_cluster.pool_delete(cache_pool_name.c_str()));
258 }
259 librados::IoCtx cache_ioctx;
260};
261
262class Completions
263{
264public:
265 Completions() = default;
266 librados::AioCompletion* getCompletion() {
267 librados::AioCompletion* comp = librados::Rados::aio_create_completion();
268 m_completions.push_back(comp);
269 return comp;
270 }
271
272 ~Completions() {
273 for (auto& comp : m_completions) {
274 comp->release();
275 }
276 }
277
278private:
279 vector<librados::AioCompletion *> m_completions;
280};
281
// File-scope Completions instance; any completions obtained from it are
// released when this object is destroyed.
282Completions completions;
283
// Out-of-class definition of the static member declared in LibRadosTwoPoolsPP.
284std::string LibRadosTwoPoolsPP::cache_pool_name;
285
286TEST_F(LibRadosTierPP, Dirty) {
287 SKIP_IF_CRIMSON();
288 {
289 ObjectWriteOperation op;
290 op.undirty();
291 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still get 0 if it dne
292 }
293 {
294 ObjectWriteOperation op;
295 op.create(true);
296 ASSERT_EQ(0, ioctx.operate("foo", &op));
297 }
298 {
299 bool dirty = false;
300 int r = -1;
301 ObjectReadOperation op;
302 op.is_dirty(&dirty, &r);
303 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
304 ASSERT_TRUE(dirty);
305 ASSERT_EQ(0, r);
306 }
307 {
308 ObjectWriteOperation op;
309 op.undirty();
310 ASSERT_EQ(0, ioctx.operate("foo", &op));
311 }
312 {
313 ObjectWriteOperation op;
314 op.undirty();
315 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still 0 if already clean
316 }
317 {
318 bool dirty = false;
319 int r = -1;
320 ObjectReadOperation op;
321 op.is_dirty(&dirty, &r);
322 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
323 ASSERT_FALSE(dirty);
324 ASSERT_EQ(0, r);
325 }
326 {
327 ObjectWriteOperation op;
328 op.truncate(0); // still a write even tho it is a no-op
329 ASSERT_EQ(0, ioctx.operate("foo", &op));
330 }
331 {
332 bool dirty = false;
333 int r = -1;
334 ObjectReadOperation op;
335 op.is_dirty(&dirty, &r);
336 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
337 ASSERT_TRUE(dirty);
338 ASSERT_EQ(0, r);
339 }
340}
341
342TEST_F(LibRadosTwoPoolsPP, Overlay) {
343 SKIP_IF_CRIMSON();
344 // create objects
345 {
346 bufferlist bl;
347 bl.append("base");
348 ObjectWriteOperation op;
349 op.write_full(bl);
350 ASSERT_EQ(0, ioctx.operate("foo", &op));
351 }
352 {
353 bufferlist bl;
354 bl.append("cache");
355 ObjectWriteOperation op;
356 op.write_full(bl);
357 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
358 }
359
360 // configure cache
361 bufferlist inbl;
362 ASSERT_EQ(0, cluster.mon_command(
363 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
364 "\", \"tierpool\": \"" + cache_pool_name +
365 "\", \"force_nonempty\": \"--force-nonempty\" }",
366 inbl, NULL, NULL));
367 ASSERT_EQ(0, cluster.mon_command(
368 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
369 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
370 inbl, NULL, NULL));
371
372 // wait for maps to settle
373 cluster.wait_for_latest_osdmap();
374
375 // by default, the overlay sends us to cache pool
376 {
377 bufferlist bl;
378 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
379 ASSERT_EQ('c', bl[0]);
380 }
381 {
382 bufferlist bl;
383 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
384 ASSERT_EQ('c', bl[0]);
385 }
386
387 // unless we say otherwise
388 {
389 bufferlist bl;
390 ObjectReadOperation op;
391 op.read(0, 1, &bl, NULL);
392 librados::AioCompletion *completion = cluster.aio_create_completion();
393 ASSERT_EQ(0, ioctx.aio_operate(
394 "foo", completion, &op,
395 librados::OPERATION_IGNORE_OVERLAY, NULL));
396 completion->wait_for_complete();
397 ASSERT_EQ(0, completion->get_return_value());
398 completion->release();
399 ASSERT_EQ('b', bl[0]);
400 }
401}
402
403TEST_F(LibRadosTwoPoolsPP, Promote) {
404 SKIP_IF_CRIMSON();
405 // create object
406 {
407 bufferlist bl;
408 bl.append("hi there");
409 ObjectWriteOperation op;
410 op.write_full(bl);
411 ASSERT_EQ(0, ioctx.operate("foo", &op));
412 }
413
414 // configure cache
415 bufferlist inbl;
416 ASSERT_EQ(0, cluster.mon_command(
417 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
418 "\", \"tierpool\": \"" + cache_pool_name +
419 "\", \"force_nonempty\": \"--force-nonempty\" }",
420 inbl, NULL, NULL));
421 ASSERT_EQ(0, cluster.mon_command(
422 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
423 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
424 inbl, NULL, NULL));
425 ASSERT_EQ(0, cluster.mon_command(
426 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
427 "\", \"mode\": \"writeback\"}",
428 inbl, NULL, NULL));
429
430 // wait for maps to settle
431 cluster.wait_for_latest_osdmap();
432
433 // read, trigger a promote
434 {
435 bufferlist bl;
436 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
437 }
438
439 // read, trigger a whiteout
440 {
441 bufferlist bl;
442 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
443 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
444 }
445
446 // verify the object is present in the cache tier
447 {
448 NObjectIterator it = cache_ioctx.nobjects_begin();
449 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
450 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
451 ++it;
452 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
453 ++it;
454 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
455 }
456}
457
458TEST_F(LibRadosTwoPoolsPP, PromoteSnap) {
459 SKIP_IF_CRIMSON();
460 // create object
461 {
462 bufferlist bl;
463 bl.append("hi there");
464 ObjectWriteOperation op;
465 op.write_full(bl);
466 ASSERT_EQ(0, ioctx.operate("foo", &op));
467 }
468 {
469 bufferlist bl;
470 bl.append("hi there");
471 ObjectWriteOperation op;
472 op.write_full(bl);
473 ASSERT_EQ(0, ioctx.operate("bar", &op));
474 }
475 {
476 bufferlist bl;
477 bl.append("hi there");
478 ObjectWriteOperation op;
479 op.write_full(bl);
480 ASSERT_EQ(0, ioctx.operate("baz", &op));
481 }
482 {
483 bufferlist bl;
484 bl.append("hi there");
485 ObjectWriteOperation op;
486 op.write_full(bl);
487 ASSERT_EQ(0, ioctx.operate("bam", &op));
488 }
489
490 // create a snapshot, clone
491 vector<uint64_t> my_snaps(1);
492 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
493 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
494 my_snaps));
495 {
496 bufferlist bl;
497 bl.append("ciao!");
498 ObjectWriteOperation op;
499 op.write_full(bl);
500 ASSERT_EQ(0, ioctx.operate("foo", &op));
501 }
502 {
503 bufferlist bl;
504 bl.append("ciao!");
505 ObjectWriteOperation op;
506 op.write_full(bl);
507 ASSERT_EQ(0, ioctx.operate("bar", &op));
508 }
509 {
510 ObjectWriteOperation op;
511 op.remove();
512 ASSERT_EQ(0, ioctx.operate("baz", &op));
513 }
514 {
515 bufferlist bl;
516 bl.append("ciao!");
517 ObjectWriteOperation op;
518 op.write_full(bl);
519 ASSERT_EQ(0, ioctx.operate("bam", &op));
520 }
521
522 // configure cache
523 bufferlist inbl;
524 ASSERT_EQ(0, cluster.mon_command(
525 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
526 "\", \"tierpool\": \"" + cache_pool_name +
527 "\", \"force_nonempty\": \"--force-nonempty\" }",
528 inbl, NULL, NULL));
529 ASSERT_EQ(0, cluster.mon_command(
530 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
531 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
532 inbl, NULL, NULL));
533 ASSERT_EQ(0, cluster.mon_command(
534 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
535 "\", \"mode\": \"writeback\"}",
536 inbl, NULL, NULL));
537
538 // wait for maps to settle
539 cluster.wait_for_latest_osdmap();
540
541 // read, trigger a promote on the head
542 {
543 bufferlist bl;
544 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
545 ASSERT_EQ('c', bl[0]);
546 }
547 {
548 bufferlist bl;
549 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
550 ASSERT_EQ('c', bl[0]);
551 }
552
553 ioctx.snap_set_read(my_snaps[0]);
554
555 // read foo snap
556 {
557 bufferlist bl;
558 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
559 ASSERT_EQ('h', bl[0]);
560 }
561
562 // read bar snap
563 {
564 bufferlist bl;
565 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
566 ASSERT_EQ('h', bl[0]);
567 }
568
569 // read baz snap
570 {
571 bufferlist bl;
572 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
573 ASSERT_EQ('h', bl[0]);
574 }
575
576 ioctx.snap_set_read(librados::SNAP_HEAD);
577
578 // read foo
579 {
580 bufferlist bl;
581 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
582 ASSERT_EQ('c', bl[0]);
583 }
584
585 // read bar
586 {
587 bufferlist bl;
588 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
589 ASSERT_EQ('c', bl[0]);
590 }
591
592 // read baz
593 {
594 bufferlist bl;
595 ASSERT_EQ(-ENOENT, ioctx.read("baz", bl, 1, 0));
596 }
597
598 // cleanup
599 ioctx.selfmanaged_snap_remove(my_snaps[0]);
600}
601
602TEST_F(LibRadosTwoPoolsPP, PromoteSnapScrub) {
603 SKIP_IF_CRIMSON();
604 int num = 100;
605
606 // create objects
607 for (int i=0; i<num; ++i) {
608 bufferlist bl;
609 bl.append("hi there");
610 ObjectWriteOperation op;
611 op.write_full(bl);
612 ASSERT_EQ(0, ioctx.operate(string("foo") + stringify(i), &op));
613 }
614
615 vector<uint64_t> my_snaps;
616 for (int snap=0; snap<4; ++snap) {
617 // create a snapshot, clone
618 vector<uint64_t> ns(1);
619 ns.insert(ns.end(), my_snaps.begin(), my_snaps.end());
620 my_snaps.swap(ns);
621 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
622 cout << "my_snaps " << my_snaps << std::endl;
623 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
624 my_snaps));
625 for (int i=0; i<num; ++i) {
626 bufferlist bl;
627 bl.append(string("ciao! snap") + stringify(snap));
628 ObjectWriteOperation op;
629 op.write_full(bl);
630 ASSERT_EQ(0, ioctx.operate(string("foo") + stringify(i), &op));
631 }
632 }
633
634 // configure cache
635 bufferlist inbl;
636 ASSERT_EQ(0, cluster.mon_command(
637 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
638 "\", \"tierpool\": \"" + cache_pool_name +
639 "\", \"force_nonempty\": \"--force-nonempty\" }",
640 inbl, NULL, NULL));
641 ASSERT_EQ(0, cluster.mon_command(
642 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
643 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
644 inbl, NULL, NULL));
645 ASSERT_EQ(0, cluster.mon_command(
646 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
647 "\", \"mode\": \"writeback\"}",
648 inbl, NULL, NULL));
649
650 // wait for maps to settle
651 cluster.wait_for_latest_osdmap();
652
653 // read, trigger a promote on _some_ heads to make sure we handle cases
654 // where snaps are present and where they are not.
655 cout << "promoting some heads" << std::endl;
656 for (int i=0; i<num; ++i) {
657 if (i % 5 == 0 || i > num - 3) {
658 bufferlist bl;
659 ASSERT_EQ(1, ioctx.read(string("foo") + stringify(i), bl, 1, 0));
660 ASSERT_EQ('c', bl[0]);
661 }
662 }
663
664 for (unsigned snap = 0; snap < my_snaps.size(); ++snap) {
665 cout << "promoting from clones for snap " << my_snaps[snap] << std::endl;
666 ioctx.snap_set_read(my_snaps[snap]);
667
668 // read some snaps, semi-randomly
669 for (int i=0; i<50; ++i) {
670 bufferlist bl;
671 string o = string("foo") + stringify((snap * i * 137) % 80);
672 //cout << o << std::endl;
673 ASSERT_EQ(1, ioctx.read(o, bl, 1, 0));
674 }
675 }
676
677 // ok, stop and scrub this pool (to make sure scrub can handle
678 // missing clones in the cache tier).
679 {
680 IoCtx cache_ioctx;
681 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
682 for (int i=0; i<10; ++i) {
683 do {
684 ostringstream ss;
685 ss << "{\"prefix\": \"pg scrub\", \"pgid\": \""
686 << cache_ioctx.get_id() << "." << i
687 << "\"}";
688 int r = cluster.mon_command(ss.str(), inbl, NULL, NULL);
689 if (r == -ENOENT || // in case mgr osdmap is stale
690 r == -EAGAIN) {
691 sleep(5);
692 continue;
693 }
694 } while (false);
695 }
696
697 // give it a few seconds to go. this is sloppy but is usually enough time
698 cout << "waiting for scrubs..." << std::endl;
699 sleep(30);
700 cout << "done waiting" << std::endl;
701 }
702
703 ioctx.snap_set_read(librados::SNAP_HEAD);
704
705 //cleanup
706 for (unsigned snap = 0; snap < my_snaps.size(); ++snap) {
707 ioctx.selfmanaged_snap_remove(my_snaps[snap]);
708 }
709}
710
711TEST_F(LibRadosTwoPoolsPP, PromoteSnapTrimRace) {
712 SKIP_IF_CRIMSON();
713 // create object
714 {
715 bufferlist bl;
716 bl.append("hi there");
717 ObjectWriteOperation op;
718 op.write_full(bl);
719 ASSERT_EQ(0, ioctx.operate("foo", &op));
720 }
721
722 // create a snapshot, clone
723 vector<uint64_t> my_snaps(1);
724 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
725 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
726 my_snaps));
727 {
728 bufferlist bl;
729 bl.append("ciao!");
730 ObjectWriteOperation op;
731 op.write_full(bl);
732 ASSERT_EQ(0, ioctx.operate("foo", &op));
733 }
734
735 // configure cache
736 bufferlist inbl;
737 ASSERT_EQ(0, cluster.mon_command(
738 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
739 "\", \"tierpool\": \"" + cache_pool_name +
740 "\", \"force_nonempty\": \"--force-nonempty\" }",
741 inbl, NULL, NULL));
742 ASSERT_EQ(0, cluster.mon_command(
743 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
744 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
745 inbl, NULL, NULL));
746 ASSERT_EQ(0, cluster.mon_command(
747 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
748 "\", \"mode\": \"writeback\"}",
749 inbl, NULL, NULL));
750
751 // wait for maps to settle
752 cluster.wait_for_latest_osdmap();
753
754 // delete the snap
755 ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(my_snaps[0]));
756
757 ioctx.snap_set_read(my_snaps[0]);
758
759 // read foo snap. the OSD may or may not realize that this snap has
760 // been logically deleted; either response is valid.
761 {
762 bufferlist bl;
763 int r = ioctx.read("foo", bl, 1, 0);
764 ASSERT_TRUE(r == 1 || r == -ENOENT);
765 }
766
767 // cleanup
768 ioctx.selfmanaged_snap_remove(my_snaps[0]);
769}
770
771TEST_F(LibRadosTwoPoolsPP, Whiteout) {
772 SKIP_IF_CRIMSON();
773 // create object
774 {
775 bufferlist bl;
776 bl.append("hi there");
777 ObjectWriteOperation op;
778 op.write_full(bl);
779 ASSERT_EQ(0, ioctx.operate("foo", &op));
780 }
781
782 // configure cache
783 bufferlist inbl;
784 ASSERT_EQ(0, cluster.mon_command(
785 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
786 "\", \"tierpool\": \"" + cache_pool_name +
787 "\", \"force_nonempty\": \"--force-nonempty\" }",
788 inbl, NULL, NULL));
789 ASSERT_EQ(0, cluster.mon_command(
790 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
791 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
792 inbl, NULL, NULL));
793 ASSERT_EQ(0, cluster.mon_command(
794 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
795 "\", \"mode\": \"writeback\"}",
796 inbl, NULL, NULL));
797
798 // wait for maps to settle
799 cluster.wait_for_latest_osdmap();
800
801 // create some whiteouts, verify they behave
802 {
803 ObjectWriteOperation op;
804 op.assert_exists();
805 op.remove();
806 ASSERT_EQ(0, ioctx.operate("foo", &op));
807 }
808
809 {
810 ObjectWriteOperation op;
811 op.assert_exists();
812 op.remove();
813 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
814 }
815 {
816 ObjectWriteOperation op;
817 op.assert_exists();
818 op.remove();
819 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
820 }
821
822 // verify the whiteouts are there in the cache tier
823 {
824 NObjectIterator it = cache_ioctx.nobjects_begin();
825 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
826 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
827 ++it;
828 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
829 ++it;
830 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
831 }
832
833 // delete a whiteout and verify it goes away
834 ASSERT_EQ(-ENOENT, ioctx.remove("foo"));
835 {
836 ObjectWriteOperation op;
837 op.remove();
838 librados::AioCompletion *completion = cluster.aio_create_completion();
839 ASSERT_EQ(0, ioctx.aio_operate("bar", completion, &op,
840 librados::OPERATION_IGNORE_CACHE));
841 completion->wait_for_complete();
842 ASSERT_EQ(0, completion->get_return_value());
843 completion->release();
844
845 NObjectIterator it = cache_ioctx.nobjects_begin();
846 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
847 ASSERT_TRUE(it->get_oid() == string("foo"));
848 ++it;
849 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
850 }
851
852 // recreate an object and verify we can read it
853 {
854 bufferlist bl;
855 bl.append("hi there");
856 ObjectWriteOperation op;
857 op.write_full(bl);
858 ASSERT_EQ(0, ioctx.operate("foo", &op));
859 }
860 {
861 bufferlist bl;
862 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
863 ASSERT_EQ('h', bl[0]);
864 }
865}
866
867TEST_F(LibRadosTwoPoolsPP, WhiteoutDeleteCreate) {
868 SKIP_IF_CRIMSON();
869 // configure cache
870 bufferlist inbl;
871 ASSERT_EQ(0, cluster.mon_command(
872 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
873 "\", \"tierpool\": \"" + cache_pool_name +
874 "\", \"force_nonempty\": \"--force-nonempty\" }",
875 inbl, NULL, NULL));
876 ASSERT_EQ(0, cluster.mon_command(
877 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
878 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
879 inbl, NULL, NULL));
880 ASSERT_EQ(0, cluster.mon_command(
881 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
882 "\", \"mode\": \"writeback\"}",
883 inbl, NULL, NULL));
884
885 // wait for maps to settle
886 cluster.wait_for_latest_osdmap();
887
888 // create an object
889 {
890 bufferlist bl;
891 bl.append("foo");
892 ASSERT_EQ(0, ioctx.write_full("foo", bl));
893 }
894
895 // do delete + create operation
896 {
897 ObjectWriteOperation op;
898 op.remove();
899 bufferlist bl;
900 bl.append("bar");
901 op.write_full(bl);
902 ASSERT_EQ(0, ioctx.operate("foo", &op));
903 }
904
905 // verify it still "exists" (w/ new content)
906 {
907 bufferlist bl;
908 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
909 ASSERT_EQ('b', bl[0]);
910 }
911}
912
913TEST_F(LibRadosTwoPoolsPP, Evict) {
914 SKIP_IF_CRIMSON();
915 // create object
916 {
917 bufferlist bl;
918 bl.append("hi there");
919 ObjectWriteOperation op;
920 op.write_full(bl);
921 ASSERT_EQ(0, ioctx.operate("foo", &op));
922 }
923
924 // configure cache
925 bufferlist inbl;
926 ASSERT_EQ(0, cluster.mon_command(
927 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
928 "\", \"tierpool\": \"" + cache_pool_name +
929 "\", \"force_nonempty\": \"--force-nonempty\" }",
930 inbl, NULL, NULL));
931 ASSERT_EQ(0, cluster.mon_command(
932 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
933 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
934 inbl, NULL, NULL));
935 ASSERT_EQ(0, cluster.mon_command(
936 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
937 "\", \"mode\": \"writeback\"}",
938 inbl, NULL, NULL));
939
940 // wait for maps to settle
941 cluster.wait_for_latest_osdmap();
942
943 // read, trigger a promote
944 {
945 bufferlist bl;
946 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
947 }
948
949 // read, trigger a whiteout, and a dirty object
950 {
951 bufferlist bl;
952 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
953 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
954 ASSERT_EQ(0, ioctx.write("bar", bl, bl.length(), 0));
955 }
956
957 // verify the object is present in the cache tier
958 {
959 NObjectIterator it = cache_ioctx.nobjects_begin();
960 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
961 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
962 ++it;
963 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
964 ++it;
965 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
966 }
967
968 // pin
969 {
970 ObjectWriteOperation op;
971 op.cache_pin();
972 librados::AioCompletion *completion = cluster.aio_create_completion();
973 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
974 completion->wait_for_complete();
975 ASSERT_EQ(0, completion->get_return_value());
976 completion->release();
977 }
978
979 // evict the pinned object with -EPERM
980 {
981 ObjectReadOperation op;
982 op.cache_evict();
983 librados::AioCompletion *completion = cluster.aio_create_completion();
984 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
985 librados::OPERATION_IGNORE_CACHE,
986 NULL));
987 completion->wait_for_complete();
988 ASSERT_EQ(-EPERM, completion->get_return_value());
989 completion->release();
990 }
991
992 // unpin
993 {
994 ObjectWriteOperation op;
995 op.cache_unpin();
996 librados::AioCompletion *completion = cluster.aio_create_completion();
997 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
998 completion->wait_for_complete();
999 ASSERT_EQ(0, completion->get_return_value());
1000 completion->release();
1001 }
1002
1003 // flush
1004 {
1005 ObjectReadOperation op;
1006 op.cache_flush();
1007 librados::AioCompletion *completion = cluster.aio_create_completion();
1008 ASSERT_EQ(0, cache_ioctx.aio_operate(
1009 "foo", completion, &op,
1010 librados::OPERATION_IGNORE_OVERLAY, NULL));
1011 completion->wait_for_complete();
1012 ASSERT_EQ(0, completion->get_return_value());
1013 completion->release();
1014 }
1015
1016 // verify clean
1017 {
1018 bool dirty = false;
1019 int r = -1;
1020 ObjectReadOperation op;
1021 op.is_dirty(&dirty, &r);
1022 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1023 ASSERT_FALSE(dirty);
1024 ASSERT_EQ(0, r);
1025 }
1026
1027 // evict
1028 {
1029 ObjectReadOperation op;
1030 op.cache_evict();
1031 librados::AioCompletion *completion = cluster.aio_create_completion();
1032 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
1033 librados::OPERATION_IGNORE_CACHE,
1034 NULL));
1035 completion->wait_for_complete();
1036 ASSERT_EQ(0, completion->get_return_value());
1037 completion->release();
1038 }
1039 {
1040 ObjectReadOperation op;
1041 op.cache_evict();
1042 librados::AioCompletion *completion = cluster.aio_create_completion();
1043 ASSERT_EQ(0, cache_ioctx.aio_operate(
1044 "foo", completion, &op,
1045 librados::OPERATION_IGNORE_CACHE, NULL));
1046 completion->wait_for_complete();
1047 ASSERT_EQ(0, completion->get_return_value());
1048 completion->release();
1049 }
1050 {
1051 ObjectReadOperation op;
1052 op.cache_evict();
1053 librados::AioCompletion *completion = cluster.aio_create_completion();
1054 ASSERT_EQ(0, cache_ioctx.aio_operate(
1055 "bar", completion, &op,
1056 librados::OPERATION_IGNORE_CACHE, NULL));
1057 completion->wait_for_complete();
1058 ASSERT_EQ(-EBUSY, completion->get_return_value());
1059 completion->release();
1060 }
1061}
1062
1063TEST_F(LibRadosTwoPoolsPP, EvictSnap) {
1064 SKIP_IF_CRIMSON();
1065 // create object
1066 {
1067 bufferlist bl;
1068 bl.append("hi there");
1069 ObjectWriteOperation op;
1070 op.write_full(bl);
1071 ASSERT_EQ(0, ioctx.operate("foo", &op));
1072 }
1073 {
1074 bufferlist bl;
1075 bl.append("hi there");
1076 ObjectWriteOperation op;
1077 op.write_full(bl);
1078 ASSERT_EQ(0, ioctx.operate("bar", &op));
1079 }
1080 {
1081 bufferlist bl;
1082 bl.append("hi there");
1083 ObjectWriteOperation op;
1084 op.write_full(bl);
1085 ASSERT_EQ(0, ioctx.operate("baz", &op));
1086 }
1087 {
1088 bufferlist bl;
1089 bl.append("hi there");
1090 ObjectWriteOperation op;
1091 op.write_full(bl);
1092 ASSERT_EQ(0, ioctx.operate("bam", &op));
1093 }
1094
1095 // create a snapshot, clone
1096 vector<uint64_t> my_snaps(1);
1097 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1098 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1099 my_snaps));
1100 {
1101 bufferlist bl;
1102 bl.append("ciao!");
1103 ObjectWriteOperation op;
1104 op.write_full(bl);
1105 ASSERT_EQ(0, ioctx.operate("foo", &op));
1106 }
1107 {
1108 bufferlist bl;
1109 bl.append("ciao!");
1110 ObjectWriteOperation op;
1111 op.write_full(bl);
1112 ASSERT_EQ(0, ioctx.operate("bar", &op));
1113 }
1114 {
1115 ObjectWriteOperation op;
1116 op.remove();
1117 ASSERT_EQ(0, ioctx.operate("baz", &op));
1118 }
1119 {
1120 bufferlist bl;
1121 bl.append("ciao!");
1122 ObjectWriteOperation op;
1123 op.write_full(bl);
1124 ASSERT_EQ(0, ioctx.operate("bam", &op));
1125 }
1126
1127 // configure cache
1128 bufferlist inbl;
1129 ASSERT_EQ(0, cluster.mon_command(
1130 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1131 "\", \"tierpool\": \"" + cache_pool_name +
1132 "\", \"force_nonempty\": \"--force-nonempty\" }",
1133 inbl, NULL, NULL));
1134 ASSERT_EQ(0, cluster.mon_command(
1135 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1136 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1137 inbl, NULL, NULL));
1138 ASSERT_EQ(0, cluster.mon_command(
1139 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1140 "\", \"mode\": \"writeback\"}",
1141 inbl, NULL, NULL));
1142
1143 // wait for maps to settle
1144 cluster.wait_for_latest_osdmap();
1145
1146 // read, trigger a promote on the head
1147 {
1148 bufferlist bl;
1149 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1150 ASSERT_EQ('c', bl[0]);
1151 }
1152 {
1153 bufferlist bl;
1154 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
1155 ASSERT_EQ('c', bl[0]);
1156 }
1157
1158 // evict bam
1159 {
1160 ObjectReadOperation op;
1161 op.cache_evict();
1162 librados::AioCompletion *completion = cluster.aio_create_completion();
1163 ASSERT_EQ(0, cache_ioctx.aio_operate(
1164 "bam", completion, &op,
1165 librados::OPERATION_IGNORE_CACHE, NULL));
1166 completion->wait_for_complete();
1167 ASSERT_EQ(0, completion->get_return_value());
1168 completion->release();
1169 }
1170 {
1171 bufferlist bl;
1172 ObjectReadOperation op;
1173 op.read(1, 0, &bl, NULL);
1174 librados::AioCompletion *completion = cluster.aio_create_completion();
1175 ASSERT_EQ(0, cache_ioctx.aio_operate(
1176 "bam", completion, &op,
1177 librados::OPERATION_IGNORE_CACHE, NULL));
1178 completion->wait_for_complete();
1179 ASSERT_EQ(-ENOENT, completion->get_return_value());
1180 completion->release();
1181 }
1182
1183 // read foo snap
1184 ioctx.snap_set_read(my_snaps[0]);
1185 {
1186 bufferlist bl;
1187 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1188 ASSERT_EQ('h', bl[0]);
1189 }
1190
1191 // evict foo snap
1192 {
1193 ObjectReadOperation op;
1194 op.cache_evict();
1195 librados::AioCompletion *completion = cluster.aio_create_completion();
1196 ASSERT_EQ(0, ioctx.aio_operate(
1197 "foo", completion, &op,
1198 librados::OPERATION_IGNORE_CACHE, NULL));
1199 completion->wait_for_complete();
1200 ASSERT_EQ(0, completion->get_return_value());
1201 completion->release();
1202 }
1203 // snap is gone...
1204 {
1205 bufferlist bl;
1206 ObjectReadOperation op;
1207 op.read(1, 0, &bl, NULL);
1208 librados::AioCompletion *completion = cluster.aio_create_completion();
1209 ASSERT_EQ(0, ioctx.aio_operate(
1210 "foo", completion, &op,
1211 librados::OPERATION_IGNORE_CACHE, NULL));
1212 completion->wait_for_complete();
1213 ASSERT_EQ(-ENOENT, completion->get_return_value());
1214 completion->release();
1215 }
1216 // head is still there...
1217 ioctx.snap_set_read(librados::SNAP_HEAD);
1218 {
1219 bufferlist bl;
1220 ObjectReadOperation op;
1221 op.read(1, 0, &bl, NULL);
1222 librados::AioCompletion *completion = cluster.aio_create_completion();
1223 ASSERT_EQ(0, ioctx.aio_operate(
1224 "foo", completion, &op,
1225 librados::OPERATION_IGNORE_CACHE, NULL));
1226 completion->wait_for_complete();
1227 ASSERT_EQ(0, completion->get_return_value());
1228 completion->release();
1229 }
1230
1231 // promote head + snap of bar
1232 ioctx.snap_set_read(librados::SNAP_HEAD);
1233 {
1234 bufferlist bl;
1235 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
1236 ASSERT_EQ('c', bl[0]);
1237 }
1238 ioctx.snap_set_read(my_snaps[0]);
1239 {
1240 bufferlist bl;
1241 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
1242 ASSERT_EQ('h', bl[0]);
1243 }
1244
1245 // evict bar head (fail)
1246 ioctx.snap_set_read(librados::SNAP_HEAD);
1247 {
1248 ObjectReadOperation op;
1249 op.cache_evict();
1250 librados::AioCompletion *completion = cluster.aio_create_completion();
1251 ASSERT_EQ(0, ioctx.aio_operate(
1252 "bar", completion, &op,
1253 librados::OPERATION_IGNORE_CACHE, NULL));
1254 completion->wait_for_complete();
1255 ASSERT_EQ(-EBUSY, completion->get_return_value());
1256 completion->release();
1257 }
1258
1259 // evict bar snap
1260 ioctx.snap_set_read(my_snaps[0]);
1261 {
1262 ObjectReadOperation op;
1263 op.cache_evict();
1264 librados::AioCompletion *completion = cluster.aio_create_completion();
1265 ASSERT_EQ(0, ioctx.aio_operate(
1266 "bar", completion, &op,
1267 librados::OPERATION_IGNORE_CACHE, NULL));
1268 completion->wait_for_complete();
1269 ASSERT_EQ(0, completion->get_return_value());
1270 completion->release();
1271 }
1272 // ...and then head
1273 ioctx.snap_set_read(librados::SNAP_HEAD);
1274 {
1275 bufferlist bl;
1276 ObjectReadOperation op;
1277 op.read(1, 0, &bl, NULL);
1278 librados::AioCompletion *completion = cluster.aio_create_completion();
1279 ASSERT_EQ(0, ioctx.aio_operate(
1280 "bar", completion, &op,
1281 librados::OPERATION_IGNORE_CACHE, NULL));
1282 completion->wait_for_complete();
1283 ASSERT_EQ(0, completion->get_return_value());
1284 completion->release();
1285 }
1286 {
1287 ObjectReadOperation op;
1288 op.cache_evict();
1289 librados::AioCompletion *completion = cluster.aio_create_completion();
1290 ASSERT_EQ(0, ioctx.aio_operate(
1291 "bar", completion, &op,
1292 librados::OPERATION_IGNORE_CACHE, NULL));
1293 completion->wait_for_complete();
1294 ASSERT_EQ(0, completion->get_return_value());
1295 completion->release();
1296 }
1297
1298 // cleanup
1299 ioctx.selfmanaged_snap_remove(my_snaps[0]);
1300}
1301
1302// this test case reproduces http://tracker.ceph.com/issues/8629
1303TEST_F(LibRadosTwoPoolsPP, EvictSnap2) {
1304 SKIP_IF_CRIMSON();
1305 // create object
1306 {
1307 bufferlist bl;
1308 bl.append("hi there");
1309 ObjectWriteOperation op;
1310 op.write_full(bl);
1311 ASSERT_EQ(0, ioctx.operate("foo", &op));
1312 }
1313 // create a snapshot, clone
1314 vector<uint64_t> my_snaps(1);
1315 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1316 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1317 my_snaps));
1318 {
1319 bufferlist bl;
1320 bl.append("ciao!");
1321 ObjectWriteOperation op;
1322 op.write_full(bl);
1323 ASSERT_EQ(0, ioctx.operate("foo", &op));
1324 }
1325 // configure cache
1326 bufferlist inbl;
1327 ASSERT_EQ(0, cluster.mon_command(
1328 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1329 "\", \"tierpool\": \"" + cache_pool_name +
1330 "\", \"force_nonempty\": \"--force-nonempty\" }",
1331 inbl, NULL, NULL));
1332 ASSERT_EQ(0, cluster.mon_command(
1333 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1334 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1335 inbl, NULL, NULL));
1336 ASSERT_EQ(0, cluster.mon_command(
1337 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1338 "\", \"mode\": \"writeback\"}",
1339 inbl, NULL, NULL));
1340
1341 // wait for maps to settle
1342 cluster.wait_for_latest_osdmap();
1343
1344 // read, trigger a promote on the head
1345 {
1346 bufferlist bl;
1347 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1348 ASSERT_EQ('c', bl[0]);
1349 }
1350
1351 // evict
1352 {
1353 ObjectReadOperation op;
1354 op.cache_evict();
1355 librados::AioCompletion *completion = cluster.aio_create_completion();
1356 ASSERT_EQ(0, cache_ioctx.aio_operate(
1357 "foo", completion, &op,
1358 librados::OPERATION_IGNORE_CACHE, NULL));
1359 completion->wait_for_complete();
1360 ASSERT_EQ(0, completion->get_return_value());
1361 completion->release();
1362 }
1363
1364 // verify the snapdir is not present in the cache pool
1365 {
1366 ObjectReadOperation op;
1367 librados::snap_set_t snapset;
1368 op.list_snaps(&snapset, NULL);
1369 ioctx.snap_set_read(librados::SNAP_DIR);
1370 librados::AioCompletion *completion = cluster.aio_create_completion();
1371 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op,
1372 librados::OPERATION_IGNORE_CACHE, NULL));
1373 completion->wait_for_complete();
1374 ASSERT_EQ(-ENOENT, completion->get_return_value());
1375 completion->release();
1376 }
1377}
1378
1379//This test case reproduces http://tracker.ceph.com/issues/17445
1380TEST_F(LibRadosTwoPoolsPP, ListSnap){
1381 SKIP_IF_CRIMSON();
1382 // Create object
1383 {
1384 bufferlist bl;
1385 bl.append("hi there");
1386 ObjectWriteOperation op;
1387 op.write_full(bl);
1388 ASSERT_EQ(0, ioctx.operate("foo", &op));
1389 }
1390 {
1391 bufferlist bl;
1392 bl.append("hi there");
1393 ObjectWriteOperation op;
1394 op.write_full(bl);
1395 ASSERT_EQ(0, ioctx.operate("bar", &op));
1396 }
1397 {
1398 bufferlist bl;
1399 bl.append("hi there");
1400 ObjectWriteOperation op;
1401 op.write_full(bl);
1402 ASSERT_EQ(0, ioctx.operate("baz", &op));
1403 }
1404 {
1405 bufferlist bl;
1406 bl.append("hi there");
1407 ObjectWriteOperation op;
1408 op.write_full(bl);
1409 ASSERT_EQ(0, ioctx.operate("bam", &op));
1410 }
1411
1412 // Create a snapshot, clone
1413 vector<uint64_t> my_snaps(1);
1414 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1415 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1416 my_snaps));
1417 {
1418 bufferlist bl;
1419 bl.append("ciao!");
1420 ObjectWriteOperation op;
1421 op.write_full(bl);
1422 ASSERT_EQ(0, ioctx.operate("foo", &op));
1423 }
1424 {
1425 bufferlist bl;
1426 bl.append("ciao!");
1427 ObjectWriteOperation op;
1428 op.write_full(bl);
1429 ASSERT_EQ(0, ioctx.operate("bar", &op));
1430 }
1431 {
1432 ObjectWriteOperation op;
1433 op.remove();
1434 ASSERT_EQ(0, ioctx.operate("baz", &op));
1435 }
1436 {
1437 bufferlist bl;
1438 bl.append("ciao!");
1439 ObjectWriteOperation op;
1440 op.write_full(bl);
1441 ASSERT_EQ(0, ioctx.operate("bam", &op));
1442 }
1443
1444 // Configure cache
1445 bufferlist inbl;
1446 ASSERT_EQ(0, cluster.mon_command(
1447 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1448 "\", \"tierpool\": \"" + cache_pool_name +
1449 "\", \"force_nonempty\": \"--force-nonempty\" }",
1450 inbl, NULL, NULL));
1451 ASSERT_EQ(0, cluster.mon_command(
1452 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1453 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1454 inbl, NULL, NULL));
1455 ASSERT_EQ(0, cluster.mon_command(
1456 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1457 "\", \"mode\": \"writeback\"}",
1458 inbl, NULL, NULL));
1459
1460 // Wait for maps to settle
1461 cluster.wait_for_latest_osdmap();
1462
1463 // Read, trigger a promote on the head
1464 {
1465 bufferlist bl;
1466 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1467 ASSERT_EQ('c', bl[0]);
1468 }
1469
1470 // Read foo snap
1471 ioctx.snap_set_read(my_snaps[0]);
1472 {
1473 bufferlist bl;
1474 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1475 ASSERT_EQ('h', bl[0]);
1476 }
1477
1478 // Evict foo snap
1479 {
1480 ObjectReadOperation op;
1481 op.cache_evict();
1482 librados::AioCompletion *completion = cluster.aio_create_completion();
1483 ASSERT_EQ(0, ioctx.aio_operate(
1484 "foo", completion, &op,
1485 librados::OPERATION_IGNORE_CACHE, NULL));
1486 completion->wait_for_complete();
1487 ASSERT_EQ(0, completion->get_return_value());
1488 completion->release();
1489 }
1490 // Snap is gone...
1491 {
1492 bufferlist bl;
1493 ObjectReadOperation op;
1494 op.read(1, 0, &bl, NULL);
1495 librados::AioCompletion *completion = cluster.aio_create_completion();
1496 ASSERT_EQ(0, ioctx.aio_operate(
1497 "foo", completion, &op,
1498 librados::OPERATION_IGNORE_CACHE, NULL));
1499 completion->wait_for_complete();
1500 ASSERT_EQ(-ENOENT, completion->get_return_value());
1501 completion->release();
1502 }
1503
1504 // Do list-snaps
1505 ioctx.snap_set_read(CEPH_SNAPDIR);
1506 {
1507 snap_set_t snap_set;
1508 int snap_ret;
1509 ObjectReadOperation op;
1510 op.list_snaps(&snap_set, &snap_ret);
1511 librados::AioCompletion *completion = cluster.aio_create_completion();
1512 ASSERT_EQ(0, ioctx.aio_operate(
1513 "foo", completion, &op,
1514 0, NULL));
1515 completion->wait_for_complete();
1516 ASSERT_EQ(0, snap_ret);
1517 ASSERT_LT(0u, snap_set.clones.size());
1518 for (vector<librados::clone_info_t>::const_iterator r = snap_set.clones.begin();
1519 r != snap_set.clones.end();
1520 ++r) {
1521 if (r->cloneid != librados::SNAP_HEAD) {
1522 ASSERT_LT(0u, r->snaps.size());
1523 }
1524 }
1525 }
1526
1527 // Cleanup
1528 ioctx.selfmanaged_snap_remove(my_snaps[0]);
1529}
1530
1531// This test case reproduces https://tracker.ceph.com/issues/49409
1532TEST_F(LibRadosTwoPoolsPP, EvictSnapRollbackReadRace) {
1533 SKIP_IF_CRIMSON();
1534 // create object
1535 {
1536 bufferlist bl;
1537 int len = string("hi there").length() * 2;
1538 // append more chrunk data make sure the second promote
1539 // op coming before the first promote op finished
1540 for (int i=0; i<4*1024*1024/len; ++i)
1541 bl.append("hi therehi there");
1542 ObjectWriteOperation op;
1543 op.write_full(bl);
1544 ASSERT_EQ(0, ioctx.operate("foo", &op));
1545 }
1546
1547 // create two snapshot, a clone
1548 vector<uint64_t> my_snaps(2);
1549 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[1]));
1550 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1551 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1552 my_snaps));
1553 {
1554 bufferlist bl;
1555 bl.append("ciao!");
1556 ObjectWriteOperation op;
1557 op.write_full(bl);
1558 ASSERT_EQ(0, ioctx.operate("foo", &op));
1559 }
1560
1561 // configure cache
1562 bufferlist inbl;
1563 ASSERT_EQ(0, cluster.mon_command(
1564 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1565 "\", \"tierpool\": \"" + cache_pool_name +
1566 "\", \"force_nonempty\": \"--force-nonempty\" }",
1567 inbl, NULL, NULL));
1568 ASSERT_EQ(0, cluster.mon_command(
1569 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1570 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1571 inbl, NULL, NULL));
1572 ASSERT_EQ(0, cluster.mon_command(
1573 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1574 "\", \"mode\": \"writeback\"}",
1575 inbl, NULL, NULL));
1576
1577 // wait for maps to settle
1578 cluster.wait_for_latest_osdmap();
1579
1580 // read, trigger a promote on the head
1581 {
1582 bufferlist bl;
1583 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1584 ASSERT_EQ('c', bl[0]);
1585 }
1586
1587 // try more times
1588 int retries = 50;
1589 for (int i=0; i<retries; ++i)
1590 {
1591 {
1592 librados::AioCompletion * completion = cluster.aio_create_completion();
1593 librados::AioCompletion * completion1 = cluster.aio_create_completion();
1594
1595 // send a snap rollback op and a snap read op parallel
1596 // trigger two promote(copy) to the same snap clone obj
1597 // the second snap read op is read-ordered make sure
1598 // op not wait for objects_blocked_on_snap_promotion
1599 ObjectWriteOperation op;
1600 op.selfmanaged_snap_rollback(my_snaps[0]);
1601 ASSERT_EQ(0, ioctx.aio_operate(
1602 "foo", completion, &op));
1603
1604 ioctx.snap_set_read(my_snaps[1]);
1605 std::map<uint64_t, uint64_t> extents;
1606 bufferlist read_bl;
1607 int rval = -1;
1608 ObjectReadOperation op1;
1609 op1.sparse_read(0, 8, &extents, &read_bl, &rval);
1610 ASSERT_EQ(0, ioctx.aio_operate("foo", completion1, &op1, &read_bl));
1611 ioctx.snap_set_read(librados::SNAP_HEAD);
1612
1613 completion->wait_for_complete();
1614 ASSERT_EQ(0, completion->get_return_value());
1615 completion->release();
1616
1617 completion1->wait_for_complete();
1618 ASSERT_EQ(0, completion1->get_return_value());
1619 completion1->release();
1620 }
1621
1622 // evict foo snap
1623 ioctx.snap_set_read(my_snaps[0]);
1624 {
1625 ObjectReadOperation op;
1626 op.cache_evict();
1627 librados::AioCompletion *completion = cluster.aio_create_completion();
1628 ASSERT_EQ(0, ioctx.aio_operate(
1629 "foo", completion, &op,
1630 librados::OPERATION_IGNORE_CACHE, NULL));
1631 completion->wait_for_complete();
1632 ASSERT_EQ(0, completion->get_return_value());
1633 completion->release();
1634 }
1635 ioctx.snap_set_read(librados::SNAP_HEAD);
1636 }
1637
1638 // cleanup
1639 ioctx.selfmanaged_snap_remove(my_snaps[0]);
1640 ioctx.selfmanaged_snap_remove(my_snaps[1]);
1641}
1642
1643TEST_F(LibRadosTwoPoolsPP, TryFlush) {
1644 SKIP_IF_CRIMSON();
1645 // configure cache
1646 bufferlist inbl;
1647 ASSERT_EQ(0, cluster.mon_command(
1648 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1649 "\", \"tierpool\": \"" + cache_pool_name +
1650 "\", \"force_nonempty\": \"--force-nonempty\" }",
1651 inbl, NULL, NULL));
1652 ASSERT_EQ(0, cluster.mon_command(
1653 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1654 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1655 inbl, NULL, NULL));
1656 ASSERT_EQ(0, cluster.mon_command(
1657 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1658 "\", \"mode\": \"writeback\"}",
1659 inbl, NULL, NULL));
1660
1661 // wait for maps to settle
1662 cluster.wait_for_latest_osdmap();
1663
1664 // create object
1665 {
1666 bufferlist bl;
1667 bl.append("hi there");
1668 ObjectWriteOperation op;
1669 op.write_full(bl);
1670 ASSERT_EQ(0, ioctx.operate("foo", &op));
1671 }
1672
1673 // verify the object is present in the cache tier
1674 {
1675 NObjectIterator it = cache_ioctx.nobjects_begin();
1676 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
1677 ASSERT_TRUE(it->get_oid() == string("foo"));
1678 ++it;
1679 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1680 }
1681
1682 // verify the object is NOT present in the base tier
1683 {
1684 NObjectIterator it = ioctx.nobjects_begin();
1685 ASSERT_TRUE(it == ioctx.nobjects_end());
1686 }
1687
1688 // verify dirty
1689 {
1690 bool dirty = false;
1691 int r = -1;
1692 ObjectReadOperation op;
1693 op.is_dirty(&dirty, &r);
1694 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1695 ASSERT_TRUE(dirty);
1696 ASSERT_EQ(0, r);
1697 }
1698
1699 // pin
1700 {
1701 ObjectWriteOperation op;
1702 op.cache_pin();
1703 librados::AioCompletion *completion = cluster.aio_create_completion();
1704 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1705 completion->wait_for_complete();
1706 ASSERT_EQ(0, completion->get_return_value());
1707 completion->release();
1708 }
1709
1710 // flush the pinned object with -EPERM
1711 {
1712 ObjectReadOperation op;
1713 op.cache_try_flush();
1714 librados::AioCompletion *completion = cluster.aio_create_completion();
1715 ASSERT_EQ(0, cache_ioctx.aio_operate(
1716 "foo", completion, &op,
1717 librados::OPERATION_IGNORE_OVERLAY |
1718 librados::OPERATION_SKIPRWLOCKS, NULL));
1719 completion->wait_for_complete();
1720 ASSERT_EQ(-EPERM, completion->get_return_value());
1721 completion->release();
1722 }
1723
1724 // unpin
1725 {
1726 ObjectWriteOperation op;
1727 op.cache_unpin();
1728 librados::AioCompletion *completion = cluster.aio_create_completion();
1729 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1730 completion->wait_for_complete();
1731 ASSERT_EQ(0, completion->get_return_value());
1732 completion->release();
1733 }
1734
1735 // flush
1736 {
1737 ObjectReadOperation op;
1738 op.cache_try_flush();
1739 librados::AioCompletion *completion = cluster.aio_create_completion();
1740 ASSERT_EQ(0, cache_ioctx.aio_operate(
1741 "foo", completion, &op,
1742 librados::OPERATION_IGNORE_OVERLAY |
1743 librados::OPERATION_SKIPRWLOCKS, NULL));
1744 completion->wait_for_complete();
1745 ASSERT_EQ(0, completion->get_return_value());
1746 completion->release();
1747 }
1748
1749 // verify clean
1750 {
1751 bool dirty = false;
1752 int r = -1;
1753 ObjectReadOperation op;
1754 op.is_dirty(&dirty, &r);
1755 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1756 ASSERT_FALSE(dirty);
1757 ASSERT_EQ(0, r);
1758 }
1759
1760 // verify in base tier
1761 {
1762 NObjectIterator it = ioctx.nobjects_begin();
1763 ASSERT_TRUE(it != ioctx.nobjects_end());
1764 ASSERT_TRUE(it->get_oid() == string("foo"));
1765 ++it;
1766 ASSERT_TRUE(it == ioctx.nobjects_end());
1767 }
1768
1769 // evict it
1770 {
1771 ObjectReadOperation op;
1772 op.cache_evict();
1773 librados::AioCompletion *completion = cluster.aio_create_completion();
1774 ASSERT_EQ(0, cache_ioctx.aio_operate(
1775 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
1776 completion->wait_for_complete();
1777 ASSERT_EQ(0, completion->get_return_value());
1778 completion->release();
1779 }
1780
1781 // verify no longer in cache tier
1782 {
1783 NObjectIterator it = cache_ioctx.nobjects_begin();
1784 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1785 }
1786}
1787
1788TEST_F(LibRadosTwoPoolsPP, Flush) {
1789 SKIP_IF_CRIMSON();
1790 // configure cache
1791 bufferlist inbl;
1792 ASSERT_EQ(0, cluster.mon_command(
1793 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1794 "\", \"tierpool\": \"" + cache_pool_name +
1795 "\", \"force_nonempty\": \"--force-nonempty\" }",
1796 inbl, NULL, NULL));
1797 ASSERT_EQ(0, cluster.mon_command(
1798 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1799 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1800 inbl, NULL, NULL));
1801 ASSERT_EQ(0, cluster.mon_command(
1802 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1803 "\", \"mode\": \"writeback\"}",
1804 inbl, NULL, NULL));
1805
1806 // wait for maps to settle
1807 cluster.wait_for_latest_osdmap();
1808
1809 uint64_t user_version = 0;
1810
1811 // create object
1812 {
1813 bufferlist bl;
1814 bl.append("hi there");
1815 ObjectWriteOperation op;
1816 op.write_full(bl);
1817 ASSERT_EQ(0, ioctx.operate("foo", &op));
1818 }
1819
1820 // verify the object is present in the cache tier
1821 {
1822 NObjectIterator it = cache_ioctx.nobjects_begin();
1823 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
1824 ASSERT_TRUE(it->get_oid() == string("foo"));
1825 ++it;
1826 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1827 }
1828
1829 // verify the object is NOT present in the base tier
1830 {
1831 NObjectIterator it = ioctx.nobjects_begin();
1832 ASSERT_TRUE(it == ioctx.nobjects_end());
1833 }
1834
1835 // verify dirty
1836 {
1837 bool dirty = false;
1838 int r = -1;
1839 ObjectReadOperation op;
1840 op.is_dirty(&dirty, &r);
1841 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1842 ASSERT_TRUE(dirty);
1843 ASSERT_EQ(0, r);
1844 user_version = cache_ioctx.get_last_version();
1845 }
1846
1847 // pin
1848 {
1849 ObjectWriteOperation op;
1850 op.cache_pin();
1851 librados::AioCompletion *completion = cluster.aio_create_completion();
1852 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1853 completion->wait_for_complete();
1854 ASSERT_EQ(0, completion->get_return_value());
1855 completion->release();
1856 }
1857
1858 // flush the pinned object with -EPERM
1859 {
1860 ObjectReadOperation op;
1861 op.cache_try_flush();
1862 librados::AioCompletion *completion = cluster.aio_create_completion();
1863 ASSERT_EQ(0, cache_ioctx.aio_operate(
1864 "foo", completion, &op,
1865 librados::OPERATION_IGNORE_OVERLAY |
1866 librados::OPERATION_SKIPRWLOCKS, NULL));
1867 completion->wait_for_complete();
1868 ASSERT_EQ(-EPERM, completion->get_return_value());
1869 completion->release();
1870 }
1871
1872 // unpin
1873 {
1874 ObjectWriteOperation op;
1875 op.cache_unpin();
1876 librados::AioCompletion *completion = cluster.aio_create_completion();
1877 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1878 completion->wait_for_complete();
1879 ASSERT_EQ(0, completion->get_return_value());
1880 completion->release();
1881 }
1882
1883 // flush
1884 {
1885 ObjectReadOperation op;
1886 op.cache_flush();
1887 librados::AioCompletion *completion = cluster.aio_create_completion();
1888 ASSERT_EQ(0, cache_ioctx.aio_operate(
1889 "foo", completion, &op,
1890 librados::OPERATION_IGNORE_OVERLAY, NULL));
1891 completion->wait_for_complete();
1892 ASSERT_EQ(0, completion->get_return_value());
1893 completion->release();
1894 }
1895
1896 // verify clean
1897 {
1898 bool dirty = false;
1899 int r = -1;
1900 ObjectReadOperation op;
1901 op.is_dirty(&dirty, &r);
1902 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1903 ASSERT_FALSE(dirty);
1904 ASSERT_EQ(0, r);
1905 }
1906
1907 // verify in base tier
1908 {
1909 NObjectIterator it = ioctx.nobjects_begin();
1910 ASSERT_TRUE(it != ioctx.nobjects_end());
1911 ASSERT_TRUE(it->get_oid() == string("foo"));
1912 ++it;
1913 ASSERT_TRUE(it == ioctx.nobjects_end());
1914 }
1915
1916 // evict it
1917 {
1918 ObjectReadOperation op;
1919 op.cache_evict();
1920 librados::AioCompletion *completion = cluster.aio_create_completion();
1921 ASSERT_EQ(0, cache_ioctx.aio_operate(
1922 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
1923 completion->wait_for_complete();
1924 ASSERT_EQ(0, completion->get_return_value());
1925 completion->release();
1926 }
1927
1928 // verify no longer in cache tier
1929 {
1930 NObjectIterator it = cache_ioctx.nobjects_begin();
1931 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1932 }
1933
1934 // read it again and verify the version is consistent
1935 {
1936 bufferlist bl;
1937 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
1938 ASSERT_EQ(user_version, cache_ioctx.get_last_version());
1939 }
1940
1941 // erase it
1942 {
1943 ObjectWriteOperation op;
1944 op.remove();
1945 ASSERT_EQ(0, ioctx.operate("foo", &op));
1946 }
1947
1948 // flush whiteout
1949 {
1950 ObjectReadOperation op;
1951 op.cache_flush();
1952 librados::AioCompletion *completion = cluster.aio_create_completion();
1953 ASSERT_EQ(0, cache_ioctx.aio_operate(
1954 "foo", completion, &op,
1955 librados::OPERATION_IGNORE_OVERLAY, NULL));
1956 completion->wait_for_complete();
1957 ASSERT_EQ(0, completion->get_return_value());
1958 completion->release();
1959 }
1960
1961 // evict
1962 {
1963 ObjectReadOperation op;
1964 op.cache_evict();
1965 librados::AioCompletion *completion = cluster.aio_create_completion();
1966 ASSERT_EQ(0, cache_ioctx.aio_operate(
1967 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
1968 completion->wait_for_complete();
1969 ASSERT_EQ(0, completion->get_return_value());
1970 completion->release();
1971 }
1972
1973 // verify no longer in cache tier
1974 {
1975 NObjectIterator it = cache_ioctx.nobjects_begin();
1976 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1977 }
1978 // or base tier
1979 {
1980 NObjectIterator it = ioctx.nobjects_begin();
1981 ASSERT_TRUE(it == ioctx.nobjects_end());
1982 }
1983}
1984
1985TEST_F(LibRadosTwoPoolsPP, FlushSnap) {
1986 SKIP_IF_CRIMSON();
1987 // configure cache
1988 bufferlist inbl;
1989 ASSERT_EQ(0, cluster.mon_command(
1990 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1991 "\", \"tierpool\": \"" + cache_pool_name +
1992 "\", \"force_nonempty\": \"--force-nonempty\" }",
1993 inbl, NULL, NULL));
1994 ASSERT_EQ(0, cluster.mon_command(
1995 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1996 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1997 inbl, NULL, NULL));
1998 ASSERT_EQ(0, cluster.mon_command(
1999 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2000 "\", \"mode\": \"writeback\"}",
2001 inbl, NULL, NULL));
2002
2003 // wait for maps to settle
2004 cluster.wait_for_latest_osdmap();
2005
2006 // create object
2007 {
2008 bufferlist bl;
2009 bl.append("a");
2010 ObjectWriteOperation op;
2011 op.write_full(bl);
2012 ASSERT_EQ(0, ioctx.operate("foo", &op));
2013 }
2014
2015 // create a snapshot, clone
2016 vector<uint64_t> my_snaps(1);
2017 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
2018 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
2019 my_snaps));
2020 {
2021 bufferlist bl;
2022 bl.append("b");
2023 ObjectWriteOperation op;
2024 op.write_full(bl);
2025 ASSERT_EQ(0, ioctx.operate("foo", &op));
2026 }
2027
2028 // and another
2029 my_snaps.resize(2);
2030 my_snaps[1] = my_snaps[0];
2031 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
2032 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
2033 my_snaps));
2034 {
2035 bufferlist bl;
2036 bl.append("c");
2037 ObjectWriteOperation op;
2038 op.write_full(bl);
2039 ASSERT_EQ(0, ioctx.operate("foo", &op));
2040 }
2041
2042 // verify the object is present in the cache tier
2043 {
2044 NObjectIterator it = cache_ioctx.nobjects_begin();
2045 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
2046 ASSERT_TRUE(it->get_oid() == string("foo"));
2047 ++it;
2048 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
2049 }
2050
2051 // verify the object is NOT present in the base tier
2052 {
2053 NObjectIterator it = ioctx.nobjects_begin();
2054 ASSERT_TRUE(it == ioctx.nobjects_end());
2055 }
2056
2057 // flush on head (should fail)
2058 ioctx.snap_set_read(librados::SNAP_HEAD);
2059 {
2060 ObjectReadOperation op;
2061 op.cache_flush();
2062 librados::AioCompletion *completion = cluster.aio_create_completion();
2063 ASSERT_EQ(0, ioctx.aio_operate(
2064 "foo", completion, &op,
2065 librados::OPERATION_IGNORE_CACHE, NULL));
2066 completion->wait_for_complete();
2067 ASSERT_EQ(-EBUSY, completion->get_return_value());
2068 completion->release();
2069 }
2070 // flush on recent snap (should fail)
2071 ioctx.snap_set_read(my_snaps[0]);
2072 {
2073 ObjectReadOperation op;
2074 op.cache_flush();
2075 librados::AioCompletion *completion = cluster.aio_create_completion();
2076 ASSERT_EQ(0, ioctx.aio_operate(
2077 "foo", completion, &op,
2078 librados::OPERATION_IGNORE_CACHE, NULL));
2079 completion->wait_for_complete();
2080 ASSERT_EQ(-EBUSY, completion->get_return_value());
2081 completion->release();
2082 }
2083 // flush on oldest snap
2084 ioctx.snap_set_read(my_snaps[1]);
2085 {
2086 ObjectReadOperation op;
2087 op.cache_flush();
2088 librados::AioCompletion *completion = cluster.aio_create_completion();
2089 ASSERT_EQ(0, ioctx.aio_operate(
2090 "foo", completion, &op,
2091 librados::OPERATION_IGNORE_CACHE, NULL));
2092 completion->wait_for_complete();
2093 ASSERT_EQ(0, completion->get_return_value());
2094 completion->release();
2095 }
2096 // flush on next oldest snap
2097 ioctx.snap_set_read(my_snaps[0]);
2098 {
2099 ObjectReadOperation op;
2100 op.cache_flush();
2101 librados::AioCompletion *completion = cluster.aio_create_completion();
2102 ASSERT_EQ(0, ioctx.aio_operate(
2103 "foo", completion, &op,
2104 librados::OPERATION_IGNORE_CACHE, NULL));
2105 completion->wait_for_complete();
2106 ASSERT_EQ(0, completion->get_return_value());
2107 completion->release();
2108 }
2109 // flush on head
2110 ioctx.snap_set_read(librados::SNAP_HEAD);
2111 {
2112 ObjectReadOperation op;
2113 op.cache_flush();
2114 librados::AioCompletion *completion = cluster.aio_create_completion();
2115 ASSERT_EQ(0, ioctx.aio_operate(
2116 "foo", completion, &op,
2117 librados::OPERATION_IGNORE_CACHE, NULL));
2118 completion->wait_for_complete();
2119 ASSERT_EQ(0, completion->get_return_value());
2120 completion->release();
2121 }
2122
2123 // verify i can read the snaps from the cache pool
2124 ioctx.snap_set_read(librados::SNAP_HEAD);
2125 {
2126 bufferlist bl;
2127 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2128 ASSERT_EQ('c', bl[0]);
2129 }
2130 ioctx.snap_set_read(my_snaps[0]);
2131 {
2132 bufferlist bl;
2133 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2134 ASSERT_EQ('b', bl[0]);
2135 }
2136 ioctx.snap_set_read(my_snaps[1]);
2137 {
2138 bufferlist bl;
2139 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2140 ASSERT_EQ('a', bl[0]);
2141 }
2142
2143 // remove overlay
2144 ASSERT_EQ(0, cluster.mon_command(
2145 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2146 "\"}",
2147 inbl, NULL, NULL));
2148
2149 // wait for maps to settle
2150 cluster.wait_for_latest_osdmap();
2151
2152 // verify i can read the snaps from the base pool
2153 ioctx.snap_set_read(librados::SNAP_HEAD);
2154 {
2155 bufferlist bl;
2156 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2157 ASSERT_EQ('c', bl[0]);
2158 }
2159 ioctx.snap_set_read(my_snaps[0]);
2160 {
2161 bufferlist bl;
2162 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2163 ASSERT_EQ('b', bl[0]);
2164 }
2165 ioctx.snap_set_read(my_snaps[1]);
2166 {
2167 bufferlist bl;
2168 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2169 ASSERT_EQ('a', bl[0]);
2170 }
2171
2172 ASSERT_EQ(0, cluster.mon_command(
2173 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2174 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2175 inbl, NULL, NULL));
2176
2177 // cleanup
2178 ioctx.selfmanaged_snap_remove(my_snaps[0]);
2179}
2180
2181TEST_F(LibRadosTierPP, FlushWriteRaces) {
2182 SKIP_IF_CRIMSON();
2183 Rados cluster;
2184 std::string pool_name = get_temp_pool_name();
2185 std::string cache_pool_name = pool_name + "-cache";
2186 ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
2187 ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str()));
2188 IoCtx cache_ioctx;
2189 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
2190 cache_ioctx.application_enable("rados", true);
2191 IoCtx ioctx;
2192 ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
2193
2194 // configure cache
2195 bufferlist inbl;
2196 ASSERT_EQ(0, cluster.mon_command(
2197 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2198 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2199 inbl, NULL, NULL));
2200 ASSERT_EQ(0, cluster.mon_command(
2201 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2202 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2203 inbl, NULL, NULL));
2204 ASSERT_EQ(0, cluster.mon_command(
2205 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2206 "\", \"mode\": \"writeback\"}",
2207 inbl, NULL, NULL));
2208
2209 // wait for maps to settle
2210 cluster.wait_for_latest_osdmap();
2211
2212 // create/dirty object
2213 bufferlist bl;
2214 bl.append("hi there");
2215 {
2216 ObjectWriteOperation op;
2217 op.write_full(bl);
2218 ASSERT_EQ(0, ioctx.operate("foo", &op));
2219 }
2220
2221 // flush + write
2222 {
2223 ObjectReadOperation op;
2224 op.cache_flush();
2225 librados::AioCompletion *completion = cluster.aio_create_completion();
2226 ASSERT_EQ(0, cache_ioctx.aio_operate(
2227 "foo", completion, &op,
2228 librados::OPERATION_IGNORE_OVERLAY, NULL));
2229
2230 ObjectWriteOperation op2;
2231 op2.write_full(bl);
2232 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2233 ASSERT_EQ(0, ioctx.aio_operate(
2234 "foo", completion2, &op2, 0));
2235
2236 completion->wait_for_complete();
2237 completion2->wait_for_complete();
2238 ASSERT_EQ(0, completion->get_return_value());
2239 ASSERT_EQ(0, completion2->get_return_value());
2240 completion->release();
2241 completion2->release();
2242 }
2243
2244 int tries = 1000;
2245 do {
2246 // create/dirty object
2247 {
2248 bufferlist bl;
2249 bl.append("hi there");
2250 ObjectWriteOperation op;
2251 op.write_full(bl);
2252 ASSERT_EQ(0, ioctx.operate("foo", &op));
2253 }
2254
2255 // try-flush + write
2256 {
2257 ObjectReadOperation op;
2258 op.cache_try_flush();
2259 librados::AioCompletion *completion = cluster.aio_create_completion();
2260 ASSERT_EQ(0, cache_ioctx.aio_operate(
2261 "foo", completion, &op,
2262 librados::OPERATION_IGNORE_OVERLAY |
2263 librados::OPERATION_SKIPRWLOCKS, NULL));
2264
2265 ObjectWriteOperation op2;
2266 op2.write_full(bl);
2267 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2268 ASSERT_EQ(0, ioctx.aio_operate("foo", completion2, &op2, 0));
2269
2270 completion->wait_for_complete();
2271 completion2->wait_for_complete();
2272 int r = completion->get_return_value();
2273 ASSERT_TRUE(r == -EBUSY || r == 0);
2274 ASSERT_EQ(0, completion2->get_return_value());
2275 completion->release();
2276 completion2->release();
2277 if (r == -EBUSY)
2278 break;
2279 cout << "didn't get EBUSY, trying again" << std::endl;
2280 }
2281 ASSERT_TRUE(--tries);
2282 } while (true);
2283
2284 // tear down tiers
2285 ASSERT_EQ(0, cluster.mon_command(
2286 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2287 "\"}",
2288 inbl, NULL, NULL));
2289 ASSERT_EQ(0, cluster.mon_command(
2290 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2291 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2292 inbl, NULL, NULL));
2293
2294 // wait for maps to settle before next test
2295 cluster.wait_for_latest_osdmap();
2296
2297 ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str()));
2298 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
2299}
2300
2301TEST_F(LibRadosTwoPoolsPP, FlushTryFlushRaces) {
2302 SKIP_IF_CRIMSON();
2303 // configure cache
2304 bufferlist inbl;
2305 ASSERT_EQ(0, cluster.mon_command(
2306 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2307 "\", \"tierpool\": \"" + cache_pool_name +
2308 "\", \"force_nonempty\": \"--force-nonempty\" }",
2309 inbl, NULL, NULL));
2310 ASSERT_EQ(0, cluster.mon_command(
2311 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2312 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2313 inbl, NULL, NULL));
2314 ASSERT_EQ(0, cluster.mon_command(
2315 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2316 "\", \"mode\": \"writeback\"}",
2317 inbl, NULL, NULL));
2318
2319 // wait for maps to settle
2320 cluster.wait_for_latest_osdmap();
2321
2322 // create/dirty object
2323 {
2324 bufferlist bl;
2325 bl.append("hi there");
2326 ObjectWriteOperation op;
2327 op.write_full(bl);
2328 ASSERT_EQ(0, ioctx.operate("foo", &op));
2329 }
2330
2331 // flush + flush
2332 {
2333 ObjectReadOperation op;
2334 op.cache_flush();
2335 librados::AioCompletion *completion = cluster.aio_create_completion();
2336 ASSERT_EQ(0, cache_ioctx.aio_operate(
2337 "foo", completion, &op,
2338 librados::OPERATION_IGNORE_OVERLAY, NULL));
2339
2340 ObjectReadOperation op2;
2341 op2.cache_flush();
2342 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2343 ASSERT_EQ(0, cache_ioctx.aio_operate(
2344 "foo", completion2, &op2,
2345 librados::OPERATION_IGNORE_OVERLAY, NULL));
2346
2347 completion->wait_for_complete();
2348 completion2->wait_for_complete();
2349 ASSERT_EQ(0, completion->get_return_value());
2350 ASSERT_EQ(0, completion2->get_return_value());
2351 completion->release();
2352 completion2->release();
2353 }
2354
2355 // create/dirty object
2356 {
2357 bufferlist bl;
2358 bl.append("hi there");
2359 ObjectWriteOperation op;
2360 op.write_full(bl);
2361 ASSERT_EQ(0, ioctx.operate("foo", &op));
2362 }
2363
2364 // flush + try-flush
2365 {
2366 ObjectReadOperation op;
2367 op.cache_flush();
2368 librados::AioCompletion *completion = cluster.aio_create_completion();
2369 ASSERT_EQ(0, cache_ioctx.aio_operate(
2370 "foo", completion, &op,
2371 librados::OPERATION_IGNORE_OVERLAY, NULL));
2372
2373 ObjectReadOperation op2;
2374 op2.cache_try_flush();
2375 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2376 ASSERT_EQ(0, cache_ioctx.aio_operate(
2377 "foo", completion2, &op2,
2378 librados::OPERATION_IGNORE_OVERLAY |
2379 librados::OPERATION_SKIPRWLOCKS, NULL));
2380
2381 completion->wait_for_complete();
2382 completion2->wait_for_complete();
2383 ASSERT_EQ(0, completion->get_return_value());
2384 ASSERT_EQ(0, completion2->get_return_value());
2385 completion->release();
2386 completion2->release();
2387 }
2388
2389 // create/dirty object
2390 int tries = 1000;
2391 do {
2392 {
2393 bufferlist bl;
2394 bl.append("hi there");
2395 ObjectWriteOperation op;
2396 op.write_full(bl);
2397 ASSERT_EQ(0, ioctx.operate("foo", &op));
2398 }
2399
2400 // try-flush + flush
2401 // (flush will not piggyback on try-flush)
2402 {
2403 ObjectReadOperation op;
2404 op.cache_try_flush();
2405 librados::AioCompletion *completion = cluster.aio_create_completion();
2406 ASSERT_EQ(0, cache_ioctx.aio_operate(
2407 "foo", completion, &op,
2408 librados::OPERATION_IGNORE_OVERLAY |
2409 librados::OPERATION_SKIPRWLOCKS, NULL));
2410
2411 ObjectReadOperation op2;
2412 op2.cache_flush();
2413 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2414 ASSERT_EQ(0, cache_ioctx.aio_operate(
2415 "foo", completion2, &op2,
2416 librados::OPERATION_IGNORE_OVERLAY, NULL));
2417
2418 completion->wait_for_complete();
2419 completion2->wait_for_complete();
2420 int r = completion->get_return_value();
2421 ASSERT_TRUE(r == -EBUSY || r == 0);
2422 ASSERT_EQ(0, completion2->get_return_value());
2423 completion->release();
2424 completion2->release();
2425 if (r == -EBUSY)
2426 break;
2427 cout << "didn't get EBUSY, trying again" << std::endl;
2428 }
2429 ASSERT_TRUE(--tries);
2430 } while (true);
2431
2432 // create/dirty object
2433 {
2434 bufferlist bl;
2435 bl.append("hi there");
2436 ObjectWriteOperation op;
2437 op.write_full(bl);
2438 ASSERT_EQ(0, ioctx.operate("foo", &op));
2439 }
2440
2441 // try-flush + try-flush
2442 {
2443 ObjectReadOperation op;
2444 op.cache_try_flush();
2445 librados::AioCompletion *completion = cluster.aio_create_completion();
2446 ASSERT_EQ(0, cache_ioctx.aio_operate(
2447 "foo", completion, &op,
2448 librados::OPERATION_IGNORE_OVERLAY |
2449 librados::OPERATION_SKIPRWLOCKS, NULL));
2450
2451 ObjectReadOperation op2;
2452 op2.cache_try_flush();
2453 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2454 ASSERT_EQ(0, cache_ioctx.aio_operate(
2455 "foo", completion2, &op2,
2456 librados::OPERATION_IGNORE_OVERLAY |
2457 librados::OPERATION_SKIPRWLOCKS, NULL));
2458
2459 completion->wait_for_complete();
2460 completion2->wait_for_complete();
2461 ASSERT_EQ(0, completion->get_return_value());
2462 ASSERT_EQ(0, completion2->get_return_value());
2463 completion->release();
2464 completion2->release();
2465 }
2466}
2467
2468
2469IoCtx *read_ioctx = 0;
2470ceph::mutex test_lock = ceph::make_mutex("FlushReadRaces::lock");
2471ceph::condition_variable cond;
2472int max_reads = 100;
2473int num_reads = 0; // in progress
2474
2475void flush_read_race_cb(completion_t cb, void *arg);
2476
2477void start_flush_read()
2478{
2479 //cout << " starting read" << std::endl;
2480 ObjectReadOperation op;
2481 op.stat(NULL, NULL, NULL);
2482 librados::AioCompletion *completion = completions.getCompletion();
2483 completion->set_complete_callback(0, flush_read_race_cb);
2484 read_ioctx->aio_operate("foo", completion, &op, NULL);
2485}
2486
2487void flush_read_race_cb(completion_t cb, void *arg)
2488{
2489 //cout << " finished read" << std::endl;
2490 std::lock_guard l{test_lock};
2491 if (num_reads > max_reads) {
2492 num_reads--;
2493 cond.notify_all();
2494 } else {
2495 start_flush_read();
2496 }
2497}
2498
2499TEST_F(LibRadosTwoPoolsPP, TryFlushReadRace) {
2500 SKIP_IF_CRIMSON();
2501 // configure cache
2502 bufferlist inbl;
2503 ASSERT_EQ(0, cluster.mon_command(
2504 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2505 "\", \"tierpool\": \"" + cache_pool_name +
2506 "\", \"force_nonempty\": \"--force-nonempty\" }",
2507 inbl, NULL, NULL));
2508 ASSERT_EQ(0, cluster.mon_command(
2509 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2510 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2511 inbl, NULL, NULL));
2512 ASSERT_EQ(0, cluster.mon_command(
2513 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2514 "\", \"mode\": \"writeback\"}",
2515 inbl, NULL, NULL));
2516
2517 // wait for maps to settle
2518 cluster.wait_for_latest_osdmap();
2519
2520 // create/dirty object
2521 {
2522 bufferlist bl;
2523 bl.append("hi there");
2524 bufferptr bp(4000000); // make it big!
2525 bp.zero();
2526 bl.append(bp);
2527 ObjectWriteOperation op;
2528 op.write_full(bl);
2529 ASSERT_EQ(0, ioctx.operate("foo", &op));
2530 }
2531
2532 // start a continuous stream of reads
2533 read_ioctx = &ioctx;
2534 test_lock.lock();
2535 for (int i = 0; i < max_reads; ++i) {
2536 start_flush_read();
2537 num_reads++;
2538 }
2539 test_lock.unlock();
2540
2541 // try-flush
2542 ObjectReadOperation op;
2543 op.cache_try_flush();
2544 librados::AioCompletion *completion = cluster.aio_create_completion();
2545 ASSERT_EQ(0, cache_ioctx.aio_operate(
2546 "foo", completion, &op,
2547 librados::OPERATION_IGNORE_OVERLAY |
2548 librados::OPERATION_SKIPRWLOCKS, NULL));
2549
2550 completion->wait_for_complete();
2551 ASSERT_EQ(0, completion->get_return_value());
2552 completion->release();
2553
2554 // stop reads
2555 std::unique_lock locker{test_lock};
2556 max_reads = 0;
2557 cond.wait(locker, [] { return num_reads == 0;});
2558}
2559
2560TEST_F(LibRadosTierPP, HitSetNone) {
2561 SKIP_IF_CRIMSON();
2562 {
2563 list< pair<time_t,time_t> > ls;
2564 AioCompletion *c = librados::Rados::aio_create_completion();
2565 ASSERT_EQ(0, ioctx.hit_set_list(123, c, &ls));
2566 c->wait_for_complete();
2567 ASSERT_EQ(0, c->get_return_value());
2568 ASSERT_TRUE(ls.empty());
2569 c->release();
2570 }
2571 {
2572 bufferlist bl;
2573 AioCompletion *c = librados::Rados::aio_create_completion();
2574 ASSERT_EQ(0, ioctx.hit_set_get(123, c, 12345, &bl));
2575 c->wait_for_complete();
2576 ASSERT_EQ(-ENOENT, c->get_return_value());
2577 c->release();
2578 }
2579}
2580
// Returns the JSON text of an "osd pool set" monitor command that sets
// variable `var` of pool `pool` to the string value `val`. The arguments are
// inserted verbatim (no JSON escaping is applied).
string set_pool_str(string pool, string var, string val)
{
  string cmd = "{\"prefix\": \"osd pool set\",\"pool\":\"";
  cmd += pool;
  cmd += "\",\"var\": \"";
  cmd += var;
  cmd += "\",\"val\": \"";
  cmd += val;
  cmd += "\"}";
  return cmd;
}
2587
2588string set_pool_str(string pool, string var, int val)
2589{
2590 return string("{\"prefix\": \"osd pool set\",\"pool\":\"") + pool
2591 + string("\",\"var\": \"") + var + string("\",\"val\": \"")
2592 + stringify(val) + string("\"}");
2593}
2594
2595TEST_F(LibRadosTwoPoolsPP, HitSetRead) {
2596 SKIP_IF_CRIMSON();
2597 // make it a tier
2598 bufferlist inbl;
2599 ASSERT_EQ(0, cluster.mon_command(
2600 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2601 "\", \"tierpool\": \"" + cache_pool_name +
2602 "\", \"force_nonempty\": \"--force-nonempty\" }",
2603 inbl, NULL, NULL));
2604
2605 // enable hitset tracking for this pool
2606 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", 2),
2607 inbl, NULL, NULL));
2608 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", 600),
2609 inbl, NULL, NULL));
2610 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type",
2611 "explicit_object"),
2612 inbl, NULL, NULL));
2613
2614 // wait for maps to settle
2615 cluster.wait_for_latest_osdmap();
2616
2617 cache_ioctx.set_namespace("");
2618
2619 // keep reading until we see our object appear in the HitSet
2620 utime_t start = ceph_clock_now();
2621 utime_t hard_stop = start + utime_t(600, 0);
2622
2623 while (true) {
2624 utime_t now = ceph_clock_now();
2625 ASSERT_TRUE(now < hard_stop);
2626
2627 string name = "foo";
2628 uint32_t hash;
2629 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
2630 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash,
2631 cluster.pool_lookup(cache_pool_name.c_str()), "");
2632
2633 bufferlist bl;
2634 ASSERT_EQ(-ENOENT, cache_ioctx.read("foo", bl, 1, 0));
2635
2636 bufferlist hbl;
2637 AioCompletion *c = librados::Rados::aio_create_completion();
2638 ASSERT_EQ(0, cache_ioctx.hit_set_get(hash, c, now.sec(), &hbl));
2639 c->wait_for_complete();
2640 c->release();
2641
2642 if (hbl.length()) {
2643 auto p = hbl.cbegin();
2644 HitSet hs;
2645 decode(hs, p);
2646 if (hs.contains(oid)) {
2647 cout << "ok, hit_set contains " << oid << std::endl;
2648 break;
2649 }
2650 cout << "hmm, not in HitSet yet" << std::endl;
2651 } else {
2652 cout << "hmm, no HitSet yet" << std::endl;
2653 }
2654
2655 sleep(1);
2656 }
2657}
2658
2659static int _get_pg_num(Rados& cluster, string pool_name)
2660{
2661 bufferlist inbl;
2662 string cmd = string("{\"prefix\": \"osd pool get\",\"pool\":\"")
2663 + pool_name
2664 + string("\",\"var\": \"pg_num\",\"format\": \"json\"}");
2665 bufferlist outbl;
2666 int r = cluster.mon_command(cmd, inbl, &outbl, NULL);
2667 ceph_assert(r >= 0);
2668 string outstr(outbl.c_str(), outbl.length());
2669 json_spirit::Value v;
2670 if (!json_spirit::read(outstr, v)) {
2671 cerr <<" unable to parse json " << outstr << std::endl;
2672 return -1;
2673 }
2674
2675 json_spirit::Object& o = v.get_obj();
2676 for (json_spirit::Object::size_type i=0; i<o.size(); i++) {
2677 json_spirit::Pair& p = o[i];
2678 if (p.name_ == "pg_num") {
2679 cout << "pg_num = " << p.value_.get_int() << std::endl;
2680 return p.value_.get_int();
2681 }
2682 }
2683 cerr << "didn't find pg_num in " << outstr << std::endl;
2684 return -1;
2685}
2686
2687int make_hitset(Rados& cluster, librados::IoCtx& cache_ioctx, int num_pg,
2688 int num, std::map<int, HitSet>& hitsets, std::string& cache_pool_name)
2689{
2690 int pg = num_pg;
2691 // do a bunch of writes
2692 for (int i=0; i<num; ++i) {
2693 bufferlist bl;
2694 bl.append("a");
2695 ceph_assert(0 == cache_ioctx.write(stringify(i), bl, 1, 0));
2696 }
2697
2698 // get HitSets
2699 for (int i=0; i<pg; ++i) {
2700 list< pair<time_t,time_t> > ls;
2701 AioCompletion *c = librados::Rados::aio_create_completion();
2702 ceph_assert(0 == cache_ioctx.hit_set_list(i, c, &ls));
2703 c->wait_for_complete();
2704 c->release();
2705 std::cout << "pg " << i << " ls " << ls << std::endl;
2706 ceph_assert(!ls.empty());
2707
2708 // get the latest
2709 c = librados::Rados::aio_create_completion();
2710 bufferlist bl;
2711 ceph_assert(0 == cache_ioctx.hit_set_get(i, c, ls.back().first, &bl));
2712 c->wait_for_complete();
2713 c->release();
2714
2715 try {
2716 auto p = bl.cbegin();
2717 decode(hitsets[i], p);
2718 }
2719 catch (buffer::error& e) {
2720 std::cout << "failed to decode hit set; bl len is " << bl.length() << "\n";
2721 bl.hexdump(std::cout);
2722 std::cout << std::endl;
2723 throw e;
2724 }
2725
2726 // cope with racing splits by refreshing pg_num
2727 if (i == pg - 1)
2728 pg = _get_pg_num(cluster, cache_pool_name);
2729 }
2730 return pg;
2731}
2732
2733TEST_F(LibRadosTwoPoolsPP, HitSetWrite) {
2734 SKIP_IF_CRIMSON();
2735 int num_pg = _get_pg_num(cluster, pool_name);
2736 ceph_assert(num_pg > 0);
2737
2738 // make it a tier
2739 bufferlist inbl;
2740 ASSERT_EQ(0, cluster.mon_command(
2741 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2742 "\", \"tierpool\": \"" + cache_pool_name +
2743 "\", \"force_nonempty\": \"--force-nonempty\" }",
2744 inbl, NULL, NULL));
2745
2746 // enable hitset tracking for this pool
2747 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", 8),
2748 inbl, NULL, NULL));
2749 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", 600),
2750 inbl, NULL, NULL));
2751 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type",
2752 "explicit_hash"),
2753 inbl, NULL, NULL));
2754
2755 // wait for maps to settle
2756 cluster.wait_for_latest_osdmap();
2757
2758 cache_ioctx.set_namespace("");
2759
2760 int num = 200;
2761
2762 std::map<int,HitSet> hitsets;
2763
2764 num_pg = make_hitset(cluster, cache_ioctx, num_pg, num, hitsets, cache_pool_name);
2765
2766 int retry = 0;
2767
2768 for (int i=0; i<num; ++i) {
2769 string n = stringify(i);
2770 uint32_t hash;
2771 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(n, &hash));
2772 hobject_t oid(sobject_t(n, CEPH_NOSNAP), "", hash,
2773 cluster.pool_lookup(cache_pool_name.c_str()), "");
2774 std::cout << "checking for " << oid << std::endl;
2775 bool found = false;
2776 for (int p=0; p<num_pg; ++p) {
2777 if (hitsets[p].contains(oid)) {
2778 found = true;
2779 break;
2780 }
2781 }
2782 if (!found && retry < 5) {
2783 num_pg = make_hitset(cluster, cache_ioctx, num_pg, num, hitsets, cache_pool_name);
2784 i--;
2785 retry++;
2786 continue;
2787 }
2788 ASSERT_TRUE(found);
2789 }
2790}
2791
2792TEST_F(LibRadosTwoPoolsPP, HitSetTrim) {
2793 SKIP_IF_CRIMSON();
2794 unsigned count = 3;
2795 unsigned period = 3;
2796
2797 // make it a tier
2798 bufferlist inbl;
2799 ASSERT_EQ(0, cluster.mon_command(
2800 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2801 "\", \"tierpool\": \"" + cache_pool_name +
2802 "\", \"force_nonempty\": \"--force-nonempty\" }",
2803 inbl, NULL, NULL));
2804
2805 // enable hitset tracking for this pool
2806 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", count),
2807 inbl, NULL, NULL));
2808 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", period),
2809 inbl, NULL, NULL));
2810 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
2811 inbl, NULL, NULL));
2812 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_fpp", ".01"),
2813 inbl, NULL, NULL));
2814
2815 // wait for maps to settle
2816 cluster.wait_for_latest_osdmap();
2817
2818 cache_ioctx.set_namespace("");
2819
2820 // do a bunch of writes and make sure the hitsets rotate
2821 utime_t start = ceph_clock_now();
2822 utime_t hard_stop = start + utime_t(count * period * 50, 0);
2823
2824 time_t first = 0;
2825 while (true) {
2826 string name = "foo";
2827 uint32_t hash;
2828 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
2829 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash, -1, "");
2830
2831 bufferlist bl;
2832 bl.append("f");
2833 ASSERT_EQ(0, cache_ioctx.write("foo", bl, 1, 0));
2834
2835 list<pair<time_t, time_t> > ls;
2836 AioCompletion *c = librados::Rados::aio_create_completion();
2837 ASSERT_EQ(0, cache_ioctx.hit_set_list(hash, c, &ls));
2838 c->wait_for_complete();
2839 c->release();
2840
2841 cout << " got ls " << ls << std::endl;
2842 if (!ls.empty()) {
2843 if (!first) {
2844 first = ls.front().first;
2845 cout << "first is " << first << std::endl;
2846 } else {
2847 if (ls.front().first != first) {
2848 cout << "first now " << ls.front().first << ", trimmed" << std::endl;
2849 break;
2850 }
2851 }
2852 }
2853
2854 utime_t now = ceph_clock_now();
2855 ASSERT_TRUE(now < hard_stop);
2856
2857 sleep(1);
2858 }
2859}
2860
2861TEST_F(LibRadosTwoPoolsPP, PromoteOn2ndRead) {
2862 SKIP_IF_CRIMSON();
2863 // create object
2864 for (int i=0; i<20; ++i) {
2865 bufferlist bl;
2866 bl.append("hi there");
2867 ObjectWriteOperation op;
2868 op.write_full(bl);
2869 ASSERT_EQ(0, ioctx.operate("foo" + stringify(i), &op));
2870 }
2871
2872 // configure cache
2873 bufferlist inbl;
2874 ASSERT_EQ(0, cluster.mon_command(
2875 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2876 "\", \"tierpool\": \"" + cache_pool_name +
2877 "\", \"force_nonempty\": \"--force-nonempty\" }",
2878 inbl, NULL, NULL));
2879 ASSERT_EQ(0, cluster.mon_command(
2880 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2881 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2882 inbl, NULL, NULL));
2883 ASSERT_EQ(0, cluster.mon_command(
2884 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2885 "\", \"mode\": \"writeback\"}",
2886 inbl, NULL, NULL));
2887
2888 // enable hitset tracking for this pool
2889 ASSERT_EQ(0, cluster.mon_command(
2890 set_pool_str(cache_pool_name, "hit_set_count", 2),
2891 inbl, NULL, NULL));
2892 ASSERT_EQ(0, cluster.mon_command(
2893 set_pool_str(cache_pool_name, "hit_set_period", 600),
2894 inbl, NULL, NULL));
2895 ASSERT_EQ(0, cluster.mon_command(
2896 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
2897 inbl, NULL, NULL));
2898 ASSERT_EQ(0, cluster.mon_command(
2899 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
2900 inbl, NULL, NULL));
2901 ASSERT_EQ(0, cluster.mon_command(
2902 set_pool_str(cache_pool_name, "hit_set_grade_decay_rate", 20),
2903 inbl, NULL, NULL));
2904 ASSERT_EQ(0, cluster.mon_command(
2905 set_pool_str(cache_pool_name, "hit_set_search_last_n", 1),
2906 inbl, NULL, NULL));
2907
2908 // wait for maps to settle
2909 cluster.wait_for_latest_osdmap();
2910
2911 int fake = 0; // set this to non-zero to test spurious promotion,
2912 // e.g. from thrashing
2913 int attempt = 0;
2914 string obj;
2915 while (true) {
2916 // 1st read, don't trigger a promote
2917 obj = "foo" + stringify(attempt);
2918 cout << obj << std::endl;
2919 {
2920 bufferlist bl;
2921 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
2922 if (--fake >= 0) {
2923 sleep(1);
2924 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
2925 sleep(1);
2926 }
2927 }
2928
2929 // verify the object is NOT present in the cache tier
2930 {
2931 bool found = false;
2932 NObjectIterator it = cache_ioctx.nobjects_begin();
2933 while (it != cache_ioctx.nobjects_end()) {
2934 cout << " see " << it->get_oid() << std::endl;
2935 if (it->get_oid() == string(obj.c_str())) {
2936 found = true;
2937 break;
2938 }
2939 ++it;
2940 }
2941 if (!found)
2942 break;
2943 }
2944
2945 ++attempt;
2946 ASSERT_LE(attempt, 20);
2947 cout << "hrm, object is present in cache on attempt " << attempt
2948 << ", retrying" << std::endl;
2949 }
2950
2951 // Read until the object is present in the cache tier
2952 cout << "verifying " << obj << " is eventually promoted" << std::endl;
2953 while (true) {
2954 bufferlist bl;
2955 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
2956
2957 bool there = false;
2958 NObjectIterator it = cache_ioctx.nobjects_begin();
2959 while (it != cache_ioctx.nobjects_end()) {
2960 if (it->get_oid() == string(obj.c_str())) {
2961 there = true;
2962 break;
2963 }
2964 ++it;
2965 }
2966 if (there)
2967 break;
2968
2969 sleep(1);
2970 }
2971
2972 // tear down tiers
2973 ASSERT_EQ(0, cluster.mon_command(
2974 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2975 "\"}",
2976 inbl, NULL, NULL));
2977 ASSERT_EQ(0, cluster.mon_command(
2978 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2979 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2980 inbl, NULL, NULL));
2981
2982 // wait for maps to settle before next test
2983 cluster.wait_for_latest_osdmap();
2984}
2985
2986TEST_F(LibRadosTwoPoolsPP, ProxyRead) {
2987 SKIP_IF_CRIMSON();
2988 // create object
2989 {
2990 bufferlist bl;
2991 bl.append("hi there");
2992 ObjectWriteOperation op;
2993 op.write_full(bl);
2994 ASSERT_EQ(0, ioctx.operate("foo", &op));
2995 }
2996
2997 // configure cache
2998 bufferlist inbl;
2999 ASSERT_EQ(0, cluster.mon_command(
3000 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3001 "\", \"tierpool\": \"" + cache_pool_name +
3002 "\", \"force_nonempty\": \"--force-nonempty\" }",
3003 inbl, NULL, NULL));
3004 ASSERT_EQ(0, cluster.mon_command(
3005 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3006 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3007 inbl, NULL, NULL));
3008 ASSERT_EQ(0, cluster.mon_command(
3009 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3010 "\", \"mode\": \"readproxy\"}",
3011 inbl, NULL, NULL));
3012
3013 // wait for maps to settle
3014 cluster.wait_for_latest_osdmap();
3015
3016 // read and verify the object
3017 {
3018 bufferlist bl;
3019 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3020 ASSERT_EQ('h', bl[0]);
3021 }
3022
3023 // Verify 10 times the object is NOT present in the cache tier
3024 uint32_t i = 0;
3025 while (i++ < 10) {
3026 NObjectIterator it = cache_ioctx.nobjects_begin();
3027 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3028 sleep(1);
3029 }
3030
3031 // tear down tiers
3032 ASSERT_EQ(0, cluster.mon_command(
3033 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
3034 "\"}",
3035 inbl, NULL, NULL));
3036 ASSERT_EQ(0, cluster.mon_command(
3037 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
3038 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
3039 inbl, NULL, NULL));
3040
3041 // wait for maps to settle before next test
3042 cluster.wait_for_latest_osdmap();
3043}
3044
3045TEST_F(LibRadosTwoPoolsPP, CachePin) {
3046 SKIP_IF_CRIMSON();
3047 // create object
3048 {
3049 bufferlist bl;
3050 bl.append("hi there");
3051 ObjectWriteOperation op;
3052 op.write_full(bl);
3053 ASSERT_EQ(0, ioctx.operate("foo", &op));
3054 }
3055 {
3056 bufferlist bl;
3057 bl.append("hi there");
3058 ObjectWriteOperation op;
3059 op.write_full(bl);
3060 ASSERT_EQ(0, ioctx.operate("bar", &op));
3061 }
3062 {
3063 bufferlist bl;
3064 bl.append("hi there");
3065 ObjectWriteOperation op;
3066 op.write_full(bl);
3067 ASSERT_EQ(0, ioctx.operate("baz", &op));
3068 }
3069 {
3070 bufferlist bl;
3071 bl.append("hi there");
3072 ObjectWriteOperation op;
3073 op.write_full(bl);
3074 ASSERT_EQ(0, ioctx.operate("bam", &op));
3075 }
3076
3077 // configure cache
3078 bufferlist inbl;
3079 ASSERT_EQ(0, cluster.mon_command(
3080 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3081 "\", \"tierpool\": \"" + cache_pool_name +
3082 "\", \"force_nonempty\": \"--force-nonempty\" }",
3083 inbl, NULL, NULL));
3084 ASSERT_EQ(0, cluster.mon_command(
3085 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3086 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3087 inbl, NULL, NULL));
3088 ASSERT_EQ(0, cluster.mon_command(
3089 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3090 "\", \"mode\": \"writeback\"}",
3091 inbl, NULL, NULL));
3092
3093 // wait for maps to settle
3094 cluster.wait_for_latest_osdmap();
3095
3096 // read, trigger promote
3097 {
3098 bufferlist bl;
3099 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3100 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
3101 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
3102 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
3103 }
3104
3105 // verify the objects are present in the cache tier
3106 {
3107 NObjectIterator it = cache_ioctx.nobjects_begin();
3108 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3109 for (uint32_t i = 0; i < 4; i++) {
3110 ASSERT_TRUE(it->get_oid() == string("foo") ||
3111 it->get_oid() == string("bar") ||
3112 it->get_oid() == string("baz") ||
3113 it->get_oid() == string("bam"));
3114 ++it;
3115 }
3116 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3117 }
3118
3119 // pin objects
3120 {
3121 ObjectWriteOperation op;
3122 op.cache_pin();
3123 librados::AioCompletion *completion = cluster.aio_create_completion();
3124 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
3125 completion->wait_for_complete();
3126 ASSERT_EQ(0, completion->get_return_value());
3127 completion->release();
3128 }
3129 {
3130 ObjectWriteOperation op;
3131 op.cache_pin();
3132 librados::AioCompletion *completion = cluster.aio_create_completion();
3133 ASSERT_EQ(0, cache_ioctx.aio_operate("baz", completion, &op));
3134 completion->wait_for_complete();
3135 ASSERT_EQ(0, completion->get_return_value());
3136 completion->release();
3137 }
3138
3139 // enable agent
3140 ASSERT_EQ(0, cluster.mon_command(
3141 set_pool_str(cache_pool_name, "hit_set_count", 2),
3142 inbl, NULL, NULL));
3143 ASSERT_EQ(0, cluster.mon_command(
3144 set_pool_str(cache_pool_name, "hit_set_period", 600),
3145 inbl, NULL, NULL));
3146 ASSERT_EQ(0, cluster.mon_command(
3147 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
3148 inbl, NULL, NULL));
3149 ASSERT_EQ(0, cluster.mon_command(
3150 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
3151 inbl, NULL, NULL));
3152 ASSERT_EQ(0, cluster.mon_command(
3153 set_pool_str(cache_pool_name, "target_max_objects", 1),
3154 inbl, NULL, NULL));
3155
3156 sleep(10);
3157
3158 // Verify the pinned object 'foo' is not flushed/evicted
3159 uint32_t count = 0;
3160 while (true) {
3161 bufferlist bl;
3162 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
3163
3164 count = 0;
3165 NObjectIterator it = cache_ioctx.nobjects_begin();
3166 while (it != cache_ioctx.nobjects_end()) {
3167 ASSERT_TRUE(it->get_oid() == string("foo") ||
3168 it->get_oid() == string("bar") ||
3169 it->get_oid() == string("baz") ||
3170 it->get_oid() == string("bam"));
3171 ++count;
3172 ++it;
3173 }
3174 if (count == 2) {
3175 ASSERT_TRUE(it->get_oid() == string("foo") ||
3176 it->get_oid() == string("baz"));
3177 break;
3178 }
3179
3180 sleep(1);
3181 }
3182
3183 // tear down tiers
3184 ASSERT_EQ(0, cluster.mon_command(
3185 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
3186 "\"}",
3187 inbl, NULL, NULL));
3188 ASSERT_EQ(0, cluster.mon_command(
3189 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
3190 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
3191 inbl, NULL, NULL));
3192
3193 // wait for maps to settle before next test
3194 cluster.wait_for_latest_osdmap();
3195}
3196
3197TEST_F(LibRadosTwoPoolsPP, SetRedirectRead) {
3198 SKIP_IF_CRIMSON();
3199 // create object
3200 {
3201 bufferlist bl;
3202 bl.append("hi there");
3203 ObjectWriteOperation op;
3204 op.write_full(bl);
3205 ASSERT_EQ(0, ioctx.operate("foo", &op));
3206 }
3207 {
3208 bufferlist bl;
3209 bl.append("there");
3210 ObjectWriteOperation op;
3211 op.write_full(bl);
3212 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
3213 }
3214
3215 // wait for maps to settle
3216 cluster.wait_for_latest_osdmap();
3217
3218 {
3219 ObjectWriteOperation op;
3220 op.set_redirect("bar", cache_ioctx, 0);
3221 librados::AioCompletion *completion = cluster.aio_create_completion();
3222 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3223 completion->wait_for_complete();
3224 ASSERT_EQ(0, completion->get_return_value());
3225 completion->release();
3226 }
3227 // read and verify the object
3228 {
3229 bufferlist bl;
3230 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3231 ASSERT_EQ('t', bl[0]);
3232 }
3233
3234 // wait for maps to settle before next test
3235 cluster.wait_for_latest_osdmap();
3236}
3237
3238TEST_F(LibRadosTwoPoolsPP, ManifestPromoteRead) {
3239 SKIP_IF_CRIMSON();
3240 // skip test if not yet mimic
3241 if (_get_required_osd_release(cluster) < "mimic") {
3242 GTEST_SKIP() << "cluster is not yet mimic, skipping test";
3243 }
3244
3245 // create object
3246 {
3247 bufferlist bl;
3248 bl.append("hi there");
3249 ObjectWriteOperation op;
3250 op.write_full(bl);
3251 ASSERT_EQ(0, ioctx.operate("foo", &op));
3252 }
3253 {
3254 bufferlist bl;
3255 bl.append("base chunk");
3256 ObjectWriteOperation op;
3257 op.write_full(bl);
3258 ASSERT_EQ(0, ioctx.operate("foo-chunk", &op));
3259 }
3260 {
3261 bufferlist bl;
3262 bl.append("there");
3263 ObjectWriteOperation op;
3264 op.write_full(bl);
3265 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
3266 }
3267 {
3268 bufferlist bl;
3269 bl.append("CHUNK");
3270 ObjectWriteOperation op;
3271 op.write_full(bl);
3272 ASSERT_EQ(0, cache_ioctx.operate("bar-chunk", &op));
3273 }
3274
3275 // wait for maps to settle
3276 cluster.wait_for_latest_osdmap();
3277
3278 // set-redirect
3279 {
3280 ObjectWriteOperation op;
3281 op.set_redirect("bar", cache_ioctx, 0);
3282 librados::AioCompletion *completion = cluster.aio_create_completion();
3283 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3284 completion->wait_for_complete();
3285 ASSERT_EQ(0, completion->get_return_value());
3286 completion->release();
3287 }
3288 // set-chunk
3289 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 2, "bar-chunk", "foo-chunk");
3290
3291 // promote
3292 {
3293 ObjectWriteOperation op;
3294 op.tier_promote();
3295 librados::AioCompletion *completion = cluster.aio_create_completion();
3296 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3297 completion->wait_for_complete();
3298 ASSERT_EQ(0, completion->get_return_value());
3299 completion->release();
3300 }
3301 // read and verify the object (redirect)
3302 {
3303 bufferlist bl;
3304 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3305 ASSERT_EQ('t', bl[0]);
3306 }
3307 // promote
3308 {
3309 ObjectWriteOperation op;
3310 op.tier_promote();
3311 librados::AioCompletion *completion = cluster.aio_create_completion();
3312 ASSERT_EQ(0, ioctx.aio_operate("foo-chunk", completion, &op));
3313 completion->wait_for_complete();
3314 ASSERT_EQ(0, completion->get_return_value());
3315 completion->release();
3316 }
3317 // read and verify the object
3318 {
3319 bufferlist bl;
3320 ASSERT_EQ(1, ioctx.read("foo-chunk", bl, 1, 0));
3321 ASSERT_EQ('C', bl[0]);
3322 }
3323
3324 // wait for maps to settle before next test
3325 cluster.wait_for_latest_osdmap();
3326}
3327
3328TEST_F(LibRadosTwoPoolsPP, ManifestRefRead) {
3329 SKIP_IF_CRIMSON();
3330 // note: require >= mimic
3331
3332 // create object
3333 {
3334 bufferlist bl;
3335 bl.append("hi there");
3336 ObjectWriteOperation op;
3337 op.write_full(bl);
3338 ASSERT_EQ(0, ioctx.operate("foo", &op));
3339 }
3340 {
3341 bufferlist bl;
3342 bl.append("base chunk");
3343 ObjectWriteOperation op;
3344 op.write_full(bl);
3345 ASSERT_EQ(0, ioctx.operate("foo-chunk", &op));
3346 }
3347 {
3348 bufferlist bl;
3349 bl.append("there");
3350 ObjectWriteOperation op;
3351 op.write_full(bl);
3352 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
3353 }
3354 {
3355 bufferlist bl;
3356 bl.append("CHUNK");
3357 ObjectWriteOperation op;
3358 op.write_full(bl);
3359 ASSERT_EQ(0, cache_ioctx.operate("bar-chunk", &op));
3360 }
3361
3362 // wait for maps to settle
3363 cluster.wait_for_latest_osdmap();
3364
3365 // set-redirect
3366 {
3367 ObjectWriteOperation op;
3368 op.set_redirect("bar", cache_ioctx, 0, CEPH_OSD_OP_FLAG_WITH_REFERENCE);
3369 librados::AioCompletion *completion = cluster.aio_create_completion();
3370 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3371 completion->wait_for_complete();
3372 ASSERT_EQ(0, completion->get_return_value());
3373 completion->release();
3374 }
3375 // set-chunk
3376 {
3377 ObjectReadOperation op;
3378 op.set_chunk(0, 2, cache_ioctx, "bar-chunk", 0, CEPH_OSD_OP_FLAG_WITH_REFERENCE);
3379 librados::AioCompletion *completion = cluster.aio_create_completion();
3380 ASSERT_EQ(0, ioctx.aio_operate("foo-chunk", completion, &op,
3381 librados::OPERATION_IGNORE_CACHE, NULL));
3382 completion->wait_for_complete();
3383 ASSERT_EQ(0, completion->get_return_value());
3384 completion->release();
3385 }
3386 // redirect's refcount
3387 {
3388 bufferlist t;
3389 cache_ioctx.getxattr("bar", CHUNK_REFCOUNT_ATTR, t);
3390 chunk_refs_t refs;
3391 try {
3392 auto iter = t.cbegin();
3393 decode(refs, iter);
3394 } catch (buffer::error& err) {
3395 ASSERT_TRUE(0);
3396 }
3397 ASSERT_EQ(1U, refs.count());
3398 }
3399 // chunk's refcount
3400 {
3401 bufferlist t;
3402 cache_ioctx.getxattr("bar-chunk", CHUNK_REFCOUNT_ATTR, t);
3403 chunk_refs_t refs;
3404 try {
3405 auto iter = t.cbegin();
3406 decode(refs, iter);
3407 } catch (buffer::error& err) {
3408 ASSERT_TRUE(0);
3409 }
3410 ASSERT_EQ(1u, refs.count());
3411 }
3412
3413 // wait for maps to settle before next test
3414 cluster.wait_for_latest_osdmap();
3415}
3416
3417TEST_F(LibRadosTwoPoolsPP, ManifestUnset) {
3418 SKIP_IF_CRIMSON();
3419 // skip test if not yet nautilus
3420 if (_get_required_osd_release(cluster) < "nautilus") {
3421 GTEST_SKIP() << "cluster is not yet nautilus, skipping test";
3422 }
3423
3424 // create object
3425 {
3426 bufferlist bl;
3427 bl.append("hi there");
3428 ObjectWriteOperation op;
3429 op.write_full(bl);
3430 ASSERT_EQ(0, ioctx.operate("foo", &op));
3431 }
3432 {
3433 bufferlist bl;
3434 bl.append("base chunk");
3435 ObjectWriteOperation op;
3436 op.write_full(bl);
3437 ASSERT_EQ(0, ioctx.operate("foo-chunk", &op));
3438 }
3439 {
3440 bufferlist bl;
3441 bl.append("there");
3442 ObjectWriteOperation op;
3443 op.write_full(bl);
3444 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
3445 }
3446 {
3447 bufferlist bl;
3448 bl.append("CHUNK");
3449 ObjectWriteOperation op;
3450 op.write_full(bl);
3451 ASSERT_EQ(0, cache_ioctx.operate("bar-chunk", &op));
3452 }
3453
3454 // wait for maps to settle
3455 cluster.wait_for_latest_osdmap();
3456
3457 // set-redirect
3458 {
3459 ObjectWriteOperation op;
3460 op.set_redirect("bar", cache_ioctx, 0, CEPH_OSD_OP_FLAG_WITH_REFERENCE);
3461 librados::AioCompletion *completion = cluster.aio_create_completion();
3462 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3463 completion->wait_for_complete();
3464 ASSERT_EQ(0, completion->get_return_value());
3465 completion->release();
3466 }
3467 // set-chunk
3468 {
3469 ObjectReadOperation op;
3470 op.set_chunk(0, 2, cache_ioctx, "bar-chunk", 0, CEPH_OSD_OP_FLAG_WITH_REFERENCE);
3471 librados::AioCompletion *completion = cluster.aio_create_completion();
3472 ASSERT_EQ(0, ioctx.aio_operate("foo-chunk", completion, &op,
3473 librados::OPERATION_IGNORE_CACHE, NULL));
3474 completion->wait_for_complete();
3475 ASSERT_EQ(0, completion->get_return_value());
3476 completion->release();
3477 }
3478 // redirect's refcount
3479 {
3480 bufferlist t;
3481 cache_ioctx.getxattr("bar", CHUNK_REFCOUNT_ATTR, t);
3482 chunk_refs_t refs;
3483 try {
3484 auto iter = t.cbegin();
3485 decode(refs, iter);
3486 } catch (buffer::error& err) {
3487 ASSERT_TRUE(0);
3488 }
3489 ASSERT_EQ(1u, refs.count());
3490 }
3491 // chunk's refcount
3492 {
3493 bufferlist t;
3494 cache_ioctx.getxattr("bar-chunk", CHUNK_REFCOUNT_ATTR, t);
3495 chunk_refs_t refs;
3496 try {
3497 auto iter = t.cbegin();
3498 decode(refs, iter);
3499 } catch (buffer::error& err) {
3500 ASSERT_TRUE(0);
3501 }
3502 ASSERT_EQ(1u, refs.count());
3503 }
3504
3505 // unset-manifest for set-redirect
3506 {
3507 ObjectWriteOperation op;
3508 op.unset_manifest();
3509 librados::AioCompletion *completion = cluster.aio_create_completion();
3510 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3511 completion->wait_for_complete();
3512 ASSERT_EQ(0, completion->get_return_value());
3513 completion->release();
3514 }
3515
3516 // unset-manifest for set-chunk
3517 {
3518 ObjectWriteOperation op;
3519 op.unset_manifest();
3520 librados::AioCompletion *completion = cluster.aio_create_completion();
3521 ASSERT_EQ(0, ioctx.aio_operate("foo-chunk", completion, &op));
3522 completion->wait_for_complete();
3523 ASSERT_EQ(0, completion->get_return_value());
3524 completion->release();
3525 }
3526 // redirect's refcount
3527 {
3528 bufferlist t;
3529 cache_ioctx.getxattr("bar-chunk", CHUNK_REFCOUNT_ATTR, t);
3530 if (t.length() != 0U) {
3531 ObjectWriteOperation op;
3532 op.unset_manifest();
3533 librados::AioCompletion *completion = cluster.aio_create_completion();
3534 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
3535 completion->wait_for_complete();
3536 ASSERT_EQ(-EOPNOTSUPP, completion->get_return_value());
3537 completion->release();
3538 }
3539 }
3540 // chunk's refcount
3541 {
3542 bufferlist t;
3543 cache_ioctx.getxattr("bar-chunk", CHUNK_REFCOUNT_ATTR, t);
3544 if (t.length() != 0U) {
3545 ObjectWriteOperation op;
3546 op.unset_manifest();
3547 librados::AioCompletion *completion = cluster.aio_create_completion();
3548 ASSERT_EQ(0, ioctx.aio_operate("foo-chunk", completion, &op));
3549 completion->wait_for_complete();
3550 ASSERT_EQ(-EOPNOTSUPP, completion->get_return_value());
3551 completion->release();
3552 }
3553 }
3554
3555 // wait for maps to settle before next test
3556 cluster.wait_for_latest_osdmap();
3557}
3558
3559TEST_F(LibRadosTwoPoolsPP, ManifestDedupRefRead) {
3560 SKIP_IF_CRIMSON();
3561 // skip test if not yet nautilus
3562 if (_get_required_osd_release(cluster) < "nautilus") {
3563 GTEST_SKIP() << "cluster is not yet nautilus, skipping test";
3564 }
3565
3566 bufferlist inbl;
3567 ASSERT_EQ(0, cluster.mon_command(
3568 set_pool_str(pool_name, "fingerprint_algorithm", "sha1"),
3569 inbl, NULL, NULL));
3570 cluster.wait_for_latest_osdmap();
3571 string tgt_oid;
3572
3573 // get fp_oid
3574 tgt_oid = get_fp_oid("There hi", "sha1");
3575
3576 // create object
3577 {
3578 bufferlist bl;
3579 bl.append("There hi");
3580 ObjectWriteOperation op;
3581 op.write_full(bl);
3582 ASSERT_EQ(0, ioctx.operate("foo", &op));
3583 }
3584 {
3585 bufferlist bl;
3586 bl.append("There hi");
3587 ObjectWriteOperation op;
3588 op.write_full(bl);
3589 ASSERT_EQ(0, ioctx.operate("foo-dedup", &op));
3590 }
3591
3592 // write
3593 {
3594 ObjectWriteOperation op;
3595 bufferlist bl;
3596 bl.append("There hi");
3597 op.write_full(bl);
3598 ASSERT_EQ(0, cache_ioctx.operate(tgt_oid, &op));
3599 }
3600
3601 // set-chunk (dedup)
3602 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 8, tgt_oid, "foo-dedup");
3603 // set-chunk (dedup)
3604 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 8, tgt_oid, "foo");
3605 // chunk's refcount
3606 {
3607 bufferlist t;
3608 cache_ioctx.getxattr(tgt_oid, CHUNK_REFCOUNT_ATTR, t);
3609 chunk_refs_t refs;
3610 try {
3611 auto iter = t.cbegin();
3612 decode(refs, iter);
3613 } catch (buffer::error& err) {
3614 ASSERT_TRUE(0);
3615 }
3616 ASSERT_LE(2u, refs.count());
3617 }
3618
3619 // wait for maps to settle before next test
3620 cluster.wait_for_latest_osdmap();
3621}
3622
3623TEST_F(LibRadosTwoPoolsPP, ManifestSnapRefcount) {
3624 SKIP_IF_CRIMSON();
3625 // skip test if not yet octopus
3626 if (_get_required_osd_release(cluster) < "octopus") {
3627 cout << "cluster is not yet octopus, skipping test" << std::endl;
3628 return;
3629 }
3630
3631 bufferlist inbl;
3632 ASSERT_EQ(0, cluster.mon_command(
3633 set_pool_str(pool_name, "fingerprint_algorithm", "sha1"),
3634 inbl, NULL, NULL));
3635 cluster.wait_for_latest_osdmap();
3636
3637 // create object
3638 {
3639 bufferlist bl;
3640 bl.append("there hi");
3641 ObjectWriteOperation op;
3642 op.write_full(bl);
3643 ASSERT_EQ(0, ioctx.operate("foo", &op));
3644 }
3645 {
3646 bufferlist bl;
3647 bl.append("there hi");
3648 ObjectWriteOperation op;
3649 op.write_full(bl);
3650 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
3651 }
3652
3653 // wait for maps to settle
3654 cluster.wait_for_latest_osdmap();
3655
3656 string er_fp_oid, hi_fp_oid, bb_fp_oid;
3657
3658 // get fp_oid
3659 er_fp_oid = get_fp_oid("er", "sha1");
3660 hi_fp_oid = get_fp_oid("hi", "sha1");
3661 bb_fp_oid = get_fp_oid("bb", "sha1");
3662
3663 // write
3664 {
3665 ObjectWriteOperation op;
3666 bufferlist bl;
3667 bl.append("er");
3668 op.write_full(bl);
3669 ASSERT_EQ(0, cache_ioctx.operate(er_fp_oid, &op));
3670 }
3671 // write
3672 {
3673 ObjectWriteOperation op;
3674 bufferlist bl;
3675 bl.append("hi");
3676 op.write_full(bl);
3677 ASSERT_EQ(0, cache_ioctx.operate(hi_fp_oid, &op));
3678 }
3679 // write
3680 {
3681 ObjectWriteOperation op;
3682 bufferlist bl;
3683 bl.append("bb");
3684 op.write_full(bl);
3685 ASSERT_EQ(0, cache_ioctx.operate(bb_fp_oid, &op));
3686 }
3687
3688 // set-chunk (dedup)
3689 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, er_fp_oid, "foo");
3690 // set-chunk (dedup)
3691 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, hi_fp_oid, "foo");
3692
3693 // make all chunks dirty --> flush
3694 // foo: [er] [hi]
3695
3696 // check chunk's refcount
3697 {
3698 bufferlist t;
3699 SHA1 sha1_gen;
3700 int size = strlen("er");
3701 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3702 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3703 sha1_gen.Update((const unsigned char *)"er", size);
3704 sha1_gen.Final(fingerprint);
3705 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3706 cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
3707 chunk_refs_t refs;
3708 try {
3709 auto iter = t.cbegin();
3710 decode(refs, iter);
3711 } catch (buffer::error& err) {
3712 ASSERT_TRUE(0);
3713 }
3714 ASSERT_LE(1u, refs.count());
3715 }
3716
3717 // create a snapshot, clone
3718 vector<uint64_t> my_snaps(1);
3719 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3720 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3721 my_snaps));
3722
3723 // foo: [bb] [hi]
3724 // create a clone
3725 {
3726 bufferlist bl;
3727 bl.append("Thbbe");
3728 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3729 }
3730 // make clean
3731 {
3732 bufferlist bl;
3733 bl.append("Thbbe");
3734 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3735 }
3736 // set-chunk (dedup)
3737 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, bb_fp_oid, "foo");
3738
3739 // and another
3740 my_snaps.resize(2);
3741 my_snaps[1] = my_snaps[0];
3742 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3743 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3744 my_snaps));
3745
3746 // foo: [er] [hi]
3747 // create a clone
3748 {
3749 bufferlist bl;
3750 bl.append("There");
3751 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3752 }
3753 // make clean
3754 {
3755 bufferlist bl;
3756 bl.append("There");
3757 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3758 }
3759 // set-chunk (dedup)
3760 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, er_fp_oid, "foo");
3761
3762 // check chunk's refcount
3763 {
3764 bufferlist t;
3765 SHA1 sha1_gen;
3766 int size = strlen("er");
3767 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3768 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3769 sha1_gen.Update((const unsigned char *)"er", size);
3770 sha1_gen.Final(fingerprint);
3771 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3772 cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
3773 chunk_refs_t refs;
3774 try {
3775 auto iter = t.cbegin();
3776 decode(refs, iter);
3777 } catch (buffer::error& err) {
3778 ASSERT_TRUE(0);
3779 }
3780 ASSERT_LE(2u, refs.count());
3781 }
3782
3783 // and another
3784 my_snaps.resize(3);
3785 my_snaps[2] = my_snaps[1];
3786 my_snaps[1] = my_snaps[0];
3787 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3788 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3789 my_snaps));
3790
3791 // foo: [bb] [hi]
3792 // create a clone
3793 {
3794 bufferlist bl;
3795 bl.append("Thbbe");
3796 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3797 }
3798 // make clean
3799 {
3800 bufferlist bl;
3801 bl.append("Thbbe");
3802 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
3803 }
3804 // set-chunk (dedup)
3805 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, bb_fp_oid, "foo");
3806
3807 /*
3808 * snap[2]: [er] [hi]
3809 * snap[1]: [bb] [hi]
3810 * snap[0]: [er] [hi]
3811 * head: [bb] [hi]
3812 */
3813
3814 // check chunk's refcount
3815 {
3816 bufferlist t;
3817 SHA1 sha1_gen;
3818 int size = strlen("hi");
3819 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3820 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3821 sha1_gen.Update((const unsigned char *)"hi", size);
3822 sha1_gen.Final(fingerprint);
3823 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3824 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 1);
3825 }
3826
3827 // check chunk's refcount
3828 {
3829 bufferlist t;
3830 SHA1 sha1_gen;
3831 int size = strlen("er");
3832 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3833 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3834 sha1_gen.Update((const unsigned char *)"er", size);
3835 sha1_gen.Final(fingerprint);
3836 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3837 cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
3838 chunk_refs_t refs;
3839 try {
3840 auto iter = t.cbegin();
3841 decode(refs, iter);
3842 } catch (buffer::error& err) {
3843 ASSERT_TRUE(0);
3844 }
3845 ASSERT_LE(2u, refs.count());
3846 }
3847
3848 // remove snap
3849 ioctx.selfmanaged_snap_remove(my_snaps[2]);
3850
3851 /*
3852 * snap[1]: [bb] [hi]
3853 * snap[0]: [er] [hi]
3854 * head: [bb] [hi]
3855 */
3856
3857 sleep(10);
3858
3859 // check chunk's refcount
3860 {
3861 bufferlist t;
3862 SHA1 sha1_gen;
3863 int size = strlen("hi");
3864 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3865 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3866 sha1_gen.Update((const unsigned char *)"hi", size);
3867 sha1_gen.Final(fingerprint);
3868 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3869 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 1);
3870 }
3871
3872 // remove snap
3873 ioctx.selfmanaged_snap_remove(my_snaps[0]);
3874
3875 /*
3876 * snap[1]: [bb] [hi]
3877 * head: [bb] [hi]
3878 */
3879
3880 sleep(10);
3881
3882 // check chunk's refcount
3883 {
3884 bufferlist t;
3885 SHA1 sha1_gen;
3886 int size = strlen("bb");
3887 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3888 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3889 sha1_gen.Update((const unsigned char *)"bb", size);
3890 sha1_gen.Final(fingerprint);
3891 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3892 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 1);
3893 }
3894
3895 // remove snap
3896 ioctx.selfmanaged_snap_remove(my_snaps[1]);
3897
3898 /*
3899 * snap[1]: [bb] [hi]
3900 */
3901
3902 sleep(10);
3903
3904 // check chunk's refcount
3905 {
3906 bufferlist t;
3907 SHA1 sha1_gen;
3908 int size = strlen("bb");
3909 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3910 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3911 sha1_gen.Update((const unsigned char *)"bb", size);
3912 sha1_gen.Final(fingerprint);
3913 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3914 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 1);
3915 }
3916
3917 // check chunk's refcount
3918 {
3919 bufferlist t;
3920 SHA1 sha1_gen;
3921 int size = strlen("hi");
3922 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
3923 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
3924 sha1_gen.Update((const unsigned char *)"hi", size);
3925 sha1_gen.Final(fingerprint);
3926 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
3927 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 1);
3928 }
3929}
3930
3931TEST_F(LibRadosTwoPoolsPP, ManifestSnapRefcount2) {
3932 SKIP_IF_CRIMSON();
3933 // skip test if not yet octopus
3934 if (_get_required_osd_release(cluster) < "octopus") {
3935 cout << "cluster is not yet octopus, skipping test" << std::endl;
3936 return;
3937 }
3938
3939 bufferlist inbl;
3940 ASSERT_EQ(0, cluster.mon_command(
3941 set_pool_str(pool_name, "fingerprint_algorithm", "sha1"),
3942 inbl, NULL, NULL));
3943 cluster.wait_for_latest_osdmap();
3944
3945 // create object
3946 {
3947 bufferlist bl;
3948 bl.append("Thabe cdHI");
3949 ObjectWriteOperation op;
3950 op.write_full(bl);
3951 ASSERT_EQ(0, ioctx.operate("foo", &op));
3952 }
3953 {
3954 bufferlist bl;
3955 bl.append("there hiHI");
3956 ObjectWriteOperation op;
3957 op.write_full(bl);
3958 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
3959 }
3960
3961 string ab_fp_oid, cd_fp_oid, ef_fp_oid, BB_fp_oid;
3962
3963 // get fp_oid
3964 ab_fp_oid = get_fp_oid("ab", "sha1");
3965 cd_fp_oid = get_fp_oid("cd", "sha1");
3966 ef_fp_oid = get_fp_oid("ef", "sha1");
3967 BB_fp_oid = get_fp_oid("BB", "sha1");
3968
3969 // write
3970 {
3971 ObjectWriteOperation op;
3972 bufferlist bl;
3973 bl.append("ab");
3974 op.write_full(bl);
3975 ASSERT_EQ(0, cache_ioctx.operate(ab_fp_oid, &op));
3976 }
3977 // write
3978 {
3979 ObjectWriteOperation op;
3980 bufferlist bl;
3981 bl.append("cd");
3982 op.write_full(bl);
3983 ASSERT_EQ(0, cache_ioctx.operate(cd_fp_oid, &op));
3984 }
3985 // write
3986 {
3987 ObjectWriteOperation op;
3988 bufferlist bl;
3989 bl.append("ef");
3990 op.write_full(bl);
3991 ASSERT_EQ(0, cache_ioctx.operate(ef_fp_oid, &op));
3992 }
3993 // write
3994 {
3995 ObjectWriteOperation op;
3996 bufferlist bl;
3997 bl.append("BB");
3998 op.write_full(bl);
3999 ASSERT_EQ(0, cache_ioctx.operate(BB_fp_oid, &op));
4000 }
4001
4002 // set-chunk (dedup)
4003 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, ab_fp_oid, "foo");
4004 // set-chunk (dedup)
4005 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, cd_fp_oid, "foo");
4006 // set-chunk (dedup)
4007 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, ef_fp_oid, "foo");
4008
4009
4010 // make all chunks dirty --> flush
4011 // foo: [ab] [cd] [ef]
4012
4013 // create a snapshot, clone
4014 vector<uint64_t> my_snaps(1);
4015 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4016 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4017 my_snaps));
4018
4019 // foo: [BB] [BB] [ef]
4020 // create a clone
4021 {
4022 bufferlist bl;
4023 bl.append("ThBBe BB");
4024 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4025 }
4026 // make clean
4027 {
4028 bufferlist bl;
4029 bl.append("ThBBe BB");
4030 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4031 }
4032 // set-chunk (dedup)
4033 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, BB_fp_oid, "foo");
4034 // set-chunk (dedup)
4035 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, BB_fp_oid, "foo");
4036
4037 // and another
4038 my_snaps.resize(2);
4039 my_snaps[1] = my_snaps[0];
4040 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4041 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4042 my_snaps));
4043
4044 // foo: [ab] [cd] [ef]
4045 // create a clone
4046 {
4047 bufferlist bl;
4048 bl.append("Thabe cd");
4049 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4050 }
4051 // make clean
4052 {
4053 bufferlist bl;
4054 bl.append("Thabe cd");
4055 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4056 }
4057 // set-chunk (dedup)
4058 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, ab_fp_oid, "foo");
4059 // set-chunk (dedup)
4060 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, cd_fp_oid, "foo");
4061
4062 /*
4063 * snap[1]: [ab] [cd] [ef]
4064 * snap[0]: [BB] [BB] [ef]
4065 * head: [ab] [cd] [ef]
4066 */
4067
4068 // check chunk's refcount
4069 {
4070 bufferlist t;
4071 SHA1 sha1_gen;
4072 int size = strlen("ab");
4073 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
4074 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
4075 sha1_gen.Update((const unsigned char *)"ab", size);
4076 sha1_gen.Final(fingerprint);
4077 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
4078 cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
4079 chunk_refs_t refs;
4080 try {
4081 auto iter = t.cbegin();
4082 decode(refs, iter);
4083 } catch (buffer::error& err) {
4084 ASSERT_TRUE(0);
4085 }
4086 ASSERT_LE(2u, refs.count());
4087 }
4088
4089 // check chunk's refcount
4090 {
4091 bufferlist t;
4092 SHA1 sha1_gen;
4093 int size = strlen("cd");
4094 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
4095 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
4096 sha1_gen.Update((const unsigned char *)"cd", size);
4097 sha1_gen.Final(fingerprint);
4098 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
4099 cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
4100 chunk_refs_t refs;
4101 try {
4102 auto iter = t.cbegin();
4103 decode(refs, iter);
4104 } catch (buffer::error& err) {
4105 ASSERT_TRUE(0);
4106 }
4107 ASSERT_LE(2u, refs.count());
4108 }
4109
4110 // check chunk's refcount
4111 {
4112 bufferlist t;
4113 SHA1 sha1_gen;
4114 int size = strlen("BB");
4115 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
4116 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
4117 sha1_gen.Update((const unsigned char *)"BB", size);
4118 sha1_gen.Final(fingerprint);
4119 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
4120 cache_ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
4121 chunk_refs_t refs;
4122 try {
4123 auto iter = t.cbegin();
4124 decode(refs, iter);
4125 } catch (buffer::error& err) {
4126 ASSERT_TRUE(0);
4127 }
4128 ASSERT_LE(2u, refs.count());
4129 }
4130
4131 // remove snap
4132 ioctx.selfmanaged_snap_remove(my_snaps[0]);
4133
4134 /*
4135 * snap[1]: [ab] [cd] [ef]
4136 * head: [ab] [cd] [ef]
4137 */
4138
4139 sleep(10);
4140
4141 // check chunk's refcount
4142 {
4143 bufferlist t;
4144 SHA1 sha1_gen;
4145 int size = strlen("BB");
4146 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
4147 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
4148 sha1_gen.Update((const unsigned char *)"BB", size);
4149 sha1_gen.Final(fingerprint);
4150 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
4151 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 0);
4152 }
4153}
4154
4155TEST_F(LibRadosTwoPoolsPP, ManifestTestSnapCreate) {
4156 SKIP_IF_CRIMSON();
4157 // skip test if not yet octopus
4158 if (_get_required_osd_release(cluster) < "octopus") {
4159 GTEST_SKIP() << "cluster is not yet octopus, skipping test";
4160 }
4161
4162 // create object
4163 {
4164 bufferlist bl;
4165 bl.append("base chunk");
4166 ObjectWriteOperation op;
4167 op.write_full(bl);
4168 ASSERT_EQ(0, ioctx.operate("foo", &op));
4169 }
4170 {
4171 bufferlist bl;
4172 bl.append("CHUNKS CHUNKS");
4173 ObjectWriteOperation op;
4174 op.write_full(bl);
4175 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
4176 }
4177
4178 string ba_fp_oid, se_fp_oid, ch_fp_oid;
4179
4180 // get fp_oid
4181 ba_fp_oid = get_fp_oid("ba", "sha1");
4182 se_fp_oid = get_fp_oid("se", "sha1");
4183 ch_fp_oid = get_fp_oid("ch", "sha1");
4184
4185 // write
4186 {
4187 ObjectWriteOperation op;
4188 bufferlist bl;
4189 bl.append("ba");
4190 op.write_full(bl);
4191 ASSERT_EQ(0, cache_ioctx.operate(ba_fp_oid, &op));
4192 }
4193 // write
4194 {
4195 ObjectWriteOperation op;
4196 bufferlist bl;
4197 bl.append("se");
4198 op.write_full(bl);
4199 ASSERT_EQ(0, cache_ioctx.operate(se_fp_oid, &op));
4200 }
4201 // write
4202 {
4203 ObjectWriteOperation op;
4204 bufferlist bl;
4205 bl.append("ch");
4206 op.write_full(bl);
4207 ASSERT_EQ(0, cache_ioctx.operate(ch_fp_oid, &op));
4208 }
4209
4210 // set-chunk (dedup)
4211 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 2, ba_fp_oid, "foo");
4212
4213 // try to create a snapshot, clone
4214 vector<uint64_t> my_snaps(1);
4215 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4216 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4217 my_snaps));
4218
4219 // set-chunk (dedup)
4220 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, se_fp_oid, "foo");
4221
4222 // check whether clone is created
4223 ioctx.snap_set_read(librados::SNAP_DIR);
4224 {
4225 snap_set_t snap_set;
4226 int snap_ret;
4227 ObjectReadOperation op;
4228 op.list_snaps(&snap_set, &snap_ret);
4229 librados::AioCompletion *completion = cluster.aio_create_completion();
4230 ASSERT_EQ(0, ioctx.aio_operate(
4231 "foo", completion, &op,
4232 0, NULL));
4233 completion->wait_for_complete();
4234 ASSERT_EQ(0, snap_ret);
4235 ASSERT_LT(0u, snap_set.clones.size());
4236 ASSERT_EQ(1, snap_set.clones.size());
4237 }
4238
4239 // create a clone
4240 ioctx.snap_set_read(librados::SNAP_HEAD);
4241 {
4242 bufferlist bl;
4243 bl.append("B");
4244 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 0));
4245 }
4246
4247 ioctx.snap_set_read(my_snaps[0]);
4248 // set-chunk to clone
4249 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, ch_fp_oid, "foo");
4250}
4251
4252TEST_F(LibRadosTwoPoolsPP, ManifestRedirectAfterPromote) {
4253 SKIP_IF_CRIMSON();
4254 // skip test if not yet octopus
4255 if (_get_required_osd_release(cluster) < "octopus") {
4256 GTEST_SKIP() << "cluster is not yet octopus, skipping test";
4257 }
4258
4259 // create object
4260 {
4261 bufferlist bl;
4262 bl.append("base chunk");
4263 ObjectWriteOperation op;
4264 op.write_full(bl);
4265 ASSERT_EQ(0, ioctx.operate("foo", &op));
4266 }
4267 {
4268 bufferlist bl;
4269 bl.append("BASE CHUNK");
4270 ObjectWriteOperation op;
4271 op.write_full(bl);
4272 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
4273 }
4274
4275 // set-redirect
4276 {
4277 ObjectWriteOperation op;
4278 op.set_redirect("bar", cache_ioctx, 0);
4279 librados::AioCompletion *completion = cluster.aio_create_completion();
4280 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
4281 completion->wait_for_complete();
4282 ASSERT_EQ(0, completion->get_return_value());
4283 completion->release();
4284 }
4285
4286 // promote
4287 {
4288 ObjectWriteOperation op;
4289 op.tier_promote();
4290 librados::AioCompletion *completion = cluster.aio_create_completion();
4291 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
4292 completion->wait_for_complete();
4293 ASSERT_EQ(0, completion->get_return_value());
4294 completion->release();
4295 }
4296
4297 // write
4298 {
4299 bufferlist bl;
4300 bl.append("a");
4301 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 0));
4302 }
4303
4304 // read and verify the object (redirect)
4305 {
4306 bufferlist bl;
4307 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4308 ASSERT_EQ('a', bl[0]);
4309 }
4310
4311 // read and verify the object (redirect)
4312 {
4313 bufferlist bl;
4314 ASSERT_EQ(1, cache_ioctx.read("bar", bl, 1, 0));
4315 ASSERT_EQ('B', bl[0]);
4316 }
4317}
4318
4319TEST_F(LibRadosTwoPoolsPP, ManifestCheckRefcountWhenModification) {
4320 SKIP_IF_CRIMSON();
4321 // skip test if not yet octopus
4322 if (_get_required_osd_release(cluster) < "octopus") {
4323 GTEST_SKIP() << "cluster is not yet octopus, skipping test";
4324 }
4325
4326 bufferlist inbl;
4327 ASSERT_EQ(0, cluster.mon_command(
4328 set_pool_str(pool_name, "fingerprint_algorithm", "sha1"),
4329 inbl, NULL, NULL));
4330 cluster.wait_for_latest_osdmap();
4331
4332 // create object
4333 {
4334 bufferlist bl;
4335 bl.append("there hiHI");
4336 ObjectWriteOperation op;
4337 op.write_full(bl);
4338 ASSERT_EQ(0, ioctx.operate("foo", &op));
4339 }
4340
4341 string er_fp_oid, hi_fp_oid, HI_fp_oid, ai_fp_oid, bi_fp_oid,
4342 Er_fp_oid, Hi_fp_oid, Si_fp_oid;
4343
4344 // get fp_oid
4345 er_fp_oid = get_fp_oid("er", "sha1");
4346 hi_fp_oid = get_fp_oid("hi", "sha1");
4347 HI_fp_oid = get_fp_oid("HI", "sha1");
4348 ai_fp_oid = get_fp_oid("ai", "sha1");
4349 bi_fp_oid = get_fp_oid("bi", "sha1");
4350 Er_fp_oid = get_fp_oid("Er", "sha1");
4351 Hi_fp_oid = get_fp_oid("Hi", "sha1");
4352 Si_fp_oid = get_fp_oid("Si", "sha1");
4353
4354 // write
4355 {
4356 ObjectWriteOperation op;
4357 bufferlist bl;
4358 bl.append("er");
4359 op.write_full(bl);
4360 ASSERT_EQ(0, cache_ioctx.operate(er_fp_oid, &op));
4361 }
4362 // write
4363 {
4364 ObjectWriteOperation op;
4365 bufferlist bl;
4366 bl.append("hi");
4367 op.write_full(bl);
4368 ASSERT_EQ(0, cache_ioctx.operate(hi_fp_oid, &op));
4369 }
4370 // write
4371 {
4372 ObjectWriteOperation op;
4373 bufferlist bl;
4374 bl.append("HI");
4375 op.write_full(bl);
4376 ASSERT_EQ(0, cache_ioctx.operate(HI_fp_oid, &op));
4377 }
4378 // write
4379 {
4380 ObjectWriteOperation op;
4381 bufferlist bl;
4382 bl.append("ai");
4383 op.write_full(bl);
4384 ASSERT_EQ(0, cache_ioctx.operate(ai_fp_oid, &op));
4385 }
4386 // write
4387 {
4388 ObjectWriteOperation op;
4389 bufferlist bl;
4390 bl.append("bi");
4391 op.write_full(bl);
4392 ASSERT_EQ(0, cache_ioctx.operate(bi_fp_oid, &op));
4393 }
4394 // write
4395 {
4396 ObjectWriteOperation op;
4397 bufferlist bl;
4398 bl.append("Er");
4399 op.write_full(bl);
4400 ASSERT_EQ(0, cache_ioctx.operate(Er_fp_oid, &op));
4401 }
4402 // write
4403 {
4404 ObjectWriteOperation op;
4405 bufferlist bl;
4406 bl.append("Hi");
4407 op.write_full(bl);
4408 ASSERT_EQ(0, cache_ioctx.operate(Hi_fp_oid, &op));
4409 }
4410 // write
4411 {
4412 ObjectWriteOperation op;
4413 bufferlist bl;
4414 bl.append("Si");
4415 op.write_full(bl);
4416 ASSERT_EQ(0, cache_ioctx.operate(Si_fp_oid, &op));
4417 }
4418
4419 // set-chunk (dedup)
4420 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, er_fp_oid, "foo");
4421 // set-chunk (dedup)
4422 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, hi_fp_oid, "foo");
4423 // set-chunk (dedup)
4424 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, HI_fp_oid, "foo");
4425
4426 // foo head: [er] [hi] [HI]
4427
4428 // create a snapshot, clone
4429 vector<uint64_t> my_snaps(1);
4430 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4431 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4432 my_snaps));
4433
4434
4435 // foo snap[0]: [er] [hi] [HI]
4436 // foo head : [er] [ai] [HI]
4437 // create a clone
4438 {
4439 bufferlist bl;
4440 bl.append("a");
4441 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
4442 }
4443 // write
4444 {
4445 bufferlist bl;
4446 bl.append("a");
4447 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
4448 }
4449
4450 // set-chunk (dedup)
4451 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, ai_fp_oid, "foo");
4452
4453 // foo snap[0]: [er] [hi] [HI]
4454 // foo head : [er] [bi] [HI]
4455 // create a clone
4456 {
4457 bufferlist bl;
4458 bl.append("b");
4459 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
4460 }
4461 // write
4462 {
4463 bufferlist bl;
4464 bl.append("b");
4465 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
4466 }
4467
4468 // set-chunk (dedup)
4469 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, bi_fp_oid, "foo");
4470
4471 sleep(10);
4472
4473 // check chunk's refcount
4474 // [ai]'s refcount should be 0
4475 {
4476 bufferlist t;
4477 SHA1 sha1_gen;
4478 int size = strlen("ai");
4479 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
4480 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
4481 sha1_gen.Update((const unsigned char *)"ai", size);
4482 sha1_gen.Final(fingerprint);
4483 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
4484 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 0);
4485 }
4486
4487 // foo snap[0]: [er] [hi] [HI]
4488 // foo head : [Er] [Hi] [Si]
4489 // create a clone
4490 {
4491 bufferlist bl;
4492 bl.append("thEre HiSi");
4493 ObjectWriteOperation op;
4494 op.write_full(bl);
4495 ASSERT_EQ(0, ioctx.operate("foo", &op));
4496 }
4497 // write
4498 {
4499 bufferlist bl;
4500 bl.append("thEre HiSi");
4501 ObjectWriteOperation op;
4502 op.write_full(bl);
4503 ASSERT_EQ(0, ioctx.operate("foo", &op));
4504 }
4505
4506 // set-chunk (dedup)
4507 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, Er_fp_oid, "foo");
4508 // set-chunk (dedup)
4509 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, Hi_fp_oid, "foo");
4510 // set-chunk (dedup)
4511 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, Si_fp_oid, "foo");
4512
4513 // foo snap[0]: [er] [hi] [HI]
4514 // foo head : [ER] [HI] [SI]
4515 // write
4516 {
4517 bufferlist bl;
4518 bl.append("thERe HISI");
4519 ObjectWriteOperation op;
4520 op.write_full(bl);
4521 ASSERT_EQ(0, ioctx.operate("foo", &op));
4522 }
4523
4524 sleep(10);
4525
4526 // check chunk's refcount
4527 // [Er]'s refcount should be 0
4528 {
4529 bufferlist t;
4530 SHA1 sha1_gen;
4531 int size = strlen("Er");
4532 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1];
4533 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
4534 sha1_gen.Update((const unsigned char *)"Er", size);
4535 sha1_gen.Final(fingerprint);
4536 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
4537 is_intended_refcount_state(ioctx, "foo", cache_ioctx, p_str, 0);
4538 }
4539}
4540
4541TEST_F(LibRadosTwoPoolsPP, ManifestSnapIncCount) {
4542 SKIP_IF_CRIMSON();
4543 // skip test if not yet octopus
4544 if (_get_required_osd_release(cluster) < "octopus") {
4545 cout << "cluster is not yet octopus, skipping test" << std::endl;
4546 return;
4547 }
4548
4549 // create object
4550 {
4551 bufferlist bl;
4552 bl.append("there hiHI");
4553 ObjectWriteOperation op;
4554 op.write_full(bl);
4555 ASSERT_EQ(0, ioctx.operate("foo", &op));
4556 }
4557 {
4558 bufferlist bl;
4559 bl.append("there hiHI");
4560 ObjectWriteOperation op;
4561 op.write_full(bl);
4562 ASSERT_EQ(0, cache_ioctx.operate("chunk1", &op));
4563 }
4564 {
4565 bufferlist bl;
4566 bl.append("there hiHI");
4567 ObjectWriteOperation op;
4568 op.write_full(bl);
4569 ASSERT_EQ(0, cache_ioctx.operate("chunk2", &op));
4570 }
4571 {
4572 bufferlist bl;
4573 bl.append("there hiHI");
4574 ObjectWriteOperation op;
4575 op.write_full(bl);
4576 ASSERT_EQ(0, cache_ioctx.operate("chunk3", &op));
4577 }
4578 {
4579 bufferlist bl;
4580 bl.append("there hiHI");
4581 ObjectWriteOperation op;
4582 op.write_full(bl);
4583 ASSERT_EQ(0, cache_ioctx.operate("chunk4", &op));
4584 }
4585
4586 // wait for maps to settle
4587 cluster.wait_for_latest_osdmap();
4588
4589 // create a snapshot, clone
4590 vector<uint64_t> my_snaps(1);
4591 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4592 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4593 my_snaps));
4594
4595 {
4596 bufferlist bl;
4597 bl.append("there hiHI");
4598 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4599 }
4600
4601 my_snaps.resize(2);
4602 my_snaps[1] = my_snaps[0];
4603 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4604 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4605 my_snaps));
4606
4607 {
4608 bufferlist bl;
4609 bl.append("there hiHI");
4610 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4611 }
4612
4613 // set-chunk
4614 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk1", "foo");
4615 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk4", "foo");
4616 // foo snap[1]:
4617 // foo snap[0]:
4618 // foo head : [chunk1] [chunk4]
4619
4620 ioctx.snap_set_read(my_snaps[1]);
4621 // set-chunk
4622 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, "chunk2", "foo");
4623 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk4", "foo");
4624 // foo snap[1]: [chunk2] [chunk4]
4625 // foo snap[0]:
4626 // foo head : [chunk1] [chunk4]
4627
4628 ioctx.snap_set_read(my_snaps[0]);
4629 // set-chunk
4630 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, "chunk2", "foo");
4631 // foo snap[1]: [chunk2] [chunk4]
4632 // foo snap[0]: [chunk2]
4633 // foo head : [chunk1] [chunk4]
4634
4635 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk3", "foo");
4636 // foo snap[1]: [chunk2] [chunk4]
4637 // foo snap[0]: [chunk3] [chunk2]
4638 // foo head : [chunk1] [chunk4]
4639 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk4", "foo");
4640 // foo snap[1]: [chunk2] [chunk4]
4641 // foo snap[0]: [chunk3] [chunk2] [chunk4]
4642 // foo head : [chunk1] [chunk4]
4643
4644 // check chunk's refcount
4645 check_fp_oid_refcount(cache_ioctx, "chunk1", 1u, "");
4646
4647 // check chunk's refcount
4648 check_fp_oid_refcount(cache_ioctx, "chunk2", 1u, "");
4649
4650 // check chunk's refcount
4651 check_fp_oid_refcount(cache_ioctx, "chunk3", 1u, "");
4652 sleep(10);
4653
4654 // check chunk's refcount
4655 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk4", 1);
4656}
4657
4658TEST_F(LibRadosTwoPoolsPP, ManifestEvict) {
4659 SKIP_IF_CRIMSON();
4660 // skip test if not yet octopus
4661 if (_get_required_osd_release(cluster) < "octopus") {
4662 cout << "cluster is not yet octopus, skipping test" << std::endl;
4663 return;
4664 }
4665
4666 // create object
4667 {
4668 bufferlist bl;
4669 bl.append("there hiHI");
4670 ObjectWriteOperation op;
4671 op.write_full(bl);
4672 ASSERT_EQ(0, ioctx.operate("foo", &op));
4673 }
4674 {
4675 bufferlist bl;
4676 bl.append("there hiHI");
4677 ObjectWriteOperation op;
4678 op.write_full(bl);
4679 ASSERT_EQ(0, cache_ioctx.operate("chunk1", &op));
4680 }
4681 {
4682 bufferlist bl;
4683 bl.append("there hiHI");
4684 ObjectWriteOperation op;
4685 op.write_full(bl);
4686 ASSERT_EQ(0, cache_ioctx.operate("chunk2", &op));
4687 }
4688 {
4689 bufferlist bl;
4690 bl.append("there hiHI");
4691 ObjectWriteOperation op;
4692 op.write_full(bl);
4693 ASSERT_EQ(0, cache_ioctx.operate("chunk3", &op));
4694 }
4695 {
4696 bufferlist bl;
4697 bl.append("there hiHI");
4698 ObjectWriteOperation op;
4699 op.write_full(bl);
4700 ASSERT_EQ(0, cache_ioctx.operate("chunk4", &op));
4701 }
4702
4703 // wait for maps to settle
4704 cluster.wait_for_latest_osdmap();
4705
4706 // create a snapshot, clone
4707 vector<uint64_t> my_snaps(1);
4708 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4709 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4710 my_snaps));
4711
4712 {
4713 bufferlist bl;
4714 bl.append("there hiHI");
4715 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4716 }
4717
4718 my_snaps.resize(2);
4719 my_snaps[1] = my_snaps[0];
4720 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4721 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4722 my_snaps));
4723
4724 {
4725 bufferlist bl;
4726 bl.append("there hiHI");
4727 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4728 }
4729
4730 // set-chunk
4731 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk1", "foo");
4732 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk4", "foo");
4733 // foo snap[1]:
4734 // foo snap[0]:
4735 // foo head : [chunk1] [chunk4]
4736
4737 ioctx.snap_set_read(my_snaps[1]);
4738 // set-chunk
4739 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 10, "chunk2", "foo");
4740 // foo snap[1]: [ chunk2 ]
4741 // foo snap[0]:
4742 // foo head : [chunk1] [chunk4]
4743
4744 ioctx.snap_set_read(my_snaps[0]);
4745 // set-chunk
4746 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, "chunk2", "foo");
4747 // foo snap[1]: [ chunk2 ]
4748 // foo snap[0]: [chunk2]
4749 // foo head : [chunk1] [chunk4]
4750
4751 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk3", "foo");
4752 // foo snap[1]: [ chunk2 ]
4753 // foo snap[0]: [chunk3] [chunk2]
4754 // foo head : [chunk1] [chunk4]
4755 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk4", "foo");
4756 // foo snap[1]: [ chunk2 ]
4757 // foo snap[0]: [chunk3] [chunk2] [chunk4]
4758 // foo head : [chunk1] [chunk4]
4759 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 2, "chunk4", "foo");
4760 // foo snap[1]: [ chunk2 ]
4761 // foo snap[0]: [chunk4] [chunk3] [chunk2] [chunk4]
4762 // foo head : [chunk1] [chunk4]
4763 manifest_set_chunk(cluster, cache_ioctx, ioctx, 4, 2, "chunk1", "foo");
4764 // foo snap[1]: [ chunk2 ]
4765 // foo snap[0]: [chunk4] [chunk3] [chunk1] [chunk2] [chunk4]
4766 // foo head : [chunk1] [chunk4]
4767
4768 {
4769 ObjectReadOperation op, stat_op;
4770 uint64_t size;
4771 op.tier_evict();
4772 librados::AioCompletion *completion = cluster.aio_create_completion();
4773 ASSERT_EQ(0, ioctx.aio_operate(
4774 "foo", completion, &op,
4775 librados::OPERATION_IGNORE_OVERLAY, NULL));
4776 completion->wait_for_complete();
4777 ASSERT_EQ(0, completion->get_return_value());
4778
4779 stat_op.stat(&size, NULL, NULL);
4780 ASSERT_EQ(0, ioctx.operate("foo", &stat_op, NULL));
4781 ASSERT_EQ(10, size);
4782 }
4783
4784 ioctx.snap_set_read(librados::SNAP_HEAD);
4785 {
4786 ObjectReadOperation op, stat_op;
4787 uint64_t size;
4788 op.tier_evict();
4789 librados::AioCompletion *completion = cluster.aio_create_completion();
4790 ASSERT_EQ(0, ioctx.aio_operate(
4791 "foo", completion, &op,
4792 librados::OPERATION_IGNORE_OVERLAY, NULL));
4793 completion->wait_for_complete();
4794 ASSERT_EQ(0, completion->get_return_value());
4795
4796 stat_op.stat(&size, NULL, NULL);
4797 ASSERT_EQ(0, ioctx.operate("foo", &stat_op, NULL));
4798 ASSERT_EQ(strlen("there hiHI"), size);
4799 }
4800
4801}
4802
4803TEST_F(LibRadosTwoPoolsPP, ManifestEvictPromote) {
4804 SKIP_IF_CRIMSON();
4805 // skip test if not yet octopus
4806 if (_get_required_osd_release(cluster) < "octopus") {
4807 cout << "cluster is not yet octopus, skipping test" << std::endl;
4808 return;
4809 }
4810
4811 // create object
4812 {
4813 bufferlist bl;
4814 bl.append("there hiHI");
4815 ObjectWriteOperation op;
4816 op.write_full(bl);
4817 ASSERT_EQ(0, ioctx.operate("foo", &op));
4818 }
4819 {
4820 bufferlist bl;
4821 bl.append("EREHT hiHI");
4822 ObjectWriteOperation op;
4823 op.write_full(bl);
4824 ASSERT_EQ(0, cache_ioctx.operate("chunk1", &op));
4825 }
4826 {
4827 bufferlist bl;
4828 bl.append("there hiHI");
4829 ObjectWriteOperation op;
4830 op.write_full(bl);
4831 ASSERT_EQ(0, cache_ioctx.operate("chunk2", &op));
4832 }
4833 {
4834 bufferlist bl;
4835 bl.append("THERE HIHI");
4836 ObjectWriteOperation op;
4837 op.write_full(bl);
4838 ASSERT_EQ(0, cache_ioctx.operate("chunk3", &op));
4839 }
4840
4841 // wait for maps to settle
4842 cluster.wait_for_latest_osdmap();
4843
4844 // create a snapshot, clone
4845 vector<uint64_t> my_snaps(1);
4846 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4847 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4848 my_snaps));
4849
4850 {
4851 bufferlist bl;
4852 bl.append("there");
4853 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
4854 }
4855
4856 // set-chunk
4857 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 2, "chunk1", "foo");
4858 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk2", "foo");
4859 // foo snap[0]:
4860 // foo head : [chunk1] [chunk2]
4861
4862 ioctx.snap_set_read(my_snaps[0]);
4863 // set-chunk
4864 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 10, "chunk3", "foo");
4865 // foo snap[0]: [ chunk3 ]
4866 // foo head : [chunk1] [chunk2]
4867
4868
4869 {
4870 ObjectReadOperation op, stat_op;
4871 uint64_t size;
4872 op.tier_evict();
4873 librados::AioCompletion *completion = cluster.aio_create_completion();
4874 ASSERT_EQ(0, ioctx.aio_operate(
4875 "foo", completion, &op,
4876 librados::OPERATION_IGNORE_OVERLAY, NULL));
4877 completion->wait_for_complete();
4878 ASSERT_EQ(0, completion->get_return_value());
4879
4880 stat_op.stat(&size, NULL, NULL);
4881 ASSERT_EQ(0, ioctx.operate("foo", &stat_op, NULL));
4882 ASSERT_EQ(10, size);
4883
4884 }
4885 {
4886 bufferlist bl;
4887 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4888 ASSERT_EQ('T', bl[0]);
4889 }
4890
4891 ioctx.snap_set_read(librados::SNAP_HEAD);
4892 {
4893 bufferlist bl;
4894 ASSERT_EQ(10, ioctx.read("foo", bl, 10, 0));
4895 ASSERT_EQ('H', bl[8]);
4896 }
4897}
4898
4899
4900TEST_F(LibRadosTwoPoolsPP, ManifestSnapSizeMismatch) {
4901 SKIP_IF_CRIMSON();
4902 // skip test if not yet octopus
4903 if (_get_required_osd_release(cluster) < "octopus") {
4904 cout << "cluster is not yet octopus, skipping test" << std::endl;
4905 return;
4906 }
4907
4908 // create object
4909 {
4910 bufferlist bl;
4911 bl.append("there hiHI");
4912 ObjectWriteOperation op;
4913 op.write_full(bl);
4914 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
4915 }
4916 {
4917 bufferlist bl;
4918 bl.append("there hiHI");
4919 ObjectWriteOperation op;
4920 op.write_full(bl);
4921 ASSERT_EQ(0, ioctx.operate("chunk1", &op));
4922 }
4923 {
4924 bufferlist bl;
4925 bl.append("there HIHI");
4926 ObjectWriteOperation op;
4927 op.write_full(bl);
4928 ASSERT_EQ(0, ioctx.operate("chunk2", &op));
4929 }
4930
4931 // wait for maps to settle
4932 cluster.wait_for_latest_osdmap();
4933
4934 // create a snapshot, clone
4935 vector<uint64_t> my_snaps(1);
4936 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0]));
4937 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4938 my_snaps));
4939
4940 {
4941 bufferlist bl;
4942 bl.append("There hiHI");
4943 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
4944 }
4945
4946 my_snaps.resize(2);
4947 my_snaps[1] = my_snaps[0];
4948 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0]));
4949 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4950 my_snaps));
4951
4952 {
4953 bufferlist bl;
4954 bl.append("tHere hiHI");
4955 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
4956 }
4957
4958 // set-chunk
4959 manifest_set_chunk(cluster, ioctx, cache_ioctx, 0, 10, "chunk1", "foo");
4960
4961 cache_ioctx.snap_set_read(my_snaps[1]);
4962
4963 // set-chunk
4964 manifest_set_chunk(cluster, ioctx, cache_ioctx, 0, 10, "chunk2", "foo");
4965
4966 // evict
4967 {
4968 ObjectReadOperation op, stat_op;
4969 op.tier_evict();
4970 librados::AioCompletion *completion = cluster.aio_create_completion();
4971 ASSERT_EQ(0, cache_ioctx.aio_operate(
4972 "foo", completion, &op,
4973 librados::OPERATION_IGNORE_OVERLAY, NULL));
4974 completion->wait_for_complete();
4975 ASSERT_EQ(0, completion->get_return_value());
4976 }
4977
4978 uint32_t hash;
4979 ASSERT_EQ(0, cache_ioctx.get_object_pg_hash_position2("foo", &hash));
4980
4981 // scrub
4982 {
4983 for (int tries = 0; tries < 5; ++tries) {
4984 bufferlist inbl;
4985 ostringstream ss;
4986 ss << "{\"prefix\": \"pg deep-scrub\", \"pgid\": \""
4987 << cache_ioctx.get_id() << "."
4988 << std::hex << hash
4989 << "\"}";
4990 int r = cluster.mon_command(ss.str(), inbl, NULL, NULL);
4991 if (r == -ENOENT ||
4992 r == -EAGAIN) {
4993 sleep(5);
4994 continue;
4995 }
4996 ASSERT_EQ(0, r);
4997 break;
4998 }
4999 cout << "waiting for scrubs..." << std::endl;
5000 sleep(20);
5001 cout << "done waiting" << std::endl;
5002 }
5003
5004 {
5005 bufferlist bl;
5006 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
5007 ASSERT_EQ('t', bl[0]);
5008 }
5009}
5010
5011#include <common/CDC.h>
5012TEST_F(LibRadosTwoPoolsPP, DedupFlushRead) {
5013 SKIP_IF_CRIMSON();
5014 // skip test if not yet octopus
5015 if (_get_required_osd_release(cluster) < "octopus") {
5016 GTEST_SKIP() << "cluster is not yet octopus, skipping test";
5017 }
5018
5019 bufferlist inbl;
5020 ASSERT_EQ(0, cluster.mon_command(
5021 set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"),
5022 inbl, NULL, NULL));
5023 ASSERT_EQ(0, cluster.mon_command(
5024 set_pool_str(cache_pool_name, "dedup_tier", pool_name),
5025 inbl, NULL, NULL));
5026 ASSERT_EQ(0, cluster.mon_command(
5027 set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
5028 inbl, NULL, NULL));
5029 ASSERT_EQ(0, cluster.mon_command(
5030 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
5031 inbl, NULL, NULL));
5032
5033 // wait for maps to settle
5034 cluster.wait_for_latest_osdmap();
5035
5036 // create object
5037 bufferlist gbl;
5038 {
5039 generate_buffer(1024*8, &gbl);
5040 ObjectWriteOperation op;
5041 op.write_full(gbl);
5042 ASSERT_EQ(0, cache_ioctx.operate("foo-chunk", &op));
5043 }
5044 {
5045 bufferlist bl;
5046 bl.append("DDse chunk");
5047 ObjectWriteOperation op;
5048 op.write_full(bl);
5049 ASSERT_EQ(0, ioctx.operate("bar-chunk", &op));
5050 }
5051
5052 // set-chunk to set manifest object
5053 {
5054 ObjectReadOperation op;
5055 op.set_chunk(0, 2, ioctx, "bar-chunk", 0,
5056 CEPH_OSD_OP_FLAG_WITH_REFERENCE);
5057 librados::AioCompletion *completion = cluster.aio_create_completion();
5058 ASSERT_EQ(0, cache_ioctx.aio_operate("foo-chunk", completion, &op,
5059 librados::OPERATION_IGNORE_CACHE, NULL));
5060 completion->wait_for_complete();
5061 ASSERT_EQ(0, completion->get_return_value());
5062 completion->release();
5063 }
5064 // flush
5065 {
5066 ObjectReadOperation op;
5067 op.tier_flush();
5068 librados::AioCompletion *completion = cluster.aio_create_completion();
5069 ASSERT_EQ(0, cache_ioctx.aio_operate(
5070 "foo-chunk", completion, &op,
5071 librados::OPERATION_IGNORE_CACHE, NULL));
5072 completion->wait_for_complete();
5073 ASSERT_EQ(0, completion->get_return_value());
5074 completion->release();
5075 }
5076
5077 std::unique_ptr<CDC> cdc = CDC::create("fastcdc", cbits(1024)-1);
5078 vector<pair<uint64_t, uint64_t>> chunks;
5079 bufferlist chunk;
5080 cdc->calc_chunks(gbl, &chunks);
5081 chunk.substr_of(gbl, chunks[1].first, chunks[1].second);
5082 string tgt_oid;
5083 {
5084 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5085 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5086 SHA1 sha1_gen;
5087 int size = chunk.length();
5088 sha1_gen.Update((const unsigned char *)chunk.c_str(), size);
5089 sha1_gen.Final(fingerprint);
5090 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5091 tgt_oid = string(p_str);
5092 }
5093
5094 // read and verify the chunked object
5095 {
5096 bufferlist test_bl;
5097 ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0));
5098 ASSERT_EQ(test_bl[1], chunk[1]);
5099 }
5100
5101 ASSERT_EQ(0, cluster.mon_command(
5102 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 512),
5103 inbl, NULL, NULL));
5104 cluster.wait_for_latest_osdmap();
5105
5106 // make a dirty chunks
5107 {
5108 bufferlist bl;
5109 bl.append("hi");
5110 ASSERT_EQ(0, cache_ioctx.write("foo-chunk", bl, bl.length(), 0));
5111 }
5112
5113 // flush
5114 {
5115 ObjectReadOperation op;
5116 op.tier_flush();
5117 librados::AioCompletion *completion = cluster.aio_create_completion();
5118 ASSERT_EQ(0, cache_ioctx.aio_operate(
5119 "foo-chunk", completion, &op,
5120 librados::OPERATION_IGNORE_CACHE, NULL));
5121 completion->wait_for_complete();
5122 ASSERT_EQ(0, completion->get_return_value());
5123 completion->release();
5124 }
5125
5126 cdc = CDC::create("fastcdc", cbits(512)-1);
5127 chunks.clear();
5128 cdc->calc_chunks(gbl, &chunks);
5129 bufferlist chunk_512;
5130 chunk_512.substr_of(gbl, chunks[3].first, chunks[3].second);
5131 {
5132 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5133 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5134 SHA1 sha1_gen;
5135 int size = chunk_512.length();
5136 sha1_gen.Update((const unsigned char *)chunk_512.c_str(), size);
5137 sha1_gen.Final(fingerprint);
5138 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5139 tgt_oid = string(p_str);
5140 }
5141
5142 // read and verify the chunked object
5143 {
5144 bufferlist test_bl;
5145 ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0));
5146 ASSERT_EQ(test_bl[1], chunk_512[1]);
5147 }
5148
5149 ASSERT_EQ(0, cluster.mon_command(
5150 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 16384),
5151 inbl, NULL, NULL));
5152 cluster.wait_for_latest_osdmap();
5153
5154 // make a dirty chunks
5155 {
5156 bufferlist bl;
5157 bl.append("hi");
5158 ASSERT_EQ(0, cache_ioctx.write("foo-chunk", bl, bl.length(), 0));
5159 gbl.begin(0).copy_in(bl.length(), bl);
5160 }
5161 // flush
5162 {
5163 ObjectReadOperation op;
5164 op.tier_flush();
5165 librados::AioCompletion *completion = cluster.aio_create_completion();
5166 ASSERT_EQ(0, cache_ioctx.aio_operate(
5167 "foo-chunk", completion, &op,
5168 librados::OPERATION_IGNORE_CACHE, NULL));
5169 completion->wait_for_complete();
5170 ASSERT_EQ(0, completion->get_return_value());
5171 completion->release();
5172 }
5173
5174 cdc = CDC::create("fastcdc", cbits(16384)-1);
5175 chunks.clear();
5176 cdc->calc_chunks(gbl, &chunks);
5177 bufferlist chunk_16384;
5178 chunk_16384.substr_of(gbl, chunks[0].first, chunks[0].second);
5179 {
5180 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5181 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5182 SHA1 sha1_gen;
5183 int size = chunk_16384.length();
5184 sha1_gen.Update((const unsigned char *)chunk_16384.c_str(), size);
5185 sha1_gen.Final(fingerprint);
5186 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5187 tgt_oid = string(p_str);
5188 }
5189 // read and verify the chunked object
5190 {
5191 bufferlist test_bl;
5192 ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0));
5193 ASSERT_EQ(test_bl[0], chunk_16384[0]);
5194 }
5195
5196 // less than object size
5197 ASSERT_EQ(0, cluster.mon_command(
5198 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
5199 inbl, NULL, NULL));
5200 cluster.wait_for_latest_osdmap();
5201
5202 // make a dirty chunks
5203 // a chunk_info is deleted by write, which converts the manifest object to non-manifest object
5204 {
5205 bufferlist bl;
5206 bl.append("hi");
5207 ASSERT_EQ(0, cache_ioctx.write("foo-chunk", bl, bl.length(), 0));
5208 }
5209
5210 // reset set-chunk
5211 {
5212 bufferlist bl;
5213 bl.append("DDse chunk");
5214 ObjectWriteOperation op;
5215 op.write_full(bl);
5216 ASSERT_EQ(0, ioctx.operate("bar-chunk", &op));
5217 }
5218 // set-chunk to set manifest object
5219 {
5220 ObjectReadOperation op;
5221 op.set_chunk(0, 2, ioctx, "bar-chunk", 0,
5222 CEPH_OSD_OP_FLAG_WITH_REFERENCE);
5223 librados::AioCompletion *completion = cluster.aio_create_completion();
5224 ASSERT_EQ(0, cache_ioctx.aio_operate("foo-chunk", completion, &op,
5225 librados::OPERATION_IGNORE_CACHE, NULL));
5226 completion->wait_for_complete();
5227 ASSERT_EQ(0, completion->get_return_value());
5228 completion->release();
5229 }
5230 // flush
5231 {
5232 ObjectReadOperation op;
5233 op.tier_flush();
5234 librados::AioCompletion *completion = cluster.aio_create_completion();
5235 ASSERT_EQ(0, cache_ioctx.aio_operate(
5236 "foo-chunk", completion, &op,
5237 librados::OPERATION_IGNORE_CACHE, NULL));
5238 completion->wait_for_complete();
5239 ASSERT_EQ(0, completion->get_return_value());
5240 completion->release();
5241 }
5242
5243 cdc = CDC::create("fastcdc", cbits(1024)-1);
5244 chunks.clear();
5245 cdc->calc_chunks(gbl, &chunks);
5246 bufferlist small_chunk;
5247 small_chunk.substr_of(gbl, chunks[1].first, chunks[1].second);
5248 {
5249 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5250 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5251 SHA1 sha1_gen;
5252 int size = small_chunk.length();
5253 sha1_gen.Update((const unsigned char *)small_chunk.c_str(), size);
5254 sha1_gen.Final(fingerprint);
5255 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5256 tgt_oid = string(p_str);
5257 }
5258 // read and verify the chunked object
5259 {
5260 bufferlist test_bl;
5261 ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0));
5262 ASSERT_EQ(test_bl[0], small_chunk[0]);
5263 }
5264
5265}
5266
5267TEST_F(LibRadosTwoPoolsPP, ManifestFlushSnap) {
5268 SKIP_IF_CRIMSON();
5269 // skip test if not yet octopus
5270 if (_get_required_osd_release(cluster) < "octopus") {
5271 cout << "cluster is not yet octopus, skipping test" << std::endl;
5272 return;
5273 }
5274
5275 bufferlist inbl;
5276 ASSERT_EQ(0, cluster.mon_command(
5277 set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"),
5278 inbl, NULL, NULL));
5279 ASSERT_EQ(0, cluster.mon_command(
5280 set_pool_str(cache_pool_name, "dedup_tier", pool_name),
5281 inbl, NULL, NULL));
5282 ASSERT_EQ(0, cluster.mon_command(
5283 set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
5284 inbl, NULL, NULL));
5285 ASSERT_EQ(0, cluster.mon_command(
5286 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
5287 inbl, NULL, NULL));
5288
5289 // wait for maps to settle
5290 cluster.wait_for_latest_osdmap();
5291
5292 // create object
5293 bufferlist gbl;
5294 {
5295 //bufferlist bl;
5296 //bl.append("there hi");
5297 generate_buffer(1024*8, &gbl);
5298 ObjectWriteOperation op;
5299 op.write_full(gbl);
5300 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
5301 }
5302 {
5303 bufferlist bl;
5304 bl.append("there hi");
5305 ObjectWriteOperation op;
5306 op.write_full(bl);
5307 ASSERT_EQ(0, ioctx.operate("bar", &op));
5308 }
5309
5310 // set-chunk (dedup)
5311 manifest_set_chunk(cluster, ioctx, cache_ioctx, 2, 2, "bar", "foo");
5312 manifest_set_chunk(cluster, ioctx, cache_ioctx, 6, 2, "bar", "foo");
5313
5314 // create a snapshot, clone
5315 vector<uint64_t> my_snaps(1);
5316 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0]));
5317 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5318 my_snaps));
5319
5320 // make a dirty chunks
5321 {
5322 bufferlist bl;
5323 bl.append("Thbbe");
5324 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
5325 }
5326
5327 // and another
5328 my_snaps.resize(2);
5329 my_snaps[1] = my_snaps[0];
5330 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0]));
5331 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5332 my_snaps));
5333
5334 // make a dirty chunks
5335 {
5336 bufferlist bl;
5337 bl.append("Thcce");
5338 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
5339 }
5340
5341 // flush on head (should fail)
5342 cache_ioctx.snap_set_read(librados::SNAP_HEAD);
5343 // flush
5344 {
5345 ObjectReadOperation op;
5346 op.tier_flush();
5347 librados::AioCompletion *completion = cluster.aio_create_completion();
5348 ASSERT_EQ(0, cache_ioctx.aio_operate(
5349 "foo", completion, &op,
5350 librados::OPERATION_IGNORE_CACHE, NULL));
5351 completion->wait_for_complete();
5352 ASSERT_EQ(-EBUSY, completion->get_return_value());
5353 completion->release();
5354 }
5355
5356 // flush on recent snap (should fail)
5357 cache_ioctx.snap_set_read(my_snaps[0]);
5358 {
5359 ObjectReadOperation op;
5360 op.tier_flush();
5361 librados::AioCompletion *completion = cluster.aio_create_completion();
5362 ASSERT_EQ(0, cache_ioctx.aio_operate(
5363 "foo", completion, &op,
5364 librados::OPERATION_IGNORE_CACHE, NULL));
5365 completion->wait_for_complete();
5366 ASSERT_EQ(-EBUSY, completion->get_return_value());
5367 completion->release();
5368 }
5369
5370 // flush on oldest snap
5371 cache_ioctx.snap_set_read(my_snaps[1]);
5372 {
5373 ObjectReadOperation op;
5374 op.tier_flush();
5375 librados::AioCompletion *completion = cluster.aio_create_completion();
5376 ASSERT_EQ(0, cache_ioctx.aio_operate(
5377 "foo", completion, &op,
5378 librados::OPERATION_IGNORE_CACHE, NULL));
5379 completion->wait_for_complete();
5380 ASSERT_EQ(0, completion->get_return_value());
5381 completion->release();
5382 }
5383
5384 // flush on oldest snap
5385 cache_ioctx.snap_set_read(my_snaps[0]);
5386 {
5387 ObjectReadOperation op;
5388 op.tier_flush();
5389 librados::AioCompletion *completion = cluster.aio_create_completion();
5390 ASSERT_EQ(0, cache_ioctx.aio_operate(
5391 "foo", completion, &op,
5392 librados::OPERATION_IGNORE_CACHE, NULL));
5393 completion->wait_for_complete();
5394 ASSERT_EQ(0, completion->get_return_value());
5395 completion->release();
5396 }
5397
5398 // flush on oldest snap
5399 cache_ioctx.snap_set_read(librados::SNAP_HEAD);
5400 {
5401 ObjectReadOperation op;
5402 op.tier_flush();
5403 librados::AioCompletion *completion = cluster.aio_create_completion();
5404 ASSERT_EQ(0, cache_ioctx.aio_operate(
5405 "foo", completion, &op,
5406 librados::OPERATION_IGNORE_CACHE, NULL));
5407 completion->wait_for_complete();
5408 ASSERT_EQ(0, completion->get_return_value());
5409 completion->release();
5410 }
5411
5412 // check chunk's refcount
5413 std::unique_ptr<CDC> cdc = CDC::create("fastcdc", cbits(1024)-1);
5414 vector<pair<uint64_t, uint64_t>> chunks;
5415 bufferlist chunk;
5416 cdc->calc_chunks(gbl, &chunks);
5417 chunk.substr_of(gbl, chunks[1].first, chunks[1].second);
5418 string tgt_oid;
5419 {
5420 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5421 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5422 SHA1 sha1_gen;
5423 int size = chunk.length();
5424 sha1_gen.Update((const unsigned char *)chunk.c_str(), size);
5425 sha1_gen.Final(fingerprint);
5426 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5427 tgt_oid = string(p_str);
5428 }
5429 // read and verify the chunked object
5430 {
5431 bufferlist test_bl;
5432 ASSERT_EQ(2, ioctx.read(tgt_oid, test_bl, 2, 0));
5433 ASSERT_EQ(test_bl[1], chunk[1]);
5434 }
5435
5436 cache_ioctx.snap_set_read(librados::SNAP_HEAD);
5437 {
5438 bufferlist bl;
5439 ASSERT_EQ(4, cache_ioctx.read("foo", bl, 4, 0));
5440 ASSERT_EQ('c', bl[2]);
5441 }
5442
5443 cache_ioctx.snap_set_read(my_snaps[0]);
5444 {
5445 bufferlist bl;
5446 ASSERT_EQ(4, cache_ioctx.read("foo", bl, 4, 0));
5447 ASSERT_EQ('b', bl[2]);
5448 }
5449}
5450
5451TEST_F(LibRadosTwoPoolsPP, ManifestFlushDupCount) {
5452 SKIP_IF_CRIMSON();
5453 // skip test if not yet octopus
5454 if (_get_required_osd_release(cluster) < "octopus") {
5455 cout << "cluster is not yet octopus, skipping test" << std::endl;
5456 return;
5457 }
5458
5459 bufferlist inbl;
5460 ASSERT_EQ(0, cluster.mon_command(
5461 set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"),
5462 inbl, NULL, NULL));
5463 ASSERT_EQ(0, cluster.mon_command(
5464 set_pool_str(cache_pool_name, "dedup_tier", pool_name),
5465 inbl, NULL, NULL));
5466 ASSERT_EQ(0, cluster.mon_command(
5467 set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
5468 inbl, NULL, NULL));
5469 ASSERT_EQ(0, cluster.mon_command(
5470 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
5471 inbl, NULL, NULL));
5472
5473 // create object
5474 bufferlist gbl;
5475 {
5476 //bufferlist bl;
5477 generate_buffer(1024*8, &gbl);
5478 ObjectWriteOperation op;
5479 op.write_full(gbl);
5480 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
5481 }
5482 {
5483 bufferlist bl;
5484 bl.append("there hiHI");
5485 ObjectWriteOperation op;
5486 op.write_full(bl);
5487 ASSERT_EQ(0, ioctx.operate("bar", &op));
5488 }
5489
5490 // wait for maps to settle
5491 cluster.wait_for_latest_osdmap();
5492
5493 // set-chunk to set manifest object
5494 {
5495 ObjectReadOperation op;
5496 op.set_chunk(0, 2, ioctx, "bar", 0,
5497 CEPH_OSD_OP_FLAG_WITH_REFERENCE);
5498 librados::AioCompletion *completion = cluster.aio_create_completion();
5499 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
5500 librados::OPERATION_IGNORE_CACHE, NULL));
5501 completion->wait_for_complete();
5502 ASSERT_EQ(0, completion->get_return_value());
5503 completion->release();
5504 }
5505
5506 // create a snapshot, clone
5507 vector<uint64_t> my_snaps(1);
5508 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0]));
5509 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5510 my_snaps));
5511
5512 // make a dirty chunks
5513 {
5514 bufferlist bl;
5515 bl.append("Thbbe hi");
5516 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
5517 }
5518
5519 // and another
5520 my_snaps.resize(2);
5521 my_snaps[1] = my_snaps[0];
5522 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_create(&my_snaps[0]));
5523 ASSERT_EQ(0, cache_ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5524 my_snaps));
5525
5526 // make a dirty chunks
5527 {
5528 bufferlist bl;
5529 bl.append("Thcce hi");
5530 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
5531 }
5532
5533 //flush on oldest snap
5534 cache_ioctx.snap_set_read(my_snaps[1]);
5535 // flush
5536 {
5537 ObjectReadOperation op;
5538 op.tier_flush();
5539 librados::AioCompletion *completion = cluster.aio_create_completion();
5540 ASSERT_EQ(0, cache_ioctx.aio_operate(
5541 "foo", completion, &op,
5542 librados::OPERATION_IGNORE_CACHE, NULL));
5543 completion->wait_for_complete();
5544 ASSERT_EQ(0, completion->get_return_value());
5545 completion->release();
5546 }
5547
5548 // flush on oldest snap
5549 cache_ioctx.snap_set_read(my_snaps[0]);
5550 // flush
5551 {
5552 ObjectReadOperation op;
5553 op.tier_flush();
5554 librados::AioCompletion *completion = cluster.aio_create_completion();
5555 ASSERT_EQ(0, cache_ioctx.aio_operate(
5556 "foo", completion, &op,
5557 librados::OPERATION_IGNORE_CACHE, NULL));
5558 completion->wait_for_complete();
5559 ASSERT_EQ(0, completion->get_return_value());
5560 completion->release();
5561 }
5562
5563 cache_ioctx.snap_set_read(librados::SNAP_HEAD);
5564 // flush
5565 {
5566 ObjectReadOperation op;
5567 op.tier_flush();
5568 librados::AioCompletion *completion = cluster.aio_create_completion();
5569 ASSERT_EQ(0, cache_ioctx.aio_operate(
5570 "foo", completion, &op,
5571 librados::OPERATION_IGNORE_CACHE, NULL));
5572 completion->wait_for_complete();
5573 ASSERT_EQ(0, completion->get_return_value());
5574 completion->release();
5575 }
5576
5577 std::unique_ptr<CDC> cdc = CDC::create("fastcdc", cbits(1024)-1);
5578 vector<pair<uint64_t, uint64_t>> chunks;
5579 bufferlist chunk;
5580 cdc->calc_chunks(gbl, &chunks);
5581 chunk.substr_of(gbl, chunks[1].first, chunks[1].second);
5582 string tgt_oid;
5583 // check chunk's refcount
5584 {
5585 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5586 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5587 bufferlist t;
5588 SHA1 sha1_gen;
5589 int size = chunk.length();
5590 sha1_gen.Update((const unsigned char *)chunk.c_str(), size);
5591 sha1_gen.Final(fingerprint);
5592 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5593 tgt_oid = string(p_str);
5594 ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
5595 chunk_refs_t refs;
5596 try {
5597 auto iter = t.cbegin();
5598 decode(refs, iter);
5599 } catch (buffer::error& err) {
5600 ASSERT_TRUE(0);
5601 }
5602 ASSERT_LE(1u, refs.count());
5603 }
5604
5605 bufferlist chunk2;
5606 chunk2.substr_of(gbl, chunks[0].first, chunks[0].second);
5607 // check chunk's refcount
5608 {
5609 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5610 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5611 bufferlist t;
5612 SHA1 sha1_gen;
5613 int size = chunk2.length();
5614 sha1_gen.Update((const unsigned char *)chunk2.c_str(), size);
5615 sha1_gen.Final(fingerprint);
5616 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5617 tgt_oid = string(p_str);
5618 ioctx.getxattr(p_str, CHUNK_REFCOUNT_ATTR, t);
5619 chunk_refs_t refs;
5620 try {
5621 auto iter = t.cbegin();
5622 decode(refs, iter);
5623 } catch (buffer::error& err) {
5624 ASSERT_TRUE(0);
5625 }
5626 ASSERT_LE(1u, refs.count());
5627 }
5628
5629 // make a dirty chunks
5630 {
5631 bufferlist bl;
5632 bl.append("ThDDe hi");
5633 ASSERT_EQ(0, cache_ioctx.write("foo", bl, bl.length(), 0));
5634 }
5635
5636 // flush
5637 {
5638 ObjectReadOperation op;
5639 op.tier_flush();
5640 librados::AioCompletion *completion = cluster.aio_create_completion();
5641 ASSERT_EQ(0, cache_ioctx.aio_operate(
5642 "foo", completion, &op,
5643 librados::OPERATION_IGNORE_CACHE, NULL));
5644 completion->wait_for_complete();
5645 ASSERT_EQ(0, completion->get_return_value());
5646 completion->release();
5647 }
5648
5649 bufferlist tmp;
5650 tmp.append("Thcce hi");
5651 gbl.begin(0).copy_in(tmp.length(), tmp);
5652 bufferlist chunk3;
5653 cdc->calc_chunks(gbl, &chunks);
5654 chunk3.substr_of(gbl, chunks[0].first, chunks[0].second);
5655 // check chunk's refcount
5656 {
5657 unsigned char fingerprint[CEPH_CRYPTO_SHA1_DIGESTSIZE + 1] = {0};
5658 char p_str[CEPH_CRYPTO_SHA1_DIGESTSIZE*2+1] = {0};
5659 bufferlist t;
5660 SHA1 sha1_gen;
5661 int size = chunk2.length();
5662 sha1_gen.Update((const unsigned char *)chunk2.c_str(), size);
5663 sha1_gen.Final(fingerprint);
5664 buf_to_hex(fingerprint, CEPH_CRYPTO_SHA1_DIGESTSIZE, p_str);
5665 is_intended_refcount_state(cache_ioctx, "foo", ioctx, p_str, 0);
5666 }
5667}
5668
5669TEST_F(LibRadosTwoPoolsPP, TierFlushDuringFlush) {
5670 SKIP_IF_CRIMSON();
5671 // skip test if not yet octopus
5672 if (_get_required_osd_release(cluster) < "octopus") {
5673 cout << "cluster is not yet octopus, skipping test" << std::endl;
5674 return;
5675 }
5676
5677 bufferlist inbl;
5678
5679 // create a new pool
5680 std::string temp_pool_name = get_temp_pool_name() + "-test-flush";
5681 ASSERT_EQ(0, cluster.pool_create(temp_pool_name.c_str()));
5682
5683 ASSERT_EQ(0, cluster.mon_command(
5684 set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"),
5685 inbl, NULL, NULL));
5686 ASSERT_EQ(0, cluster.mon_command(
5687 set_pool_str(cache_pool_name, "dedup_tier", temp_pool_name),
5688 inbl, NULL, NULL));
5689 ASSERT_EQ(0, cluster.mon_command(
5690 set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
5691 inbl, NULL, NULL));
5692 ASSERT_EQ(0, cluster.mon_command(
5693 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
5694 inbl, NULL, NULL));
5695
5696 // create object
5697 bufferlist gbl;
5698 {
5699 //bufferlist bl;
5700 generate_buffer(1024*8, &gbl);
5701 ObjectWriteOperation op;
5702 op.write_full(gbl);
5703 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
5704 }
5705 {
5706 bufferlist bl;
5707 bl.append("there hiHI");
5708 ObjectWriteOperation op;
5709 op.write_full(bl);
5710 ASSERT_EQ(0, ioctx.operate("bar", &op));
5711 }
5712
5713 // wait for maps to settle
5714 cluster.wait_for_latest_osdmap();
5715
5716 // set-chunk to set manifest object
5717 {
5718 ObjectReadOperation op;
5719 op.set_chunk(0, 2, ioctx, "bar", 0,
5720 CEPH_OSD_OP_FLAG_WITH_REFERENCE);
5721 librados::AioCompletion *completion = cluster.aio_create_completion();
5722 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
5723 librados::OPERATION_IGNORE_CACHE, NULL));
5724 completion->wait_for_complete();
5725 ASSERT_EQ(0, completion->get_return_value());
5726 completion->release();
5727 }
5728
5729 // delete temp pool, so flushing chunk will fail
5730 ASSERT_EQ(0, s_cluster.pool_delete(temp_pool_name.c_str()));
5731
5732 // wait for maps to settle
5733 cluster.wait_for_latest_osdmap();
5734
5735 // flush to check if proper error is returned
5736 {
5737 ObjectReadOperation op;
5738 op.tier_flush();
5739 librados::AioCompletion *completion = cluster.aio_create_completion();
5740 ASSERT_EQ(0, cache_ioctx.aio_operate(
5741 "foo", completion, &op,
5742 librados::OPERATION_IGNORE_CACHE, NULL));
5743 completion->wait_for_complete();
5744 ASSERT_EQ(-ENOENT, completion->get_return_value());
5745 completion->release();
5746 }
5747
5748}
5749
5750TEST_F(LibRadosTwoPoolsPP, ManifestSnapHasChunk) {
5751 SKIP_IF_CRIMSON();
5752 // skip test if not yet octopus
5753 if (_get_required_osd_release(cluster) < "octopus") {
5754 cout << "cluster is not yet octopus, skipping test" << std::endl;
5755 return;
5756 }
5757
5758 bufferlist inbl;
5759 ASSERT_EQ(0, cluster.mon_command(
5760 set_pool_str(pool_name, "fingerprint_algorithm", "sha1"),
5761 inbl, NULL, NULL));
5762 cluster.wait_for_latest_osdmap();
5763
5764 // create object
5765 {
5766 bufferlist bl;
5767 bl.append("there HIHI");
5768 ObjectWriteOperation op;
5769 op.write_full(bl);
5770 ASSERT_EQ(0, ioctx.operate("foo", &op));
5771 }
5772
5773 string er_fp_oid, hi_fp_oid, HI_fp_oid, ai_fp_oid, bi_fp_oid,
5774 Er_fp_oid, Hi_fp_oid, SI_fp_oid;
5775
5776 // get fp_oid
5777 er_fp_oid = get_fp_oid("er", "sha1");
5778 hi_fp_oid = get_fp_oid("hi", "sha1");
5779 HI_fp_oid = get_fp_oid("HI", "sha1");
5780 ai_fp_oid = get_fp_oid("ai", "sha1");
5781 bi_fp_oid = get_fp_oid("bi", "sha1");
5782 Er_fp_oid = get_fp_oid("Er", "sha1");
5783 Hi_fp_oid = get_fp_oid("Hi", "sha1");
5784 SI_fp_oid = get_fp_oid("SI", "sha1");
5785
5786 // write
5787 {
5788 ObjectWriteOperation op;
5789 bufferlist bl;
5790 bl.append("er");
5791 op.write_full(bl);
5792 ASSERT_EQ(0, cache_ioctx.operate(er_fp_oid, &op));
5793 }
5794 // write
5795 {
5796 ObjectWriteOperation op;
5797 bufferlist bl;
5798 bl.append("hi");
5799 op.write_full(bl);
5800 ASSERT_EQ(0, cache_ioctx.operate(hi_fp_oid, &op));
5801 }
5802 // write
5803 {
5804 ObjectWriteOperation op;
5805 bufferlist bl;
5806 bl.append("HI");
5807 op.write_full(bl);
5808 ASSERT_EQ(0, cache_ioctx.operate(HI_fp_oid, &op));
5809 }
5810 // write
5811 {
5812 ObjectWriteOperation op;
5813 bufferlist bl;
5814 bl.append("ai");
5815 op.write_full(bl);
5816 ASSERT_EQ(0, cache_ioctx.operate(ai_fp_oid, &op));
5817 }
5818 // write
5819 {
5820 ObjectWriteOperation op;
5821 bufferlist bl;
5822 bl.append("bi");
5823 op.write_full(bl);
5824 ASSERT_EQ(0, cache_ioctx.operate(bi_fp_oid, &op));
5825 }
5826 // write
5827 {
5828 ObjectWriteOperation op;
5829 bufferlist bl;
5830 bl.append("Er");
5831 op.write_full(bl);
5832 ASSERT_EQ(0, cache_ioctx.operate(Er_fp_oid, &op));
5833 }
5834 // write
5835 {
5836 ObjectWriteOperation op;
5837 bufferlist bl;
5838 bl.append("Hi");
5839 op.write_full(bl);
5840 ASSERT_EQ(0, cache_ioctx.operate(Hi_fp_oid, &op));
5841 }
5842 // write
5843 {
5844 ObjectWriteOperation op;
5845 bufferlist bl;
5846 bl.append("SI");
5847 op.write_full(bl);
5848 ASSERT_EQ(0, cache_ioctx.operate(SI_fp_oid, &op));
5849 }
5850
5851 // set-chunk (dedup)
5852 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, HI_fp_oid, "foo");
5853 // set-chunk (dedup)
5854 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, HI_fp_oid, "foo");
5855
5856 // foo head: [hi] [HI]
5857
5858 // create a snapshot, clone
5859 vector<uint64_t> my_snaps(1);
5860 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
5861 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5862 my_snaps));
5863
5864
5865 // create a clone
5866 {
5867 bufferlist bl;
5868 bl.append("a");
5869 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
5870 }
5871 // write
5872 {
5873 bufferlist bl;
5874 bl.append("a");
5875 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
5876 }
5877 // write
5878 {
5879 bufferlist bl;
5880 bl.append("S");
5881 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 8));
5882 }
5883
5884 // foo snap[0]: [hi] [HI]
5885 // foo head : [er] [ai] [SI]
5886
5887 // set-chunk (dedup)
5888 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, er_fp_oid, "foo");
5889 // set-chunk (dedup)
5890 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, ai_fp_oid, "foo");
5891 // set-chunk (dedup)
5892 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, SI_fp_oid, "foo");
5893
5894 my_snaps.resize(2);
5895 my_snaps[1] = my_snaps[0];
5896 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
5897 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5898 my_snaps));
5899
5900 // create a clone
5901 {
5902 bufferlist bl;
5903 bl.append("b");
5904 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
5905 }
5906 // write
5907 {
5908 bufferlist bl;
5909 bl.append("b");
5910 ASSERT_EQ(0, ioctx.write("foo", bl, 1, 6));
5911 }
5912
5913 // foo snap[1]: [HI] [HI]
5914 // foo snap[0]: [er] [ai] [SI]
5915 // foo head : [er] [bi] [SI]
5916
5917 // set-chunk (dedup)
5918 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, bi_fp_oid, "foo");
5919
5920 {
5921 ASSERT_EQ(1, cls_cas_references_chunk(ioctx, "foo", SI_fp_oid));
5922 ASSERT_EQ(1, cls_cas_references_chunk(ioctx, "foo", er_fp_oid));
5923 ASSERT_EQ(1, cls_cas_references_chunk(ioctx, "foo", ai_fp_oid));
5924 ASSERT_EQ(2, cls_cas_references_chunk(ioctx, "foo", HI_fp_oid));
5925 ASSERT_EQ(-ENOLINK, cls_cas_references_chunk(ioctx, "foo", Hi_fp_oid));
5926 }
5927}
5928
5929TEST_F(LibRadosTwoPoolsPP, ManifestRollback) {
5930 SKIP_IF_CRIMSON();
5931 // skip test if not yet pacific
5932 if (_get_required_osd_release(cluster) < "pacific") {
5933 cout << "cluster is not yet pacific, skipping test" << std::endl;
5934 return;
5935 }
5936
5937 // create object
5938 {
5939 bufferlist bl;
5940 bl.append("CDere hiHI");
5941 ObjectWriteOperation op;
5942 op.write_full(bl);
5943 ASSERT_EQ(0, ioctx.operate("foo", &op));
5944 }
5945 {
5946 bufferlist bl;
5947 bl.append("ABere hiHI");
5948 ObjectWriteOperation op;
5949 op.write_full(bl);
5950 ASSERT_EQ(0, cache_ioctx.operate("chunk1", &op));
5951 }
5952 {
5953 bufferlist bl;
5954 bl.append("CDere hiHI");
5955 ObjectWriteOperation op;
5956 op.write_full(bl);
5957 ASSERT_EQ(0, cache_ioctx.operate("chunk2", &op));
5958 }
5959 {
5960 bufferlist bl;
5961 bl.append("EFere hiHI");
5962 ObjectWriteOperation op;
5963 op.write_full(bl);
5964 ASSERT_EQ(0, cache_ioctx.operate("chunk3", &op));
5965 }
5966
5967 // wait for maps to settle
5968 cluster.wait_for_latest_osdmap();
5969
5970 // create a snapshot, clone
5971 vector<uint64_t> my_snaps(1);
5972 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
5973 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5974 my_snaps));
5975
5976 {
5977 bufferlist bl;
5978 bl.append("there hiHI");
5979 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
5980 }
5981
5982 my_snaps.resize(2);
5983 my_snaps[1] = my_snaps[0];
5984 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
5985 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
5986 my_snaps));
5987
5988 {
5989 bufferlist bl;
5990 bl.append("thABe hiEF");
5991 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
5992 }
5993
5994 // set-chunk
5995 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk1", "foo");
5996 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk3", "foo");
5997 // foo snap[1]:
5998 // foo snap[0]:
5999 // foo head : [chunk1] [chunk3]
6000
6001 ioctx.snap_set_read(my_snaps[1]);
6002 // set-chunk
6003 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 10, "chunk2", "foo");
6004 // foo snap[1]: [ chunk2 ]
6005 // foo snap[0]:
6006 // foo head : [chunk1] [chunk3]
6007
6008 // foo snap[1]: [ chunk2 ]
6009 // foo snap[0]:
6010 // foo head : [chunk1] [chunk3]
6011
6012 ASSERT_EQ(0, ioctx.selfmanaged_snap_rollback("foo", my_snaps[0]));
6013
6014 ioctx.snap_set_read(librados::SNAP_HEAD);
6015 {
6016 bufferlist bl;
6017 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6018 ASSERT_EQ('t', bl[0]);
6019 }
6020
6021 ASSERT_EQ(0, ioctx.selfmanaged_snap_rollback("foo", my_snaps[1]));
6022
6023 {
6024 bufferlist bl;
6025 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6026 ASSERT_EQ('C', bl[0]);
6027 }
6028
6029}
6030
6031TEST_F(LibRadosTwoPoolsPP, ManifestRollbackRefcount) {
6032 SKIP_IF_CRIMSON();
6033 // skip test if not yet pacific
6034 if (_get_required_osd_release(cluster) < "pacific") {
6035 cout << "cluster is not yet pacific, skipping test" << std::endl;
6036 return;
6037 }
6038
6039 // create object
6040 {
6041 bufferlist bl;
6042 bl.append("CDere hiHI");
6043 ObjectWriteOperation op;
6044 op.write_full(bl);
6045 ASSERT_EQ(0, ioctx.operate("foo", &op));
6046 }
6047 {
6048 bufferlist bl;
6049 bl.append("ABere hiHI");
6050 ObjectWriteOperation op;
6051 op.write_full(bl);
6052 ASSERT_EQ(0, cache_ioctx.operate("chunk1", &op));
6053 }
6054 {
6055 bufferlist bl;
6056 bl.append("CDere hiHI");
6057 ObjectWriteOperation op;
6058 op.write_full(bl);
6059 ASSERT_EQ(0, cache_ioctx.operate("chunk2", &op));
6060 }
6061 {
6062 bufferlist bl;
6063 bl.append("EFere hiHI");
6064 ObjectWriteOperation op;
6065 op.write_full(bl);
6066 ASSERT_EQ(0, cache_ioctx.operate("chunk3", &op));
6067 }
6068 {
6069 bufferlist bl;
6070 bl.append("DDDDD hiHI");
6071 ObjectWriteOperation op;
6072 op.write_full(bl);
6073 ASSERT_EQ(0, cache_ioctx.operate("chunk4", &op));
6074 }
6075 {
6076 bufferlist bl;
6077 bl.append("EEEEE hiHI");
6078 ObjectWriteOperation op;
6079 op.write_full(bl);
6080 ASSERT_EQ(0, cache_ioctx.operate("chunk5", &op));
6081 }
6082
6083 // wait for maps to settle
6084 cluster.wait_for_latest_osdmap();
6085
6086 // create a snapshot, clone
6087 vector<uint64_t> my_snaps(1);
6088 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
6089 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
6090 my_snaps));
6091
6092 {
6093 bufferlist bl;
6094 bl.append("there hiHI");
6095 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
6096 }
6097
6098 my_snaps.resize(2);
6099 my_snaps[1] = my_snaps[0];
6100 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
6101 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
6102 my_snaps));
6103
6104 {
6105 bufferlist bl;
6106 bl.append("thABe hiEF");
6107 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
6108 }
6109
6110 // set-chunk
6111 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk1", "foo");
6112 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk3", "foo");
6113 // foo snap[1]:
6114 // foo snap[0]:
6115 // foo head : [chunk1] [chunk3]
6116
6117 ioctx.snap_set_read(my_snaps[1]);
6118 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk4", "foo");
6119 manifest_set_chunk(cluster, cache_ioctx, ioctx, 6, 2, "chunk5", "foo");
6120 // foo snap[1]: [chunk4] [chunk5]
6121 // foo snap[0]:
6122 // foo head : [chunk1] [chunk3]
6123
6124 ioctx.snap_set_read(my_snaps[0]);
6125 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 10, "chunk2", "foo");
6126 // foo snap[1]: [chunk4] [chunk5]
6127 // foo snap[0]: [ chunk2 ]
6128 // foo head : [chunk1] [chunk3]
6129
6130 ASSERT_EQ(0, ioctx.selfmanaged_snap_rollback("foo", my_snaps[1]));
6131 // foo snap[1]: [chunk4] [chunk5]
6132 // foo snap[0]: [ chunk2 ]
6133 // foo head : [chunk4] [chunk5] <-- will contain these contents
6134
6135 sleep(10);
6136 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk1", 0);
6137 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk3", 0);
6138
6139 ioctx.selfmanaged_snap_remove(my_snaps[1]);
6140 sleep(10);
6141 // foo snap[1]:
6142 // foo snap[0]: [ chunk2 ]
6143 // foo head : [chunk4] [chunk5]
6144 ioctx.snap_set_read(librados::SNAP_HEAD);
6145 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk4", 1);
6146 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk5", 1);
6147
6148 {
6149 bufferlist bl;
6150 bl.append("thABe hiEF");
6151 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
6152 }
6153 // foo snap[1]:
6154 // foo snap[0]: [ chunk2 ]
6155 // foo head :
6156 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk1", 0);
6157 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk3", 0);
6158 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk4", 0);
6159 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk5", 0);
6160 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk2", 1);
6161}
6162
6163TEST_F(LibRadosTwoPoolsPP, ManifestEvictRollback) {
6164 SKIP_IF_CRIMSON();
6165 // skip test if not yet pacific
6166 if (_get_required_osd_release(cluster) < "pacific") {
6167 cout << "cluster is not yet pacific, skipping test" << std::endl;
6168 return;
6169 }
6170
6171 // create object
6172 {
6173 bufferlist bl;
6174 bl.append("CDere hiHI");
6175 ObjectWriteOperation op;
6176 op.write_full(bl);
6177 ASSERT_EQ(0, ioctx.operate("foo", &op));
6178 }
6179 {
6180 bufferlist bl;
6181 bl.append("ABere hiHI");
6182 ObjectWriteOperation op;
6183 op.write_full(bl);
6184 ASSERT_EQ(0, cache_ioctx.operate("chunk1", &op));
6185 }
6186 {
6187 bufferlist bl;
6188 bl.append("CDere hiHI");
6189 ObjectWriteOperation op;
6190 op.write_full(bl);
6191 ASSERT_EQ(0, cache_ioctx.operate("chunk2", &op));
6192 }
6193 {
6194 bufferlist bl;
6195 bl.append("EFere hiHI");
6196 ObjectWriteOperation op;
6197 op.write_full(bl);
6198 ASSERT_EQ(0, cache_ioctx.operate("chunk3", &op));
6199 }
6200
6201 // wait for maps to settle
6202 cluster.wait_for_latest_osdmap();
6203
6204 // create a snapshot, clone
6205 vector<uint64_t> my_snaps(1);
6206 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
6207 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
6208 my_snaps));
6209
6210 {
6211 bufferlist bl;
6212 bl.append("there hiHI");
6213 ASSERT_EQ(0, ioctx.write("foo", bl, bl.length(), 0));
6214 }
6215
6216
6217 // set-chunk
6218 manifest_set_chunk(cluster, cache_ioctx, ioctx, 2, 2, "chunk1", "foo");
6219 manifest_set_chunk(cluster, cache_ioctx, ioctx, 8, 2, "chunk3", "foo");
6220 // foo snap[0]:
6221 // foo head : [chunk1] [chunk3]
6222
6223 ioctx.snap_set_read(my_snaps[0]);
6224 manifest_set_chunk(cluster, cache_ioctx, ioctx, 0, 10, "chunk2", "foo");
6225 // foo snap[0]: [ chunk2 ]
6226 // foo head : [chunk1] [chunk3]
6227
6228 sleep(10);
6229 ioctx.snap_set_read(librados::SNAP_HEAD);
6230 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk1", 1);
6231 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk3", 1);
6232
6233
6234 ioctx.snap_set_read(my_snaps[0]);
6235 // evict--this makes the chunk missing state
6236 {
6237 ObjectReadOperation op, stat_op;
6238 op.tier_evict();
6239 librados::AioCompletion *completion = cluster.aio_create_completion();
6240 ASSERT_EQ(0, ioctx.aio_operate(
6241 "foo", completion, &op,
6242 librados::OPERATION_IGNORE_OVERLAY, NULL));
6243 completion->wait_for_complete();
6244 ASSERT_EQ(0, completion->get_return_value());
6245 }
6246
6247 // rollback to my_snaps[0]
6248 ASSERT_EQ(0, ioctx.selfmanaged_snap_rollback("foo", my_snaps[0]));
6249
6250 ioctx.snap_set_read(librados::SNAP_HEAD);
6251 {
6252 bufferlist bl;
6253 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6254 ASSERT_EQ('C', bl[0]);
6255 }
6256
6257 is_intended_refcount_state(ioctx, "foo", cache_ioctx, "chunk2", 1);
6258}
6259
6260class LibRadosTwoPoolsECPP : public RadosTestECPP
6261{
6262public:
6263 LibRadosTwoPoolsECPP() {};
6264 ~LibRadosTwoPoolsECPP() override {};
6265protected:
6266 static void SetUpTestCase() {
6267 SKIP_IF_CRIMSON();
6268 pool_name = get_temp_pool_name();
6269 ASSERT_EQ("", create_one_ec_pool_pp(pool_name, s_cluster));
6270 }
6271 static void TearDownTestCase() {
6272 SKIP_IF_CRIMSON();
6273 ASSERT_EQ(0, destroy_one_ec_pool_pp(pool_name, s_cluster));
6274 }
6275 static std::string cache_pool_name;
6276
6277 void SetUp() override {
6278 SKIP_IF_CRIMSON();
6279 cache_pool_name = get_temp_pool_name();
6280 ASSERT_EQ(0, s_cluster.pool_create(cache_pool_name.c_str()));
6281 RadosTestECPP::SetUp();
6282
6283 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
6284 cache_ioctx.application_enable("rados", true);
6285 cache_ioctx.set_namespace(nspace);
6286 }
6287 void TearDown() override {
6288 SKIP_IF_CRIMSON();
6289 // flush + evict cache
6290 flush_evict_all(cluster, cache_ioctx);
6291
6292 bufferlist inbl;
6293 // tear down tiers
6294 ASSERT_EQ(0, cluster.mon_command(
6295 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
6296 "\"}",
6297 inbl, NULL, NULL));
6298 ASSERT_EQ(0, cluster.mon_command(
6299 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
6300 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
6301 inbl, NULL, NULL));
6302
6303 // wait for maps to settle before next test
6304 cluster.wait_for_latest_osdmap();
6305
6306 RadosTestECPP::TearDown();
6307
6308 cleanup_default_namespace(cache_ioctx);
6309 cleanup_namespace(cache_ioctx, nspace);
6310
6311 cache_ioctx.close();
6312 ASSERT_EQ(0, s_cluster.pool_delete(cache_pool_name.c_str()));
6313 }
6314
6315 librados::IoCtx cache_ioctx;
6316};
6317
6318std::string LibRadosTwoPoolsECPP::cache_pool_name;
6319
6320TEST_F(LibRadosTierECPP, Dirty) {
6321 SKIP_IF_CRIMSON();
6322 {
6323 ObjectWriteOperation op;
6324 op.undirty();
6325 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still get 0 if it dne
6326 }
6327 {
6328 ObjectWriteOperation op;
6329 op.create(true);
6330 ASSERT_EQ(0, ioctx.operate("foo", &op));
6331 }
6332 {
6333 bool dirty = false;
6334 int r = -1;
6335 ObjectReadOperation op;
6336 op.is_dirty(&dirty, &r);
6337 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
6338 ASSERT_TRUE(dirty);
6339 ASSERT_EQ(0, r);
6340 }
6341 {
6342 ObjectWriteOperation op;
6343 op.undirty();
6344 ASSERT_EQ(0, ioctx.operate("foo", &op));
6345 }
6346 {
6347 ObjectWriteOperation op;
6348 op.undirty();
6349 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still 0 if already clean
6350 }
6351 {
6352 bool dirty = false;
6353 int r = -1;
6354 ObjectReadOperation op;
6355 op.is_dirty(&dirty, &r);
6356 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
6357 ASSERT_FALSE(dirty);
6358 ASSERT_EQ(0, r);
6359 }
6360 //{
6361 // ObjectWriteOperation op;
6362 // op.truncate(0); // still a write even tho it is a no-op
6363 // ASSERT_EQ(0, ioctx.operate("foo", &op));
6364 //}
6365 //{
6366 // bool dirty = false;
6367 // int r = -1;
6368 // ObjectReadOperation op;
6369 // op.is_dirty(&dirty, &r);
6370 // ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
6371 // ASSERT_TRUE(dirty);
6372 // ASSERT_EQ(0, r);
6373 //}
6374}
6375
6376TEST_F(LibRadosTwoPoolsECPP, Overlay) {
6377 SKIP_IF_CRIMSON();
6378 // create objects
6379 {
6380 bufferlist bl;
6381 bl.append("base");
6382 ObjectWriteOperation op;
6383 op.write_full(bl);
6384 ASSERT_EQ(0, ioctx.operate("foo", &op));
6385 }
6386 {
6387 bufferlist bl;
6388 bl.append("cache");
6389 ObjectWriteOperation op;
6390 op.write_full(bl);
6391 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
6392 }
6393
6394 // configure cache
6395 bufferlist inbl;
6396 ASSERT_EQ(0, cluster.mon_command(
6397 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6398 "\", \"tierpool\": \"" + cache_pool_name +
6399 "\", \"force_nonempty\": \"--force-nonempty\" }",
6400 inbl, NULL, NULL));
6401 ASSERT_EQ(0, cluster.mon_command(
6402 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6403 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6404 inbl, NULL, NULL));
6405
6406 // wait for maps to settle
6407 cluster.wait_for_latest_osdmap();
6408
6409 // by default, the overlay sends us to cache pool
6410 {
6411 bufferlist bl;
6412 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6413 ASSERT_EQ('c', bl[0]);
6414 }
6415 {
6416 bufferlist bl;
6417 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
6418 ASSERT_EQ('c', bl[0]);
6419 }
6420
6421 // unless we say otherwise
6422 {
6423 bufferlist bl;
6424 ObjectReadOperation op;
6425 op.read(0, 1, &bl, NULL);
6426 librados::AioCompletion *completion = cluster.aio_create_completion();
6427 ASSERT_EQ(0, ioctx.aio_operate(
6428 "foo", completion, &op,
6429 librados::OPERATION_IGNORE_OVERLAY, NULL));
6430 completion->wait_for_complete();
6431 ASSERT_EQ(0, completion->get_return_value());
6432 completion->release();
6433 ASSERT_EQ('b', bl[0]);
6434 }
6435}
6436
6437TEST_F(LibRadosTwoPoolsECPP, Promote) {
6438 SKIP_IF_CRIMSON();
6439 // create object
6440 {
6441 bufferlist bl;
6442 bl.append("hi there");
6443 ObjectWriteOperation op;
6444 op.write_full(bl);
6445 ASSERT_EQ(0, ioctx.operate("foo", &op));
6446 }
6447
6448 // configure cache
6449 bufferlist inbl;
6450 ASSERT_EQ(0, cluster.mon_command(
6451 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6452 "\", \"tierpool\": \"" + cache_pool_name +
6453 "\", \"force_nonempty\": \"--force-nonempty\" }",
6454 inbl, NULL, NULL));
6455 ASSERT_EQ(0, cluster.mon_command(
6456 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6457 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6458 inbl, NULL, NULL));
6459 ASSERT_EQ(0, cluster.mon_command(
6460 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
6461 "\", \"mode\": \"writeback\"}",
6462 inbl, NULL, NULL));
6463
6464 // wait for maps to settle
6465 cluster.wait_for_latest_osdmap();
6466
6467 // read, trigger a promote
6468 {
6469 bufferlist bl;
6470 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6471 }
6472
6473 // read, trigger a whiteout
6474 {
6475 bufferlist bl;
6476 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
6477 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
6478 }
6479
6480 // verify the object is present in the cache tier
6481 {
6482 NObjectIterator it = cache_ioctx.nobjects_begin();
6483 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
6484 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
6485 ++it;
6486 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
6487 ++it;
6488 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
6489 }
6490}
6491
6492TEST_F(LibRadosTwoPoolsECPP, PromoteSnap) {
6493 SKIP_IF_CRIMSON();
6494 // create object
6495 {
6496 bufferlist bl;
6497 bl.append("hi there");
6498 ObjectWriteOperation op;
6499 op.write_full(bl);
6500 ASSERT_EQ(0, ioctx.operate("foo", &op));
6501 }
6502 {
6503 bufferlist bl;
6504 bl.append("hi there");
6505 ObjectWriteOperation op;
6506 op.write_full(bl);
6507 ASSERT_EQ(0, ioctx.operate("bar", &op));
6508 }
6509 {
6510 bufferlist bl;
6511 bl.append("hi there");
6512 ObjectWriteOperation op;
6513 op.write_full(bl);
6514 ASSERT_EQ(0, ioctx.operate("baz", &op));
6515 }
6516 {
6517 bufferlist bl;
6518 bl.append("hi there");
6519 ObjectWriteOperation op;
6520 op.write_full(bl);
6521 ASSERT_EQ(0, ioctx.operate("bam", &op));
6522 }
6523
6524 // create a snapshot, clone
6525 vector<uint64_t> my_snaps(1);
6526 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
6527 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
6528 my_snaps));
6529 {
6530 bufferlist bl;
6531 bl.append("ciao!");
6532 ObjectWriteOperation op;
6533 op.write_full(bl);
6534 ASSERT_EQ(0, ioctx.operate("foo", &op));
6535 }
6536 {
6537 bufferlist bl;
6538 bl.append("ciao!");
6539 ObjectWriteOperation op;
6540 op.write_full(bl);
6541 ASSERT_EQ(0, ioctx.operate("bar", &op));
6542 }
6543 {
6544 ObjectWriteOperation op;
6545 op.remove();
6546 ASSERT_EQ(0, ioctx.operate("baz", &op));
6547 }
6548 {
6549 bufferlist bl;
6550 bl.append("ciao!");
6551 ObjectWriteOperation op;
6552 op.write_full(bl);
6553 ASSERT_EQ(0, ioctx.operate("bam", &op));
6554 }
6555
6556 // configure cache
6557 bufferlist inbl;
6558 ASSERT_EQ(0, cluster.mon_command(
6559 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6560 "\", \"tierpool\": \"" + cache_pool_name +
6561 "\", \"force_nonempty\": \"--force-nonempty\" }",
6562 inbl, NULL, NULL));
6563 ASSERT_EQ(0, cluster.mon_command(
6564 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6565 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6566 inbl, NULL, NULL));
6567 ASSERT_EQ(0, cluster.mon_command(
6568 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
6569 "\", \"mode\": \"writeback\"}",
6570 inbl, NULL, NULL));
6571
6572 // wait for maps to settle
6573 cluster.wait_for_latest_osdmap();
6574
6575 // read, trigger a promote on the head
6576 {
6577 bufferlist bl;
6578 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6579 ASSERT_EQ('c', bl[0]);
6580 }
6581 {
6582 bufferlist bl;
6583 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
6584 ASSERT_EQ('c', bl[0]);
6585 }
6586
6587 ioctx.snap_set_read(my_snaps[0]);
6588
6589 // stop and scrub this pg (to make sure scrub can handle missing
6590 // clones in the cache tier)
6591 // This test requires cache tier and base tier to have the same pg_num/pgp_num
6592 {
6593 for (int tries = 0; tries < 5; ++tries) {
6594 IoCtx cache_ioctx;
6595 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
6596 uint32_t hash;
6597 ASSERT_EQ(0, ioctx.get_object_pg_hash_position2("foo", &hash));
6598 ostringstream ss;
6599 ss << "{\"prefix\": \"pg scrub\", \"pgid\": \""
6600 << cache_ioctx.get_id() << "."
6601 << hash
6602 << "\"}";
6603 int r = cluster.mon_command(ss.str(), inbl, NULL, NULL);
6604 if (r == -EAGAIN ||
6605 r == -ENOENT) { // in case mgr osdmap is a bit stale
6606 sleep(5);
6607 continue;
6608 }
6609 ASSERT_EQ(0, r);
6610 break;
6611 }
6612 // give it a few seconds to go. this is sloppy but is usually enough time
6613 cout << "waiting for scrub..." << std::endl;
6614 sleep(15);
6615 cout << "done waiting" << std::endl;
6616 }
6617
6618 // read foo snap
6619 {
6620 bufferlist bl;
6621 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6622 ASSERT_EQ('h', bl[0]);
6623 }
6624
6625 // read bar snap
6626 {
6627 bufferlist bl;
6628 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
6629 ASSERT_EQ('h', bl[0]);
6630 }
6631
6632 // read baz snap
6633 {
6634 bufferlist bl;
6635 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
6636 ASSERT_EQ('h', bl[0]);
6637 }
6638
6639 ioctx.snap_set_read(librados::SNAP_HEAD);
6640
6641 // read foo
6642 {
6643 bufferlist bl;
6644 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6645 ASSERT_EQ('c', bl[0]);
6646 }
6647
6648 // read bar
6649 {
6650 bufferlist bl;
6651 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
6652 ASSERT_EQ('c', bl[0]);
6653 }
6654
6655 // read baz
6656 {
6657 bufferlist bl;
6658 ASSERT_EQ(-ENOENT, ioctx.read("baz", bl, 1, 0));
6659 }
6660
6661 // cleanup
6662 ioctx.selfmanaged_snap_remove(my_snaps[0]);
6663}
6664
6665TEST_F(LibRadosTwoPoolsECPP, PromoteSnapTrimRace) {
6666 SKIP_IF_CRIMSON();
6667 // create object
6668 {
6669 bufferlist bl;
6670 bl.append("hi there");
6671 ObjectWriteOperation op;
6672 op.write_full(bl);
6673 ASSERT_EQ(0, ioctx.operate("foo", &op));
6674 }
6675
6676 // create a snapshot, clone
6677 vector<uint64_t> my_snaps(1);
6678 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
6679 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
6680 my_snaps));
6681 {
6682 bufferlist bl;
6683 bl.append("ciao!");
6684 ObjectWriteOperation op;
6685 op.write_full(bl);
6686 ASSERT_EQ(0, ioctx.operate("foo", &op));
6687 }
6688
6689 // configure cache
6690 bufferlist inbl;
6691 ASSERT_EQ(0, cluster.mon_command(
6692 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6693 "\", \"tierpool\": \"" + cache_pool_name +
6694 "\", \"force_nonempty\": \"--force-nonempty\" }",
6695 inbl, NULL, NULL));
6696 ASSERT_EQ(0, cluster.mon_command(
6697 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6698 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6699 inbl, NULL, NULL));
6700 ASSERT_EQ(0, cluster.mon_command(
6701 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
6702 "\", \"mode\": \"writeback\"}",
6703 inbl, NULL, NULL));
6704
6705 // wait for maps to settle
6706 cluster.wait_for_latest_osdmap();
6707
6708 // delete the snap
6709 ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(my_snaps[0]));
6710
6711 ioctx.snap_set_read(my_snaps[0]);
6712
6713 // read foo snap. the OSD may or may not realize that this snap has
6714 // been logically deleted; either response is valid.
6715 {
6716 bufferlist bl;
6717 int r = ioctx.read("foo", bl, 1, 0);
6718 ASSERT_TRUE(r == 1 || r == -ENOENT);
6719 }
6720
6721 // cleanup
6722 ioctx.selfmanaged_snap_remove(my_snaps[0]);
6723}
6724
6725TEST_F(LibRadosTwoPoolsECPP, Whiteout) {
6726 SKIP_IF_CRIMSON();
6727 // create object
6728 {
6729 bufferlist bl;
6730 bl.append("hi there");
6731 ObjectWriteOperation op;
6732 op.write_full(bl);
6733 ASSERT_EQ(0, ioctx.operate("foo", &op));
6734 }
6735
6736 // configure cache
6737 bufferlist inbl;
6738 ASSERT_EQ(0, cluster.mon_command(
6739 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6740 "\", \"tierpool\": \"" + cache_pool_name +
6741 "\", \"force_nonempty\": \"--force-nonempty\" }",
6742 inbl, NULL, NULL));
6743 ASSERT_EQ(0, cluster.mon_command(
6744 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6745 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6746 inbl, NULL, NULL));
6747 ASSERT_EQ(0, cluster.mon_command(
6748 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
6749 "\", \"mode\": \"writeback\"}",
6750 inbl, NULL, NULL));
6751
6752 // wait for maps to settle
6753 cluster.wait_for_latest_osdmap();
6754
6755 // create some whiteouts, verify they behave
6756 {
6757 ObjectWriteOperation op;
6758 op.assert_exists();
6759 op.remove();
6760 ASSERT_EQ(0, ioctx.operate("foo", &op));
6761 }
6762
6763 {
6764 ObjectWriteOperation op;
6765 op.assert_exists();
6766 op.remove();
6767 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
6768 }
6769 {
6770 ObjectWriteOperation op;
6771 op.assert_exists();
6772 op.remove();
6773 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
6774 }
6775
6776 // verify the whiteouts are there in the cache tier
6777 {
6778 NObjectIterator it = cache_ioctx.nobjects_begin();
6779 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
6780 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
6781 ++it;
6782 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
6783 ++it;
6784 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
6785 }
6786
6787 // delete a whiteout and verify it goes away
6788 ASSERT_EQ(-ENOENT, ioctx.remove("foo"));
6789 {
6790 ObjectWriteOperation op;
6791 op.remove();
6792 librados::AioCompletion *completion = cluster.aio_create_completion();
6793 ASSERT_EQ(0, ioctx.aio_operate("bar", completion, &op,
6794 librados::OPERATION_IGNORE_CACHE));
6795 completion->wait_for_complete();
6796 ASSERT_EQ(0, completion->get_return_value());
6797 completion->release();
6798
6799 NObjectIterator it = cache_ioctx.nobjects_begin();
6800 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
6801 ASSERT_TRUE(it->get_oid() == string("foo"));
6802 ++it;
6803 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
6804 }
6805
6806 // recreate an object and verify we can read it
6807 {
6808 bufferlist bl;
6809 bl.append("hi there");
6810 ObjectWriteOperation op;
6811 op.write_full(bl);
6812 ASSERT_EQ(0, ioctx.operate("foo", &op));
6813 }
6814 {
6815 bufferlist bl;
6816 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6817 ASSERT_EQ('h', bl[0]);
6818 }
6819}
6820
6821TEST_F(LibRadosTwoPoolsECPP, Evict) {
6822 SKIP_IF_CRIMSON();
6823 // create object
6824 {
6825 bufferlist bl;
6826 bl.append("hi there");
6827 ObjectWriteOperation op;
6828 op.write_full(bl);
6829 ASSERT_EQ(0, ioctx.operate("foo", &op));
6830 }
6831
6832 // configure cache
6833 bufferlist inbl;
6834 ASSERT_EQ(0, cluster.mon_command(
6835 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
6836 "\", \"tierpool\": \"" + cache_pool_name +
6837 "\", \"force_nonempty\": \"--force-nonempty\" }",
6838 inbl, NULL, NULL));
6839 ASSERT_EQ(0, cluster.mon_command(
6840 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
6841 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
6842 inbl, NULL, NULL));
6843 ASSERT_EQ(0, cluster.mon_command(
6844 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
6845 "\", \"mode\": \"writeback\"}",
6846 inbl, NULL, NULL));
6847
6848 // wait for maps to settle
6849 cluster.wait_for_latest_osdmap();
6850
6851 // read, trigger a promote
6852 {
6853 bufferlist bl;
6854 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
6855 }
6856
6857 // read, trigger a whiteout, and a dirty object
6858 {
6859 bufferlist bl;
6860 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
6861 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
6862 ASSERT_EQ(0, ioctx.write("bar", bl, bl.length(), 0));
6863 }
6864
6865 // verify the object is present in the cache tier
6866 {
6867 NObjectIterator it = cache_ioctx.nobjects_begin();
6868 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
6869 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
6870 ++it;
6871 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
6872 ++it;
6873 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
6874 }
6875
6876 // pin
6877 {
6878 ObjectWriteOperation op;
6879 op.cache_pin();
6880 librados::AioCompletion *completion = cluster.aio_create_completion();
6881 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
6882 completion->wait_for_complete();
6883 ASSERT_EQ(0, completion->get_return_value());
6884 completion->release();
6885 }
6886
6887 // evict the pinned object with -EPERM
6888 {
6889 ObjectReadOperation op;
6890 op.cache_evict();
6891 librados::AioCompletion *completion = cluster.aio_create_completion();
6892 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
6893 librados::OPERATION_IGNORE_CACHE,
6894 NULL));
6895 completion->wait_for_complete();
6896 ASSERT_EQ(-EPERM, completion->get_return_value());
6897 completion->release();
6898 }
6899
6900 // unpin
6901 {
6902 ObjectWriteOperation op;
6903 op.cache_unpin();
6904 librados::AioCompletion *completion = cluster.aio_create_completion();
6905 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
6906 completion->wait_for_complete();
6907 ASSERT_EQ(0, completion->get_return_value());
6908 completion->release();
6909 }
6910
6911 // flush
6912 {
6913 ObjectReadOperation op;
6914 op.cache_flush();
6915 librados::AioCompletion *completion = cluster.aio_create_completion();
6916 ASSERT_EQ(0, cache_ioctx.aio_operate(
6917 "foo", completion, &op,
6918 librados::OPERATION_IGNORE_OVERLAY, NULL));
6919 completion->wait_for_complete();
6920 ASSERT_EQ(0, completion->get_return_value());
6921 completion->release();
6922 }
6923
6924 // verify clean
6925 {
6926 bool dirty = false;
6927 int r = -1;
6928 ObjectReadOperation op;
6929 op.is_dirty(&dirty, &r);
6930 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
6931 ASSERT_FALSE(dirty);
6932 ASSERT_EQ(0, r);
6933 }
6934
6935 // evict
6936 {
6937 ObjectReadOperation op;
6938 op.cache_evict();
6939 librados::AioCompletion *completion = cluster.aio_create_completion();
6940 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
6941 librados::OPERATION_IGNORE_CACHE,
6942 NULL));
6943 completion->wait_for_complete();
6944 ASSERT_EQ(0, completion->get_return_value());
6945 completion->release();
6946 }
6947 {
6948 ObjectReadOperation op;
6949 op.cache_evict();
6950 librados::AioCompletion *completion = cluster.aio_create_completion();
6951 ASSERT_EQ(0, cache_ioctx.aio_operate(
6952 "foo", completion, &op,
6953 librados::OPERATION_IGNORE_CACHE, NULL));
6954 completion->wait_for_complete();
6955 ASSERT_EQ(0, completion->get_return_value());
6956 completion->release();
6957 }
6958 {
6959 ObjectReadOperation op;
6960 op.cache_evict();
6961 librados::AioCompletion *completion = cluster.aio_create_completion();
6962 ASSERT_EQ(0, cache_ioctx.aio_operate(
6963 "bar", completion, &op,
6964 librados::OPERATION_IGNORE_CACHE, NULL));
6965 completion->wait_for_complete();
6966 ASSERT_EQ(-EBUSY, completion->get_return_value());
6967 completion->release();
6968 }
6969}
6970
6971TEST_F(LibRadosTwoPoolsECPP, EvictSnap) {
6972 SKIP_IF_CRIMSON();
6973 // create object
6974 {
6975 bufferlist bl;
6976 bl.append("hi there");
6977 ObjectWriteOperation op;
6978 op.write_full(bl);
6979 ASSERT_EQ(0, ioctx.operate("foo", &op));
6980 }
6981 {
6982 bufferlist bl;
6983 bl.append("hi there");
6984 ObjectWriteOperation op;
6985 op.write_full(bl);
6986 ASSERT_EQ(0, ioctx.operate("bar", &op));
6987 }
6988 {
6989 bufferlist bl;
6990 bl.append("hi there");
6991 ObjectWriteOperation op;
6992 op.write_full(bl);
6993 ASSERT_EQ(0, ioctx.operate("baz", &op));
6994 }
6995 {
6996 bufferlist bl;
6997 bl.append("hi there");
6998 ObjectWriteOperation op;
6999 op.write_full(bl);
7000 ASSERT_EQ(0, ioctx.operate("bam", &op));
7001 }
7002
7003 // create a snapshot, clone
7004 vector<uint64_t> my_snaps(1);
7005 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
7006 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
7007 my_snaps));
7008 {
7009 bufferlist bl;
7010 bl.append("ciao!");
7011 ObjectWriteOperation op;
7012 op.write_full(bl);
7013 ASSERT_EQ(0, ioctx.operate("foo", &op));
7014 }
7015 {
7016 bufferlist bl;
7017 bl.append("ciao!");
7018 ObjectWriteOperation op;
7019 op.write_full(bl);
7020 ASSERT_EQ(0, ioctx.operate("bar", &op));
7021 }
7022 {
7023 ObjectWriteOperation op;
7024 op.remove();
7025 ASSERT_EQ(0, ioctx.operate("baz", &op));
7026 }
7027 {
7028 bufferlist bl;
7029 bl.append("ciao!");
7030 ObjectWriteOperation op;
7031 op.write_full(bl);
7032 ASSERT_EQ(0, ioctx.operate("bam", &op));
7033 }
7034
7035 // configure cache
7036 bufferlist inbl;
7037 ASSERT_EQ(0, cluster.mon_command(
7038 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
7039 "\", \"tierpool\": \"" + cache_pool_name +
7040 "\", \"force_nonempty\": \"--force-nonempty\" }",
7041 inbl, NULL, NULL));
7042 ASSERT_EQ(0, cluster.mon_command(
7043 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7044 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7045 inbl, NULL, NULL));
7046 ASSERT_EQ(0, cluster.mon_command(
7047 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
7048 "\", \"mode\": \"writeback\"}",
7049 inbl, NULL, NULL));
7050
7051 // wait for maps to settle
7052 cluster.wait_for_latest_osdmap();
7053
7054 // read, trigger a promote on the head
7055 {
7056 bufferlist bl;
7057 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7058 ASSERT_EQ('c', bl[0]);
7059 }
7060 {
7061 bufferlist bl;
7062 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
7063 ASSERT_EQ('c', bl[0]);
7064 }
7065
7066 // evict bam
7067 {
7068 ObjectReadOperation op;
7069 op.cache_evict();
7070 librados::AioCompletion *completion = cluster.aio_create_completion();
7071 ASSERT_EQ(0, cache_ioctx.aio_operate(
7072 "bam", completion, &op,
7073 librados::OPERATION_IGNORE_CACHE, NULL));
7074 completion->wait_for_complete();
7075 ASSERT_EQ(0, completion->get_return_value());
7076 completion->release();
7077 }
7078 {
7079 bufferlist bl;
7080 ObjectReadOperation op;
7081 op.read(1, 0, &bl, NULL);
7082 librados::AioCompletion *completion = cluster.aio_create_completion();
7083 ASSERT_EQ(0, cache_ioctx.aio_operate(
7084 "bam", completion, &op,
7085 librados::OPERATION_IGNORE_CACHE, NULL));
7086 completion->wait_for_complete();
7087 ASSERT_EQ(-ENOENT, completion->get_return_value());
7088 completion->release();
7089 }
7090
7091 // read foo snap
7092 ioctx.snap_set_read(my_snaps[0]);
7093 {
7094 bufferlist bl;
7095 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7096 ASSERT_EQ('h', bl[0]);
7097 }
7098
7099 // evict foo snap
7100 {
7101 ObjectReadOperation op;
7102 op.cache_evict();
7103 librados::AioCompletion *completion = cluster.aio_create_completion();
7104 ASSERT_EQ(0, ioctx.aio_operate(
7105 "foo", completion, &op,
7106 librados::OPERATION_IGNORE_CACHE, NULL));
7107 completion->wait_for_complete();
7108 ASSERT_EQ(0, completion->get_return_value());
7109 completion->release();
7110 }
7111 // snap is gone...
7112 {
7113 bufferlist bl;
7114 ObjectReadOperation op;
7115 op.read(1, 0, &bl, NULL);
7116 librados::AioCompletion *completion = cluster.aio_create_completion();
7117 ASSERT_EQ(0, ioctx.aio_operate(
7118 "foo", completion, &op,
7119 librados::OPERATION_IGNORE_CACHE, NULL));
7120 completion->wait_for_complete();
7121 ASSERT_EQ(-ENOENT, completion->get_return_value());
7122 completion->release();
7123 }
7124 // head is still there...
7125 ioctx.snap_set_read(librados::SNAP_HEAD);
7126 {
7127 bufferlist bl;
7128 ObjectReadOperation op;
7129 op.read(1, 0, &bl, NULL);
7130 librados::AioCompletion *completion = cluster.aio_create_completion();
7131 ASSERT_EQ(0, ioctx.aio_operate(
7132 "foo", completion, &op,
7133 librados::OPERATION_IGNORE_CACHE, NULL));
7134 completion->wait_for_complete();
7135 ASSERT_EQ(0, completion->get_return_value());
7136 completion->release();
7137 }
7138
7139 // promote head + snap of bar
7140 ioctx.snap_set_read(librados::SNAP_HEAD);
7141 {
7142 bufferlist bl;
7143 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
7144 ASSERT_EQ('c', bl[0]);
7145 }
7146 ioctx.snap_set_read(my_snaps[0]);
7147 {
7148 bufferlist bl;
7149 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
7150 ASSERT_EQ('h', bl[0]);
7151 }
7152
7153 // evict bar head (fail)
7154 ioctx.snap_set_read(librados::SNAP_HEAD);
7155 {
7156 ObjectReadOperation op;
7157 op.cache_evict();
7158 librados::AioCompletion *completion = cluster.aio_create_completion();
7159 ASSERT_EQ(0, ioctx.aio_operate(
7160 "bar", completion, &op,
7161 librados::OPERATION_IGNORE_CACHE, NULL));
7162 completion->wait_for_complete();
7163 ASSERT_EQ(-EBUSY, completion->get_return_value());
7164 completion->release();
7165 }
7166
7167 // evict bar snap
7168 ioctx.snap_set_read(my_snaps[0]);
7169 {
7170 ObjectReadOperation op;
7171 op.cache_evict();
7172 librados::AioCompletion *completion = cluster.aio_create_completion();
7173 ASSERT_EQ(0, ioctx.aio_operate(
7174 "bar", completion, &op,
7175 librados::OPERATION_IGNORE_CACHE, NULL));
7176 completion->wait_for_complete();
7177 ASSERT_EQ(0, completion->get_return_value());
7178 completion->release();
7179 }
7180 // ...and then head
7181 ioctx.snap_set_read(librados::SNAP_HEAD);
7182 {
7183 bufferlist bl;
7184 ObjectReadOperation op;
7185 op.read(1, 0, &bl, NULL);
7186 librados::AioCompletion *completion = cluster.aio_create_completion();
7187 ASSERT_EQ(0, ioctx.aio_operate(
7188 "bar", completion, &op,
7189 librados::OPERATION_IGNORE_CACHE, NULL));
7190 completion->wait_for_complete();
7191 ASSERT_EQ(0, completion->get_return_value());
7192 completion->release();
7193 }
7194 {
7195 ObjectReadOperation op;
7196 op.cache_evict();
7197 librados::AioCompletion *completion = cluster.aio_create_completion();
7198 ASSERT_EQ(0, ioctx.aio_operate(
7199 "bar", completion, &op,
7200 librados::OPERATION_IGNORE_CACHE, NULL));
7201 completion->wait_for_complete();
7202 ASSERT_EQ(0, completion->get_return_value());
7203 completion->release();
7204 }
7205
7206 // cleanup
7207 ioctx.selfmanaged_snap_remove(my_snaps[0]);
7208}
7209
7210TEST_F(LibRadosTwoPoolsECPP, TryFlush) {
7211 SKIP_IF_CRIMSON();
7212 // configure cache
7213 bufferlist inbl;
7214 ASSERT_EQ(0, cluster.mon_command(
7215 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
7216 "\", \"tierpool\": \"" + cache_pool_name +
7217 "\", \"force_nonempty\": \"--force-nonempty\" }",
7218 inbl, NULL, NULL));
7219 ASSERT_EQ(0, cluster.mon_command(
7220 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7221 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7222 inbl, NULL, NULL));
7223 ASSERT_EQ(0, cluster.mon_command(
7224 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
7225 "\", \"mode\": \"writeback\"}",
7226 inbl, NULL, NULL));
7227
7228 // wait for maps to settle
7229 cluster.wait_for_latest_osdmap();
7230
7231 // create object
7232 {
7233 bufferlist bl;
7234 bl.append("hi there");
7235 ObjectWriteOperation op;
7236 op.write_full(bl);
7237 ASSERT_EQ(0, ioctx.operate("foo", &op));
7238 }
7239
7240 // verify the object is present in the cache tier
7241 {
7242 NObjectIterator it = cache_ioctx.nobjects_begin();
7243 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
7244 ASSERT_TRUE(it->get_oid() == string("foo"));
7245 ++it;
7246 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7247 }
7248
7249 // verify the object is NOT present in the base tier
7250 {
7251 NObjectIterator it = ioctx.nobjects_begin();
7252 ASSERT_TRUE(it == ioctx.nobjects_end());
7253 }
7254
7255 // verify dirty
7256 {
7257 bool dirty = false;
7258 int r = -1;
7259 ObjectReadOperation op;
7260 op.is_dirty(&dirty, &r);
7261 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
7262 ASSERT_TRUE(dirty);
7263 ASSERT_EQ(0, r);
7264 }
7265
7266 // pin
7267 {
7268 ObjectWriteOperation op;
7269 op.cache_pin();
7270 librados::AioCompletion *completion = cluster.aio_create_completion();
7271 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
7272 completion->wait_for_complete();
7273 ASSERT_EQ(0, completion->get_return_value());
7274 completion->release();
7275 }
7276
7277 // flush the pinned object with -EPERM
7278 {
7279 ObjectReadOperation op;
7280 op.cache_try_flush();
7281 librados::AioCompletion *completion = cluster.aio_create_completion();
7282 ASSERT_EQ(0, cache_ioctx.aio_operate(
7283 "foo", completion, &op,
7284 librados::OPERATION_IGNORE_OVERLAY |
7285 librados::OPERATION_SKIPRWLOCKS, NULL));
7286 completion->wait_for_complete();
7287 ASSERT_EQ(-EPERM, completion->get_return_value());
7288 completion->release();
7289 }
7290
7291 // unpin
7292 {
7293 ObjectWriteOperation op;
7294 op.cache_unpin();
7295 librados::AioCompletion *completion = cluster.aio_create_completion();
7296 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
7297 completion->wait_for_complete();
7298 ASSERT_EQ(0, completion->get_return_value());
7299 completion->release();
7300 }
7301
7302 // flush
7303 {
7304 ObjectReadOperation op;
7305 op.cache_try_flush();
7306 librados::AioCompletion *completion = cluster.aio_create_completion();
7307 ASSERT_EQ(0, cache_ioctx.aio_operate(
7308 "foo", completion, &op,
7309 librados::OPERATION_IGNORE_OVERLAY |
7310 librados::OPERATION_SKIPRWLOCKS, NULL));
7311 completion->wait_for_complete();
7312 ASSERT_EQ(0, completion->get_return_value());
7313 completion->release();
7314 }
7315
7316 // verify clean
7317 {
7318 bool dirty = false;
7319 int r = -1;
7320 ObjectReadOperation op;
7321 op.is_dirty(&dirty, &r);
7322 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
7323 ASSERT_FALSE(dirty);
7324 ASSERT_EQ(0, r);
7325 }
7326
7327 // verify in base tier
7328 {
7329 NObjectIterator it = ioctx.nobjects_begin();
7330 ASSERT_TRUE(it != ioctx.nobjects_end());
7331 ASSERT_TRUE(it->get_oid() == string("foo"));
7332 ++it;
7333 ASSERT_TRUE(it == ioctx.nobjects_end());
7334 }
7335
7336 // evict it
7337 {
7338 ObjectReadOperation op;
7339 op.cache_evict();
7340 librados::AioCompletion *completion = cluster.aio_create_completion();
7341 ASSERT_EQ(0, cache_ioctx.aio_operate(
7342 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
7343 completion->wait_for_complete();
7344 ASSERT_EQ(0, completion->get_return_value());
7345 completion->release();
7346 }
7347
7348 // verify no longer in cache tier
7349 {
7350 NObjectIterator it = cache_ioctx.nobjects_begin();
7351 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7352 }
7353}
7354
7355TEST_F(LibRadosTwoPoolsECPP, FailedFlush) {
7356 SKIP_IF_CRIMSON();
7357 // configure cache
7358 bufferlist inbl;
7359 ASSERT_EQ(0, cluster.mon_command(
7360 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
7361 "\", \"tierpool\": \"" + cache_pool_name +
7362 "\", \"force_nonempty\": \"--force-nonempty\" }",
7363 inbl, NULL, NULL));
7364 ASSERT_EQ(0, cluster.mon_command(
7365 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7366 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7367 inbl, NULL, NULL));
7368 ASSERT_EQ(0, cluster.mon_command(
7369 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
7370 "\", \"mode\": \"writeback\"}",
7371 inbl, NULL, NULL));
7372
7373 // wait for maps to settle
7374 cluster.wait_for_latest_osdmap();
7375
7376 // create object
7377 {
7378 bufferlist bl;
7379 bl.append("hi there");
7380 ObjectWriteOperation op;
7381 op.write_full(bl);
7382 ASSERT_EQ(0, ioctx.operate("foo", &op));
7383 }
7384
7385 // verify the object is present in the cache tier
7386 {
7387 NObjectIterator it = cache_ioctx.nobjects_begin();
7388 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
7389 ASSERT_TRUE(it->get_oid() == string("foo"));
7390 ++it;
7391 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7392 }
7393
7394 // verify the object is NOT present in the base tier
7395 {
7396 NObjectIterator it = ioctx.nobjects_begin();
7397 ASSERT_TRUE(it == ioctx.nobjects_end());
7398 }
7399
7400 // set omap
7401 {
7402 ObjectWriteOperation op;
7403 std::map<std::string, bufferlist> omap;
7404 omap["somekey"] = bufferlist();
7405 op.omap_set(omap);
7406 librados::AioCompletion *completion = cluster.aio_create_completion();
7407 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
7408 completion->wait_for_complete();
7409 ASSERT_EQ(0, completion->get_return_value());
7410 completion->release();
7411 }
7412
7413 // flush
7414 {
7415 ObjectReadOperation op;
7416 op.cache_flush();
7417 librados::AioCompletion *completion = cluster.aio_create_completion();
7418 ASSERT_EQ(0, cache_ioctx.aio_operate(
7419 "foo", completion, &op,
7420 librados::OPERATION_IGNORE_OVERLAY, NULL));
7421 completion->wait_for_complete();
7422 ASSERT_NE(0, completion->get_return_value());
7423 completion->release();
7424 }
7425
7426 // get omap
7427 {
7428 ObjectReadOperation op;
7429 bufferlist bl;
7430 int prval = 0;
7431 std::set<std::string> keys;
7432 keys.insert("somekey");
7433 std::map<std::string, bufferlist> map;
7434
7435 op.omap_get_vals_by_keys(keys, &map, &prval);
7436 librados::AioCompletion *completion = cluster.aio_create_completion();
7437 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op, &bl));
7438 sleep(5);
7439 bool completed = completion->is_complete();
7440 if( !completed ) {
7441 cache_ioctx.aio_cancel(completion);
7442 std::cerr << "Most probably test case will hang here, please reset manually" << std::endl;
7443 ASSERT_TRUE(completed); //in fact we are locked forever at test case shutdown unless fix for http://tracker.ceph.com/issues/14511 is applied. Seems there is no workaround for that
7444 }
7445 completion->release();
7446 }
7447 // verify still not in base tier
7448 {
7449 ASSERT_TRUE(ioctx.nobjects_begin() == ioctx.nobjects_end());
7450 }
7451 // erase it
7452 {
7453 ObjectWriteOperation op;
7454 op.remove();
7455 ASSERT_EQ(0, ioctx.operate("foo", &op));
7456 }
7457 // flush whiteout
7458 {
7459 ObjectReadOperation op;
7460 op.cache_flush();
7461 librados::AioCompletion *completion = cluster.aio_create_completion();
7462 ASSERT_EQ(0, cache_ioctx.aio_operate(
7463 "foo", completion, &op,
7464 librados::OPERATION_IGNORE_OVERLAY, NULL));
7465 completion->wait_for_complete();
7466 ASSERT_EQ(0, completion->get_return_value());
7467 completion->release();
7468 }
7469 // evict
7470 {
7471 ObjectReadOperation op;
7472 op.cache_evict();
7473 librados::AioCompletion *completion = cluster.aio_create_completion();
7474 ASSERT_EQ(0, cache_ioctx.aio_operate(
7475 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
7476 completion->wait_for_complete();
7477 ASSERT_EQ(0, completion->get_return_value());
7478 completion->release();
7479 }
7480
7481 // verify no longer in cache tier
7482 {
7483 NObjectIterator it = cache_ioctx.nobjects_begin();
7484 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7485 }
7486 // or base tier
7487 {
7488 NObjectIterator it = ioctx.nobjects_begin();
7489 ASSERT_TRUE(it == ioctx.nobjects_end());
7490 }
7491}
7492
7493TEST_F(LibRadosTwoPoolsECPP, Flush) {
7494 SKIP_IF_CRIMSON();
7495 // configure cache
7496 bufferlist inbl;
7497 ASSERT_EQ(0, cluster.mon_command(
7498 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
7499 "\", \"tierpool\": \"" + cache_pool_name +
7500 "\", \"force_nonempty\": \"--force-nonempty\" }",
7501 inbl, NULL, NULL));
7502 ASSERT_EQ(0, cluster.mon_command(
7503 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7504 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7505 inbl, NULL, NULL));
7506 ASSERT_EQ(0, cluster.mon_command(
7507 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
7508 "\", \"mode\": \"writeback\"}",
7509 inbl, NULL, NULL));
7510
7511 // wait for maps to settle
7512 cluster.wait_for_latest_osdmap();
7513
7514 uint64_t user_version = 0;
7515
7516 // create object
7517 {
7518 bufferlist bl;
7519 bl.append("hi there");
7520 ObjectWriteOperation op;
7521 op.write_full(bl);
7522 ASSERT_EQ(0, ioctx.operate("foo", &op));
7523 }
7524
7525 // verify the object is present in the cache tier
7526 {
7527 NObjectIterator it = cache_ioctx.nobjects_begin();
7528 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
7529 ASSERT_TRUE(it->get_oid() == string("foo"));
7530 ++it;
7531 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7532 }
7533
7534 // verify the object is NOT present in the base tier
7535 {
7536 NObjectIterator it = ioctx.nobjects_begin();
7537 ASSERT_TRUE(it == ioctx.nobjects_end());
7538 }
7539
7540 // verify dirty
7541 {
7542 bool dirty = false;
7543 int r = -1;
7544 ObjectReadOperation op;
7545 op.is_dirty(&dirty, &r);
7546 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
7547 ASSERT_TRUE(dirty);
7548 ASSERT_EQ(0, r);
7549 user_version = cache_ioctx.get_last_version();
7550 }
7551
7552 // pin
7553 {
7554 ObjectWriteOperation op;
7555 op.cache_pin();
7556 librados::AioCompletion *completion = cluster.aio_create_completion();
7557 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
7558 completion->wait_for_complete();
7559 ASSERT_EQ(0, completion->get_return_value());
7560 completion->release();
7561 }
7562
7563 // flush the pinned object with -EPERM
7564 {
7565 ObjectReadOperation op;
7566 op.cache_try_flush();
7567 librados::AioCompletion *completion = cluster.aio_create_completion();
7568 ASSERT_EQ(0, cache_ioctx.aio_operate(
7569 "foo", completion, &op,
7570 librados::OPERATION_IGNORE_OVERLAY |
7571 librados::OPERATION_SKIPRWLOCKS, NULL));
7572 completion->wait_for_complete();
7573 ASSERT_EQ(-EPERM, completion->get_return_value());
7574 completion->release();
7575 }
7576
7577 // unpin
7578 {
7579 ObjectWriteOperation op;
7580 op.cache_unpin();
7581 librados::AioCompletion *completion = cluster.aio_create_completion();
7582 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
7583 completion->wait_for_complete();
7584 ASSERT_EQ(0, completion->get_return_value());
7585 completion->release();
7586 }
7587
7588 // flush
7589 {
7590 ObjectReadOperation op;
7591 op.cache_flush();
7592 librados::AioCompletion *completion = cluster.aio_create_completion();
7593 ASSERT_EQ(0, cache_ioctx.aio_operate(
7594 "foo", completion, &op,
7595 librados::OPERATION_IGNORE_OVERLAY, NULL));
7596 completion->wait_for_complete();
7597 ASSERT_EQ(0, completion->get_return_value());
7598 completion->release();
7599 }
7600
7601 // verify clean
7602 {
7603 bool dirty = false;
7604 int r = -1;
7605 ObjectReadOperation op;
7606 op.is_dirty(&dirty, &r);
7607 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
7608 ASSERT_FALSE(dirty);
7609 ASSERT_EQ(0, r);
7610 }
7611
7612 // verify in base tier
7613 {
7614 NObjectIterator it = ioctx.nobjects_begin();
7615 ASSERT_TRUE(it != ioctx.nobjects_end());
7616 ASSERT_TRUE(it->get_oid() == string("foo"));
7617 ++it;
7618 ASSERT_TRUE(it == ioctx.nobjects_end());
7619 }
7620
7621 // evict it
7622 {
7623 ObjectReadOperation op;
7624 op.cache_evict();
7625 librados::AioCompletion *completion = cluster.aio_create_completion();
7626 ASSERT_EQ(0, cache_ioctx.aio_operate(
7627 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
7628 completion->wait_for_complete();
7629 ASSERT_EQ(0, completion->get_return_value());
7630 completion->release();
7631 }
7632
7633 // verify no longer in cache tier
7634 {
7635 NObjectIterator it = cache_ioctx.nobjects_begin();
7636 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7637 }
7638
7639 // read it again and verify the version is consistent
7640 {
7641 bufferlist bl;
7642 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
7643 ASSERT_EQ(user_version, cache_ioctx.get_last_version());
7644 }
7645
7646 // erase it
7647 {
7648 ObjectWriteOperation op;
7649 op.remove();
7650 ASSERT_EQ(0, ioctx.operate("foo", &op));
7651 }
7652
7653 // flush whiteout
7654 {
7655 ObjectReadOperation op;
7656 op.cache_flush();
7657 librados::AioCompletion *completion = cluster.aio_create_completion();
7658 ASSERT_EQ(0, cache_ioctx.aio_operate(
7659 "foo", completion, &op,
7660 librados::OPERATION_IGNORE_OVERLAY, NULL));
7661 completion->wait_for_complete();
7662 ASSERT_EQ(0, completion->get_return_value());
7663 completion->release();
7664 }
7665
7666 // evict
7667 {
7668 ObjectReadOperation op;
7669 op.cache_evict();
7670 librados::AioCompletion *completion = cluster.aio_create_completion();
7671 ASSERT_EQ(0, cache_ioctx.aio_operate(
7672 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
7673 completion->wait_for_complete();
7674 ASSERT_EQ(0, completion->get_return_value());
7675 completion->release();
7676 }
7677
7678 // verify no longer in cache tier
7679 {
7680 NObjectIterator it = cache_ioctx.nobjects_begin();
7681 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7682 }
7683 // or base tier
7684 {
7685 NObjectIterator it = ioctx.nobjects_begin();
7686 ASSERT_TRUE(it == ioctx.nobjects_end());
7687 }
7688}
7689
7690TEST_F(LibRadosTwoPoolsECPP, FlushSnap) {
7691 SKIP_IF_CRIMSON();
7692 // configure cache
7693 bufferlist inbl;
7694 ASSERT_EQ(0, cluster.mon_command(
7695 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
7696 "\", \"tierpool\": \"" + cache_pool_name +
7697 "\", \"force_nonempty\": \"--force-nonempty\" }",
7698 inbl, NULL, NULL));
7699 ASSERT_EQ(0, cluster.mon_command(
7700 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7701 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7702 inbl, NULL, NULL));
7703 ASSERT_EQ(0, cluster.mon_command(
7704 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
7705 "\", \"mode\": \"writeback\"}",
7706 inbl, NULL, NULL));
7707
7708 // wait for maps to settle
7709 cluster.wait_for_latest_osdmap();
7710
7711 // create object
7712 {
7713 bufferlist bl;
7714 bl.append("a");
7715 ObjectWriteOperation op;
7716 op.write_full(bl);
7717 ASSERT_EQ(0, ioctx.operate("foo", &op));
7718 }
7719
7720 // create a snapshot, clone
7721 vector<uint64_t> my_snaps(1);
7722 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
7723 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
7724 my_snaps));
7725 {
7726 bufferlist bl;
7727 bl.append("b");
7728 ObjectWriteOperation op;
7729 op.write_full(bl);
7730 ASSERT_EQ(0, ioctx.operate("foo", &op));
7731 }
7732
7733 // and another
7734 my_snaps.resize(2);
7735 my_snaps[1] = my_snaps[0];
7736 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
7737 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
7738 my_snaps));
7739 {
7740 bufferlist bl;
7741 bl.append("c");
7742 ObjectWriteOperation op;
7743 op.write_full(bl);
7744 ASSERT_EQ(0, ioctx.operate("foo", &op));
7745 }
7746
7747 // verify the object is present in the cache tier
7748 {
7749 NObjectIterator it = cache_ioctx.nobjects_begin();
7750 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
7751 ASSERT_TRUE(it->get_oid() == string("foo"));
7752 ++it;
7753 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
7754 }
7755
7756 // verify the object is NOT present in the base tier
7757 {
7758 NObjectIterator it = ioctx.nobjects_begin();
7759 ASSERT_TRUE(it == ioctx.nobjects_end());
7760 }
7761
7762 // flush on head (should fail)
7763 ioctx.snap_set_read(librados::SNAP_HEAD);
7764 {
7765 ObjectReadOperation op;
7766 op.cache_flush();
7767 librados::AioCompletion *completion = cluster.aio_create_completion();
7768 ASSERT_EQ(0, ioctx.aio_operate(
7769 "foo", completion, &op,
7770 librados::OPERATION_IGNORE_CACHE, NULL));
7771 completion->wait_for_complete();
7772 ASSERT_EQ(-EBUSY, completion->get_return_value());
7773 completion->release();
7774 }
7775 // flush on recent snap (should fail)
7776 ioctx.snap_set_read(my_snaps[0]);
7777 {
7778 ObjectReadOperation op;
7779 op.cache_flush();
7780 librados::AioCompletion *completion = cluster.aio_create_completion();
7781 ASSERT_EQ(0, ioctx.aio_operate(
7782 "foo", completion, &op,
7783 librados::OPERATION_IGNORE_CACHE, NULL));
7784 completion->wait_for_complete();
7785 ASSERT_EQ(-EBUSY, completion->get_return_value());
7786 completion->release();
7787 }
7788 // flush on oldest snap
7789 ioctx.snap_set_read(my_snaps[1]);
7790 {
7791 ObjectReadOperation op;
7792 op.cache_flush();
7793 librados::AioCompletion *completion = cluster.aio_create_completion();
7794 ASSERT_EQ(0, ioctx.aio_operate(
7795 "foo", completion, &op,
7796 librados::OPERATION_IGNORE_CACHE, NULL));
7797 completion->wait_for_complete();
7798 ASSERT_EQ(0, completion->get_return_value());
7799 completion->release();
7800 }
7801 // flush on next oldest snap
7802 ioctx.snap_set_read(my_snaps[0]);
7803 {
7804 ObjectReadOperation op;
7805 op.cache_flush();
7806 librados::AioCompletion *completion = cluster.aio_create_completion();
7807 ASSERT_EQ(0, ioctx.aio_operate(
7808 "foo", completion, &op,
7809 librados::OPERATION_IGNORE_CACHE, NULL));
7810 completion->wait_for_complete();
7811 ASSERT_EQ(0, completion->get_return_value());
7812 completion->release();
7813 }
7814 // flush on head
7815 ioctx.snap_set_read(librados::SNAP_HEAD);
7816 {
7817 ObjectReadOperation op;
7818 op.cache_flush();
7819 librados::AioCompletion *completion = cluster.aio_create_completion();
7820 ASSERT_EQ(0, ioctx.aio_operate(
7821 "foo", completion, &op,
7822 librados::OPERATION_IGNORE_CACHE, NULL));
7823 completion->wait_for_complete();
7824 ASSERT_EQ(0, completion->get_return_value());
7825 completion->release();
7826 }
7827
7828 // verify i can read the snaps from the cache pool
7829 ioctx.snap_set_read(librados::SNAP_HEAD);
7830 {
7831 bufferlist bl;
7832 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7833 ASSERT_EQ('c', bl[0]);
7834 }
7835 ioctx.snap_set_read(my_snaps[0]);
7836 {
7837 bufferlist bl;
7838 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7839 ASSERT_EQ('b', bl[0]);
7840 }
7841 ioctx.snap_set_read(my_snaps[1]);
7842 {
7843 bufferlist bl;
7844 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7845 ASSERT_EQ('a', bl[0]);
7846 }
7847
7848 // tear down tiers
7849 ASSERT_EQ(0, cluster.mon_command(
7850 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
7851 "\"}",
7852 inbl, NULL, NULL));
7853
7854 // wait for maps to settle
7855 cluster.wait_for_latest_osdmap();
7856
7857 // verify i can read the snaps from the base pool
7858 ioctx.snap_set_read(librados::SNAP_HEAD);
7859 {
7860 bufferlist bl;
7861 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7862 ASSERT_EQ('c', bl[0]);
7863 }
7864 ioctx.snap_set_read(my_snaps[0]);
7865 {
7866 bufferlist bl;
7867 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7868 ASSERT_EQ('b', bl[0]);
7869 }
7870 ioctx.snap_set_read(my_snaps[1]);
7871 {
7872 bufferlist bl;
7873 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
7874 ASSERT_EQ('a', bl[0]);
7875 }
7876
7877 ASSERT_EQ(0, cluster.mon_command(
7878 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7879 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7880 inbl, NULL, NULL));
7881 cluster.wait_for_latest_osdmap();
7882
7883 // cleanup
7884 ioctx.selfmanaged_snap_remove(my_snaps[0]);
7885}
7886
7887TEST_F(LibRadosTierECPP, FlushWriteRaces) {
7888 SKIP_IF_CRIMSON();
7889 Rados cluster;
7890 std::string pool_name = get_temp_pool_name();
7891 std::string cache_pool_name = pool_name + "-cache";
7892 ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
7893 ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str()));
7894 IoCtx cache_ioctx;
7895 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
7896 cache_ioctx.application_enable("rados", true);
7897 IoCtx ioctx;
7898 ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
7899
7900 // configure cache
7901 bufferlist inbl;
7902 ASSERT_EQ(0, cluster.mon_command(
7903 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
7904 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
7905 inbl, NULL, NULL));
7906 ASSERT_EQ(0, cluster.mon_command(
7907 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
7908 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
7909 inbl, NULL, NULL));
7910 ASSERT_EQ(0, cluster.mon_command(
7911 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
7912 "\", \"mode\": \"writeback\"}",
7913 inbl, NULL, NULL));
7914
7915 // wait for maps to settle
7916 cluster.wait_for_latest_osdmap();
7917
7918 // create/dirty object
7919 bufferlist bl;
7920 bl.append("hi there");
7921 {
7922 ObjectWriteOperation op;
7923 op.write_full(bl);
7924 ASSERT_EQ(0, ioctx.operate("foo", &op));
7925 }
7926
7927 // flush + write
7928 {
7929 ObjectReadOperation op;
7930 op.cache_flush();
7931 librados::AioCompletion *completion = cluster.aio_create_completion();
7932 ASSERT_EQ(0, cache_ioctx.aio_operate(
7933 "foo", completion, &op,
7934 librados::OPERATION_IGNORE_OVERLAY, NULL));
7935
7936 ObjectWriteOperation op2;
7937 op2.write_full(bl);
7938 librados::AioCompletion *completion2 = cluster.aio_create_completion();
7939 ASSERT_EQ(0, ioctx.aio_operate(
7940 "foo", completion2, &op2, 0));
7941
7942 completion->wait_for_complete();
7943 completion2->wait_for_complete();
7944 ASSERT_EQ(0, completion->get_return_value());
7945 ASSERT_EQ(0, completion2->get_return_value());
7946 completion->release();
7947 completion2->release();
7948 }
7949
7950 int tries = 1000;
7951 do {
7952 // create/dirty object
7953 {
7954 bufferlist bl;
7955 bl.append("hi there");
7956 ObjectWriteOperation op;
7957 op.write_full(bl);
7958 ASSERT_EQ(0, ioctx.operate("foo", &op));
7959 }
7960
7961 // try-flush + write
7962 {
7963 ObjectReadOperation op;
7964 op.cache_try_flush();
7965 librados::AioCompletion *completion = cluster.aio_create_completion();
7966 ASSERT_EQ(0, cache_ioctx.aio_operate(
7967 "foo", completion, &op,
7968 librados::OPERATION_IGNORE_OVERLAY |
7969 librados::OPERATION_SKIPRWLOCKS, NULL));
7970
7971 ObjectWriteOperation op2;
7972 op2.write_full(bl);
7973 librados::AioCompletion *completion2 = cluster.aio_create_completion();
7974 ASSERT_EQ(0, ioctx.aio_operate("foo", completion2, &op2, 0));
7975
7976 completion->wait_for_complete();
7977 completion2->wait_for_complete();
7978 int r = completion->get_return_value();
7979 ASSERT_TRUE(r == -EBUSY || r == 0);
7980 ASSERT_EQ(0, completion2->get_return_value());
7981 completion->release();
7982 completion2->release();
7983 if (r == -EBUSY)
7984 break;
7985 cout << "didn't get EBUSY, trying again" << std::endl;
7986 }
7987 ASSERT_TRUE(--tries);
7988 } while (true);
7989
7990 // tear down tiers
7991 ASSERT_EQ(0, cluster.mon_command(
7992 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
7993 "\"}",
7994 inbl, NULL, NULL));
7995 ASSERT_EQ(0, cluster.mon_command(
7996 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
7997 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
7998 inbl, NULL, NULL));
7999
8000 // wait for maps to settle before next test
8001 cluster.wait_for_latest_osdmap();
8002
8003 ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str()));
8004 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
8005}
8006
8007TEST_F(LibRadosTwoPoolsECPP, FlushTryFlushRaces) {
8008 SKIP_IF_CRIMSON();
8009 // configure cache
8010 bufferlist inbl;
8011 ASSERT_EQ(0, cluster.mon_command(
8012 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8013 "\", \"tierpool\": \"" + cache_pool_name +
8014 "\", \"force_nonempty\": \"--force-nonempty\" }",
8015 inbl, NULL, NULL));
8016 ASSERT_EQ(0, cluster.mon_command(
8017 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
8018 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
8019 inbl, NULL, NULL));
8020 ASSERT_EQ(0, cluster.mon_command(
8021 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
8022 "\", \"mode\": \"writeback\"}",
8023 inbl, NULL, NULL));
8024
8025 // wait for maps to settle
8026 cluster.wait_for_latest_osdmap();
8027
8028 // create/dirty object
8029 {
8030 bufferlist bl;
8031 bl.append("hi there");
8032 ObjectWriteOperation op;
8033 op.write_full(bl);
8034 ASSERT_EQ(0, ioctx.operate("foo", &op));
8035 }
8036
8037 // flush + flush
8038 {
8039 ObjectReadOperation op;
8040 op.cache_flush();
8041 librados::AioCompletion *completion = cluster.aio_create_completion();
8042 ASSERT_EQ(0, cache_ioctx.aio_operate(
8043 "foo", completion, &op,
8044 librados::OPERATION_IGNORE_OVERLAY, NULL));
8045
8046 ObjectReadOperation op2;
8047 op2.cache_flush();
8048 librados::AioCompletion *completion2 = cluster.aio_create_completion();
8049 ASSERT_EQ(0, cache_ioctx.aio_operate(
8050 "foo", completion2, &op2,
8051 librados::OPERATION_IGNORE_OVERLAY, NULL));
8052
8053 completion->wait_for_complete();
8054 completion2->wait_for_complete();
8055 ASSERT_EQ(0, completion->get_return_value());
8056 ASSERT_EQ(0, completion2->get_return_value());
8057 completion->release();
8058 completion2->release();
8059 }
8060
8061 // create/dirty object
8062 {
8063 bufferlist bl;
8064 bl.append("hi there");
8065 ObjectWriteOperation op;
8066 op.write_full(bl);
8067 ASSERT_EQ(0, ioctx.operate("foo", &op));
8068 }
8069
8070 // flush + try-flush
8071 {
8072 ObjectReadOperation op;
8073 op.cache_flush();
8074 librados::AioCompletion *completion = cluster.aio_create_completion();
8075 ASSERT_EQ(0, cache_ioctx.aio_operate(
8076 "foo", completion, &op,
8077 librados::OPERATION_IGNORE_OVERLAY, NULL));
8078
8079 ObjectReadOperation op2;
8080 op2.cache_try_flush();
8081 librados::AioCompletion *completion2 = cluster.aio_create_completion();
8082 ASSERT_EQ(0, cache_ioctx.aio_operate(
8083 "foo", completion2, &op2,
8084 librados::OPERATION_IGNORE_OVERLAY |
8085 librados::OPERATION_SKIPRWLOCKS, NULL));
8086
8087 completion->wait_for_complete();
8088 completion2->wait_for_complete();
8089 ASSERT_EQ(0, completion->get_return_value());
8090 ASSERT_EQ(0, completion2->get_return_value());
8091 completion->release();
8092 completion2->release();
8093 }
8094
8095 // create/dirty object
8096 int tries = 1000;
8097 do {
8098 {
8099 bufferlist bl;
8100 bl.append("hi there");
8101 ObjectWriteOperation op;
8102 op.write_full(bl);
8103 ASSERT_EQ(0, ioctx.operate("foo", &op));
8104 }
8105
8106 // try-flush + flush
8107 // (flush will not piggyback on try-flush)
8108 {
8109 ObjectReadOperation op;
8110 op.cache_try_flush();
8111 librados::AioCompletion *completion = cluster.aio_create_completion();
8112 ASSERT_EQ(0, cache_ioctx.aio_operate(
8113 "foo", completion, &op,
8114 librados::OPERATION_IGNORE_OVERLAY |
8115 librados::OPERATION_SKIPRWLOCKS, NULL));
8116
8117 ObjectReadOperation op2;
8118 op2.cache_flush();
8119 librados::AioCompletion *completion2 = cluster.aio_create_completion();
8120 ASSERT_EQ(0, cache_ioctx.aio_operate(
8121 "foo", completion2, &op2,
8122 librados::OPERATION_IGNORE_OVERLAY, NULL));
8123
8124 completion->wait_for_complete();
8125 completion2->wait_for_complete();
8126 int r = completion->get_return_value();
8127 ASSERT_TRUE(r == -EBUSY || r == 0);
8128 ASSERT_EQ(0, completion2->get_return_value());
8129 completion->release();
8130 completion2->release();
8131 if (r == -EBUSY)
8132 break;
8133 cout << "didn't get EBUSY, trying again" << std::endl;
8134 }
8135 ASSERT_TRUE(--tries);
8136 } while (true);
8137
8138 // create/dirty object
8139 {
8140 bufferlist bl;
8141 bl.append("hi there");
8142 ObjectWriteOperation op;
8143 op.write_full(bl);
8144 ASSERT_EQ(0, ioctx.operate("foo", &op));
8145 }
8146
8147 // try-flush + try-flush
8148 {
8149 ObjectReadOperation op;
8150 op.cache_try_flush();
8151 librados::AioCompletion *completion = cluster.aio_create_completion();
8152 ASSERT_EQ(0, cache_ioctx.aio_operate(
8153 "foo", completion, &op,
8154 librados::OPERATION_IGNORE_OVERLAY |
8155 librados::OPERATION_SKIPRWLOCKS, NULL));
8156
8157 ObjectReadOperation op2;
8158 op2.cache_try_flush();
8159 librados::AioCompletion *completion2 = cluster.aio_create_completion();
8160 ASSERT_EQ(0, cache_ioctx.aio_operate(
8161 "foo", completion2, &op2,
8162 librados::OPERATION_IGNORE_OVERLAY |
8163 librados::OPERATION_SKIPRWLOCKS, NULL));
8164
8165 completion->wait_for_complete();
8166 completion2->wait_for_complete();
8167 ASSERT_EQ(0, completion->get_return_value());
8168 ASSERT_EQ(0, completion2->get_return_value());
8169 completion->release();
8170 completion2->release();
8171 }
8172}
8173
8174TEST_F(LibRadosTwoPoolsECPP, TryFlushReadRace) {
8175 SKIP_IF_CRIMSON();
8176 // configure cache
8177 bufferlist inbl;
8178 ASSERT_EQ(0, cluster.mon_command(
8179 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8180 "\", \"tierpool\": \"" + cache_pool_name +
8181 "\", \"force_nonempty\": \"--force-nonempty\" }",
8182 inbl, NULL, NULL));
8183 ASSERT_EQ(0, cluster.mon_command(
8184 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
8185 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
8186 inbl, NULL, NULL));
8187 ASSERT_EQ(0, cluster.mon_command(
8188 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
8189 "\", \"mode\": \"writeback\"}",
8190 inbl, NULL, NULL));
8191
8192 // wait for maps to settle
8193 cluster.wait_for_latest_osdmap();
8194
8195 // create/dirty object
8196 {
8197 bufferlist bl;
8198 bl.append("hi there");
8199 bufferptr bp(4000000); // make it big!
8200 bp.zero();
8201 bl.append(bp);
8202 ObjectWriteOperation op;
8203 op.write_full(bl);
8204 ASSERT_EQ(0, ioctx.operate("foo", &op));
8205 }
8206
8207 // start a continuous stream of reads
8208 read_ioctx = &ioctx;
8209 test_lock.lock();
8210 for (int i = 0; i < max_reads; ++i) {
8211 start_flush_read();
8212 num_reads++;
8213 }
8214 test_lock.unlock();
8215
8216 // try-flush
8217 ObjectReadOperation op;
8218 op.cache_try_flush();
8219 librados::AioCompletion *completion = cluster.aio_create_completion();
8220 ASSERT_EQ(0, cache_ioctx.aio_operate(
8221 "foo", completion, &op,
8222 librados::OPERATION_IGNORE_OVERLAY |
8223 librados::OPERATION_SKIPRWLOCKS, NULL));
8224
8225 completion->wait_for_complete();
8226 ASSERT_EQ(0, completion->get_return_value());
8227 completion->release();
8228
8229 // stop reads
8230 std::unique_lock locker{test_lock};
8231 max_reads = 0;
8232 cond.wait(locker, [] { return num_reads == 0;});
8233}
8234
8235TEST_F(LibRadosTierECPP, CallForcesPromote) {
8236 SKIP_IF_CRIMSON();
8237 Rados cluster;
8238 std::string pool_name = get_temp_pool_name();
8239 std::string cache_pool_name = pool_name + "-cache";
8240 ASSERT_EQ("", create_one_ec_pool_pp(pool_name, cluster));
8241 ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str()));
8242 IoCtx cache_ioctx;
8243 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
8244 cache_ioctx.application_enable("rados", true);
8245 IoCtx ioctx;
8246 ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
8247
8248 // configure cache
8249 bufferlist inbl;
8250 ASSERT_EQ(0, cluster.mon_command(
8251 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8252 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
8253 inbl, NULL, NULL));
8254 ASSERT_EQ(0, cluster.mon_command(
8255 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
8256 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
8257 inbl, NULL, NULL));
8258 ASSERT_EQ(0, cluster.mon_command(
8259 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
8260 "\", \"mode\": \"writeback\"}",
8261 inbl, NULL, NULL));
8262
8263 // set things up such that the op would normally be proxied
8264 ASSERT_EQ(0, cluster.mon_command(
8265 set_pool_str(cache_pool_name, "hit_set_count", 2),
8266 inbl, NULL, NULL));
8267 ASSERT_EQ(0, cluster.mon_command(
8268 set_pool_str(cache_pool_name, "hit_set_period", 600),
8269 inbl, NULL, NULL));
8270 ASSERT_EQ(0, cluster.mon_command(
8271 set_pool_str(cache_pool_name, "hit_set_type",
8272 "explicit_object"),
8273 inbl, NULL, NULL));
8274 ASSERT_EQ(0, cluster.mon_command(
8275 set_pool_str(cache_pool_name, "min_read_recency_for_promote",
8276 "4"),
8277 inbl, NULL, NULL));
8278
8279 // wait for maps to settle
8280 cluster.wait_for_latest_osdmap();
8281
8282 // create/dirty object
8283 bufferlist bl;
8284 bl.append("hi there");
8285 {
8286 ObjectWriteOperation op;
8287 op.write_full(bl);
8288 ASSERT_EQ(0, ioctx.operate("foo", &op));
8289 }
8290
8291 // flush
8292 {
8293 ObjectReadOperation op;
8294 op.cache_flush();
8295 librados::AioCompletion *completion = cluster.aio_create_completion();
8296 ASSERT_EQ(0, cache_ioctx.aio_operate(
8297 "foo", completion, &op,
8298 librados::OPERATION_IGNORE_OVERLAY, NULL));
8299 completion->wait_for_complete();
8300 ASSERT_EQ(0, completion->get_return_value());
8301 completion->release();
8302 }
8303
8304 // evict
8305 {
8306 ObjectReadOperation op;
8307 op.cache_evict();
8308 librados::AioCompletion *completion = cluster.aio_create_completion();
8309 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
8310 librados::OPERATION_IGNORE_CACHE,
8311 NULL));
8312 completion->wait_for_complete();
8313 ASSERT_EQ(0, completion->get_return_value());
8314 completion->release();
8315 }
8316
8317 // call
8318 {
8319 ObjectReadOperation op;
8320 bufferlist bl;
8321 op.exec("rbd", "get_id", bl);
8322 bufferlist out;
8323 // should get EIO (not an rbd object), not -EOPNOTSUPP (we didn't promote)
8324 ASSERT_EQ(-5, ioctx.operate("foo", &op, &out));
8325 }
8326
8327 // make sure foo is back in the cache tier
8328 {
8329 NObjectIterator it = cache_ioctx.nobjects_begin();
8330 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
8331 ASSERT_TRUE(it->get_oid() == string("foo"));
8332 ++it;
8333 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
8334 }
8335
8336 // tear down tiers
8337 ASSERT_EQ(0, cluster.mon_command(
8338 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
8339 "\"}",
8340 inbl, NULL, NULL));
8341 ASSERT_EQ(0, cluster.mon_command(
8342 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
8343 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
8344 inbl, NULL, NULL));
8345
8346 // wait for maps to settle before next test
8347 cluster.wait_for_latest_osdmap();
8348
8349 ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str()));
8350 ASSERT_EQ(0, destroy_one_ec_pool_pp(pool_name, cluster));
8351}
8352
8353TEST_F(LibRadosTierECPP, HitSetNone) {
8354 SKIP_IF_CRIMSON();
8355 {
8356 list< pair<time_t,time_t> > ls;
8357 AioCompletion *c = librados::Rados::aio_create_completion();
8358 ASSERT_EQ(0, ioctx.hit_set_list(123, c, &ls));
8359 c->wait_for_complete();
8360 ASSERT_EQ(0, c->get_return_value());
8361 ASSERT_TRUE(ls.empty());
8362 c->release();
8363 }
8364 {
8365 bufferlist bl;
8366 AioCompletion *c = librados::Rados::aio_create_completion();
8367 ASSERT_EQ(0, ioctx.hit_set_get(123, c, 12345, &bl));
8368 c->wait_for_complete();
8369 ASSERT_EQ(-ENOENT, c->get_return_value());
8370 c->release();
8371 }
8372}
8373
8374TEST_F(LibRadosTwoPoolsECPP, HitSetRead) {
8375 SKIP_IF_CRIMSON();
8376 // make it a tier
8377 bufferlist inbl;
8378 ASSERT_EQ(0, cluster.mon_command(
8379 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8380 "\", \"tierpool\": \"" + cache_pool_name +
8381 "\", \"force_nonempty\": \"--force-nonempty\" }",
8382 inbl, NULL, NULL));
8383
8384 // enable hitset tracking for this pool
8385 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", 2),
8386 inbl, NULL, NULL));
8387 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", 600),
8388 inbl, NULL, NULL));
8389 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type",
8390 "explicit_object"),
8391 inbl, NULL, NULL));
8392
8393 // wait for maps to settle
8394 cluster.wait_for_latest_osdmap();
8395
8396 cache_ioctx.set_namespace("");
8397
8398 // keep reading until we see our object appear in the HitSet
8399 utime_t start = ceph_clock_now();
8400 utime_t hard_stop = start + utime_t(600, 0);
8401
8402 while (true) {
8403 utime_t now = ceph_clock_now();
8404 ASSERT_TRUE(now < hard_stop);
8405
8406 string name = "foo";
8407 uint32_t hash;
8408 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
8409 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash,
8410 cluster.pool_lookup(cache_pool_name.c_str()), "");
8411
8412 bufferlist bl;
8413 ASSERT_EQ(-ENOENT, cache_ioctx.read("foo", bl, 1, 0));
8414
8415 bufferlist hbl;
8416 AioCompletion *c = librados::Rados::aio_create_completion();
8417 ASSERT_EQ(0, cache_ioctx.hit_set_get(hash, c, now.sec(), &hbl));
8418 c->wait_for_complete();
8419 c->release();
8420
8421 if (hbl.length()) {
8422 auto p = hbl.cbegin();
8423 HitSet hs;
8424 decode(hs, p);
8425 if (hs.contains(oid)) {
8426 cout << "ok, hit_set contains " << oid << std::endl;
8427 break;
8428 }
8429 cout << "hmm, not in HitSet yet" << std::endl;
8430 } else {
8431 cout << "hmm, no HitSet yet" << std::endl;
8432 }
8433
8434 sleep(1);
8435 }
8436}
8437
// disable this test until hitset-get reliably works on EC pools
#if 0
// Writes 1000 one-byte objects with explicit_hash hit-set tracking enabled on
// the pool, fetches the latest hit set of every PG, and checks that each
// written object is contained in at least one of them.
TEST_F(LibRadosTierECPP, HitSetWrite) {
  int num_pg = _get_pg_num(cluster, pool_name);
  ceph_assert(num_pg > 0);

  // enable hitset tracking for this pool
  bufferlist inbl;
  ASSERT_EQ(0, cluster.mon_command(
    set_pool_str(pool_name, "hit_set_count", 8),
    inbl, NULL, NULL));
  ASSERT_EQ(0, cluster.mon_command(
    set_pool_str(pool_name, "hit_set_period", 600),
    inbl, NULL, NULL));
  ASSERT_EQ(0, cluster.mon_command(
    set_pool_str(pool_name, "hit_set_type", "explicit_hash"),
    inbl, NULL, NULL));

  // wait for maps to settle
  cluster.wait_for_latest_osdmap();

  ioctx.set_namespace("");

  // do a bunch of writes
  for (int obj = 0; obj < 1000; ++obj) {
    bufferlist payload;
    payload.append("a");
    ASSERT_EQ(0, ioctx.write(stringify(obj), payload, 1, 0));
  }

  // get HitSets
  std::map<int,HitSet> hitsets;
  for (int pg = 0; pg < num_pg; ++pg) {
    list< pair<time_t,time_t> > intervals;
    AioCompletion *comp = librados::Rados::aio_create_completion();
    ASSERT_EQ(0, ioctx.hit_set_list(pg, comp, &intervals));
    comp->wait_for_complete();
    comp->release();
    std::cout << "pg " << pg << " ls " << intervals << std::endl;
    ASSERT_FALSE(intervals.empty());

    // get the latest
    comp = librados::Rados::aio_create_completion();
    bufferlist encoded;
    ASSERT_EQ(0, ioctx.hit_set_get(pg, comp, intervals.back().first, &encoded));
    comp->wait_for_complete();
    comp->release();

    //std::cout << "bl len is " << bl.length() << "\n";
    //bl.hexdump(std::cout);
    //std::cout << std::endl;

    auto cursor = encoded.cbegin();
    decode(hitsets[pg], cursor);

    // cope with racing splits by refreshing pg_num
    if (pg == num_pg - 1)
      num_pg = _get_pg_num(cluster, pool_name);
  }

  for (int obj = 0; obj < 1000; ++obj) {
    string n = stringify(obj);
    uint32_t hash = ioctx.get_object_hash_position(n);
    hobject_t oid(sobject_t(n, CEPH_NOSNAP), "", hash,
                  cluster.pool_lookup(pool_name.c_str()), "");
    std::cout << "checking for " << oid << std::endl;
    bool found = false;
    for (int pg = 0; pg < num_pg; ++pg) {
      if (hitsets[pg].contains(oid)) {
        found = true;
        break;
      }
    }
    ASSERT_TRUE(found);
  }
}
#endif
8513
8514TEST_F(LibRadosTwoPoolsECPP, HitSetTrim) {
8515 SKIP_IF_CRIMSON();
8516 unsigned count = 3;
8517 unsigned period = 3;
8518
8519 // make it a tier
8520 bufferlist inbl;
8521 ASSERT_EQ(0, cluster.mon_command(
8522 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8523 "\", \"tierpool\": \"" + cache_pool_name +
8524 "\", \"force_nonempty\": \"--force-nonempty\" }",
8525 inbl, NULL, NULL));
8526
8527 // enable hitset tracking for this pool
8528 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", count),
8529 inbl, NULL, NULL));
8530 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", period),
8531 inbl, NULL, NULL));
8532 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
8533 inbl, NULL, NULL));
8534 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_fpp", ".01"),
8535 inbl, NULL, NULL));
8536
8537 // wait for maps to settle
8538 cluster.wait_for_latest_osdmap();
8539
8540 cache_ioctx.set_namespace("");
8541
8542 // do a bunch of writes and make sure the hitsets rotate
8543 utime_t start = ceph_clock_now();
8544 utime_t hard_stop = start + utime_t(count * period * 50, 0);
8545
8546 time_t first = 0;
8547 int bsize = alignment;
8548 char *buf = (char *)new char[bsize];
8549 memset(buf, 'f', bsize);
8550
8551 while (true) {
8552 string name = "foo";
8553 uint32_t hash;
8554 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
8555 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash, -1, "");
8556
8557 bufferlist bl;
8558 bl.append(buf, bsize);
8559 ASSERT_EQ(0, cache_ioctx.append("foo", bl, bsize));
8560
8561 list<pair<time_t, time_t> > ls;
8562 AioCompletion *c = librados::Rados::aio_create_completion();
8563 ASSERT_EQ(0, cache_ioctx.hit_set_list(hash, c, &ls));
8564 c->wait_for_complete();
8565 c->release();
8566
8567 cout << " got ls " << ls << std::endl;
8568 if (!ls.empty()) {
8569 if (!first) {
8570 first = ls.front().first;
8571 cout << "first is " << first << std::endl;
8572 } else {
8573 if (ls.front().first != first) {
8574 cout << "first now " << ls.front().first << ", trimmed" << std::endl;
8575 break;
8576 }
8577 }
8578 }
8579
8580 utime_t now = ceph_clock_now();
8581 ASSERT_TRUE(now < hard_stop);
8582
8583 sleep(1);
8584 }
8585 delete[] buf;
8586}
8587
8588TEST_F(LibRadosTwoPoolsECPP, PromoteOn2ndRead) {
8589 SKIP_IF_CRIMSON();
8590 // create object
8591 for (int i=0; i<20; ++i) {
8592 bufferlist bl;
8593 bl.append("hi there");
8594 ObjectWriteOperation op;
8595 op.write_full(bl);
8596 ASSERT_EQ(0, ioctx.operate("foo" + stringify(i), &op));
8597 }
8598
8599 // configure cache
8600 bufferlist inbl;
8601 ASSERT_EQ(0, cluster.mon_command(
8602 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8603 "\", \"tierpool\": \"" + cache_pool_name +
8604 "\", \"force_nonempty\": \"--force-nonempty\" }",
8605 inbl, NULL, NULL));
8606 ASSERT_EQ(0, cluster.mon_command(
8607 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
8608 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
8609 inbl, NULL, NULL));
8610 ASSERT_EQ(0, cluster.mon_command(
8611 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
8612 "\", \"mode\": \"writeback\"}",
8613 inbl, NULL, NULL));
8614
8615 // enable hitset tracking for this pool
8616 ASSERT_EQ(0, cluster.mon_command(
8617 set_pool_str(cache_pool_name, "hit_set_count", 2),
8618 inbl, NULL, NULL));
8619 ASSERT_EQ(0, cluster.mon_command(
8620 set_pool_str(cache_pool_name, "hit_set_period", 600),
8621 inbl, NULL, NULL));
8622 ASSERT_EQ(0, cluster.mon_command(
8623 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
8624 inbl, NULL, NULL));
8625 ASSERT_EQ(0, cluster.mon_command(
8626 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
8627 inbl, NULL, NULL));
8628 ASSERT_EQ(0, cluster.mon_command(
8629 set_pool_str(cache_pool_name, "hit_set_grade_decay_rate", 20),
8630 inbl, NULL, NULL));
8631 ASSERT_EQ(0, cluster.mon_command(
8632 set_pool_str(cache_pool_name, "hit_set_search_last_n", 1),
8633 inbl, NULL, NULL));
8634
8635 // wait for maps to settle
8636 cluster.wait_for_latest_osdmap();
8637
8638 int fake = 0; // set this to non-zero to test spurious promotion,
8639 // e.g. from thrashing
8640 int attempt = 0;
8641 string obj;
8642 while (true) {
8643 // 1st read, don't trigger a promote
8644 obj = "foo" + stringify(attempt);
8645 cout << obj << std::endl;
8646 {
8647 bufferlist bl;
8648 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
8649 if (--fake >= 0) {
8650 sleep(1);
8651 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
8652 sleep(1);
8653 }
8654 }
8655
8656 // verify the object is NOT present in the cache tier
8657 {
8658 bool found = false;
8659 NObjectIterator it = cache_ioctx.nobjects_begin();
8660 while (it != cache_ioctx.nobjects_end()) {
8661 cout << " see " << it->get_oid() << std::endl;
8662 if (it->get_oid() == string(obj.c_str())) {
8663 found = true;
8664 break;
8665 }
8666 ++it;
8667 }
8668 if (!found)
8669 break;
8670 }
8671
8672 ++attempt;
8673 ASSERT_LE(attempt, 20);
8674 cout << "hrm, object is present in cache on attempt " << attempt
8675 << ", retrying" << std::endl;
8676 }
8677
8678 // Read until the object is present in the cache tier
8679 cout << "verifying " << obj << " is eventually promoted" << std::endl;
8680 while (true) {
8681 bufferlist bl;
8682 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
8683
8684 bool there = false;
8685 NObjectIterator it = cache_ioctx.nobjects_begin();
8686 while (it != cache_ioctx.nobjects_end()) {
8687 if (it->get_oid() == string(obj.c_str())) {
8688 there = true;
8689 break;
8690 }
8691 ++it;
8692 }
8693 if (there)
8694 break;
8695
8696 sleep(1);
8697 }
8698
8699 // tear down tiers
8700 ASSERT_EQ(0, cluster.mon_command(
8701 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
8702 "\"}",
8703 inbl, NULL, NULL));
8704 ASSERT_EQ(0, cluster.mon_command(
8705 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
8706 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
8707 inbl, NULL, NULL));
8708
8709 // wait for maps to settle before next test
8710 cluster.wait_for_latest_osdmap();
8711}
8712
8713TEST_F(LibRadosTwoPoolsECPP, ProxyRead) {
8714 SKIP_IF_CRIMSON();
8715 // create object
8716 {
8717 bufferlist bl;
8718 bl.append("hi there");
8719 ObjectWriteOperation op;
8720 op.write_full(bl);
8721 ASSERT_EQ(0, ioctx.operate("foo", &op));
8722 }
8723
8724 // configure cache
8725 bufferlist inbl;
8726 ASSERT_EQ(0, cluster.mon_command(
8727 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8728 "\", \"tierpool\": \"" + cache_pool_name +
8729 "\", \"force_nonempty\": \"--force-nonempty\" }",
8730 inbl, NULL, NULL));
8731 ASSERT_EQ(0, cluster.mon_command(
8732 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
8733 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
8734 inbl, NULL, NULL));
8735 ASSERT_EQ(0, cluster.mon_command(
8736 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
8737 "\", \"mode\": \"readproxy\"}",
8738 inbl, NULL, NULL));
8739
8740 // wait for maps to settle
8741 cluster.wait_for_latest_osdmap();
8742
8743 // read and verify the object
8744 {
8745 bufferlist bl;
8746 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
8747 ASSERT_EQ('h', bl[0]);
8748 }
8749
8750 // Verify 10 times the object is NOT present in the cache tier
8751 uint32_t i = 0;
8752 while (i++ < 10) {
8753 NObjectIterator it = cache_ioctx.nobjects_begin();
8754 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
8755 sleep(1);
8756 }
8757
8758 // tear down tiers
8759 ASSERT_EQ(0, cluster.mon_command(
8760 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
8761 "\"}",
8762 inbl, NULL, NULL));
8763 ASSERT_EQ(0, cluster.mon_command(
8764 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
8765 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
8766 inbl, NULL, NULL));
8767
8768 // wait for maps to settle before next test
8769 cluster.wait_for_latest_osdmap();
8770}
8771
8772TEST_F(LibRadosTwoPoolsECPP, CachePin) {
8773 SKIP_IF_CRIMSON();
8774 // create object
8775 {
8776 bufferlist bl;
8777 bl.append("hi there");
8778 ObjectWriteOperation op;
8779 op.write_full(bl);
8780 ASSERT_EQ(0, ioctx.operate("foo", &op));
8781 }
8782 {
8783 bufferlist bl;
8784 bl.append("hi there");
8785 ObjectWriteOperation op;
8786 op.write_full(bl);
8787 ASSERT_EQ(0, ioctx.operate("bar", &op));
8788 }
8789 {
8790 bufferlist bl;
8791 bl.append("hi there");
8792 ObjectWriteOperation op;
8793 op.write_full(bl);
8794 ASSERT_EQ(0, ioctx.operate("baz", &op));
8795 }
8796 {
8797 bufferlist bl;
8798 bl.append("hi there");
8799 ObjectWriteOperation op;
8800 op.write_full(bl);
8801 ASSERT_EQ(0, ioctx.operate("bam", &op));
8802 }
8803
8804 // configure cache
8805 bufferlist inbl;
8806 ASSERT_EQ(0, cluster.mon_command(
8807 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8808 "\", \"tierpool\": \"" + cache_pool_name +
8809 "\", \"force_nonempty\": \"--force-nonempty\" }",
8810 inbl, NULL, NULL));
8811 ASSERT_EQ(0, cluster.mon_command(
8812 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
8813 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
8814 inbl, NULL, NULL));
8815 ASSERT_EQ(0, cluster.mon_command(
8816 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
8817 "\", \"mode\": \"writeback\"}",
8818 inbl, NULL, NULL));
8819
8820 // wait for maps to settle
8821 cluster.wait_for_latest_osdmap();
8822
8823 // read, trigger promote
8824 {
8825 bufferlist bl;
8826 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
8827 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
8828 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
8829 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
8830 }
8831
8832 // verify the objects are present in the cache tier
8833 {
8834 NObjectIterator it = cache_ioctx.nobjects_begin();
8835 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
8836 for (uint32_t i = 0; i < 4; i++) {
8837 ASSERT_TRUE(it->get_oid() == string("foo") ||
8838 it->get_oid() == string("bar") ||
8839 it->get_oid() == string("baz") ||
8840 it->get_oid() == string("bam"));
8841 ++it;
8842 }
8843 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
8844 }
8845
8846 // pin objects
8847 {
8848 ObjectWriteOperation op;
8849 op.cache_pin();
8850 librados::AioCompletion *completion = cluster.aio_create_completion();
8851 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
8852 completion->wait_for_complete();
8853 ASSERT_EQ(0, completion->get_return_value());
8854 completion->release();
8855 }
8856 {
8857 ObjectWriteOperation op;
8858 op.cache_pin();
8859 librados::AioCompletion *completion = cluster.aio_create_completion();
8860 ASSERT_EQ(0, cache_ioctx.aio_operate("baz", completion, &op));
8861 completion->wait_for_complete();
8862 ASSERT_EQ(0, completion->get_return_value());
8863 completion->release();
8864 }
8865
8866 // enable agent
8867 ASSERT_EQ(0, cluster.mon_command(
8868 set_pool_str(cache_pool_name, "hit_set_count", 2),
8869 inbl, NULL, NULL));
8870 ASSERT_EQ(0, cluster.mon_command(
8871 set_pool_str(cache_pool_name, "hit_set_period", 600),
8872 inbl, NULL, NULL));
8873 ASSERT_EQ(0, cluster.mon_command(
8874 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
8875 inbl, NULL, NULL));
8876 ASSERT_EQ(0, cluster.mon_command(
8877 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
8878 inbl, NULL, NULL));
8879 ASSERT_EQ(0, cluster.mon_command(
8880 set_pool_str(cache_pool_name, "target_max_objects", 1),
8881 inbl, NULL, NULL));
8882
8883 sleep(10);
8884
8885 // Verify the pinned object 'foo' is not flushed/evicted
8886 uint32_t count = 0;
8887 while (true) {
8888 bufferlist bl;
8889 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
8890
8891 count = 0;
8892 NObjectIterator it = cache_ioctx.nobjects_begin();
8893 while (it != cache_ioctx.nobjects_end()) {
8894 ASSERT_TRUE(it->get_oid() == string("foo") ||
8895 it->get_oid() == string("bar") ||
8896 it->get_oid() == string("baz") ||
8897 it->get_oid() == string("bam"));
8898 ++count;
8899 ++it;
8900 }
8901 if (count == 2) {
8902 ASSERT_TRUE(it->get_oid() == string("foo") ||
8903 it->get_oid() == string("baz"));
8904 break;
8905 }
8906
8907 sleep(1);
8908 }
8909
8910 // tear down tiers
8911 ASSERT_EQ(0, cluster.mon_command(
8912 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
8913 "\"}",
8914 inbl, NULL, NULL));
8915 ASSERT_EQ(0, cluster.mon_command(
8916 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
8917 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
8918 inbl, NULL, NULL));
8919
8920 // wait for maps to settle before next test
8921 cluster.wait_for_latest_osdmap();
8922}
8923TEST_F(LibRadosTwoPoolsECPP, SetRedirectRead) {
8924 SKIP_IF_CRIMSON();
8925 // create object
8926 {
8927 bufferlist bl;
8928 bl.append("hi there");
8929 ObjectWriteOperation op;
8930 op.write_full(bl);
8931 ASSERT_EQ(0, ioctx.operate("foo", &op));
8932 }
8933 {
8934 bufferlist bl;
8935 bl.append("there");
8936 ObjectWriteOperation op;
8937 op.write_full(bl);
8938 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
8939 }
8940
8941 // configure tier
8942 bufferlist inbl;
8943 ASSERT_EQ(0, cluster.mon_command(
8944 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
8945 "\", \"tierpool\": \"" + cache_pool_name +
8946 "\", \"force_nonempty\": \"--force-nonempty\" }",
8947 inbl, NULL, NULL));
8948
8949 // wait for maps to settle
8950 cluster.wait_for_latest_osdmap();
8951
8952 {
8953 ObjectWriteOperation op;
8954 op.set_redirect("bar", cache_ioctx, 0);
8955 librados::AioCompletion *completion = cluster.aio_create_completion();
8956 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
8957 completion->wait_for_complete();
8958 ASSERT_EQ(0, completion->get_return_value());
8959 completion->release();
8960 }
8961 // read and verify the object
8962 {
8963 bufferlist bl;
8964 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
8965 ASSERT_EQ('t', bl[0]);
8966 }
8967
8968 ASSERT_EQ(0, cluster.mon_command(
8969 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
8970 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
8971 inbl, NULL, NULL));
8972
8973 // wait for maps to settle before next test
8974 cluster.wait_for_latest_osdmap();
8975}
8976
8977TEST_F(LibRadosTwoPoolsECPP, SetChunkRead) {
8978 SKIP_IF_CRIMSON();
8979 // note: require >= mimic
8980
8981 {
8982 bufferlist bl;
8983 bl.append("there hi");
8984 ObjectWriteOperation op;
8985 op.write_full(bl);
8986 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
8987 }
8988
8989 {
8990 bufferlist bl;
8991 bl.append("There hi");
8992 ObjectWriteOperation op;
8993 op.write_full(bl);
8994 ASSERT_EQ(0, ioctx.operate("bar", &op));
8995 }
8996
8997 // wait for maps to settle
8998 cluster.wait_for_latest_osdmap();
8999
9000 // set_chunk
9001 manifest_set_chunk(cluster, ioctx, cache_ioctx, 0, 4, "bar", "foo");
9002
9003 // promote
9004 {
9005 ObjectWriteOperation op;
9006 op.tier_promote();
9007 librados::AioCompletion *completion = cluster.aio_create_completion();
9008 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
9009 completion->wait_for_complete();
9010 ASSERT_EQ(0, completion->get_return_value());
9011 completion->release();
9012 }
9013
9014 // read and verify the object
9015 {
9016 bufferlist bl;
9017 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
9018 ASSERT_EQ('T', bl[0]);
9019 }
9020
9021 // wait for maps to settle before next test
9022 cluster.wait_for_latest_osdmap();
9023}
9024
9025TEST_F(LibRadosTwoPoolsECPP, ManifestPromoteRead) {
9026 SKIP_IF_CRIMSON();
9027 // note: require >= mimic
9028
9029 // create object
9030 {
9031 bufferlist bl;
9032 bl.append("hiaa there");
9033 ObjectWriteOperation op;
9034 op.write_full(bl);
9035 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
9036 }
9037 {
9038 bufferlist bl;
9039 bl.append("base chunk");
9040 ObjectWriteOperation op;
9041 op.write_full(bl);
9042 ASSERT_EQ(0, cache_ioctx.operate("foo-chunk", &op));
9043 }
9044 {
9045 bufferlist bl;
9046 bl.append("HIaa there");
9047 ObjectWriteOperation op;
9048 op.write_full(bl);
9049 ASSERT_EQ(0, ioctx.operate("bar", &op));
9050 }
9051 {
9052 bufferlist bl;
9053 bl.append("BASE CHUNK");
9054 ObjectWriteOperation op;
9055 op.write_full(bl);
9056 ASSERT_EQ(0, ioctx.operate("bar-chunk", &op));
9057 }
9058
9059 // set-redirect
9060 {
9061 ObjectWriteOperation op;
9062 op.set_redirect("bar", ioctx, 0);
9063 librados::AioCompletion *completion = cluster.aio_create_completion();
9064 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
9065 completion->wait_for_complete();
9066 ASSERT_EQ(0, completion->get_return_value());
9067 completion->release();
9068 }
9069 // set-chunk
9070 manifest_set_chunk(cluster, ioctx, cache_ioctx, 0, 10, "bar-chunk", "foo-chunk");
9071 // promote
9072 {
9073 ObjectWriteOperation op;
9074 op.tier_promote();
9075 librados::AioCompletion *completion = cluster.aio_create_completion();
9076 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
9077 completion->wait_for_complete();
9078 ASSERT_EQ(0, completion->get_return_value());
9079 completion->release();
9080 }
9081 // read and verify the object (redirect)
9082 {
9083 bufferlist bl;
9084 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
9085 ASSERT_EQ('H', bl[0]);
9086 }
9087 // promote
9088 {
9089 ObjectWriteOperation op;
9090 op.tier_promote();
9091 librados::AioCompletion *completion = cluster.aio_create_completion();
9092 ASSERT_EQ(0, cache_ioctx.aio_operate("foo-chunk", completion, &op));
9093 completion->wait_for_complete();
9094 ASSERT_EQ(0, completion->get_return_value());
9095 completion->release();
9096 }
9097 // read and verify the object
9098 {
9099 bufferlist bl;
9100 ASSERT_EQ(1, cache_ioctx.read("foo-chunk", bl, 1, 0));
9101 ASSERT_EQ('B', bl[0]);
9102 }
9103
9104 // wait for maps to settle before next test
9105 cluster.wait_for_latest_osdmap();
9106}
9107
9108TEST_F(LibRadosTwoPoolsECPP, TrySetDedupTier) {
9109 SKIP_IF_CRIMSON();
9110 // note: require >= mimic
9111
9112 bufferlist inbl;
9113 ASSERT_EQ(-EOPNOTSUPP, cluster.mon_command(
9114 set_pool_str(pool_name, "dedup_tier", cache_pool_name),
9115 inbl, NULL, NULL));
9116}
9117
9118TEST_F(LibRadosTwoPoolsPP, PropagateBaseTierError) {
9119 SKIP_IF_CRIMSON();
9120 // write object to base tier
9121 bufferlist omap_bl;
9122 encode(static_cast<uint32_t>(0U), omap_bl);
9123
9124 ObjectWriteOperation op1;
9125 op1.omap_set({{"somekey", omap_bl}});
9126 ASSERT_EQ(0, ioctx.operate("propagate-base-tier-error", &op1));
9127
9128 // configure cache
9129 bufferlist inbl;
9130 ASSERT_EQ(0, cluster.mon_command(
9131 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
9132 "\", \"tierpool\": \"" + cache_pool_name +
9133 "\", \"force_nonempty\": \"--force-nonempty\" }",
9134 inbl, NULL, NULL));
9135 ASSERT_EQ(0, cluster.mon_command(
9136 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
9137 "\", \"mode\": \"writeback\"}",
9138 inbl, NULL, NULL));
9139 ASSERT_EQ(0, cluster.mon_command(
9140 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
9141 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
9142 inbl, NULL, NULL));
9143
9144 ASSERT_EQ(0, cluster.mon_command(
9145 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
9146 inbl, NULL, NULL));
9147 ASSERT_EQ(0, cluster.mon_command(
9148 set_pool_str(cache_pool_name, "hit_set_count", 1),
9149 inbl, NULL, NULL));
9150 ASSERT_EQ(0, cluster.mon_command(
9151 set_pool_str(cache_pool_name, "hit_set_period", 600),
9152 inbl, NULL, NULL));
9153 ASSERT_EQ(0, cluster.mon_command(
9154 set_pool_str(cache_pool_name, "target_max_objects", 250),
9155 inbl, NULL, NULL));
9156
9157 // wait for maps to settle
9158 cluster.wait_for_latest_osdmap();
9159
9160 // guarded op should fail so expect error to propagate to cache tier
9161 bufferlist test_omap_bl;
9162 encode(static_cast<uint32_t>(1U), test_omap_bl);
9163
9164 ObjectWriteOperation op2;
9165 op2.omap_cmp({{"somekey", {test_omap_bl, CEPH_OSD_CMPXATTR_OP_EQ}}}, nullptr);
9166 op2.omap_set({{"somekey", test_omap_bl}});
9167
9168 ASSERT_EQ(-ECANCELED, ioctx.operate("propagate-base-tier-error", &op2));
9169}
9170
9171TEST_F(LibRadosTwoPoolsPP, HelloWriteReturn) {
9172 SKIP_IF_CRIMSON();
9173 // configure cache
9174 bufferlist inbl;
9175 ASSERT_EQ(0, cluster.mon_command(
9176 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
9177 "\", \"tierpool\": \"" + cache_pool_name +
9178 "\", \"force_nonempty\": \"--force-nonempty\" }",
9179 inbl, NULL, NULL));
9180 ASSERT_EQ(0, cluster.mon_command(
9181 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
9182 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
9183 inbl, NULL, NULL));
9184 ASSERT_EQ(0, cluster.mon_command(
9185 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
9186 "\", \"mode\": \"writeback\"}",
9187 inbl, NULL, NULL));
9188
9189 // set things up such that the op would normally be proxied
9190 ASSERT_EQ(0, cluster.mon_command(
9191 set_pool_str(cache_pool_name, "hit_set_count", 2),
9192 inbl, NULL, NULL));
9193 ASSERT_EQ(0, cluster.mon_command(
9194 set_pool_str(cache_pool_name, "hit_set_period", 600),
9195 inbl, NULL, NULL));
9196 ASSERT_EQ(0, cluster.mon_command(
9197 set_pool_str(cache_pool_name, "hit_set_type",
9198 "explicit_object"),
9199 inbl, NULL, NULL));
9200 ASSERT_EQ(0, cluster.mon_command(
9201 set_pool_str(cache_pool_name, "min_read_recency_for_promote",
9202 "10000"),
9203 inbl, NULL, NULL));
9204
9205 // wait for maps to settle
9206 cluster.wait_for_latest_osdmap();
9207
9208 // this *will* return data due to the RETURNVEC flag
9209 {
9210 bufferlist in, out;
9211 int rval;
9212 ObjectWriteOperation o;
9213 o.exec("hello", "write_return_data", in, &out, &rval);
9214 librados::AioCompletion *completion = cluster.aio_create_completion();
9215 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &o,
9216 librados::OPERATION_RETURNVEC));
9217 completion->wait_for_complete();
9218 ASSERT_EQ(42, completion->get_return_value());
9219 ASSERT_EQ(42, rval);
9220 out.hexdump(std::cout);
9221 ASSERT_EQ("you might see this", std::string(out.c_str(), out.length()));
9222 }
9223
9224 // this will overflow because the return data is too big
9225 {
9226 bufferlist in, out;
9227 int rval;
9228 ObjectWriteOperation o;
9229 o.exec("hello", "write_too_much_return_data", in, &out, &rval);
9230 librados::AioCompletion *completion = cluster.aio_create_completion();
9231 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &o,
9232 librados::OPERATION_RETURNVEC));
9233 completion->wait_for_complete();
9234 ASSERT_EQ(-EOVERFLOW, completion->get_return_value());
9235 ASSERT_EQ(-EOVERFLOW, rval);
9236 ASSERT_EQ("", std::string(out.c_str(), out.length()));
9237 }
9238}
9239
9240TEST_F(LibRadosTwoPoolsPP, TierFlushDuringUnsetDedupTier) {
9241 SKIP_IF_CRIMSON();
9242 // skip test if not yet octopus
9243 if (_get_required_osd_release(cluster) < "octopus") {
9244 cout << "cluster is not yet octopus, skipping test" << std::endl;
9245 return;
9246 }
9247
9248 bufferlist inbl;
9249
9250 // set dedup parameters without dedup_tier
9251 ASSERT_EQ(0, cluster.mon_command(
9252 set_pool_str(cache_pool_name, "fingerprint_algorithm", "sha1"),
9253 inbl, NULL, NULL));
9254 ASSERT_EQ(0, cluster.mon_command(
9255 set_pool_str(cache_pool_name, "dedup_chunk_algorithm", "fastcdc"),
9256 inbl, NULL, NULL));
9257 ASSERT_EQ(0, cluster.mon_command(
9258 set_pool_str(cache_pool_name, "dedup_cdc_chunk_size", 1024),
9259 inbl, NULL, NULL));
9260
9261 // create object
9262 bufferlist gbl;
9263 {
9264 generate_buffer(1024*8, &gbl);
9265 ObjectWriteOperation op;
9266 op.write_full(gbl);
9267 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
9268 }
9269 {
9270 bufferlist bl;
9271 bl.append("there hiHI");
9272 ObjectWriteOperation op;
9273 op.write_full(bl);
9274 ASSERT_EQ(0, ioctx.operate("bar", &op));
9275 }
9276
9277 // wait for maps to settle
9278 cluster.wait_for_latest_osdmap();
9279
9280 // set-chunk to set manifest object
9281 {
9282 ObjectReadOperation op;
9283 op.set_chunk(0, 2, ioctx, "bar", 0, CEPH_OSD_OP_FLAG_WITH_REFERENCE);
9284 librados::AioCompletion *completion = cluster.aio_create_completion();
9285 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
9286 librados::OPERATION_IGNORE_CACHE, NULL));
9287 completion->wait_for_complete();
9288 ASSERT_EQ(0, completion->get_return_value());
9289 completion->release();
9290 }
9291
9292 // flush to check if proper error is returned
9293 {
9294 ObjectReadOperation op;
9295 op.tier_flush();
9296 librados::AioCompletion *completion = cluster.aio_create_completion();
9297 ASSERT_EQ(0, cache_ioctx.aio_operate(
9298 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
9299 completion->wait_for_complete();
9300 ASSERT_EQ(-EINVAL, completion->get_return_value());
9301 completion->release();
9302 }
9303}
9304