]> git.proxmox.com Git - ceph.git/blame - ceph/src/test/librados/tier.cc
bump version to 12.1.1-pve1 while rebasing patches
[ceph.git] / ceph / src / test / librados / tier.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#include "gtest/gtest.h"
4
5#include "mds/mdstypes.h"
6#include "include/buffer.h"
7#include "include/rbd_types.h"
8#include "include/rados/librados.h"
9#include "include/rados/librados.hpp"
10#include "include/stringify.h"
11#include "include/types.h"
12#include "global/global_context.h"
13#include "common/Cond.h"
14#include "test/librados/test.h"
15#include "test/librados/TestCase.h"
16#include "json_spirit/json_spirit.h"
17
18#include "osd/HitSet.h"
19
20#include <errno.h>
21#include <map>
22#include <sstream>
23#include <string>
24
25using namespace librados;
26using std::map;
27using std::ostringstream;
28using std::string;
29
30typedef RadosTestPP LibRadosTierPP;
31typedef RadosTestECPP LibRadosTierECPP;
32
33void flush_evict_all(librados::Rados& cluster, librados::IoCtx& cache_ioctx)
34{
35 bufferlist inbl;
36 cache_ioctx.set_namespace(all_nspaces);
37 for (NObjectIterator it = cache_ioctx.nobjects_begin();
38 it != cache_ioctx.nobjects_end(); ++it) {
39 cache_ioctx.locator_set_key(it->get_locator());
40 cache_ioctx.set_namespace(it->get_nspace());
41 {
42 ObjectReadOperation op;
43 op.cache_flush();
44 librados::AioCompletion *completion = cluster.aio_create_completion();
45 cache_ioctx.aio_operate(
46 it->get_oid(), completion, &op,
47 librados::OPERATION_IGNORE_OVERLAY, NULL);
48 completion->wait_for_safe();
49 completion->get_return_value();
50 completion->release();
51 }
52 {
53 ObjectReadOperation op;
54 op.cache_evict();
55 librados::AioCompletion *completion = cluster.aio_create_completion();
56 cache_ioctx.aio_operate(
57 it->get_oid(), completion, &op,
58 librados::OPERATION_IGNORE_OVERLAY, NULL);
59 completion->wait_for_safe();
60 completion->get_return_value();
61 completion->release();
62 }
63 }
64}
65
66class LibRadosTwoPoolsPP : public RadosTestPP
67{
68public:
69 LibRadosTwoPoolsPP() {};
70 ~LibRadosTwoPoolsPP() override {};
71protected:
72 static void SetUpTestCase() {
73 pool_name = get_temp_pool_name();
74 ASSERT_EQ("", create_one_pool_pp(pool_name, s_cluster));
75 }
76 static void TearDownTestCase() {
77 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, s_cluster));
78 }
79 static std::string cache_pool_name;
80
81 void SetUp() override {
82 cache_pool_name = get_temp_pool_name();
83 ASSERT_EQ(0, s_cluster.pool_create(cache_pool_name.c_str()));
84 RadosTestPP::SetUp();
85 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
86 cache_ioctx.set_namespace(nspace);
87 }
88 void TearDown() override {
89 // flush + evict cache
90 flush_evict_all(cluster, cache_ioctx);
91
92 bufferlist inbl;
93 // tear down tiers
94 ASSERT_EQ(0, cluster.mon_command(
95 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
96 "\"}",
97 inbl, NULL, NULL));
98 ASSERT_EQ(0, cluster.mon_command(
99 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
100 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
101 inbl, NULL, NULL));
102
103 // wait for maps to settle before next test
104 cluster.wait_for_latest_osdmap();
105
106 RadosTestPP::TearDown();
107
108 cleanup_default_namespace(cache_ioctx);
109 cleanup_namespace(cache_ioctx, nspace);
110
111 cache_ioctx.close();
112 ASSERT_EQ(0, s_cluster.pool_delete(cache_pool_name.c_str()));
113 }
114 librados::IoCtx cache_ioctx;
115};
116
117class Completions
118{
119public:
120 Completions() = default;
121 librados::AioCompletion* getCompletion() {
122 librados::AioCompletion* comp = librados::Rados::aio_create_completion();
123 m_completions.push_back(comp);
124 return comp;
125 }
126
127 ~Completions() {
128 for (auto& comp : m_completions) {
129 comp->release();
130 }
131 }
132
133private:
134 vector<librados::AioCompletion *> m_completions;
135};
136
137Completions completions;
138
139std::string LibRadosTwoPoolsPP::cache_pool_name;
140
141TEST_F(LibRadosTierPP, Dirty) {
142 {
143 ObjectWriteOperation op;
144 op.undirty();
145 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still get 0 if it dne
146 }
147 {
148 ObjectWriteOperation op;
149 op.create(true);
150 ASSERT_EQ(0, ioctx.operate("foo", &op));
151 }
152 {
153 bool dirty = false;
154 int r = -1;
155 ObjectReadOperation op;
156 op.is_dirty(&dirty, &r);
157 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
158 ASSERT_TRUE(dirty);
159 ASSERT_EQ(0, r);
160 }
161 {
162 ObjectWriteOperation op;
163 op.undirty();
164 ASSERT_EQ(0, ioctx.operate("foo", &op));
165 }
166 {
167 ObjectWriteOperation op;
168 op.undirty();
169 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still 0 if already clean
170 }
171 {
172 bool dirty = false;
173 int r = -1;
174 ObjectReadOperation op;
175 op.is_dirty(&dirty, &r);
176 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
177 ASSERT_FALSE(dirty);
178 ASSERT_EQ(0, r);
179 }
180 {
181 ObjectWriteOperation op;
182 op.truncate(0); // still a write even tho it is a no-op
183 ASSERT_EQ(0, ioctx.operate("foo", &op));
184 }
185 {
186 bool dirty = false;
187 int r = -1;
188 ObjectReadOperation op;
189 op.is_dirty(&dirty, &r);
190 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
191 ASSERT_TRUE(dirty);
192 ASSERT_EQ(0, r);
193 }
194}
195
196TEST_F(LibRadosTwoPoolsPP, Overlay) {
197 // create objects
198 {
199 bufferlist bl;
200 bl.append("base");
201 ObjectWriteOperation op;
202 op.write_full(bl);
203 ASSERT_EQ(0, ioctx.operate("foo", &op));
204 }
205 {
206 bufferlist bl;
207 bl.append("cache");
208 ObjectWriteOperation op;
209 op.write_full(bl);
210 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
211 }
212
213 // configure cache
214 bufferlist inbl;
215 ASSERT_EQ(0, cluster.mon_command(
216 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
217 "\", \"tierpool\": \"" + cache_pool_name +
218 "\", \"force_nonempty\": \"--force-nonempty\" }",
219 inbl, NULL, NULL));
220 ASSERT_EQ(0, cluster.mon_command(
221 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
222 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
223 inbl, NULL, NULL));
224
225 // wait for maps to settle
226 cluster.wait_for_latest_osdmap();
227
228 // by default, the overlay sends us to cache pool
229 {
230 bufferlist bl;
231 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
232 ASSERT_EQ('c', bl[0]);
233 }
234 {
235 bufferlist bl;
236 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
237 ASSERT_EQ('c', bl[0]);
238 }
239
240 // unless we say otherwise
241 {
242 bufferlist bl;
243 ObjectReadOperation op;
244 op.read(0, 1, &bl, NULL);
245 librados::AioCompletion *completion = cluster.aio_create_completion();
246 ASSERT_EQ(0, ioctx.aio_operate(
247 "foo", completion, &op,
248 librados::OPERATION_IGNORE_OVERLAY, NULL));
249 completion->wait_for_safe();
250 ASSERT_EQ(0, completion->get_return_value());
251 completion->release();
252 ASSERT_EQ('b', bl[0]);
253 }
254}
255
256TEST_F(LibRadosTwoPoolsPP, Promote) {
257 // create object
258 {
259 bufferlist bl;
260 bl.append("hi there");
261 ObjectWriteOperation op;
262 op.write_full(bl);
263 ASSERT_EQ(0, ioctx.operate("foo", &op));
264 }
265
266 // configure cache
267 bufferlist inbl;
268 ASSERT_EQ(0, cluster.mon_command(
269 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
270 "\", \"tierpool\": \"" + cache_pool_name +
271 "\", \"force_nonempty\": \"--force-nonempty\" }",
272 inbl, NULL, NULL));
273 ASSERT_EQ(0, cluster.mon_command(
274 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
275 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
276 inbl, NULL, NULL));
277 ASSERT_EQ(0, cluster.mon_command(
278 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
279 "\", \"mode\": \"writeback\"}",
280 inbl, NULL, NULL));
281
282 // wait for maps to settle
283 cluster.wait_for_latest_osdmap();
284
285 // read, trigger a promote
286 {
287 bufferlist bl;
288 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
289 }
290
291 // read, trigger a whiteout
292 {
293 bufferlist bl;
294 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
295 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
296 }
297
298 // verify the object is present in the cache tier
299 {
300 NObjectIterator it = cache_ioctx.nobjects_begin();
301 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
302 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
303 ++it;
304 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
305 ++it;
306 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
307 }
308}
309
310TEST_F(LibRadosTwoPoolsPP, PromoteSnap) {
311 // create object
312 {
313 bufferlist bl;
314 bl.append("hi there");
315 ObjectWriteOperation op;
316 op.write_full(bl);
317 ASSERT_EQ(0, ioctx.operate("foo", &op));
318 }
319 {
320 bufferlist bl;
321 bl.append("hi there");
322 ObjectWriteOperation op;
323 op.write_full(bl);
324 ASSERT_EQ(0, ioctx.operate("bar", &op));
325 }
326 {
327 bufferlist bl;
328 bl.append("hi there");
329 ObjectWriteOperation op;
330 op.write_full(bl);
331 ASSERT_EQ(0, ioctx.operate("baz", &op));
332 }
333 {
334 bufferlist bl;
335 bl.append("hi there");
336 ObjectWriteOperation op;
337 op.write_full(bl);
338 ASSERT_EQ(0, ioctx.operate("bam", &op));
339 }
340
341 // create a snapshot, clone
342 vector<uint64_t> my_snaps(1);
343 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
344 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
345 my_snaps));
346 {
347 bufferlist bl;
348 bl.append("ciao!");
349 ObjectWriteOperation op;
350 op.write_full(bl);
351 ASSERT_EQ(0, ioctx.operate("foo", &op));
352 }
353 {
354 bufferlist bl;
355 bl.append("ciao!");
356 ObjectWriteOperation op;
357 op.write_full(bl);
358 ASSERT_EQ(0, ioctx.operate("bar", &op));
359 }
360 {
361 ObjectWriteOperation op;
362 op.remove();
363 ASSERT_EQ(0, ioctx.operate("baz", &op));
364 }
365 {
366 bufferlist bl;
367 bl.append("ciao!");
368 ObjectWriteOperation op;
369 op.write_full(bl);
370 ASSERT_EQ(0, ioctx.operate("bam", &op));
371 }
372
373 // configure cache
374 bufferlist inbl;
375 ASSERT_EQ(0, cluster.mon_command(
376 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
377 "\", \"tierpool\": \"" + cache_pool_name +
378 "\", \"force_nonempty\": \"--force-nonempty\" }",
379 inbl, NULL, NULL));
380 ASSERT_EQ(0, cluster.mon_command(
381 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
382 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
383 inbl, NULL, NULL));
384 ASSERT_EQ(0, cluster.mon_command(
385 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
386 "\", \"mode\": \"writeback\"}",
387 inbl, NULL, NULL));
388
389 // wait for maps to settle
390 cluster.wait_for_latest_osdmap();
391
392 // read, trigger a promote on the head
393 {
394 bufferlist bl;
395 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
396 ASSERT_EQ('c', bl[0]);
397 }
398 {
399 bufferlist bl;
400 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
401 ASSERT_EQ('c', bl[0]);
402 }
403
404 ioctx.snap_set_read(my_snaps[0]);
405
406 // read foo snap
407 {
408 bufferlist bl;
409 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
410 ASSERT_EQ('h', bl[0]);
411 }
412
413 // read bar snap
414 {
415 bufferlist bl;
416 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
417 ASSERT_EQ('h', bl[0]);
418 }
419
420 // read baz snap
421 {
422 bufferlist bl;
423 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
424 ASSERT_EQ('h', bl[0]);
425 }
426
427 ioctx.snap_set_read(librados::SNAP_HEAD);
428
429 // read foo
430 {
431 bufferlist bl;
432 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
433 ASSERT_EQ('c', bl[0]);
434 }
435
436 // read bar
437 {
438 bufferlist bl;
439 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
440 ASSERT_EQ('c', bl[0]);
441 }
442
443 // read baz
444 {
445 bufferlist bl;
446 ASSERT_EQ(-ENOENT, ioctx.read("baz", bl, 1, 0));
447 }
448
449 // cleanup
450 ioctx.selfmanaged_snap_remove(my_snaps[0]);
451}
452
453TEST_F(LibRadosTwoPoolsPP, PromoteSnapScrub) {
454 int num = 100;
455
456 // create objects
457 for (int i=0; i<num; ++i) {
458 bufferlist bl;
459 bl.append("hi there");
460 ObjectWriteOperation op;
461 op.write_full(bl);
462 ASSERT_EQ(0, ioctx.operate(string("foo") + stringify(i), &op));
463 }
464
465 vector<uint64_t> my_snaps;
466 for (int snap=0; snap<4; ++snap) {
467 // create a snapshot, clone
468 vector<uint64_t> ns(1);
469 ns.insert(ns.end(), my_snaps.begin(), my_snaps.end());
470 my_snaps.swap(ns);
471 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
472 cout << "my_snaps " << my_snaps << std::endl;
473 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
474 my_snaps));
475 for (int i=0; i<num; ++i) {
476 bufferlist bl;
477 bl.append(string("ciao! snap") + stringify(snap));
478 ObjectWriteOperation op;
479 op.write_full(bl);
480 ASSERT_EQ(0, ioctx.operate(string("foo") + stringify(i), &op));
481 }
482 }
483
484 // configure cache
485 bufferlist inbl;
486 ASSERT_EQ(0, cluster.mon_command(
487 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
488 "\", \"tierpool\": \"" + cache_pool_name +
489 "\", \"force_nonempty\": \"--force-nonempty\" }",
490 inbl, NULL, NULL));
491 ASSERT_EQ(0, cluster.mon_command(
492 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
493 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
494 inbl, NULL, NULL));
495 ASSERT_EQ(0, cluster.mon_command(
496 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
497 "\", \"mode\": \"writeback\"}",
498 inbl, NULL, NULL));
499
500 // wait for maps to settle
501 cluster.wait_for_latest_osdmap();
502
503 // read, trigger a promote on _some_ heads to make sure we handle cases
504 // where snaps are present and where they are not.
505 cout << "promoting some heads" << std::endl;
506 for (int i=0; i<num; ++i) {
507 if (i % 5 == 0 || i > num - 3) {
508 bufferlist bl;
509 ASSERT_EQ(1, ioctx.read(string("foo") + stringify(i), bl, 1, 0));
510 ASSERT_EQ('c', bl[0]);
511 }
512 }
513
514 for (unsigned snap = 0; snap < my_snaps.size(); ++snap) {
515 cout << "promoting from clones for snap " << my_snaps[snap] << std::endl;
516 ioctx.snap_set_read(my_snaps[snap]);
517
518 // read some snaps, semi-randomly
519 for (int i=0; i<50; ++i) {
520 bufferlist bl;
521 string o = string("foo") + stringify((snap * i * 137) % 80);
522 //cout << o << std::endl;
523 ASSERT_EQ(1, ioctx.read(o, bl, 1, 0));
524 }
525 }
526
527 // ok, stop and scrub this pool (to make sure scrub can handle
528 // missing clones in the cache tier).
529 {
530 IoCtx cache_ioctx;
531 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
532 for (int i=0; i<10; ++i) {
533 do {
534 ostringstream ss;
535 ss << "{\"prefix\": \"pg scrub\", \"pgid\": \""
536 << cache_ioctx.get_id() << "." << i
537 << "\"}";
538 int r = cluster.mon_command(ss.str(), inbl, NULL, NULL);
539 if (r == -ENOENT || // in case mgr osdmap is stale
540 r == -EAGAIN) {
541 sleep(5);
542 continue;
543 }
544 } while (false);
545 }
546
547 // give it a few seconds to go. this is sloppy but is usually enough time
548 cout << "waiting for scrubs..." << std::endl;
549 sleep(30);
550 cout << "done waiting" << std::endl;
551 }
552
553 ioctx.snap_set_read(librados::SNAP_HEAD);
554
555 //cleanup
556 for (unsigned snap = 0; snap < my_snaps.size(); ++snap) {
557 ioctx.selfmanaged_snap_remove(my_snaps[snap]);
558 }
559}
560
561TEST_F(LibRadosTwoPoolsPP, PromoteSnapTrimRace) {
562 // create object
563 {
564 bufferlist bl;
565 bl.append("hi there");
566 ObjectWriteOperation op;
567 op.write_full(bl);
568 ASSERT_EQ(0, ioctx.operate("foo", &op));
569 }
570
571 // create a snapshot, clone
572 vector<uint64_t> my_snaps(1);
573 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
574 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
575 my_snaps));
576 {
577 bufferlist bl;
578 bl.append("ciao!");
579 ObjectWriteOperation op;
580 op.write_full(bl);
581 ASSERT_EQ(0, ioctx.operate("foo", &op));
582 }
583
584 // configure cache
585 bufferlist inbl;
586 ASSERT_EQ(0, cluster.mon_command(
587 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
588 "\", \"tierpool\": \"" + cache_pool_name +
589 "\", \"force_nonempty\": \"--force-nonempty\" }",
590 inbl, NULL, NULL));
591 ASSERT_EQ(0, cluster.mon_command(
592 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
593 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
594 inbl, NULL, NULL));
595 ASSERT_EQ(0, cluster.mon_command(
596 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
597 "\", \"mode\": \"writeback\"}",
598 inbl, NULL, NULL));
599
600 // wait for maps to settle
601 cluster.wait_for_latest_osdmap();
602
603 // delete the snap
604 ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(my_snaps[0]));
605
606 ioctx.snap_set_read(my_snaps[0]);
607
608 // read foo snap
609 {
610 bufferlist bl;
611 ASSERT_EQ(-ENOENT, ioctx.read("foo", bl, 1, 0));
612 }
613
614 // cleanup
615 ioctx.selfmanaged_snap_remove(my_snaps[0]);
616}
617
618TEST_F(LibRadosTwoPoolsPP, Whiteout) {
619 // create object
620 {
621 bufferlist bl;
622 bl.append("hi there");
623 ObjectWriteOperation op;
624 op.write_full(bl);
625 ASSERT_EQ(0, ioctx.operate("foo", &op));
626 }
627
628 // configure cache
629 bufferlist inbl;
630 ASSERT_EQ(0, cluster.mon_command(
631 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
632 "\", \"tierpool\": \"" + cache_pool_name +
633 "\", \"force_nonempty\": \"--force-nonempty\" }",
634 inbl, NULL, NULL));
635 ASSERT_EQ(0, cluster.mon_command(
636 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
637 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
638 inbl, NULL, NULL));
639 ASSERT_EQ(0, cluster.mon_command(
640 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
641 "\", \"mode\": \"writeback\"}",
642 inbl, NULL, NULL));
643
644 // wait for maps to settle
645 cluster.wait_for_latest_osdmap();
646
647 // create some whiteouts, verify they behave
648 {
649 ObjectWriteOperation op;
650 op.assert_exists();
651 op.remove();
652 ASSERT_EQ(0, ioctx.operate("foo", &op));
653 }
654
655 {
656 ObjectWriteOperation op;
657 op.assert_exists();
658 op.remove();
659 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
660 }
661 {
662 ObjectWriteOperation op;
663 op.assert_exists();
664 op.remove();
665 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
666 }
667
668 // verify the whiteouts are there in the cache tier
669 {
670 NObjectIterator it = cache_ioctx.nobjects_begin();
671 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
672 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
673 ++it;
674 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
675 ++it;
676 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
677 }
678
679 // delete a whiteout and verify it goes away
680 ASSERT_EQ(-ENOENT, ioctx.remove("foo"));
681 {
682 ObjectWriteOperation op;
683 op.remove();
684 librados::AioCompletion *completion = cluster.aio_create_completion();
685 ASSERT_EQ(0, ioctx.aio_operate("bar", completion, &op,
686 librados::OPERATION_IGNORE_CACHE));
687 completion->wait_for_safe();
688 ASSERT_EQ(0, completion->get_return_value());
689 completion->release();
690
691 NObjectIterator it = cache_ioctx.nobjects_begin();
692 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
693 ASSERT_TRUE(it->get_oid() == string("foo"));
694 ++it;
695 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
696 }
697
698 // recreate an object and verify we can read it
699 {
700 bufferlist bl;
701 bl.append("hi there");
702 ObjectWriteOperation op;
703 op.write_full(bl);
704 ASSERT_EQ(0, ioctx.operate("foo", &op));
705 }
706 {
707 bufferlist bl;
708 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
709 ASSERT_EQ('h', bl[0]);
710 }
711}
712
713TEST_F(LibRadosTwoPoolsPP, WhiteoutDeleteCreate) {
714 // configure cache
715 bufferlist inbl;
716 ASSERT_EQ(0, cluster.mon_command(
717 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
718 "\", \"tierpool\": \"" + cache_pool_name +
719 "\", \"force_nonempty\": \"--force-nonempty\" }",
720 inbl, NULL, NULL));
721 ASSERT_EQ(0, cluster.mon_command(
722 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
723 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
724 inbl, NULL, NULL));
725 ASSERT_EQ(0, cluster.mon_command(
726 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
727 "\", \"mode\": \"writeback\"}",
728 inbl, NULL, NULL));
729
730 // wait for maps to settle
731 cluster.wait_for_latest_osdmap();
732
733 // create an object
734 {
735 bufferlist bl;
736 bl.append("foo");
737 ASSERT_EQ(0, ioctx.write_full("foo", bl));
738 }
739
740 // do delete + create operation
741 {
742 ObjectWriteOperation op;
743 op.remove();
744 bufferlist bl;
745 bl.append("bar");
746 op.write_full(bl);
747 ASSERT_EQ(0, ioctx.operate("foo", &op));
748 }
749
750 // verify it still "exists" (w/ new content)
751 {
752 bufferlist bl;
753 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
754 ASSERT_EQ('b', bl[0]);
755 }
756}
757
758TEST_F(LibRadosTwoPoolsPP, Evict) {
759 // create object
760 {
761 bufferlist bl;
762 bl.append("hi there");
763 ObjectWriteOperation op;
764 op.write_full(bl);
765 ASSERT_EQ(0, ioctx.operate("foo", &op));
766 }
767
768 // configure cache
769 bufferlist inbl;
770 ASSERT_EQ(0, cluster.mon_command(
771 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
772 "\", \"tierpool\": \"" + cache_pool_name +
773 "\", \"force_nonempty\": \"--force-nonempty\" }",
774 inbl, NULL, NULL));
775 ASSERT_EQ(0, cluster.mon_command(
776 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
777 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
778 inbl, NULL, NULL));
779 ASSERT_EQ(0, cluster.mon_command(
780 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
781 "\", \"mode\": \"writeback\"}",
782 inbl, NULL, NULL));
783
784 // wait for maps to settle
785 cluster.wait_for_latest_osdmap();
786
787 // read, trigger a promote
788 {
789 bufferlist bl;
790 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
791 }
792
793 // read, trigger a whiteout, and a dirty object
794 {
795 bufferlist bl;
796 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
797 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
798 ASSERT_EQ(0, ioctx.write("bar", bl, bl.length(), 0));
799 }
800
801 // verify the object is present in the cache tier
802 {
803 NObjectIterator it = cache_ioctx.nobjects_begin();
804 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
805 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
806 ++it;
807 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
808 ++it;
809 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
810 }
811
812 // pin
813 {
814 ObjectWriteOperation op;
815 op.cache_pin();
816 librados::AioCompletion *completion = cluster.aio_create_completion();
817 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
818 completion->wait_for_safe();
819 ASSERT_EQ(0, completion->get_return_value());
820 completion->release();
821 }
822
823 // evict the pinned object with -EPERM
824 {
825 ObjectReadOperation op;
826 op.cache_evict();
827 librados::AioCompletion *completion = cluster.aio_create_completion();
828 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
829 librados::OPERATION_IGNORE_CACHE,
830 NULL));
831 completion->wait_for_safe();
832 ASSERT_EQ(-EPERM, completion->get_return_value());
833 completion->release();
834 }
835
836 // unpin
837 {
838 ObjectWriteOperation op;
839 op.cache_unpin();
840 librados::AioCompletion *completion = cluster.aio_create_completion();
841 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
842 completion->wait_for_safe();
843 ASSERT_EQ(0, completion->get_return_value());
844 completion->release();
845 }
846
847 // flush
848 {
849 ObjectReadOperation op;
850 op.cache_flush();
851 librados::AioCompletion *completion = cluster.aio_create_completion();
852 ASSERT_EQ(0, cache_ioctx.aio_operate(
853 "foo", completion, &op,
854 librados::OPERATION_IGNORE_OVERLAY, NULL));
855 completion->wait_for_safe();
856 ASSERT_EQ(0, completion->get_return_value());
857 completion->release();
858 }
859
860 // verify clean
861 {
862 bool dirty = false;
863 int r = -1;
864 ObjectReadOperation op;
865 op.is_dirty(&dirty, &r);
866 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
867 ASSERT_FALSE(dirty);
868 ASSERT_EQ(0, r);
869 }
870
871 // evict
872 {
873 ObjectReadOperation op;
874 op.cache_evict();
875 librados::AioCompletion *completion = cluster.aio_create_completion();
876 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
877 librados::OPERATION_IGNORE_CACHE,
878 NULL));
879 completion->wait_for_safe();
880 ASSERT_EQ(0, completion->get_return_value());
881 completion->release();
882 }
883 {
884 ObjectReadOperation op;
885 op.cache_evict();
886 librados::AioCompletion *completion = cluster.aio_create_completion();
887 ASSERT_EQ(0, cache_ioctx.aio_operate(
888 "foo", completion, &op,
889 librados::OPERATION_IGNORE_CACHE, NULL));
890 completion->wait_for_safe();
891 ASSERT_EQ(0, completion->get_return_value());
892 completion->release();
893 }
894 {
895 ObjectReadOperation op;
896 op.cache_evict();
897 librados::AioCompletion *completion = cluster.aio_create_completion();
898 ASSERT_EQ(0, cache_ioctx.aio_operate(
899 "bar", completion, &op,
900 librados::OPERATION_IGNORE_CACHE, NULL));
901 completion->wait_for_safe();
902 ASSERT_EQ(-EBUSY, completion->get_return_value());
903 completion->release();
904 }
905}
906
907TEST_F(LibRadosTwoPoolsPP, EvictSnap) {
908 // create object
909 {
910 bufferlist bl;
911 bl.append("hi there");
912 ObjectWriteOperation op;
913 op.write_full(bl);
914 ASSERT_EQ(0, ioctx.operate("foo", &op));
915 }
916 {
917 bufferlist bl;
918 bl.append("hi there");
919 ObjectWriteOperation op;
920 op.write_full(bl);
921 ASSERT_EQ(0, ioctx.operate("bar", &op));
922 }
923 {
924 bufferlist bl;
925 bl.append("hi there");
926 ObjectWriteOperation op;
927 op.write_full(bl);
928 ASSERT_EQ(0, ioctx.operate("baz", &op));
929 }
930 {
931 bufferlist bl;
932 bl.append("hi there");
933 ObjectWriteOperation op;
934 op.write_full(bl);
935 ASSERT_EQ(0, ioctx.operate("bam", &op));
936 }
937
938 // create a snapshot, clone
939 vector<uint64_t> my_snaps(1);
940 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
941 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
942 my_snaps));
943 {
944 bufferlist bl;
945 bl.append("ciao!");
946 ObjectWriteOperation op;
947 op.write_full(bl);
948 ASSERT_EQ(0, ioctx.operate("foo", &op));
949 }
950 {
951 bufferlist bl;
952 bl.append("ciao!");
953 ObjectWriteOperation op;
954 op.write_full(bl);
955 ASSERT_EQ(0, ioctx.operate("bar", &op));
956 }
957 {
958 ObjectWriteOperation op;
959 op.remove();
960 ASSERT_EQ(0, ioctx.operate("baz", &op));
961 }
962 {
963 bufferlist bl;
964 bl.append("ciao!");
965 ObjectWriteOperation op;
966 op.write_full(bl);
967 ASSERT_EQ(0, ioctx.operate("bam", &op));
968 }
969
970 // configure cache
971 bufferlist inbl;
972 ASSERT_EQ(0, cluster.mon_command(
973 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
974 "\", \"tierpool\": \"" + cache_pool_name +
975 "\", \"force_nonempty\": \"--force-nonempty\" }",
976 inbl, NULL, NULL));
977 ASSERT_EQ(0, cluster.mon_command(
978 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
979 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
980 inbl, NULL, NULL));
981 ASSERT_EQ(0, cluster.mon_command(
982 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
983 "\", \"mode\": \"writeback\"}",
984 inbl, NULL, NULL));
985
986 // wait for maps to settle
987 cluster.wait_for_latest_osdmap();
988
989 // read, trigger a promote on the head
990 {
991 bufferlist bl;
992 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
993 ASSERT_EQ('c', bl[0]);
994 }
995 {
996 bufferlist bl;
997 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
998 ASSERT_EQ('c', bl[0]);
999 }
1000
1001 // evict bam
1002 {
1003 ObjectReadOperation op;
1004 op.cache_evict();
1005 librados::AioCompletion *completion = cluster.aio_create_completion();
1006 ASSERT_EQ(0, cache_ioctx.aio_operate(
1007 "bam", completion, &op,
1008 librados::OPERATION_IGNORE_CACHE, NULL));
1009 completion->wait_for_safe();
1010 ASSERT_EQ(0, completion->get_return_value());
1011 completion->release();
1012 }
1013 {
1014 bufferlist bl;
1015 ObjectReadOperation op;
1016 op.read(1, 0, &bl, NULL);
1017 librados::AioCompletion *completion = cluster.aio_create_completion();
1018 ASSERT_EQ(0, cache_ioctx.aio_operate(
1019 "bam", completion, &op,
1020 librados::OPERATION_IGNORE_CACHE, NULL));
1021 completion->wait_for_safe();
1022 ASSERT_EQ(-ENOENT, completion->get_return_value());
1023 completion->release();
1024 }
1025
1026 // read foo snap
1027 ioctx.snap_set_read(my_snaps[0]);
1028 {
1029 bufferlist bl;
1030 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1031 ASSERT_EQ('h', bl[0]);
1032 }
1033
1034 // evict foo snap
1035 {
1036 ObjectReadOperation op;
1037 op.cache_evict();
1038 librados::AioCompletion *completion = cluster.aio_create_completion();
1039 ASSERT_EQ(0, ioctx.aio_operate(
1040 "foo", completion, &op,
1041 librados::OPERATION_IGNORE_CACHE, NULL));
1042 completion->wait_for_safe();
1043 ASSERT_EQ(0, completion->get_return_value());
1044 completion->release();
1045 }
1046 // snap is gone...
1047 {
1048 bufferlist bl;
1049 ObjectReadOperation op;
1050 op.read(1, 0, &bl, NULL);
1051 librados::AioCompletion *completion = cluster.aio_create_completion();
1052 ASSERT_EQ(0, ioctx.aio_operate(
1053 "foo", completion, &op,
1054 librados::OPERATION_IGNORE_CACHE, NULL));
1055 completion->wait_for_safe();
1056 ASSERT_EQ(-ENOENT, completion->get_return_value());
1057 completion->release();
1058 }
1059 // head is still there...
1060 ioctx.snap_set_read(librados::SNAP_HEAD);
1061 {
1062 bufferlist bl;
1063 ObjectReadOperation op;
1064 op.read(1, 0, &bl, NULL);
1065 librados::AioCompletion *completion = cluster.aio_create_completion();
1066 ASSERT_EQ(0, ioctx.aio_operate(
1067 "foo", completion, &op,
1068 librados::OPERATION_IGNORE_CACHE, NULL));
1069 completion->wait_for_safe();
1070 ASSERT_EQ(0, completion->get_return_value());
1071 completion->release();
1072 }
1073
1074 // promote head + snap of bar
1075 ioctx.snap_set_read(librados::SNAP_HEAD);
1076 {
1077 bufferlist bl;
1078 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
1079 ASSERT_EQ('c', bl[0]);
1080 }
1081 ioctx.snap_set_read(my_snaps[0]);
1082 {
1083 bufferlist bl;
1084 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
1085 ASSERT_EQ('h', bl[0]);
1086 }
1087
1088 // evict bar head (fail)
1089 ioctx.snap_set_read(librados::SNAP_HEAD);
1090 {
1091 ObjectReadOperation op;
1092 op.cache_evict();
1093 librados::AioCompletion *completion = cluster.aio_create_completion();
1094 ASSERT_EQ(0, ioctx.aio_operate(
1095 "bar", completion, &op,
1096 librados::OPERATION_IGNORE_CACHE, NULL));
1097 completion->wait_for_safe();
1098 ASSERT_EQ(-EBUSY, completion->get_return_value());
1099 completion->release();
1100 }
1101
1102 // evict bar snap
1103 ioctx.snap_set_read(my_snaps[0]);
1104 {
1105 ObjectReadOperation op;
1106 op.cache_evict();
1107 librados::AioCompletion *completion = cluster.aio_create_completion();
1108 ASSERT_EQ(0, ioctx.aio_operate(
1109 "bar", completion, &op,
1110 librados::OPERATION_IGNORE_CACHE, NULL));
1111 completion->wait_for_safe();
1112 ASSERT_EQ(0, completion->get_return_value());
1113 completion->release();
1114 }
1115 // ...and then head
1116 ioctx.snap_set_read(librados::SNAP_HEAD);
1117 {
1118 bufferlist bl;
1119 ObjectReadOperation op;
1120 op.read(1, 0, &bl, NULL);
1121 librados::AioCompletion *completion = cluster.aio_create_completion();
1122 ASSERT_EQ(0, ioctx.aio_operate(
1123 "bar", completion, &op,
1124 librados::OPERATION_IGNORE_CACHE, NULL));
1125 completion->wait_for_safe();
1126 ASSERT_EQ(0, completion->get_return_value());
1127 completion->release();
1128 }
1129 {
1130 ObjectReadOperation op;
1131 op.cache_evict();
1132 librados::AioCompletion *completion = cluster.aio_create_completion();
1133 ASSERT_EQ(0, ioctx.aio_operate(
1134 "bar", completion, &op,
1135 librados::OPERATION_IGNORE_CACHE, NULL));
1136 completion->wait_for_safe();
1137 ASSERT_EQ(0, completion->get_return_value());
1138 completion->release();
1139 }
1140
1141 // cleanup
1142 ioctx.selfmanaged_snap_remove(my_snaps[0]);
1143}
1144
1145// this test case reproduces http://tracker.ceph.com/issues/8629
1146TEST_F(LibRadosTwoPoolsPP, EvictSnap2) {
1147 // create object
1148 {
1149 bufferlist bl;
1150 bl.append("hi there");
1151 ObjectWriteOperation op;
1152 op.write_full(bl);
1153 ASSERT_EQ(0, ioctx.operate("foo", &op));
1154 }
1155 // create a snapshot, clone
1156 vector<uint64_t> my_snaps(1);
1157 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1158 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1159 my_snaps));
1160 {
1161 bufferlist bl;
1162 bl.append("ciao!");
1163 ObjectWriteOperation op;
1164 op.write_full(bl);
1165 ASSERT_EQ(0, ioctx.operate("foo", &op));
1166 }
1167 // configure cache
1168 bufferlist inbl;
1169 ASSERT_EQ(0, cluster.mon_command(
1170 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1171 "\", \"tierpool\": \"" + cache_pool_name +
1172 "\", \"force_nonempty\": \"--force-nonempty\" }",
1173 inbl, NULL, NULL));
1174 ASSERT_EQ(0, cluster.mon_command(
1175 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1176 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1177 inbl, NULL, NULL));
1178 ASSERT_EQ(0, cluster.mon_command(
1179 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1180 "\", \"mode\": \"writeback\"}",
1181 inbl, NULL, NULL));
1182
1183 // wait for maps to settle
1184 cluster.wait_for_latest_osdmap();
1185
1186 // read, trigger a promote on the head
1187 {
1188 bufferlist bl;
1189 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1190 ASSERT_EQ('c', bl[0]);
1191 }
1192
1193 // evict
1194 {
1195 ObjectReadOperation op;
1196 op.cache_evict();
1197 librados::AioCompletion *completion = cluster.aio_create_completion();
1198 ASSERT_EQ(0, cache_ioctx.aio_operate(
1199 "foo", completion, &op,
1200 librados::OPERATION_IGNORE_CACHE, NULL));
1201 completion->wait_for_safe();
1202 ASSERT_EQ(0, completion->get_return_value());
1203 completion->release();
1204 }
1205
1206 // verify the snapdir is not present in the cache pool
1207 {
1208 ObjectReadOperation op;
1209 librados::snap_set_t snapset;
1210 op.list_snaps(&snapset, NULL);
1211 ioctx.snap_set_read(librados::SNAP_DIR);
1212 librados::AioCompletion *completion = cluster.aio_create_completion();
1213 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op,
1214 librados::OPERATION_IGNORE_CACHE, NULL));
1215 completion->wait_for_safe();
1216 ASSERT_EQ(-ENOENT, completion->get_return_value());
1217 completion->release();
1218 }
1219}
1220
1221TEST_F(LibRadosTwoPoolsPP, TryFlush) {
1222 // configure cache
1223 bufferlist inbl;
1224 ASSERT_EQ(0, cluster.mon_command(
1225 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1226 "\", \"tierpool\": \"" + cache_pool_name +
1227 "\", \"force_nonempty\": \"--force-nonempty\" }",
1228 inbl, NULL, NULL));
1229 ASSERT_EQ(0, cluster.mon_command(
1230 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1231 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1232 inbl, NULL, NULL));
1233 ASSERT_EQ(0, cluster.mon_command(
1234 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1235 "\", \"mode\": \"writeback\"}",
1236 inbl, NULL, NULL));
1237
1238 // wait for maps to settle
1239 cluster.wait_for_latest_osdmap();
1240
1241 // create object
1242 {
1243 bufferlist bl;
1244 bl.append("hi there");
1245 ObjectWriteOperation op;
1246 op.write_full(bl);
1247 ASSERT_EQ(0, ioctx.operate("foo", &op));
1248 }
1249
1250 // verify the object is present in the cache tier
1251 {
1252 NObjectIterator it = cache_ioctx.nobjects_begin();
1253 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
1254 ASSERT_TRUE(it->get_oid() == string("foo"));
1255 ++it;
1256 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1257 }
1258
1259 // verify the object is NOT present in the base tier
1260 {
1261 NObjectIterator it = ioctx.nobjects_begin();
1262 ASSERT_TRUE(it == ioctx.nobjects_end());
1263 }
1264
1265 // verify dirty
1266 {
1267 bool dirty = false;
1268 int r = -1;
1269 ObjectReadOperation op;
1270 op.is_dirty(&dirty, &r);
1271 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1272 ASSERT_TRUE(dirty);
1273 ASSERT_EQ(0, r);
1274 }
1275
1276 // pin
1277 {
1278 ObjectWriteOperation op;
1279 op.cache_pin();
1280 librados::AioCompletion *completion = cluster.aio_create_completion();
1281 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1282 completion->wait_for_safe();
1283 ASSERT_EQ(0, completion->get_return_value());
1284 completion->release();
1285 }
1286
1287 // flush the pinned object with -EPERM
1288 {
1289 ObjectReadOperation op;
1290 op.cache_try_flush();
1291 librados::AioCompletion *completion = cluster.aio_create_completion();
1292 ASSERT_EQ(0, cache_ioctx.aio_operate(
1293 "foo", completion, &op,
1294 librados::OPERATION_IGNORE_OVERLAY |
1295 librados::OPERATION_SKIPRWLOCKS, NULL));
1296 completion->wait_for_safe();
1297 ASSERT_EQ(-EPERM, completion->get_return_value());
1298 completion->release();
1299 }
1300
1301 // unpin
1302 {
1303 ObjectWriteOperation op;
1304 op.cache_unpin();
1305 librados::AioCompletion *completion = cluster.aio_create_completion();
1306 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1307 completion->wait_for_safe();
1308 ASSERT_EQ(0, completion->get_return_value());
1309 completion->release();
1310 }
1311
1312 // flush
1313 {
1314 ObjectReadOperation op;
1315 op.cache_try_flush();
1316 librados::AioCompletion *completion = cluster.aio_create_completion();
1317 ASSERT_EQ(0, cache_ioctx.aio_operate(
1318 "foo", completion, &op,
1319 librados::OPERATION_IGNORE_OVERLAY |
1320 librados::OPERATION_SKIPRWLOCKS, NULL));
1321 completion->wait_for_safe();
1322 ASSERT_EQ(0, completion->get_return_value());
1323 completion->release();
1324 }
1325
1326 // verify clean
1327 {
1328 bool dirty = false;
1329 int r = -1;
1330 ObjectReadOperation op;
1331 op.is_dirty(&dirty, &r);
1332 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1333 ASSERT_FALSE(dirty);
1334 ASSERT_EQ(0, r);
1335 }
1336
1337 // verify in base tier
1338 {
1339 NObjectIterator it = ioctx.nobjects_begin();
1340 ASSERT_TRUE(it != ioctx.nobjects_end());
1341 ASSERT_TRUE(it->get_oid() == string("foo"));
1342 ++it;
1343 ASSERT_TRUE(it == ioctx.nobjects_end());
1344 }
1345
1346 // evict it
1347 {
1348 ObjectReadOperation op;
1349 op.cache_evict();
1350 librados::AioCompletion *completion = cluster.aio_create_completion();
1351 ASSERT_EQ(0, cache_ioctx.aio_operate(
1352 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
1353 completion->wait_for_safe();
1354 ASSERT_EQ(0, completion->get_return_value());
1355 completion->release();
1356 }
1357
1358 // verify no longer in cache tier
1359 {
1360 NObjectIterator it = cache_ioctx.nobjects_begin();
1361 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1362 }
1363}
1364
1365TEST_F(LibRadosTwoPoolsPP, Flush) {
1366 // configure cache
1367 bufferlist inbl;
1368 ASSERT_EQ(0, cluster.mon_command(
1369 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1370 "\", \"tierpool\": \"" + cache_pool_name +
1371 "\", \"force_nonempty\": \"--force-nonempty\" }",
1372 inbl, NULL, NULL));
1373 ASSERT_EQ(0, cluster.mon_command(
1374 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1375 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1376 inbl, NULL, NULL));
1377 ASSERT_EQ(0, cluster.mon_command(
1378 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1379 "\", \"mode\": \"writeback\"}",
1380 inbl, NULL, NULL));
1381
1382 // wait for maps to settle
1383 cluster.wait_for_latest_osdmap();
1384
1385 uint64_t user_version = 0;
1386
1387 // create object
1388 {
1389 bufferlist bl;
1390 bl.append("hi there");
1391 ObjectWriteOperation op;
1392 op.write_full(bl);
1393 ASSERT_EQ(0, ioctx.operate("foo", &op));
1394 }
1395
1396 // verify the object is present in the cache tier
1397 {
1398 NObjectIterator it = cache_ioctx.nobjects_begin();
1399 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
1400 ASSERT_TRUE(it->get_oid() == string("foo"));
1401 ++it;
1402 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1403 }
1404
1405 // verify the object is NOT present in the base tier
1406 {
1407 NObjectIterator it = ioctx.nobjects_begin();
1408 ASSERT_TRUE(it == ioctx.nobjects_end());
1409 }
1410
1411 // verify dirty
1412 {
1413 bool dirty = false;
1414 int r = -1;
1415 ObjectReadOperation op;
1416 op.is_dirty(&dirty, &r);
1417 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1418 ASSERT_TRUE(dirty);
1419 ASSERT_EQ(0, r);
1420 user_version = cache_ioctx.get_last_version();
1421 }
1422
1423 // pin
1424 {
1425 ObjectWriteOperation op;
1426 op.cache_pin();
1427 librados::AioCompletion *completion = cluster.aio_create_completion();
1428 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1429 completion->wait_for_safe();
1430 ASSERT_EQ(0, completion->get_return_value());
1431 completion->release();
1432 }
1433
1434 // flush the pinned object with -EPERM
1435 {
1436 ObjectReadOperation op;
1437 op.cache_try_flush();
1438 librados::AioCompletion *completion = cluster.aio_create_completion();
1439 ASSERT_EQ(0, cache_ioctx.aio_operate(
1440 "foo", completion, &op,
1441 librados::OPERATION_IGNORE_OVERLAY |
1442 librados::OPERATION_SKIPRWLOCKS, NULL));
1443 completion->wait_for_safe();
1444 ASSERT_EQ(-EPERM, completion->get_return_value());
1445 completion->release();
1446 }
1447
1448 // unpin
1449 {
1450 ObjectWriteOperation op;
1451 op.cache_unpin();
1452 librados::AioCompletion *completion = cluster.aio_create_completion();
1453 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1454 completion->wait_for_safe();
1455 ASSERT_EQ(0, completion->get_return_value());
1456 completion->release();
1457 }
1458
1459 // flush
1460 {
1461 ObjectReadOperation op;
1462 op.cache_flush();
1463 librados::AioCompletion *completion = cluster.aio_create_completion();
1464 ASSERT_EQ(0, cache_ioctx.aio_operate(
1465 "foo", completion, &op,
1466 librados::OPERATION_IGNORE_OVERLAY, NULL));
1467 completion->wait_for_safe();
1468 ASSERT_EQ(0, completion->get_return_value());
1469 completion->release();
1470 }
1471
1472 // verify clean
1473 {
1474 bool dirty = false;
1475 int r = -1;
1476 ObjectReadOperation op;
1477 op.is_dirty(&dirty, &r);
1478 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1479 ASSERT_FALSE(dirty);
1480 ASSERT_EQ(0, r);
1481 }
1482
1483 // verify in base tier
1484 {
1485 NObjectIterator it = ioctx.nobjects_begin();
1486 ASSERT_TRUE(it != ioctx.nobjects_end());
1487 ASSERT_TRUE(it->get_oid() == string("foo"));
1488 ++it;
1489 ASSERT_TRUE(it == ioctx.nobjects_end());
1490 }
1491
1492 // evict it
1493 {
1494 ObjectReadOperation op;
1495 op.cache_evict();
1496 librados::AioCompletion *completion = cluster.aio_create_completion();
1497 ASSERT_EQ(0, cache_ioctx.aio_operate(
1498 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
1499 completion->wait_for_safe();
1500 ASSERT_EQ(0, completion->get_return_value());
1501 completion->release();
1502 }
1503
1504 // verify no longer in cache tier
1505 {
1506 NObjectIterator it = cache_ioctx.nobjects_begin();
1507 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1508 }
1509
1510 // read it again and verify the version is consistent
1511 {
1512 bufferlist bl;
1513 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
1514 ASSERT_EQ(user_version, cache_ioctx.get_last_version());
1515 }
1516
1517 // erase it
1518 {
1519 ObjectWriteOperation op;
1520 op.remove();
1521 ASSERT_EQ(0, ioctx.operate("foo", &op));
1522 }
1523
1524 // flush whiteout
1525 {
1526 ObjectReadOperation op;
1527 op.cache_flush();
1528 librados::AioCompletion *completion = cluster.aio_create_completion();
1529 ASSERT_EQ(0, cache_ioctx.aio_operate(
1530 "foo", completion, &op,
1531 librados::OPERATION_IGNORE_OVERLAY, NULL));
1532 completion->wait_for_safe();
1533 ASSERT_EQ(0, completion->get_return_value());
1534 completion->release();
1535 }
1536
1537 // evict
1538 {
1539 ObjectReadOperation op;
1540 op.cache_evict();
1541 librados::AioCompletion *completion = cluster.aio_create_completion();
1542 ASSERT_EQ(0, cache_ioctx.aio_operate(
1543 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
1544 completion->wait_for_safe();
1545 ASSERT_EQ(0, completion->get_return_value());
1546 completion->release();
1547 }
1548
1549 // verify no longer in cache tier
1550 {
1551 NObjectIterator it = cache_ioctx.nobjects_begin();
1552 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1553 }
1554 // or base tier
1555 {
1556 NObjectIterator it = ioctx.nobjects_begin();
1557 ASSERT_TRUE(it == ioctx.nobjects_end());
1558 }
1559}
1560
1561TEST_F(LibRadosTwoPoolsPP, FlushSnap) {
1562 // configure cache
1563 bufferlist inbl;
1564 ASSERT_EQ(0, cluster.mon_command(
1565 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1566 "\", \"tierpool\": \"" + cache_pool_name +
1567 "\", \"force_nonempty\": \"--force-nonempty\" }",
1568 inbl, NULL, NULL));
1569 ASSERT_EQ(0, cluster.mon_command(
1570 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1571 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1572 inbl, NULL, NULL));
1573 ASSERT_EQ(0, cluster.mon_command(
1574 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1575 "\", \"mode\": \"writeback\"}",
1576 inbl, NULL, NULL));
1577
1578 // wait for maps to settle
1579 cluster.wait_for_latest_osdmap();
1580
1581 // create object
1582 {
1583 bufferlist bl;
1584 bl.append("a");
1585 ObjectWriteOperation op;
1586 op.write_full(bl);
1587 ASSERT_EQ(0, ioctx.operate("foo", &op));
1588 }
1589
1590 // create a snapshot, clone
1591 vector<uint64_t> my_snaps(1);
1592 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1593 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1594 my_snaps));
1595 {
1596 bufferlist bl;
1597 bl.append("b");
1598 ObjectWriteOperation op;
1599 op.write_full(bl);
1600 ASSERT_EQ(0, ioctx.operate("foo", &op));
1601 }
1602
1603 // and another
1604 my_snaps.resize(2);
1605 my_snaps[1] = my_snaps[0];
1606 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1607 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1608 my_snaps));
1609 {
1610 bufferlist bl;
1611 bl.append("c");
1612 ObjectWriteOperation op;
1613 op.write_full(bl);
1614 ASSERT_EQ(0, ioctx.operate("foo", &op));
1615 }
1616
1617 // verify the object is present in the cache tier
1618 {
1619 NObjectIterator it = cache_ioctx.nobjects_begin();
1620 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
1621 ASSERT_TRUE(it->get_oid() == string("foo"));
1622 ++it;
1623 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1624 }
1625
1626 // verify the object is NOT present in the base tier
1627 {
1628 NObjectIterator it = ioctx.nobjects_begin();
1629 ASSERT_TRUE(it == ioctx.nobjects_end());
1630 }
1631
1632 // flush on head (should fail)
1633 ioctx.snap_set_read(librados::SNAP_HEAD);
1634 {
1635 ObjectReadOperation op;
1636 op.cache_flush();
1637 librados::AioCompletion *completion = cluster.aio_create_completion();
1638 ASSERT_EQ(0, ioctx.aio_operate(
1639 "foo", completion, &op,
1640 librados::OPERATION_IGNORE_CACHE, NULL));
1641 completion->wait_for_safe();
1642 ASSERT_EQ(-EBUSY, completion->get_return_value());
1643 completion->release();
1644 }
1645 // flush on recent snap (should fail)
1646 ioctx.snap_set_read(my_snaps[0]);
1647 {
1648 ObjectReadOperation op;
1649 op.cache_flush();
1650 librados::AioCompletion *completion = cluster.aio_create_completion();
1651 ASSERT_EQ(0, ioctx.aio_operate(
1652 "foo", completion, &op,
1653 librados::OPERATION_IGNORE_CACHE, NULL));
1654 completion->wait_for_safe();
1655 ASSERT_EQ(-EBUSY, completion->get_return_value());
1656 completion->release();
1657 }
1658 // flush on oldest snap
1659 ioctx.snap_set_read(my_snaps[1]);
1660 {
1661 ObjectReadOperation op;
1662 op.cache_flush();
1663 librados::AioCompletion *completion = cluster.aio_create_completion();
1664 ASSERT_EQ(0, ioctx.aio_operate(
1665 "foo", completion, &op,
1666 librados::OPERATION_IGNORE_CACHE, NULL));
1667 completion->wait_for_safe();
1668 ASSERT_EQ(0, completion->get_return_value());
1669 completion->release();
1670 }
1671 // flush on next oldest snap
1672 ioctx.snap_set_read(my_snaps[0]);
1673 {
1674 ObjectReadOperation op;
1675 op.cache_flush();
1676 librados::AioCompletion *completion = cluster.aio_create_completion();
1677 ASSERT_EQ(0, ioctx.aio_operate(
1678 "foo", completion, &op,
1679 librados::OPERATION_IGNORE_CACHE, NULL));
1680 completion->wait_for_safe();
1681 ASSERT_EQ(0, completion->get_return_value());
1682 completion->release();
1683 }
1684 // flush on head
1685 ioctx.snap_set_read(librados::SNAP_HEAD);
1686 {
1687 ObjectReadOperation op;
1688 op.cache_flush();
1689 librados::AioCompletion *completion = cluster.aio_create_completion();
1690 ASSERT_EQ(0, ioctx.aio_operate(
1691 "foo", completion, &op,
1692 librados::OPERATION_IGNORE_CACHE, NULL));
1693 completion->wait_for_safe();
1694 ASSERT_EQ(0, completion->get_return_value());
1695 completion->release();
1696 }
1697
1698 // verify i can read the snaps from the cache pool
1699 ioctx.snap_set_read(librados::SNAP_HEAD);
1700 {
1701 bufferlist bl;
1702 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1703 ASSERT_EQ('c', bl[0]);
1704 }
1705 ioctx.snap_set_read(my_snaps[0]);
1706 {
1707 bufferlist bl;
1708 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1709 ASSERT_EQ('b', bl[0]);
1710 }
1711 ioctx.snap_set_read(my_snaps[1]);
1712 {
1713 bufferlist bl;
1714 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1715 ASSERT_EQ('a', bl[0]);
1716 }
1717
1718 // remove overlay
1719 ASSERT_EQ(0, cluster.mon_command(
1720 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
1721 "\"}",
1722 inbl, NULL, NULL));
1723
1724 // wait for maps to settle
1725 cluster.wait_for_latest_osdmap();
1726
1727 // verify i can read the snaps from the base pool
1728 ioctx.snap_set_read(librados::SNAP_HEAD);
1729 {
1730 bufferlist bl;
1731 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1732 ASSERT_EQ('c', bl[0]);
1733 }
1734 ioctx.snap_set_read(my_snaps[0]);
1735 {
1736 bufferlist bl;
1737 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1738 ASSERT_EQ('b', bl[0]);
1739 }
1740 ioctx.snap_set_read(my_snaps[1]);
1741 {
1742 bufferlist bl;
1743 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1744 ASSERT_EQ('a', bl[0]);
1745 }
1746
1747 ASSERT_EQ(0, cluster.mon_command(
1748 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1749 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1750 inbl, NULL, NULL));
1751
1752 // cleanup
1753 ioctx.selfmanaged_snap_remove(my_snaps[0]);
1754}
1755
1756TEST_F(LibRadosTierPP, FlushWriteRaces) {
1757 Rados cluster;
1758 std::string pool_name = get_temp_pool_name();
1759 std::string cache_pool_name = pool_name + "-cache";
1760 ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
1761 ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str()));
1762 IoCtx cache_ioctx;
1763 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
1764 IoCtx ioctx;
1765 ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
1766
1767 // configure cache
1768 bufferlist inbl;
1769 ASSERT_EQ(0, cluster.mon_command(
1770 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1771 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
1772 inbl, NULL, NULL));
1773 ASSERT_EQ(0, cluster.mon_command(
1774 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1775 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1776 inbl, NULL, NULL));
1777 ASSERT_EQ(0, cluster.mon_command(
1778 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1779 "\", \"mode\": \"writeback\"}",
1780 inbl, NULL, NULL));
1781
1782 // wait for maps to settle
1783 cluster.wait_for_latest_osdmap();
1784
1785 // create/dirty object
1786 bufferlist bl;
1787 bl.append("hi there");
1788 {
1789 ObjectWriteOperation op;
1790 op.write_full(bl);
1791 ASSERT_EQ(0, ioctx.operate("foo", &op));
1792 }
1793
1794 // flush + write
1795 {
1796 ObjectReadOperation op;
1797 op.cache_flush();
1798 librados::AioCompletion *completion = cluster.aio_create_completion();
1799 ASSERT_EQ(0, cache_ioctx.aio_operate(
1800 "foo", completion, &op,
1801 librados::OPERATION_IGNORE_OVERLAY, NULL));
1802
1803 ObjectWriteOperation op2;
1804 op2.write_full(bl);
1805 librados::AioCompletion *completion2 = cluster.aio_create_completion();
1806 ASSERT_EQ(0, ioctx.aio_operate(
1807 "foo", completion2, &op2, 0));
1808
1809 completion->wait_for_safe();
1810 completion2->wait_for_safe();
1811 ASSERT_EQ(0, completion->get_return_value());
1812 ASSERT_EQ(0, completion2->get_return_value());
1813 completion->release();
1814 completion2->release();
1815 }
1816
1817 int tries = 1000;
1818 do {
1819 // create/dirty object
1820 {
1821 bufferlist bl;
1822 bl.append("hi there");
1823 ObjectWriteOperation op;
1824 op.write_full(bl);
1825 ASSERT_EQ(0, ioctx.operate("foo", &op));
1826 }
1827
1828 // try-flush + write
1829 {
1830 ObjectReadOperation op;
1831 op.cache_try_flush();
1832 librados::AioCompletion *completion = cluster.aio_create_completion();
1833 ASSERT_EQ(0, cache_ioctx.aio_operate(
1834 "foo", completion, &op,
1835 librados::OPERATION_IGNORE_OVERLAY |
1836 librados::OPERATION_SKIPRWLOCKS, NULL));
1837
1838 ObjectWriteOperation op2;
1839 op2.write_full(bl);
1840 librados::AioCompletion *completion2 = cluster.aio_create_completion();
1841 ASSERT_EQ(0, ioctx.aio_operate("foo", completion2, &op2, 0));
1842
1843 completion->wait_for_safe();
1844 completion2->wait_for_safe();
1845 int r = completion->get_return_value();
1846 ASSERT_TRUE(r == -EBUSY || r == 0);
1847 ASSERT_EQ(0, completion2->get_return_value());
1848 completion->release();
1849 completion2->release();
1850 if (r == -EBUSY)
1851 break;
1852 cout << "didn't get EBUSY, trying again" << std::endl;
1853 }
1854 ASSERT_TRUE(--tries);
1855 } while (true);
1856
1857 // tear down tiers
1858 ASSERT_EQ(0, cluster.mon_command(
1859 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
1860 "\"}",
1861 inbl, NULL, NULL));
1862 ASSERT_EQ(0, cluster.mon_command(
1863 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
1864 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
1865 inbl, NULL, NULL));
1866
1867 // wait for maps to settle before next test
1868 cluster.wait_for_latest_osdmap();
1869
1870 ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str()));
1871 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
1872}
1873
1874TEST_F(LibRadosTwoPoolsPP, FlushTryFlushRaces) {
1875 // configure cache
1876 bufferlist inbl;
1877 ASSERT_EQ(0, cluster.mon_command(
1878 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1879 "\", \"tierpool\": \"" + cache_pool_name +
1880 "\", \"force_nonempty\": \"--force-nonempty\" }",
1881 inbl, NULL, NULL));
1882 ASSERT_EQ(0, cluster.mon_command(
1883 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1884 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1885 inbl, NULL, NULL));
1886 ASSERT_EQ(0, cluster.mon_command(
1887 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1888 "\", \"mode\": \"writeback\"}",
1889 inbl, NULL, NULL));
1890
1891 // wait for maps to settle
1892 cluster.wait_for_latest_osdmap();
1893
1894 // create/dirty object
1895 {
1896 bufferlist bl;
1897 bl.append("hi there");
1898 ObjectWriteOperation op;
1899 op.write_full(bl);
1900 ASSERT_EQ(0, ioctx.operate("foo", &op));
1901 }
1902
1903 // flush + flush
1904 {
1905 ObjectReadOperation op;
1906 op.cache_flush();
1907 librados::AioCompletion *completion = cluster.aio_create_completion();
1908 ASSERT_EQ(0, cache_ioctx.aio_operate(
1909 "foo", completion, &op,
1910 librados::OPERATION_IGNORE_OVERLAY, NULL));
1911
1912 ObjectReadOperation op2;
1913 op2.cache_flush();
1914 librados::AioCompletion *completion2 = cluster.aio_create_completion();
1915 ASSERT_EQ(0, cache_ioctx.aio_operate(
1916 "foo", completion2, &op2,
1917 librados::OPERATION_IGNORE_OVERLAY, NULL));
1918
1919 completion->wait_for_safe();
1920 completion2->wait_for_safe();
1921 ASSERT_EQ(0, completion->get_return_value());
1922 ASSERT_EQ(0, completion2->get_return_value());
1923 completion->release();
1924 completion2->release();
1925 }
1926
1927 // create/dirty object
1928 {
1929 bufferlist bl;
1930 bl.append("hi there");
1931 ObjectWriteOperation op;
1932 op.write_full(bl);
1933 ASSERT_EQ(0, ioctx.operate("foo", &op));
1934 }
1935
1936 // flush + try-flush
1937 {
1938 ObjectReadOperation op;
1939 op.cache_flush();
1940 librados::AioCompletion *completion = cluster.aio_create_completion();
1941 ASSERT_EQ(0, cache_ioctx.aio_operate(
1942 "foo", completion, &op,
1943 librados::OPERATION_IGNORE_OVERLAY, NULL));
1944
1945 ObjectReadOperation op2;
1946 op2.cache_try_flush();
1947 librados::AioCompletion *completion2 = cluster.aio_create_completion();
1948 ASSERT_EQ(0, cache_ioctx.aio_operate(
1949 "foo", completion2, &op2,
1950 librados::OPERATION_IGNORE_OVERLAY |
1951 librados::OPERATION_SKIPRWLOCKS, NULL));
1952
1953 completion->wait_for_safe();
1954 completion2->wait_for_safe();
1955 ASSERT_EQ(0, completion->get_return_value());
1956 ASSERT_EQ(0, completion2->get_return_value());
1957 completion->release();
1958 completion2->release();
1959 }
1960
1961 // create/dirty object
1962 int tries = 1000;
1963 do {
1964 {
1965 bufferlist bl;
1966 bl.append("hi there");
1967 ObjectWriteOperation op;
1968 op.write_full(bl);
1969 ASSERT_EQ(0, ioctx.operate("foo", &op));
1970 }
1971
1972 // try-flush + flush
1973 // (flush will not piggyback on try-flush)
1974 {
1975 ObjectReadOperation op;
1976 op.cache_try_flush();
1977 librados::AioCompletion *completion = cluster.aio_create_completion();
1978 ASSERT_EQ(0, cache_ioctx.aio_operate(
1979 "foo", completion, &op,
1980 librados::OPERATION_IGNORE_OVERLAY |
1981 librados::OPERATION_SKIPRWLOCKS, NULL));
1982
1983 ObjectReadOperation op2;
1984 op2.cache_flush();
1985 librados::AioCompletion *completion2 = cluster.aio_create_completion();
1986 ASSERT_EQ(0, cache_ioctx.aio_operate(
1987 "foo", completion2, &op2,
1988 librados::OPERATION_IGNORE_OVERLAY, NULL));
1989
1990 completion->wait_for_safe();
1991 completion2->wait_for_safe();
1992 int r = completion->get_return_value();
1993 ASSERT_TRUE(r == -EBUSY || r == 0);
1994 ASSERT_EQ(0, completion2->get_return_value());
1995 completion->release();
1996 completion2->release();
1997 if (r == -EBUSY)
1998 break;
1999 cout << "didn't get EBUSY, trying again" << std::endl;
2000 }
2001 ASSERT_TRUE(--tries);
2002 } while (true);
2003
2004 // create/dirty object
2005 {
2006 bufferlist bl;
2007 bl.append("hi there");
2008 ObjectWriteOperation op;
2009 op.write_full(bl);
2010 ASSERT_EQ(0, ioctx.operate("foo", &op));
2011 }
2012
2013 // try-flush + try-flush
2014 {
2015 ObjectReadOperation op;
2016 op.cache_try_flush();
2017 librados::AioCompletion *completion = cluster.aio_create_completion();
2018 ASSERT_EQ(0, cache_ioctx.aio_operate(
2019 "foo", completion, &op,
2020 librados::OPERATION_IGNORE_OVERLAY |
2021 librados::OPERATION_SKIPRWLOCKS, NULL));
2022
2023 ObjectReadOperation op2;
2024 op2.cache_try_flush();
2025 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2026 ASSERT_EQ(0, cache_ioctx.aio_operate(
2027 "foo", completion2, &op2,
2028 librados::OPERATION_IGNORE_OVERLAY |
2029 librados::OPERATION_SKIPRWLOCKS, NULL));
2030
2031 completion->wait_for_safe();
2032 completion2->wait_for_safe();
2033 ASSERT_EQ(0, completion->get_return_value());
2034 ASSERT_EQ(0, completion2->get_return_value());
2035 completion->release();
2036 completion2->release();
2037 }
2038}
2039
2040
// Shared state for the read stream driven by start_flush_read() and
// flush_read_race_cb(), used by the TryFlushReadRace test.

// I/O context that start_flush_read() issues its stat on; set by the test.
IoCtx *read_ioctx = 0;
// Held by the test and by flush_read_race_cb() while they touch the
// counters below.
Mutex test_lock("FlushReadRaces::lock");
// Signalled by flush_read_race_cb() each time it retires a read.
Cond cond;
// Number of reads the test starts; the callback keeps re-issuing reads
// until num_reads exceeds this value (the test sets it to 0 to stop).
int max_reads = 100;
int num_reads = 0; // in progress

// Completion callback for the reads; declared here because
// start_flush_read() registers it before its definition appears.
void flush_read_race_cb(completion_t cb, void *arg);
2048
2049void start_flush_read()
2050{
2051 //cout << " starting read" << std::endl;
2052 ObjectReadOperation op;
2053 op.stat(NULL, NULL, NULL);
2054 librados::AioCompletion *completion = completions.getCompletion();
2055 completion->set_complete_callback(0, flush_read_race_cb);
2056 read_ioctx->aio_operate("foo", completion, &op, NULL);
2057}
2058
2059void flush_read_race_cb(completion_t cb, void *arg)
2060{
2061 //cout << " finished read" << std::endl;
2062 test_lock.Lock();
2063 if (num_reads > max_reads) {
2064 num_reads--;
2065 cond.Signal();
2066 } else {
2067 start_flush_read();
2068 }
2069 test_lock.Unlock();
2070}
2071
2072TEST_F(LibRadosTwoPoolsPP, TryFlushReadRace) {
2073 // configure cache
2074 bufferlist inbl;
2075 ASSERT_EQ(0, cluster.mon_command(
2076 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2077 "\", \"tierpool\": \"" + cache_pool_name +
2078 "\", \"force_nonempty\": \"--force-nonempty\" }",
2079 inbl, NULL, NULL));
2080 ASSERT_EQ(0, cluster.mon_command(
2081 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2082 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2083 inbl, NULL, NULL));
2084 ASSERT_EQ(0, cluster.mon_command(
2085 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2086 "\", \"mode\": \"writeback\"}",
2087 inbl, NULL, NULL));
2088
2089 // wait for maps to settle
2090 cluster.wait_for_latest_osdmap();
2091
2092 // create/dirty object
2093 {
2094 bufferlist bl;
2095 bl.append("hi there");
2096 bufferptr bp(4000000); // make it big!
2097 bp.zero();
2098 bl.append(bp);
2099 ObjectWriteOperation op;
2100 op.write_full(bl);
2101 ASSERT_EQ(0, ioctx.operate("foo", &op));
2102 }
2103
2104 // start a continuous stream of reads
2105 read_ioctx = &ioctx;
2106 test_lock.Lock();
2107 for (int i = 0; i < max_reads; ++i) {
2108 start_flush_read();
2109 num_reads++;
2110 }
2111 test_lock.Unlock();
2112
2113 // try-flush
2114 ObjectReadOperation op;
2115 op.cache_try_flush();
2116 librados::AioCompletion *completion = cluster.aio_create_completion();
2117 ASSERT_EQ(0, cache_ioctx.aio_operate(
2118 "foo", completion, &op,
2119 librados::OPERATION_IGNORE_OVERLAY |
2120 librados::OPERATION_SKIPRWLOCKS, NULL));
2121
2122 completion->wait_for_safe();
2123 ASSERT_EQ(0, completion->get_return_value());
2124 completion->release();
2125
2126 // stop reads
2127 test_lock.Lock();
2128 max_reads = 0;
2129 while (num_reads > 0)
2130 cond.Wait(test_lock);
2131 test_lock.Unlock();
2132}
2133
2134TEST_F(LibRadosTierPP, HitSetNone) {
2135 {
2136 list< pair<time_t,time_t> > ls;
2137 AioCompletion *c = librados::Rados::aio_create_completion();
2138 ASSERT_EQ(0, ioctx.hit_set_list(123, c, &ls));
2139 c->wait_for_complete();
2140 ASSERT_EQ(0, c->get_return_value());
2141 ASSERT_TRUE(ls.empty());
2142 c->release();
2143 }
2144 {
2145 bufferlist bl;
2146 AioCompletion *c = librados::Rados::aio_create_completion();
2147 ASSERT_EQ(0, ioctx.hit_set_get(123, c, 12345, &bl));
2148 c->wait_for_complete();
2149 ASSERT_EQ(-ENOENT, c->get_return_value());
2150 c->release();
2151 }
2152}
2153
// Builds the JSON text of an "osd pool set" monitor command that sets
// variable `var` of pool `pool` to the string `val`. The arguments are
// inserted verbatim (no JSON escaping is applied).
string set_pool_str(string pool, string var, string val)
{
  string cmd("{\"prefix\": \"osd pool set\",\"pool\":\"");
  cmd += pool;
  cmd += "\",\"var\": \"";
  cmd += var;
  cmd += "\",\"val\": \"";
  cmd += val;
  cmd += "\"}";
  return cmd;
}
2160
2161string set_pool_str(string pool, string var, int val)
2162{
2163 return string("{\"prefix\": \"osd pool set\",\"pool\":\"") + pool
2164 + string("\",\"var\": \"") + var + string("\",\"val\": \"")
2165 + stringify(val) + string("\"}");
2166}
2167
2168TEST_F(LibRadosTwoPoolsPP, HitSetRead) {
2169 // make it a tier
2170 bufferlist inbl;
2171 ASSERT_EQ(0, cluster.mon_command(
2172 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2173 "\", \"tierpool\": \"" + cache_pool_name +
2174 "\", \"force_nonempty\": \"--force-nonempty\" }",
2175 inbl, NULL, NULL));
2176
2177 // enable hitset tracking for this pool
2178 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", 2),
2179 inbl, NULL, NULL));
2180 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", 600),
2181 inbl, NULL, NULL));
2182 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type",
2183 "explicit_object"),
2184 inbl, NULL, NULL));
2185
2186 // wait for maps to settle
2187 cluster.wait_for_latest_osdmap();
2188
2189 cache_ioctx.set_namespace("");
2190
2191 // keep reading until we see our object appear in the HitSet
2192 utime_t start = ceph_clock_now();
2193 utime_t hard_stop = start + utime_t(600, 0);
2194
2195 while (true) {
2196 utime_t now = ceph_clock_now();
2197 ASSERT_TRUE(now < hard_stop);
2198
2199 string name = "foo";
2200 uint32_t hash;
2201 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
2202 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash,
2203 cluster.pool_lookup(cache_pool_name.c_str()), "");
2204
2205 bufferlist bl;
2206 ASSERT_EQ(-ENOENT, cache_ioctx.read("foo", bl, 1, 0));
2207
2208 bufferlist hbl;
2209 AioCompletion *c = librados::Rados::aio_create_completion();
2210 ASSERT_EQ(0, cache_ioctx.hit_set_get(hash, c, now.sec(), &hbl));
2211 c->wait_for_complete();
2212 c->release();
2213
2214 if (hbl.length()) {
2215 bufferlist::iterator p = hbl.begin();
2216 HitSet hs;
2217 ::decode(hs, p);
2218 if (hs.contains(oid)) {
2219 cout << "ok, hit_set contains " << oid << std::endl;
2220 break;
2221 }
2222 cout << "hmm, not in HitSet yet" << std::endl;
2223 } else {
2224 cout << "hmm, no HitSet yet" << std::endl;
2225 }
2226
2227 sleep(1);
2228 }
2229}
2230
2231static int _get_pg_num(Rados& cluster, string pool_name)
2232{
2233 bufferlist inbl;
2234 string cmd = string("{\"prefix\": \"osd pool get\",\"pool\":\"")
2235 + pool_name
2236 + string("\",\"var\": \"pg_num\",\"format\": \"json\"}");
2237 bufferlist outbl;
2238 int r = cluster.mon_command(cmd, inbl, &outbl, NULL);
2239 assert(r >= 0);
2240 string outstr(outbl.c_str(), outbl.length());
2241 json_spirit::Value v;
2242 if (!json_spirit::read(outstr, v)) {
2243 cerr <<" unable to parse json " << outstr << std::endl;
2244 return -1;
2245 }
2246
2247 json_spirit::Object& o = v.get_obj();
2248 for (json_spirit::Object::size_type i=0; i<o.size(); i++) {
2249 json_spirit::Pair& p = o[i];
2250 if (p.name_ == "pg_num") {
2251 cout << "pg_num = " << p.value_.get_int() << std::endl;
2252 return p.value_.get_int();
2253 }
2254 }
2255 cerr << "didn't find pg_num in " << outstr << std::endl;
2256 return -1;
2257}
2258
2259
2260TEST_F(LibRadosTwoPoolsPP, HitSetWrite) {
2261 int num_pg = _get_pg_num(cluster, pool_name);
2262 assert(num_pg > 0);
2263
2264 // make it a tier
2265 bufferlist inbl;
2266 ASSERT_EQ(0, cluster.mon_command(
2267 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2268 "\", \"tierpool\": \"" + cache_pool_name +
2269 "\", \"force_nonempty\": \"--force-nonempty\" }",
2270 inbl, NULL, NULL));
2271
2272 // enable hitset tracking for this pool
2273 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", 8),
2274 inbl, NULL, NULL));
2275 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", 600),
2276 inbl, NULL, NULL));
2277 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type",
2278 "explicit_hash"),
2279 inbl, NULL, NULL));
2280
2281 // wait for maps to settle
2282 cluster.wait_for_latest_osdmap();
2283
2284 cache_ioctx.set_namespace("");
2285
2286 int num = 200;
2287
2288 // do a bunch of writes
2289 for (int i=0; i<num; ++i) {
2290 bufferlist bl;
2291 bl.append("a");
2292 ASSERT_EQ(0, cache_ioctx.write(stringify(i), bl, 1, 0));
2293 }
2294
2295 // get HitSets
2296 std::map<int,HitSet> hitsets;
2297 for (int i=0; i<num_pg; ++i) {
2298 list< pair<time_t,time_t> > ls;
2299 AioCompletion *c = librados::Rados::aio_create_completion();
2300 ASSERT_EQ(0, cache_ioctx.hit_set_list(i, c, &ls));
2301 c->wait_for_complete();
2302 c->release();
2303 std::cout << "pg " << i << " ls " << ls << std::endl;
2304 ASSERT_FALSE(ls.empty());
2305
2306 // get the latest
2307 c = librados::Rados::aio_create_completion();
2308 bufferlist bl;
2309 ASSERT_EQ(0, cache_ioctx.hit_set_get(i, c, ls.back().first, &bl));
2310 c->wait_for_complete();
2311 c->release();
2312
2313 try {
2314 bufferlist::iterator p = bl.begin();
2315 ::decode(hitsets[i], p);
2316 }
2317 catch (buffer::error& e) {
2318 std::cout << "failed to decode hit set; bl len is " << bl.length() << "\n";
2319 bl.hexdump(std::cout);
2320 std::cout << std::endl;
2321 throw e;
2322 }
2323
2324 // cope with racing splits by refreshing pg_num
2325 if (i == num_pg - 1)
2326 num_pg = _get_pg_num(cluster, cache_pool_name);
2327 }
2328
2329 for (int i=0; i<num; ++i) {
2330 string n = stringify(i);
2331 uint32_t hash;
2332 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(n, &hash));
2333 hobject_t oid(sobject_t(n, CEPH_NOSNAP), "", hash,
2334 cluster.pool_lookup(cache_pool_name.c_str()), "");
2335 std::cout << "checking for " << oid << std::endl;
2336 bool found = false;
2337 for (int p=0; p<num_pg; ++p) {
2338 if (hitsets[p].contains(oid)) {
2339 found = true;
2340 break;
2341 }
2342 }
2343 ASSERT_TRUE(found);
2344 }
2345}
2346
2347TEST_F(LibRadosTwoPoolsPP, HitSetTrim) {
2348 unsigned count = 3;
2349 unsigned period = 3;
2350
2351 // make it a tier
2352 bufferlist inbl;
2353 ASSERT_EQ(0, cluster.mon_command(
2354 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2355 "\", \"tierpool\": \"" + cache_pool_name +
2356 "\", \"force_nonempty\": \"--force-nonempty\" }",
2357 inbl, NULL, NULL));
2358
2359 // enable hitset tracking for this pool
2360 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", count),
2361 inbl, NULL, NULL));
2362 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", period),
2363 inbl, NULL, NULL));
2364 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
2365 inbl, NULL, NULL));
2366 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_fpp", ".01"),
2367 inbl, NULL, NULL));
2368
2369 // wait for maps to settle
2370 cluster.wait_for_latest_osdmap();
2371
2372 cache_ioctx.set_namespace("");
2373
2374 // do a bunch of writes and make sure the hitsets rotate
2375 utime_t start = ceph_clock_now();
2376 utime_t hard_stop = start + utime_t(count * period * 50, 0);
2377
2378 time_t first = 0;
2379 while (true) {
2380 string name = "foo";
2381 uint32_t hash;
2382 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
2383 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash, -1, "");
2384
2385 bufferlist bl;
2386 bl.append("f");
2387 ASSERT_EQ(0, cache_ioctx.write("foo", bl, 1, 0));
2388
2389 list<pair<time_t, time_t> > ls;
2390 AioCompletion *c = librados::Rados::aio_create_completion();
2391 ASSERT_EQ(0, cache_ioctx.hit_set_list(hash, c, &ls));
2392 c->wait_for_complete();
2393 c->release();
2394
2395 cout << " got ls " << ls << std::endl;
2396 if (!ls.empty()) {
2397 if (!first) {
2398 first = ls.front().first;
2399 cout << "first is " << first << std::endl;
2400 } else {
2401 if (ls.front().first != first) {
2402 cout << "first now " << ls.front().first << ", trimmed" << std::endl;
2403 break;
2404 }
2405 }
2406 }
2407
2408 utime_t now = ceph_clock_now();
2409 ASSERT_TRUE(now < hard_stop);
2410
2411 sleep(1);
2412 }
2413}
2414
2415TEST_F(LibRadosTwoPoolsPP, PromoteOn2ndRead) {
2416 // create object
2417 for (int i=0; i<20; ++i) {
2418 bufferlist bl;
2419 bl.append("hi there");
2420 ObjectWriteOperation op;
2421 op.write_full(bl);
2422 ASSERT_EQ(0, ioctx.operate("foo" + stringify(i), &op));
2423 }
2424
2425 // configure cache
2426 bufferlist inbl;
2427 ASSERT_EQ(0, cluster.mon_command(
2428 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2429 "\", \"tierpool\": \"" + cache_pool_name +
2430 "\", \"force_nonempty\": \"--force-nonempty\" }",
2431 inbl, NULL, NULL));
2432 ASSERT_EQ(0, cluster.mon_command(
2433 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2434 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2435 inbl, NULL, NULL));
2436 ASSERT_EQ(0, cluster.mon_command(
2437 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2438 "\", \"mode\": \"writeback\"}",
2439 inbl, NULL, NULL));
2440
2441 // enable hitset tracking for this pool
2442 ASSERT_EQ(0, cluster.mon_command(
2443 set_pool_str(cache_pool_name, "hit_set_count", 2),
2444 inbl, NULL, NULL));
2445 ASSERT_EQ(0, cluster.mon_command(
2446 set_pool_str(cache_pool_name, "hit_set_period", 600),
2447 inbl, NULL, NULL));
2448 ASSERT_EQ(0, cluster.mon_command(
2449 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
2450 inbl, NULL, NULL));
2451 ASSERT_EQ(0, cluster.mon_command(
2452 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
2453 inbl, NULL, NULL));
2454 ASSERT_EQ(0, cluster.mon_command(
2455 set_pool_str(cache_pool_name, "hit_set_grade_decay_rate", 20),
2456 inbl, NULL, NULL));
2457 ASSERT_EQ(0, cluster.mon_command(
2458 set_pool_str(cache_pool_name, "hit_set_search_last_n", 1),
2459 inbl, NULL, NULL));
2460
2461 // wait for maps to settle
2462 cluster.wait_for_latest_osdmap();
2463
2464 int fake = 0; // set this to non-zero to test spurious promotion,
2465 // e.g. from thrashing
2466 int attempt = 0;
2467 string obj;
2468 while (true) {
2469 // 1st read, don't trigger a promote
2470 obj = "foo" + stringify(attempt);
2471 cout << obj << std::endl;
2472 {
2473 bufferlist bl;
2474 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
2475 if (--fake >= 0) {
2476 sleep(1);
2477 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
2478 sleep(1);
2479 }
2480 }
2481
2482 // verify the object is NOT present in the cache tier
2483 {
2484 bool found = false;
2485 NObjectIterator it = cache_ioctx.nobjects_begin();
2486 while (it != cache_ioctx.nobjects_end()) {
2487 cout << " see " << it->get_oid() << std::endl;
2488 if (it->get_oid() == string(obj.c_str())) {
2489 found = true;
2490 break;
2491 }
2492 ++it;
2493 }
2494 if (!found)
2495 break;
2496 }
2497
2498 ++attempt;
2499 ASSERT_LE(attempt, 20);
2500 cout << "hrm, object is present in cache on attempt " << attempt
2501 << ", retrying" << std::endl;
2502 }
2503
2504 // Read until the object is present in the cache tier
2505 cout << "verifying " << obj << " is eventually promoted" << std::endl;
2506 while (true) {
2507 bufferlist bl;
2508 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
2509
2510 bool there = false;
2511 NObjectIterator it = cache_ioctx.nobjects_begin();
2512 while (it != cache_ioctx.nobjects_end()) {
2513 if (it->get_oid() == string(obj.c_str())) {
2514 there = true;
2515 break;
2516 }
2517 ++it;
2518 }
2519 if (there)
2520 break;
2521
2522 sleep(1);
2523 }
2524
2525 // tear down tiers
2526 ASSERT_EQ(0, cluster.mon_command(
2527 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2528 "\"}",
2529 inbl, NULL, NULL));
2530 ASSERT_EQ(0, cluster.mon_command(
2531 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2532 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2533 inbl, NULL, NULL));
2534
2535 // wait for maps to settle before next test
2536 cluster.wait_for_latest_osdmap();
2537}
2538
2539TEST_F(LibRadosTwoPoolsPP, ProxyRead) {
2540 // create object
2541 {
2542 bufferlist bl;
2543 bl.append("hi there");
2544 ObjectWriteOperation op;
2545 op.write_full(bl);
2546 ASSERT_EQ(0, ioctx.operate("foo", &op));
2547 }
2548
2549 // configure cache
2550 bufferlist inbl;
2551 ASSERT_EQ(0, cluster.mon_command(
2552 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2553 "\", \"tierpool\": \"" + cache_pool_name +
2554 "\", \"force_nonempty\": \"--force-nonempty\" }",
2555 inbl, NULL, NULL));
2556 ASSERT_EQ(0, cluster.mon_command(
2557 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2558 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2559 inbl, NULL, NULL));
2560 ASSERT_EQ(0, cluster.mon_command(
2561 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2562 "\", \"mode\": \"readproxy\"}",
2563 inbl, NULL, NULL));
2564
2565 // wait for maps to settle
2566 cluster.wait_for_latest_osdmap();
2567
2568 // read and verify the object
2569 {
2570 bufferlist bl;
2571 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2572 ASSERT_EQ('h', bl[0]);
2573 }
2574
2575 // Verify 10 times the object is NOT present in the cache tier
2576 uint32_t i = 0;
2577 while (i++ < 10) {
2578 NObjectIterator it = cache_ioctx.nobjects_begin();
2579 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
2580 sleep(1);
2581 }
2582
2583 // tear down tiers
2584 ASSERT_EQ(0, cluster.mon_command(
2585 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2586 "\"}",
2587 inbl, NULL, NULL));
2588 ASSERT_EQ(0, cluster.mon_command(
2589 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2590 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2591 inbl, NULL, NULL));
2592
2593 // wait for maps to settle before next test
2594 cluster.wait_for_latest_osdmap();
2595}
2596
2597TEST_F(LibRadosTwoPoolsPP, CachePin) {
2598 // create object
2599 {
2600 bufferlist bl;
2601 bl.append("hi there");
2602 ObjectWriteOperation op;
2603 op.write_full(bl);
2604 ASSERT_EQ(0, ioctx.operate("foo", &op));
2605 }
2606 {
2607 bufferlist bl;
2608 bl.append("hi there");
2609 ObjectWriteOperation op;
2610 op.write_full(bl);
2611 ASSERT_EQ(0, ioctx.operate("bar", &op));
2612 }
2613 {
2614 bufferlist bl;
2615 bl.append("hi there");
2616 ObjectWriteOperation op;
2617 op.write_full(bl);
2618 ASSERT_EQ(0, ioctx.operate("baz", &op));
2619 }
2620 {
2621 bufferlist bl;
2622 bl.append("hi there");
2623 ObjectWriteOperation op;
2624 op.write_full(bl);
2625 ASSERT_EQ(0, ioctx.operate("bam", &op));
2626 }
2627
2628 // configure cache
2629 bufferlist inbl;
2630 ASSERT_EQ(0, cluster.mon_command(
2631 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2632 "\", \"tierpool\": \"" + cache_pool_name +
2633 "\", \"force_nonempty\": \"--force-nonempty\" }",
2634 inbl, NULL, NULL));
2635 ASSERT_EQ(0, cluster.mon_command(
2636 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2637 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2638 inbl, NULL, NULL));
2639 ASSERT_EQ(0, cluster.mon_command(
2640 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2641 "\", \"mode\": \"writeback\"}",
2642 inbl, NULL, NULL));
2643
2644 // wait for maps to settle
2645 cluster.wait_for_latest_osdmap();
2646
2647 // read, trigger promote
2648 {
2649 bufferlist bl;
2650 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2651 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
2652 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
2653 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
2654 }
2655
2656 // verify the objects are present in the cache tier
2657 {
2658 NObjectIterator it = cache_ioctx.nobjects_begin();
2659 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
2660 for (uint32_t i = 0; i < 4; i++) {
2661 ASSERT_TRUE(it->get_oid() == string("foo") ||
2662 it->get_oid() == string("bar") ||
2663 it->get_oid() == string("baz") ||
2664 it->get_oid() == string("bam"));
2665 ++it;
2666 }
2667 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
2668 }
2669
2670 // pin objects
2671 {
2672 ObjectWriteOperation op;
2673 op.cache_pin();
2674 librados::AioCompletion *completion = cluster.aio_create_completion();
2675 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
2676 completion->wait_for_safe();
2677 ASSERT_EQ(0, completion->get_return_value());
2678 completion->release();
2679 }
2680 {
2681 ObjectWriteOperation op;
2682 op.cache_pin();
2683 librados::AioCompletion *completion = cluster.aio_create_completion();
2684 ASSERT_EQ(0, cache_ioctx.aio_operate("baz", completion, &op));
2685 completion->wait_for_safe();
2686 ASSERT_EQ(0, completion->get_return_value());
2687 completion->release();
2688 }
2689
2690 // enable agent
2691 ASSERT_EQ(0, cluster.mon_command(
2692 set_pool_str(cache_pool_name, "hit_set_count", 2),
2693 inbl, NULL, NULL));
2694 ASSERT_EQ(0, cluster.mon_command(
2695 set_pool_str(cache_pool_name, "hit_set_period", 600),
2696 inbl, NULL, NULL));
2697 ASSERT_EQ(0, cluster.mon_command(
2698 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
2699 inbl, NULL, NULL));
2700 ASSERT_EQ(0, cluster.mon_command(
2701 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
2702 inbl, NULL, NULL));
2703 ASSERT_EQ(0, cluster.mon_command(
2704 set_pool_str(cache_pool_name, "target_max_objects", 1),
2705 inbl, NULL, NULL));
2706
2707 sleep(10);
2708
2709 // Verify the pinned object 'foo' is not flushed/evicted
2710 uint32_t count = 0;
2711 while (true) {
2712 bufferlist bl;
2713 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
2714
2715 count = 0;
2716 NObjectIterator it = cache_ioctx.nobjects_begin();
2717 while (it != cache_ioctx.nobjects_end()) {
2718 ASSERT_TRUE(it->get_oid() == string("foo") ||
2719 it->get_oid() == string("bar") ||
2720 it->get_oid() == string("baz") ||
2721 it->get_oid() == string("bam"));
2722 ++count;
2723 ++it;
2724 }
2725 if (count == 2) {
2726 ASSERT_TRUE(it->get_oid() == string("foo") ||
2727 it->get_oid() == string("baz"));
2728 break;
2729 }
2730
2731 sleep(1);
2732 }
2733
2734 // tear down tiers
2735 ASSERT_EQ(0, cluster.mon_command(
2736 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2737 "\"}",
2738 inbl, NULL, NULL));
2739 ASSERT_EQ(0, cluster.mon_command(
2740 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2741 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2742 inbl, NULL, NULL));
2743
2744 // wait for maps to settle before next test
2745 cluster.wait_for_latest_osdmap();
2746}
2747
31f18b77
FG
2748TEST_F(LibRadosTwoPoolsPP, SetRedirectRead) {
2749 // skip test if not yet luminous
2750 {
2751 bufferlist inbl, outbl;
2752 ASSERT_EQ(0, cluster.mon_command(
2753 "{\"prefix\": \"osd dump\"}",
2754 inbl, &outbl, NULL));
2755 string s(outbl.c_str(), outbl.length());
2756 if (s.find("luminous") == std::string::npos) {
2757 cout << "cluster is not yet luminous, skipping test" << std::endl;
2758 return;
2759 }
2760 }
2761
2762 // create object
2763 {
2764 bufferlist bl;
2765 bl.append("hi there");
2766 ObjectWriteOperation op;
2767 op.write_full(bl);
2768 ASSERT_EQ(0, ioctx.operate("foo", &op));
2769 }
2770 {
2771 bufferlist bl;
2772 bl.append("there");
2773 ObjectWriteOperation op;
2774 op.write_full(bl);
2775 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
2776 }
2777
2778 // configure tier
2779 bufferlist inbl;
2780 ASSERT_EQ(0, cluster.mon_command(
2781 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2782 "\", \"tierpool\": \"" + cache_pool_name +
2783 "\", \"force_nonempty\": \"--force-nonempty\" }",
2784 inbl, NULL, NULL));
2785
2786 // wait for maps to settle
2787 cluster.wait_for_latest_osdmap();
2788
2789 {
2790 ObjectWriteOperation op;
2791 op.set_redirect("bar", cache_ioctx, 0);
2792 librados::AioCompletion *completion = cluster.aio_create_completion();
2793 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
2794 completion->wait_for_safe();
2795 ASSERT_EQ(0, completion->get_return_value());
2796 completion->release();
2797 }
2798 // read and verify the object
2799 {
2800 bufferlist bl;
2801 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2802 ASSERT_EQ('t', bl[0]);
2803 }
2804
2805 ASSERT_EQ(0, cluster.mon_command(
2806 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2807 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2808 inbl, NULL, NULL));
2809
2810 // wait for maps to settle before next test
2811 cluster.wait_for_latest_osdmap();
2812}
2813
7c673cae
FG
2814class LibRadosTwoPoolsECPP : public RadosTestECPP
2815{
2816public:
2817 LibRadosTwoPoolsECPP() {};
2818 ~LibRadosTwoPoolsECPP() override {};
2819protected:
2820 static void SetUpTestCase() {
2821 pool_name = get_temp_pool_name();
2822 ASSERT_EQ("", create_one_ec_pool_pp(pool_name, s_cluster));
2823 }
2824 static void TearDownTestCase() {
2825 ASSERT_EQ(0, destroy_one_ec_pool_pp(pool_name, s_cluster));
2826 }
2827 static std::string cache_pool_name;
2828
2829 void SetUp() override {
2830 cache_pool_name = get_temp_pool_name();
2831 ASSERT_EQ(0, s_cluster.pool_create(cache_pool_name.c_str()));
2832 RadosTestECPP::SetUp();
2833 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
2834 cache_ioctx.set_namespace(nspace);
2835 }
2836 void TearDown() override {
2837 // flush + evict cache
2838 flush_evict_all(cluster, cache_ioctx);
2839
2840 bufferlist inbl;
2841 // tear down tiers
2842 ASSERT_EQ(0, cluster.mon_command(
2843 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2844 "\"}",
2845 inbl, NULL, NULL));
2846 ASSERT_EQ(0, cluster.mon_command(
2847 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2848 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2849 inbl, NULL, NULL));
2850
2851 // wait for maps to settle before next test
2852 cluster.wait_for_latest_osdmap();
2853
2854 RadosTestECPP::TearDown();
2855
2856 cleanup_default_namespace(cache_ioctx);
2857 cleanup_namespace(cache_ioctx, nspace);
2858
2859 cache_ioctx.close();
2860 ASSERT_EQ(0, s_cluster.pool_delete(cache_pool_name.c_str()));
2861 }
2862
2863 librados::IoCtx cache_ioctx;
2864};
2865
2866std::string LibRadosTwoPoolsECPP::cache_pool_name;
2867
2868TEST_F(LibRadosTierECPP, Dirty) {
2869 {
2870 ObjectWriteOperation op;
2871 op.undirty();
2872 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still get 0 if it dne
2873 }
2874 {
2875 ObjectWriteOperation op;
2876 op.create(true);
2877 ASSERT_EQ(0, ioctx.operate("foo", &op));
2878 }
2879 {
2880 bool dirty = false;
2881 int r = -1;
2882 ObjectReadOperation op;
2883 op.is_dirty(&dirty, &r);
2884 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
2885 ASSERT_TRUE(dirty);
2886 ASSERT_EQ(0, r);
2887 }
2888 {
2889 ObjectWriteOperation op;
2890 op.undirty();
2891 ASSERT_EQ(0, ioctx.operate("foo", &op));
2892 }
2893 {
2894 ObjectWriteOperation op;
2895 op.undirty();
2896 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still 0 if already clean
2897 }
2898 {
2899 bool dirty = false;
2900 int r = -1;
2901 ObjectReadOperation op;
2902 op.is_dirty(&dirty, &r);
2903 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
2904 ASSERT_FALSE(dirty);
2905 ASSERT_EQ(0, r);
2906 }
2907 //{
2908 // ObjectWriteOperation op;
2909 // op.truncate(0); // still a write even tho it is a no-op
2910 // ASSERT_EQ(0, ioctx.operate("foo", &op));
2911 //}
2912 //{
2913 // bool dirty = false;
2914 // int r = -1;
2915 // ObjectReadOperation op;
2916 // op.is_dirty(&dirty, &r);
2917 // ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
2918 // ASSERT_TRUE(dirty);
2919 // ASSERT_EQ(0, r);
2920 //}
2921}
2922
2923TEST_F(LibRadosTwoPoolsECPP, Overlay) {
2924 // create objects
2925 {
2926 bufferlist bl;
2927 bl.append("base");
2928 ObjectWriteOperation op;
2929 op.write_full(bl);
2930 ASSERT_EQ(0, ioctx.operate("foo", &op));
2931 }
2932 {
2933 bufferlist bl;
2934 bl.append("cache");
2935 ObjectWriteOperation op;
2936 op.write_full(bl);
2937 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
2938 }
2939
2940 // configure cache
2941 bufferlist inbl;
2942 ASSERT_EQ(0, cluster.mon_command(
2943 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2944 "\", \"tierpool\": \"" + cache_pool_name +
2945 "\", \"force_nonempty\": \"--force-nonempty\" }",
2946 inbl, NULL, NULL));
2947 ASSERT_EQ(0, cluster.mon_command(
2948 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2949 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2950 inbl, NULL, NULL));
2951
2952 // wait for maps to settle
2953 cluster.wait_for_latest_osdmap();
2954
2955 // by default, the overlay sends us to cache pool
2956 {
2957 bufferlist bl;
2958 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2959 ASSERT_EQ('c', bl[0]);
2960 }
2961 {
2962 bufferlist bl;
2963 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
2964 ASSERT_EQ('c', bl[0]);
2965 }
2966
2967 // unless we say otherwise
2968 {
2969 bufferlist bl;
2970 ObjectReadOperation op;
2971 op.read(0, 1, &bl, NULL);
2972 librados::AioCompletion *completion = cluster.aio_create_completion();
2973 ASSERT_EQ(0, ioctx.aio_operate(
2974 "foo", completion, &op,
2975 librados::OPERATION_IGNORE_OVERLAY, NULL));
2976 completion->wait_for_safe();
2977 ASSERT_EQ(0, completion->get_return_value());
2978 completion->release();
2979 ASSERT_EQ('b', bl[0]);
2980 }
2981}
2982
2983TEST_F(LibRadosTwoPoolsECPP, Promote) {
2984 // create object
2985 {
2986 bufferlist bl;
2987 bl.append("hi there");
2988 ObjectWriteOperation op;
2989 op.write_full(bl);
2990 ASSERT_EQ(0, ioctx.operate("foo", &op));
2991 }
2992
2993 // configure cache
2994 bufferlist inbl;
2995 ASSERT_EQ(0, cluster.mon_command(
2996 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2997 "\", \"tierpool\": \"" + cache_pool_name +
2998 "\", \"force_nonempty\": \"--force-nonempty\" }",
2999 inbl, NULL, NULL));
3000 ASSERT_EQ(0, cluster.mon_command(
3001 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3002 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3003 inbl, NULL, NULL));
3004 ASSERT_EQ(0, cluster.mon_command(
3005 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3006 "\", \"mode\": \"writeback\"}",
3007 inbl, NULL, NULL));
3008
3009 // wait for maps to settle
3010 cluster.wait_for_latest_osdmap();
3011
3012 // read, trigger a promote
3013 {
3014 bufferlist bl;
3015 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3016 }
3017
3018 // read, trigger a whiteout
3019 {
3020 bufferlist bl;
3021 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
3022 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
3023 }
3024
3025 // verify the object is present in the cache tier
3026 {
3027 NObjectIterator it = cache_ioctx.nobjects_begin();
3028 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3029 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
3030 ++it;
3031 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
3032 ++it;
3033 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3034 }
3035}
3036
3037TEST_F(LibRadosTwoPoolsECPP, PromoteSnap) {
3038 // create object
3039 {
3040 bufferlist bl;
3041 bl.append("hi there");
3042 ObjectWriteOperation op;
3043 op.write_full(bl);
3044 ASSERT_EQ(0, ioctx.operate("foo", &op));
3045 }
3046 {
3047 bufferlist bl;
3048 bl.append("hi there");
3049 ObjectWriteOperation op;
3050 op.write_full(bl);
3051 ASSERT_EQ(0, ioctx.operate("bar", &op));
3052 }
3053 {
3054 bufferlist bl;
3055 bl.append("hi there");
3056 ObjectWriteOperation op;
3057 op.write_full(bl);
3058 ASSERT_EQ(0, ioctx.operate("baz", &op));
3059 }
3060 {
3061 bufferlist bl;
3062 bl.append("hi there");
3063 ObjectWriteOperation op;
3064 op.write_full(bl);
3065 ASSERT_EQ(0, ioctx.operate("bam", &op));
3066 }
3067
3068 // create a snapshot, clone
3069 vector<uint64_t> my_snaps(1);
3070 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3071 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3072 my_snaps));
3073 {
3074 bufferlist bl;
3075 bl.append("ciao!");
3076 ObjectWriteOperation op;
3077 op.write_full(bl);
3078 ASSERT_EQ(0, ioctx.operate("foo", &op));
3079 }
3080 {
3081 bufferlist bl;
3082 bl.append("ciao!");
3083 ObjectWriteOperation op;
3084 op.write_full(bl);
3085 ASSERT_EQ(0, ioctx.operate("bar", &op));
3086 }
3087 {
3088 ObjectWriteOperation op;
3089 op.remove();
3090 ASSERT_EQ(0, ioctx.operate("baz", &op));
3091 }
3092 {
3093 bufferlist bl;
3094 bl.append("ciao!");
3095 ObjectWriteOperation op;
3096 op.write_full(bl);
3097 ASSERT_EQ(0, ioctx.operate("bam", &op));
3098 }
3099
3100 // configure cache
3101 bufferlist inbl;
3102 ASSERT_EQ(0, cluster.mon_command(
3103 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3104 "\", \"tierpool\": \"" + cache_pool_name +
3105 "\", \"force_nonempty\": \"--force-nonempty\" }",
3106 inbl, NULL, NULL));
3107 ASSERT_EQ(0, cluster.mon_command(
3108 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3109 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3110 inbl, NULL, NULL));
3111 ASSERT_EQ(0, cluster.mon_command(
3112 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3113 "\", \"mode\": \"writeback\"}",
3114 inbl, NULL, NULL));
3115
3116 // wait for maps to settle
3117 cluster.wait_for_latest_osdmap();
3118
3119 // read, trigger a promote on the head
3120 {
3121 bufferlist bl;
3122 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3123 ASSERT_EQ('c', bl[0]);
3124 }
3125 {
3126 bufferlist bl;
3127 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
3128 ASSERT_EQ('c', bl[0]);
3129 }
3130
3131 ioctx.snap_set_read(my_snaps[0]);
3132
3133 // stop and scrub this pg (to make sure scrub can handle missing
3134 // clones in the cache tier)
3135 // This test requires cache tier and base tier to have the same pg_num/pgp_num
3136 {
3137 for (int tries = 0; tries < 5; ++tries) {
3138 IoCtx cache_ioctx;
3139 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
3140 uint32_t hash;
3141 ASSERT_EQ(0, ioctx.get_object_pg_hash_position2("foo", &hash));
3142 ostringstream ss;
3143 ss << "{\"prefix\": \"pg scrub\", \"pgid\": \""
3144 << cache_ioctx.get_id() << "."
3145 << hash
3146 << "\"}";
3147 int r = cluster.mon_command(ss.str(), inbl, NULL, NULL);
3148 if (r == -EAGAIN ||
3149 r == -ENOENT) { // in case mgr osdmap is a bit stale
3150 sleep(5);
3151 continue;
3152 }
3153 ASSERT_EQ(0, r);
3154 break;
3155 }
3156 // give it a few seconds to go. this is sloppy but is usually enough time
3157 cout << "waiting for scrub..." << std::endl;
3158 sleep(15);
3159 cout << "done waiting" << std::endl;
3160 }
3161
3162 // read foo snap
3163 {
3164 bufferlist bl;
3165 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3166 ASSERT_EQ('h', bl[0]);
3167 }
3168
3169 // read bar snap
3170 {
3171 bufferlist bl;
3172 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
3173 ASSERT_EQ('h', bl[0]);
3174 }
3175
3176 // read baz snap
3177 {
3178 bufferlist bl;
3179 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
3180 ASSERT_EQ('h', bl[0]);
3181 }
3182
3183 ioctx.snap_set_read(librados::SNAP_HEAD);
3184
3185 // read foo
3186 {
3187 bufferlist bl;
3188 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3189 ASSERT_EQ('c', bl[0]);
3190 }
3191
3192 // read bar
3193 {
3194 bufferlist bl;
3195 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
3196 ASSERT_EQ('c', bl[0]);
3197 }
3198
3199 // read baz
3200 {
3201 bufferlist bl;
3202 ASSERT_EQ(-ENOENT, ioctx.read("baz", bl, 1, 0));
3203 }
3204
3205 // cleanup
3206 ioctx.selfmanaged_snap_remove(my_snaps[0]);
3207}
3208
3209TEST_F(LibRadosTwoPoolsECPP, PromoteSnapTrimRace) {
3210 // create object
3211 {
3212 bufferlist bl;
3213 bl.append("hi there");
3214 ObjectWriteOperation op;
3215 op.write_full(bl);
3216 ASSERT_EQ(0, ioctx.operate("foo", &op));
3217 }
3218
3219 // create a snapshot, clone
3220 vector<uint64_t> my_snaps(1);
3221 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3222 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3223 my_snaps));
3224 {
3225 bufferlist bl;
3226 bl.append("ciao!");
3227 ObjectWriteOperation op;
3228 op.write_full(bl);
3229 ASSERT_EQ(0, ioctx.operate("foo", &op));
3230 }
3231
3232 // configure cache
3233 bufferlist inbl;
3234 ASSERT_EQ(0, cluster.mon_command(
3235 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3236 "\", \"tierpool\": \"" + cache_pool_name +
3237 "\", \"force_nonempty\": \"--force-nonempty\" }",
3238 inbl, NULL, NULL));
3239 ASSERT_EQ(0, cluster.mon_command(
3240 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3241 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3242 inbl, NULL, NULL));
3243 ASSERT_EQ(0, cluster.mon_command(
3244 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3245 "\", \"mode\": \"writeback\"}",
3246 inbl, NULL, NULL));
3247
3248 // wait for maps to settle
3249 cluster.wait_for_latest_osdmap();
3250
3251 // delete the snap
3252 ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(my_snaps[0]));
3253
3254 ioctx.snap_set_read(my_snaps[0]);
3255
3256 // read foo snap
3257 {
3258 bufferlist bl;
3259 ASSERT_EQ(-ENOENT, ioctx.read("foo", bl, 1, 0));
3260 }
3261
3262 // cleanup
3263 ioctx.selfmanaged_snap_remove(my_snaps[0]);
3264}
3265
3266TEST_F(LibRadosTwoPoolsECPP, Whiteout) {
3267 // create object
3268 {
3269 bufferlist bl;
3270 bl.append("hi there");
3271 ObjectWriteOperation op;
3272 op.write_full(bl);
3273 ASSERT_EQ(0, ioctx.operate("foo", &op));
3274 }
3275
3276 // configure cache
3277 bufferlist inbl;
3278 ASSERT_EQ(0, cluster.mon_command(
3279 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3280 "\", \"tierpool\": \"" + cache_pool_name +
3281 "\", \"force_nonempty\": \"--force-nonempty\" }",
3282 inbl, NULL, NULL));
3283 ASSERT_EQ(0, cluster.mon_command(
3284 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3285 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3286 inbl, NULL, NULL));
3287 ASSERT_EQ(0, cluster.mon_command(
3288 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3289 "\", \"mode\": \"writeback\"}",
3290 inbl, NULL, NULL));
3291
3292 // wait for maps to settle
3293 cluster.wait_for_latest_osdmap();
3294
3295 // create some whiteouts, verify they behave
3296 {
3297 ObjectWriteOperation op;
3298 op.assert_exists();
3299 op.remove();
3300 ASSERT_EQ(0, ioctx.operate("foo", &op));
3301 }
3302
3303 {
3304 ObjectWriteOperation op;
3305 op.assert_exists();
3306 op.remove();
3307 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
3308 }
3309 {
3310 ObjectWriteOperation op;
3311 op.assert_exists();
3312 op.remove();
3313 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
3314 }
3315
3316 // verify the whiteouts are there in the cache tier
3317 {
3318 NObjectIterator it = cache_ioctx.nobjects_begin();
3319 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3320 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
3321 ++it;
3322 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
3323 ++it;
3324 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3325 }
3326
3327 // delete a whiteout and verify it goes away
3328 ASSERT_EQ(-ENOENT, ioctx.remove("foo"));
3329 {
3330 ObjectWriteOperation op;
3331 op.remove();
3332 librados::AioCompletion *completion = cluster.aio_create_completion();
3333 ASSERT_EQ(0, ioctx.aio_operate("bar", completion, &op,
3334 librados::OPERATION_IGNORE_CACHE));
3335 completion->wait_for_safe();
3336 ASSERT_EQ(0, completion->get_return_value());
3337 completion->release();
3338
3339 NObjectIterator it = cache_ioctx.nobjects_begin();
3340 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3341 ASSERT_TRUE(it->get_oid() == string("foo"));
3342 ++it;
3343 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3344 }
3345
3346 // recreate an object and verify we can read it
3347 {
3348 bufferlist bl;
3349 bl.append("hi there");
3350 ObjectWriteOperation op;
3351 op.write_full(bl);
3352 ASSERT_EQ(0, ioctx.operate("foo", &op));
3353 }
3354 {
3355 bufferlist bl;
3356 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3357 ASSERT_EQ('h', bl[0]);
3358 }
3359}
3360
3361TEST_F(LibRadosTwoPoolsECPP, Evict) {
3362 // create object
3363 {
3364 bufferlist bl;
3365 bl.append("hi there");
3366 ObjectWriteOperation op;
3367 op.write_full(bl);
3368 ASSERT_EQ(0, ioctx.operate("foo", &op));
3369 }
3370
3371 // configure cache
3372 bufferlist inbl;
3373 ASSERT_EQ(0, cluster.mon_command(
3374 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3375 "\", \"tierpool\": \"" + cache_pool_name +
3376 "\", \"force_nonempty\": \"--force-nonempty\" }",
3377 inbl, NULL, NULL));
3378 ASSERT_EQ(0, cluster.mon_command(
3379 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3380 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3381 inbl, NULL, NULL));
3382 ASSERT_EQ(0, cluster.mon_command(
3383 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3384 "\", \"mode\": \"writeback\"}",
3385 inbl, NULL, NULL));
3386
3387 // wait for maps to settle
3388 cluster.wait_for_latest_osdmap();
3389
3390 // read, trigger a promote
3391 {
3392 bufferlist bl;
3393 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3394 }
3395
3396 // read, trigger a whiteout, and a dirty object
3397 {
3398 bufferlist bl;
3399 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
3400 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
3401 ASSERT_EQ(0, ioctx.write("bar", bl, bl.length(), 0));
3402 }
3403
3404 // verify the object is present in the cache tier
3405 {
3406 NObjectIterator it = cache_ioctx.nobjects_begin();
3407 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3408 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
3409 ++it;
3410 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
3411 ++it;
3412 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3413 }
3414
3415 // pin
3416 {
3417 ObjectWriteOperation op;
3418 op.cache_pin();
3419 librados::AioCompletion *completion = cluster.aio_create_completion();
3420 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
3421 completion->wait_for_safe();
3422 ASSERT_EQ(0, completion->get_return_value());
3423 completion->release();
3424 }
3425
3426 // evict the pinned object with -EPERM
3427 {
3428 ObjectReadOperation op;
3429 op.cache_evict();
3430 librados::AioCompletion *completion = cluster.aio_create_completion();
3431 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
3432 librados::OPERATION_IGNORE_CACHE,
3433 NULL));
3434 completion->wait_for_safe();
3435 ASSERT_EQ(-EPERM, completion->get_return_value());
3436 completion->release();
3437 }
3438
3439 // unpin
3440 {
3441 ObjectWriteOperation op;
3442 op.cache_unpin();
3443 librados::AioCompletion *completion = cluster.aio_create_completion();
3444 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
3445 completion->wait_for_safe();
3446 ASSERT_EQ(0, completion->get_return_value());
3447 completion->release();
3448 }
3449
3450 // flush
3451 {
3452 ObjectReadOperation op;
3453 op.cache_flush();
3454 librados::AioCompletion *completion = cluster.aio_create_completion();
3455 ASSERT_EQ(0, cache_ioctx.aio_operate(
3456 "foo", completion, &op,
3457 librados::OPERATION_IGNORE_OVERLAY, NULL));
3458 completion->wait_for_safe();
3459 ASSERT_EQ(0, completion->get_return_value());
3460 completion->release();
3461 }
3462
3463 // verify clean
3464 {
3465 bool dirty = false;
3466 int r = -1;
3467 ObjectReadOperation op;
3468 op.is_dirty(&dirty, &r);
3469 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
3470 ASSERT_FALSE(dirty);
3471 ASSERT_EQ(0, r);
3472 }
3473
3474 // evict
3475 {
3476 ObjectReadOperation op;
3477 op.cache_evict();
3478 librados::AioCompletion *completion = cluster.aio_create_completion();
3479 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
3480 librados::OPERATION_IGNORE_CACHE,
3481 NULL));
3482 completion->wait_for_safe();
3483 ASSERT_EQ(0, completion->get_return_value());
3484 completion->release();
3485 }
3486 {
3487 ObjectReadOperation op;
3488 op.cache_evict();
3489 librados::AioCompletion *completion = cluster.aio_create_completion();
3490 ASSERT_EQ(0, cache_ioctx.aio_operate(
3491 "foo", completion, &op,
3492 librados::OPERATION_IGNORE_CACHE, NULL));
3493 completion->wait_for_safe();
3494 ASSERT_EQ(0, completion->get_return_value());
3495 completion->release();
3496 }
3497 {
3498 ObjectReadOperation op;
3499 op.cache_evict();
3500 librados::AioCompletion *completion = cluster.aio_create_completion();
3501 ASSERT_EQ(0, cache_ioctx.aio_operate(
3502 "bar", completion, &op,
3503 librados::OPERATION_IGNORE_CACHE, NULL));
3504 completion->wait_for_safe();
3505 ASSERT_EQ(-EBUSY, completion->get_return_value());
3506 completion->release();
3507 }
3508}
3509
3510TEST_F(LibRadosTwoPoolsECPP, EvictSnap) {
3511 // create object
3512 {
3513 bufferlist bl;
3514 bl.append("hi there");
3515 ObjectWriteOperation op;
3516 op.write_full(bl);
3517 ASSERT_EQ(0, ioctx.operate("foo", &op));
3518 }
3519 {
3520 bufferlist bl;
3521 bl.append("hi there");
3522 ObjectWriteOperation op;
3523 op.write_full(bl);
3524 ASSERT_EQ(0, ioctx.operate("bar", &op));
3525 }
3526 {
3527 bufferlist bl;
3528 bl.append("hi there");
3529 ObjectWriteOperation op;
3530 op.write_full(bl);
3531 ASSERT_EQ(0, ioctx.operate("baz", &op));
3532 }
3533 {
3534 bufferlist bl;
3535 bl.append("hi there");
3536 ObjectWriteOperation op;
3537 op.write_full(bl);
3538 ASSERT_EQ(0, ioctx.operate("bam", &op));
3539 }
3540
3541 // create a snapshot, clone
3542 vector<uint64_t> my_snaps(1);
3543 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3544 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3545 my_snaps));
3546 {
3547 bufferlist bl;
3548 bl.append("ciao!");
3549 ObjectWriteOperation op;
3550 op.write_full(bl);
3551 ASSERT_EQ(0, ioctx.operate("foo", &op));
3552 }
3553 {
3554 bufferlist bl;
3555 bl.append("ciao!");
3556 ObjectWriteOperation op;
3557 op.write_full(bl);
3558 ASSERT_EQ(0, ioctx.operate("bar", &op));
3559 }
3560 {
3561 ObjectWriteOperation op;
3562 op.remove();
3563 ASSERT_EQ(0, ioctx.operate("baz", &op));
3564 }
3565 {
3566 bufferlist bl;
3567 bl.append("ciao!");
3568 ObjectWriteOperation op;
3569 op.write_full(bl);
3570 ASSERT_EQ(0, ioctx.operate("bam", &op));
3571 }
3572
3573 // configure cache
3574 bufferlist inbl;
3575 ASSERT_EQ(0, cluster.mon_command(
3576 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3577 "\", \"tierpool\": \"" + cache_pool_name +
3578 "\", \"force_nonempty\": \"--force-nonempty\" }",
3579 inbl, NULL, NULL));
3580 ASSERT_EQ(0, cluster.mon_command(
3581 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3582 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3583 inbl, NULL, NULL));
3584 ASSERT_EQ(0, cluster.mon_command(
3585 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3586 "\", \"mode\": \"writeback\"}",
3587 inbl, NULL, NULL));
3588
3589 // wait for maps to settle
3590 cluster.wait_for_latest_osdmap();
3591
3592 // read, trigger a promote on the head
3593 {
3594 bufferlist bl;
3595 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3596 ASSERT_EQ('c', bl[0]);
3597 }
3598 {
3599 bufferlist bl;
3600 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
3601 ASSERT_EQ('c', bl[0]);
3602 }
3603
3604 // evict bam
3605 {
3606 ObjectReadOperation op;
3607 op.cache_evict();
3608 librados::AioCompletion *completion = cluster.aio_create_completion();
3609 ASSERT_EQ(0, cache_ioctx.aio_operate(
3610 "bam", completion, &op,
3611 librados::OPERATION_IGNORE_CACHE, NULL));
3612 completion->wait_for_safe();
3613 ASSERT_EQ(0, completion->get_return_value());
3614 completion->release();
3615 }
3616 {
3617 bufferlist bl;
3618 ObjectReadOperation op;
3619 op.read(1, 0, &bl, NULL);
3620 librados::AioCompletion *completion = cluster.aio_create_completion();
3621 ASSERT_EQ(0, cache_ioctx.aio_operate(
3622 "bam", completion, &op,
3623 librados::OPERATION_IGNORE_CACHE, NULL));
3624 completion->wait_for_safe();
3625 ASSERT_EQ(-ENOENT, completion->get_return_value());
3626 completion->release();
3627 }
3628
3629 // read foo snap
3630 ioctx.snap_set_read(my_snaps[0]);
3631 {
3632 bufferlist bl;
3633 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3634 ASSERT_EQ('h', bl[0]);
3635 }
3636
3637 // evict foo snap
3638 {
3639 ObjectReadOperation op;
3640 op.cache_evict();
3641 librados::AioCompletion *completion = cluster.aio_create_completion();
3642 ASSERT_EQ(0, ioctx.aio_operate(
3643 "foo", completion, &op,
3644 librados::OPERATION_IGNORE_CACHE, NULL));
3645 completion->wait_for_safe();
3646 ASSERT_EQ(0, completion->get_return_value());
3647 completion->release();
3648 }
3649 // snap is gone...
3650 {
3651 bufferlist bl;
3652 ObjectReadOperation op;
3653 op.read(1, 0, &bl, NULL);
3654 librados::AioCompletion *completion = cluster.aio_create_completion();
3655 ASSERT_EQ(0, ioctx.aio_operate(
3656 "foo", completion, &op,
3657 librados::OPERATION_IGNORE_CACHE, NULL));
3658 completion->wait_for_safe();
3659 ASSERT_EQ(-ENOENT, completion->get_return_value());
3660 completion->release();
3661 }
3662 // head is still there...
3663 ioctx.snap_set_read(librados::SNAP_HEAD);
3664 {
3665 bufferlist bl;
3666 ObjectReadOperation op;
3667 op.read(1, 0, &bl, NULL);
3668 librados::AioCompletion *completion = cluster.aio_create_completion();
3669 ASSERT_EQ(0, ioctx.aio_operate(
3670 "foo", completion, &op,
3671 librados::OPERATION_IGNORE_CACHE, NULL));
3672 completion->wait_for_safe();
3673 ASSERT_EQ(0, completion->get_return_value());
3674 completion->release();
3675 }
3676
3677 // promote head + snap of bar
3678 ioctx.snap_set_read(librados::SNAP_HEAD);
3679 {
3680 bufferlist bl;
3681 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
3682 ASSERT_EQ('c', bl[0]);
3683 }
3684 ioctx.snap_set_read(my_snaps[0]);
3685 {
3686 bufferlist bl;
3687 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
3688 ASSERT_EQ('h', bl[0]);
3689 }
3690
3691 // evict bar head (fail)
3692 ioctx.snap_set_read(librados::SNAP_HEAD);
3693 {
3694 ObjectReadOperation op;
3695 op.cache_evict();
3696 librados::AioCompletion *completion = cluster.aio_create_completion();
3697 ASSERT_EQ(0, ioctx.aio_operate(
3698 "bar", completion, &op,
3699 librados::OPERATION_IGNORE_CACHE, NULL));
3700 completion->wait_for_safe();
3701 ASSERT_EQ(-EBUSY, completion->get_return_value());
3702 completion->release();
3703 }
3704
3705 // evict bar snap
3706 ioctx.snap_set_read(my_snaps[0]);
3707 {
3708 ObjectReadOperation op;
3709 op.cache_evict();
3710 librados::AioCompletion *completion = cluster.aio_create_completion();
3711 ASSERT_EQ(0, ioctx.aio_operate(
3712 "bar", completion, &op,
3713 librados::OPERATION_IGNORE_CACHE, NULL));
3714 completion->wait_for_safe();
3715 ASSERT_EQ(0, completion->get_return_value());
3716 completion->release();
3717 }
3718 // ...and then head
3719 ioctx.snap_set_read(librados::SNAP_HEAD);
3720 {
3721 bufferlist bl;
3722 ObjectReadOperation op;
3723 op.read(1, 0, &bl, NULL);
3724 librados::AioCompletion *completion = cluster.aio_create_completion();
3725 ASSERT_EQ(0, ioctx.aio_operate(
3726 "bar", completion, &op,
3727 librados::OPERATION_IGNORE_CACHE, NULL));
3728 completion->wait_for_safe();
3729 ASSERT_EQ(0, completion->get_return_value());
3730 completion->release();
3731 }
3732 {
3733 ObjectReadOperation op;
3734 op.cache_evict();
3735 librados::AioCompletion *completion = cluster.aio_create_completion();
3736 ASSERT_EQ(0, ioctx.aio_operate(
3737 "bar", completion, &op,
3738 librados::OPERATION_IGNORE_CACHE, NULL));
3739 completion->wait_for_safe();
3740 ASSERT_EQ(0, completion->get_return_value());
3741 completion->release();
3742 }
3743
3744 // cleanup
3745 ioctx.selfmanaged_snap_remove(my_snaps[0]);
3746}
3747
3748TEST_F(LibRadosTwoPoolsECPP, TryFlush) {
3749 // configure cache
3750 bufferlist inbl;
3751 ASSERT_EQ(0, cluster.mon_command(
3752 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3753 "\", \"tierpool\": \"" + cache_pool_name +
3754 "\", \"force_nonempty\": \"--force-nonempty\" }",
3755 inbl, NULL, NULL));
3756 ASSERT_EQ(0, cluster.mon_command(
3757 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3758 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3759 inbl, NULL, NULL));
3760 ASSERT_EQ(0, cluster.mon_command(
3761 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3762 "\", \"mode\": \"writeback\"}",
3763 inbl, NULL, NULL));
3764
3765 // wait for maps to settle
3766 cluster.wait_for_latest_osdmap();
3767
3768 // create object
3769 {
3770 bufferlist bl;
3771 bl.append("hi there");
3772 ObjectWriteOperation op;
3773 op.write_full(bl);
3774 ASSERT_EQ(0, ioctx.operate("foo", &op));
3775 }
3776
3777 // verify the object is present in the cache tier
3778 {
3779 NObjectIterator it = cache_ioctx.nobjects_begin();
3780 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3781 ASSERT_TRUE(it->get_oid() == string("foo"));
3782 ++it;
3783 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3784 }
3785
3786 // verify the object is NOT present in the base tier
3787 {
3788 NObjectIterator it = ioctx.nobjects_begin();
3789 ASSERT_TRUE(it == ioctx.nobjects_end());
3790 }
3791
3792 // verify dirty
3793 {
3794 bool dirty = false;
3795 int r = -1;
3796 ObjectReadOperation op;
3797 op.is_dirty(&dirty, &r);
3798 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
3799 ASSERT_TRUE(dirty);
3800 ASSERT_EQ(0, r);
3801 }
3802
3803 // pin
3804 {
3805 ObjectWriteOperation op;
3806 op.cache_pin();
3807 librados::AioCompletion *completion = cluster.aio_create_completion();
3808 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
3809 completion->wait_for_safe();
3810 ASSERT_EQ(0, completion->get_return_value());
3811 completion->release();
3812 }
3813
3814 // flush the pinned object with -EPERM
3815 {
3816 ObjectReadOperation op;
3817 op.cache_try_flush();
3818 librados::AioCompletion *completion = cluster.aio_create_completion();
3819 ASSERT_EQ(0, cache_ioctx.aio_operate(
3820 "foo", completion, &op,
3821 librados::OPERATION_IGNORE_OVERLAY |
3822 librados::OPERATION_SKIPRWLOCKS, NULL));
3823 completion->wait_for_safe();
3824 ASSERT_EQ(-EPERM, completion->get_return_value());
3825 completion->release();
3826 }
3827
3828 // unpin
3829 {
3830 ObjectWriteOperation op;
3831 op.cache_unpin();
3832 librados::AioCompletion *completion = cluster.aio_create_completion();
3833 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
3834 completion->wait_for_safe();
3835 ASSERT_EQ(0, completion->get_return_value());
3836 completion->release();
3837 }
3838
3839 // flush
3840 {
3841 ObjectReadOperation op;
3842 op.cache_try_flush();
3843 librados::AioCompletion *completion = cluster.aio_create_completion();
3844 ASSERT_EQ(0, cache_ioctx.aio_operate(
3845 "foo", completion, &op,
3846 librados::OPERATION_IGNORE_OVERLAY |
3847 librados::OPERATION_SKIPRWLOCKS, NULL));
3848 completion->wait_for_safe();
3849 ASSERT_EQ(0, completion->get_return_value());
3850 completion->release();
3851 }
3852
3853 // verify clean
3854 {
3855 bool dirty = false;
3856 int r = -1;
3857 ObjectReadOperation op;
3858 op.is_dirty(&dirty, &r);
3859 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
3860 ASSERT_FALSE(dirty);
3861 ASSERT_EQ(0, r);
3862 }
3863
3864 // verify in base tier
3865 {
3866 NObjectIterator it = ioctx.nobjects_begin();
3867 ASSERT_TRUE(it != ioctx.nobjects_end());
3868 ASSERT_TRUE(it->get_oid() == string("foo"));
3869 ++it;
3870 ASSERT_TRUE(it == ioctx.nobjects_end());
3871 }
3872
3873 // evict it
3874 {
3875 ObjectReadOperation op;
3876 op.cache_evict();
3877 librados::AioCompletion *completion = cluster.aio_create_completion();
3878 ASSERT_EQ(0, cache_ioctx.aio_operate(
3879 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
3880 completion->wait_for_safe();
3881 ASSERT_EQ(0, completion->get_return_value());
3882 completion->release();
3883 }
3884
3885 // verify no longer in cache tier
3886 {
3887 NObjectIterator it = cache_ioctx.nobjects_begin();
3888 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3889 }
3890}
3891
3892TEST_F(LibRadosTwoPoolsECPP, FailedFlush) {
3893 // configure cache
3894 bufferlist inbl;
3895 ASSERT_EQ(0, cluster.mon_command(
3896 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3897 "\", \"tierpool\": \"" + cache_pool_name +
3898 "\", \"force_nonempty\": \"--force-nonempty\" }",
3899 inbl, NULL, NULL));
3900 ASSERT_EQ(0, cluster.mon_command(
3901 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3902 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3903 inbl, NULL, NULL));
3904 ASSERT_EQ(0, cluster.mon_command(
3905 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3906 "\", \"mode\": \"writeback\"}",
3907 inbl, NULL, NULL));
3908
3909 // wait for maps to settle
3910 cluster.wait_for_latest_osdmap();
3911
3912 // create object
3913 {
3914 bufferlist bl;
3915 bl.append("hi there");
3916 ObjectWriteOperation op;
3917 op.write_full(bl);
3918 ASSERT_EQ(0, ioctx.operate("foo", &op));
3919 }
3920
3921 // verify the object is present in the cache tier
3922 {
3923 NObjectIterator it = cache_ioctx.nobjects_begin();
3924 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3925 ASSERT_TRUE(it->get_oid() == string("foo"));
3926 ++it;
3927 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3928 }
3929
3930 // verify the object is NOT present in the base tier
3931 {
3932 NObjectIterator it = ioctx.nobjects_begin();
3933 ASSERT_TRUE(it == ioctx.nobjects_end());
3934 }
3935
3936 // set omap
3937 {
3938 ObjectWriteOperation op;
3939 std::map<std::string, bufferlist> omap;
3940 omap["somekey"] = bufferlist();
3941 op.omap_set(omap);
3942 librados::AioCompletion *completion = cluster.aio_create_completion();
3943 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
3944 completion->wait_for_safe();
3945 ASSERT_EQ(0, completion->get_return_value());
3946 completion->release();
3947 }
3948
3949 // flush
3950 {
3951 ObjectReadOperation op;
3952 op.cache_flush();
3953 librados::AioCompletion *completion = cluster.aio_create_completion();
3954 ASSERT_EQ(0, cache_ioctx.aio_operate(
3955 "foo", completion, &op,
3956 librados::OPERATION_IGNORE_OVERLAY, NULL));
3957 completion->wait_for_safe();
3958 ASSERT_NE(0, completion->get_return_value());
3959 completion->release();
3960 }
3961
3962 // get omap
3963 {
3964 ObjectReadOperation op;
3965 bufferlist bl;
3966 int prval = 0;
3967 std::set<std::string> keys;
3968 keys.insert("somekey");
3969 std::map<std::string, bufferlist> map;
3970
3971 op.omap_get_vals_by_keys(keys, &map, &prval);
3972 librados::AioCompletion *completion = cluster.aio_create_completion();
3973 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op, &bl));
3974 sleep(5);
3975 bool completed = completion->is_complete();
3976 if( !completed ) {
3977 cache_ioctx.aio_cancel(completion);
3978 std::cerr << "Most probably test case will hang here, please reset manually" << std::endl;
3979 ASSERT_TRUE(completed); //in fact we are locked forever at test case shutdown unless fix for http://tracker.ceph.com/issues/14511 is applied. Seems there is no workaround for that
3980 }
3981 completion->release();
3982 }
3983 // verify still not in base tier
3984 {
3985 ASSERT_TRUE(ioctx.nobjects_begin() == ioctx.nobjects_end());
3986 }
3987 // erase it
3988 {
3989 ObjectWriteOperation op;
3990 op.remove();
3991 ASSERT_EQ(0, ioctx.operate("foo", &op));
3992 }
3993 // flush whiteout
3994 {
3995 ObjectReadOperation op;
3996 op.cache_flush();
3997 librados::AioCompletion *completion = cluster.aio_create_completion();
3998 ASSERT_EQ(0, cache_ioctx.aio_operate(
3999 "foo", completion, &op,
4000 librados::OPERATION_IGNORE_OVERLAY, NULL));
4001 completion->wait_for_safe();
4002 ASSERT_EQ(0, completion->get_return_value());
4003 completion->release();
4004 }
4005 // evict
4006 {
4007 ObjectReadOperation op;
4008 op.cache_evict();
4009 librados::AioCompletion *completion = cluster.aio_create_completion();
4010 ASSERT_EQ(0, cache_ioctx.aio_operate(
4011 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
4012 completion->wait_for_safe();
4013 ASSERT_EQ(0, completion->get_return_value());
4014 completion->release();
4015 }
4016
4017 // verify no longer in cache tier
4018 {
4019 NObjectIterator it = cache_ioctx.nobjects_begin();
4020 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
4021 }
4022 // or base tier
4023 {
4024 NObjectIterator it = ioctx.nobjects_begin();
4025 ASSERT_TRUE(it == ioctx.nobjects_end());
4026 }
4027}
4028
4029TEST_F(LibRadosTwoPoolsECPP, Flush) {
4030 // configure cache
4031 bufferlist inbl;
4032 ASSERT_EQ(0, cluster.mon_command(
4033 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4034 "\", \"tierpool\": \"" + cache_pool_name +
4035 "\", \"force_nonempty\": \"--force-nonempty\" }",
4036 inbl, NULL, NULL));
4037 ASSERT_EQ(0, cluster.mon_command(
4038 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4039 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4040 inbl, NULL, NULL));
4041 ASSERT_EQ(0, cluster.mon_command(
4042 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
4043 "\", \"mode\": \"writeback\"}",
4044 inbl, NULL, NULL));
4045
4046 // wait for maps to settle
4047 cluster.wait_for_latest_osdmap();
4048
4049 uint64_t user_version = 0;
4050
4051 // create object
4052 {
4053 bufferlist bl;
4054 bl.append("hi there");
4055 ObjectWriteOperation op;
4056 op.write_full(bl);
4057 ASSERT_EQ(0, ioctx.operate("foo", &op));
4058 }
4059
4060 // verify the object is present in the cache tier
4061 {
4062 NObjectIterator it = cache_ioctx.nobjects_begin();
4063 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
4064 ASSERT_TRUE(it->get_oid() == string("foo"));
4065 ++it;
4066 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
4067 }
4068
4069 // verify the object is NOT present in the base tier
4070 {
4071 NObjectIterator it = ioctx.nobjects_begin();
4072 ASSERT_TRUE(it == ioctx.nobjects_end());
4073 }
4074
4075 // verify dirty
4076 {
4077 bool dirty = false;
4078 int r = -1;
4079 ObjectReadOperation op;
4080 op.is_dirty(&dirty, &r);
4081 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
4082 ASSERT_TRUE(dirty);
4083 ASSERT_EQ(0, r);
4084 user_version = cache_ioctx.get_last_version();
4085 }
4086
4087 // pin
4088 {
4089 ObjectWriteOperation op;
4090 op.cache_pin();
4091 librados::AioCompletion *completion = cluster.aio_create_completion();
4092 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
4093 completion->wait_for_safe();
4094 ASSERT_EQ(0, completion->get_return_value());
4095 completion->release();
4096 }
4097
4098 // flush the pinned object with -EPERM
4099 {
4100 ObjectReadOperation op;
4101 op.cache_try_flush();
4102 librados::AioCompletion *completion = cluster.aio_create_completion();
4103 ASSERT_EQ(0, cache_ioctx.aio_operate(
4104 "foo", completion, &op,
4105 librados::OPERATION_IGNORE_OVERLAY |
4106 librados::OPERATION_SKIPRWLOCKS, NULL));
4107 completion->wait_for_safe();
4108 ASSERT_EQ(-EPERM, completion->get_return_value());
4109 completion->release();
4110 }
4111
4112 // unpin
4113 {
4114 ObjectWriteOperation op;
4115 op.cache_unpin();
4116 librados::AioCompletion *completion = cluster.aio_create_completion();
4117 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
4118 completion->wait_for_safe();
4119 ASSERT_EQ(0, completion->get_return_value());
4120 completion->release();
4121 }
4122
4123 // flush
4124 {
4125 ObjectReadOperation op;
4126 op.cache_flush();
4127 librados::AioCompletion *completion = cluster.aio_create_completion();
4128 ASSERT_EQ(0, cache_ioctx.aio_operate(
4129 "foo", completion, &op,
4130 librados::OPERATION_IGNORE_OVERLAY, NULL));
4131 completion->wait_for_safe();
4132 ASSERT_EQ(0, completion->get_return_value());
4133 completion->release();
4134 }
4135
4136 // verify clean
4137 {
4138 bool dirty = false;
4139 int r = -1;
4140 ObjectReadOperation op;
4141 op.is_dirty(&dirty, &r);
4142 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
4143 ASSERT_FALSE(dirty);
4144 ASSERT_EQ(0, r);
4145 }
4146
4147 // verify in base tier
4148 {
4149 NObjectIterator it = ioctx.nobjects_begin();
4150 ASSERT_TRUE(it != ioctx.nobjects_end());
4151 ASSERT_TRUE(it->get_oid() == string("foo"));
4152 ++it;
4153 ASSERT_TRUE(it == ioctx.nobjects_end());
4154 }
4155
4156 // evict it
4157 {
4158 ObjectReadOperation op;
4159 op.cache_evict();
4160 librados::AioCompletion *completion = cluster.aio_create_completion();
4161 ASSERT_EQ(0, cache_ioctx.aio_operate(
4162 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
4163 completion->wait_for_safe();
4164 ASSERT_EQ(0, completion->get_return_value());
4165 completion->release();
4166 }
4167
4168 // verify no longer in cache tier
4169 {
4170 NObjectIterator it = cache_ioctx.nobjects_begin();
4171 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
4172 }
4173
4174 // read it again and verify the version is consistent
4175 {
4176 bufferlist bl;
4177 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
4178 ASSERT_EQ(user_version, cache_ioctx.get_last_version());
4179 }
4180
4181 // erase it
4182 {
4183 ObjectWriteOperation op;
4184 op.remove();
4185 ASSERT_EQ(0, ioctx.operate("foo", &op));
4186 }
4187
4188 // flush whiteout
4189 {
4190 ObjectReadOperation op;
4191 op.cache_flush();
4192 librados::AioCompletion *completion = cluster.aio_create_completion();
4193 ASSERT_EQ(0, cache_ioctx.aio_operate(
4194 "foo", completion, &op,
4195 librados::OPERATION_IGNORE_OVERLAY, NULL));
4196 completion->wait_for_safe();
4197 ASSERT_EQ(0, completion->get_return_value());
4198 completion->release();
4199 }
4200
4201 // evict
4202 {
4203 ObjectReadOperation op;
4204 op.cache_evict();
4205 librados::AioCompletion *completion = cluster.aio_create_completion();
4206 ASSERT_EQ(0, cache_ioctx.aio_operate(
4207 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
4208 completion->wait_for_safe();
4209 ASSERT_EQ(0, completion->get_return_value());
4210 completion->release();
4211 }
4212
4213 // verify no longer in cache tier
4214 {
4215 NObjectIterator it = cache_ioctx.nobjects_begin();
4216 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
4217 }
4218 // or base tier
4219 {
4220 NObjectIterator it = ioctx.nobjects_begin();
4221 ASSERT_TRUE(it == ioctx.nobjects_end());
4222 }
4223}
4224
4225TEST_F(LibRadosTwoPoolsECPP, FlushSnap) {
4226 // configure cache
4227 bufferlist inbl;
4228 ASSERT_EQ(0, cluster.mon_command(
4229 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4230 "\", \"tierpool\": \"" + cache_pool_name +
4231 "\", \"force_nonempty\": \"--force-nonempty\" }",
4232 inbl, NULL, NULL));
4233 ASSERT_EQ(0, cluster.mon_command(
4234 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4235 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4236 inbl, NULL, NULL));
4237 ASSERT_EQ(0, cluster.mon_command(
4238 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
4239 "\", \"mode\": \"writeback\"}",
4240 inbl, NULL, NULL));
4241
4242 // wait for maps to settle
4243 cluster.wait_for_latest_osdmap();
4244
4245 // create object
4246 {
4247 bufferlist bl;
4248 bl.append("a");
4249 ObjectWriteOperation op;
4250 op.write_full(bl);
4251 ASSERT_EQ(0, ioctx.operate("foo", &op));
4252 }
4253
4254 // create a snapshot, clone
4255 vector<uint64_t> my_snaps(1);
4256 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4257 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4258 my_snaps));
4259 {
4260 bufferlist bl;
4261 bl.append("b");
4262 ObjectWriteOperation op;
4263 op.write_full(bl);
4264 ASSERT_EQ(0, ioctx.operate("foo", &op));
4265 }
4266
4267 // and another
4268 my_snaps.resize(2);
4269 my_snaps[1] = my_snaps[0];
4270 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4271 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4272 my_snaps));
4273 {
4274 bufferlist bl;
4275 bl.append("c");
4276 ObjectWriteOperation op;
4277 op.write_full(bl);
4278 ASSERT_EQ(0, ioctx.operate("foo", &op));
4279 }
4280
4281 // verify the object is present in the cache tier
4282 {
4283 NObjectIterator it = cache_ioctx.nobjects_begin();
4284 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
4285 ASSERT_TRUE(it->get_oid() == string("foo"));
4286 ++it;
4287 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
4288 }
4289
4290 // verify the object is NOT present in the base tier
4291 {
4292 NObjectIterator it = ioctx.nobjects_begin();
4293 ASSERT_TRUE(it == ioctx.nobjects_end());
4294 }
4295
4296 // flush on head (should fail)
4297 ioctx.snap_set_read(librados::SNAP_HEAD);
4298 {
4299 ObjectReadOperation op;
4300 op.cache_flush();
4301 librados::AioCompletion *completion = cluster.aio_create_completion();
4302 ASSERT_EQ(0, ioctx.aio_operate(
4303 "foo", completion, &op,
4304 librados::OPERATION_IGNORE_CACHE, NULL));
4305 completion->wait_for_safe();
4306 ASSERT_EQ(-EBUSY, completion->get_return_value());
4307 completion->release();
4308 }
4309 // flush on recent snap (should fail)
4310 ioctx.snap_set_read(my_snaps[0]);
4311 {
4312 ObjectReadOperation op;
4313 op.cache_flush();
4314 librados::AioCompletion *completion = cluster.aio_create_completion();
4315 ASSERT_EQ(0, ioctx.aio_operate(
4316 "foo", completion, &op,
4317 librados::OPERATION_IGNORE_CACHE, NULL));
4318 completion->wait_for_safe();
4319 ASSERT_EQ(-EBUSY, completion->get_return_value());
4320 completion->release();
4321 }
4322 // flush on oldest snap
4323 ioctx.snap_set_read(my_snaps[1]);
4324 {
4325 ObjectReadOperation op;
4326 op.cache_flush();
4327 librados::AioCompletion *completion = cluster.aio_create_completion();
4328 ASSERT_EQ(0, ioctx.aio_operate(
4329 "foo", completion, &op,
4330 librados::OPERATION_IGNORE_CACHE, NULL));
4331 completion->wait_for_safe();
4332 ASSERT_EQ(0, completion->get_return_value());
4333 completion->release();
4334 }
4335 // flush on next oldest snap
4336 ioctx.snap_set_read(my_snaps[0]);
4337 {
4338 ObjectReadOperation op;
4339 op.cache_flush();
4340 librados::AioCompletion *completion = cluster.aio_create_completion();
4341 ASSERT_EQ(0, ioctx.aio_operate(
4342 "foo", completion, &op,
4343 librados::OPERATION_IGNORE_CACHE, NULL));
4344 completion->wait_for_safe();
4345 ASSERT_EQ(0, completion->get_return_value());
4346 completion->release();
4347 }
4348 // flush on head
4349 ioctx.snap_set_read(librados::SNAP_HEAD);
4350 {
4351 ObjectReadOperation op;
4352 op.cache_flush();
4353 librados::AioCompletion *completion = cluster.aio_create_completion();
4354 ASSERT_EQ(0, ioctx.aio_operate(
4355 "foo", completion, &op,
4356 librados::OPERATION_IGNORE_CACHE, NULL));
4357 completion->wait_for_safe();
4358 ASSERT_EQ(0, completion->get_return_value());
4359 completion->release();
4360 }
4361
4362 // verify i can read the snaps from the cache pool
4363 ioctx.snap_set_read(librados::SNAP_HEAD);
4364 {
4365 bufferlist bl;
4366 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4367 ASSERT_EQ('c', bl[0]);
4368 }
4369 ioctx.snap_set_read(my_snaps[0]);
4370 {
4371 bufferlist bl;
4372 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4373 ASSERT_EQ('b', bl[0]);
4374 }
4375 ioctx.snap_set_read(my_snaps[1]);
4376 {
4377 bufferlist bl;
4378 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4379 ASSERT_EQ('a', bl[0]);
4380 }
4381
4382 // tear down tiers
4383 ASSERT_EQ(0, cluster.mon_command(
4384 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
4385 "\"}",
4386 inbl, NULL, NULL));
4387
4388 // wait for maps to settle
4389 cluster.wait_for_latest_osdmap();
4390
4391 // verify i can read the snaps from the base pool
4392 ioctx.snap_set_read(librados::SNAP_HEAD);
4393 {
4394 bufferlist bl;
4395 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4396 ASSERT_EQ('c', bl[0]);
4397 }
4398 ioctx.snap_set_read(my_snaps[0]);
4399 {
4400 bufferlist bl;
4401 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4402 ASSERT_EQ('b', bl[0]);
4403 }
4404 ioctx.snap_set_read(my_snaps[1]);
4405 {
4406 bufferlist bl;
4407 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4408 ASSERT_EQ('a', bl[0]);
4409 }
4410
4411 ASSERT_EQ(0, cluster.mon_command(
4412 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4413 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4414 inbl, NULL, NULL));
4415 cluster.wait_for_latest_osdmap();
4416
4417 // cleanup
4418 ioctx.selfmanaged_snap_remove(my_snaps[0]);
4419}
4420
4421TEST_F(LibRadosTierECPP, FlushWriteRaces) {
4422 Rados cluster;
4423 std::string pool_name = get_temp_pool_name();
4424 std::string cache_pool_name = pool_name + "-cache";
4425 ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
4426 ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str()));
4427 IoCtx cache_ioctx;
4428 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
4429 IoCtx ioctx;
4430 ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
4431
4432 // configure cache
4433 bufferlist inbl;
4434 ASSERT_EQ(0, cluster.mon_command(
4435 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4436 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
4437 inbl, NULL, NULL));
4438 ASSERT_EQ(0, cluster.mon_command(
4439 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4440 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4441 inbl, NULL, NULL));
4442 ASSERT_EQ(0, cluster.mon_command(
4443 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
4444 "\", \"mode\": \"writeback\"}",
4445 inbl, NULL, NULL));
4446
4447 // wait for maps to settle
4448 cluster.wait_for_latest_osdmap();
4449
4450 // create/dirty object
4451 bufferlist bl;
4452 bl.append("hi there");
4453 {
4454 ObjectWriteOperation op;
4455 op.write_full(bl);
4456 ASSERT_EQ(0, ioctx.operate("foo", &op));
4457 }
4458
4459 // flush + write
4460 {
4461 ObjectReadOperation op;
4462 op.cache_flush();
4463 librados::AioCompletion *completion = cluster.aio_create_completion();
4464 ASSERT_EQ(0, cache_ioctx.aio_operate(
4465 "foo", completion, &op,
4466 librados::OPERATION_IGNORE_OVERLAY, NULL));
4467
4468 ObjectWriteOperation op2;
4469 op2.write_full(bl);
4470 librados::AioCompletion *completion2 = cluster.aio_create_completion();
4471 ASSERT_EQ(0, ioctx.aio_operate(
4472 "foo", completion2, &op2, 0));
4473
4474 completion->wait_for_safe();
4475 completion2->wait_for_safe();
4476 ASSERT_EQ(0, completion->get_return_value());
4477 ASSERT_EQ(0, completion2->get_return_value());
4478 completion->release();
4479 completion2->release();
4480 }
4481
4482 int tries = 1000;
4483 do {
4484 // create/dirty object
4485 {
4486 bufferlist bl;
4487 bl.append("hi there");
4488 ObjectWriteOperation op;
4489 op.write_full(bl);
4490 ASSERT_EQ(0, ioctx.operate("foo", &op));
4491 }
4492
4493 // try-flush + write
4494 {
4495 ObjectReadOperation op;
4496 op.cache_try_flush();
4497 librados::AioCompletion *completion = cluster.aio_create_completion();
4498 ASSERT_EQ(0, cache_ioctx.aio_operate(
4499 "foo", completion, &op,
4500 librados::OPERATION_IGNORE_OVERLAY |
4501 librados::OPERATION_SKIPRWLOCKS, NULL));
4502
4503 ObjectWriteOperation op2;
4504 op2.write_full(bl);
4505 librados::AioCompletion *completion2 = cluster.aio_create_completion();
4506 ASSERT_EQ(0, ioctx.aio_operate("foo", completion2, &op2, 0));
4507
4508 completion->wait_for_safe();
4509 completion2->wait_for_safe();
4510 int r = completion->get_return_value();
4511 ASSERT_TRUE(r == -EBUSY || r == 0);
4512 ASSERT_EQ(0, completion2->get_return_value());
4513 completion->release();
4514 completion2->release();
4515 if (r == -EBUSY)
4516 break;
4517 cout << "didn't get EBUSY, trying again" << std::endl;
4518 }
4519 ASSERT_TRUE(--tries);
4520 } while (true);
4521
4522 // tear down tiers
4523 ASSERT_EQ(0, cluster.mon_command(
4524 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
4525 "\"}",
4526 inbl, NULL, NULL));
4527 ASSERT_EQ(0, cluster.mon_command(
4528 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
4529 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
4530 inbl, NULL, NULL));
4531
4532 // wait for maps to settle before next test
4533 cluster.wait_for_latest_osdmap();
4534
4535 ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str()));
4536 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
4537}
4538
4539TEST_F(LibRadosTwoPoolsECPP, FlushTryFlushRaces) {
4540 // configure cache
4541 bufferlist inbl;
4542 ASSERT_EQ(0, cluster.mon_command(
4543 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4544 "\", \"tierpool\": \"" + cache_pool_name +
4545 "\", \"force_nonempty\": \"--force-nonempty\" }",
4546 inbl, NULL, NULL));
4547 ASSERT_EQ(0, cluster.mon_command(
4548 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4549 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4550 inbl, NULL, NULL));
4551 ASSERT_EQ(0, cluster.mon_command(
4552 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
4553 "\", \"mode\": \"writeback\"}",
4554 inbl, NULL, NULL));
4555
4556 // wait for maps to settle
4557 cluster.wait_for_latest_osdmap();
4558
4559 // create/dirty object
4560 {
4561 bufferlist bl;
4562 bl.append("hi there");
4563 ObjectWriteOperation op;
4564 op.write_full(bl);
4565 ASSERT_EQ(0, ioctx.operate("foo", &op));
4566 }
4567
4568 // flush + flush
4569 {
4570 ObjectReadOperation op;
4571 op.cache_flush();
4572 librados::AioCompletion *completion = cluster.aio_create_completion();
4573 ASSERT_EQ(0, cache_ioctx.aio_operate(
4574 "foo", completion, &op,
4575 librados::OPERATION_IGNORE_OVERLAY, NULL));
4576
4577 ObjectReadOperation op2;
4578 op2.cache_flush();
4579 librados::AioCompletion *completion2 = cluster.aio_create_completion();
4580 ASSERT_EQ(0, cache_ioctx.aio_operate(
4581 "foo", completion2, &op2,
4582 librados::OPERATION_IGNORE_OVERLAY, NULL));
4583
4584 completion->wait_for_safe();
4585 completion2->wait_for_safe();
4586 ASSERT_EQ(0, completion->get_return_value());
4587 ASSERT_EQ(0, completion2->get_return_value());
4588 completion->release();
4589 completion2->release();
4590 }
4591
4592 // create/dirty object
4593 {
4594 bufferlist bl;
4595 bl.append("hi there");
4596 ObjectWriteOperation op;
4597 op.write_full(bl);
4598 ASSERT_EQ(0, ioctx.operate("foo", &op));
4599 }
4600
4601 // flush + try-flush
4602 {
4603 ObjectReadOperation op;
4604 op.cache_flush();
4605 librados::AioCompletion *completion = cluster.aio_create_completion();
4606 ASSERT_EQ(0, cache_ioctx.aio_operate(
4607 "foo", completion, &op,
4608 librados::OPERATION_IGNORE_OVERLAY, NULL));
4609
4610 ObjectReadOperation op2;
4611 op2.cache_try_flush();
4612 librados::AioCompletion *completion2 = cluster.aio_create_completion();
4613 ASSERT_EQ(0, cache_ioctx.aio_operate(
4614 "foo", completion2, &op2,
4615 librados::OPERATION_IGNORE_OVERLAY |
4616 librados::OPERATION_SKIPRWLOCKS, NULL));
4617
4618 completion->wait_for_safe();
4619 completion2->wait_for_safe();
4620 ASSERT_EQ(0, completion->get_return_value());
4621 ASSERT_EQ(0, completion2->get_return_value());
4622 completion->release();
4623 completion2->release();
4624 }
4625
4626 // create/dirty object
4627 int tries = 1000;
4628 do {
4629 {
4630 bufferlist bl;
4631 bl.append("hi there");
4632 ObjectWriteOperation op;
4633 op.write_full(bl);
4634 ASSERT_EQ(0, ioctx.operate("foo", &op));
4635 }
4636
4637 // try-flush + flush
4638 // (flush will not piggyback on try-flush)
4639 {
4640 ObjectReadOperation op;
4641 op.cache_try_flush();
4642 librados::AioCompletion *completion = cluster.aio_create_completion();
4643 ASSERT_EQ(0, cache_ioctx.aio_operate(
4644 "foo", completion, &op,
4645 librados::OPERATION_IGNORE_OVERLAY |
4646 librados::OPERATION_SKIPRWLOCKS, NULL));
4647
4648 ObjectReadOperation op2;
4649 op2.cache_flush();
4650 librados::AioCompletion *completion2 = cluster.aio_create_completion();
4651 ASSERT_EQ(0, cache_ioctx.aio_operate(
4652 "foo", completion2, &op2,
4653 librados::OPERATION_IGNORE_OVERLAY, NULL));
4654
4655 completion->wait_for_safe();
4656 completion2->wait_for_safe();
4657 int r = completion->get_return_value();
4658 ASSERT_TRUE(r == -EBUSY || r == 0);
4659 ASSERT_EQ(0, completion2->get_return_value());
4660 completion->release();
4661 completion2->release();
4662 if (r == -EBUSY)
4663 break;
4664 cout << "didn't get EBUSY, trying again" << std::endl;
4665 }
4666 ASSERT_TRUE(--tries);
4667 } while (true);
4668
4669 // create/dirty object
4670 {
4671 bufferlist bl;
4672 bl.append("hi there");
4673 ObjectWriteOperation op;
4674 op.write_full(bl);
4675 ASSERT_EQ(0, ioctx.operate("foo", &op));
4676 }
4677
4678 // try-flush + try-flush
4679 {
4680 ObjectReadOperation op;
4681 op.cache_try_flush();
4682 librados::AioCompletion *completion = cluster.aio_create_completion();
4683 ASSERT_EQ(0, cache_ioctx.aio_operate(
4684 "foo", completion, &op,
4685 librados::OPERATION_IGNORE_OVERLAY |
4686 librados::OPERATION_SKIPRWLOCKS, NULL));
4687
4688 ObjectReadOperation op2;
4689 op2.cache_try_flush();
4690 librados::AioCompletion *completion2 = cluster.aio_create_completion();
4691 ASSERT_EQ(0, cache_ioctx.aio_operate(
4692 "foo", completion2, &op2,
4693 librados::OPERATION_IGNORE_OVERLAY |
4694 librados::OPERATION_SKIPRWLOCKS, NULL));
4695
4696 completion->wait_for_safe();
4697 completion2->wait_for_safe();
4698 ASSERT_EQ(0, completion->get_return_value());
4699 ASSERT_EQ(0, completion2->get_return_value());
4700 completion->release();
4701 completion2->release();
4702 }
4703}
4704
4705TEST_F(LibRadosTwoPoolsECPP, TryFlushReadRace) {
4706 // configure cache
4707 bufferlist inbl;
4708 ASSERT_EQ(0, cluster.mon_command(
4709 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4710 "\", \"tierpool\": \"" + cache_pool_name +
4711 "\", \"force_nonempty\": \"--force-nonempty\" }",
4712 inbl, NULL, NULL));
4713 ASSERT_EQ(0, cluster.mon_command(
4714 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4715 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4716 inbl, NULL, NULL));
4717 ASSERT_EQ(0, cluster.mon_command(
4718 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
4719 "\", \"mode\": \"writeback\"}",
4720 inbl, NULL, NULL));
4721
4722 // wait for maps to settle
4723 cluster.wait_for_latest_osdmap();
4724
4725 // create/dirty object
4726 {
4727 bufferlist bl;
4728 bl.append("hi there");
4729 bufferptr bp(4000000); // make it big!
4730 bp.zero();
4731 bl.append(bp);
4732 ObjectWriteOperation op;
4733 op.write_full(bl);
4734 ASSERT_EQ(0, ioctx.operate("foo", &op));
4735 }
4736
4737 // start a continuous stream of reads
4738 read_ioctx = &ioctx;
4739 test_lock.Lock();
4740 for (int i = 0; i < max_reads; ++i) {
4741 start_flush_read();
4742 num_reads++;
4743 }
4744 test_lock.Unlock();
4745
4746 // try-flush
4747 ObjectReadOperation op;
4748 op.cache_try_flush();
4749 librados::AioCompletion *completion = cluster.aio_create_completion();
4750 ASSERT_EQ(0, cache_ioctx.aio_operate(
4751 "foo", completion, &op,
4752 librados::OPERATION_IGNORE_OVERLAY |
4753 librados::OPERATION_SKIPRWLOCKS, NULL));
4754
4755 completion->wait_for_safe();
4756 ASSERT_EQ(0, completion->get_return_value());
4757 completion->release();
4758
4759 // stop reads
4760 test_lock.Lock();
4761 max_reads = 0;
4762 while (num_reads > 0)
4763 cond.Wait(test_lock);
4764 test_lock.Unlock();
4765}
4766
4767TEST_F(LibRadosTierECPP, CallForcesPromote) {
4768 Rados cluster;
4769 std::string pool_name = get_temp_pool_name();
4770 std::string cache_pool_name = pool_name + "-cache";
4771 ASSERT_EQ("", create_one_ec_pool_pp(pool_name, cluster));
4772 ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str()));
4773 IoCtx cache_ioctx;
4774 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
4775 IoCtx ioctx;
4776 ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
4777
4778 // configure cache
4779 bufferlist inbl;
4780 ASSERT_EQ(0, cluster.mon_command(
4781 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4782 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
4783 inbl, NULL, NULL));
4784 ASSERT_EQ(0, cluster.mon_command(
4785 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4786 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4787 inbl, NULL, NULL));
4788 ASSERT_EQ(0, cluster.mon_command(
4789 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
4790 "\", \"mode\": \"writeback\"}",
4791 inbl, NULL, NULL));
4792
4793 // set things up such that the op would normally be proxied
4794 ASSERT_EQ(0, cluster.mon_command(
4795 set_pool_str(cache_pool_name, "hit_set_count", 2),
4796 inbl, NULL, NULL));
4797 ASSERT_EQ(0, cluster.mon_command(
4798 set_pool_str(cache_pool_name, "hit_set_period", 600),
4799 inbl, NULL, NULL));
4800 ASSERT_EQ(0, cluster.mon_command(
4801 set_pool_str(cache_pool_name, "hit_set_type",
4802 "explicit_object"),
4803 inbl, NULL, NULL));
4804 ASSERT_EQ(0, cluster.mon_command(
4805 set_pool_str(cache_pool_name, "min_read_recency_for_promote",
4806 "4"),
4807 inbl, NULL, NULL));
4808
4809 // wait for maps to settle
4810 cluster.wait_for_latest_osdmap();
4811
4812 // create/dirty object
4813 bufferlist bl;
4814 bl.append("hi there");
4815 {
4816 ObjectWriteOperation op;
4817 op.write_full(bl);
4818 ASSERT_EQ(0, ioctx.operate("foo", &op));
4819 }
4820
4821 // flush
4822 {
4823 ObjectReadOperation op;
4824 op.cache_flush();
4825 librados::AioCompletion *completion = cluster.aio_create_completion();
4826 ASSERT_EQ(0, cache_ioctx.aio_operate(
4827 "foo", completion, &op,
4828 librados::OPERATION_IGNORE_OVERLAY, NULL));
4829 completion->wait_for_safe();
4830 ASSERT_EQ(0, completion->get_return_value());
4831 completion->release();
4832 }
4833
4834 // evict
4835 {
4836 ObjectReadOperation op;
4837 op.cache_evict();
4838 librados::AioCompletion *completion = cluster.aio_create_completion();
4839 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
4840 librados::OPERATION_IGNORE_CACHE,
4841 NULL));
4842 completion->wait_for_safe();
4843 ASSERT_EQ(0, completion->get_return_value());
4844 completion->release();
4845 }
4846
4847 // call
4848 {
4849 ObjectReadOperation op;
4850 bufferlist bl;
4851 op.exec("rbd", "get_id", bl);
4852 bufferlist out;
4853 // should get EIO (not an rbd object), not -EOPNOTSUPP (we didn't promote)
4854 ASSERT_EQ(-5, ioctx.operate("foo", &op, &out));
4855 }
4856
4857 // make sure foo is back in the cache tier
4858 {
4859 NObjectIterator it = cache_ioctx.nobjects_begin();
4860 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
4861 ASSERT_TRUE(it->get_oid() == string("foo"));
4862 ++it;
4863 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
4864 }
4865
4866 // tear down tiers
4867 ASSERT_EQ(0, cluster.mon_command(
4868 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
4869 "\"}",
4870 inbl, NULL, NULL));
4871 ASSERT_EQ(0, cluster.mon_command(
4872 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
4873 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
4874 inbl, NULL, NULL));
4875
4876 // wait for maps to settle before next test
4877 cluster.wait_for_latest_osdmap();
4878
4879 ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str()));
4880 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
4881}
4882
4883TEST_F(LibRadosTierECPP, HitSetNone) {
4884 {
4885 list< pair<time_t,time_t> > ls;
4886 AioCompletion *c = librados::Rados::aio_create_completion();
4887 ASSERT_EQ(0, ioctx.hit_set_list(123, c, &ls));
4888 c->wait_for_complete();
4889 ASSERT_EQ(0, c->get_return_value());
4890 ASSERT_TRUE(ls.empty());
4891 c->release();
4892 }
4893 {
4894 bufferlist bl;
4895 AioCompletion *c = librados::Rados::aio_create_completion();
4896 ASSERT_EQ(0, ioctx.hit_set_get(123, c, 12345, &bl));
4897 c->wait_for_complete();
4898 ASSERT_EQ(-ENOENT, c->get_return_value());
4899 c->release();
4900 }
4901}
4902
4903TEST_F(LibRadosTwoPoolsECPP, HitSetRead) {
4904 // make it a tier
4905 bufferlist inbl;
4906 ASSERT_EQ(0, cluster.mon_command(
4907 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4908 "\", \"tierpool\": \"" + cache_pool_name +
4909 "\", \"force_nonempty\": \"--force-nonempty\" }",
4910 inbl, NULL, NULL));
4911
4912 // enable hitset tracking for this pool
4913 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", 2),
4914 inbl, NULL, NULL));
4915 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", 600),
4916 inbl, NULL, NULL));
4917 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type",
4918 "explicit_object"),
4919 inbl, NULL, NULL));
4920
4921 // wait for maps to settle
4922 cluster.wait_for_latest_osdmap();
4923
4924 cache_ioctx.set_namespace("");
4925
4926 // keep reading until we see our object appear in the HitSet
4927 utime_t start = ceph_clock_now();
4928 utime_t hard_stop = start + utime_t(600, 0);
4929
4930 while (true) {
4931 utime_t now = ceph_clock_now();
4932 ASSERT_TRUE(now < hard_stop);
4933
4934 string name = "foo";
4935 uint32_t hash;
4936 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
4937 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash,
4938 cluster.pool_lookup(cache_pool_name.c_str()), "");
4939
4940 bufferlist bl;
4941 ASSERT_EQ(-ENOENT, cache_ioctx.read("foo", bl, 1, 0));
4942
4943 bufferlist hbl;
4944 AioCompletion *c = librados::Rados::aio_create_completion();
4945 ASSERT_EQ(0, cache_ioctx.hit_set_get(hash, c, now.sec(), &hbl));
4946 c->wait_for_complete();
4947 c->release();
4948
4949 if (hbl.length()) {
4950 bufferlist::iterator p = hbl.begin();
4951 HitSet hs;
4952 ::decode(hs, p);
4953 if (hs.contains(oid)) {
4954 cout << "ok, hit_set contains " << oid << std::endl;
4955 break;
4956 }
4957 cout << "hmm, not in HitSet yet" << std::endl;
4958 } else {
4959 cout << "hmm, no HitSet yet" << std::endl;
4960 }
4961
4962 sleep(1);
4963 }
4964}
4965
// disable this test until hitset-get reliably works on EC pools
#if 0
// Enables explicit_hash hit-set tracking on the pool, writes 1000 one-byte
// objects named "0".."999", fetches the latest hit set of every PG and
// checks that each written object is contained in at least one of them.
TEST_F(LibRadosTierECPP, HitSetWrite) {
  int num_pg = _get_pg_num(cluster, pool_name);
  assert(num_pg > 0);

  // enable hitset tracking for this pool
  bufferlist inbl;
  ASSERT_EQ(0, cluster.mon_command(
    set_pool_str(pool_name, "hit_set_count", 8),
    inbl, NULL, NULL));
  ASSERT_EQ(0, cluster.mon_command(
    set_pool_str(pool_name, "hit_set_period", 600),
    inbl, NULL, NULL));
  ASSERT_EQ(0, cluster.mon_command(
    set_pool_str(pool_name, "hit_set_type", "explicit_hash"),
    inbl, NULL, NULL));

  // wait for maps to settle
  cluster.wait_for_latest_osdmap();

  ioctx.set_namespace("");

  // do a bunch of writes
  for (int obj = 0; obj < 1000; ++obj) {
    bufferlist payload;
    payload.append("a");
    ASSERT_EQ(0, ioctx.write(stringify(obj), payload, 1, 0));
  }

  // get HitSets, one per PG
  std::map<int,HitSet> hitsets;
  for (int pg = 0; pg < num_pg; ++pg) {
    list< pair<time_t,time_t> > intervals;
    AioCompletion *c = librados::Rados::aio_create_completion();
    ASSERT_EQ(0, ioctx.hit_set_list(pg, c, &intervals));
    c->wait_for_complete();
    c->release();
    std::cout << "pg " << pg << " ls " << intervals << std::endl;
    ASSERT_FALSE(intervals.empty());

    // get the latest
    c = librados::Rados::aio_create_completion();
    bufferlist hitset_bl;
    ASSERT_EQ(0, ioctx.hit_set_get(pg, c, intervals.back().first, &hitset_bl));
    c->wait_for_complete();
    c->release();

    //std::cout << "bl len is " << hitset_bl.length() << "\n";
    //hitset_bl.hexdump(std::cout);
    //std::cout << std::endl;

    bufferlist::iterator pos = hitset_bl.begin();
    ::decode(hitsets[pg], pos);

    // cope with racing splits by refreshing pg_num
    if (pg == num_pg - 1)
      num_pg = _get_pg_num(cluster, pool_name);
  }

  // every written object must show up in some PG's hit set
  for (int obj = 0; obj < 1000; ++obj) {
    string n = stringify(obj);
    uint32_t hash = ioctx.get_object_hash_position(n);
    hobject_t oid(sobject_t(n, CEPH_NOSNAP), "", hash,
                  cluster.pool_lookup(pool_name.c_str()), "");
    std::cout << "checking for " << oid << std::endl;
    bool found = false;
    for (int pg = 0; pg < num_pg; ++pg) {
      if (hitsets[pg].contains(oid)) {
        found = true;
        break;
      }
    }
    ASSERT_TRUE(found);
  }
}
#endif
5041
5042TEST_F(LibRadosTwoPoolsECPP, HitSetTrim) {
5043 unsigned count = 3;
5044 unsigned period = 3;
5045
5046 // make it a tier
5047 bufferlist inbl;
5048 ASSERT_EQ(0, cluster.mon_command(
5049 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
5050 "\", \"tierpool\": \"" + cache_pool_name +
5051 "\", \"force_nonempty\": \"--force-nonempty\" }",
5052 inbl, NULL, NULL));
5053
5054 // enable hitset tracking for this pool
5055 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", count),
5056 inbl, NULL, NULL));
5057 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", period),
5058 inbl, NULL, NULL));
5059 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
5060 inbl, NULL, NULL));
5061 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_fpp", ".01"),
5062 inbl, NULL, NULL));
5063
5064 // wait for maps to settle
5065 cluster.wait_for_latest_osdmap();
5066
5067 cache_ioctx.set_namespace("");
5068
5069 // do a bunch of writes and make sure the hitsets rotate
5070 utime_t start = ceph_clock_now();
5071 utime_t hard_stop = start + utime_t(count * period * 50, 0);
5072
5073 time_t first = 0;
5074 int bsize = alignment;
5075 char *buf = (char *)new char[bsize];
5076 memset(buf, 'f', bsize);
5077
5078 while (true) {
5079 string name = "foo";
5080 uint32_t hash;
5081 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
5082 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash, -1, "");
5083
5084 bufferlist bl;
5085 bl.append(buf, bsize);
5086 ASSERT_EQ(0, cache_ioctx.append("foo", bl, bsize));
5087
5088 list<pair<time_t, time_t> > ls;
5089 AioCompletion *c = librados::Rados::aio_create_completion();
5090 ASSERT_EQ(0, cache_ioctx.hit_set_list(hash, c, &ls));
5091 c->wait_for_complete();
5092 c->release();
5093
5094 cout << " got ls " << ls << std::endl;
5095 if (!ls.empty()) {
5096 if (!first) {
5097 first = ls.front().first;
5098 cout << "first is " << first << std::endl;
5099 } else {
5100 if (ls.front().first != first) {
5101 cout << "first now " << ls.front().first << ", trimmed" << std::endl;
5102 break;
5103 }
5104 }
5105 }
5106
5107 utime_t now = ceph_clock_now();
5108 ASSERT_TRUE(now < hard_stop);
5109
5110 sleep(1);
5111 }
5112 delete[] buf;
5113}
5114
5115TEST_F(LibRadosTwoPoolsECPP, PromoteOn2ndRead) {
5116 // create object
5117 for (int i=0; i<20; ++i) {
5118 bufferlist bl;
5119 bl.append("hi there");
5120 ObjectWriteOperation op;
5121 op.write_full(bl);
5122 ASSERT_EQ(0, ioctx.operate("foo" + stringify(i), &op));
5123 }
5124
5125 // configure cache
5126 bufferlist inbl;
5127 ASSERT_EQ(0, cluster.mon_command(
5128 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
5129 "\", \"tierpool\": \"" + cache_pool_name +
5130 "\", \"force_nonempty\": \"--force-nonempty\" }",
5131 inbl, NULL, NULL));
5132 ASSERT_EQ(0, cluster.mon_command(
5133 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
5134 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
5135 inbl, NULL, NULL));
5136 ASSERT_EQ(0, cluster.mon_command(
5137 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
5138 "\", \"mode\": \"writeback\"}",
5139 inbl, NULL, NULL));
5140
5141 // enable hitset tracking for this pool
5142 ASSERT_EQ(0, cluster.mon_command(
5143 set_pool_str(cache_pool_name, "hit_set_count", 2),
5144 inbl, NULL, NULL));
5145 ASSERT_EQ(0, cluster.mon_command(
5146 set_pool_str(cache_pool_name, "hit_set_period", 600),
5147 inbl, NULL, NULL));
5148 ASSERT_EQ(0, cluster.mon_command(
5149 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
5150 inbl, NULL, NULL));
5151 ASSERT_EQ(0, cluster.mon_command(
5152 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
5153 inbl, NULL, NULL));
5154 ASSERT_EQ(0, cluster.mon_command(
5155 set_pool_str(cache_pool_name, "hit_set_grade_decay_rate", 20),
5156 inbl, NULL, NULL));
5157 ASSERT_EQ(0, cluster.mon_command(
5158 set_pool_str(cache_pool_name, "hit_set_search_last_n", 1),
5159 inbl, NULL, NULL));
5160
5161 // wait for maps to settle
5162 cluster.wait_for_latest_osdmap();
5163
5164 int fake = 0; // set this to non-zero to test spurious promotion,
5165 // e.g. from thrashing
5166 int attempt = 0;
5167 string obj;
5168 while (true) {
5169 // 1st read, don't trigger a promote
5170 obj = "foo" + stringify(attempt);
5171 cout << obj << std::endl;
5172 {
5173 bufferlist bl;
5174 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
5175 if (--fake >= 0) {
5176 sleep(1);
5177 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
5178 sleep(1);
5179 }
5180 }
5181
5182 // verify the object is NOT present in the cache tier
5183 {
5184 bool found = false;
5185 NObjectIterator it = cache_ioctx.nobjects_begin();
5186 while (it != cache_ioctx.nobjects_end()) {
5187 cout << " see " << it->get_oid() << std::endl;
5188 if (it->get_oid() == string(obj.c_str())) {
5189 found = true;
5190 break;
5191 }
5192 ++it;
5193 }
5194 if (!found)
5195 break;
5196 }
5197
5198 ++attempt;
5199 ASSERT_LE(attempt, 20);
5200 cout << "hrm, object is present in cache on attempt " << attempt
5201 << ", retrying" << std::endl;
5202 }
5203
5204 // Read until the object is present in the cache tier
5205 cout << "verifying " << obj << " is eventually promoted" << std::endl;
5206 while (true) {
5207 bufferlist bl;
5208 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
5209
5210 bool there = false;
5211 NObjectIterator it = cache_ioctx.nobjects_begin();
5212 while (it != cache_ioctx.nobjects_end()) {
5213 if (it->get_oid() == string(obj.c_str())) {
5214 there = true;
5215 break;
5216 }
5217 ++it;
5218 }
5219 if (there)
5220 break;
5221
5222 sleep(1);
5223 }
5224
5225 // tear down tiers
5226 ASSERT_EQ(0, cluster.mon_command(
5227 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
5228 "\"}",
5229 inbl, NULL, NULL));
5230 ASSERT_EQ(0, cluster.mon_command(
5231 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
5232 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
5233 inbl, NULL, NULL));
5234
5235 // wait for maps to settle before next test
5236 cluster.wait_for_latest_osdmap();
5237}
5238
5239TEST_F(LibRadosTwoPoolsECPP, ProxyRead) {
5240 // create object
5241 {
5242 bufferlist bl;
5243 bl.append("hi there");
5244 ObjectWriteOperation op;
5245 op.write_full(bl);
5246 ASSERT_EQ(0, ioctx.operate("foo", &op));
5247 }
5248
5249 // configure cache
5250 bufferlist inbl;
5251 ASSERT_EQ(0, cluster.mon_command(
5252 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
5253 "\", \"tierpool\": \"" + cache_pool_name +
5254 "\", \"force_nonempty\": \"--force-nonempty\" }",
5255 inbl, NULL, NULL));
5256 ASSERT_EQ(0, cluster.mon_command(
5257 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
5258 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
5259 inbl, NULL, NULL));
5260 ASSERT_EQ(0, cluster.mon_command(
5261 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
5262 "\", \"mode\": \"readproxy\"}",
5263 inbl, NULL, NULL));
5264
5265 // wait for maps to settle
5266 cluster.wait_for_latest_osdmap();
5267
5268 // read and verify the object
5269 {
5270 bufferlist bl;
5271 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
5272 ASSERT_EQ('h', bl[0]);
5273 }
5274
5275 // Verify 10 times the object is NOT present in the cache tier
5276 uint32_t i = 0;
5277 while (i++ < 10) {
5278 NObjectIterator it = cache_ioctx.nobjects_begin();
5279 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
5280 sleep(1);
5281 }
5282
5283 // tear down tiers
5284 ASSERT_EQ(0, cluster.mon_command(
5285 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
5286 "\"}",
5287 inbl, NULL, NULL));
5288 ASSERT_EQ(0, cluster.mon_command(
5289 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
5290 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
5291 inbl, NULL, NULL));
5292
5293 // wait for maps to settle before next test
5294 cluster.wait_for_latest_osdmap();
5295}
5296
5297TEST_F(LibRadosTwoPoolsECPP, CachePin) {
5298 // create object
5299 {
5300 bufferlist bl;
5301 bl.append("hi there");
5302 ObjectWriteOperation op;
5303 op.write_full(bl);
5304 ASSERT_EQ(0, ioctx.operate("foo", &op));
5305 }
5306 {
5307 bufferlist bl;
5308 bl.append("hi there");
5309 ObjectWriteOperation op;
5310 op.write_full(bl);
5311 ASSERT_EQ(0, ioctx.operate("bar", &op));
5312 }
5313 {
5314 bufferlist bl;
5315 bl.append("hi there");
5316 ObjectWriteOperation op;
5317 op.write_full(bl);
5318 ASSERT_EQ(0, ioctx.operate("baz", &op));
5319 }
5320 {
5321 bufferlist bl;
5322 bl.append("hi there");
5323 ObjectWriteOperation op;
5324 op.write_full(bl);
5325 ASSERT_EQ(0, ioctx.operate("bam", &op));
5326 }
5327
5328 // configure cache
5329 bufferlist inbl;
5330 ASSERT_EQ(0, cluster.mon_command(
5331 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
5332 "\", \"tierpool\": \"" + cache_pool_name +
5333 "\", \"force_nonempty\": \"--force-nonempty\" }",
5334 inbl, NULL, NULL));
5335 ASSERT_EQ(0, cluster.mon_command(
5336 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
5337 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
5338 inbl, NULL, NULL));
5339 ASSERT_EQ(0, cluster.mon_command(
5340 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
5341 "\", \"mode\": \"writeback\"}",
5342 inbl, NULL, NULL));
5343
5344 // wait for maps to settle
5345 cluster.wait_for_latest_osdmap();
5346
5347 // read, trigger promote
5348 {
5349 bufferlist bl;
5350 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
5351 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
5352 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
5353 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
5354 }
5355
5356 // verify the objects are present in the cache tier
5357 {
5358 NObjectIterator it = cache_ioctx.nobjects_begin();
5359 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
5360 for (uint32_t i = 0; i < 4; i++) {
5361 ASSERT_TRUE(it->get_oid() == string("foo") ||
5362 it->get_oid() == string("bar") ||
5363 it->get_oid() == string("baz") ||
5364 it->get_oid() == string("bam"));
5365 ++it;
5366 }
5367 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
5368 }
5369
5370 // pin objects
5371 {
5372 ObjectWriteOperation op;
5373 op.cache_pin();
5374 librados::AioCompletion *completion = cluster.aio_create_completion();
5375 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
5376 completion->wait_for_safe();
5377 ASSERT_EQ(0, completion->get_return_value());
5378 completion->release();
5379 }
5380 {
5381 ObjectWriteOperation op;
5382 op.cache_pin();
5383 librados::AioCompletion *completion = cluster.aio_create_completion();
5384 ASSERT_EQ(0, cache_ioctx.aio_operate("baz", completion, &op));
5385 completion->wait_for_safe();
5386 ASSERT_EQ(0, completion->get_return_value());
5387 completion->release();
5388 }
5389
5390 // enable agent
5391 ASSERT_EQ(0, cluster.mon_command(
5392 set_pool_str(cache_pool_name, "hit_set_count", 2),
5393 inbl, NULL, NULL));
5394 ASSERT_EQ(0, cluster.mon_command(
5395 set_pool_str(cache_pool_name, "hit_set_period", 600),
5396 inbl, NULL, NULL));
5397 ASSERT_EQ(0, cluster.mon_command(
5398 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
5399 inbl, NULL, NULL));
5400 ASSERT_EQ(0, cluster.mon_command(
5401 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
5402 inbl, NULL, NULL));
5403 ASSERT_EQ(0, cluster.mon_command(
5404 set_pool_str(cache_pool_name, "target_max_objects", 1),
5405 inbl, NULL, NULL));
5406
5407 sleep(10);
5408
5409 // Verify the pinned object 'foo' is not flushed/evicted
5410 uint32_t count = 0;
5411 while (true) {
5412 bufferlist bl;
5413 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
5414
5415 count = 0;
5416 NObjectIterator it = cache_ioctx.nobjects_begin();
5417 while (it != cache_ioctx.nobjects_end()) {
5418 ASSERT_TRUE(it->get_oid() == string("foo") ||
5419 it->get_oid() == string("bar") ||
5420 it->get_oid() == string("baz") ||
5421 it->get_oid() == string("bam"));
5422 ++count;
5423 ++it;
5424 }
5425 if (count == 2) {
5426 ASSERT_TRUE(it->get_oid() == string("foo") ||
5427 it->get_oid() == string("baz"));
5428 break;
5429 }
5430
5431 sleep(1);
5432 }
5433
5434 // tear down tiers
5435 ASSERT_EQ(0, cluster.mon_command(
5436 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
5437 "\"}",
5438 inbl, NULL, NULL));
5439 ASSERT_EQ(0, cluster.mon_command(
5440 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
5441 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
5442 inbl, NULL, NULL));
5443
5444 // wait for maps to settle before next test
5445 cluster.wait_for_latest_osdmap();
5446}
31f18b77
FG
5447TEST_F(LibRadosTwoPoolsECPP, SetRedirectRead) {
5448 // skip test if not yet luminous
5449 {
5450 bufferlist inbl, outbl;
5451 ASSERT_EQ(0, cluster.mon_command(
5452 "{\"prefix\": \"osd dump\"}",
5453 inbl, &outbl, NULL));
5454 string s(outbl.c_str(), outbl.length());
5455 if (s.find("luminous") == std::string::npos) {
5456 cout << "cluster is not yet luminous, skipping test" << std::endl;
5457 return;
5458 }
5459 }
5460
5461 // create object
5462 {
5463 bufferlist bl;
5464 bl.append("hi there");
5465 ObjectWriteOperation op;
5466 op.write_full(bl);
5467 ASSERT_EQ(0, ioctx.operate("foo", &op));
5468 }
5469 {
5470 bufferlist bl;
5471 bl.append("there");
5472 ObjectWriteOperation op;
5473 op.write_full(bl);
5474 ASSERT_EQ(0, cache_ioctx.operate("bar", &op));
5475 }
5476
5477 // configure tier
5478 bufferlist inbl;
5479 ASSERT_EQ(0, cluster.mon_command(
5480 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
5481 "\", \"tierpool\": \"" + cache_pool_name +
5482 "\", \"force_nonempty\": \"--force-nonempty\" }",
5483 inbl, NULL, NULL));
5484
5485 // wait for maps to settle
5486 cluster.wait_for_latest_osdmap();
5487
5488 {
5489 ObjectWriteOperation op;
5490 op.set_redirect("bar", cache_ioctx, 0);
5491 librados::AioCompletion *completion = cluster.aio_create_completion();
5492 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op));
5493 completion->wait_for_safe();
5494 ASSERT_EQ(0, completion->get_return_value());
5495 completion->release();
5496 }
5497 // read and verify the object
5498 {
5499 bufferlist bl;
5500 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
5501 ASSERT_EQ('t', bl[0]);
5502 }
5503
5504 ASSERT_EQ(0, cluster.mon_command(
5505 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
5506 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
5507 inbl, NULL, NULL));
5508
5509 // wait for maps to settle before next test
5510 cluster.wait_for_latest_osdmap();
5511}