]> git.proxmox.com Git - ceph.git/blame - ceph/src/test/librados/tier.cc
bump version to 12.0.3-pve3
[ceph.git] / ceph / src / test / librados / tier.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3#include "gtest/gtest.h"
4
5#include "mds/mdstypes.h"
6#include "include/buffer.h"
7#include "include/rbd_types.h"
8#include "include/rados/librados.h"
9#include "include/rados/librados.hpp"
10#include "include/stringify.h"
11#include "include/types.h"
12#include "global/global_context.h"
13#include "common/Cond.h"
14#include "test/librados/test.h"
15#include "test/librados/TestCase.h"
16#include "json_spirit/json_spirit.h"
17
18#include "osd/HitSet.h"
19
20#include <errno.h>
21#include <map>
22#include <sstream>
23#include <string>
24
25using namespace librados;
26using std::map;
27using std::ostringstream;
28using std::string;
29
30typedef RadosTestPP LibRadosTierPP;
31typedef RadosTestECPP LibRadosTierECPP;
32
33void flush_evict_all(librados::Rados& cluster, librados::IoCtx& cache_ioctx)
34{
35 bufferlist inbl;
36 cache_ioctx.set_namespace(all_nspaces);
37 for (NObjectIterator it = cache_ioctx.nobjects_begin();
38 it != cache_ioctx.nobjects_end(); ++it) {
39 cache_ioctx.locator_set_key(it->get_locator());
40 cache_ioctx.set_namespace(it->get_nspace());
41 {
42 ObjectReadOperation op;
43 op.cache_flush();
44 librados::AioCompletion *completion = cluster.aio_create_completion();
45 cache_ioctx.aio_operate(
46 it->get_oid(), completion, &op,
47 librados::OPERATION_IGNORE_OVERLAY, NULL);
48 completion->wait_for_safe();
49 completion->get_return_value();
50 completion->release();
51 }
52 {
53 ObjectReadOperation op;
54 op.cache_evict();
55 librados::AioCompletion *completion = cluster.aio_create_completion();
56 cache_ioctx.aio_operate(
57 it->get_oid(), completion, &op,
58 librados::OPERATION_IGNORE_OVERLAY, NULL);
59 completion->wait_for_safe();
60 completion->get_return_value();
61 completion->release();
62 }
63 }
64}
65
66class LibRadosTwoPoolsPP : public RadosTestPP
67{
68public:
69 LibRadosTwoPoolsPP() {};
70 ~LibRadosTwoPoolsPP() override {};
71protected:
72 static void SetUpTestCase() {
73 pool_name = get_temp_pool_name();
74 ASSERT_EQ("", create_one_pool_pp(pool_name, s_cluster));
75 }
76 static void TearDownTestCase() {
77 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, s_cluster));
78 }
79 static std::string cache_pool_name;
80
81 void SetUp() override {
82 cache_pool_name = get_temp_pool_name();
83 ASSERT_EQ(0, s_cluster.pool_create(cache_pool_name.c_str()));
84 RadosTestPP::SetUp();
85 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
86 cache_ioctx.set_namespace(nspace);
87 }
88 void TearDown() override {
89 // flush + evict cache
90 flush_evict_all(cluster, cache_ioctx);
91
92 bufferlist inbl;
93 // tear down tiers
94 ASSERT_EQ(0, cluster.mon_command(
95 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
96 "\"}",
97 inbl, NULL, NULL));
98 ASSERT_EQ(0, cluster.mon_command(
99 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
100 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
101 inbl, NULL, NULL));
102
103 // wait for maps to settle before next test
104 cluster.wait_for_latest_osdmap();
105
106 RadosTestPP::TearDown();
107
108 cleanup_default_namespace(cache_ioctx);
109 cleanup_namespace(cache_ioctx, nspace);
110
111 cache_ioctx.close();
112 ASSERT_EQ(0, s_cluster.pool_delete(cache_pool_name.c_str()));
113 }
114 librados::IoCtx cache_ioctx;
115};
116
117class Completions
118{
119public:
120 Completions() = default;
121 librados::AioCompletion* getCompletion() {
122 librados::AioCompletion* comp = librados::Rados::aio_create_completion();
123 m_completions.push_back(comp);
124 return comp;
125 }
126
127 ~Completions() {
128 for (auto& comp : m_completions) {
129 comp->release();
130 }
131 }
132
133private:
134 vector<librados::AioCompletion *> m_completions;
135};
136
// File-scope pool of completions; whatever it hands out is released when this
// object is destroyed.
Completions completions;

// Storage for the static member declared in LibRadosTwoPoolsPP; assigned in
// that fixture's SetUp().
std::string LibRadosTwoPoolsPP::cache_pool_name;
140
141TEST_F(LibRadosTierPP, Dirty) {
142 {
143 ObjectWriteOperation op;
144 op.undirty();
145 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still get 0 if it dne
146 }
147 {
148 ObjectWriteOperation op;
149 op.create(true);
150 ASSERT_EQ(0, ioctx.operate("foo", &op));
151 }
152 {
153 bool dirty = false;
154 int r = -1;
155 ObjectReadOperation op;
156 op.is_dirty(&dirty, &r);
157 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
158 ASSERT_TRUE(dirty);
159 ASSERT_EQ(0, r);
160 }
161 {
162 ObjectWriteOperation op;
163 op.undirty();
164 ASSERT_EQ(0, ioctx.operate("foo", &op));
165 }
166 {
167 ObjectWriteOperation op;
168 op.undirty();
169 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still 0 if already clean
170 }
171 {
172 bool dirty = false;
173 int r = -1;
174 ObjectReadOperation op;
175 op.is_dirty(&dirty, &r);
176 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
177 ASSERT_FALSE(dirty);
178 ASSERT_EQ(0, r);
179 }
180 {
181 ObjectWriteOperation op;
182 op.truncate(0); // still a write even tho it is a no-op
183 ASSERT_EQ(0, ioctx.operate("foo", &op));
184 }
185 {
186 bool dirty = false;
187 int r = -1;
188 ObjectReadOperation op;
189 op.is_dirty(&dirty, &r);
190 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
191 ASSERT_TRUE(dirty);
192 ASSERT_EQ(0, r);
193 }
194}
195
196TEST_F(LibRadosTwoPoolsPP, Overlay) {
197 // create objects
198 {
199 bufferlist bl;
200 bl.append("base");
201 ObjectWriteOperation op;
202 op.write_full(bl);
203 ASSERT_EQ(0, ioctx.operate("foo", &op));
204 }
205 {
206 bufferlist bl;
207 bl.append("cache");
208 ObjectWriteOperation op;
209 op.write_full(bl);
210 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
211 }
212
213 // configure cache
214 bufferlist inbl;
215 ASSERT_EQ(0, cluster.mon_command(
216 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
217 "\", \"tierpool\": \"" + cache_pool_name +
218 "\", \"force_nonempty\": \"--force-nonempty\" }",
219 inbl, NULL, NULL));
220 ASSERT_EQ(0, cluster.mon_command(
221 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
222 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
223 inbl, NULL, NULL));
224
225 // wait for maps to settle
226 cluster.wait_for_latest_osdmap();
227
228 // by default, the overlay sends us to cache pool
229 {
230 bufferlist bl;
231 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
232 ASSERT_EQ('c', bl[0]);
233 }
234 {
235 bufferlist bl;
236 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
237 ASSERT_EQ('c', bl[0]);
238 }
239
240 // unless we say otherwise
241 {
242 bufferlist bl;
243 ObjectReadOperation op;
244 op.read(0, 1, &bl, NULL);
245 librados::AioCompletion *completion = cluster.aio_create_completion();
246 ASSERT_EQ(0, ioctx.aio_operate(
247 "foo", completion, &op,
248 librados::OPERATION_IGNORE_OVERLAY, NULL));
249 completion->wait_for_safe();
250 ASSERT_EQ(0, completion->get_return_value());
251 completion->release();
252 ASSERT_EQ('b', bl[0]);
253 }
254}
255
256TEST_F(LibRadosTwoPoolsPP, Promote) {
257 // create object
258 {
259 bufferlist bl;
260 bl.append("hi there");
261 ObjectWriteOperation op;
262 op.write_full(bl);
263 ASSERT_EQ(0, ioctx.operate("foo", &op));
264 }
265
266 // configure cache
267 bufferlist inbl;
268 ASSERT_EQ(0, cluster.mon_command(
269 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
270 "\", \"tierpool\": \"" + cache_pool_name +
271 "\", \"force_nonempty\": \"--force-nonempty\" }",
272 inbl, NULL, NULL));
273 ASSERT_EQ(0, cluster.mon_command(
274 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
275 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
276 inbl, NULL, NULL));
277 ASSERT_EQ(0, cluster.mon_command(
278 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
279 "\", \"mode\": \"writeback\"}",
280 inbl, NULL, NULL));
281
282 // wait for maps to settle
283 cluster.wait_for_latest_osdmap();
284
285 // read, trigger a promote
286 {
287 bufferlist bl;
288 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
289 }
290
291 // read, trigger a whiteout
292 {
293 bufferlist bl;
294 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
295 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
296 }
297
298 // verify the object is present in the cache tier
299 {
300 NObjectIterator it = cache_ioctx.nobjects_begin();
301 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
302 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
303 ++it;
304 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
305 ++it;
306 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
307 }
308}
309
310TEST_F(LibRadosTwoPoolsPP, PromoteSnap) {
311 // create object
312 {
313 bufferlist bl;
314 bl.append("hi there");
315 ObjectWriteOperation op;
316 op.write_full(bl);
317 ASSERT_EQ(0, ioctx.operate("foo", &op));
318 }
319 {
320 bufferlist bl;
321 bl.append("hi there");
322 ObjectWriteOperation op;
323 op.write_full(bl);
324 ASSERT_EQ(0, ioctx.operate("bar", &op));
325 }
326 {
327 bufferlist bl;
328 bl.append("hi there");
329 ObjectWriteOperation op;
330 op.write_full(bl);
331 ASSERT_EQ(0, ioctx.operate("baz", &op));
332 }
333 {
334 bufferlist bl;
335 bl.append("hi there");
336 ObjectWriteOperation op;
337 op.write_full(bl);
338 ASSERT_EQ(0, ioctx.operate("bam", &op));
339 }
340
341 // create a snapshot, clone
342 vector<uint64_t> my_snaps(1);
343 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
344 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
345 my_snaps));
346 {
347 bufferlist bl;
348 bl.append("ciao!");
349 ObjectWriteOperation op;
350 op.write_full(bl);
351 ASSERT_EQ(0, ioctx.operate("foo", &op));
352 }
353 {
354 bufferlist bl;
355 bl.append("ciao!");
356 ObjectWriteOperation op;
357 op.write_full(bl);
358 ASSERT_EQ(0, ioctx.operate("bar", &op));
359 }
360 {
361 ObjectWriteOperation op;
362 op.remove();
363 ASSERT_EQ(0, ioctx.operate("baz", &op));
364 }
365 {
366 bufferlist bl;
367 bl.append("ciao!");
368 ObjectWriteOperation op;
369 op.write_full(bl);
370 ASSERT_EQ(0, ioctx.operate("bam", &op));
371 }
372
373 // configure cache
374 bufferlist inbl;
375 ASSERT_EQ(0, cluster.mon_command(
376 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
377 "\", \"tierpool\": \"" + cache_pool_name +
378 "\", \"force_nonempty\": \"--force-nonempty\" }",
379 inbl, NULL, NULL));
380 ASSERT_EQ(0, cluster.mon_command(
381 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
382 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
383 inbl, NULL, NULL));
384 ASSERT_EQ(0, cluster.mon_command(
385 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
386 "\", \"mode\": \"writeback\"}",
387 inbl, NULL, NULL));
388
389 // wait for maps to settle
390 cluster.wait_for_latest_osdmap();
391
392 // read, trigger a promote on the head
393 {
394 bufferlist bl;
395 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
396 ASSERT_EQ('c', bl[0]);
397 }
398 {
399 bufferlist bl;
400 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
401 ASSERT_EQ('c', bl[0]);
402 }
403
404 ioctx.snap_set_read(my_snaps[0]);
405
406 // read foo snap
407 {
408 bufferlist bl;
409 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
410 ASSERT_EQ('h', bl[0]);
411 }
412
413 // read bar snap
414 {
415 bufferlist bl;
416 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
417 ASSERT_EQ('h', bl[0]);
418 }
419
420 // read baz snap
421 {
422 bufferlist bl;
423 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
424 ASSERT_EQ('h', bl[0]);
425 }
426
427 ioctx.snap_set_read(librados::SNAP_HEAD);
428
429 // read foo
430 {
431 bufferlist bl;
432 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
433 ASSERT_EQ('c', bl[0]);
434 }
435
436 // read bar
437 {
438 bufferlist bl;
439 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
440 ASSERT_EQ('c', bl[0]);
441 }
442
443 // read baz
444 {
445 bufferlist bl;
446 ASSERT_EQ(-ENOENT, ioctx.read("baz", bl, 1, 0));
447 }
448
449 // cleanup
450 ioctx.selfmanaged_snap_remove(my_snaps[0]);
451}
452
453TEST_F(LibRadosTwoPoolsPP, PromoteSnapScrub) {
454 int num = 100;
455
456 // create objects
457 for (int i=0; i<num; ++i) {
458 bufferlist bl;
459 bl.append("hi there");
460 ObjectWriteOperation op;
461 op.write_full(bl);
462 ASSERT_EQ(0, ioctx.operate(string("foo") + stringify(i), &op));
463 }
464
465 vector<uint64_t> my_snaps;
466 for (int snap=0; snap<4; ++snap) {
467 // create a snapshot, clone
468 vector<uint64_t> ns(1);
469 ns.insert(ns.end(), my_snaps.begin(), my_snaps.end());
470 my_snaps.swap(ns);
471 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
472 cout << "my_snaps " << my_snaps << std::endl;
473 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
474 my_snaps));
475 for (int i=0; i<num; ++i) {
476 bufferlist bl;
477 bl.append(string("ciao! snap") + stringify(snap));
478 ObjectWriteOperation op;
479 op.write_full(bl);
480 ASSERT_EQ(0, ioctx.operate(string("foo") + stringify(i), &op));
481 }
482 }
483
484 // configure cache
485 bufferlist inbl;
486 ASSERT_EQ(0, cluster.mon_command(
487 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
488 "\", \"tierpool\": \"" + cache_pool_name +
489 "\", \"force_nonempty\": \"--force-nonempty\" }",
490 inbl, NULL, NULL));
491 ASSERT_EQ(0, cluster.mon_command(
492 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
493 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
494 inbl, NULL, NULL));
495 ASSERT_EQ(0, cluster.mon_command(
496 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
497 "\", \"mode\": \"writeback\"}",
498 inbl, NULL, NULL));
499
500 // wait for maps to settle
501 cluster.wait_for_latest_osdmap();
502
503 // read, trigger a promote on _some_ heads to make sure we handle cases
504 // where snaps are present and where they are not.
505 cout << "promoting some heads" << std::endl;
506 for (int i=0; i<num; ++i) {
507 if (i % 5 == 0 || i > num - 3) {
508 bufferlist bl;
509 ASSERT_EQ(1, ioctx.read(string("foo") + stringify(i), bl, 1, 0));
510 ASSERT_EQ('c', bl[0]);
511 }
512 }
513
514 for (unsigned snap = 0; snap < my_snaps.size(); ++snap) {
515 cout << "promoting from clones for snap " << my_snaps[snap] << std::endl;
516 ioctx.snap_set_read(my_snaps[snap]);
517
518 // read some snaps, semi-randomly
519 for (int i=0; i<50; ++i) {
520 bufferlist bl;
521 string o = string("foo") + stringify((snap * i * 137) % 80);
522 //cout << o << std::endl;
523 ASSERT_EQ(1, ioctx.read(o, bl, 1, 0));
524 }
525 }
526
527 // ok, stop and scrub this pool (to make sure scrub can handle
528 // missing clones in the cache tier).
529 {
530 IoCtx cache_ioctx;
531 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
532 for (int i=0; i<10; ++i) {
533 do {
534 ostringstream ss;
535 ss << "{\"prefix\": \"pg scrub\", \"pgid\": \""
536 << cache_ioctx.get_id() << "." << i
537 << "\"}";
538 int r = cluster.mon_command(ss.str(), inbl, NULL, NULL);
539 if (r == -ENOENT || // in case mgr osdmap is stale
540 r == -EAGAIN) {
541 sleep(5);
542 continue;
543 }
544 } while (false);
545 }
546
547 // give it a few seconds to go. this is sloppy but is usually enough time
548 cout << "waiting for scrubs..." << std::endl;
549 sleep(30);
550 cout << "done waiting" << std::endl;
551 }
552
553 ioctx.snap_set_read(librados::SNAP_HEAD);
554
555 //cleanup
556 for (unsigned snap = 0; snap < my_snaps.size(); ++snap) {
557 ioctx.selfmanaged_snap_remove(my_snaps[snap]);
558 }
559}
560
561TEST_F(LibRadosTwoPoolsPP, PromoteSnapTrimRace) {
562 // create object
563 {
564 bufferlist bl;
565 bl.append("hi there");
566 ObjectWriteOperation op;
567 op.write_full(bl);
568 ASSERT_EQ(0, ioctx.operate("foo", &op));
569 }
570
571 // create a snapshot, clone
572 vector<uint64_t> my_snaps(1);
573 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
574 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
575 my_snaps));
576 {
577 bufferlist bl;
578 bl.append("ciao!");
579 ObjectWriteOperation op;
580 op.write_full(bl);
581 ASSERT_EQ(0, ioctx.operate("foo", &op));
582 }
583
584 // configure cache
585 bufferlist inbl;
586 ASSERT_EQ(0, cluster.mon_command(
587 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
588 "\", \"tierpool\": \"" + cache_pool_name +
589 "\", \"force_nonempty\": \"--force-nonempty\" }",
590 inbl, NULL, NULL));
591 ASSERT_EQ(0, cluster.mon_command(
592 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
593 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
594 inbl, NULL, NULL));
595 ASSERT_EQ(0, cluster.mon_command(
596 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
597 "\", \"mode\": \"writeback\"}",
598 inbl, NULL, NULL));
599
600 // wait for maps to settle
601 cluster.wait_for_latest_osdmap();
602
603 // delete the snap
604 ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(my_snaps[0]));
605
606 ioctx.snap_set_read(my_snaps[0]);
607
608 // read foo snap
609 {
610 bufferlist bl;
611 ASSERT_EQ(-ENOENT, ioctx.read("foo", bl, 1, 0));
612 }
613
614 // cleanup
615 ioctx.selfmanaged_snap_remove(my_snaps[0]);
616}
617
618TEST_F(LibRadosTwoPoolsPP, Whiteout) {
619 // create object
620 {
621 bufferlist bl;
622 bl.append("hi there");
623 ObjectWriteOperation op;
624 op.write_full(bl);
625 ASSERT_EQ(0, ioctx.operate("foo", &op));
626 }
627
628 // configure cache
629 bufferlist inbl;
630 ASSERT_EQ(0, cluster.mon_command(
631 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
632 "\", \"tierpool\": \"" + cache_pool_name +
633 "\", \"force_nonempty\": \"--force-nonempty\" }",
634 inbl, NULL, NULL));
635 ASSERT_EQ(0, cluster.mon_command(
636 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
637 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
638 inbl, NULL, NULL));
639 ASSERT_EQ(0, cluster.mon_command(
640 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
641 "\", \"mode\": \"writeback\"}",
642 inbl, NULL, NULL));
643
644 // wait for maps to settle
645 cluster.wait_for_latest_osdmap();
646
647 // create some whiteouts, verify they behave
648 {
649 ObjectWriteOperation op;
650 op.assert_exists();
651 op.remove();
652 ASSERT_EQ(0, ioctx.operate("foo", &op));
653 }
654
655 {
656 ObjectWriteOperation op;
657 op.assert_exists();
658 op.remove();
659 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
660 }
661 {
662 ObjectWriteOperation op;
663 op.assert_exists();
664 op.remove();
665 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
666 }
667
668 // verify the whiteouts are there in the cache tier
669 {
670 NObjectIterator it = cache_ioctx.nobjects_begin();
671 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
672 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
673 ++it;
674 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
675 ++it;
676 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
677 }
678
679 // delete a whiteout and verify it goes away
680 ASSERT_EQ(-ENOENT, ioctx.remove("foo"));
681 {
682 ObjectWriteOperation op;
683 op.remove();
684 librados::AioCompletion *completion = cluster.aio_create_completion();
685 ASSERT_EQ(0, ioctx.aio_operate("bar", completion, &op,
686 librados::OPERATION_IGNORE_CACHE));
687 completion->wait_for_safe();
688 ASSERT_EQ(0, completion->get_return_value());
689 completion->release();
690
691 NObjectIterator it = cache_ioctx.nobjects_begin();
692 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
693 ASSERT_TRUE(it->get_oid() == string("foo"));
694 ++it;
695 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
696 }
697
698 // recreate an object and verify we can read it
699 {
700 bufferlist bl;
701 bl.append("hi there");
702 ObjectWriteOperation op;
703 op.write_full(bl);
704 ASSERT_EQ(0, ioctx.operate("foo", &op));
705 }
706 {
707 bufferlist bl;
708 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
709 ASSERT_EQ('h', bl[0]);
710 }
711}
712
713TEST_F(LibRadosTwoPoolsPP, WhiteoutDeleteCreate) {
714 // configure cache
715 bufferlist inbl;
716 ASSERT_EQ(0, cluster.mon_command(
717 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
718 "\", \"tierpool\": \"" + cache_pool_name +
719 "\", \"force_nonempty\": \"--force-nonempty\" }",
720 inbl, NULL, NULL));
721 ASSERT_EQ(0, cluster.mon_command(
722 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
723 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
724 inbl, NULL, NULL));
725 ASSERT_EQ(0, cluster.mon_command(
726 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
727 "\", \"mode\": \"writeback\"}",
728 inbl, NULL, NULL));
729
730 // wait for maps to settle
731 cluster.wait_for_latest_osdmap();
732
733 // create an object
734 {
735 bufferlist bl;
736 bl.append("foo");
737 ASSERT_EQ(0, ioctx.write_full("foo", bl));
738 }
739
740 // do delete + create operation
741 {
742 ObjectWriteOperation op;
743 op.remove();
744 bufferlist bl;
745 bl.append("bar");
746 op.write_full(bl);
747 ASSERT_EQ(0, ioctx.operate("foo", &op));
748 }
749
750 // verify it still "exists" (w/ new content)
751 {
752 bufferlist bl;
753 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
754 ASSERT_EQ('b', bl[0]);
755 }
756}
757
758TEST_F(LibRadosTwoPoolsPP, Evict) {
759 // create object
760 {
761 bufferlist bl;
762 bl.append("hi there");
763 ObjectWriteOperation op;
764 op.write_full(bl);
765 ASSERT_EQ(0, ioctx.operate("foo", &op));
766 }
767
768 // configure cache
769 bufferlist inbl;
770 ASSERT_EQ(0, cluster.mon_command(
771 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
772 "\", \"tierpool\": \"" + cache_pool_name +
773 "\", \"force_nonempty\": \"--force-nonempty\" }",
774 inbl, NULL, NULL));
775 ASSERT_EQ(0, cluster.mon_command(
776 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
777 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
778 inbl, NULL, NULL));
779 ASSERT_EQ(0, cluster.mon_command(
780 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
781 "\", \"mode\": \"writeback\"}",
782 inbl, NULL, NULL));
783
784 // wait for maps to settle
785 cluster.wait_for_latest_osdmap();
786
787 // read, trigger a promote
788 {
789 bufferlist bl;
790 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
791 }
792
793 // read, trigger a whiteout, and a dirty object
794 {
795 bufferlist bl;
796 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
797 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
798 ASSERT_EQ(0, ioctx.write("bar", bl, bl.length(), 0));
799 }
800
801 // verify the object is present in the cache tier
802 {
803 NObjectIterator it = cache_ioctx.nobjects_begin();
804 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
805 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
806 ++it;
807 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
808 ++it;
809 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
810 }
811
812 // pin
813 {
814 ObjectWriteOperation op;
815 op.cache_pin();
816 librados::AioCompletion *completion = cluster.aio_create_completion();
817 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
818 completion->wait_for_safe();
819 ASSERT_EQ(0, completion->get_return_value());
820 completion->release();
821 }
822
823 // evict the pinned object with -EPERM
824 {
825 ObjectReadOperation op;
826 op.cache_evict();
827 librados::AioCompletion *completion = cluster.aio_create_completion();
828 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
829 librados::OPERATION_IGNORE_CACHE,
830 NULL));
831 completion->wait_for_safe();
832 ASSERT_EQ(-EPERM, completion->get_return_value());
833 completion->release();
834 }
835
836 // unpin
837 {
838 ObjectWriteOperation op;
839 op.cache_unpin();
840 librados::AioCompletion *completion = cluster.aio_create_completion();
841 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
842 completion->wait_for_safe();
843 ASSERT_EQ(0, completion->get_return_value());
844 completion->release();
845 }
846
847 // flush
848 {
849 ObjectReadOperation op;
850 op.cache_flush();
851 librados::AioCompletion *completion = cluster.aio_create_completion();
852 ASSERT_EQ(0, cache_ioctx.aio_operate(
853 "foo", completion, &op,
854 librados::OPERATION_IGNORE_OVERLAY, NULL));
855 completion->wait_for_safe();
856 ASSERT_EQ(0, completion->get_return_value());
857 completion->release();
858 }
859
860 // verify clean
861 {
862 bool dirty = false;
863 int r = -1;
864 ObjectReadOperation op;
865 op.is_dirty(&dirty, &r);
866 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
867 ASSERT_FALSE(dirty);
868 ASSERT_EQ(0, r);
869 }
870
871 // evict
872 {
873 ObjectReadOperation op;
874 op.cache_evict();
875 librados::AioCompletion *completion = cluster.aio_create_completion();
876 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
877 librados::OPERATION_IGNORE_CACHE,
878 NULL));
879 completion->wait_for_safe();
880 ASSERT_EQ(0, completion->get_return_value());
881 completion->release();
882 }
883 {
884 ObjectReadOperation op;
885 op.cache_evict();
886 librados::AioCompletion *completion = cluster.aio_create_completion();
887 ASSERT_EQ(0, cache_ioctx.aio_operate(
888 "foo", completion, &op,
889 librados::OPERATION_IGNORE_CACHE, NULL));
890 completion->wait_for_safe();
891 ASSERT_EQ(0, completion->get_return_value());
892 completion->release();
893 }
894 {
895 ObjectReadOperation op;
896 op.cache_evict();
897 librados::AioCompletion *completion = cluster.aio_create_completion();
898 ASSERT_EQ(0, cache_ioctx.aio_operate(
899 "bar", completion, &op,
900 librados::OPERATION_IGNORE_CACHE, NULL));
901 completion->wait_for_safe();
902 ASSERT_EQ(-EBUSY, completion->get_return_value());
903 completion->release();
904 }
905}
906
907TEST_F(LibRadosTwoPoolsPP, EvictSnap) {
908 // create object
909 {
910 bufferlist bl;
911 bl.append("hi there");
912 ObjectWriteOperation op;
913 op.write_full(bl);
914 ASSERT_EQ(0, ioctx.operate("foo", &op));
915 }
916 {
917 bufferlist bl;
918 bl.append("hi there");
919 ObjectWriteOperation op;
920 op.write_full(bl);
921 ASSERT_EQ(0, ioctx.operate("bar", &op));
922 }
923 {
924 bufferlist bl;
925 bl.append("hi there");
926 ObjectWriteOperation op;
927 op.write_full(bl);
928 ASSERT_EQ(0, ioctx.operate("baz", &op));
929 }
930 {
931 bufferlist bl;
932 bl.append("hi there");
933 ObjectWriteOperation op;
934 op.write_full(bl);
935 ASSERT_EQ(0, ioctx.operate("bam", &op));
936 }
937
938 // create a snapshot, clone
939 vector<uint64_t> my_snaps(1);
940 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
941 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
942 my_snaps));
943 {
944 bufferlist bl;
945 bl.append("ciao!");
946 ObjectWriteOperation op;
947 op.write_full(bl);
948 ASSERT_EQ(0, ioctx.operate("foo", &op));
949 }
950 {
951 bufferlist bl;
952 bl.append("ciao!");
953 ObjectWriteOperation op;
954 op.write_full(bl);
955 ASSERT_EQ(0, ioctx.operate("bar", &op));
956 }
957 {
958 ObjectWriteOperation op;
959 op.remove();
960 ASSERT_EQ(0, ioctx.operate("baz", &op));
961 }
962 {
963 bufferlist bl;
964 bl.append("ciao!");
965 ObjectWriteOperation op;
966 op.write_full(bl);
967 ASSERT_EQ(0, ioctx.operate("bam", &op));
968 }
969
970 // configure cache
971 bufferlist inbl;
972 ASSERT_EQ(0, cluster.mon_command(
973 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
974 "\", \"tierpool\": \"" + cache_pool_name +
975 "\", \"force_nonempty\": \"--force-nonempty\" }",
976 inbl, NULL, NULL));
977 ASSERT_EQ(0, cluster.mon_command(
978 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
979 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
980 inbl, NULL, NULL));
981 ASSERT_EQ(0, cluster.mon_command(
982 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
983 "\", \"mode\": \"writeback\"}",
984 inbl, NULL, NULL));
985
986 // wait for maps to settle
987 cluster.wait_for_latest_osdmap();
988
989 // read, trigger a promote on the head
990 {
991 bufferlist bl;
992 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
993 ASSERT_EQ('c', bl[0]);
994 }
995 {
996 bufferlist bl;
997 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
998 ASSERT_EQ('c', bl[0]);
999 }
1000
1001 // evict bam
1002 {
1003 ObjectReadOperation op;
1004 op.cache_evict();
1005 librados::AioCompletion *completion = cluster.aio_create_completion();
1006 ASSERT_EQ(0, cache_ioctx.aio_operate(
1007 "bam", completion, &op,
1008 librados::OPERATION_IGNORE_CACHE, NULL));
1009 completion->wait_for_safe();
1010 ASSERT_EQ(0, completion->get_return_value());
1011 completion->release();
1012 }
1013 {
1014 bufferlist bl;
1015 ObjectReadOperation op;
1016 op.read(1, 0, &bl, NULL);
1017 librados::AioCompletion *completion = cluster.aio_create_completion();
1018 ASSERT_EQ(0, cache_ioctx.aio_operate(
1019 "bam", completion, &op,
1020 librados::OPERATION_IGNORE_CACHE, NULL));
1021 completion->wait_for_safe();
1022 ASSERT_EQ(-ENOENT, completion->get_return_value());
1023 completion->release();
1024 }
1025
1026 // read foo snap
1027 ioctx.snap_set_read(my_snaps[0]);
1028 {
1029 bufferlist bl;
1030 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1031 ASSERT_EQ('h', bl[0]);
1032 }
1033
1034 // evict foo snap
1035 {
1036 ObjectReadOperation op;
1037 op.cache_evict();
1038 librados::AioCompletion *completion = cluster.aio_create_completion();
1039 ASSERT_EQ(0, ioctx.aio_operate(
1040 "foo", completion, &op,
1041 librados::OPERATION_IGNORE_CACHE, NULL));
1042 completion->wait_for_safe();
1043 ASSERT_EQ(0, completion->get_return_value());
1044 completion->release();
1045 }
1046 // snap is gone...
1047 {
1048 bufferlist bl;
1049 ObjectReadOperation op;
1050 op.read(1, 0, &bl, NULL);
1051 librados::AioCompletion *completion = cluster.aio_create_completion();
1052 ASSERT_EQ(0, ioctx.aio_operate(
1053 "foo", completion, &op,
1054 librados::OPERATION_IGNORE_CACHE, NULL));
1055 completion->wait_for_safe();
1056 ASSERT_EQ(-ENOENT, completion->get_return_value());
1057 completion->release();
1058 }
1059 // head is still there...
1060 ioctx.snap_set_read(librados::SNAP_HEAD);
1061 {
1062 bufferlist bl;
1063 ObjectReadOperation op;
1064 op.read(1, 0, &bl, NULL);
1065 librados::AioCompletion *completion = cluster.aio_create_completion();
1066 ASSERT_EQ(0, ioctx.aio_operate(
1067 "foo", completion, &op,
1068 librados::OPERATION_IGNORE_CACHE, NULL));
1069 completion->wait_for_safe();
1070 ASSERT_EQ(0, completion->get_return_value());
1071 completion->release();
1072 }
1073
1074 // promote head + snap of bar
1075 ioctx.snap_set_read(librados::SNAP_HEAD);
1076 {
1077 bufferlist bl;
1078 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
1079 ASSERT_EQ('c', bl[0]);
1080 }
1081 ioctx.snap_set_read(my_snaps[0]);
1082 {
1083 bufferlist bl;
1084 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
1085 ASSERT_EQ('h', bl[0]);
1086 }
1087
1088 // evict bar head (fail)
1089 ioctx.snap_set_read(librados::SNAP_HEAD);
1090 {
1091 ObjectReadOperation op;
1092 op.cache_evict();
1093 librados::AioCompletion *completion = cluster.aio_create_completion();
1094 ASSERT_EQ(0, ioctx.aio_operate(
1095 "bar", completion, &op,
1096 librados::OPERATION_IGNORE_CACHE, NULL));
1097 completion->wait_for_safe();
1098 ASSERT_EQ(-EBUSY, completion->get_return_value());
1099 completion->release();
1100 }
1101
1102 // evict bar snap
1103 ioctx.snap_set_read(my_snaps[0]);
1104 {
1105 ObjectReadOperation op;
1106 op.cache_evict();
1107 librados::AioCompletion *completion = cluster.aio_create_completion();
1108 ASSERT_EQ(0, ioctx.aio_operate(
1109 "bar", completion, &op,
1110 librados::OPERATION_IGNORE_CACHE, NULL));
1111 completion->wait_for_safe();
1112 ASSERT_EQ(0, completion->get_return_value());
1113 completion->release();
1114 }
1115 // ...and then head
1116 ioctx.snap_set_read(librados::SNAP_HEAD);
1117 {
1118 bufferlist bl;
1119 ObjectReadOperation op;
1120 op.read(1, 0, &bl, NULL);
1121 librados::AioCompletion *completion = cluster.aio_create_completion();
1122 ASSERT_EQ(0, ioctx.aio_operate(
1123 "bar", completion, &op,
1124 librados::OPERATION_IGNORE_CACHE, NULL));
1125 completion->wait_for_safe();
1126 ASSERT_EQ(0, completion->get_return_value());
1127 completion->release();
1128 }
1129 {
1130 ObjectReadOperation op;
1131 op.cache_evict();
1132 librados::AioCompletion *completion = cluster.aio_create_completion();
1133 ASSERT_EQ(0, ioctx.aio_operate(
1134 "bar", completion, &op,
1135 librados::OPERATION_IGNORE_CACHE, NULL));
1136 completion->wait_for_safe();
1137 ASSERT_EQ(0, completion->get_return_value());
1138 completion->release();
1139 }
1140
1141 // cleanup
1142 ioctx.selfmanaged_snap_remove(my_snaps[0]);
1143}
1144
1145// this test case reproduces http://tracker.ceph.com/issues/8629
1146TEST_F(LibRadosTwoPoolsPP, EvictSnap2) {
1147 // create object
1148 {
1149 bufferlist bl;
1150 bl.append("hi there");
1151 ObjectWriteOperation op;
1152 op.write_full(bl);
1153 ASSERT_EQ(0, ioctx.operate("foo", &op));
1154 }
1155 // create a snapshot, clone
1156 vector<uint64_t> my_snaps(1);
1157 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1158 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1159 my_snaps));
1160 {
1161 bufferlist bl;
1162 bl.append("ciao!");
1163 ObjectWriteOperation op;
1164 op.write_full(bl);
1165 ASSERT_EQ(0, ioctx.operate("foo", &op));
1166 }
1167 // configure cache
1168 bufferlist inbl;
1169 ASSERT_EQ(0, cluster.mon_command(
1170 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1171 "\", \"tierpool\": \"" + cache_pool_name +
1172 "\", \"force_nonempty\": \"--force-nonempty\" }",
1173 inbl, NULL, NULL));
1174 ASSERT_EQ(0, cluster.mon_command(
1175 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1176 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1177 inbl, NULL, NULL));
1178 ASSERT_EQ(0, cluster.mon_command(
1179 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1180 "\", \"mode\": \"writeback\"}",
1181 inbl, NULL, NULL));
1182
1183 // wait for maps to settle
1184 cluster.wait_for_latest_osdmap();
1185
1186 // read, trigger a promote on the head
1187 {
1188 bufferlist bl;
1189 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1190 ASSERT_EQ('c', bl[0]);
1191 }
1192
1193 // evict
1194 {
1195 ObjectReadOperation op;
1196 op.cache_evict();
1197 librados::AioCompletion *completion = cluster.aio_create_completion();
1198 ASSERT_EQ(0, cache_ioctx.aio_operate(
1199 "foo", completion, &op,
1200 librados::OPERATION_IGNORE_CACHE, NULL));
1201 completion->wait_for_safe();
1202 ASSERT_EQ(0, completion->get_return_value());
1203 completion->release();
1204 }
1205
1206 // verify the snapdir is not present in the cache pool
1207 {
1208 ObjectReadOperation op;
1209 librados::snap_set_t snapset;
1210 op.list_snaps(&snapset, NULL);
1211 ioctx.snap_set_read(librados::SNAP_DIR);
1212 librados::AioCompletion *completion = cluster.aio_create_completion();
1213 ASSERT_EQ(0, ioctx.aio_operate("foo", completion, &op,
1214 librados::OPERATION_IGNORE_CACHE, NULL));
1215 completion->wait_for_safe();
1216 ASSERT_EQ(-ENOENT, completion->get_return_value());
1217 completion->release();
1218 }
1219}
1220
1221TEST_F(LibRadosTwoPoolsPP, TryFlush) {
1222 // configure cache
1223 bufferlist inbl;
1224 ASSERT_EQ(0, cluster.mon_command(
1225 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1226 "\", \"tierpool\": \"" + cache_pool_name +
1227 "\", \"force_nonempty\": \"--force-nonempty\" }",
1228 inbl, NULL, NULL));
1229 ASSERT_EQ(0, cluster.mon_command(
1230 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1231 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1232 inbl, NULL, NULL));
1233 ASSERT_EQ(0, cluster.mon_command(
1234 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1235 "\", \"mode\": \"writeback\"}",
1236 inbl, NULL, NULL));
1237
1238 // wait for maps to settle
1239 cluster.wait_for_latest_osdmap();
1240
1241 // create object
1242 {
1243 bufferlist bl;
1244 bl.append("hi there");
1245 ObjectWriteOperation op;
1246 op.write_full(bl);
1247 ASSERT_EQ(0, ioctx.operate("foo", &op));
1248 }
1249
1250 // verify the object is present in the cache tier
1251 {
1252 NObjectIterator it = cache_ioctx.nobjects_begin();
1253 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
1254 ASSERT_TRUE(it->get_oid() == string("foo"));
1255 ++it;
1256 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1257 }
1258
1259 // verify the object is NOT present in the base tier
1260 {
1261 NObjectIterator it = ioctx.nobjects_begin();
1262 ASSERT_TRUE(it == ioctx.nobjects_end());
1263 }
1264
1265 // verify dirty
1266 {
1267 bool dirty = false;
1268 int r = -1;
1269 ObjectReadOperation op;
1270 op.is_dirty(&dirty, &r);
1271 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1272 ASSERT_TRUE(dirty);
1273 ASSERT_EQ(0, r);
1274 }
1275
1276 // pin
1277 {
1278 ObjectWriteOperation op;
1279 op.cache_pin();
1280 librados::AioCompletion *completion = cluster.aio_create_completion();
1281 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1282 completion->wait_for_safe();
1283 ASSERT_EQ(0, completion->get_return_value());
1284 completion->release();
1285 }
1286
1287 // flush the pinned object with -EPERM
1288 {
1289 ObjectReadOperation op;
1290 op.cache_try_flush();
1291 librados::AioCompletion *completion = cluster.aio_create_completion();
1292 ASSERT_EQ(0, cache_ioctx.aio_operate(
1293 "foo", completion, &op,
1294 librados::OPERATION_IGNORE_OVERLAY |
1295 librados::OPERATION_SKIPRWLOCKS, NULL));
1296 completion->wait_for_safe();
1297 ASSERT_EQ(-EPERM, completion->get_return_value());
1298 completion->release();
1299 }
1300
1301 // unpin
1302 {
1303 ObjectWriteOperation op;
1304 op.cache_unpin();
1305 librados::AioCompletion *completion = cluster.aio_create_completion();
1306 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1307 completion->wait_for_safe();
1308 ASSERT_EQ(0, completion->get_return_value());
1309 completion->release();
1310 }
1311
1312 // flush
1313 {
1314 ObjectReadOperation op;
1315 op.cache_try_flush();
1316 librados::AioCompletion *completion = cluster.aio_create_completion();
1317 ASSERT_EQ(0, cache_ioctx.aio_operate(
1318 "foo", completion, &op,
1319 librados::OPERATION_IGNORE_OVERLAY |
1320 librados::OPERATION_SKIPRWLOCKS, NULL));
1321 completion->wait_for_safe();
1322 ASSERT_EQ(0, completion->get_return_value());
1323 completion->release();
1324 }
1325
1326 // verify clean
1327 {
1328 bool dirty = false;
1329 int r = -1;
1330 ObjectReadOperation op;
1331 op.is_dirty(&dirty, &r);
1332 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1333 ASSERT_FALSE(dirty);
1334 ASSERT_EQ(0, r);
1335 }
1336
1337 // verify in base tier
1338 {
1339 NObjectIterator it = ioctx.nobjects_begin();
1340 ASSERT_TRUE(it != ioctx.nobjects_end());
1341 ASSERT_TRUE(it->get_oid() == string("foo"));
1342 ++it;
1343 ASSERT_TRUE(it == ioctx.nobjects_end());
1344 }
1345
1346 // evict it
1347 {
1348 ObjectReadOperation op;
1349 op.cache_evict();
1350 librados::AioCompletion *completion = cluster.aio_create_completion();
1351 ASSERT_EQ(0, cache_ioctx.aio_operate(
1352 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
1353 completion->wait_for_safe();
1354 ASSERT_EQ(0, completion->get_return_value());
1355 completion->release();
1356 }
1357
1358 // verify no longer in cache tier
1359 {
1360 NObjectIterator it = cache_ioctx.nobjects_begin();
1361 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1362 }
1363}
1364
1365TEST_F(LibRadosTwoPoolsPP, Flush) {
1366 // configure cache
1367 bufferlist inbl;
1368 ASSERT_EQ(0, cluster.mon_command(
1369 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1370 "\", \"tierpool\": \"" + cache_pool_name +
1371 "\", \"force_nonempty\": \"--force-nonempty\" }",
1372 inbl, NULL, NULL));
1373 ASSERT_EQ(0, cluster.mon_command(
1374 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1375 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1376 inbl, NULL, NULL));
1377 ASSERT_EQ(0, cluster.mon_command(
1378 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1379 "\", \"mode\": \"writeback\"}",
1380 inbl, NULL, NULL));
1381
1382 // wait for maps to settle
1383 cluster.wait_for_latest_osdmap();
1384
1385 uint64_t user_version = 0;
1386
1387 // create object
1388 {
1389 bufferlist bl;
1390 bl.append("hi there");
1391 ObjectWriteOperation op;
1392 op.write_full(bl);
1393 ASSERT_EQ(0, ioctx.operate("foo", &op));
1394 }
1395
1396 // verify the object is present in the cache tier
1397 {
1398 NObjectIterator it = cache_ioctx.nobjects_begin();
1399 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
1400 ASSERT_TRUE(it->get_oid() == string("foo"));
1401 ++it;
1402 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1403 }
1404
1405 // verify the object is NOT present in the base tier
1406 {
1407 NObjectIterator it = ioctx.nobjects_begin();
1408 ASSERT_TRUE(it == ioctx.nobjects_end());
1409 }
1410
1411 // verify dirty
1412 {
1413 bool dirty = false;
1414 int r = -1;
1415 ObjectReadOperation op;
1416 op.is_dirty(&dirty, &r);
1417 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1418 ASSERT_TRUE(dirty);
1419 ASSERT_EQ(0, r);
1420 user_version = cache_ioctx.get_last_version();
1421 }
1422
1423 // pin
1424 {
1425 ObjectWriteOperation op;
1426 op.cache_pin();
1427 librados::AioCompletion *completion = cluster.aio_create_completion();
1428 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1429 completion->wait_for_safe();
1430 ASSERT_EQ(0, completion->get_return_value());
1431 completion->release();
1432 }
1433
1434 // flush the pinned object with -EPERM
1435 {
1436 ObjectReadOperation op;
1437 op.cache_try_flush();
1438 librados::AioCompletion *completion = cluster.aio_create_completion();
1439 ASSERT_EQ(0, cache_ioctx.aio_operate(
1440 "foo", completion, &op,
1441 librados::OPERATION_IGNORE_OVERLAY |
1442 librados::OPERATION_SKIPRWLOCKS, NULL));
1443 completion->wait_for_safe();
1444 ASSERT_EQ(-EPERM, completion->get_return_value());
1445 completion->release();
1446 }
1447
1448 // unpin
1449 {
1450 ObjectWriteOperation op;
1451 op.cache_unpin();
1452 librados::AioCompletion *completion = cluster.aio_create_completion();
1453 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
1454 completion->wait_for_safe();
1455 ASSERT_EQ(0, completion->get_return_value());
1456 completion->release();
1457 }
1458
1459 // flush
1460 {
1461 ObjectReadOperation op;
1462 op.cache_flush();
1463 librados::AioCompletion *completion = cluster.aio_create_completion();
1464 ASSERT_EQ(0, cache_ioctx.aio_operate(
1465 "foo", completion, &op,
1466 librados::OPERATION_IGNORE_OVERLAY, NULL));
1467 completion->wait_for_safe();
1468 ASSERT_EQ(0, completion->get_return_value());
1469 completion->release();
1470 }
1471
1472 // verify clean
1473 {
1474 bool dirty = false;
1475 int r = -1;
1476 ObjectReadOperation op;
1477 op.is_dirty(&dirty, &r);
1478 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
1479 ASSERT_FALSE(dirty);
1480 ASSERT_EQ(0, r);
1481 }
1482
1483 // verify in base tier
1484 {
1485 NObjectIterator it = ioctx.nobjects_begin();
1486 ASSERT_TRUE(it != ioctx.nobjects_end());
1487 ASSERT_TRUE(it->get_oid() == string("foo"));
1488 ++it;
1489 ASSERT_TRUE(it == ioctx.nobjects_end());
1490 }
1491
1492 // evict it
1493 {
1494 ObjectReadOperation op;
1495 op.cache_evict();
1496 librados::AioCompletion *completion = cluster.aio_create_completion();
1497 ASSERT_EQ(0, cache_ioctx.aio_operate(
1498 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
1499 completion->wait_for_safe();
1500 ASSERT_EQ(0, completion->get_return_value());
1501 completion->release();
1502 }
1503
1504 // verify no longer in cache tier
1505 {
1506 NObjectIterator it = cache_ioctx.nobjects_begin();
1507 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1508 }
1509
1510 // read it again and verify the version is consistent
1511 {
1512 bufferlist bl;
1513 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
1514 ASSERT_EQ(user_version, cache_ioctx.get_last_version());
1515 }
1516
1517 // erase it
1518 {
1519 ObjectWriteOperation op;
1520 op.remove();
1521 ASSERT_EQ(0, ioctx.operate("foo", &op));
1522 }
1523
1524 // flush whiteout
1525 {
1526 ObjectReadOperation op;
1527 op.cache_flush();
1528 librados::AioCompletion *completion = cluster.aio_create_completion();
1529 ASSERT_EQ(0, cache_ioctx.aio_operate(
1530 "foo", completion, &op,
1531 librados::OPERATION_IGNORE_OVERLAY, NULL));
1532 completion->wait_for_safe();
1533 ASSERT_EQ(0, completion->get_return_value());
1534 completion->release();
1535 }
1536
1537 // evict
1538 {
1539 ObjectReadOperation op;
1540 op.cache_evict();
1541 librados::AioCompletion *completion = cluster.aio_create_completion();
1542 ASSERT_EQ(0, cache_ioctx.aio_operate(
1543 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
1544 completion->wait_for_safe();
1545 ASSERT_EQ(0, completion->get_return_value());
1546 completion->release();
1547 }
1548
1549 // verify no longer in cache tier
1550 {
1551 NObjectIterator it = cache_ioctx.nobjects_begin();
1552 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1553 }
1554 // or base tier
1555 {
1556 NObjectIterator it = ioctx.nobjects_begin();
1557 ASSERT_TRUE(it == ioctx.nobjects_end());
1558 }
1559}
1560
1561TEST_F(LibRadosTwoPoolsPP, FlushSnap) {
1562 // configure cache
1563 bufferlist inbl;
1564 ASSERT_EQ(0, cluster.mon_command(
1565 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1566 "\", \"tierpool\": \"" + cache_pool_name +
1567 "\", \"force_nonempty\": \"--force-nonempty\" }",
1568 inbl, NULL, NULL));
1569 ASSERT_EQ(0, cluster.mon_command(
1570 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1571 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1572 inbl, NULL, NULL));
1573 ASSERT_EQ(0, cluster.mon_command(
1574 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1575 "\", \"mode\": \"writeback\"}",
1576 inbl, NULL, NULL));
1577
1578 // wait for maps to settle
1579 cluster.wait_for_latest_osdmap();
1580
1581 // create object
1582 {
1583 bufferlist bl;
1584 bl.append("a");
1585 ObjectWriteOperation op;
1586 op.write_full(bl);
1587 ASSERT_EQ(0, ioctx.operate("foo", &op));
1588 }
1589
1590 // create a snapshot, clone
1591 vector<uint64_t> my_snaps(1);
1592 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1593 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1594 my_snaps));
1595 {
1596 bufferlist bl;
1597 bl.append("b");
1598 ObjectWriteOperation op;
1599 op.write_full(bl);
1600 ASSERT_EQ(0, ioctx.operate("foo", &op));
1601 }
1602
1603 // and another
1604 my_snaps.resize(2);
1605 my_snaps[1] = my_snaps[0];
1606 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
1607 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
1608 my_snaps));
1609 {
1610 bufferlist bl;
1611 bl.append("c");
1612 ObjectWriteOperation op;
1613 op.write_full(bl);
1614 ASSERT_EQ(0, ioctx.operate("foo", &op));
1615 }
1616
1617 // verify the object is present in the cache tier
1618 {
1619 NObjectIterator it = cache_ioctx.nobjects_begin();
1620 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
1621 ASSERT_TRUE(it->get_oid() == string("foo"));
1622 ++it;
1623 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
1624 }
1625
1626 // verify the object is NOT present in the base tier
1627 {
1628 NObjectIterator it = ioctx.nobjects_begin();
1629 ASSERT_TRUE(it == ioctx.nobjects_end());
1630 }
1631
1632 // flush on head (should fail)
1633 ioctx.snap_set_read(librados::SNAP_HEAD);
1634 {
1635 ObjectReadOperation op;
1636 op.cache_flush();
1637 librados::AioCompletion *completion = cluster.aio_create_completion();
1638 ASSERT_EQ(0, ioctx.aio_operate(
1639 "foo", completion, &op,
1640 librados::OPERATION_IGNORE_CACHE, NULL));
1641 completion->wait_for_safe();
1642 ASSERT_EQ(-EBUSY, completion->get_return_value());
1643 completion->release();
1644 }
1645 // flush on recent snap (should fail)
1646 ioctx.snap_set_read(my_snaps[0]);
1647 {
1648 ObjectReadOperation op;
1649 op.cache_flush();
1650 librados::AioCompletion *completion = cluster.aio_create_completion();
1651 ASSERT_EQ(0, ioctx.aio_operate(
1652 "foo", completion, &op,
1653 librados::OPERATION_IGNORE_CACHE, NULL));
1654 completion->wait_for_safe();
1655 ASSERT_EQ(-EBUSY, completion->get_return_value());
1656 completion->release();
1657 }
1658 // flush on oldest snap
1659 ioctx.snap_set_read(my_snaps[1]);
1660 {
1661 ObjectReadOperation op;
1662 op.cache_flush();
1663 librados::AioCompletion *completion = cluster.aio_create_completion();
1664 ASSERT_EQ(0, ioctx.aio_operate(
1665 "foo", completion, &op,
1666 librados::OPERATION_IGNORE_CACHE, NULL));
1667 completion->wait_for_safe();
1668 ASSERT_EQ(0, completion->get_return_value());
1669 completion->release();
1670 }
1671 // flush on next oldest snap
1672 ioctx.snap_set_read(my_snaps[0]);
1673 {
1674 ObjectReadOperation op;
1675 op.cache_flush();
1676 librados::AioCompletion *completion = cluster.aio_create_completion();
1677 ASSERT_EQ(0, ioctx.aio_operate(
1678 "foo", completion, &op,
1679 librados::OPERATION_IGNORE_CACHE, NULL));
1680 completion->wait_for_safe();
1681 ASSERT_EQ(0, completion->get_return_value());
1682 completion->release();
1683 }
1684 // flush on head
1685 ioctx.snap_set_read(librados::SNAP_HEAD);
1686 {
1687 ObjectReadOperation op;
1688 op.cache_flush();
1689 librados::AioCompletion *completion = cluster.aio_create_completion();
1690 ASSERT_EQ(0, ioctx.aio_operate(
1691 "foo", completion, &op,
1692 librados::OPERATION_IGNORE_CACHE, NULL));
1693 completion->wait_for_safe();
1694 ASSERT_EQ(0, completion->get_return_value());
1695 completion->release();
1696 }
1697
1698 // verify i can read the snaps from the cache pool
1699 ioctx.snap_set_read(librados::SNAP_HEAD);
1700 {
1701 bufferlist bl;
1702 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1703 ASSERT_EQ('c', bl[0]);
1704 }
1705 ioctx.snap_set_read(my_snaps[0]);
1706 {
1707 bufferlist bl;
1708 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1709 ASSERT_EQ('b', bl[0]);
1710 }
1711 ioctx.snap_set_read(my_snaps[1]);
1712 {
1713 bufferlist bl;
1714 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1715 ASSERT_EQ('a', bl[0]);
1716 }
1717
1718 // remove overlay
1719 ASSERT_EQ(0, cluster.mon_command(
1720 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
1721 "\"}",
1722 inbl, NULL, NULL));
1723
1724 // wait for maps to settle
1725 cluster.wait_for_latest_osdmap();
1726
1727 // verify i can read the snaps from the base pool
1728 ioctx.snap_set_read(librados::SNAP_HEAD);
1729 {
1730 bufferlist bl;
1731 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1732 ASSERT_EQ('c', bl[0]);
1733 }
1734 ioctx.snap_set_read(my_snaps[0]);
1735 {
1736 bufferlist bl;
1737 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1738 ASSERT_EQ('b', bl[0]);
1739 }
1740 ioctx.snap_set_read(my_snaps[1]);
1741 {
1742 bufferlist bl;
1743 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
1744 ASSERT_EQ('a', bl[0]);
1745 }
1746
1747 ASSERT_EQ(0, cluster.mon_command(
1748 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1749 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1750 inbl, NULL, NULL));
1751
1752 // cleanup
1753 ioctx.selfmanaged_snap_remove(my_snaps[0]);
1754}
1755
1756TEST_F(LibRadosTierPP, FlushWriteRaces) {
1757 Rados cluster;
1758 std::string pool_name = get_temp_pool_name();
1759 std::string cache_pool_name = pool_name + "-cache";
1760 ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
1761 ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str()));
1762 IoCtx cache_ioctx;
1763 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
1764 IoCtx ioctx;
1765 ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
1766
1767 // configure cache
1768 bufferlist inbl;
1769 ASSERT_EQ(0, cluster.mon_command(
1770 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1771 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
1772 inbl, NULL, NULL));
1773 ASSERT_EQ(0, cluster.mon_command(
1774 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1775 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1776 inbl, NULL, NULL));
1777 ASSERT_EQ(0, cluster.mon_command(
1778 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1779 "\", \"mode\": \"writeback\"}",
1780 inbl, NULL, NULL));
1781
1782 // wait for maps to settle
1783 cluster.wait_for_latest_osdmap();
1784
1785 // create/dirty object
1786 bufferlist bl;
1787 bl.append("hi there");
1788 {
1789 ObjectWriteOperation op;
1790 op.write_full(bl);
1791 ASSERT_EQ(0, ioctx.operate("foo", &op));
1792 }
1793
1794 // flush + write
1795 {
1796 ObjectReadOperation op;
1797 op.cache_flush();
1798 librados::AioCompletion *completion = cluster.aio_create_completion();
1799 ASSERT_EQ(0, cache_ioctx.aio_operate(
1800 "foo", completion, &op,
1801 librados::OPERATION_IGNORE_OVERLAY, NULL));
1802
1803 ObjectWriteOperation op2;
1804 op2.write_full(bl);
1805 librados::AioCompletion *completion2 = cluster.aio_create_completion();
1806 ASSERT_EQ(0, ioctx.aio_operate(
1807 "foo", completion2, &op2, 0));
1808
1809 completion->wait_for_safe();
1810 completion2->wait_for_safe();
1811 ASSERT_EQ(0, completion->get_return_value());
1812 ASSERT_EQ(0, completion2->get_return_value());
1813 completion->release();
1814 completion2->release();
1815 }
1816
1817 int tries = 1000;
1818 do {
1819 // create/dirty object
1820 {
1821 bufferlist bl;
1822 bl.append("hi there");
1823 ObjectWriteOperation op;
1824 op.write_full(bl);
1825 ASSERT_EQ(0, ioctx.operate("foo", &op));
1826 }
1827
1828 // try-flush + write
1829 {
1830 ObjectReadOperation op;
1831 op.cache_try_flush();
1832 librados::AioCompletion *completion = cluster.aio_create_completion();
1833 ASSERT_EQ(0, cache_ioctx.aio_operate(
1834 "foo", completion, &op,
1835 librados::OPERATION_IGNORE_OVERLAY |
1836 librados::OPERATION_SKIPRWLOCKS, NULL));
1837
1838 ObjectWriteOperation op2;
1839 op2.write_full(bl);
1840 librados::AioCompletion *completion2 = cluster.aio_create_completion();
1841 ASSERT_EQ(0, ioctx.aio_operate("foo", completion2, &op2, 0));
1842
1843 completion->wait_for_safe();
1844 completion2->wait_for_safe();
1845 int r = completion->get_return_value();
1846 ASSERT_TRUE(r == -EBUSY || r == 0);
1847 ASSERT_EQ(0, completion2->get_return_value());
1848 completion->release();
1849 completion2->release();
1850 if (r == -EBUSY)
1851 break;
1852 cout << "didn't get EBUSY, trying again" << std::endl;
1853 }
1854 ASSERT_TRUE(--tries);
1855 } while (true);
1856
1857 // tear down tiers
1858 ASSERT_EQ(0, cluster.mon_command(
1859 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
1860 "\"}",
1861 inbl, NULL, NULL));
1862 ASSERT_EQ(0, cluster.mon_command(
1863 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
1864 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
1865 inbl, NULL, NULL));
1866
1867 // wait for maps to settle before next test
1868 cluster.wait_for_latest_osdmap();
1869
1870 ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str()));
1871 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
1872}
1873
1874TEST_F(LibRadosTwoPoolsPP, FlushTryFlushRaces) {
1875 // configure cache
1876 bufferlist inbl;
1877 ASSERT_EQ(0, cluster.mon_command(
1878 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
1879 "\", \"tierpool\": \"" + cache_pool_name +
1880 "\", \"force_nonempty\": \"--force-nonempty\" }",
1881 inbl, NULL, NULL));
1882 ASSERT_EQ(0, cluster.mon_command(
1883 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
1884 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
1885 inbl, NULL, NULL));
1886 ASSERT_EQ(0, cluster.mon_command(
1887 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
1888 "\", \"mode\": \"writeback\"}",
1889 inbl, NULL, NULL));
1890
1891 // wait for maps to settle
1892 cluster.wait_for_latest_osdmap();
1893
1894 // create/dirty object
1895 {
1896 bufferlist bl;
1897 bl.append("hi there");
1898 ObjectWriteOperation op;
1899 op.write_full(bl);
1900 ASSERT_EQ(0, ioctx.operate("foo", &op));
1901 }
1902
1903 // flush + flush
1904 {
1905 ObjectReadOperation op;
1906 op.cache_flush();
1907 librados::AioCompletion *completion = cluster.aio_create_completion();
1908 ASSERT_EQ(0, cache_ioctx.aio_operate(
1909 "foo", completion, &op,
1910 librados::OPERATION_IGNORE_OVERLAY, NULL));
1911
1912 ObjectReadOperation op2;
1913 op2.cache_flush();
1914 librados::AioCompletion *completion2 = cluster.aio_create_completion();
1915 ASSERT_EQ(0, cache_ioctx.aio_operate(
1916 "foo", completion2, &op2,
1917 librados::OPERATION_IGNORE_OVERLAY, NULL));
1918
1919 completion->wait_for_safe();
1920 completion2->wait_for_safe();
1921 ASSERT_EQ(0, completion->get_return_value());
1922 ASSERT_EQ(0, completion2->get_return_value());
1923 completion->release();
1924 completion2->release();
1925 }
1926
1927 // create/dirty object
1928 {
1929 bufferlist bl;
1930 bl.append("hi there");
1931 ObjectWriteOperation op;
1932 op.write_full(bl);
1933 ASSERT_EQ(0, ioctx.operate("foo", &op));
1934 }
1935
1936 // flush + try-flush
1937 {
1938 ObjectReadOperation op;
1939 op.cache_flush();
1940 librados::AioCompletion *completion = cluster.aio_create_completion();
1941 ASSERT_EQ(0, cache_ioctx.aio_operate(
1942 "foo", completion, &op,
1943 librados::OPERATION_IGNORE_OVERLAY, NULL));
1944
1945 ObjectReadOperation op2;
1946 op2.cache_try_flush();
1947 librados::AioCompletion *completion2 = cluster.aio_create_completion();
1948 ASSERT_EQ(0, cache_ioctx.aio_operate(
1949 "foo", completion2, &op2,
1950 librados::OPERATION_IGNORE_OVERLAY |
1951 librados::OPERATION_SKIPRWLOCKS, NULL));
1952
1953 completion->wait_for_safe();
1954 completion2->wait_for_safe();
1955 ASSERT_EQ(0, completion->get_return_value());
1956 ASSERT_EQ(0, completion2->get_return_value());
1957 completion->release();
1958 completion2->release();
1959 }
1960
1961 // create/dirty object
1962 int tries = 1000;
1963 do {
1964 {
1965 bufferlist bl;
1966 bl.append("hi there");
1967 ObjectWriteOperation op;
1968 op.write_full(bl);
1969 ASSERT_EQ(0, ioctx.operate("foo", &op));
1970 }
1971
1972 // try-flush + flush
1973 // (flush will not piggyback on try-flush)
1974 {
1975 ObjectReadOperation op;
1976 op.cache_try_flush();
1977 librados::AioCompletion *completion = cluster.aio_create_completion();
1978 ASSERT_EQ(0, cache_ioctx.aio_operate(
1979 "foo", completion, &op,
1980 librados::OPERATION_IGNORE_OVERLAY |
1981 librados::OPERATION_SKIPRWLOCKS, NULL));
1982
1983 ObjectReadOperation op2;
1984 op2.cache_flush();
1985 librados::AioCompletion *completion2 = cluster.aio_create_completion();
1986 ASSERT_EQ(0, cache_ioctx.aio_operate(
1987 "foo", completion2, &op2,
1988 librados::OPERATION_IGNORE_OVERLAY, NULL));
1989
1990 completion->wait_for_safe();
1991 completion2->wait_for_safe();
1992 int r = completion->get_return_value();
1993 ASSERT_TRUE(r == -EBUSY || r == 0);
1994 ASSERT_EQ(0, completion2->get_return_value());
1995 completion->release();
1996 completion2->release();
1997 if (r == -EBUSY)
1998 break;
1999 cout << "didn't get EBUSY, trying again" << std::endl;
2000 }
2001 ASSERT_TRUE(--tries);
2002 } while (true);
2003
2004 // create/dirty object
2005 {
2006 bufferlist bl;
2007 bl.append("hi there");
2008 ObjectWriteOperation op;
2009 op.write_full(bl);
2010 ASSERT_EQ(0, ioctx.operate("foo", &op));
2011 }
2012
2013 // try-flush + try-flush
2014 {
2015 ObjectReadOperation op;
2016 op.cache_try_flush();
2017 librados::AioCompletion *completion = cluster.aio_create_completion();
2018 ASSERT_EQ(0, cache_ioctx.aio_operate(
2019 "foo", completion, &op,
2020 librados::OPERATION_IGNORE_OVERLAY |
2021 librados::OPERATION_SKIPRWLOCKS, NULL));
2022
2023 ObjectReadOperation op2;
2024 op2.cache_try_flush();
2025 librados::AioCompletion *completion2 = cluster.aio_create_completion();
2026 ASSERT_EQ(0, cache_ioctx.aio_operate(
2027 "foo", completion2, &op2,
2028 librados::OPERATION_IGNORE_OVERLAY |
2029 librados::OPERATION_SKIPRWLOCKS, NULL));
2030
2031 completion->wait_for_safe();
2032 completion2->wait_for_safe();
2033 ASSERT_EQ(0, completion->get_return_value());
2034 ASSERT_EQ(0, completion2->get_return_value());
2035 completion->release();
2036 completion2->release();
2037 }
2038}
2039
2040
2041IoCtx *read_ioctx = 0;
2042Mutex test_lock("FlushReadRaces::lock");
2043Cond cond;
2044int max_reads = 100;
2045int num_reads = 0; // in progress
2046
2047void flush_read_race_cb(completion_t cb, void *arg);
2048
2049void start_flush_read()
2050{
2051 //cout << " starting read" << std::endl;
2052 ObjectReadOperation op;
2053 op.stat(NULL, NULL, NULL);
2054 librados::AioCompletion *completion = completions.getCompletion();
2055 completion->set_complete_callback(0, flush_read_race_cb);
2056 read_ioctx->aio_operate("foo", completion, &op, NULL);
2057}
2058
2059void flush_read_race_cb(completion_t cb, void *arg)
2060{
2061 //cout << " finished read" << std::endl;
2062 test_lock.Lock();
2063 if (num_reads > max_reads) {
2064 num_reads--;
2065 cond.Signal();
2066 } else {
2067 start_flush_read();
2068 }
2069 test_lock.Unlock();
2070}
2071
2072TEST_F(LibRadosTwoPoolsPP, TryFlushReadRace) {
2073 // configure cache
2074 bufferlist inbl;
2075 ASSERT_EQ(0, cluster.mon_command(
2076 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2077 "\", \"tierpool\": \"" + cache_pool_name +
2078 "\", \"force_nonempty\": \"--force-nonempty\" }",
2079 inbl, NULL, NULL));
2080 ASSERT_EQ(0, cluster.mon_command(
2081 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2082 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2083 inbl, NULL, NULL));
2084 ASSERT_EQ(0, cluster.mon_command(
2085 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2086 "\", \"mode\": \"writeback\"}",
2087 inbl, NULL, NULL));
2088
2089 // wait for maps to settle
2090 cluster.wait_for_latest_osdmap();
2091
2092 // create/dirty object
2093 {
2094 bufferlist bl;
2095 bl.append("hi there");
2096 bufferptr bp(4000000); // make it big!
2097 bp.zero();
2098 bl.append(bp);
2099 ObjectWriteOperation op;
2100 op.write_full(bl);
2101 ASSERT_EQ(0, ioctx.operate("foo", &op));
2102 }
2103
2104 // start a continuous stream of reads
2105 read_ioctx = &ioctx;
2106 test_lock.Lock();
2107 for (int i = 0; i < max_reads; ++i) {
2108 start_flush_read();
2109 num_reads++;
2110 }
2111 test_lock.Unlock();
2112
2113 // try-flush
2114 ObjectReadOperation op;
2115 op.cache_try_flush();
2116 librados::AioCompletion *completion = cluster.aio_create_completion();
2117 ASSERT_EQ(0, cache_ioctx.aio_operate(
2118 "foo", completion, &op,
2119 librados::OPERATION_IGNORE_OVERLAY |
2120 librados::OPERATION_SKIPRWLOCKS, NULL));
2121
2122 completion->wait_for_safe();
2123 ASSERT_EQ(0, completion->get_return_value());
2124 completion->release();
2125
2126 // stop reads
2127 test_lock.Lock();
2128 max_reads = 0;
2129 while (num_reads > 0)
2130 cond.Wait(test_lock);
2131 test_lock.Unlock();
2132}
2133
2134TEST_F(LibRadosTierPP, HitSetNone) {
2135 {
2136 list< pair<time_t,time_t> > ls;
2137 AioCompletion *c = librados::Rados::aio_create_completion();
2138 ASSERT_EQ(0, ioctx.hit_set_list(123, c, &ls));
2139 c->wait_for_complete();
2140 ASSERT_EQ(0, c->get_return_value());
2141 ASSERT_TRUE(ls.empty());
2142 c->release();
2143 }
2144 {
2145 bufferlist bl;
2146 AioCompletion *c = librados::Rados::aio_create_completion();
2147 ASSERT_EQ(0, ioctx.hit_set_get(123, c, 12345, &bl));
2148 c->wait_for_complete();
2149 ASSERT_EQ(-ENOENT, c->get_return_value());
2150 c->release();
2151 }
2152}
2153
// Build the JSON text of an "osd pool set" monitor command that sets
// variable `var` of pool `pool` to the string value `val`.
// The arguments are inserted verbatim (no JSON escaping is applied).
string set_pool_str(string pool, string var, string val)
{
  string cmd("{\"prefix\": \"osd pool set\",\"pool\":\"");
  cmd += pool;
  cmd += "\",\"var\": \"";
  cmd += var;
  cmd += "\",\"val\": \"";
  cmd += val;
  cmd += "\"}";
  return cmd;
}
2160
// Build the JSON text of an "osd pool set" monitor command that sets
// variable `var` of pool `pool` to the integer `val`.
// The value is emitted in decimal and, like the string form, is quoted.
string set_pool_str(string pool, string var, int val)
{
  ostringstream cmd;
  cmd << "{\"prefix\": \"osd pool set\",\"pool\":\"" << pool
      << "\",\"var\": \"" << var
      << "\",\"val\": \"" << val
      << "\"}";
  return cmd.str();
}
2167
2168TEST_F(LibRadosTwoPoolsPP, HitSetRead) {
2169 // make it a tier
2170 bufferlist inbl;
2171 ASSERT_EQ(0, cluster.mon_command(
2172 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2173 "\", \"tierpool\": \"" + cache_pool_name +
2174 "\", \"force_nonempty\": \"--force-nonempty\" }",
2175 inbl, NULL, NULL));
2176
2177 // enable hitset tracking for this pool
2178 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", 2),
2179 inbl, NULL, NULL));
2180 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", 600),
2181 inbl, NULL, NULL));
2182 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type",
2183 "explicit_object"),
2184 inbl, NULL, NULL));
2185
2186 // wait for maps to settle
2187 cluster.wait_for_latest_osdmap();
2188
2189 cache_ioctx.set_namespace("");
2190
2191 // keep reading until we see our object appear in the HitSet
2192 utime_t start = ceph_clock_now();
2193 utime_t hard_stop = start + utime_t(600, 0);
2194
2195 while (true) {
2196 utime_t now = ceph_clock_now();
2197 ASSERT_TRUE(now < hard_stop);
2198
2199 string name = "foo";
2200 uint32_t hash;
2201 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
2202 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash,
2203 cluster.pool_lookup(cache_pool_name.c_str()), "");
2204
2205 bufferlist bl;
2206 ASSERT_EQ(-ENOENT, cache_ioctx.read("foo", bl, 1, 0));
2207
2208 bufferlist hbl;
2209 AioCompletion *c = librados::Rados::aio_create_completion();
2210 ASSERT_EQ(0, cache_ioctx.hit_set_get(hash, c, now.sec(), &hbl));
2211 c->wait_for_complete();
2212 c->release();
2213
2214 if (hbl.length()) {
2215 bufferlist::iterator p = hbl.begin();
2216 HitSet hs;
2217 ::decode(hs, p);
2218 if (hs.contains(oid)) {
2219 cout << "ok, hit_set contains " << oid << std::endl;
2220 break;
2221 }
2222 cout << "hmm, not in HitSet yet" << std::endl;
2223 } else {
2224 cout << "hmm, no HitSet yet" << std::endl;
2225 }
2226
2227 sleep(1);
2228 }
2229}
2230
2231static int _get_pg_num(Rados& cluster, string pool_name)
2232{
2233 bufferlist inbl;
2234 string cmd = string("{\"prefix\": \"osd pool get\",\"pool\":\"")
2235 + pool_name
2236 + string("\",\"var\": \"pg_num\",\"format\": \"json\"}");
2237 bufferlist outbl;
2238 int r = cluster.mon_command(cmd, inbl, &outbl, NULL);
2239 assert(r >= 0);
2240 string outstr(outbl.c_str(), outbl.length());
2241 json_spirit::Value v;
2242 if (!json_spirit::read(outstr, v)) {
2243 cerr <<" unable to parse json " << outstr << std::endl;
2244 return -1;
2245 }
2246
2247 json_spirit::Object& o = v.get_obj();
2248 for (json_spirit::Object::size_type i=0; i<o.size(); i++) {
2249 json_spirit::Pair& p = o[i];
2250 if (p.name_ == "pg_num") {
2251 cout << "pg_num = " << p.value_.get_int() << std::endl;
2252 return p.value_.get_int();
2253 }
2254 }
2255 cerr << "didn't find pg_num in " << outstr << std::endl;
2256 return -1;
2257}
2258
2259
2260TEST_F(LibRadosTwoPoolsPP, HitSetWrite) {
2261 int num_pg = _get_pg_num(cluster, pool_name);
2262 assert(num_pg > 0);
2263
2264 // make it a tier
2265 bufferlist inbl;
2266 ASSERT_EQ(0, cluster.mon_command(
2267 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2268 "\", \"tierpool\": \"" + cache_pool_name +
2269 "\", \"force_nonempty\": \"--force-nonempty\" }",
2270 inbl, NULL, NULL));
2271
2272 // enable hitset tracking for this pool
2273 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", 8),
2274 inbl, NULL, NULL));
2275 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", 600),
2276 inbl, NULL, NULL));
2277 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type",
2278 "explicit_hash"),
2279 inbl, NULL, NULL));
2280
2281 // wait for maps to settle
2282 cluster.wait_for_latest_osdmap();
2283
2284 cache_ioctx.set_namespace("");
2285
2286 int num = 200;
2287
2288 // do a bunch of writes
2289 for (int i=0; i<num; ++i) {
2290 bufferlist bl;
2291 bl.append("a");
2292 ASSERT_EQ(0, cache_ioctx.write(stringify(i), bl, 1, 0));
2293 }
2294
2295 // get HitSets
2296 std::map<int,HitSet> hitsets;
2297 for (int i=0; i<num_pg; ++i) {
2298 list< pair<time_t,time_t> > ls;
2299 AioCompletion *c = librados::Rados::aio_create_completion();
2300 ASSERT_EQ(0, cache_ioctx.hit_set_list(i, c, &ls));
2301 c->wait_for_complete();
2302 c->release();
2303 std::cout << "pg " << i << " ls " << ls << std::endl;
2304 ASSERT_FALSE(ls.empty());
2305
2306 // get the latest
2307 c = librados::Rados::aio_create_completion();
2308 bufferlist bl;
2309 ASSERT_EQ(0, cache_ioctx.hit_set_get(i, c, ls.back().first, &bl));
2310 c->wait_for_complete();
2311 c->release();
2312
2313 try {
2314 bufferlist::iterator p = bl.begin();
2315 ::decode(hitsets[i], p);
2316 }
2317 catch (buffer::error& e) {
2318 std::cout << "failed to decode hit set; bl len is " << bl.length() << "\n";
2319 bl.hexdump(std::cout);
2320 std::cout << std::endl;
2321 throw e;
2322 }
2323
2324 // cope with racing splits by refreshing pg_num
2325 if (i == num_pg - 1)
2326 num_pg = _get_pg_num(cluster, cache_pool_name);
2327 }
2328
2329 for (int i=0; i<num; ++i) {
2330 string n = stringify(i);
2331 uint32_t hash;
2332 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(n, &hash));
2333 hobject_t oid(sobject_t(n, CEPH_NOSNAP), "", hash,
2334 cluster.pool_lookup(cache_pool_name.c_str()), "");
2335 std::cout << "checking for " << oid << std::endl;
2336 bool found = false;
2337 for (int p=0; p<num_pg; ++p) {
2338 if (hitsets[p].contains(oid)) {
2339 found = true;
2340 break;
2341 }
2342 }
2343 ASSERT_TRUE(found);
2344 }
2345}
2346
2347TEST_F(LibRadosTwoPoolsPP, HitSetTrim) {
2348 unsigned count = 3;
2349 unsigned period = 3;
2350
2351 // make it a tier
2352 bufferlist inbl;
2353 ASSERT_EQ(0, cluster.mon_command(
2354 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2355 "\", \"tierpool\": \"" + cache_pool_name +
2356 "\", \"force_nonempty\": \"--force-nonempty\" }",
2357 inbl, NULL, NULL));
2358
2359 // enable hitset tracking for this pool
2360 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", count),
2361 inbl, NULL, NULL));
2362 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", period),
2363 inbl, NULL, NULL));
2364 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
2365 inbl, NULL, NULL));
2366 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_fpp", ".01"),
2367 inbl, NULL, NULL));
2368
2369 // wait for maps to settle
2370 cluster.wait_for_latest_osdmap();
2371
2372 cache_ioctx.set_namespace("");
2373
2374 // do a bunch of writes and make sure the hitsets rotate
2375 utime_t start = ceph_clock_now();
2376 utime_t hard_stop = start + utime_t(count * period * 50, 0);
2377
2378 time_t first = 0;
2379 while (true) {
2380 string name = "foo";
2381 uint32_t hash;
2382 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
2383 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash, -1, "");
2384
2385 bufferlist bl;
2386 bl.append("f");
2387 ASSERT_EQ(0, cache_ioctx.write("foo", bl, 1, 0));
2388
2389 list<pair<time_t, time_t> > ls;
2390 AioCompletion *c = librados::Rados::aio_create_completion();
2391 ASSERT_EQ(0, cache_ioctx.hit_set_list(hash, c, &ls));
2392 c->wait_for_complete();
2393 c->release();
2394
2395 cout << " got ls " << ls << std::endl;
2396 if (!ls.empty()) {
2397 if (!first) {
2398 first = ls.front().first;
2399 cout << "first is " << first << std::endl;
2400 } else {
2401 if (ls.front().first != first) {
2402 cout << "first now " << ls.front().first << ", trimmed" << std::endl;
2403 break;
2404 }
2405 }
2406 }
2407
2408 utime_t now = ceph_clock_now();
2409 ASSERT_TRUE(now < hard_stop);
2410
2411 sleep(1);
2412 }
2413}
2414
2415TEST_F(LibRadosTwoPoolsPP, PromoteOn2ndRead) {
2416 // create object
2417 for (int i=0; i<20; ++i) {
2418 bufferlist bl;
2419 bl.append("hi there");
2420 ObjectWriteOperation op;
2421 op.write_full(bl);
2422 ASSERT_EQ(0, ioctx.operate("foo" + stringify(i), &op));
2423 }
2424
2425 // configure cache
2426 bufferlist inbl;
2427 ASSERT_EQ(0, cluster.mon_command(
2428 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2429 "\", \"tierpool\": \"" + cache_pool_name +
2430 "\", \"force_nonempty\": \"--force-nonempty\" }",
2431 inbl, NULL, NULL));
2432 ASSERT_EQ(0, cluster.mon_command(
2433 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2434 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2435 inbl, NULL, NULL));
2436 ASSERT_EQ(0, cluster.mon_command(
2437 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2438 "\", \"mode\": \"writeback\"}",
2439 inbl, NULL, NULL));
2440
2441 // enable hitset tracking for this pool
2442 ASSERT_EQ(0, cluster.mon_command(
2443 set_pool_str(cache_pool_name, "hit_set_count", 2),
2444 inbl, NULL, NULL));
2445 ASSERT_EQ(0, cluster.mon_command(
2446 set_pool_str(cache_pool_name, "hit_set_period", 600),
2447 inbl, NULL, NULL));
2448 ASSERT_EQ(0, cluster.mon_command(
2449 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
2450 inbl, NULL, NULL));
2451 ASSERT_EQ(0, cluster.mon_command(
2452 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
2453 inbl, NULL, NULL));
2454 ASSERT_EQ(0, cluster.mon_command(
2455 set_pool_str(cache_pool_name, "hit_set_grade_decay_rate", 20),
2456 inbl, NULL, NULL));
2457 ASSERT_EQ(0, cluster.mon_command(
2458 set_pool_str(cache_pool_name, "hit_set_search_last_n", 1),
2459 inbl, NULL, NULL));
2460
2461 // wait for maps to settle
2462 cluster.wait_for_latest_osdmap();
2463
2464 int fake = 0; // set this to non-zero to test spurious promotion,
2465 // e.g. from thrashing
2466 int attempt = 0;
2467 string obj;
2468 while (true) {
2469 // 1st read, don't trigger a promote
2470 obj = "foo" + stringify(attempt);
2471 cout << obj << std::endl;
2472 {
2473 bufferlist bl;
2474 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
2475 if (--fake >= 0) {
2476 sleep(1);
2477 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
2478 sleep(1);
2479 }
2480 }
2481
2482 // verify the object is NOT present in the cache tier
2483 {
2484 bool found = false;
2485 NObjectIterator it = cache_ioctx.nobjects_begin();
2486 while (it != cache_ioctx.nobjects_end()) {
2487 cout << " see " << it->get_oid() << std::endl;
2488 if (it->get_oid() == string(obj.c_str())) {
2489 found = true;
2490 break;
2491 }
2492 ++it;
2493 }
2494 if (!found)
2495 break;
2496 }
2497
2498 ++attempt;
2499 ASSERT_LE(attempt, 20);
2500 cout << "hrm, object is present in cache on attempt " << attempt
2501 << ", retrying" << std::endl;
2502 }
2503
2504 // Read until the object is present in the cache tier
2505 cout << "verifying " << obj << " is eventually promoted" << std::endl;
2506 while (true) {
2507 bufferlist bl;
2508 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
2509
2510 bool there = false;
2511 NObjectIterator it = cache_ioctx.nobjects_begin();
2512 while (it != cache_ioctx.nobjects_end()) {
2513 if (it->get_oid() == string(obj.c_str())) {
2514 there = true;
2515 break;
2516 }
2517 ++it;
2518 }
2519 if (there)
2520 break;
2521
2522 sleep(1);
2523 }
2524
2525 // tear down tiers
2526 ASSERT_EQ(0, cluster.mon_command(
2527 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2528 "\"}",
2529 inbl, NULL, NULL));
2530 ASSERT_EQ(0, cluster.mon_command(
2531 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2532 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2533 inbl, NULL, NULL));
2534
2535 // wait for maps to settle before next test
2536 cluster.wait_for_latest_osdmap();
2537}
2538
2539TEST_F(LibRadosTwoPoolsPP, ProxyRead) {
2540 // create object
2541 {
2542 bufferlist bl;
2543 bl.append("hi there");
2544 ObjectWriteOperation op;
2545 op.write_full(bl);
2546 ASSERT_EQ(0, ioctx.operate("foo", &op));
2547 }
2548
2549 // configure cache
2550 bufferlist inbl;
2551 ASSERT_EQ(0, cluster.mon_command(
2552 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2553 "\", \"tierpool\": \"" + cache_pool_name +
2554 "\", \"force_nonempty\": \"--force-nonempty\" }",
2555 inbl, NULL, NULL));
2556 ASSERT_EQ(0, cluster.mon_command(
2557 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2558 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2559 inbl, NULL, NULL));
2560 ASSERT_EQ(0, cluster.mon_command(
2561 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2562 "\", \"mode\": \"readproxy\"}",
2563 inbl, NULL, NULL));
2564
2565 // wait for maps to settle
2566 cluster.wait_for_latest_osdmap();
2567
2568 // read and verify the object
2569 {
2570 bufferlist bl;
2571 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2572 ASSERT_EQ('h', bl[0]);
2573 }
2574
2575 // Verify 10 times the object is NOT present in the cache tier
2576 uint32_t i = 0;
2577 while (i++ < 10) {
2578 NObjectIterator it = cache_ioctx.nobjects_begin();
2579 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
2580 sleep(1);
2581 }
2582
2583 // tear down tiers
2584 ASSERT_EQ(0, cluster.mon_command(
2585 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2586 "\"}",
2587 inbl, NULL, NULL));
2588 ASSERT_EQ(0, cluster.mon_command(
2589 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2590 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2591 inbl, NULL, NULL));
2592
2593 // wait for maps to settle before next test
2594 cluster.wait_for_latest_osdmap();
2595}
2596
2597TEST_F(LibRadosTwoPoolsPP, CachePin) {
2598 // create object
2599 {
2600 bufferlist bl;
2601 bl.append("hi there");
2602 ObjectWriteOperation op;
2603 op.write_full(bl);
2604 ASSERT_EQ(0, ioctx.operate("foo", &op));
2605 }
2606 {
2607 bufferlist bl;
2608 bl.append("hi there");
2609 ObjectWriteOperation op;
2610 op.write_full(bl);
2611 ASSERT_EQ(0, ioctx.operate("bar", &op));
2612 }
2613 {
2614 bufferlist bl;
2615 bl.append("hi there");
2616 ObjectWriteOperation op;
2617 op.write_full(bl);
2618 ASSERT_EQ(0, ioctx.operate("baz", &op));
2619 }
2620 {
2621 bufferlist bl;
2622 bl.append("hi there");
2623 ObjectWriteOperation op;
2624 op.write_full(bl);
2625 ASSERT_EQ(0, ioctx.operate("bam", &op));
2626 }
2627
2628 // configure cache
2629 bufferlist inbl;
2630 ASSERT_EQ(0, cluster.mon_command(
2631 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2632 "\", \"tierpool\": \"" + cache_pool_name +
2633 "\", \"force_nonempty\": \"--force-nonempty\" }",
2634 inbl, NULL, NULL));
2635 ASSERT_EQ(0, cluster.mon_command(
2636 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2637 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2638 inbl, NULL, NULL));
2639 ASSERT_EQ(0, cluster.mon_command(
2640 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2641 "\", \"mode\": \"writeback\"}",
2642 inbl, NULL, NULL));
2643
2644 // wait for maps to settle
2645 cluster.wait_for_latest_osdmap();
2646
2647 // read, trigger promote
2648 {
2649 bufferlist bl;
2650 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2651 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
2652 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
2653 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
2654 }
2655
2656 // verify the objects are present in the cache tier
2657 {
2658 NObjectIterator it = cache_ioctx.nobjects_begin();
2659 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
2660 for (uint32_t i = 0; i < 4; i++) {
2661 ASSERT_TRUE(it->get_oid() == string("foo") ||
2662 it->get_oid() == string("bar") ||
2663 it->get_oid() == string("baz") ||
2664 it->get_oid() == string("bam"));
2665 ++it;
2666 }
2667 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
2668 }
2669
2670 // pin objects
2671 {
2672 ObjectWriteOperation op;
2673 op.cache_pin();
2674 librados::AioCompletion *completion = cluster.aio_create_completion();
2675 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
2676 completion->wait_for_safe();
2677 ASSERT_EQ(0, completion->get_return_value());
2678 completion->release();
2679 }
2680 {
2681 ObjectWriteOperation op;
2682 op.cache_pin();
2683 librados::AioCompletion *completion = cluster.aio_create_completion();
2684 ASSERT_EQ(0, cache_ioctx.aio_operate("baz", completion, &op));
2685 completion->wait_for_safe();
2686 ASSERT_EQ(0, completion->get_return_value());
2687 completion->release();
2688 }
2689
2690 // enable agent
2691 ASSERT_EQ(0, cluster.mon_command(
2692 set_pool_str(cache_pool_name, "hit_set_count", 2),
2693 inbl, NULL, NULL));
2694 ASSERT_EQ(0, cluster.mon_command(
2695 set_pool_str(cache_pool_name, "hit_set_period", 600),
2696 inbl, NULL, NULL));
2697 ASSERT_EQ(0, cluster.mon_command(
2698 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
2699 inbl, NULL, NULL));
2700 ASSERT_EQ(0, cluster.mon_command(
2701 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
2702 inbl, NULL, NULL));
2703 ASSERT_EQ(0, cluster.mon_command(
2704 set_pool_str(cache_pool_name, "target_max_objects", 1),
2705 inbl, NULL, NULL));
2706
2707 sleep(10);
2708
2709 // Verify the pinned object 'foo' is not flushed/evicted
2710 uint32_t count = 0;
2711 while (true) {
2712 bufferlist bl;
2713 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
2714
2715 count = 0;
2716 NObjectIterator it = cache_ioctx.nobjects_begin();
2717 while (it != cache_ioctx.nobjects_end()) {
2718 ASSERT_TRUE(it->get_oid() == string("foo") ||
2719 it->get_oid() == string("bar") ||
2720 it->get_oid() == string("baz") ||
2721 it->get_oid() == string("bam"));
2722 ++count;
2723 ++it;
2724 }
2725 if (count == 2) {
2726 ASSERT_TRUE(it->get_oid() == string("foo") ||
2727 it->get_oid() == string("baz"));
2728 break;
2729 }
2730
2731 sleep(1);
2732 }
2733
2734 // tear down tiers
2735 ASSERT_EQ(0, cluster.mon_command(
2736 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2737 "\"}",
2738 inbl, NULL, NULL));
2739 ASSERT_EQ(0, cluster.mon_command(
2740 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2741 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2742 inbl, NULL, NULL));
2743
2744 // wait for maps to settle before next test
2745 cluster.wait_for_latest_osdmap();
2746}
2747
2748class LibRadosTwoPoolsECPP : public RadosTestECPP
2749{
2750public:
2751 LibRadosTwoPoolsECPP() {};
2752 ~LibRadosTwoPoolsECPP() override {};
2753protected:
2754 static void SetUpTestCase() {
2755 pool_name = get_temp_pool_name();
2756 ASSERT_EQ("", create_one_ec_pool_pp(pool_name, s_cluster));
2757 }
2758 static void TearDownTestCase() {
2759 ASSERT_EQ(0, destroy_one_ec_pool_pp(pool_name, s_cluster));
2760 }
2761 static std::string cache_pool_name;
2762
2763 void SetUp() override {
2764 cache_pool_name = get_temp_pool_name();
2765 ASSERT_EQ(0, s_cluster.pool_create(cache_pool_name.c_str()));
2766 RadosTestECPP::SetUp();
2767 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
2768 cache_ioctx.set_namespace(nspace);
2769 }
2770 void TearDown() override {
2771 // flush + evict cache
2772 flush_evict_all(cluster, cache_ioctx);
2773
2774 bufferlist inbl;
2775 // tear down tiers
2776 ASSERT_EQ(0, cluster.mon_command(
2777 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
2778 "\"}",
2779 inbl, NULL, NULL));
2780 ASSERT_EQ(0, cluster.mon_command(
2781 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
2782 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
2783 inbl, NULL, NULL));
2784
2785 // wait for maps to settle before next test
2786 cluster.wait_for_latest_osdmap();
2787
2788 RadosTestECPP::TearDown();
2789
2790 cleanup_default_namespace(cache_ioctx);
2791 cleanup_namespace(cache_ioctx, nspace);
2792
2793 cache_ioctx.close();
2794 ASSERT_EQ(0, s_cluster.pool_delete(cache_pool_name.c_str()));
2795 }
2796
2797 librados::IoCtx cache_ioctx;
2798};
2799
2800std::string LibRadosTwoPoolsECPP::cache_pool_name;
2801
2802TEST_F(LibRadosTierECPP, Dirty) {
2803 {
2804 ObjectWriteOperation op;
2805 op.undirty();
2806 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still get 0 if it dne
2807 }
2808 {
2809 ObjectWriteOperation op;
2810 op.create(true);
2811 ASSERT_EQ(0, ioctx.operate("foo", &op));
2812 }
2813 {
2814 bool dirty = false;
2815 int r = -1;
2816 ObjectReadOperation op;
2817 op.is_dirty(&dirty, &r);
2818 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
2819 ASSERT_TRUE(dirty);
2820 ASSERT_EQ(0, r);
2821 }
2822 {
2823 ObjectWriteOperation op;
2824 op.undirty();
2825 ASSERT_EQ(0, ioctx.operate("foo", &op));
2826 }
2827 {
2828 ObjectWriteOperation op;
2829 op.undirty();
2830 ASSERT_EQ(0, ioctx.operate("foo", &op)); // still 0 if already clean
2831 }
2832 {
2833 bool dirty = false;
2834 int r = -1;
2835 ObjectReadOperation op;
2836 op.is_dirty(&dirty, &r);
2837 ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
2838 ASSERT_FALSE(dirty);
2839 ASSERT_EQ(0, r);
2840 }
2841 //{
2842 // ObjectWriteOperation op;
2843 // op.truncate(0); // still a write even tho it is a no-op
2844 // ASSERT_EQ(0, ioctx.operate("foo", &op));
2845 //}
2846 //{
2847 // bool dirty = false;
2848 // int r = -1;
2849 // ObjectReadOperation op;
2850 // op.is_dirty(&dirty, &r);
2851 // ASSERT_EQ(0, ioctx.operate("foo", &op, NULL));
2852 // ASSERT_TRUE(dirty);
2853 // ASSERT_EQ(0, r);
2854 //}
2855}
2856
2857TEST_F(LibRadosTwoPoolsECPP, Overlay) {
2858 // create objects
2859 {
2860 bufferlist bl;
2861 bl.append("base");
2862 ObjectWriteOperation op;
2863 op.write_full(bl);
2864 ASSERT_EQ(0, ioctx.operate("foo", &op));
2865 }
2866 {
2867 bufferlist bl;
2868 bl.append("cache");
2869 ObjectWriteOperation op;
2870 op.write_full(bl);
2871 ASSERT_EQ(0, cache_ioctx.operate("foo", &op));
2872 }
2873
2874 // configure cache
2875 bufferlist inbl;
2876 ASSERT_EQ(0, cluster.mon_command(
2877 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2878 "\", \"tierpool\": \"" + cache_pool_name +
2879 "\", \"force_nonempty\": \"--force-nonempty\" }",
2880 inbl, NULL, NULL));
2881 ASSERT_EQ(0, cluster.mon_command(
2882 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2883 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2884 inbl, NULL, NULL));
2885
2886 // wait for maps to settle
2887 cluster.wait_for_latest_osdmap();
2888
2889 // by default, the overlay sends us to cache pool
2890 {
2891 bufferlist bl;
2892 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2893 ASSERT_EQ('c', bl[0]);
2894 }
2895 {
2896 bufferlist bl;
2897 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
2898 ASSERT_EQ('c', bl[0]);
2899 }
2900
2901 // unless we say otherwise
2902 {
2903 bufferlist bl;
2904 ObjectReadOperation op;
2905 op.read(0, 1, &bl, NULL);
2906 librados::AioCompletion *completion = cluster.aio_create_completion();
2907 ASSERT_EQ(0, ioctx.aio_operate(
2908 "foo", completion, &op,
2909 librados::OPERATION_IGNORE_OVERLAY, NULL));
2910 completion->wait_for_safe();
2911 ASSERT_EQ(0, completion->get_return_value());
2912 completion->release();
2913 ASSERT_EQ('b', bl[0]);
2914 }
2915}
2916
2917TEST_F(LibRadosTwoPoolsECPP, Promote) {
2918 // create object
2919 {
2920 bufferlist bl;
2921 bl.append("hi there");
2922 ObjectWriteOperation op;
2923 op.write_full(bl);
2924 ASSERT_EQ(0, ioctx.operate("foo", &op));
2925 }
2926
2927 // configure cache
2928 bufferlist inbl;
2929 ASSERT_EQ(0, cluster.mon_command(
2930 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
2931 "\", \"tierpool\": \"" + cache_pool_name +
2932 "\", \"force_nonempty\": \"--force-nonempty\" }",
2933 inbl, NULL, NULL));
2934 ASSERT_EQ(0, cluster.mon_command(
2935 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
2936 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
2937 inbl, NULL, NULL));
2938 ASSERT_EQ(0, cluster.mon_command(
2939 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
2940 "\", \"mode\": \"writeback\"}",
2941 inbl, NULL, NULL));
2942
2943 // wait for maps to settle
2944 cluster.wait_for_latest_osdmap();
2945
2946 // read, trigger a promote
2947 {
2948 bufferlist bl;
2949 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
2950 }
2951
2952 // read, trigger a whiteout
2953 {
2954 bufferlist bl;
2955 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
2956 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
2957 }
2958
2959 // verify the object is present in the cache tier
2960 {
2961 NObjectIterator it = cache_ioctx.nobjects_begin();
2962 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
2963 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
2964 ++it;
2965 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
2966 ++it;
2967 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
2968 }
2969}
2970
2971TEST_F(LibRadosTwoPoolsECPP, PromoteSnap) {
2972 // create object
2973 {
2974 bufferlist bl;
2975 bl.append("hi there");
2976 ObjectWriteOperation op;
2977 op.write_full(bl);
2978 ASSERT_EQ(0, ioctx.operate("foo", &op));
2979 }
2980 {
2981 bufferlist bl;
2982 bl.append("hi there");
2983 ObjectWriteOperation op;
2984 op.write_full(bl);
2985 ASSERT_EQ(0, ioctx.operate("bar", &op));
2986 }
2987 {
2988 bufferlist bl;
2989 bl.append("hi there");
2990 ObjectWriteOperation op;
2991 op.write_full(bl);
2992 ASSERT_EQ(0, ioctx.operate("baz", &op));
2993 }
2994 {
2995 bufferlist bl;
2996 bl.append("hi there");
2997 ObjectWriteOperation op;
2998 op.write_full(bl);
2999 ASSERT_EQ(0, ioctx.operate("bam", &op));
3000 }
3001
3002 // create a snapshot, clone
3003 vector<uint64_t> my_snaps(1);
3004 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3005 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3006 my_snaps));
3007 {
3008 bufferlist bl;
3009 bl.append("ciao!");
3010 ObjectWriteOperation op;
3011 op.write_full(bl);
3012 ASSERT_EQ(0, ioctx.operate("foo", &op));
3013 }
3014 {
3015 bufferlist bl;
3016 bl.append("ciao!");
3017 ObjectWriteOperation op;
3018 op.write_full(bl);
3019 ASSERT_EQ(0, ioctx.operate("bar", &op));
3020 }
3021 {
3022 ObjectWriteOperation op;
3023 op.remove();
3024 ASSERT_EQ(0, ioctx.operate("baz", &op));
3025 }
3026 {
3027 bufferlist bl;
3028 bl.append("ciao!");
3029 ObjectWriteOperation op;
3030 op.write_full(bl);
3031 ASSERT_EQ(0, ioctx.operate("bam", &op));
3032 }
3033
3034 // configure cache
3035 bufferlist inbl;
3036 ASSERT_EQ(0, cluster.mon_command(
3037 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3038 "\", \"tierpool\": \"" + cache_pool_name +
3039 "\", \"force_nonempty\": \"--force-nonempty\" }",
3040 inbl, NULL, NULL));
3041 ASSERT_EQ(0, cluster.mon_command(
3042 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3043 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3044 inbl, NULL, NULL));
3045 ASSERT_EQ(0, cluster.mon_command(
3046 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3047 "\", \"mode\": \"writeback\"}",
3048 inbl, NULL, NULL));
3049
3050 // wait for maps to settle
3051 cluster.wait_for_latest_osdmap();
3052
3053 // read, trigger a promote on the head
3054 {
3055 bufferlist bl;
3056 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3057 ASSERT_EQ('c', bl[0]);
3058 }
3059 {
3060 bufferlist bl;
3061 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
3062 ASSERT_EQ('c', bl[0]);
3063 }
3064
3065 ioctx.snap_set_read(my_snaps[0]);
3066
3067 // stop and scrub this pg (to make sure scrub can handle missing
3068 // clones in the cache tier)
3069 // This test requires cache tier and base tier to have the same pg_num/pgp_num
3070 {
3071 for (int tries = 0; tries < 5; ++tries) {
3072 IoCtx cache_ioctx;
3073 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
3074 uint32_t hash;
3075 ASSERT_EQ(0, ioctx.get_object_pg_hash_position2("foo", &hash));
3076 ostringstream ss;
3077 ss << "{\"prefix\": \"pg scrub\", \"pgid\": \""
3078 << cache_ioctx.get_id() << "."
3079 << hash
3080 << "\"}";
3081 int r = cluster.mon_command(ss.str(), inbl, NULL, NULL);
3082 if (r == -EAGAIN ||
3083 r == -ENOENT) { // in case mgr osdmap is a bit stale
3084 sleep(5);
3085 continue;
3086 }
3087 ASSERT_EQ(0, r);
3088 break;
3089 }
3090 // give it a few seconds to go. this is sloppy but is usually enough time
3091 cout << "waiting for scrub..." << std::endl;
3092 sleep(15);
3093 cout << "done waiting" << std::endl;
3094 }
3095
3096 // read foo snap
3097 {
3098 bufferlist bl;
3099 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3100 ASSERT_EQ('h', bl[0]);
3101 }
3102
3103 // read bar snap
3104 {
3105 bufferlist bl;
3106 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
3107 ASSERT_EQ('h', bl[0]);
3108 }
3109
3110 // read baz snap
3111 {
3112 bufferlist bl;
3113 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
3114 ASSERT_EQ('h', bl[0]);
3115 }
3116
3117 ioctx.snap_set_read(librados::SNAP_HEAD);
3118
3119 // read foo
3120 {
3121 bufferlist bl;
3122 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3123 ASSERT_EQ('c', bl[0]);
3124 }
3125
3126 // read bar
3127 {
3128 bufferlist bl;
3129 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
3130 ASSERT_EQ('c', bl[0]);
3131 }
3132
3133 // read baz
3134 {
3135 bufferlist bl;
3136 ASSERT_EQ(-ENOENT, ioctx.read("baz", bl, 1, 0));
3137 }
3138
3139 // cleanup
3140 ioctx.selfmanaged_snap_remove(my_snaps[0]);
3141}
3142
3143TEST_F(LibRadosTwoPoolsECPP, PromoteSnapTrimRace) {
3144 // create object
3145 {
3146 bufferlist bl;
3147 bl.append("hi there");
3148 ObjectWriteOperation op;
3149 op.write_full(bl);
3150 ASSERT_EQ(0, ioctx.operate("foo", &op));
3151 }
3152
3153 // create a snapshot, clone
3154 vector<uint64_t> my_snaps(1);
3155 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3156 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3157 my_snaps));
3158 {
3159 bufferlist bl;
3160 bl.append("ciao!");
3161 ObjectWriteOperation op;
3162 op.write_full(bl);
3163 ASSERT_EQ(0, ioctx.operate("foo", &op));
3164 }
3165
3166 // configure cache
3167 bufferlist inbl;
3168 ASSERT_EQ(0, cluster.mon_command(
3169 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3170 "\", \"tierpool\": \"" + cache_pool_name +
3171 "\", \"force_nonempty\": \"--force-nonempty\" }",
3172 inbl, NULL, NULL));
3173 ASSERT_EQ(0, cluster.mon_command(
3174 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3175 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3176 inbl, NULL, NULL));
3177 ASSERT_EQ(0, cluster.mon_command(
3178 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3179 "\", \"mode\": \"writeback\"}",
3180 inbl, NULL, NULL));
3181
3182 // wait for maps to settle
3183 cluster.wait_for_latest_osdmap();
3184
3185 // delete the snap
3186 ASSERT_EQ(0, ioctx.selfmanaged_snap_remove(my_snaps[0]));
3187
3188 ioctx.snap_set_read(my_snaps[0]);
3189
3190 // read foo snap
3191 {
3192 bufferlist bl;
3193 ASSERT_EQ(-ENOENT, ioctx.read("foo", bl, 1, 0));
3194 }
3195
3196 // cleanup
3197 ioctx.selfmanaged_snap_remove(my_snaps[0]);
3198}
3199
3200TEST_F(LibRadosTwoPoolsECPP, Whiteout) {
3201 // create object
3202 {
3203 bufferlist bl;
3204 bl.append("hi there");
3205 ObjectWriteOperation op;
3206 op.write_full(bl);
3207 ASSERT_EQ(0, ioctx.operate("foo", &op));
3208 }
3209
3210 // configure cache
3211 bufferlist inbl;
3212 ASSERT_EQ(0, cluster.mon_command(
3213 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3214 "\", \"tierpool\": \"" + cache_pool_name +
3215 "\", \"force_nonempty\": \"--force-nonempty\" }",
3216 inbl, NULL, NULL));
3217 ASSERT_EQ(0, cluster.mon_command(
3218 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3219 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3220 inbl, NULL, NULL));
3221 ASSERT_EQ(0, cluster.mon_command(
3222 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3223 "\", \"mode\": \"writeback\"}",
3224 inbl, NULL, NULL));
3225
3226 // wait for maps to settle
3227 cluster.wait_for_latest_osdmap();
3228
3229 // create some whiteouts, verify they behave
3230 {
3231 ObjectWriteOperation op;
3232 op.assert_exists();
3233 op.remove();
3234 ASSERT_EQ(0, ioctx.operate("foo", &op));
3235 }
3236
3237 {
3238 ObjectWriteOperation op;
3239 op.assert_exists();
3240 op.remove();
3241 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
3242 }
3243 {
3244 ObjectWriteOperation op;
3245 op.assert_exists();
3246 op.remove();
3247 ASSERT_EQ(-ENOENT, ioctx.operate("bar", &op));
3248 }
3249
3250 // verify the whiteouts are there in the cache tier
3251 {
3252 NObjectIterator it = cache_ioctx.nobjects_begin();
3253 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3254 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
3255 ++it;
3256 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
3257 ++it;
3258 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3259 }
3260
3261 // delete a whiteout and verify it goes away
3262 ASSERT_EQ(-ENOENT, ioctx.remove("foo"));
3263 {
3264 ObjectWriteOperation op;
3265 op.remove();
3266 librados::AioCompletion *completion = cluster.aio_create_completion();
3267 ASSERT_EQ(0, ioctx.aio_operate("bar", completion, &op,
3268 librados::OPERATION_IGNORE_CACHE));
3269 completion->wait_for_safe();
3270 ASSERT_EQ(0, completion->get_return_value());
3271 completion->release();
3272
3273 NObjectIterator it = cache_ioctx.nobjects_begin();
3274 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3275 ASSERT_TRUE(it->get_oid() == string("foo"));
3276 ++it;
3277 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3278 }
3279
3280 // recreate an object and verify we can read it
3281 {
3282 bufferlist bl;
3283 bl.append("hi there");
3284 ObjectWriteOperation op;
3285 op.write_full(bl);
3286 ASSERT_EQ(0, ioctx.operate("foo", &op));
3287 }
3288 {
3289 bufferlist bl;
3290 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3291 ASSERT_EQ('h', bl[0]);
3292 }
3293}
3294
3295TEST_F(LibRadosTwoPoolsECPP, Evict) {
3296 // create object
3297 {
3298 bufferlist bl;
3299 bl.append("hi there");
3300 ObjectWriteOperation op;
3301 op.write_full(bl);
3302 ASSERT_EQ(0, ioctx.operate("foo", &op));
3303 }
3304
3305 // configure cache
3306 bufferlist inbl;
3307 ASSERT_EQ(0, cluster.mon_command(
3308 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3309 "\", \"tierpool\": \"" + cache_pool_name +
3310 "\", \"force_nonempty\": \"--force-nonempty\" }",
3311 inbl, NULL, NULL));
3312 ASSERT_EQ(0, cluster.mon_command(
3313 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3314 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3315 inbl, NULL, NULL));
3316 ASSERT_EQ(0, cluster.mon_command(
3317 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3318 "\", \"mode\": \"writeback\"}",
3319 inbl, NULL, NULL));
3320
3321 // wait for maps to settle
3322 cluster.wait_for_latest_osdmap();
3323
3324 // read, trigger a promote
3325 {
3326 bufferlist bl;
3327 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3328 }
3329
3330 // read, trigger a whiteout, and a dirty object
3331 {
3332 bufferlist bl;
3333 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
3334 ASSERT_EQ(-ENOENT, ioctx.read("bar", bl, 1, 0));
3335 ASSERT_EQ(0, ioctx.write("bar", bl, bl.length(), 0));
3336 }
3337
3338 // verify the object is present in the cache tier
3339 {
3340 NObjectIterator it = cache_ioctx.nobjects_begin();
3341 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3342 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
3343 ++it;
3344 ASSERT_TRUE(it->get_oid() == string("foo") || it->get_oid() == string("bar"));
3345 ++it;
3346 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3347 }
3348
3349 // pin
3350 {
3351 ObjectWriteOperation op;
3352 op.cache_pin();
3353 librados::AioCompletion *completion = cluster.aio_create_completion();
3354 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
3355 completion->wait_for_safe();
3356 ASSERT_EQ(0, completion->get_return_value());
3357 completion->release();
3358 }
3359
3360 // evict the pinned object with -EPERM
3361 {
3362 ObjectReadOperation op;
3363 op.cache_evict();
3364 librados::AioCompletion *completion = cluster.aio_create_completion();
3365 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
3366 librados::OPERATION_IGNORE_CACHE,
3367 NULL));
3368 completion->wait_for_safe();
3369 ASSERT_EQ(-EPERM, completion->get_return_value());
3370 completion->release();
3371 }
3372
3373 // unpin
3374 {
3375 ObjectWriteOperation op;
3376 op.cache_unpin();
3377 librados::AioCompletion *completion = cluster.aio_create_completion();
3378 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
3379 completion->wait_for_safe();
3380 ASSERT_EQ(0, completion->get_return_value());
3381 completion->release();
3382 }
3383
3384 // flush
3385 {
3386 ObjectReadOperation op;
3387 op.cache_flush();
3388 librados::AioCompletion *completion = cluster.aio_create_completion();
3389 ASSERT_EQ(0, cache_ioctx.aio_operate(
3390 "foo", completion, &op,
3391 librados::OPERATION_IGNORE_OVERLAY, NULL));
3392 completion->wait_for_safe();
3393 ASSERT_EQ(0, completion->get_return_value());
3394 completion->release();
3395 }
3396
3397 // verify clean
3398 {
3399 bool dirty = false;
3400 int r = -1;
3401 ObjectReadOperation op;
3402 op.is_dirty(&dirty, &r);
3403 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
3404 ASSERT_FALSE(dirty);
3405 ASSERT_EQ(0, r);
3406 }
3407
3408 // evict
3409 {
3410 ObjectReadOperation op;
3411 op.cache_evict();
3412 librados::AioCompletion *completion = cluster.aio_create_completion();
3413 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
3414 librados::OPERATION_IGNORE_CACHE,
3415 NULL));
3416 completion->wait_for_safe();
3417 ASSERT_EQ(0, completion->get_return_value());
3418 completion->release();
3419 }
3420 {
3421 ObjectReadOperation op;
3422 op.cache_evict();
3423 librados::AioCompletion *completion = cluster.aio_create_completion();
3424 ASSERT_EQ(0, cache_ioctx.aio_operate(
3425 "foo", completion, &op,
3426 librados::OPERATION_IGNORE_CACHE, NULL));
3427 completion->wait_for_safe();
3428 ASSERT_EQ(0, completion->get_return_value());
3429 completion->release();
3430 }
3431 {
3432 ObjectReadOperation op;
3433 op.cache_evict();
3434 librados::AioCompletion *completion = cluster.aio_create_completion();
3435 ASSERT_EQ(0, cache_ioctx.aio_operate(
3436 "bar", completion, &op,
3437 librados::OPERATION_IGNORE_CACHE, NULL));
3438 completion->wait_for_safe();
3439 ASSERT_EQ(-EBUSY, completion->get_return_value());
3440 completion->release();
3441 }
3442}
3443
3444TEST_F(LibRadosTwoPoolsECPP, EvictSnap) {
3445 // create object
3446 {
3447 bufferlist bl;
3448 bl.append("hi there");
3449 ObjectWriteOperation op;
3450 op.write_full(bl);
3451 ASSERT_EQ(0, ioctx.operate("foo", &op));
3452 }
3453 {
3454 bufferlist bl;
3455 bl.append("hi there");
3456 ObjectWriteOperation op;
3457 op.write_full(bl);
3458 ASSERT_EQ(0, ioctx.operate("bar", &op));
3459 }
3460 {
3461 bufferlist bl;
3462 bl.append("hi there");
3463 ObjectWriteOperation op;
3464 op.write_full(bl);
3465 ASSERT_EQ(0, ioctx.operate("baz", &op));
3466 }
3467 {
3468 bufferlist bl;
3469 bl.append("hi there");
3470 ObjectWriteOperation op;
3471 op.write_full(bl);
3472 ASSERT_EQ(0, ioctx.operate("bam", &op));
3473 }
3474
3475 // create a snapshot, clone
3476 vector<uint64_t> my_snaps(1);
3477 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
3478 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
3479 my_snaps));
3480 {
3481 bufferlist bl;
3482 bl.append("ciao!");
3483 ObjectWriteOperation op;
3484 op.write_full(bl);
3485 ASSERT_EQ(0, ioctx.operate("foo", &op));
3486 }
3487 {
3488 bufferlist bl;
3489 bl.append("ciao!");
3490 ObjectWriteOperation op;
3491 op.write_full(bl);
3492 ASSERT_EQ(0, ioctx.operate("bar", &op));
3493 }
3494 {
3495 ObjectWriteOperation op;
3496 op.remove();
3497 ASSERT_EQ(0, ioctx.operate("baz", &op));
3498 }
3499 {
3500 bufferlist bl;
3501 bl.append("ciao!");
3502 ObjectWriteOperation op;
3503 op.write_full(bl);
3504 ASSERT_EQ(0, ioctx.operate("bam", &op));
3505 }
3506
3507 // configure cache
3508 bufferlist inbl;
3509 ASSERT_EQ(0, cluster.mon_command(
3510 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3511 "\", \"tierpool\": \"" + cache_pool_name +
3512 "\", \"force_nonempty\": \"--force-nonempty\" }",
3513 inbl, NULL, NULL));
3514 ASSERT_EQ(0, cluster.mon_command(
3515 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3516 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3517 inbl, NULL, NULL));
3518 ASSERT_EQ(0, cluster.mon_command(
3519 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3520 "\", \"mode\": \"writeback\"}",
3521 inbl, NULL, NULL));
3522
3523 // wait for maps to settle
3524 cluster.wait_for_latest_osdmap();
3525
3526 // read, trigger a promote on the head
3527 {
3528 bufferlist bl;
3529 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3530 ASSERT_EQ('c', bl[0]);
3531 }
3532 {
3533 bufferlist bl;
3534 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
3535 ASSERT_EQ('c', bl[0]);
3536 }
3537
3538 // evict bam
3539 {
3540 ObjectReadOperation op;
3541 op.cache_evict();
3542 librados::AioCompletion *completion = cluster.aio_create_completion();
3543 ASSERT_EQ(0, cache_ioctx.aio_operate(
3544 "bam", completion, &op,
3545 librados::OPERATION_IGNORE_CACHE, NULL));
3546 completion->wait_for_safe();
3547 ASSERT_EQ(0, completion->get_return_value());
3548 completion->release();
3549 }
3550 {
3551 bufferlist bl;
3552 ObjectReadOperation op;
3553 op.read(1, 0, &bl, NULL);
3554 librados::AioCompletion *completion = cluster.aio_create_completion();
3555 ASSERT_EQ(0, cache_ioctx.aio_operate(
3556 "bam", completion, &op,
3557 librados::OPERATION_IGNORE_CACHE, NULL));
3558 completion->wait_for_safe();
3559 ASSERT_EQ(-ENOENT, completion->get_return_value());
3560 completion->release();
3561 }
3562
3563 // read foo snap
3564 ioctx.snap_set_read(my_snaps[0]);
3565 {
3566 bufferlist bl;
3567 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
3568 ASSERT_EQ('h', bl[0]);
3569 }
3570
3571 // evict foo snap
3572 {
3573 ObjectReadOperation op;
3574 op.cache_evict();
3575 librados::AioCompletion *completion = cluster.aio_create_completion();
3576 ASSERT_EQ(0, ioctx.aio_operate(
3577 "foo", completion, &op,
3578 librados::OPERATION_IGNORE_CACHE, NULL));
3579 completion->wait_for_safe();
3580 ASSERT_EQ(0, completion->get_return_value());
3581 completion->release();
3582 }
3583 // snap is gone...
3584 {
3585 bufferlist bl;
3586 ObjectReadOperation op;
3587 op.read(1, 0, &bl, NULL);
3588 librados::AioCompletion *completion = cluster.aio_create_completion();
3589 ASSERT_EQ(0, ioctx.aio_operate(
3590 "foo", completion, &op,
3591 librados::OPERATION_IGNORE_CACHE, NULL));
3592 completion->wait_for_safe();
3593 ASSERT_EQ(-ENOENT, completion->get_return_value());
3594 completion->release();
3595 }
3596 // head is still there...
3597 ioctx.snap_set_read(librados::SNAP_HEAD);
3598 {
3599 bufferlist bl;
3600 ObjectReadOperation op;
3601 op.read(1, 0, &bl, NULL);
3602 librados::AioCompletion *completion = cluster.aio_create_completion();
3603 ASSERT_EQ(0, ioctx.aio_operate(
3604 "foo", completion, &op,
3605 librados::OPERATION_IGNORE_CACHE, NULL));
3606 completion->wait_for_safe();
3607 ASSERT_EQ(0, completion->get_return_value());
3608 completion->release();
3609 }
3610
3611 // promote head + snap of bar
3612 ioctx.snap_set_read(librados::SNAP_HEAD);
3613 {
3614 bufferlist bl;
3615 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
3616 ASSERT_EQ('c', bl[0]);
3617 }
3618 ioctx.snap_set_read(my_snaps[0]);
3619 {
3620 bufferlist bl;
3621 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
3622 ASSERT_EQ('h', bl[0]);
3623 }
3624
3625 // evict bar head (fail)
3626 ioctx.snap_set_read(librados::SNAP_HEAD);
3627 {
3628 ObjectReadOperation op;
3629 op.cache_evict();
3630 librados::AioCompletion *completion = cluster.aio_create_completion();
3631 ASSERT_EQ(0, ioctx.aio_operate(
3632 "bar", completion, &op,
3633 librados::OPERATION_IGNORE_CACHE, NULL));
3634 completion->wait_for_safe();
3635 ASSERT_EQ(-EBUSY, completion->get_return_value());
3636 completion->release();
3637 }
3638
3639 // evict bar snap
3640 ioctx.snap_set_read(my_snaps[0]);
3641 {
3642 ObjectReadOperation op;
3643 op.cache_evict();
3644 librados::AioCompletion *completion = cluster.aio_create_completion();
3645 ASSERT_EQ(0, ioctx.aio_operate(
3646 "bar", completion, &op,
3647 librados::OPERATION_IGNORE_CACHE, NULL));
3648 completion->wait_for_safe();
3649 ASSERT_EQ(0, completion->get_return_value());
3650 completion->release();
3651 }
3652 // ...and then head
3653 ioctx.snap_set_read(librados::SNAP_HEAD);
3654 {
3655 bufferlist bl;
3656 ObjectReadOperation op;
3657 op.read(1, 0, &bl, NULL);
3658 librados::AioCompletion *completion = cluster.aio_create_completion();
3659 ASSERT_EQ(0, ioctx.aio_operate(
3660 "bar", completion, &op,
3661 librados::OPERATION_IGNORE_CACHE, NULL));
3662 completion->wait_for_safe();
3663 ASSERT_EQ(0, completion->get_return_value());
3664 completion->release();
3665 }
3666 {
3667 ObjectReadOperation op;
3668 op.cache_evict();
3669 librados::AioCompletion *completion = cluster.aio_create_completion();
3670 ASSERT_EQ(0, ioctx.aio_operate(
3671 "bar", completion, &op,
3672 librados::OPERATION_IGNORE_CACHE, NULL));
3673 completion->wait_for_safe();
3674 ASSERT_EQ(0, completion->get_return_value());
3675 completion->release();
3676 }
3677
3678 // cleanup
3679 ioctx.selfmanaged_snap_remove(my_snaps[0]);
3680}
3681
3682TEST_F(LibRadosTwoPoolsECPP, TryFlush) {
3683 // configure cache
3684 bufferlist inbl;
3685 ASSERT_EQ(0, cluster.mon_command(
3686 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3687 "\", \"tierpool\": \"" + cache_pool_name +
3688 "\", \"force_nonempty\": \"--force-nonempty\" }",
3689 inbl, NULL, NULL));
3690 ASSERT_EQ(0, cluster.mon_command(
3691 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3692 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3693 inbl, NULL, NULL));
3694 ASSERT_EQ(0, cluster.mon_command(
3695 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3696 "\", \"mode\": \"writeback\"}",
3697 inbl, NULL, NULL));
3698
3699 // wait for maps to settle
3700 cluster.wait_for_latest_osdmap();
3701
3702 // create object
3703 {
3704 bufferlist bl;
3705 bl.append("hi there");
3706 ObjectWriteOperation op;
3707 op.write_full(bl);
3708 ASSERT_EQ(0, ioctx.operate("foo", &op));
3709 }
3710
3711 // verify the object is present in the cache tier
3712 {
3713 NObjectIterator it = cache_ioctx.nobjects_begin();
3714 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3715 ASSERT_TRUE(it->get_oid() == string("foo"));
3716 ++it;
3717 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3718 }
3719
3720 // verify the object is NOT present in the base tier
3721 {
3722 NObjectIterator it = ioctx.nobjects_begin();
3723 ASSERT_TRUE(it == ioctx.nobjects_end());
3724 }
3725
3726 // verify dirty
3727 {
3728 bool dirty = false;
3729 int r = -1;
3730 ObjectReadOperation op;
3731 op.is_dirty(&dirty, &r);
3732 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
3733 ASSERT_TRUE(dirty);
3734 ASSERT_EQ(0, r);
3735 }
3736
3737 // pin
3738 {
3739 ObjectWriteOperation op;
3740 op.cache_pin();
3741 librados::AioCompletion *completion = cluster.aio_create_completion();
3742 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
3743 completion->wait_for_safe();
3744 ASSERT_EQ(0, completion->get_return_value());
3745 completion->release();
3746 }
3747
3748 // flush the pinned object with -EPERM
3749 {
3750 ObjectReadOperation op;
3751 op.cache_try_flush();
3752 librados::AioCompletion *completion = cluster.aio_create_completion();
3753 ASSERT_EQ(0, cache_ioctx.aio_operate(
3754 "foo", completion, &op,
3755 librados::OPERATION_IGNORE_OVERLAY |
3756 librados::OPERATION_SKIPRWLOCKS, NULL));
3757 completion->wait_for_safe();
3758 ASSERT_EQ(-EPERM, completion->get_return_value());
3759 completion->release();
3760 }
3761
3762 // unpin
3763 {
3764 ObjectWriteOperation op;
3765 op.cache_unpin();
3766 librados::AioCompletion *completion = cluster.aio_create_completion();
3767 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
3768 completion->wait_for_safe();
3769 ASSERT_EQ(0, completion->get_return_value());
3770 completion->release();
3771 }
3772
3773 // flush
3774 {
3775 ObjectReadOperation op;
3776 op.cache_try_flush();
3777 librados::AioCompletion *completion = cluster.aio_create_completion();
3778 ASSERT_EQ(0, cache_ioctx.aio_operate(
3779 "foo", completion, &op,
3780 librados::OPERATION_IGNORE_OVERLAY |
3781 librados::OPERATION_SKIPRWLOCKS, NULL));
3782 completion->wait_for_safe();
3783 ASSERT_EQ(0, completion->get_return_value());
3784 completion->release();
3785 }
3786
3787 // verify clean
3788 {
3789 bool dirty = false;
3790 int r = -1;
3791 ObjectReadOperation op;
3792 op.is_dirty(&dirty, &r);
3793 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
3794 ASSERT_FALSE(dirty);
3795 ASSERT_EQ(0, r);
3796 }
3797
3798 // verify in base tier
3799 {
3800 NObjectIterator it = ioctx.nobjects_begin();
3801 ASSERT_TRUE(it != ioctx.nobjects_end());
3802 ASSERT_TRUE(it->get_oid() == string("foo"));
3803 ++it;
3804 ASSERT_TRUE(it == ioctx.nobjects_end());
3805 }
3806
3807 // evict it
3808 {
3809 ObjectReadOperation op;
3810 op.cache_evict();
3811 librados::AioCompletion *completion = cluster.aio_create_completion();
3812 ASSERT_EQ(0, cache_ioctx.aio_operate(
3813 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
3814 completion->wait_for_safe();
3815 ASSERT_EQ(0, completion->get_return_value());
3816 completion->release();
3817 }
3818
3819 // verify no longer in cache tier
3820 {
3821 NObjectIterator it = cache_ioctx.nobjects_begin();
3822 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3823 }
3824}
3825
3826TEST_F(LibRadosTwoPoolsECPP, FailedFlush) {
3827 // configure cache
3828 bufferlist inbl;
3829 ASSERT_EQ(0, cluster.mon_command(
3830 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3831 "\", \"tierpool\": \"" + cache_pool_name +
3832 "\", \"force_nonempty\": \"--force-nonempty\" }",
3833 inbl, NULL, NULL));
3834 ASSERT_EQ(0, cluster.mon_command(
3835 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3836 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3837 inbl, NULL, NULL));
3838 ASSERT_EQ(0, cluster.mon_command(
3839 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3840 "\", \"mode\": \"writeback\"}",
3841 inbl, NULL, NULL));
3842
3843 // wait for maps to settle
3844 cluster.wait_for_latest_osdmap();
3845
3846 // create object
3847 {
3848 bufferlist bl;
3849 bl.append("hi there");
3850 ObjectWriteOperation op;
3851 op.write_full(bl);
3852 ASSERT_EQ(0, ioctx.operate("foo", &op));
3853 }
3854
3855 // verify the object is present in the cache tier
3856 {
3857 NObjectIterator it = cache_ioctx.nobjects_begin();
3858 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3859 ASSERT_TRUE(it->get_oid() == string("foo"));
3860 ++it;
3861 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3862 }
3863
3864 // verify the object is NOT present in the base tier
3865 {
3866 NObjectIterator it = ioctx.nobjects_begin();
3867 ASSERT_TRUE(it == ioctx.nobjects_end());
3868 }
3869
3870 // set omap
3871 {
3872 ObjectWriteOperation op;
3873 std::map<std::string, bufferlist> omap;
3874 omap["somekey"] = bufferlist();
3875 op.omap_set(omap);
3876 librados::AioCompletion *completion = cluster.aio_create_completion();
3877 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
3878 completion->wait_for_safe();
3879 ASSERT_EQ(0, completion->get_return_value());
3880 completion->release();
3881 }
3882
3883 // flush
3884 {
3885 ObjectReadOperation op;
3886 op.cache_flush();
3887 librados::AioCompletion *completion = cluster.aio_create_completion();
3888 ASSERT_EQ(0, cache_ioctx.aio_operate(
3889 "foo", completion, &op,
3890 librados::OPERATION_IGNORE_OVERLAY, NULL));
3891 completion->wait_for_safe();
3892 ASSERT_NE(0, completion->get_return_value());
3893 completion->release();
3894 }
3895
3896 // get omap
3897 {
3898 ObjectReadOperation op;
3899 bufferlist bl;
3900 int prval = 0;
3901 std::set<std::string> keys;
3902 keys.insert("somekey");
3903 std::map<std::string, bufferlist> map;
3904
3905 op.omap_get_vals_by_keys(keys, &map, &prval);
3906 librados::AioCompletion *completion = cluster.aio_create_completion();
3907 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op, &bl));
3908 sleep(5);
3909 bool completed = completion->is_complete();
3910 if( !completed ) {
3911 cache_ioctx.aio_cancel(completion);
3912 std::cerr << "Most probably test case will hang here, please reset manually" << std::endl;
3913 ASSERT_TRUE(completed); //in fact we are locked forever at test case shutdown unless fix for http://tracker.ceph.com/issues/14511 is applied. Seems there is no workaround for that
3914 }
3915 completion->release();
3916 }
3917 // verify still not in base tier
3918 {
3919 ASSERT_TRUE(ioctx.nobjects_begin() == ioctx.nobjects_end());
3920 }
3921 // erase it
3922 {
3923 ObjectWriteOperation op;
3924 op.remove();
3925 ASSERT_EQ(0, ioctx.operate("foo", &op));
3926 }
3927 // flush whiteout
3928 {
3929 ObjectReadOperation op;
3930 op.cache_flush();
3931 librados::AioCompletion *completion = cluster.aio_create_completion();
3932 ASSERT_EQ(0, cache_ioctx.aio_operate(
3933 "foo", completion, &op,
3934 librados::OPERATION_IGNORE_OVERLAY, NULL));
3935 completion->wait_for_safe();
3936 ASSERT_EQ(0, completion->get_return_value());
3937 completion->release();
3938 }
3939 // evict
3940 {
3941 ObjectReadOperation op;
3942 op.cache_evict();
3943 librados::AioCompletion *completion = cluster.aio_create_completion();
3944 ASSERT_EQ(0, cache_ioctx.aio_operate(
3945 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
3946 completion->wait_for_safe();
3947 ASSERT_EQ(0, completion->get_return_value());
3948 completion->release();
3949 }
3950
3951 // verify no longer in cache tier
3952 {
3953 NObjectIterator it = cache_ioctx.nobjects_begin();
3954 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
3955 }
3956 // or base tier
3957 {
3958 NObjectIterator it = ioctx.nobjects_begin();
3959 ASSERT_TRUE(it == ioctx.nobjects_end());
3960 }
3961}
3962
3963TEST_F(LibRadosTwoPoolsECPP, Flush) {
3964 // configure cache
3965 bufferlist inbl;
3966 ASSERT_EQ(0, cluster.mon_command(
3967 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
3968 "\", \"tierpool\": \"" + cache_pool_name +
3969 "\", \"force_nonempty\": \"--force-nonempty\" }",
3970 inbl, NULL, NULL));
3971 ASSERT_EQ(0, cluster.mon_command(
3972 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
3973 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
3974 inbl, NULL, NULL));
3975 ASSERT_EQ(0, cluster.mon_command(
3976 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
3977 "\", \"mode\": \"writeback\"}",
3978 inbl, NULL, NULL));
3979
3980 // wait for maps to settle
3981 cluster.wait_for_latest_osdmap();
3982
3983 uint64_t user_version = 0;
3984
3985 // create object
3986 {
3987 bufferlist bl;
3988 bl.append("hi there");
3989 ObjectWriteOperation op;
3990 op.write_full(bl);
3991 ASSERT_EQ(0, ioctx.operate("foo", &op));
3992 }
3993
3994 // verify the object is present in the cache tier
3995 {
3996 NObjectIterator it = cache_ioctx.nobjects_begin();
3997 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
3998 ASSERT_TRUE(it->get_oid() == string("foo"));
3999 ++it;
4000 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
4001 }
4002
4003 // verify the object is NOT present in the base tier
4004 {
4005 NObjectIterator it = ioctx.nobjects_begin();
4006 ASSERT_TRUE(it == ioctx.nobjects_end());
4007 }
4008
4009 // verify dirty
4010 {
4011 bool dirty = false;
4012 int r = -1;
4013 ObjectReadOperation op;
4014 op.is_dirty(&dirty, &r);
4015 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
4016 ASSERT_TRUE(dirty);
4017 ASSERT_EQ(0, r);
4018 user_version = cache_ioctx.get_last_version();
4019 }
4020
4021 // pin
4022 {
4023 ObjectWriteOperation op;
4024 op.cache_pin();
4025 librados::AioCompletion *completion = cluster.aio_create_completion();
4026 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
4027 completion->wait_for_safe();
4028 ASSERT_EQ(0, completion->get_return_value());
4029 completion->release();
4030 }
4031
4032 // flush the pinned object with -EPERM
4033 {
4034 ObjectReadOperation op;
4035 op.cache_try_flush();
4036 librados::AioCompletion *completion = cluster.aio_create_completion();
4037 ASSERT_EQ(0, cache_ioctx.aio_operate(
4038 "foo", completion, &op,
4039 librados::OPERATION_IGNORE_OVERLAY |
4040 librados::OPERATION_SKIPRWLOCKS, NULL));
4041 completion->wait_for_safe();
4042 ASSERT_EQ(-EPERM, completion->get_return_value());
4043 completion->release();
4044 }
4045
4046 // unpin
4047 {
4048 ObjectWriteOperation op;
4049 op.cache_unpin();
4050 librados::AioCompletion *completion = cluster.aio_create_completion();
4051 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
4052 completion->wait_for_safe();
4053 ASSERT_EQ(0, completion->get_return_value());
4054 completion->release();
4055 }
4056
4057 // flush
4058 {
4059 ObjectReadOperation op;
4060 op.cache_flush();
4061 librados::AioCompletion *completion = cluster.aio_create_completion();
4062 ASSERT_EQ(0, cache_ioctx.aio_operate(
4063 "foo", completion, &op,
4064 librados::OPERATION_IGNORE_OVERLAY, NULL));
4065 completion->wait_for_safe();
4066 ASSERT_EQ(0, completion->get_return_value());
4067 completion->release();
4068 }
4069
4070 // verify clean
4071 {
4072 bool dirty = false;
4073 int r = -1;
4074 ObjectReadOperation op;
4075 op.is_dirty(&dirty, &r);
4076 ASSERT_EQ(0, cache_ioctx.operate("foo", &op, NULL));
4077 ASSERT_FALSE(dirty);
4078 ASSERT_EQ(0, r);
4079 }
4080
4081 // verify in base tier
4082 {
4083 NObjectIterator it = ioctx.nobjects_begin();
4084 ASSERT_TRUE(it != ioctx.nobjects_end());
4085 ASSERT_TRUE(it->get_oid() == string("foo"));
4086 ++it;
4087 ASSERT_TRUE(it == ioctx.nobjects_end());
4088 }
4089
4090 // evict it
4091 {
4092 ObjectReadOperation op;
4093 op.cache_evict();
4094 librados::AioCompletion *completion = cluster.aio_create_completion();
4095 ASSERT_EQ(0, cache_ioctx.aio_operate(
4096 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
4097 completion->wait_for_safe();
4098 ASSERT_EQ(0, completion->get_return_value());
4099 completion->release();
4100 }
4101
4102 // verify no longer in cache tier
4103 {
4104 NObjectIterator it = cache_ioctx.nobjects_begin();
4105 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
4106 }
4107
4108 // read it again and verify the version is consistent
4109 {
4110 bufferlist bl;
4111 ASSERT_EQ(1, cache_ioctx.read("foo", bl, 1, 0));
4112 ASSERT_EQ(user_version, cache_ioctx.get_last_version());
4113 }
4114
4115 // erase it
4116 {
4117 ObjectWriteOperation op;
4118 op.remove();
4119 ASSERT_EQ(0, ioctx.operate("foo", &op));
4120 }
4121
4122 // flush whiteout
4123 {
4124 ObjectReadOperation op;
4125 op.cache_flush();
4126 librados::AioCompletion *completion = cluster.aio_create_completion();
4127 ASSERT_EQ(0, cache_ioctx.aio_operate(
4128 "foo", completion, &op,
4129 librados::OPERATION_IGNORE_OVERLAY, NULL));
4130 completion->wait_for_safe();
4131 ASSERT_EQ(0, completion->get_return_value());
4132 completion->release();
4133 }
4134
4135 // evict
4136 {
4137 ObjectReadOperation op;
4138 op.cache_evict();
4139 librados::AioCompletion *completion = cluster.aio_create_completion();
4140 ASSERT_EQ(0, cache_ioctx.aio_operate(
4141 "foo", completion, &op, librados::OPERATION_IGNORE_CACHE, NULL));
4142 completion->wait_for_safe();
4143 ASSERT_EQ(0, completion->get_return_value());
4144 completion->release();
4145 }
4146
4147 // verify no longer in cache tier
4148 {
4149 NObjectIterator it = cache_ioctx.nobjects_begin();
4150 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
4151 }
4152 // or base tier
4153 {
4154 NObjectIterator it = ioctx.nobjects_begin();
4155 ASSERT_TRUE(it == ioctx.nobjects_end());
4156 }
4157}
4158
4159TEST_F(LibRadosTwoPoolsECPP, FlushSnap) {
4160 // configure cache
4161 bufferlist inbl;
4162 ASSERT_EQ(0, cluster.mon_command(
4163 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4164 "\", \"tierpool\": \"" + cache_pool_name +
4165 "\", \"force_nonempty\": \"--force-nonempty\" }",
4166 inbl, NULL, NULL));
4167 ASSERT_EQ(0, cluster.mon_command(
4168 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4169 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4170 inbl, NULL, NULL));
4171 ASSERT_EQ(0, cluster.mon_command(
4172 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
4173 "\", \"mode\": \"writeback\"}",
4174 inbl, NULL, NULL));
4175
4176 // wait for maps to settle
4177 cluster.wait_for_latest_osdmap();
4178
4179 // create object
4180 {
4181 bufferlist bl;
4182 bl.append("a");
4183 ObjectWriteOperation op;
4184 op.write_full(bl);
4185 ASSERT_EQ(0, ioctx.operate("foo", &op));
4186 }
4187
4188 // create a snapshot, clone
4189 vector<uint64_t> my_snaps(1);
4190 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4191 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4192 my_snaps));
4193 {
4194 bufferlist bl;
4195 bl.append("b");
4196 ObjectWriteOperation op;
4197 op.write_full(bl);
4198 ASSERT_EQ(0, ioctx.operate("foo", &op));
4199 }
4200
4201 // and another
4202 my_snaps.resize(2);
4203 my_snaps[1] = my_snaps[0];
4204 ASSERT_EQ(0, ioctx.selfmanaged_snap_create(&my_snaps[0]));
4205 ASSERT_EQ(0, ioctx.selfmanaged_snap_set_write_ctx(my_snaps[0],
4206 my_snaps));
4207 {
4208 bufferlist bl;
4209 bl.append("c");
4210 ObjectWriteOperation op;
4211 op.write_full(bl);
4212 ASSERT_EQ(0, ioctx.operate("foo", &op));
4213 }
4214
4215 // verify the object is present in the cache tier
4216 {
4217 NObjectIterator it = cache_ioctx.nobjects_begin();
4218 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
4219 ASSERT_TRUE(it->get_oid() == string("foo"));
4220 ++it;
4221 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
4222 }
4223
4224 // verify the object is NOT present in the base tier
4225 {
4226 NObjectIterator it = ioctx.nobjects_begin();
4227 ASSERT_TRUE(it == ioctx.nobjects_end());
4228 }
4229
4230 // flush on head (should fail)
4231 ioctx.snap_set_read(librados::SNAP_HEAD);
4232 {
4233 ObjectReadOperation op;
4234 op.cache_flush();
4235 librados::AioCompletion *completion = cluster.aio_create_completion();
4236 ASSERT_EQ(0, ioctx.aio_operate(
4237 "foo", completion, &op,
4238 librados::OPERATION_IGNORE_CACHE, NULL));
4239 completion->wait_for_safe();
4240 ASSERT_EQ(-EBUSY, completion->get_return_value());
4241 completion->release();
4242 }
4243 // flush on recent snap (should fail)
4244 ioctx.snap_set_read(my_snaps[0]);
4245 {
4246 ObjectReadOperation op;
4247 op.cache_flush();
4248 librados::AioCompletion *completion = cluster.aio_create_completion();
4249 ASSERT_EQ(0, ioctx.aio_operate(
4250 "foo", completion, &op,
4251 librados::OPERATION_IGNORE_CACHE, NULL));
4252 completion->wait_for_safe();
4253 ASSERT_EQ(-EBUSY, completion->get_return_value());
4254 completion->release();
4255 }
4256 // flush on oldest snap
4257 ioctx.snap_set_read(my_snaps[1]);
4258 {
4259 ObjectReadOperation op;
4260 op.cache_flush();
4261 librados::AioCompletion *completion = cluster.aio_create_completion();
4262 ASSERT_EQ(0, ioctx.aio_operate(
4263 "foo", completion, &op,
4264 librados::OPERATION_IGNORE_CACHE, NULL));
4265 completion->wait_for_safe();
4266 ASSERT_EQ(0, completion->get_return_value());
4267 completion->release();
4268 }
4269 // flush on next oldest snap
4270 ioctx.snap_set_read(my_snaps[0]);
4271 {
4272 ObjectReadOperation op;
4273 op.cache_flush();
4274 librados::AioCompletion *completion = cluster.aio_create_completion();
4275 ASSERT_EQ(0, ioctx.aio_operate(
4276 "foo", completion, &op,
4277 librados::OPERATION_IGNORE_CACHE, NULL));
4278 completion->wait_for_safe();
4279 ASSERT_EQ(0, completion->get_return_value());
4280 completion->release();
4281 }
4282 // flush on head
4283 ioctx.snap_set_read(librados::SNAP_HEAD);
4284 {
4285 ObjectReadOperation op;
4286 op.cache_flush();
4287 librados::AioCompletion *completion = cluster.aio_create_completion();
4288 ASSERT_EQ(0, ioctx.aio_operate(
4289 "foo", completion, &op,
4290 librados::OPERATION_IGNORE_CACHE, NULL));
4291 completion->wait_for_safe();
4292 ASSERT_EQ(0, completion->get_return_value());
4293 completion->release();
4294 }
4295
4296 // verify i can read the snaps from the cache pool
4297 ioctx.snap_set_read(librados::SNAP_HEAD);
4298 {
4299 bufferlist bl;
4300 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4301 ASSERT_EQ('c', bl[0]);
4302 }
4303 ioctx.snap_set_read(my_snaps[0]);
4304 {
4305 bufferlist bl;
4306 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4307 ASSERT_EQ('b', bl[0]);
4308 }
4309 ioctx.snap_set_read(my_snaps[1]);
4310 {
4311 bufferlist bl;
4312 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4313 ASSERT_EQ('a', bl[0]);
4314 }
4315
4316 // tear down tiers
4317 ASSERT_EQ(0, cluster.mon_command(
4318 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
4319 "\"}",
4320 inbl, NULL, NULL));
4321
4322 // wait for maps to settle
4323 cluster.wait_for_latest_osdmap();
4324
4325 // verify i can read the snaps from the base pool
4326 ioctx.snap_set_read(librados::SNAP_HEAD);
4327 {
4328 bufferlist bl;
4329 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4330 ASSERT_EQ('c', bl[0]);
4331 }
4332 ioctx.snap_set_read(my_snaps[0]);
4333 {
4334 bufferlist bl;
4335 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4336 ASSERT_EQ('b', bl[0]);
4337 }
4338 ioctx.snap_set_read(my_snaps[1]);
4339 {
4340 bufferlist bl;
4341 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
4342 ASSERT_EQ('a', bl[0]);
4343 }
4344
4345 ASSERT_EQ(0, cluster.mon_command(
4346 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4347 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4348 inbl, NULL, NULL));
4349 cluster.wait_for_latest_osdmap();
4350
4351 // cleanup
4352 ioctx.selfmanaged_snap_remove(my_snaps[0]);
4353}
4354
4355TEST_F(LibRadosTierECPP, FlushWriteRaces) {
4356 Rados cluster;
4357 std::string pool_name = get_temp_pool_name();
4358 std::string cache_pool_name = pool_name + "-cache";
4359 ASSERT_EQ("", create_one_pool_pp(pool_name, cluster));
4360 ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str()));
4361 IoCtx cache_ioctx;
4362 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
4363 IoCtx ioctx;
4364 ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
4365
4366 // configure cache
4367 bufferlist inbl;
4368 ASSERT_EQ(0, cluster.mon_command(
4369 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4370 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
4371 inbl, NULL, NULL));
4372 ASSERT_EQ(0, cluster.mon_command(
4373 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4374 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4375 inbl, NULL, NULL));
4376 ASSERT_EQ(0, cluster.mon_command(
4377 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
4378 "\", \"mode\": \"writeback\"}",
4379 inbl, NULL, NULL));
4380
4381 // wait for maps to settle
4382 cluster.wait_for_latest_osdmap();
4383
4384 // create/dirty object
4385 bufferlist bl;
4386 bl.append("hi there");
4387 {
4388 ObjectWriteOperation op;
4389 op.write_full(bl);
4390 ASSERT_EQ(0, ioctx.operate("foo", &op));
4391 }
4392
4393 // flush + write
4394 {
4395 ObjectReadOperation op;
4396 op.cache_flush();
4397 librados::AioCompletion *completion = cluster.aio_create_completion();
4398 ASSERT_EQ(0, cache_ioctx.aio_operate(
4399 "foo", completion, &op,
4400 librados::OPERATION_IGNORE_OVERLAY, NULL));
4401
4402 ObjectWriteOperation op2;
4403 op2.write_full(bl);
4404 librados::AioCompletion *completion2 = cluster.aio_create_completion();
4405 ASSERT_EQ(0, ioctx.aio_operate(
4406 "foo", completion2, &op2, 0));
4407
4408 completion->wait_for_safe();
4409 completion2->wait_for_safe();
4410 ASSERT_EQ(0, completion->get_return_value());
4411 ASSERT_EQ(0, completion2->get_return_value());
4412 completion->release();
4413 completion2->release();
4414 }
4415
4416 int tries = 1000;
4417 do {
4418 // create/dirty object
4419 {
4420 bufferlist bl;
4421 bl.append("hi there");
4422 ObjectWriteOperation op;
4423 op.write_full(bl);
4424 ASSERT_EQ(0, ioctx.operate("foo", &op));
4425 }
4426
4427 // try-flush + write
4428 {
4429 ObjectReadOperation op;
4430 op.cache_try_flush();
4431 librados::AioCompletion *completion = cluster.aio_create_completion();
4432 ASSERT_EQ(0, cache_ioctx.aio_operate(
4433 "foo", completion, &op,
4434 librados::OPERATION_IGNORE_OVERLAY |
4435 librados::OPERATION_SKIPRWLOCKS, NULL));
4436
4437 ObjectWriteOperation op2;
4438 op2.write_full(bl);
4439 librados::AioCompletion *completion2 = cluster.aio_create_completion();
4440 ASSERT_EQ(0, ioctx.aio_operate("foo", completion2, &op2, 0));
4441
4442 completion->wait_for_safe();
4443 completion2->wait_for_safe();
4444 int r = completion->get_return_value();
4445 ASSERT_TRUE(r == -EBUSY || r == 0);
4446 ASSERT_EQ(0, completion2->get_return_value());
4447 completion->release();
4448 completion2->release();
4449 if (r == -EBUSY)
4450 break;
4451 cout << "didn't get EBUSY, trying again" << std::endl;
4452 }
4453 ASSERT_TRUE(--tries);
4454 } while (true);
4455
4456 // tear down tiers
4457 ASSERT_EQ(0, cluster.mon_command(
4458 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
4459 "\"}",
4460 inbl, NULL, NULL));
4461 ASSERT_EQ(0, cluster.mon_command(
4462 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
4463 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
4464 inbl, NULL, NULL));
4465
4466 // wait for maps to settle before next test
4467 cluster.wait_for_latest_osdmap();
4468
4469 ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str()));
4470 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
4471}
4472
4473TEST_F(LibRadosTwoPoolsECPP, FlushTryFlushRaces) {
4474 // configure cache
4475 bufferlist inbl;
4476 ASSERT_EQ(0, cluster.mon_command(
4477 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4478 "\", \"tierpool\": \"" + cache_pool_name +
4479 "\", \"force_nonempty\": \"--force-nonempty\" }",
4480 inbl, NULL, NULL));
4481 ASSERT_EQ(0, cluster.mon_command(
4482 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4483 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4484 inbl, NULL, NULL));
4485 ASSERT_EQ(0, cluster.mon_command(
4486 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
4487 "\", \"mode\": \"writeback\"}",
4488 inbl, NULL, NULL));
4489
4490 // wait for maps to settle
4491 cluster.wait_for_latest_osdmap();
4492
4493 // create/dirty object
4494 {
4495 bufferlist bl;
4496 bl.append("hi there");
4497 ObjectWriteOperation op;
4498 op.write_full(bl);
4499 ASSERT_EQ(0, ioctx.operate("foo", &op));
4500 }
4501
4502 // flush + flush
4503 {
4504 ObjectReadOperation op;
4505 op.cache_flush();
4506 librados::AioCompletion *completion = cluster.aio_create_completion();
4507 ASSERT_EQ(0, cache_ioctx.aio_operate(
4508 "foo", completion, &op,
4509 librados::OPERATION_IGNORE_OVERLAY, NULL));
4510
4511 ObjectReadOperation op2;
4512 op2.cache_flush();
4513 librados::AioCompletion *completion2 = cluster.aio_create_completion();
4514 ASSERT_EQ(0, cache_ioctx.aio_operate(
4515 "foo", completion2, &op2,
4516 librados::OPERATION_IGNORE_OVERLAY, NULL));
4517
4518 completion->wait_for_safe();
4519 completion2->wait_for_safe();
4520 ASSERT_EQ(0, completion->get_return_value());
4521 ASSERT_EQ(0, completion2->get_return_value());
4522 completion->release();
4523 completion2->release();
4524 }
4525
4526 // create/dirty object
4527 {
4528 bufferlist bl;
4529 bl.append("hi there");
4530 ObjectWriteOperation op;
4531 op.write_full(bl);
4532 ASSERT_EQ(0, ioctx.operate("foo", &op));
4533 }
4534
4535 // flush + try-flush
4536 {
4537 ObjectReadOperation op;
4538 op.cache_flush();
4539 librados::AioCompletion *completion = cluster.aio_create_completion();
4540 ASSERT_EQ(0, cache_ioctx.aio_operate(
4541 "foo", completion, &op,
4542 librados::OPERATION_IGNORE_OVERLAY, NULL));
4543
4544 ObjectReadOperation op2;
4545 op2.cache_try_flush();
4546 librados::AioCompletion *completion2 = cluster.aio_create_completion();
4547 ASSERT_EQ(0, cache_ioctx.aio_operate(
4548 "foo", completion2, &op2,
4549 librados::OPERATION_IGNORE_OVERLAY |
4550 librados::OPERATION_SKIPRWLOCKS, NULL));
4551
4552 completion->wait_for_safe();
4553 completion2->wait_for_safe();
4554 ASSERT_EQ(0, completion->get_return_value());
4555 ASSERT_EQ(0, completion2->get_return_value());
4556 completion->release();
4557 completion2->release();
4558 }
4559
4560 // create/dirty object
4561 int tries = 1000;
4562 do {
4563 {
4564 bufferlist bl;
4565 bl.append("hi there");
4566 ObjectWriteOperation op;
4567 op.write_full(bl);
4568 ASSERT_EQ(0, ioctx.operate("foo", &op));
4569 }
4570
4571 // try-flush + flush
4572 // (flush will not piggyback on try-flush)
4573 {
4574 ObjectReadOperation op;
4575 op.cache_try_flush();
4576 librados::AioCompletion *completion = cluster.aio_create_completion();
4577 ASSERT_EQ(0, cache_ioctx.aio_operate(
4578 "foo", completion, &op,
4579 librados::OPERATION_IGNORE_OVERLAY |
4580 librados::OPERATION_SKIPRWLOCKS, NULL));
4581
4582 ObjectReadOperation op2;
4583 op2.cache_flush();
4584 librados::AioCompletion *completion2 = cluster.aio_create_completion();
4585 ASSERT_EQ(0, cache_ioctx.aio_operate(
4586 "foo", completion2, &op2,
4587 librados::OPERATION_IGNORE_OVERLAY, NULL));
4588
4589 completion->wait_for_safe();
4590 completion2->wait_for_safe();
4591 int r = completion->get_return_value();
4592 ASSERT_TRUE(r == -EBUSY || r == 0);
4593 ASSERT_EQ(0, completion2->get_return_value());
4594 completion->release();
4595 completion2->release();
4596 if (r == -EBUSY)
4597 break;
4598 cout << "didn't get EBUSY, trying again" << std::endl;
4599 }
4600 ASSERT_TRUE(--tries);
4601 } while (true);
4602
4603 // create/dirty object
4604 {
4605 bufferlist bl;
4606 bl.append("hi there");
4607 ObjectWriteOperation op;
4608 op.write_full(bl);
4609 ASSERT_EQ(0, ioctx.operate("foo", &op));
4610 }
4611
4612 // try-flush + try-flush
4613 {
4614 ObjectReadOperation op;
4615 op.cache_try_flush();
4616 librados::AioCompletion *completion = cluster.aio_create_completion();
4617 ASSERT_EQ(0, cache_ioctx.aio_operate(
4618 "foo", completion, &op,
4619 librados::OPERATION_IGNORE_OVERLAY |
4620 librados::OPERATION_SKIPRWLOCKS, NULL));
4621
4622 ObjectReadOperation op2;
4623 op2.cache_try_flush();
4624 librados::AioCompletion *completion2 = cluster.aio_create_completion();
4625 ASSERT_EQ(0, cache_ioctx.aio_operate(
4626 "foo", completion2, &op2,
4627 librados::OPERATION_IGNORE_OVERLAY |
4628 librados::OPERATION_SKIPRWLOCKS, NULL));
4629
4630 completion->wait_for_safe();
4631 completion2->wait_for_safe();
4632 ASSERT_EQ(0, completion->get_return_value());
4633 ASSERT_EQ(0, completion2->get_return_value());
4634 completion->release();
4635 completion2->release();
4636 }
4637}
4638
4639TEST_F(LibRadosTwoPoolsECPP, TryFlushReadRace) {
4640 // configure cache
4641 bufferlist inbl;
4642 ASSERT_EQ(0, cluster.mon_command(
4643 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4644 "\", \"tierpool\": \"" + cache_pool_name +
4645 "\", \"force_nonempty\": \"--force-nonempty\" }",
4646 inbl, NULL, NULL));
4647 ASSERT_EQ(0, cluster.mon_command(
4648 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4649 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4650 inbl, NULL, NULL));
4651 ASSERT_EQ(0, cluster.mon_command(
4652 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
4653 "\", \"mode\": \"writeback\"}",
4654 inbl, NULL, NULL));
4655
4656 // wait for maps to settle
4657 cluster.wait_for_latest_osdmap();
4658
4659 // create/dirty object
4660 {
4661 bufferlist bl;
4662 bl.append("hi there");
4663 bufferptr bp(4000000); // make it big!
4664 bp.zero();
4665 bl.append(bp);
4666 ObjectWriteOperation op;
4667 op.write_full(bl);
4668 ASSERT_EQ(0, ioctx.operate("foo", &op));
4669 }
4670
4671 // start a continuous stream of reads
4672 read_ioctx = &ioctx;
4673 test_lock.Lock();
4674 for (int i = 0; i < max_reads; ++i) {
4675 start_flush_read();
4676 num_reads++;
4677 }
4678 test_lock.Unlock();
4679
4680 // try-flush
4681 ObjectReadOperation op;
4682 op.cache_try_flush();
4683 librados::AioCompletion *completion = cluster.aio_create_completion();
4684 ASSERT_EQ(0, cache_ioctx.aio_operate(
4685 "foo", completion, &op,
4686 librados::OPERATION_IGNORE_OVERLAY |
4687 librados::OPERATION_SKIPRWLOCKS, NULL));
4688
4689 completion->wait_for_safe();
4690 ASSERT_EQ(0, completion->get_return_value());
4691 completion->release();
4692
4693 // stop reads
4694 test_lock.Lock();
4695 max_reads = 0;
4696 while (num_reads > 0)
4697 cond.Wait(test_lock);
4698 test_lock.Unlock();
4699}
4700
4701TEST_F(LibRadosTierECPP, CallForcesPromote) {
4702 Rados cluster;
4703 std::string pool_name = get_temp_pool_name();
4704 std::string cache_pool_name = pool_name + "-cache";
4705 ASSERT_EQ("", create_one_ec_pool_pp(pool_name, cluster));
4706 ASSERT_EQ(0, cluster.pool_create(cache_pool_name.c_str()));
4707 IoCtx cache_ioctx;
4708 ASSERT_EQ(0, cluster.ioctx_create(cache_pool_name.c_str(), cache_ioctx));
4709 IoCtx ioctx;
4710 ASSERT_EQ(0, cluster.ioctx_create(pool_name.c_str(), ioctx));
4711
4712 // configure cache
4713 bufferlist inbl;
4714 ASSERT_EQ(0, cluster.mon_command(
4715 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4716 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
4717 inbl, NULL, NULL));
4718 ASSERT_EQ(0, cluster.mon_command(
4719 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
4720 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
4721 inbl, NULL, NULL));
4722 ASSERT_EQ(0, cluster.mon_command(
4723 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
4724 "\", \"mode\": \"writeback\"}",
4725 inbl, NULL, NULL));
4726
4727 // set things up such that the op would normally be proxied
4728 ASSERT_EQ(0, cluster.mon_command(
4729 set_pool_str(cache_pool_name, "hit_set_count", 2),
4730 inbl, NULL, NULL));
4731 ASSERT_EQ(0, cluster.mon_command(
4732 set_pool_str(cache_pool_name, "hit_set_period", 600),
4733 inbl, NULL, NULL));
4734 ASSERT_EQ(0, cluster.mon_command(
4735 set_pool_str(cache_pool_name, "hit_set_type",
4736 "explicit_object"),
4737 inbl, NULL, NULL));
4738 ASSERT_EQ(0, cluster.mon_command(
4739 set_pool_str(cache_pool_name, "min_read_recency_for_promote",
4740 "4"),
4741 inbl, NULL, NULL));
4742
4743 // wait for maps to settle
4744 cluster.wait_for_latest_osdmap();
4745
4746 // create/dirty object
4747 bufferlist bl;
4748 bl.append("hi there");
4749 {
4750 ObjectWriteOperation op;
4751 op.write_full(bl);
4752 ASSERT_EQ(0, ioctx.operate("foo", &op));
4753 }
4754
4755 // flush
4756 {
4757 ObjectReadOperation op;
4758 op.cache_flush();
4759 librados::AioCompletion *completion = cluster.aio_create_completion();
4760 ASSERT_EQ(0, cache_ioctx.aio_operate(
4761 "foo", completion, &op,
4762 librados::OPERATION_IGNORE_OVERLAY, NULL));
4763 completion->wait_for_safe();
4764 ASSERT_EQ(0, completion->get_return_value());
4765 completion->release();
4766 }
4767
4768 // evict
4769 {
4770 ObjectReadOperation op;
4771 op.cache_evict();
4772 librados::AioCompletion *completion = cluster.aio_create_completion();
4773 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op,
4774 librados::OPERATION_IGNORE_CACHE,
4775 NULL));
4776 completion->wait_for_safe();
4777 ASSERT_EQ(0, completion->get_return_value());
4778 completion->release();
4779 }
4780
4781 // call
4782 {
4783 ObjectReadOperation op;
4784 bufferlist bl;
4785 op.exec("rbd", "get_id", bl);
4786 bufferlist out;
4787 // should get EIO (not an rbd object), not -EOPNOTSUPP (we didn't promote)
4788 ASSERT_EQ(-5, ioctx.operate("foo", &op, &out));
4789 }
4790
4791 // make sure foo is back in the cache tier
4792 {
4793 NObjectIterator it = cache_ioctx.nobjects_begin();
4794 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
4795 ASSERT_TRUE(it->get_oid() == string("foo"));
4796 ++it;
4797 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
4798 }
4799
4800 // tear down tiers
4801 ASSERT_EQ(0, cluster.mon_command(
4802 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
4803 "\"}",
4804 inbl, NULL, NULL));
4805 ASSERT_EQ(0, cluster.mon_command(
4806 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
4807 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
4808 inbl, NULL, NULL));
4809
4810 // wait for maps to settle before next test
4811 cluster.wait_for_latest_osdmap();
4812
4813 ASSERT_EQ(0, cluster.pool_delete(cache_pool_name.c_str()));
4814 ASSERT_EQ(0, destroy_one_pool_pp(pool_name, cluster));
4815}
4816
4817TEST_F(LibRadosTierECPP, HitSetNone) {
4818 {
4819 list< pair<time_t,time_t> > ls;
4820 AioCompletion *c = librados::Rados::aio_create_completion();
4821 ASSERT_EQ(0, ioctx.hit_set_list(123, c, &ls));
4822 c->wait_for_complete();
4823 ASSERT_EQ(0, c->get_return_value());
4824 ASSERT_TRUE(ls.empty());
4825 c->release();
4826 }
4827 {
4828 bufferlist bl;
4829 AioCompletion *c = librados::Rados::aio_create_completion();
4830 ASSERT_EQ(0, ioctx.hit_set_get(123, c, 12345, &bl));
4831 c->wait_for_complete();
4832 ASSERT_EQ(-ENOENT, c->get_return_value());
4833 c->release();
4834 }
4835}
4836
4837TEST_F(LibRadosTwoPoolsECPP, HitSetRead) {
4838 // make it a tier
4839 bufferlist inbl;
4840 ASSERT_EQ(0, cluster.mon_command(
4841 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4842 "\", \"tierpool\": \"" + cache_pool_name +
4843 "\", \"force_nonempty\": \"--force-nonempty\" }",
4844 inbl, NULL, NULL));
4845
4846 // enable hitset tracking for this pool
4847 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", 2),
4848 inbl, NULL, NULL));
4849 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", 600),
4850 inbl, NULL, NULL));
4851 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type",
4852 "explicit_object"),
4853 inbl, NULL, NULL));
4854
4855 // wait for maps to settle
4856 cluster.wait_for_latest_osdmap();
4857
4858 cache_ioctx.set_namespace("");
4859
4860 // keep reading until we see our object appear in the HitSet
4861 utime_t start = ceph_clock_now();
4862 utime_t hard_stop = start + utime_t(600, 0);
4863
4864 while (true) {
4865 utime_t now = ceph_clock_now();
4866 ASSERT_TRUE(now < hard_stop);
4867
4868 string name = "foo";
4869 uint32_t hash;
4870 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
4871 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash,
4872 cluster.pool_lookup(cache_pool_name.c_str()), "");
4873
4874 bufferlist bl;
4875 ASSERT_EQ(-ENOENT, cache_ioctx.read("foo", bl, 1, 0));
4876
4877 bufferlist hbl;
4878 AioCompletion *c = librados::Rados::aio_create_completion();
4879 ASSERT_EQ(0, cache_ioctx.hit_set_get(hash, c, now.sec(), &hbl));
4880 c->wait_for_complete();
4881 c->release();
4882
4883 if (hbl.length()) {
4884 bufferlist::iterator p = hbl.begin();
4885 HitSet hs;
4886 ::decode(hs, p);
4887 if (hs.contains(oid)) {
4888 cout << "ok, hit_set contains " << oid << std::endl;
4889 break;
4890 }
4891 cout << "hmm, not in HitSet yet" << std::endl;
4892 } else {
4893 cout << "hmm, no HitSet yet" << std::endl;
4894 }
4895
4896 sleep(1);
4897 }
4898}
4899
// disable this test until hitset-get reliably works on EC pools
#if 0
// Write 1000 small objects with explicit_hash hit-set tracking enabled, fetch
// the most recent HitSet of every PG, and require each written object to be
// contained in at least one of them.
TEST_F(LibRadosTierECPP, HitSetWrite) {
  int num_pg = _get_pg_num(cluster, pool_name);
  assert(num_pg > 0);

  // enable hitset tracking for this pool
  bufferlist inbl;
  ASSERT_EQ(0, cluster.mon_command(
              set_pool_str(pool_name, "hit_set_count", 8),
              inbl, NULL, NULL));
  ASSERT_EQ(0, cluster.mon_command(
              set_pool_str(pool_name, "hit_set_period", 600),
              inbl, NULL, NULL));
  ASSERT_EQ(0, cluster.mon_command(
              set_pool_str(pool_name, "hit_set_type", "explicit_hash"),
              inbl, NULL, NULL));

  // wait for maps to settle
  cluster.wait_for_latest_osdmap();

  ioctx.set_namespace("");

  // do a bunch of writes
  for (int n = 0; n < 1000; ++n) {
    bufferlist one_byte;
    one_byte.append("a");
    ASSERT_EQ(0, ioctx.write(stringify(n), one_byte, 1, 0));
  }

  // get HitSets
  std::map<int,HitSet> hitsets;
  for (int pg = 0; pg < num_pg; ++pg) {
    list< pair<time_t,time_t> > ls;
    AioCompletion *done = librados::Rados::aio_create_completion();
    ASSERT_EQ(0, ioctx.hit_set_list(pg, done, &ls));
    done->wait_for_complete();
    done->release();
    std::cout << "pg " << pg << " ls " << ls << std::endl;
    ASSERT_FALSE(ls.empty());

    // get the latest
    done = librados::Rados::aio_create_completion();
    bufferlist encoded;
    ASSERT_EQ(0, ioctx.hit_set_get(pg, done, ls.back().first, &encoded));
    done->wait_for_complete();
    done->release();

    //std::cout << "bl len is " << bl.length() << "\n";
    //bl.hexdump(std::cout);
    //std::cout << std::endl;

    bufferlist::iterator p = encoded.begin();
    ::decode(hitsets[pg], p);

    // cope with racing splits by refreshing pg_num
    if (pg == num_pg - 1)
      num_pg = _get_pg_num(cluster, pool_name);
  }

  // every written object has to be present in some PG's hit set
  for (int n = 0; n < 1000; ++n) {
    string obj_name = stringify(n);
    uint32_t hash = ioctx.get_object_hash_position(obj_name);
    hobject_t oid(sobject_t(obj_name, CEPH_NOSNAP), "", hash,
                  cluster.pool_lookup(pool_name.c_str()), "");
    std::cout << "checking for " << oid << std::endl;
    bool found = false;
    for (int pg = 0; pg < num_pg && !found; ++pg) {
      found = hitsets[pg].contains(oid);
    }
    ASSERT_TRUE(found);
  }
}
#endif
4975
4976TEST_F(LibRadosTwoPoolsECPP, HitSetTrim) {
4977 unsigned count = 3;
4978 unsigned period = 3;
4979
4980 // make it a tier
4981 bufferlist inbl;
4982 ASSERT_EQ(0, cluster.mon_command(
4983 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
4984 "\", \"tierpool\": \"" + cache_pool_name +
4985 "\", \"force_nonempty\": \"--force-nonempty\" }",
4986 inbl, NULL, NULL));
4987
4988 // enable hitset tracking for this pool
4989 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_count", count),
4990 inbl, NULL, NULL));
4991 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_period", period),
4992 inbl, NULL, NULL));
4993 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
4994 inbl, NULL, NULL));
4995 ASSERT_EQ(0, cluster.mon_command(set_pool_str(cache_pool_name, "hit_set_fpp", ".01"),
4996 inbl, NULL, NULL));
4997
4998 // wait for maps to settle
4999 cluster.wait_for_latest_osdmap();
5000
5001 cache_ioctx.set_namespace("");
5002
5003 // do a bunch of writes and make sure the hitsets rotate
5004 utime_t start = ceph_clock_now();
5005 utime_t hard_stop = start + utime_t(count * period * 50, 0);
5006
5007 time_t first = 0;
5008 int bsize = alignment;
5009 char *buf = (char *)new char[bsize];
5010 memset(buf, 'f', bsize);
5011
5012 while (true) {
5013 string name = "foo";
5014 uint32_t hash;
5015 ASSERT_EQ(0, cache_ioctx.get_object_hash_position2(name, &hash));
5016 hobject_t oid(sobject_t(name, CEPH_NOSNAP), "", hash, -1, "");
5017
5018 bufferlist bl;
5019 bl.append(buf, bsize);
5020 ASSERT_EQ(0, cache_ioctx.append("foo", bl, bsize));
5021
5022 list<pair<time_t, time_t> > ls;
5023 AioCompletion *c = librados::Rados::aio_create_completion();
5024 ASSERT_EQ(0, cache_ioctx.hit_set_list(hash, c, &ls));
5025 c->wait_for_complete();
5026 c->release();
5027
5028 cout << " got ls " << ls << std::endl;
5029 if (!ls.empty()) {
5030 if (!first) {
5031 first = ls.front().first;
5032 cout << "first is " << first << std::endl;
5033 } else {
5034 if (ls.front().first != first) {
5035 cout << "first now " << ls.front().first << ", trimmed" << std::endl;
5036 break;
5037 }
5038 }
5039 }
5040
5041 utime_t now = ceph_clock_now();
5042 ASSERT_TRUE(now < hard_stop);
5043
5044 sleep(1);
5045 }
5046 delete[] buf;
5047}
5048
5049TEST_F(LibRadosTwoPoolsECPP, PromoteOn2ndRead) {
5050 // create object
5051 for (int i=0; i<20; ++i) {
5052 bufferlist bl;
5053 bl.append("hi there");
5054 ObjectWriteOperation op;
5055 op.write_full(bl);
5056 ASSERT_EQ(0, ioctx.operate("foo" + stringify(i), &op));
5057 }
5058
5059 // configure cache
5060 bufferlist inbl;
5061 ASSERT_EQ(0, cluster.mon_command(
5062 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
5063 "\", \"tierpool\": \"" + cache_pool_name +
5064 "\", \"force_nonempty\": \"--force-nonempty\" }",
5065 inbl, NULL, NULL));
5066 ASSERT_EQ(0, cluster.mon_command(
5067 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
5068 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
5069 inbl, NULL, NULL));
5070 ASSERT_EQ(0, cluster.mon_command(
5071 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
5072 "\", \"mode\": \"writeback\"}",
5073 inbl, NULL, NULL));
5074
5075 // enable hitset tracking for this pool
5076 ASSERT_EQ(0, cluster.mon_command(
5077 set_pool_str(cache_pool_name, "hit_set_count", 2),
5078 inbl, NULL, NULL));
5079 ASSERT_EQ(0, cluster.mon_command(
5080 set_pool_str(cache_pool_name, "hit_set_period", 600),
5081 inbl, NULL, NULL));
5082 ASSERT_EQ(0, cluster.mon_command(
5083 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
5084 inbl, NULL, NULL));
5085 ASSERT_EQ(0, cluster.mon_command(
5086 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
5087 inbl, NULL, NULL));
5088 ASSERT_EQ(0, cluster.mon_command(
5089 set_pool_str(cache_pool_name, "hit_set_grade_decay_rate", 20),
5090 inbl, NULL, NULL));
5091 ASSERT_EQ(0, cluster.mon_command(
5092 set_pool_str(cache_pool_name, "hit_set_search_last_n", 1),
5093 inbl, NULL, NULL));
5094
5095 // wait for maps to settle
5096 cluster.wait_for_latest_osdmap();
5097
5098 int fake = 0; // set this to non-zero to test spurious promotion,
5099 // e.g. from thrashing
5100 int attempt = 0;
5101 string obj;
5102 while (true) {
5103 // 1st read, don't trigger a promote
5104 obj = "foo" + stringify(attempt);
5105 cout << obj << std::endl;
5106 {
5107 bufferlist bl;
5108 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
5109 if (--fake >= 0) {
5110 sleep(1);
5111 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
5112 sleep(1);
5113 }
5114 }
5115
5116 // verify the object is NOT present in the cache tier
5117 {
5118 bool found = false;
5119 NObjectIterator it = cache_ioctx.nobjects_begin();
5120 while (it != cache_ioctx.nobjects_end()) {
5121 cout << " see " << it->get_oid() << std::endl;
5122 if (it->get_oid() == string(obj.c_str())) {
5123 found = true;
5124 break;
5125 }
5126 ++it;
5127 }
5128 if (!found)
5129 break;
5130 }
5131
5132 ++attempt;
5133 ASSERT_LE(attempt, 20);
5134 cout << "hrm, object is present in cache on attempt " << attempt
5135 << ", retrying" << std::endl;
5136 }
5137
5138 // Read until the object is present in the cache tier
5139 cout << "verifying " << obj << " is eventually promoted" << std::endl;
5140 while (true) {
5141 bufferlist bl;
5142 ASSERT_EQ(1, ioctx.read(obj.c_str(), bl, 1, 0));
5143
5144 bool there = false;
5145 NObjectIterator it = cache_ioctx.nobjects_begin();
5146 while (it != cache_ioctx.nobjects_end()) {
5147 if (it->get_oid() == string(obj.c_str())) {
5148 there = true;
5149 break;
5150 }
5151 ++it;
5152 }
5153 if (there)
5154 break;
5155
5156 sleep(1);
5157 }
5158
5159 // tear down tiers
5160 ASSERT_EQ(0, cluster.mon_command(
5161 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
5162 "\"}",
5163 inbl, NULL, NULL));
5164 ASSERT_EQ(0, cluster.mon_command(
5165 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
5166 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
5167 inbl, NULL, NULL));
5168
5169 // wait for maps to settle before next test
5170 cluster.wait_for_latest_osdmap();
5171}
5172
5173TEST_F(LibRadosTwoPoolsECPP, ProxyRead) {
5174 // create object
5175 {
5176 bufferlist bl;
5177 bl.append("hi there");
5178 ObjectWriteOperation op;
5179 op.write_full(bl);
5180 ASSERT_EQ(0, ioctx.operate("foo", &op));
5181 }
5182
5183 // configure cache
5184 bufferlist inbl;
5185 ASSERT_EQ(0, cluster.mon_command(
5186 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
5187 "\", \"tierpool\": \"" + cache_pool_name +
5188 "\", \"force_nonempty\": \"--force-nonempty\" }",
5189 inbl, NULL, NULL));
5190 ASSERT_EQ(0, cluster.mon_command(
5191 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
5192 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
5193 inbl, NULL, NULL));
5194 ASSERT_EQ(0, cluster.mon_command(
5195 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
5196 "\", \"mode\": \"readproxy\"}",
5197 inbl, NULL, NULL));
5198
5199 // wait for maps to settle
5200 cluster.wait_for_latest_osdmap();
5201
5202 // read and verify the object
5203 {
5204 bufferlist bl;
5205 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
5206 ASSERT_EQ('h', bl[0]);
5207 }
5208
5209 // Verify 10 times the object is NOT present in the cache tier
5210 uint32_t i = 0;
5211 while (i++ < 10) {
5212 NObjectIterator it = cache_ioctx.nobjects_begin();
5213 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
5214 sleep(1);
5215 }
5216
5217 // tear down tiers
5218 ASSERT_EQ(0, cluster.mon_command(
5219 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
5220 "\"}",
5221 inbl, NULL, NULL));
5222 ASSERT_EQ(0, cluster.mon_command(
5223 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
5224 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
5225 inbl, NULL, NULL));
5226
5227 // wait for maps to settle before next test
5228 cluster.wait_for_latest_osdmap();
5229}
5230
5231TEST_F(LibRadosTwoPoolsECPP, CachePin) {
5232 // create object
5233 {
5234 bufferlist bl;
5235 bl.append("hi there");
5236 ObjectWriteOperation op;
5237 op.write_full(bl);
5238 ASSERT_EQ(0, ioctx.operate("foo", &op));
5239 }
5240 {
5241 bufferlist bl;
5242 bl.append("hi there");
5243 ObjectWriteOperation op;
5244 op.write_full(bl);
5245 ASSERT_EQ(0, ioctx.operate("bar", &op));
5246 }
5247 {
5248 bufferlist bl;
5249 bl.append("hi there");
5250 ObjectWriteOperation op;
5251 op.write_full(bl);
5252 ASSERT_EQ(0, ioctx.operate("baz", &op));
5253 }
5254 {
5255 bufferlist bl;
5256 bl.append("hi there");
5257 ObjectWriteOperation op;
5258 op.write_full(bl);
5259 ASSERT_EQ(0, ioctx.operate("bam", &op));
5260 }
5261
5262 // configure cache
5263 bufferlist inbl;
5264 ASSERT_EQ(0, cluster.mon_command(
5265 "{\"prefix\": \"osd tier add\", \"pool\": \"" + pool_name +
5266 "\", \"tierpool\": \"" + cache_pool_name +
5267 "\", \"force_nonempty\": \"--force-nonempty\" }",
5268 inbl, NULL, NULL));
5269 ASSERT_EQ(0, cluster.mon_command(
5270 "{\"prefix\": \"osd tier set-overlay\", \"pool\": \"" + pool_name +
5271 "\", \"overlaypool\": \"" + cache_pool_name + "\"}",
5272 inbl, NULL, NULL));
5273 ASSERT_EQ(0, cluster.mon_command(
5274 "{\"prefix\": \"osd tier cache-mode\", \"pool\": \"" + cache_pool_name +
5275 "\", \"mode\": \"writeback\"}",
5276 inbl, NULL, NULL));
5277
5278 // wait for maps to settle
5279 cluster.wait_for_latest_osdmap();
5280
5281 // read, trigger promote
5282 {
5283 bufferlist bl;
5284 ASSERT_EQ(1, ioctx.read("foo", bl, 1, 0));
5285 ASSERT_EQ(1, ioctx.read("bar", bl, 1, 0));
5286 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
5287 ASSERT_EQ(1, ioctx.read("bam", bl, 1, 0));
5288 }
5289
5290 // verify the objects are present in the cache tier
5291 {
5292 NObjectIterator it = cache_ioctx.nobjects_begin();
5293 ASSERT_TRUE(it != cache_ioctx.nobjects_end());
5294 for (uint32_t i = 0; i < 4; i++) {
5295 ASSERT_TRUE(it->get_oid() == string("foo") ||
5296 it->get_oid() == string("bar") ||
5297 it->get_oid() == string("baz") ||
5298 it->get_oid() == string("bam"));
5299 ++it;
5300 }
5301 ASSERT_TRUE(it == cache_ioctx.nobjects_end());
5302 }
5303
5304 // pin objects
5305 {
5306 ObjectWriteOperation op;
5307 op.cache_pin();
5308 librados::AioCompletion *completion = cluster.aio_create_completion();
5309 ASSERT_EQ(0, cache_ioctx.aio_operate("foo", completion, &op));
5310 completion->wait_for_safe();
5311 ASSERT_EQ(0, completion->get_return_value());
5312 completion->release();
5313 }
5314 {
5315 ObjectWriteOperation op;
5316 op.cache_pin();
5317 librados::AioCompletion *completion = cluster.aio_create_completion();
5318 ASSERT_EQ(0, cache_ioctx.aio_operate("baz", completion, &op));
5319 completion->wait_for_safe();
5320 ASSERT_EQ(0, completion->get_return_value());
5321 completion->release();
5322 }
5323
5324 // enable agent
5325 ASSERT_EQ(0, cluster.mon_command(
5326 set_pool_str(cache_pool_name, "hit_set_count", 2),
5327 inbl, NULL, NULL));
5328 ASSERT_EQ(0, cluster.mon_command(
5329 set_pool_str(cache_pool_name, "hit_set_period", 600),
5330 inbl, NULL, NULL));
5331 ASSERT_EQ(0, cluster.mon_command(
5332 set_pool_str(cache_pool_name, "hit_set_type", "bloom"),
5333 inbl, NULL, NULL));
5334 ASSERT_EQ(0, cluster.mon_command(
5335 set_pool_str(cache_pool_name, "min_read_recency_for_promote", 1),
5336 inbl, NULL, NULL));
5337 ASSERT_EQ(0, cluster.mon_command(
5338 set_pool_str(cache_pool_name, "target_max_objects", 1),
5339 inbl, NULL, NULL));
5340
5341 sleep(10);
5342
5343 // Verify the pinned objects 'foo' and 'baz' are not flushed/evicted
5344 uint32_t count = 0;
5345 while (true) {
5346 bufferlist bl;
5347 ASSERT_EQ(1, ioctx.read("baz", bl, 1, 0));
5348
5349 count = 0;
5350 NObjectIterator it = cache_ioctx.nobjects_begin();
5351 while (it != cache_ioctx.nobjects_end()) {
5352 ASSERT_TRUE(it->get_oid() == string("foo") ||
5353 it->get_oid() == string("bar") ||
5354 it->get_oid() == string("baz") ||
5355 it->get_oid() == string("bam"));
5356 ++count;
5357 ++it;
5358 }
5359 if (count == 2) {
5360 ASSERT_TRUE(it->get_oid() == string("foo") ||
5361 it->get_oid() == string("baz"));
5362 break;
5363 }
5364
5365 sleep(1);
5366 }
5367
5368 // tear down tiers
5369 ASSERT_EQ(0, cluster.mon_command(
5370 "{\"prefix\": \"osd tier remove-overlay\", \"pool\": \"" + pool_name +
5371 "\"}",
5372 inbl, NULL, NULL));
5373 ASSERT_EQ(0, cluster.mon_command(
5374 "{\"prefix\": \"osd tier remove\", \"pool\": \"" + pool_name +
5375 "\", \"tierpool\": \"" + cache_pool_name + "\"}",
5376 inbl, NULL, NULL));
5377
5378 // wait for maps to settle before next test
5379 cluster.wait_for_latest_osdmap();
5380}