#include "include/buffer.h"
#include "include/encoding.h"
#include "include/ceph_hash.h"
-#include "include/Spinlock.h"
+#include "include/spinlock.h"
#include "common/ceph_argparse.h"
#include "common/Cycles.h"
#include "common/Cond.h"
-#include "common/Mutex.h"
+#include "common/ceph_mutex.h"
#include "common/Thread.h"
#include "common/Timer.h"
#include "msg/async/Event.h"
#include <atomic>
+using namespace std;
using namespace ceph;
/**
{
int count = 1000000;
std::atomic<int64_t> value = { 11 };
- int total = 0;
+ [[maybe_unused]] int total = 0;
uint64_t start = Cycles::rdtsc();
for (int i = 0; i < count; i++) {
total += value;
double mutex_nonblock()
{
int count = 1000000;
- Mutex m("mutex_nonblock::m");
+ ceph::mutex m = ceph::make_mutex("mutex_nonblock::m");
uint64_t start = Cycles::rdtsc();
for (int i = 0; i < count; i++) {
- m.Lock();
- m.Unlock();
+ m.lock();
+ m.unlock();
}
uint64_t stop = Cycles::rdtsc();
return Cycles::to_seconds(stop - start)/count;
int a = 1, b = 2, c = 3, d = 4;
void encode(bufferlist &bl) const {
ENCODE_START(1, 1, bl);
- ::encode(a, bl);
- ::encode(b, bl);
- ::encode(c, bl);
- ::encode(d, bl);
+ encode(a, bl);
+ encode(b, bl);
+ encode(c, bl);
+ encode(d, bl);
ENCODE_FINISH(bl);
}
- void decode(bufferlist::iterator &bl) {
+ void decode(bufferlist::const_iterator &bl) {
DECODE_START(1, bl);
- ::decode(a, bl);
- ::decode(b, bl);
- ::decode(c, bl);
- ::decode(d, bl);
+ decode(a, bl);
+ decode(b, bl);
+ decode(c, bl);
+ decode(d, bl);
DECODE_FINISH(bl);
}
};
for (int i = 0; i < count; i++) {
bufferlist b;
DummyBlock dummy_block;
- ::encode(dummy_block, b);
- bufferlist::iterator iter = b.begin();
- ::decode(dummy_block, iter);
+ encode(dummy_block, b);
+ auto iter = b.cbegin();
+ decode(dummy_block, iter);
}
uint64_t stop = Cycles::rdtsc();
return Cycles::to_seconds(stop - start)/count;
char copy[10];
uint64_t start = Cycles::rdtsc();
for (int i = 0; i < count; i++) {
- b.copy(2, 6, copy);
+ b.cbegin(2).copy(6, copy);
}
uint64_t stop = Cycles::rdtsc();
return Cycles::to_seconds(stop - start)/count;
for (int i = 0; i < count; i++) {
bufferlist b;
DummyBlock dummy_block;
- ::encode(dummy_block, b);
+ encode(dummy_block, b);
uint64_t start = Cycles::rdtsc();
- ::encode(dummy_block, b);
- ::encode(dummy_block, b);
- ::encode(dummy_block, b);
- ::encode(dummy_block, b);
- ::encode(dummy_block, b);
- ::encode(dummy_block, b);
- ::encode(dummy_block, b);
- ::encode(dummy_block, b);
- ::encode(dummy_block, b);
- ::encode(dummy_block, b);
+ encode(dummy_block, b);
+ encode(dummy_block, b);
+ encode(dummy_block, b);
+ encode(dummy_block, b);
+ encode(dummy_block, b);
+ encode(dummy_block, b);
+ encode(dummy_block, b);
+ encode(dummy_block, b);
+ encode(dummy_block, b);
+ encode(dummy_block, b);
total += Cycles::rdtsc() - start;
}
return Cycles::to_seconds(total)/(count*10);
}
-// Measure the cost of retrieving an object from the beginning of a buffer.
-double buffer_get_contiguous()
-{
- int count = 1000000;
- int value = 11;
- bufferlist b;
- b.append((char*)&value, sizeof(value));
- int sum = 0;
- uint64_t start = Cycles::rdtsc();
- for (int i = 0; i < count; i++) {
- sum += *reinterpret_cast<int*>(b.get_contiguous(0, sizeof(value)));
- }
- uint64_t stop = Cycles::rdtsc();
- return Cycles::to_seconds(stop - start)/count;
-}
-
// Measure the cost of creating an iterator and iterating over 10
// chunks in a buffer.
double buffer_iterator()
int sum = 0;
uint64_t start = Cycles::rdtsc();
for (int i = 0; i < count; i++) {
- bufferlist::iterator it = b.begin();
+ auto it = b.cbegin();
while (!it.end()) {
sum += (static_cast<const char*>(it.get_current_ptr().c_str()))[it.get_remaining()-1];
++it;
// Implements the CondPingPong test.
class CondPingPong {
- Mutex mutex;
- Cond cond;
- int prod;
- int cons;
- const int count;
+ ceph::mutex mutex = ceph::make_mutex("CondPingPong::mutex");
+ ceph::condition_variable cond;
+ int prod = 0;
+ int cons = 0;
+ const int count = 10000;
class Consumer : public Thread {
CondPingPong *p;
} consumer;
public:
- CondPingPong(): mutex("CondPingPong::mutex"), prod(0), cons(0), count(10000), consumer(this) {}
+ CondPingPong(): consumer(this) {}
double run() {
consumer.create("consumer");
}
void produce() {
- Mutex::Locker l(mutex);
+ std::unique_lock l{mutex};
while (cons < count) {
- while (cons < prod)
- cond.Wait(mutex);
+ cond.wait(l, [this] { return cons >= prod; });
++prod;
- cond.Signal();
+ cond.notify_all();
}
}
void consume() {
- Mutex::Locker l(mutex);
+ std::unique_lock l{mutex};
while (cons < count) {
- while (cons == prod)
- cond.Wait(mutex);
+ cond.wait(l, [this] { return cons != prod; });
++cons;
- cond.Signal();
+ cond.notify_all();
}
}
};
}
uint64_t stop = Cycles::rdtsc();
return Cycles::to_seconds(stop - start)/count;
+#elif defined(__aarch64__)
+  // Time `count` back-to-back UDIV instructions and return the mean
+  // number of seconds spent per instruction.
+  int count = 1000000;
+  uint64_t start = Cycles::rdtsc();
+  uint64_t numerator = 0xa5a5a5a555aa55aaUL;
+  // The asm template below uses no %w operand modifier, so every operand
+  // is named as a 64-bit X register. Keep all operands 64 bits wide so the
+  // register contents match the 64-bit divide; a 32-bit variable would
+  // leave the upper half of its register unspecified.
+  uint64_t divisor = 0xaa55aa55U;
+  uint64_t result;
+  for (int i = 0; i < count; i++) {
+    asm volatile("udiv %0, %1, %2" : "=r"(result) :
+                 "r"(numerator), "r"(divisor));
+  }
+  uint64_t stop = Cycles::rdtsc();
+  return Cycles::to_seconds(stop - start)/count;
#else
return -1;
#endif
public:
explicit CountEvent(std::atomic<int64_t> *atomic): count(atomic) {}
- void do_request(int id) override {
+ void do_request(uint64_t id) override {
(*count)--;
}
};
{
int count = 1000000;
uint64_t start = Cycles::rdtsc();
- uint64_t total = 0;
+ [[maybe_unused]] uint64_t total = 0;
for (int i = 0; i < count; i++) {
total += Cycles::rdtsc();
}
double perf_cycles_to_seconds()
{
int count = 1000000;
- double total = 0;
+ [[maybe_unused]] double total = 0;
uint64_t cycles = 994261;
uint64_t start = Cycles::rdtsc();
for (int i = 0; i < count; i++) {
double perf_cycles_to_nanoseconds()
{
int count = 1000000;
- uint64_t total = 0;
+ [[maybe_unused]] uint64_t total = 0;
uint64_t cycles = 994261;
uint64_t start = Cycles::rdtsc();
for (int i = 0; i < count; i++) {
for (uint64_t i = 0; i < offset + num_bytes; i += 64)
_mm_prefetch(p + i, _MM_HINT_T0);
}
+#elif defined(__aarch64__)
+// Issue one "prfm pldl1keep" hint per 64-byte step, starting at the
+// 64-byte-aligned address at or below `object` and continuing while the
+// distance covered is less than (object's offset within its 64-byte
+// unit) + num_bytes.
+static inline void prefetch(const void *object, uint64_t num_bytes)
+{
+  const uint64_t misalign = reinterpret_cast<uint64_t>(object) & 0x3fUL;
+  const char* line = reinterpret_cast<const char*>(object) - misalign;
+  const uint64_t span = misalign + num_bytes;
+  uint64_t covered = 0;
+  while (covered < span) {
+    asm volatile("prfm pldl1keep, %a0\n" : : "p" (line));
+    covered += 64;
+    line += 64;
+  }
+}
#endif
// Measure the cost of the prefetch instruction.
double perf_prefetch()
{
-#ifdef HAVE_SSE
+#if defined(HAVE_SSE) || defined(__aarch64__)
uint64_t total_ticks = 0;
int count = 10;
char buf[16 * 64];
- uint64_t start, stop;
for (int i = 0; i < count; i++) {
PerfHelper::flush_cache();
- start = Cycles::rdtsc();
+ uint64_t start = Cycles::rdtsc();
prefetch(&buf[576], 64);
prefetch(&buf[0], 64);
prefetch(&buf[512], 64);
prefetch(&buf[832], 64);
prefetch(&buf[64], 64);
prefetch(&buf[192], 64);
- stop = Cycles::rdtsc();
+ uint64_t stop = Cycles::rdtsc();
total_ticks += stop - start;
}
return Cycles::to_seconds(total_ticks) / count / 16;
}
uint64_t stop = Cycles::rdtsc();
return Cycles::to_seconds(stop - start)/count;
+#elif defined(__aarch64__)
+  // Time `count` consecutive "dmb ishld" instructions and return the mean
+  // number of seconds spent per instruction.
+  const int count = 1000000;
+  const uint64_t start = Cycles::rdtsc();
+  for (int remaining = count; remaining > 0; --remaining) {
+    asm volatile("dmb ishld" ::: "memory");
+  }
+  const uint64_t stop = Cycles::rdtsc();
+  return Cycles::to_seconds(stop - start)/count;
#else
return -1;
#endif
}
uint64_t stop = Cycles::rdtsc();
return Cycles::to_seconds(stop - start)/count;
+#elif defined(__aarch64__)
+  // Time `count` consecutive "dmb ishst" instructions and return the mean
+  // number of seconds spent per instruction.
+  const int count = 1000000;
+  const uint64_t start = Cycles::rdtsc();
+  for (int remaining = count; remaining > 0; --remaining) {
+    asm volatile("dmb ishst" ::: "memory");
+  }
+  const uint64_t stop = Cycles::rdtsc();
+  return Cycles::to_seconds(stop - start)/count;
#else
return -1;
#endif
double test_spinlock()
{
int count = 1000000;
- Spinlock lock;
+ ceph::spinlock lock;
uint64_t start = Cycles::rdtsc();
for (int i = 0; i < count; i++) {
lock.lock();
double perf_timer()
{
int count = 1000000;
- Mutex lock("perf_timer::lock");
+ ceph::mutex lock = ceph::make_mutex("perf_timer::lock");
SafeTimer timer(g_ceph_context, lock);
FakeContext **c = new FakeContext*[count];
for (int i = 0; i < count; i++) {
c[i] = new FakeContext();
}
uint64_t start = Cycles::rdtsc();
- Mutex::Locker l(lock);
+ std::lock_guard l{lock};
for (int i = 0; i < count; i++) {
if (timer.add_event_after(12345, c[i])) {
timer.cancel_event(c[i]);
"copy out 2 small ptrs from buffer"},
{"buffer_encode10", buffer_encode,
"buffer encoding 10 structures onto existing ptr"},
- {"buffer_get_contiguous", buffer_get_contiguous,
- "Buffer::get_contiguous"},
{"buffer_iterator", buffer_iterator,
"iterate over buffer with 5 ptrs"},
{"cond_ping_pong", cond_ping_pong,
int main(int argc, char *argv[])
{
- vector<const char*> args;
- argv_to_vec(argc, (const char **)argv, args);
+ auto args = argv_to_vec(argc, argv);
auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
- CODE_ENVIRONMENT_UTILITY, 0);
+ CODE_ENVIRONMENT_UTILITY,
+ CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
common_init_finish(g_ceph_context);
Cycles::init();