]> git.proxmox.com Git - ceph.git/blobdiff - ceph/src/common/obj_bencher.cc
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / common / obj_bencher.cc
index 4eaeba3471f7c5c4bef0b343796f5de0cf34d8b6..de73e70b15bdb362c5f907b380d7d89bdeae3435 100644 (file)
@@ -22,6 +22,8 @@
 
 const std::string BENCH_LASTRUN_METADATA = "benchmark_last_metadata";
 const std::string BENCH_PREFIX = "benchmark_data";
+const std::string BENCH_OBJ_NAME = BENCH_PREFIX + "_%s_%d_object%d";
+
 static char cached_hostname[30] = {0};
 int cached_pid = 0;
 
@@ -47,14 +49,27 @@ static std::string generate_object_prefix(int pid = 0) {
   return oss.str();
 }
 
-static std::string generate_object_name(int objnum, int pid = 0)
+// this is 8x faster than previous impl based on chained, deduped functions call
+static std::string generate_object_name_fast(int objnum, int pid = 0)
 {
-  std::ostringstream oss;
-  oss << generate_object_prefix(pid) << "_object" << objnum;
-  return oss.str();
+  if (cached_hostname[0] == 0) {
+       gethostname(cached_hostname, sizeof(cached_hostname)-1);
+       cached_hostname[sizeof(cached_hostname)-1] = 0;
+  }
+
+  if (pid)
+       cached_pid = pid;
+  else if (!cached_pid)
+       cached_pid = getpid();
+
+  char name[512];
+  int n = snprintf(&name[0], sizeof(name),  BENCH_OBJ_NAME.c_str(), cached_hostname, cached_pid, objnum);
+  ceph_assert(n > 0 && n < (int)sizeof(name));
+  return std::string(&name[0], (size_t)n);
 }
 
 static void sanitize_object_contents (bench_data *data, size_t length) {
+  // FIPS zeroization audit 20191115: this memset is not security related.
   memset(data->object_contents, 'z', length);
 }
 
@@ -82,22 +97,23 @@ void *ObjBencher::status_printer(void *_bencher) {
   int previous_writes = 0;
   int cycleSinceChange = 0;
   double bandwidth;
-  int iops;
-  utime_t ONE_SECOND;
-  ONE_SECOND.set_from_double(1.0);
-  bencher->lock.Lock();
+  int iops = 0;
+  mono_clock::duration ONE_SECOND = std::chrono::seconds(1);
+  bencher->lock.lock();
   if (formatter)
     formatter->open_array_section("datas");
   while(!data.done) {
-    utime_t cur_time = ceph_clock_now();
+    mono_time cur_time = mono_clock::now();
+    utime_t t = ceph_clock_now();
 
     if (i % 20 == 0 && !formatter) {
       if (i > 0)
-        cur_time.localtime(cout) << " min lat: " << data.min_latency
+        t.localtime(cout)
+          << " min lat: " << data.min_latency
           << " max lat: " << data.max_latency
           << " avg lat: " << data.avg_latency << std::endl;
       //I'm naughty and don't reset the fill
-      bencher->out(cout, cur_time) << setfill(' ')
+      bencher->out(cout, t) << setfill(' ')
           << setw(5) << "sec"
           << setw(8) << "Cur ops"
           << setw(10) << "started"
@@ -121,7 +137,10 @@ void *ObjBencher::status_printer(void *_bencher) {
       if (bandwidth < data.idata.min_bandwidth)
         data.idata.min_bandwidth = bandwidth;
 
-      data.history.bandwidth.push_back(bandwidth);
+      ++data.idata.bandwidth_cycles;
+      double delta = bandwidth - data.idata.avg_bandwidth;
+      data.idata.avg_bandwidth += delta / data.idata.bandwidth_cycles;
+      data.idata.bandwidth_diff_sum += delta * (bandwidth - data.idata.avg_bandwidth);
     }
 
     if (cycleSinceChange)
@@ -136,19 +155,24 @@ void *ObjBencher::status_printer(void *_bencher) {
       if (iops < data.idata.min_iops)
         data.idata.min_iops = iops;
 
-      data.history.iops.push_back(iops);
+      ++data.idata.iops_cycles;
+      double delta = iops - data.idata.avg_iops;
+      data.idata.avg_iops += delta / data.idata.iops_cycles;
+      data.idata.iops_diff_sum += delta * (iops - data.idata.avg_iops);
     }
     
     if (formatter)
       formatter->open_object_section("data");
 
+    // elapsed will be in seconds, by default
+    std::chrono::duration<double> elapsed = cur_time - data.start_time;
     double avg_bandwidth = (double) (data.op_size) * (data.finished)
-      / (double)(cur_time - data.start_time) / (1024*1024);
+      / elapsed.count() / (1024*1024);
     if (previous_writes != data.finished) {
       previous_writes = data.finished;
       cycleSinceChange = 0;
       if (!formatter) {
-        bencher->out(cout, cur_time)
+        bencher->out(cout, t)
          << setfill(' ')
           << setw(5) << i
          << ' ' << setw(7) << data.in_flight
@@ -156,7 +180,7 @@ void *ObjBencher::status_printer(void *_bencher) {
           << ' ' << setw(9) << data.finished
           << ' ' << setw(9) << avg_bandwidth
           << ' ' << setw(9) << bandwidth
-          << ' ' << setw(11) << (double)data.cur_latency
+          << ' ' << setw(11) << (double)data.cur_latency.count()
           << ' ' << setw(11) << data.avg_latency << std::endl;
       } else {
         formatter->dump_format("sec", "%d", i);
@@ -165,13 +189,13 @@ void *ObjBencher::status_printer(void *_bencher) {
         formatter->dump_format("finished", "%d", data.finished);
         formatter->dump_format("avg_bw", "%f", avg_bandwidth);
         formatter->dump_format("cur_bw", "%f", bandwidth);
-        formatter->dump_format("last_lat", "%f", (double)data.cur_latency);
+        formatter->dump_format("last_lat", "%f", (double)data.cur_latency.count());
         formatter->dump_format("avg_lat", "%f", data.avg_latency);
       }
     }
     else {
       if (!formatter) {
-        bencher->out(cout, cur_time)
+        bencher->out(cout, t)
          << setfill(' ')
           << setw(5) << i
          << ' ' << setw(7) << data.in_flight
@@ -202,7 +226,11 @@ void *ObjBencher::status_printer(void *_bencher) {
   }
   if (formatter)
     formatter->close_section(); //datas
-  bencher->lock.Unlock();
+  if (iops < 0) {
+    std::chrono::duration<double> runtime = mono_clock::now() - data.start_time;
+    data.idata.min_iops = data.idata.max_iops = data.finished / runtime.count();
+  }
+  bencher->lock.unlock();
   return NULL;
 }
 
@@ -212,27 +240,31 @@ int ObjBencher::aio_bench(
   uint64_t op_size, uint64_t object_size,
   unsigned max_objects,
   bool cleanup, bool hints,
-  const std::string& run_name, bool no_verify) {
+  const std::string& run_name, bool reuse_bench, bool no_verify) {
 
   if (concurrentios <= 0)
     return -EINVAL;
 
   int num_objects = 0;
   int r = 0;
-  int prevPid = 0;
-  utime_t runtime;
+  int prev_pid = 0;
+  std::chrono::duration<double> timePassed;
 
   // default metadata object is used if user does not specify one
   const std::string run_name_meta = (run_name.empty() ? BENCH_LASTRUN_METADATA : run_name);
 
   //get data from previous write run, if available
-  if (operation != OP_WRITE) {
+  if (operation != OP_WRITE || reuse_bench) {
     uint64_t prev_op_size, prev_object_size;
     r = fetch_bench_metadata(run_name_meta, &prev_op_size, &prev_object_size,
-                            &num_objects, &prevPid);
+                            &num_objects, &prev_pid);
     if (r < 0) {
-      if (r == -ENOENT)
-        cerr << "Must write data before running a read benchmark!" << std::endl;
+      if (r == -ENOENT) {
+        if (reuse_bench)
+          cerr << "Must write data before using reuse_bench for a write benchmark!" << std::endl;
+        else
+          cerr << "Must write data before running a read benchmark!" << std::endl;
+      }
       return r;
     }
     object_size = prev_object_size;   
@@ -240,7 +272,7 @@ int ObjBencher::aio_bench(
   }
 
   char* contentsChars = new char[op_size];
-  lock.Lock();
+  lock.lock();
   data.done = false;
   data.hints = hints;
   data.object_size = object_size;
@@ -251,8 +283,9 @@ int ObjBencher::aio_bench(
   data.min_latency = 9999.0; // this better be higher than initial latency!
   data.max_latency = 0;
   data.avg_latency = 0;
+  data.latency_diff_sum = 0;
   data.object_contents = contentsChars;
-  lock.Unlock();
+  lock.unlock();
 
   //fill in contentsChars deterministically so we can check returns
   sanitize_object_contents(&data, data.op_size);
@@ -261,35 +294,35 @@ int ObjBencher::aio_bench(
     formatter->open_object_section("bench");
 
   if (OP_WRITE == operation) {
-    r = write_bench(secondsToRun, concurrentios, run_name_meta, max_objects);
+    r = write_bench(secondsToRun, concurrentios, run_name_meta, max_objects, prev_pid);
     if (r != 0) goto out;
   }
   else if (OP_SEQ_READ == operation) {
-    r = seq_read_bench(secondsToRun, num_objects, concurrentios, prevPid, no_verify);
+    r = seq_read_bench(secondsToRun, num_objects, concurrentios, prev_pid, no_verify);
     if (r != 0) goto out;
   }
   else if (OP_RAND_READ == operation) {
-    r = rand_read_bench(secondsToRun, num_objects, concurrentios, prevPid, no_verify);
+    r = rand_read_bench(secondsToRun, num_objects, concurrentios, prev_pid, no_verify);
     if (r != 0) goto out;
   }
 
   if (OP_WRITE == operation && cleanup) {
     r = fetch_bench_metadata(run_name_meta, &op_size, &object_size,
-                            &num_objects, &prevPid);
+                            &num_objects, &prev_pid);
     if (r < 0) {
       if (r == -ENOENT)
         cerr << "Should never happen: bench metadata missing for current run!" << std::endl;
       goto out;
     }
 
-    data.start_time = ceph_clock_now();
+    data.start_time = mono_clock::now();
     out(cout) << "Cleaning up (deleting benchmark objects)" << std::endl;
 
-    r = clean_up(num_objects, prevPid, concurrentios);
+    r = clean_up(num_objects, prev_pid, concurrentios);
     if (r != 0) goto out;
 
-    runtime = ceph_clock_now() - data.start_time;
-    out(cout) << "Clean up completed and total clean up time :" << runtime << std::endl;
+    timePassed = mono_clock::now() - data.start_time;
+    out(cout) << "Clean up completed and total clean up time :" << timePassed.count() << std::endl;
 
     // lastrun file
     r = sync_remove(run_name_meta);
@@ -314,34 +347,9 @@ struct lock_cond {
 
 void _aio_cb(void *cb, void *arg) {
   struct lock_cond *lc = (struct lock_cond *)arg;
-  lc->lock->Lock();
+  lc->lock->lock();
   lc->cond.Signal();
-  lc->lock->Unlock();
-}
-
-template<class T>
-static T vec_stddev(vector<T>& v)
-{
-  T mean = 0;
-
-  if (v.size() < 2)
-    return 0;
-
-  typename vector<T>::iterator iter;
-  for (iter = v.begin(); iter != v.end(); ++iter) {
-    mean += *iter;
-  }
-
-  mean /= v.size();
-
-  T stddev = 0;
-  for (iter = v.begin(); iter != v.end(); ++iter) {
-    T dev = *iter - mean;
-    dev *= dev;
-    stddev += dev;
-  }
-  stddev /= (v.size() - 1);
-  return sqrt(stddev);
+  lc->lock->unlock();
 }
 
 int ObjBencher::fetch_bench_metadata(const std::string& metadata_file,
@@ -359,12 +367,12 @@ int ObjBencher::fetch_bench_metadata(const std::string& metadata_file,
     }
     return r;
   }
-  bufferlist::iterator p = object_data.begin();
-  ::decode(*object_size, p);
-  ::decode(*num_objects, p);
-  ::decode(*prevPid, p);
+  auto p = object_data.cbegin();
+  decode(*object_size, p);
+  decode(*num_objects, p);
+  decode(*prevPid, p);
   if (!p.end()) {
-    ::decode(*op_size, p);
+    decode(*op_size, p);
   } else {
     *op_size = *object_size;
   }
@@ -374,7 +382,7 @@ int ObjBencher::fetch_bench_metadata(const std::string& metadata_file,
 
 int ObjBencher::write_bench(int secondsToRun,
                            int concurrentios, const string& run_name_meta,
-                           unsigned max_objects) {
+                           unsigned max_objects, int prev_pid) {
   if (concurrentios <= 0) 
     return -EINVAL;
   
@@ -394,7 +402,7 @@ int ObjBencher::write_bench(int secondsToRun,
   }
   bufferlist* newContents = 0;
 
-  std::string prefix = generate_object_prefix();
+  std::string prefix = prev_pid ? generate_object_prefix(prev_pid) : generate_object_prefix();
   if (!formatter)
     out(cout) << "Object prefix: " << prefix << std::endl;
   else
@@ -402,15 +410,14 @@ int ObjBencher::write_bench(int secondsToRun,
 
   std::vector<string> name(concurrentios);
   std::string newName;
-  bufferlist* contents[concurrentios];
-  double total_latency = 0;
-  std::vector<utime_t> start_times(concurrentios);
-  utime_t stopTime;
+  unique_ptr<bufferlist> contents[concurrentios];
   int r = 0;
   bufferlist b_write;
   lock_cond lc(&lock);
-  utime_t runtime;
-  utime_t timePassed;
+  double total_latency = 0;
+  std::vector<mono_time> start_times(concurrentios);
+  mono_time stopTime;
+  std::chrono::duration<double> timePassed;
 
   unsigned writes_per_object = 1;
   if (data.op_size)
@@ -420,8 +427,8 @@ int ObjBencher::write_bench(int secondsToRun,
 
   //set up writes so I can start them together
   for (int i = 0; i<concurrentios; ++i) {
-    name[i] = generate_object_name(i / writes_per_object);
-    contents[i] = new bufferlist();
+    name[i] = generate_object_name_fast(i / writes_per_object);
+    contents[i] = std::make_unique<bufferlist>();
     snprintf(data.object_contents, data.op_size, "I'm the %16dth op!", i);
     contents[i]->append(data.object_contents, data.op_size);
   }
@@ -430,24 +437,24 @@ int ObjBencher::write_bench(int secondsToRun,
 
   pthread_create(&print_thread, NULL, ObjBencher::status_printer, (void *)this);
   ceph_pthread_setname(print_thread, "write_stat");
-  lock.Lock();
+  lock.lock();
   data.finished = 0;
-  data.start_time = ceph_clock_now();
-  lock.Unlock();
+  data.start_time = mono_clock::now();
+  lock.unlock();
   for (int i = 0; i<concurrentios; ++i) {
-    start_times[i] = ceph_clock_now();
+    start_times[i] = mono_clock::now();
     r = create_completion(i, _aio_cb, (void *)&lc);
     if (r < 0)
       goto ERR;
     r = aio_write(name[i], i, *contents[i], data.op_size,
                  data.op_size * (i % writes_per_object));
-    if (r < 0) { //naughty, doesn't clean up heap
+    if (r < 0) {
       goto ERR;
     }
-    lock.Lock();
+    lock.lock();
     ++data.started;
     ++data.in_flight;
-    lock.Unlock();
+    lock.unlock();
   }
 
   //keep on adding new writes as old ones complete until we've passed minimum time
@@ -456,11 +463,10 @@ int ObjBencher::write_bench(int secondsToRun,
 
   //don't need locking for reads because other thread doesn't write
 
-  runtime.set_from_double(secondsToRun);
-  stopTime = data.start_time + runtime;
+  stopTime = data.start_time + std::chrono::seconds(secondsToRun);
   slot = 0;
-  lock.Lock();
-  while (!secondsToRun || ceph_clock_now() < stopTime) {
+  lock.lock();
+  while (secondsToRun && mono_clock::now() < stopTime) {
     bool found = false;
     while (1) {
       int old_slot = slot;
@@ -478,148 +484,167 @@ int ObjBencher::write_bench(int secondsToRun,
         break;
       lc.cond.Wait(lock);
     }
-    lock.Unlock();
+    lock.unlock();
     //create new contents and name on the heap, and fill them
-    newName = generate_object_name(data.started / writes_per_object);
-    newContents = contents[slot];
+    newName = generate_object_name_fast(data.started / writes_per_object);
+    newContents = contents[slot].get();
     snprintf(newContents->c_str(), data.op_size, "I'm the %16dth op!", data.started);
     // we wrote to buffer, going around internal crc cache, so invalidate it now.
     newContents->invalidate_crc();
 
     completion_wait(slot);
-    lock.Lock();
+    lock.lock();
     r = completion_ret(slot);
     if (r != 0) {
-      lock.Unlock();
+      lock.unlock();
       goto ERR;
     }
-    data.cur_latency = ceph_clock_now() - start_times[slot];
-    data.history.latency.push_back(data.cur_latency);
-    total_latency += data.cur_latency;
-    if( data.cur_latency > data.max_latency) data.max_latency = data.cur_latency;
-    if (data.cur_latency < data.min_latency) data.min_latency = data.cur_latency;
+    data.cur_latency = mono_clock::now() - start_times[slot];
+    total_latency += data.cur_latency.count();
+    if( data.cur_latency.count() > data.max_latency)
+      data.max_latency = data.cur_latency.count();
+    if (data.cur_latency.count() < data.min_latency)
+      data.min_latency = data.cur_latency.count();
     ++data.finished;
+    double delta = data.cur_latency.count() - data.avg_latency;
     data.avg_latency = total_latency / data.finished;
+    data.latency_diff_sum += delta * (data.cur_latency.count() - data.avg_latency);
     --data.in_flight;
-    lock.Unlock();
+    lock.unlock();
     release_completion(slot);
-    timePassed = ceph_clock_now() - data.start_time;
 
     //write new stuff to backend
-    start_times[slot] = ceph_clock_now();
+    start_times[slot] = mono_clock::now();
     r = create_completion(slot, _aio_cb, &lc);
     if (r < 0)
       goto ERR;
     r = aio_write(newName, slot, *newContents, data.op_size,
                  data.op_size * (data.started % writes_per_object));
-    if (r < 0) {//naughty; doesn't clean up heap space.
+    if (r < 0) {
       goto ERR;
     }
     name[slot] = newName;
-    lock.Lock();
+    lock.lock();
     ++data.started;
     ++data.in_flight;
-    if (max_objects &&
-       data.started >= (int)((data.object_size * max_objects + data.op_size - 1) /
-                            data.op_size))
-      break;
+    if (data.op_size) {
+      if (max_objects &&
+         data.started >= (int)((data.object_size * max_objects + data.op_size - 1) /
+                              data.op_size))
+        break;
+    }
   }
-  lock.Unlock();
+  lock.unlock();
 
   while (data.finished < data.started) {
     slot = data.finished % concurrentios;
     completion_wait(slot);
-    lock.Lock();
+    lock.lock();
     r = completion_ret(slot);
     if (r != 0) {
-      lock.Unlock();
+      lock.unlock();
       goto ERR;
     }
-    data.cur_latency = ceph_clock_now() - start_times[slot];
-    data.history.latency.push_back(data.cur_latency);
-    total_latency += data.cur_latency;
-    if (data.cur_latency > data.max_latency) data.max_latency = data.cur_latency;
-    if (data.cur_latency < data.min_latency) data.min_latency = data.cur_latency;
+    data.cur_latency = mono_clock::now() - start_times[slot];
+    total_latency += data.cur_latency.count();
+    if (data.cur_latency.count() > data.max_latency)
+      data.max_latency = data.cur_latency.count();
+    if (data.cur_latency.count() < data.min_latency)
+      data.min_latency = data.cur_latency.count();
     ++data.finished;
+    double delta = data.cur_latency.count() - data.avg_latency;
     data.avg_latency = total_latency / data.finished;
+    data.latency_diff_sum += delta * (data.cur_latency.count() - data.avg_latency);
     --data.in_flight;
-    lock.Unlock();
+    lock.unlock();
     release_completion(slot);
-    delete contents[slot];
-    contents[slot] = 0;
   }
 
-  timePassed = ceph_clock_now() - data.start_time;
-  lock.Lock();
+  timePassed = mono_clock::now() - data.start_time;
+  lock.lock();
   data.done = true;
-  lock.Unlock();
+  lock.unlock();
 
   pthread_join(print_thread, NULL);
 
   double bandwidth;
-  bandwidth = ((double)data.finished)*((double)data.op_size)/(double)timePassed;
+  bandwidth = ((double)data.finished)*((double)data.op_size) /
+       timePassed.count();
   bandwidth = bandwidth/(1024*1024); // we want it in MB/sec
 
+  double bandwidth_stddev;
+  double iops_stddev;
+  double latency_stddev;
+  if (data.idata.bandwidth_cycles > 1) {
+    bandwidth_stddev = std::sqrt(data.idata.bandwidth_diff_sum / (data.idata.bandwidth_cycles - 1));
+  } else {
+    bandwidth_stddev = 0;
+  }
+  if (data.idata.iops_cycles > 1) {
+    iops_stddev = std::sqrt(data.idata.iops_diff_sum / (data.idata.iops_cycles - 1));
+  } else {
+    iops_stddev = 0;
+  }
+  if (data.finished > 1) {
+    latency_stddev = std::sqrt(data.latency_diff_sum / (data.finished - 1));
+  } else {
+    latency_stddev = 0;
+  }
+
   if (!formatter) {
-    out(cout) << "Total time run:         " << timePassed << std::endl
+    out(cout) << "Total time run:         " << timePassed.count() << std::endl
        << "Total writes made:      " << data.finished << std::endl
        << "Write size:             " << data.op_size << std::endl
        << "Object size:            " << data.object_size << std::endl      
        << "Bandwidth (MB/sec):     " << setprecision(6) << bandwidth << std::endl
-       << "Stddev Bandwidth:       " << vec_stddev(data.history.bandwidth) << std::endl
+       << "Stddev Bandwidth:       " << bandwidth_stddev << std::endl
        << "Max bandwidth (MB/sec): " << data.idata.max_bandwidth << std::endl
        << "Min bandwidth (MB/sec): " << data.idata.min_bandwidth << std::endl
-       << "Average IOPS:           " << (int)(data.finished/timePassed) << std::endl
-       << "Stddev IOPS:            " << vec_stddev(data.history.iops) << std::endl
+       << "Average IOPS:           " << (int)(data.finished/timePassed.count()) << std::endl
+       << "Stddev IOPS:            " << iops_stddev << std::endl
        << "Max IOPS:               " << data.idata.max_iops << std::endl
        << "Min IOPS:               " << data.idata.min_iops << std::endl
        << "Average Latency(s):     " << data.avg_latency << std::endl
-       << "Stddev Latency(s):      " << vec_stddev(data.history.latency) << std::endl
+       << "Stddev Latency(s):      " << latency_stddev << std::endl
        << "Max latency(s):         " << data.max_latency << std::endl
        << "Min latency(s):         " << data.min_latency << std::endl;
   } else {
-    formatter->dump_format("total_time_run", "%f", (double)timePassed);
+    formatter->dump_format("total_time_run", "%f", timePassed.count());
     formatter->dump_format("total_writes_made", "%d", data.finished);
     formatter->dump_format("write_size", "%d", data.op_size);
     formatter->dump_format("object_size", "%d", data.object_size);
     formatter->dump_format("bandwidth", "%f", bandwidth);
-    formatter->dump_format("stddev_bandwidth", "%f", vec_stddev(data.history.bandwidth));
+    formatter->dump_format("stddev_bandwidth", "%f", bandwidth_stddev);
     formatter->dump_format("max_bandwidth", "%f", data.idata.max_bandwidth);
     formatter->dump_format("min_bandwidth", "%f", data.idata.min_bandwidth);
-    formatter->dump_format("average_iops", "%d", (int)(data.finished/timePassed));
-    formatter->dump_format("stddev_iops", "%d", vec_stddev(data.history.iops));
+    formatter->dump_format("average_iops", "%d", (int)(data.finished/timePassed.count()));
+    formatter->dump_format("stddev_iops", "%d", iops_stddev);
     formatter->dump_format("max_iops", "%d", data.idata.max_iops);
     formatter->dump_format("min_iops", "%d", data.idata.min_iops);
     formatter->dump_format("average_latency", "%f", data.avg_latency);
-    formatter->dump_format("stddev_latency", "%f", vec_stddev(data.history.latency));
-    formatter->dump_format("max_latency:", "%f", data.max_latency);
+    formatter->dump_format("stddev_latency", "%f", latency_stddev);
+    formatter->dump_format("max_latency", "%f", data.max_latency);
     formatter->dump_format("min_latency", "%f", data.min_latency);
   }
   //write object size/number data for read benchmarks
-  ::encode(data.object_size, b_write);
+  encode(data.object_size, b_write);
   num_objects = (data.finished + writes_per_object - 1) / writes_per_object;
-  ::encode(num_objects, b_write);
-  ::encode(getpid(), b_write);
-  ::encode(data.op_size, b_write);
+  encode(num_objects, b_write);
+  encode(prev_pid ? prev_pid : getpid(),  b_write);
+  encode(data.op_size, b_write);
 
   // persist meta-data for further cleanup or read
   sync_write(run_name_meta, b_write, sizeof(int)*3);
 
   completions_done();
-  for (int i = 0; i < concurrentios; i++)
-      if (contents[i])
-          delete contents[i];
 
   return 0;
 
  ERR:
-  lock.Lock();
+  lock.lock();
   data.done = 1;
-  lock.Unlock();
+  lock.unlock();
   pthread_join(print_thread, NULL);
-  for (int i = 0; i < concurrentios; i++)
-      if (contents[i])
-          delete contents[i];
   return r;
 }
 
@@ -631,22 +656,20 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
 
   std::vector<string> name(concurrentios);
   std::string newName;
-  bufferlist* contents[concurrentios];
+  unique_ptr<bufferlist> contents[concurrentios];
   int index[concurrentios];
   int errors = 0;
-  utime_t start_time;
-  std::vector<utime_t> start_times(concurrentios);
-  utime_t time_to_run;
-  time_to_run.set_from_double(seconds_to_run);
   double total_latency = 0;
   int r = 0;
-  utime_t runtime;
+  std::vector<mono_time> start_times(concurrentios);
+  mono_clock::duration time_to_run = std::chrono::seconds(seconds_to_run);
+  std::chrono::duration<double> timePassed;
   sanitize_object_contents(&data, data.op_size); //clean it up once; subsequent
   //changes will be safe because string length should remain the same
 
-  unsigned writes_per_object = 1;
+  unsigned reads_per_object = 1;
   if (data.op_size)
-    writes_per_object = data.object_size / data.op_size;
+    reads_per_object = data.object_size / data.op_size;
 
   r = completions_init(concurrentios);
   if (r < 0)
@@ -654,35 +677,35 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
 
   //set up initial reads
   for (int i = 0; i < concurrentios; ++i) {
-    name[i] = generate_object_name(i / writes_per_object, pid);
-    contents[i] = new bufferlist();
+    name[i] = generate_object_name_fast(i / reads_per_object, pid);
+    contents[i] = std::make_unique<bufferlist>();
   }
 
-  lock.Lock();
+  lock.lock();
   data.finished = 0;
-  data.start_time = ceph_clock_now();
-  lock.Unlock();
+  data.start_time = mono_clock::now();
+  lock.unlock();
 
   pthread_t print_thread;
   pthread_create(&print_thread, NULL, status_printer, (void *)this);
   ceph_pthread_setname(print_thread, "seq_read_stat");
 
-  utime_t finish_time = data.start_time + time_to_run;
+  mono_time finish_time = data.start_time + time_to_run;
   //start initial reads
   for (int i = 0; i < concurrentios; ++i) {
     index[i] = i;
-    start_times[i] = ceph_clock_now();
+    start_times[i] = mono_clock::now();
     create_completion(i, _aio_cb, (void *)&lc);
-    r = aio_read(name[i], i, contents[i], data.op_size,
-                data.op_size * (i % writes_per_object));
-    if (r < 0) { //naughty, doesn't clean up heap -- oh, or handle the print thread!
+    r = aio_read(name[i], i, contents[i].get(), data.op_size,
+                data.op_size * (i % reads_per_object));
+    if (r < 0) {
       cerr << "r = " << r << std::endl;
       goto ERR;
     }
-    lock.Lock();
+    lock.lock();
     ++data.started;
     ++data.in_flight;
-    lock.Unlock();
+    lock.unlock();
   }
 
   //keep on adding new reads as old ones complete
@@ -690,9 +713,9 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
   bufferlist *cur_contents;
 
   slot = 0;
-  while ((!seconds_to_run || ceph_clock_now() < finish_time) &&
+  while ((seconds_to_run && mono_clock::now() < finish_time) &&
         num_objects > data.started) {
-    lock.Lock();
+    lock.lock();
     int old_slot = slot;
     bool found = false;
     while (1) {
@@ -713,9 +736,9 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
     }
 
     // calculate latency here, so memcmp doesn't inflate it
-    data.cur_latency = ceph_clock_now() - start_times[slot];
+    data.cur_latency = mono_clock::now() - start_times[slot];
 
-    cur_contents = contents[slot];
+    cur_contents = contents[slot].get();
     int current_index = index[slot];
     
     // invalidate internal crc cache
@@ -730,38 +753,40 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
       }
     }
 
-    newName = generate_object_name(data.started / writes_per_object, pid);
+    newName = generate_object_name_fast(data.started / reads_per_object, pid);
     index[slot] = data.started;
-    lock.Unlock();
+    lock.unlock();
     completion_wait(slot);
-    lock.Lock();
+    lock.lock();
     r = completion_ret(slot);
     if (r < 0) {
       cerr << "read got " << r << std::endl;
-      lock.Unlock();
+      lock.unlock();
       goto ERR;
     }
-    total_latency += data.cur_latency;
-    if (data.cur_latency > data.max_latency) data.max_latency = data.cur_latency;
-    if (data.cur_latency < data.min_latency) data.min_latency = data.cur_latency;
+    total_latency += data.cur_latency.count();
+    if (data.cur_latency.count() > data.max_latency)
+      data.max_latency = data.cur_latency.count();
+    if (data.cur_latency.count() < data.min_latency)
+      data.min_latency = data.cur_latency.count();
     ++data.finished;
     data.avg_latency = total_latency / data.finished;
     --data.in_flight;
-    lock.Unlock();
+    lock.unlock();
     release_completion(slot);
 
     //start new read and check data if requested
-    start_times[slot] = ceph_clock_now();
+    start_times[slot] = mono_clock::now();
     create_completion(slot, _aio_cb, (void *)&lc);
-    r = aio_read(newName, slot, contents[slot], data.op_size,
-                data.op_size * (data.started % writes_per_object));
+    r = aio_read(newName, slot, contents[slot].get(), data.op_size,
+                data.op_size * (data.started % reads_per_object));
     if (r < 0) {
       goto ERR;
     }
-    lock.Lock();
+    lock.lock();
     ++data.started;
     ++data.in_flight;
-    lock.Unlock();
+    lock.unlock();
     name[slot] = newName;
   }
 
@@ -769,67 +794,75 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
   while (data.finished < data.started) {
     slot = data.finished % concurrentios;
     completion_wait(slot);
-    lock.Lock();
+    lock.lock();
     r = completion_ret(slot);
     if (r < 0) {
       cerr << "read got " << r << std::endl;
-      lock.Unlock();
+      lock.unlock();
       goto ERR;
     }
-    data.cur_latency = ceph_clock_now() - start_times[slot];
-    total_latency += data.cur_latency;
-    if (data.cur_latency > data.max_latency) data.max_latency = data.cur_latency;
-    if (data.cur_latency < data.min_latency) data.min_latency = data.cur_latency;
+    data.cur_latency = mono_clock::now() - start_times[slot];
+    total_latency += data.cur_latency.count();
+    if (data.cur_latency.count() > data.max_latency)
+      data.max_latency = data.cur_latency.count();
+    if (data.cur_latency.count() < data.min_latency)
+      data.min_latency = data.cur_latency.count();
     ++data.finished;
     data.avg_latency = total_latency / data.finished;
     --data.in_flight;
     release_completion(slot);
     if (!no_verify) {
       snprintf(data.object_contents, data.op_size, "I'm the %16dth op!", index[slot]);
-      lock.Unlock();
+      lock.unlock();
       if ((contents[slot]->length() != data.op_size) || 
          (memcmp(data.object_contents, contents[slot]->c_str(), data.op_size) != 0)) {
         cerr << name[slot] << " is not correct!" << std::endl;
         ++errors;
       }
     } else {
-        lock.Unlock();
+        lock.unlock();
     }
-    delete contents[slot];
   }
 
-  runtime = ceph_clock_now() - data.start_time;
-  lock.Lock();
+  timePassed = mono_clock::now() - data.start_time;
+  lock.lock();
   data.done = true;
-  lock.Unlock();
+  lock.unlock();
 
   pthread_join(print_thread, NULL);
 
   double bandwidth;
-  bandwidth = ((double)data.finished)*((double)data.op_size)/(double)runtime;
+  bandwidth = ((double)data.finished)*((double)data.op_size)/timePassed.count();
   bandwidth = bandwidth/(1024*1024); // we want it in MB/sec
+  
+  double iops_stddev;
+  if (data.idata.iops_cycles > 1) {
+    iops_stddev = std::sqrt(data.idata.iops_diff_sum / (data.idata.iops_cycles - 1));
+  } else {
+    iops_stddev = 0;
+  }
 
   if (!formatter) {
-    out(cout) << "Total time run:       " << runtime << std::endl
+    out(cout) << "Total time run:       " << timePassed.count() << std::endl
        << "Total reads made:     " << data.finished << std::endl
        << "Read size:            " << data.op_size << std::endl
        << "Object size:          " << data.object_size << std::endl
        << "Bandwidth (MB/sec):   " << setprecision(6) << bandwidth << std::endl
-       << "Average IOPS:         " << (int)(data.finished/runtime) << std::endl
-       << "Stddev IOPS:          " << vec_stddev(data.history.iops) << std::endl
+       << "Average IOPS:         " << (int)(data.finished/timePassed.count()) << std::endl
+       << "Stddev IOPS:          " << iops_stddev << std::endl
        << "Max IOPS:             " << data.idata.max_iops << std::endl
        << "Min IOPS:             " << data.idata.min_iops << std::endl
        << "Average Latency(s):   " << data.avg_latency << std::endl
        << "Max latency(s):       " << data.max_latency << std::endl
        << "Min latency(s):       " << data.min_latency << std::endl;
   } else {
-    formatter->dump_format("total_time_run", "%f", (double)runtime);
+    formatter->dump_format("total_time_run", "%f", timePassed.count());
     formatter->dump_format("total_reads_made", "%d", data.finished);
     formatter->dump_format("read_size", "%d", data.op_size);
     formatter->dump_format("object_size", "%d", data.object_size);
     formatter->dump_format("bandwidth", "%f", bandwidth);
-    formatter->dump_format("average_iops", "%d", (int)(data.finished/runtime));
-    formatter->dump_format("stddev_iops", "%d", vec_stddev(data.history.iops));
+    formatter->dump_format("average_iops", "%d", (int)(data.finished/timePassed.count()));
+    formatter->dump_format("stddev_iops", "%f", iops_stddev);
     formatter->dump_format("max_iops", "%d", data.idata.max_iops);
     formatter->dump_format("min_iops", "%d", data.idata.min_iops);
     formatter->dump_format("average_latency", "%f", data.avg_latency);
@@ -842,9 +875,9 @@ int ObjBencher::seq_read_bench(int seconds_to_run, int num_objects, int concurre
   return (errors > 0 ? -EIO : 0);
 
  ERR:
-  lock.Lock();
+  lock.lock();
   data.done = 1;
-  lock.Unlock();
+  lock.unlock();
   pthread_join(print_thread, NULL);
   return r;
 }
@@ -858,22 +891,20 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
 
   std::vector<string> name(concurrentios);
   std::string newName;
-  bufferlist* contents[concurrentios];
+  unique_ptr<bufferlist> contents[concurrentios];
   int index[concurrentios];
   int errors = 0;
-  utime_t start_time;
-  std::vector<utime_t> start_times(concurrentios);
-  utime_t time_to_run;
-  time_to_run.set_from_double(seconds_to_run);
-  double total_latency = 0;
   int r = 0;
-  utime_t runtime;
+  double total_latency = 0;
+  std::vector<mono_time> start_times(concurrentios);
+  mono_clock::duration time_to_run = std::chrono::seconds(seconds_to_run);
+  std::chrono::duration<double> timePassed;
   sanitize_object_contents(&data, data.op_size); //clean it up once; subsequent
   //changes will be safe because string length should remain the same
 
-  unsigned writes_per_object = 1;
+  unsigned reads_per_object = 1;
   if (data.op_size)
-    writes_per_object = data.object_size / data.op_size;
+    reads_per_object = data.object_size / data.op_size;
 
   srand (time(NULL));
 
@@ -883,35 +914,35 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
 
   //set up initial reads
   for (int i = 0; i < concurrentios; ++i) {
-    name[i] = generate_object_name(i / writes_per_object, pid);
-    contents[i] = new bufferlist();
+    name[i] = generate_object_name_fast(i / reads_per_object, pid);
+    contents[i] = std::make_unique<bufferlist>();
   }
 
-  lock.Lock();
+  lock.lock();
   data.finished = 0;
-  data.start_time = ceph_clock_now();
-  lock.Unlock();
+  data.start_time = mono_clock::now();
+  lock.unlock();
 
   pthread_t print_thread;
   pthread_create(&print_thread, NULL, status_printer, (void *)this);
   ceph_pthread_setname(print_thread, "rand_read_stat");
 
-  utime_t finish_time = data.start_time + time_to_run;
+  mono_time finish_time = data.start_time + time_to_run;
   //start initial reads
   for (int i = 0; i < concurrentios; ++i) {
     index[i] = i;
-    start_times[i] = ceph_clock_now();
+    start_times[i] = mono_clock::now();
     create_completion(i, _aio_cb, (void *)&lc);
-    r = aio_read(name[i], i, contents[i], data.op_size,
-                data.op_size * (i % writes_per_object));
-    if (r < 0) { //naughty, doesn't clean up heap -- oh, or handle the print thread!
+    r = aio_read(name[i], i, contents[i].get(), data.op_size,
+                data.op_size * (i % reads_per_object));
+    if (r < 0) {
       cerr << "r = " << r << std::endl;
       goto ERR;
     }
-    lock.Lock();
+    lock.lock();
     ++data.started;
     ++data.in_flight;
-    lock.Unlock();
+    lock.unlock();
   }
 
   //keep on adding new reads as old ones complete
@@ -920,8 +951,8 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
   int rand_id;
 
   slot = 0;
-  while ((!seconds_to_run || ceph_clock_now() < finish_time)) {
-    lock.Lock();
+  while ((seconds_to_run && mono_clock::now() < finish_time)) {
+    lock.lock();
     int old_slot = slot;
     bool found = false;
     while (1) {
@@ -942,28 +973,30 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
     }
 
     // calculate latency here, so memcmp doesn't inflate it
-    data.cur_latency = ceph_clock_now() - start_times[slot];
+    data.cur_latency = mono_clock::now() - start_times[slot];
 
-    lock.Unlock();
+    lock.unlock();
 
     int current_index = index[slot];
-    cur_contents = contents[slot];
+    cur_contents = contents[slot].get();
     completion_wait(slot);
-    lock.Lock();
+    lock.lock();
     r = completion_ret(slot);
     if (r < 0) {
       cerr << "read got " << r << std::endl;
-      lock.Unlock();
+      lock.unlock();
       goto ERR;
     }
 
-    total_latency += data.cur_latency;
-    if (data.cur_latency > data.max_latency) data.max_latency = data.cur_latency;
-    if (data.cur_latency < data.min_latency) data.min_latency = data.cur_latency;
+    total_latency += data.cur_latency.count();
+    if (data.cur_latency.count() > data.max_latency)
+      data.max_latency = data.cur_latency.count();
+    if (data.cur_latency.count() < data.min_latency)
+      data.min_latency = data.cur_latency.count();
     ++data.finished;
     data.avg_latency = total_latency / data.finished;
     --data.in_flight;
-    lock.Unlock();
+    lock.unlock();
     
     if (!no_verify) {
       snprintf(data.object_contents, data.op_size, "I'm the %16dth op!", current_index);
@@ -975,7 +1008,7 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
     } 
 
     rand_id = rand() % num_objects;
-    newName = generate_object_name(rand_id / writes_per_object, pid);
+    newName = generate_object_name_fast(rand_id / reads_per_object, pid);
     index[slot] = rand_id;
     release_completion(slot);
 
@@ -983,17 +1016,17 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
     cur_contents->invalidate_crc();
 
     //start new read and check data if requested
-    start_times[slot] = ceph_clock_now();
+    start_times[slot] = mono_clock::now();
     create_completion(slot, _aio_cb, (void *)&lc);
-    r = aio_read(newName, slot, contents[slot], data.op_size,
-                data.op_size * (rand_id % writes_per_object));
+    r = aio_read(newName, slot, contents[slot].get(), data.op_size,
+                data.op_size * (rand_id % reads_per_object));
     if (r < 0) {
       goto ERR;
     }
-    lock.Lock();
+    lock.lock();
     ++data.started;
     ++data.in_flight;
-    lock.Unlock();
+    lock.unlock();
     name[slot] = newName;
   }
 
@@ -1002,67 +1035,75 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
   while (data.finished < data.started) {
     slot = data.finished % concurrentios;
     completion_wait(slot);
-    lock.Lock();
+    lock.lock();
     r = completion_ret(slot);
     if (r < 0) {
       cerr << "read got " << r << std::endl;
-      lock.Unlock();
+      lock.unlock();
       goto ERR;
     }
-    data.cur_latency = ceph_clock_now() - start_times[slot];
-    total_latency += data.cur_latency;
-    if (data.cur_latency > data.max_latency) data.max_latency = data.cur_latency;
-    if (data.cur_latency < data.min_latency) data.min_latency = data.cur_latency;
+    data.cur_latency = mono_clock::now() - start_times[slot];
+    total_latency += data.cur_latency.count();
+    if (data.cur_latency.count() > data.max_latency)
+      data.max_latency = data.cur_latency.count();
+    if (data.cur_latency.count() < data.min_latency)
+      data.min_latency = data.cur_latency.count();
     ++data.finished;
     data.avg_latency = total_latency / data.finished;
     --data.in_flight;
     release_completion(slot);
     if (!no_verify) {
       snprintf(data.object_contents, data.op_size, "I'm the %16dth op!", index[slot]);
-      lock.Unlock();
+      lock.unlock();
       if ((contents[slot]->length() != data.op_size) || 
           (memcmp(data.object_contents, contents[slot]->c_str(), data.op_size) != 0)) {
         cerr << name[slot] << " is not correct!" << std::endl;
         ++errors;
       }
     } else {
-        lock.Unlock();
+        lock.unlock();
     }
-    delete contents[slot];
   }
 
-  runtime = ceph_clock_now() - data.start_time;
-  lock.Lock();
+  timePassed = mono_clock::now() - data.start_time;
+  lock.lock();
   data.done = true;
-  lock.Unlock();
+  lock.unlock();
 
   pthread_join(print_thread, NULL);
 
   double bandwidth;
-  bandwidth = ((double)data.finished)*((double)data.op_size)/(double)runtime;
+  bandwidth = ((double)data.finished)*((double)data.op_size)/timePassed.count();
   bandwidth = bandwidth/(1024*1024); // we want it in MB/sec
+  
+  double iops_stddev;
+  if (data.idata.iops_cycles > 1) {
+    iops_stddev = std::sqrt(data.idata.iops_diff_sum / (data.idata.iops_cycles - 1));
+  } else {
+    iops_stddev = 0;
+  }
 
   if (!formatter) {
-    out(cout) << "Total time run:       " << runtime << std::endl
+    out(cout) << "Total time run:       " << timePassed.count() << std::endl
        << "Total reads made:     " << data.finished << std::endl
        << "Read size:            " << data.op_size << std::endl
        << "Object size:          " << data.object_size << std::endl
        << "Bandwidth (MB/sec):   " << setprecision(6) << bandwidth << std::endl
-       << "Average IOPS:         " << (int)(data.finished/runtime) << std::endl
-       << "Stddev IOPS:          " << vec_stddev(data.history.iops) << std::endl
+       << "Average IOPS:         " << (int)(data.finished/timePassed.count()) << std::endl
+       << "Stddev IOPS:          " << iops_stddev << std::endl
        << "Max IOPS:             " << data.idata.max_iops << std::endl
        << "Min IOPS:             " << data.idata.min_iops << std::endl
        << "Average Latency(s):   " << data.avg_latency << std::endl
        << "Max latency(s):       " << data.max_latency << std::endl
        << "Min latency(s):       " << data.min_latency << std::endl;
   } else {
-    formatter->dump_format("total_time_run", "%f", (double)runtime);
+    formatter->dump_format("total_time_run", "%f", timePassed.count());
     formatter->dump_format("total_reads_made", "%d", data.finished);
     formatter->dump_format("read_size", "%d", data.op_size);
     formatter->dump_format("object_size", "%d", data.object_size);
     formatter->dump_format("bandwidth", "%f", bandwidth);
-    formatter->dump_format("average_iops", "%d", (int)(data.finished/runtime));
-    formatter->dump_format("stddev_iops", "%d", vec_stddev(data.history.iops));
+    formatter->dump_format("average_iops", "%d", (int)(data.finished/timePassed.count()));
+    formatter->dump_format("stddev_iops", "%f", iops_stddev);
     formatter->dump_format("max_iops", "%d", data.idata.max_iops);
     formatter->dump_format("min_iops", "%d", data.idata.min_iops);
     formatter->dump_format("average_latency", "%f", data.avg_latency);
@@ -1074,9 +1115,9 @@ int ObjBencher::rand_read_bench(int seconds_to_run, int num_objects, int concurr
   return (errors > 0 ? -EIO : 0);
 
  ERR:
-  lock.Lock();
+  lock.lock();
   data.done = 1;
-  lock.Unlock();
+  lock.unlock();
   pthread_join(print_thread, NULL);
   return r;
 }
@@ -1155,15 +1196,14 @@ int ObjBencher::clean_up(int num_objects, int prevPid, int concurrentios) {
   std::vector<string> name(concurrentios);
   std::string newName;
   int r = 0;
-  utime_t runtime;
   int slot = 0;
 
-  lock.Lock();
+  lock.lock();
   data.done = false;
   data.in_flight = 0;
   data.started = 0;
   data.finished = 0;
-  lock.Unlock();
+  lock.unlock();
 
   // don't start more completions than files
   if (num_objects == 0) {
@@ -1178,7 +1218,7 @@ int ObjBencher::clean_up(int num_objects, int prevPid, int concurrentios) {
 
   //set up initial removes
   for (int i = 0; i < concurrentios; ++i) {
-    name[i] = generate_object_name(i, prevPid);
+    name[i] = generate_object_name_fast(i, prevPid);
   }
 
   //start initial removes
@@ -1189,15 +1229,15 @@ int ObjBencher::clean_up(int num_objects, int prevPid, int concurrentios) {
       cerr << "r = " << r << std::endl;
       goto ERR;
     }
-    lock.Lock();
+    lock.lock();
     ++data.started;
     ++data.in_flight;
-    lock.Unlock();
+    lock.unlock();
   }
 
   //keep on adding new removes as old ones complete
   while (data.started < num_objects) {
-    lock.Lock();
+    lock.lock();
     int old_slot = slot;
     bool found = false;
     while (1) {
@@ -1216,19 +1256,19 @@ int ObjBencher::clean_up(int num_objects, int prevPid, int concurrentios) {
       }
       lc.cond.Wait(lock);
     }
-    lock.Unlock();
-    newName = generate_object_name(data.started, prevPid);
+    lock.unlock();
+    newName = generate_object_name_fast(data.started, prevPid);
     completion_wait(slot);
-    lock.Lock();
+    lock.lock();
     r = completion_ret(slot);
     if (r != 0 && r != -ENOENT) { // file does not exist
       cerr << "remove got " << r << std::endl;
-      lock.Unlock();
+      lock.unlock();
       goto ERR;
     }
     ++data.finished;
     --data.in_flight;
-    lock.Unlock();
+    lock.unlock();
     release_completion(slot);
 
     //start new remove and check data if requested
@@ -1237,10 +1277,10 @@ int ObjBencher::clean_up(int num_objects, int prevPid, int concurrentios) {
     if (r < 0) {
       goto ERR;
     }
-    lock.Lock();
+    lock.lock();
     ++data.started;
     ++data.in_flight;
-    lock.Unlock();
+    lock.unlock();
     name[slot] = newName;
   }
 
@@ -1248,22 +1288,22 @@ int ObjBencher::clean_up(int num_objects, int prevPid, int concurrentios) {
   while (data.finished < data.started) {
     slot = data.finished % concurrentios;
     completion_wait(slot);
-    lock.Lock();
+    lock.lock();
     r = completion_ret(slot);
     if (r != 0 && r != -ENOENT) { // file does not exist
       cerr << "remove got " << r << std::endl;
-      lock.Unlock();
+      lock.unlock();
       goto ERR;
     }
     ++data.finished;
     --data.in_flight;
     release_completion(slot);
-    lock.Unlock();
+    lock.unlock();
   }
 
-  lock.Lock();
+  lock.lock();
   data.done = true;
-  lock.Unlock();
+  lock.unlock();
 
   completions_done();
 
@@ -1272,9 +1312,9 @@ int ObjBencher::clean_up(int num_objects, int prevPid, int concurrentios) {
   return 0;
 
  ERR:
-  lock.Lock();
+  lock.lock();
   data.done = 1;
-  lock.Unlock();
+  lock.unlock();
   return r;
 }
 
@@ -1320,17 +1360,16 @@ int ObjBencher::clean_up_slow(const std::string& prefix, int concurrentios) {
   std::vector<Object> name(concurrentios);
   Object newName;
   int r = 0;
-  utime_t runtime;
   int slot = 0;
   std::list<Object> objects;
   bool objects_remain = true;
 
-  lock.Lock();
+  lock.lock();
   data.done = false;
   data.in_flight = 0;
   data.started = 0;
   data.finished = 0;
-  lock.Unlock();
+  lock.unlock();
 
   out(cout) << "Warning: using slow linear search" << std::endl;
 
@@ -1363,15 +1402,15 @@ int ObjBencher::clean_up_slow(const std::string& prefix, int concurrentios) {
       cerr << "r = " << r << std::endl;
       goto ERR;
     }
-    lock.Lock();
+    lock.lock();
     ++data.started;
     ++data.in_flight;
-    lock.Unlock();
+    lock.unlock();
   }
 
   //keep on adding new removes as old ones complete
   while (objects_remain) {
-    lock.Lock();
+    lock.lock();
     int old_slot = slot;
     bool found = false;
     while (1) {
@@ -1390,7 +1429,7 @@ int ObjBencher::clean_up_slow(const std::string& prefix, int concurrentios) {
       }
       lc.cond.Wait(lock);
     }
-    lock.Unlock();
+    lock.unlock();
 
     // get more objects if necessary
     if (objects.empty()) {
@@ -1406,16 +1445,16 @@ int ObjBencher::clean_up_slow(const std::string& prefix, int concurrentios) {
     objects.pop_front();
 
     completion_wait(slot);
-    lock.Lock();
+    lock.lock();
     r = completion_ret(slot);
     if (r != 0 && r != -ENOENT) { // file does not exist
       cerr << "remove got " << r << std::endl;
-      lock.Unlock();
+      lock.unlock();
       goto ERR;
     }
     ++data.finished;
     --data.in_flight;
-    lock.Unlock();
+    lock.unlock();
     release_completion(slot);
 
     //start new remove and check data if requested
@@ -1425,10 +1464,10 @@ int ObjBencher::clean_up_slow(const std::string& prefix, int concurrentios) {
     if (r < 0) {
       goto ERR;
     }
-    lock.Lock();
+    lock.lock();
     ++data.started;
     ++data.in_flight;
-    lock.Unlock();
+    lock.unlock();
     name[slot] = newName;
   }
 
@@ -1436,22 +1475,22 @@ int ObjBencher::clean_up_slow(const std::string& prefix, int concurrentios) {
   while (data.finished < data.started) {
     slot = data.finished % concurrentios;
     completion_wait(slot);
-    lock.Lock();
+    lock.lock();
     r = completion_ret(slot);
     if (r != 0 && r != -ENOENT) { // file does not exist
       cerr << "remove got " << r << std::endl;
-      lock.Unlock();
+      lock.unlock();
       goto ERR;
     }
     ++data.finished;
     --data.in_flight;
     release_completion(slot);
-    lock.Unlock();
+    lock.unlock();
   }
 
-  lock.Lock();
+  lock.lock();
   data.done = true;
-  lock.Unlock();
+  lock.unlock();
 
   completions_done();
 
@@ -1460,8 +1499,8 @@ int ObjBencher::clean_up_slow(const std::string& prefix, int concurrentios) {
   return 0;
 
  ERR:
-  lock.Lock();
+  lock.lock();
   data.done = 1;
-  lock.Unlock();
+  lock.unlock();
   return -EIO;
 }