ceph/src/boost/libs/fiber/performance/fiber/numa/skynet_stealing_detach.cpp

   1
   2 //          Copyright Oliver Kowalke 2015.
   3 // Distributed under the Boost Software License, Version 1.0.
   4 //    (See accompanying file LICENSE_1_0.txt or copy at
   5 //          http://www.boost.org/LICENSE_1_0.txt)
   6
   7 // based on https://github.com/atemerev/skynet from Alexander Temerev
   8
   9 #include <algorithm>
  10 #include <cassert>
  11 #include <chrono>
  12 #include <cmath>
  13 #include <condition_variable>
  14 #include <cstddef>
  15 #include <cstdint>
  16 #include <cstdlib>
  17 #include <queue>
  18 #include <iostream>
  19 #include <memory>
  20 #include <mutex>
  21 #include <numeric>
  22 #include <random>
  23 #include <sstream>
  24 #include <vector>
  25
  26 #include <boost/fiber/all.hpp>
  27 #include <boost/fiber/numa/topology.hpp>
  28 #include <boost/predef.h>
  29
  30 #include "../barrier.hpp"
  31
  32 using clock_type = std::chrono::steady_clock;
  33 using duration_type = clock_type::duration;
  34 using time_point_type = clock_type::time_point;
  35 using channel_type = boost::fibers::buffered_channel< std::uint64_t >;
  36 using allocator_type = boost::fibers::fixedsize_stack;
  37 using lock_type = std::unique_lock< std::mutex >;
  38
  39 static bool done = false;
  40 static std::mutex mtx{};
  41 static boost::fibers::condition_variable_any cnd{};
  42
  43 std::uint32_t hardware_concurrency( std::vector< boost::fibers::numa::node > const& topo) {
  44     std::uint32_t cpus = 0;
  45     for ( auto & node : topo) {
  46         cpus += node.logical_cpus.size();
  47     }
  48     return cpus;
  49 }
  50
  51 // microbenchmark
  52 void skynet( allocator_type & salloc, channel_type & c, std::size_t num, std::size_t size, std::size_t div) {
  53     if ( 1 == size) {
  54         c.push( num);
  55     } else {
  56         channel_type rc{ 16 };
  57         for ( std::size_t i = 0; i < div; ++i) {
  58             auto sub_num = num + i * size / div;
  59             boost::fibers::fiber{ boost::fibers::launch::dispatch,
  60                               std::allocator_arg, salloc,
  61                               skynet,
  62                               std::ref( salloc), std::ref( rc), sub_num, size / div, div }.detach();
  63         }
  64         std::uint64_t sum{ 0 };
  65         for ( std::size_t i = 0; i < div; ++i) {
  66             sum += rc.value_pop();
  67         }
  68         c.push( sum);
  69     }
  70 }
  71
  72 void thread( std::uint32_t cpu_id, std::uint32_t node_id, std::vector< boost::fibers::numa::node > const& topo, barrier * b) {
  73     boost::fibers::use_scheduling_algorithm< boost::fibers::algo::numa::work_stealing >( cpu_id, node_id, topo);
  74     b->wait();
  75     lock_type lk( mtx);
  76     cnd.wait( lk, [](){ return done; });
  77     BOOST_ASSERT( done);
  78 }
  79
  80 int main() {
  81     try {
  82         std::vector< boost::fibers::numa::node > topo = boost::fibers::numa::topology();
  83         auto node = topo[0];
  84         auto main_cpu_id = * node.logical_cpus.begin();
  85         boost::fibers::use_scheduling_algorithm< boost::fibers::algo::numa::work_stealing >( main_cpu_id, node.id, topo);
  86         barrier b{ hardware_concurrency( topo) };
  87         std::size_t size{ 1000000 };
  88         std::size_t div{ 10 };
  89         // Windows 10 and FreeBSD require a fiber stack of 8kb
  90         // otherwise the stack gets exhausted
  91         // stack requirements must be checked for other OS too
  92 #if BOOST_OS_WINDOWS || BOOST_OS_BSD
  93         allocator_type salloc{ 2*allocator_type::traits_type::page_size() };
  94 #else
  95         allocator_type salloc{ allocator_type::traits_type::page_size() };
  96 #endif
  97         std::uint64_t result{ 0 };
  98         channel_type rc{ 2 };
  99         std::vector< std::thread > threads;
 100         for ( auto & node : topo) {
 101             for ( std::uint32_t cpu_id : node.logical_cpus) {
 102                 // exclude main-thread
 103                 if ( main_cpu_id != cpu_id) {
 104                     threads.emplace_back( thread, cpu_id, node.id, std::cref( topo), & b);
 105                 }
 106             }
 107         }
 108         b.wait();
 109         time_point_type start{ clock_type::now() };
 110         skynet( salloc, rc, 0, size, div);
 111         result = rc.value_pop();
 112         if ( 499999500000 != result) {
 113             throw std::runtime_error("invalid result");
 114         }
 115         auto duration = clock_type::now() - start;
 116         lock_type lk( mtx);
 117         done = true;
 118         lk.unlock();
 119         cnd.notify_all();
 120         for ( std::thread & t : threads) {
 121             t.join();
 122         }
 123         std::cout << "duration: " << duration.count() / 1000000 << " ms" << std::endl;
 124         return EXIT_SUCCESS;
 125     } catch ( std::exception const& e) {
 126         std::cerr << "exception: " << e.what() << std::endl;
 127     } catch (...) {
 128         std::cerr << "unhandled exception" << std::endl;
 129     }
 130         return EXIT_FAILURE;
 131 }