]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | |
2 | // Copyright Oliver Kowalke 2015. | |
3 | // Distributed under the Boost Software License, Version 1.0. | |
4 | // (See accompanying file LICENSE_1_0.txt or copy at | |
5 | // http://www.boost.org/LICENSE_1_0.txt) | |
6 | ||
b32b8144 FG |
7 | // based on https://github.com/atemerev/skynet from Alexander Temerev |
8 | ||
7c673cae FG |
9 | #include <algorithm> |
10 | #include <cassert> | |
11 | #include <chrono> | |
b32b8144 | 12 | #include <cmath> |
7c673cae FG |
13 | #include <condition_variable> |
14 | #include <cstddef> | |
15 | #include <cstdint> | |
16 | #include <cstdlib> | |
b32b8144 | 17 | #include <queue> |
7c673cae FG |
18 | #include <iostream> |
19 | #include <memory> | |
20 | #include <mutex> | |
21 | #include <numeric> | |
b32b8144 FG |
22 | #include <random> |
23 | #include <sstream> | |
7c673cae FG |
24 | #include <vector> |
25 | ||
26 | #include <boost/fiber/all.hpp> | |
92f5a8d4 | 27 | #include <boost/fiber/numa/all.hpp> |
b32b8144 | 28 | #include <boost/predef.h> |
7c673cae | 29 | |
b32b8144 | 30 | #include "../barrier.hpp" |
7c673cae | 31 | |
7c673cae FG |
32 | using clock_type = std::chrono::steady_clock; |
33 | using duration_type = clock_type::duration; | |
7c673cae | 34 | using time_point_type = clock_type::time_point; |
b32b8144 FG |
35 | using channel_type = boost::fibers::buffered_channel< std::uint64_t >; |
36 | using allocator_type = boost::fibers::fixedsize_stack; | |
37 | using lock_type = std::unique_lock< std::mutex >; | |
7c673cae FG |
38 | |
39 | static bool done = false; | |
40 | static std::mutex mtx{}; | |
41 | static boost::fibers::condition_variable_any cnd{}; | |
42 | ||
b32b8144 FG |
43 | std::uint32_t hardware_concurrency( std::vector< boost::fibers::numa::node > const& topo) { |
44 | std::uint32_t cpus = 0; | |
45 | for ( auto & node : topo) { | |
46 | cpus += node.logical_cpus.size(); | |
47 | } | |
48 | return cpus; | |
49 | } | |
50 | ||
7c673cae FG |
51 | // microbenchmark |
52 | void skynet( allocator_type & salloc, channel_type & c, std::size_t num, std::size_t size, std::size_t div) { | |
53 | if ( 1 == size) { | |
54 | c.push( num); | |
55 | } else { | |
b32b8144 | 56 | channel_type rc{ 16 }; |
7c673cae FG |
57 | for ( std::size_t i = 0; i < div; ++i) { |
58 | auto sub_num = num + i * size / div; | |
59 | boost::fibers::fiber{ boost::fibers::launch::dispatch, | |
b32b8144 FG |
60 | std::allocator_arg, salloc, |
61 | skynet, | |
62 | std::ref( salloc), std::ref( rc), sub_num, size / div, div }.detach(); | |
7c673cae FG |
63 | } |
64 | std::uint64_t sum{ 0 }; | |
65 | for ( std::size_t i = 0; i < div; ++i) { | |
66 | sum += rc.value_pop(); | |
67 | } | |
68 | c.push( sum); | |
69 | } | |
70 | } | |
71 | ||
92f5a8d4 TL |
72 | void thread( std::uint32_t cpu_id, std::uint32_t node_id, std::vector< boost::fibers::numa::node > const& topo) { |
73 | boost::fibers::use_scheduling_algorithm< boost::fibers::numa::algo::work_stealing >( cpu_id, node_id, topo); | |
7c673cae FG |
74 | lock_type lk( mtx); |
75 | cnd.wait( lk, [](){ return done; }); | |
76 | BOOST_ASSERT( done); | |
77 | } | |
78 | ||
79 | int main() { | |
80 | try { | |
b32b8144 FG |
81 | std::vector< boost::fibers::numa::node > topo = boost::fibers::numa::topology(); |
82 | auto node = topo[0]; | |
83 | auto main_cpu_id = * node.logical_cpus.begin(); | |
b32b8144 | 84 | std::size_t size{ 1000000 }; |
7c673cae | 85 | std::size_t div{ 10 }; |
b32b8144 | 86 | allocator_type salloc{ 2*allocator_type::traits_type::page_size() }; |
7c673cae | 87 | std::uint64_t result{ 0 }; |
b32b8144 FG |
88 | channel_type rc{ 2 }; |
89 | std::vector< std::thread > threads; | |
92f5a8d4 | 90 | for ( auto && node : topo) { |
b32b8144 FG |
91 | for ( std::uint32_t cpu_id : node.logical_cpus) { |
92 | // exclude main-thread | |
93 | if ( main_cpu_id != cpu_id) { | |
92f5a8d4 | 94 | threads.emplace_back( thread, cpu_id, node.id, std::cref( topo) ); |
b32b8144 FG |
95 | } |
96 | } | |
97 | } | |
92f5a8d4 | 98 | boost::fibers::use_scheduling_algorithm< boost::fibers::numa::algo::work_stealing >( main_cpu_id, node.id, topo); |
7c673cae FG |
99 | time_point_type start{ clock_type::now() }; |
100 | skynet( salloc, rc, 0, size, div); | |
101 | result = rc.value_pop(); | |
b32b8144 FG |
102 | if ( 499999500000 != result) { |
103 | throw std::runtime_error("invalid result"); | |
104 | } | |
105 | auto duration = clock_type::now() - start; | |
7c673cae FG |
106 | lock_type lk( mtx); |
107 | done = true; | |
108 | lk.unlock(); | |
109 | cnd.notify_all(); | |
110 | for ( std::thread & t : threads) { | |
111 | t.join(); | |
112 | } | |
b32b8144 | 113 | std::cout << "duration: " << duration.count() / 1000000 << " ms" << std::endl; |
7c673cae FG |
114 | return EXIT_SUCCESS; |
115 | } catch ( std::exception const& e) { | |
116 | std::cerr << "exception: " << e.what() << std::endl; | |
117 | } catch (...) { | |
118 | std::cerr << "unhandled exception" << std::endl; | |
119 | } | |
120 | return EXIT_FAILURE; | |
121 | } |