]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | |
2 | // Copyright Oliver Kowalke 2015. | |
3 | // Distributed under the Boost Software License, Version 1.0. | |
4 | // (See accompanying file LICENSE_1_0.txt or copy at | |
5 | // http://www.boost.org/LICENSE_1_0.txt) | |
6 | ||
b32b8144 FG |
7 | // based on https://github.com/atemerev/skynet from Alexander Temerev |
8 | ||
7c673cae FG |
9 | #include <algorithm> |
10 | #include <cassert> | |
11 | #include <chrono> | |
b32b8144 | 12 | #include <cmath> |
7c673cae FG |
13 | #include <condition_variable> |
14 | #include <cstddef> | |
15 | #include <cstdint> | |
16 | #include <cstdlib> | |
b32b8144 | 17 | #include <queue> |
7c673cae FG |
18 | #include <iostream> |
19 | #include <memory> | |
20 | #include <mutex> | |
21 | #include <numeric> | |
b32b8144 FG |
22 | #include <random> |
23 | #include <sstream> | |
7c673cae FG |
24 | #include <vector> |
25 | ||
26 | #include <boost/fiber/all.hpp> | |
b32b8144 FG |
27 | #include <boost/fiber/numa/topology.hpp> |
28 | #include <boost/predef.h> | |
7c673cae | 29 | |
b32b8144 | 30 | #include "../barrier.hpp" |
7c673cae | 31 | |
7c673cae FG |
32 | using clock_type = std::chrono::steady_clock; |
33 | using duration_type = clock_type::duration; | |
7c673cae | 34 | using time_point_type = clock_type::time_point; |
b32b8144 FG |
35 | using channel_type = boost::fibers::buffered_channel< std::uint64_t >; |
36 | using allocator_type = boost::fibers::fixedsize_stack; | |
37 | using lock_type = std::unique_lock< std::mutex >; | |
7c673cae FG |
38 | |
39 | static bool done = false; | |
40 | static std::mutex mtx{}; | |
41 | static boost::fibers::condition_variable_any cnd{}; | |
42 | ||
b32b8144 FG |
43 | std::uint32_t hardware_concurrency( std::vector< boost::fibers::numa::node > const& topo) { |
44 | std::uint32_t cpus = 0; | |
45 | for ( auto & node : topo) { | |
46 | cpus += node.logical_cpus.size(); | |
47 | } | |
48 | return cpus; | |
49 | } | |
50 | ||
7c673cae FG |
51 | // microbenchmark |
52 | void skynet( allocator_type & salloc, channel_type & c, std::size_t num, std::size_t size, std::size_t div) { | |
53 | if ( 1 == size) { | |
54 | c.push( num); | |
55 | } else { | |
b32b8144 | 56 | channel_type rc{ 16 }; |
7c673cae FG |
57 | for ( std::size_t i = 0; i < div; ++i) { |
58 | auto sub_num = num + i * size / div; | |
59 | boost::fibers::fiber{ boost::fibers::launch::dispatch, | |
b32b8144 FG |
60 | std::allocator_arg, salloc, |
61 | skynet, | |
62 | std::ref( salloc), std::ref( rc), sub_num, size / div, div }.detach(); | |
7c673cae FG |
63 | } |
64 | std::uint64_t sum{ 0 }; | |
65 | for ( std::size_t i = 0; i < div; ++i) { | |
66 | sum += rc.value_pop(); | |
67 | } | |
68 | c.push( sum); | |
69 | } | |
70 | } | |
71 | ||
b32b8144 FG |
72 | void thread( std::uint32_t cpu_id, std::uint32_t node_id, std::vector< boost::fibers::numa::node > const& topo, barrier * b) { |
73 | boost::fibers::use_scheduling_algorithm< boost::fibers::algo::numa::work_stealing >( cpu_id, node_id, topo); | |
7c673cae FG |
74 | b->wait(); |
75 | lock_type lk( mtx); | |
76 | cnd.wait( lk, [](){ return done; }); | |
77 | BOOST_ASSERT( done); | |
78 | } | |
79 | ||
80 | int main() { | |
81 | try { | |
b32b8144 FG |
82 | std::vector< boost::fibers::numa::node > topo = boost::fibers::numa::topology(); |
83 | auto node = topo[0]; | |
84 | auto main_cpu_id = * node.logical_cpus.begin(); | |
85 | boost::fibers::use_scheduling_algorithm< boost::fibers::algo::numa::work_stealing >( main_cpu_id, node.id, topo); | |
86 | barrier b{ hardware_concurrency( topo) }; | |
87 | std::size_t size{ 1000000 }; | |
7c673cae | 88 | std::size_t div{ 10 }; |
b32b8144 FG |
89 | // Windows 10 and FreeBSD require a fiber stack of 8kb |
90 | // otherwise the stack gets exhausted | |
91 | // stack requirements must be checked for other OS too | |
92 | #if BOOST_OS_WINDOWS || BOOST_OS_BSD | |
93 | allocator_type salloc{ 2*allocator_type::traits_type::page_size() }; | |
94 | #else | |
95 | allocator_type salloc{ allocator_type::traits_type::page_size() }; | |
96 | #endif | |
7c673cae | 97 | std::uint64_t result{ 0 }; |
b32b8144 FG |
98 | channel_type rc{ 2 }; |
99 | std::vector< std::thread > threads; | |
100 | for ( auto & node : topo) { | |
101 | for ( std::uint32_t cpu_id : node.logical_cpus) { | |
102 | // exclude main-thread | |
103 | if ( main_cpu_id != cpu_id) { | |
104 | threads.emplace_back( thread, cpu_id, node.id, std::cref( topo), & b); | |
105 | } | |
106 | } | |
107 | } | |
7c673cae FG |
108 | b.wait(); |
109 | time_point_type start{ clock_type::now() }; | |
110 | skynet( salloc, rc, 0, size, div); | |
111 | result = rc.value_pop(); | |
b32b8144 FG |
112 | if ( 499999500000 != result) { |
113 | throw std::runtime_error("invalid result"); | |
114 | } | |
115 | auto duration = clock_type::now() - start; | |
7c673cae FG |
116 | lock_type lk( mtx); |
117 | done = true; | |
118 | lk.unlock(); | |
119 | cnd.notify_all(); | |
120 | for ( std::thread & t : threads) { | |
121 | t.join(); | |
122 | } | |
b32b8144 | 123 | std::cout << "duration: " << duration.count() / 1000000 << " ms" << std::endl; |
7c673cae FG |
124 | return EXIT_SUCCESS; |
125 | } catch ( std::exception const& e) { | |
126 | std::cerr << "exception: " << e.what() << std::endl; | |
127 | } catch (...) { | |
128 | std::cerr << "unhandled exception" << std::endl; | |
129 | } | |
130 | return EXIT_FAILURE; | |
131 | } |