]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2014 UnitedStack <haomai@unitedstack.com> | |
7 | * | |
8 | * Author: Haomai Wang <haomaiwang@gmail.com> | |
9 | * | |
10 | * This is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License version 2.1, as published by the Free Software | |
13 | * Foundation. See file COPYING. | |
14 | * | |
15 | */ | |
16 | /* Copyright (c) 2011-2014 Stanford University | |
17 | * | |
18 | * Permission to use, copy, modify, and distribute this software for any | |
19 | * purpose with or without fee is hereby granted, provided that the above | |
20 | * copyright notice and this permission notice appear in all copies. | |
21 | * | |
22 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR(S) DISCLAIM ALL WARRANTIES | |
23 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
24 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL AUTHORS BE LIABLE FOR | |
25 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
26 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
27 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
28 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
29 | */ | |
30 | ||
7c673cae | 31 | #include "debug.h" |
7c673cae FG |
32 | #include "Cycles.h" |
33 | ||
34 | double Cycles::cycles_per_sec = 0; | |
35 | ||
36 | /** | |
37 | * Perform once-only overall initialization for the Cycles class, such | |
38 | * as calibrating the clock frequency. This method must be called | |
39 | * before using the Cycles module. | |
40 | * | |
41 | * It is not initialized by default because the timing loops cause | |
42 | * general process startup times to balloon | |
43 | * (http://tracker.ceph.com/issues/15225). | |
44 | */ | |
45 | void Cycles::init() | |
46 | { | |
47 | if (cycles_per_sec != 0) | |
48 | return; | |
49 | ||
50 | // Skip initialization if rtdsc is not implemented | |
51 | if (rdtsc() == 0) | |
52 | return; | |
53 | ||
54 | // Compute the frequency of the fine-grained CPU timer: to do this, | |
55 | // take parallel time readings using both rdtsc and gettimeofday. | |
56 | // After 10ms have elapsed, take the ratio between these readings. | |
57 | ||
58 | struct timeval start_time, stop_time; | |
59 | uint64_t micros; | |
60 | double old_cycles; | |
61 | ||
62 | // There is one tricky aspect, which is that we could get interrupted | |
63 | // between calling gettimeofday and reading the cycle counter, in which | |
64 | // case we won't have corresponding readings. To handle this (unlikely) | |
65 | // case, compute the overall result repeatedly, and wait until we get | |
66 | // two successive calculations that are within 0.1% of each other. | |
67 | old_cycles = 0; | |
68 | while (1) { | |
69 | if (gettimeofday(&start_time, NULL) != 0) { | |
11fdf7f2 | 70 | ceph_abort_msg("couldn't read clock"); |
7c673cae FG |
71 | } |
72 | uint64_t start_cycles = rdtsc(); | |
73 | while (1) { | |
74 | if (gettimeofday(&stop_time, NULL) != 0) { | |
11fdf7f2 | 75 | ceph_abort_msg("couldn't read clock"); |
7c673cae FG |
76 | } |
77 | uint64_t stop_cycles = rdtsc(); | |
78 | micros = (stop_time.tv_usec - start_time.tv_usec) + | |
79 | (stop_time.tv_sec - start_time.tv_sec)*1000000; | |
80 | if (micros > 10000) { | |
81 | cycles_per_sec = static_cast<double>(stop_cycles - start_cycles); | |
82 | cycles_per_sec = 1000000.0*cycles_per_sec/ static_cast<double>(micros); | |
83 | break; | |
84 | } | |
85 | } | |
86 | double delta = cycles_per_sec/1000.0; | |
87 | if ((old_cycles > (cycles_per_sec - delta)) && | |
88 | (old_cycles < (cycles_per_sec + delta))) { | |
89 | return; | |
90 | } | |
91 | old_cycles = cycles_per_sec; | |
92 | } | |
93 | } | |
94 | ||
95 | /** | |
96 | * Return the number of CPU cycles per second. | |
97 | */ | |
98 | double Cycles::per_second() | |
99 | { | |
100 | return get_cycles_per_sec(); | |
101 | } | |
102 | ||
103 | /** | |
104 | * Given an elapsed time measured in cycles, return a floating-point number | |
105 | * giving the corresponding time in seconds. | |
106 | * \param cycles | |
107 | * Difference between the results of two calls to rdtsc. | |
108 | * \param cycles_per_sec | |
109 | * Optional parameter to specify the frequency of the counter that #cycles | |
110 | * was taken from. Useful when converting a remote machine's tick counter | |
111 | * to seconds. The default value of 0 will use the local processor's | |
112 | * computed counter frequency. | |
113 | * \return | |
114 | * The time in seconds corresponding to cycles. | |
115 | */ | |
116 | double Cycles::to_seconds(uint64_t cycles, double cycles_per_sec) | |
117 | { | |
118 | if (cycles_per_sec == 0) | |
119 | cycles_per_sec = get_cycles_per_sec(); | |
120 | return static_cast<double>(cycles)/cycles_per_sec; | |
121 | } | |
122 | ||
123 | /** | |
124 | * Given a time in seconds, return the number of cycles that it | |
125 | * corresponds to. | |
126 | * \param seconds | |
127 | * Time in seconds. | |
128 | * \param cycles_per_sec | |
129 | * Optional parameter to specify the frequency of the counter that #cycles | |
130 | * was taken from. Useful when converting a remote machine's tick counter | |
131 | * to seconds. The default value of 0 will use the local processor's | |
132 | * computed counter frequency. | |
133 | * \return | |
134 | * The approximate number of cycles corresponding to #seconds. | |
135 | */ | |
136 | uint64_t Cycles::from_seconds(double seconds, double cycles_per_sec) | |
137 | { | |
138 | if (cycles_per_sec == 0) | |
139 | cycles_per_sec = get_cycles_per_sec(); | |
140 | return (uint64_t) (seconds*cycles_per_sec + 0.5); | |
141 | } | |
142 | ||
143 | /** | |
144 | * Given an elapsed time measured in cycles, return an integer | |
145 | * giving the corresponding time in microseconds. Note: to_seconds() | |
146 | * is faster than this method. | |
147 | * \param cycles | |
148 | * Difference between the results of two calls to rdtsc. | |
149 | * \param cycles_per_sec | |
150 | * Optional parameter to specify the frequency of the counter that #cycles | |
151 | * was taken from. Useful when converting a remote machine's tick counter | |
152 | * to seconds. The default value of 0 will use the local processor's | |
153 | * computed counter frequency. | |
154 | * \return | |
155 | * The time in microseconds corresponding to cycles (rounded). | |
156 | */ | |
157 | uint64_t Cycles::to_microseconds(uint64_t cycles, double cycles_per_sec) | |
158 | { | |
159 | return to_nanoseconds(cycles, cycles_per_sec) / 1000; | |
160 | } | |
161 | ||
162 | /** | |
163 | * Given an elapsed time measured in cycles, return an integer | |
164 | * giving the corresponding time in nanoseconds. Note: to_seconds() | |
165 | * is faster than this method. | |
166 | * \param cycles | |
167 | * Difference between the results of two calls to rdtsc. | |
168 | * \param cycles_per_sec | |
169 | * Optional parameter to specify the frequency of the counter that #cycles | |
170 | * was taken from. Useful when converting a remote machine's tick counter | |
171 | * to seconds. The default value of 0 will use the local processor's | |
172 | * computed counter frequency. | |
173 | * \return | |
174 | * The time in nanoseconds corresponding to cycles (rounded). | |
175 | */ | |
176 | uint64_t Cycles::to_nanoseconds(uint64_t cycles, double cycles_per_sec) | |
177 | { | |
178 | if (cycles_per_sec == 0) | |
179 | cycles_per_sec = get_cycles_per_sec(); | |
180 | return (uint64_t) (1e09*static_cast<double>(cycles)/cycles_per_sec + 0.5); | |
181 | } | |
182 | ||
183 | /** | |
184 | * Given a number of nanoseconds, return an approximate number of | |
185 | * cycles for an equivalent time length. | |
186 | * \param ns | |
187 | * Number of nanoseconds. | |
188 | * \param cycles_per_sec | |
189 | * Optional parameter to specify the frequency of the counter that #cycles | |
190 | * was taken from. Useful when converting a remote machine's tick counter | |
191 | * to seconds. The default value of 0 will use the local processor's | |
192 | * computed counter frequency. | |
193 | * \return | |
194 | * The approximate number of cycles for the same time length. | |
195 | */ | |
196 | uint64_t | |
197 | Cycles::from_nanoseconds(uint64_t ns, double cycles_per_sec) | |
198 | { | |
199 | if (cycles_per_sec == 0) | |
200 | cycles_per_sec = get_cycles_per_sec(); | |
201 | return (uint64_t) (static_cast<double>(ns)*cycles_per_sec/1e09 + 0.5); | |
202 | } | |
203 | ||
204 | /** | |
205 | * Busy wait for a given number of microseconds. | |
206 | * Callers should use this method in most reasonable cases as opposed to | |
207 | * usleep for accurate measurements. Calling usleep may put the the processor | |
208 | * in a low power mode/sleep state which reduces the clock frequency. | |
209 | * So, each time the process/thread wakes up from usleep, it takes some time | |
210 | * to ramp up to maximum frequency. Thus meausrements often incur higher | |
211 | * latencies. | |
212 | * \param us | |
213 | * Number of microseconds. | |
214 | */ | |
215 | void | |
216 | Cycles::sleep(uint64_t us) | |
217 | { | |
218 | uint64_t stop = Cycles::rdtsc() + Cycles::from_nanoseconds(1000*us); | |
219 | while (Cycles::rdtsc() < stop); | |
220 | } |