]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/dpdk/examples/vm_power_manager/oob_monitor_x86.c
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / dpdk / examples / vm_power_manager / oob_monitor_x86.c
CommitLineData
11fdf7f2
TL
1/* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2018 Intel Corporation
3 */
4
5#include <unistd.h>
6#include <fcntl.h>
7#include <rte_log.h>
8
9#include "oob_monitor.h"
10#include "power_manager.h"
11#include "channel_manager.h"
12
/* Cleared by branch_monitor_exit() to make run_branch_monitor() return. */
static volatile unsigned run_loop = 1;
/*
 * Branch and branch-miss deltas from the most recent apply_policy() call
 * that got past its counter-overflow checks.
 */
static uint64_t g_branches, g_branch_misses;
/* Set to 1 by apply_policy() only when it ran through to the scaling step. */
static int g_active;
16
17void branch_monitor_exit(void)
18{
19 run_loop = 0;
20}
21
/* Number of microseconds between each poll */
#define INTERVAL 100
/* Number of polls that make up one second; used to rate-limit printing */
#define PRINT_LOOP_COUNT (1000000/INTERVAL)
/*
 * MSR numbers, used as file offsets into /dev/cpu/N/msr.
 * PERFEVTSELx selects what programmable counter x counts,
 * PERFCTRx holds the running count.
 */
#define IA32_PERFEVTSEL0 0x186
#define IA32_PERFEVTSEL1 0x187
#define IA32_PERFCTR0 0xc1
#define IA32_PERFCTR1 0xc2
/*
 * Event-select values written to PERFEVTSEL0/1. Per the Intel SDM layout
 * the low byte is the event code (0xc4 branch instructions retired,
 * 0xc5 mispredicted branches retired) and 0x53 in bits 16-23 enables the
 * counter for both user and kernel mode - NOTE(review): confirm against
 * the SDM for the target CPU family.
 */
#define IA32_PERFEVT_BRANCH_HITS 0x05300c4
#define IA32_PERFEVT_BRANCH_MISS 0x05300c5
31
32static float
33apply_policy(int core)
34{
35 struct core_info *ci;
9f95a23c 36 uint64_t counter = 0;
11fdf7f2 37 uint64_t branches, branch_misses;
9f95a23c
TL
38 uint64_t last_branches, last_branch_misses;
39 int64_t hits_diff, miss_diff;
11fdf7f2
TL
40 float ratio;
41 int ret;
f67539c2 42 int freq_window_idx, up_count = 0, i;
11fdf7f2
TL
43
44 g_active = 0;
45 ci = get_core_info();
46
47 last_branches = ci->cd[core].last_branches;
48 last_branch_misses = ci->cd[core].last_branch_misses;
49
50 ret = pread(ci->cd[core].msr_fd, &counter,
51 sizeof(counter), IA32_PERFCTR0);
52 if (ret < 0)
53 RTE_LOG(ERR, POWER_MANAGER,
54 "unable to read counter for core %u\n",
55 core);
56 branches = counter;
57
9f95a23c 58 counter = 0;
11fdf7f2
TL
59 ret = pread(ci->cd[core].msr_fd, &counter,
60 sizeof(counter), IA32_PERFCTR1);
61 if (ret < 0)
62 RTE_LOG(ERR, POWER_MANAGER,
63 "unable to read counter for core %u\n",
64 core);
65 branch_misses = counter;
66
67
68 ci->cd[core].last_branches = branches;
69 ci->cd[core].last_branch_misses = branch_misses;
70
9f95a23c
TL
71 /*
72 * Intentional right shift to make MSB 0 to avoid
73 * possible signed overflow or truncation.
74 */
75 branches >>= 1;
76 last_branches >>= 1;
77 hits_diff = (int64_t)branches - (int64_t)last_branches;
11fdf7f2
TL
78 if (hits_diff <= 0) {
79 /* Likely a counter overflow condition, skip this round */
80 return -1.0;
81 }
82
9f95a23c
TL
83 /*
84 * Intentional right shift to make MSB 0 to avoid
85 * possible signed overflow or truncation.
86 */
87 branch_misses >>= 1;
88 last_branch_misses >>= 1;
89 miss_diff = (int64_t)branch_misses - (int64_t)last_branch_misses;
11fdf7f2
TL
90 if (miss_diff <= 0) {
91 /* Likely a counter overflow condition, skip this round */
92 return -1.0;
93 }
94
95 g_branches = hits_diff;
96 g_branch_misses = miss_diff;
97
98 if (hits_diff < (INTERVAL*100)) {
99 /* Likely no workload running on this core. Skip. */
100 return -1.0;
101 }
102
103 ratio = (float)miss_diff * (float)100 / (float)hits_diff;
104
f67539c2
TL
105 /*
106 * Store the last few directions that the ratio indicates
107 * we should take. If there's on 'up', then we scale up
108 * quickly. If all indicate 'down', only then do we scale
109 * down. Each core_details struct has it's own array.
110 */
111 freq_window_idx = ci->cd[core].freq_window_idx;
112 if (ratio > ci->branch_ratio_threshold)
113 ci->cd[core].freq_directions[freq_window_idx] = 1;
11fdf7f2 114 else
f67539c2
TL
115 ci->cd[core].freq_directions[freq_window_idx] = 0;
116
117 freq_window_idx++;
118 freq_window_idx = freq_window_idx & (FREQ_WINDOW_SIZE-1);
119 ci->cd[core].freq_window_idx = freq_window_idx;
120
121 up_count = 0;
122 for (i = 0; i < FREQ_WINDOW_SIZE; i++)
123 up_count += ci->cd[core].freq_directions[i];
124
125 if (up_count == 0) {
126 if (ci->cd[core].freq_state != FREQ_MIN) {
127 power_manager_scale_core_min(core);
128 ci->cd[core].freq_state = FREQ_MIN;
129 }
130 } else {
131 if (ci->cd[core].freq_state != FREQ_MAX) {
132 power_manager_scale_core_max(core);
133 ci->cd[core].freq_state = FREQ_MAX;
134 }
135 }
11fdf7f2
TL
136
137 g_active = 1;
138 return ratio;
139}
140
141int
142add_core_to_monitor(int core)
143{
144 struct core_info *ci;
145 char proc_file[UNIX_PATH_MAX];
146 int ret;
147
148 ci = get_core_info();
149
150 if (core < ci->core_count) {
151 long setup;
152
153 snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
154 ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC);
155 if (ci->cd[core].msr_fd < 0) {
156 RTE_LOG(ERR, POWER_MANAGER,
157 "Error opening MSR file for core %d "
158 "(is msr kernel module loaded?)\n",
159 core);
160 return -1;
161 }
162 /*
163 * Set up branch counters
164 */
165 setup = IA32_PERFEVT_BRANCH_HITS;
166 ret = pwrite(ci->cd[core].msr_fd, &setup,
167 sizeof(setup), IA32_PERFEVTSEL0);
168 if (ret < 0) {
169 RTE_LOG(ERR, POWER_MANAGER,
170 "unable to set counter for core %u\n",
171 core);
172 return ret;
173 }
174 setup = IA32_PERFEVT_BRANCH_MISS;
175 ret = pwrite(ci->cd[core].msr_fd, &setup,
176 sizeof(setup), IA32_PERFEVTSEL1);
177 if (ret < 0) {
178 RTE_LOG(ERR, POWER_MANAGER,
179 "unable to set counter for core %u\n",
180 core);
181 return ret;
182 }
183 /*
184 * Close the file and re-open as read only so
185 * as not to hog the resource
186 */
187 close(ci->cd[core].msr_fd);
188 ci->cd[core].msr_fd = open(proc_file, O_RDONLY);
189 if (ci->cd[core].msr_fd < 0) {
190 RTE_LOG(ERR, POWER_MANAGER,
191 "Error opening MSR file for core %d "
192 "(is msr kernel module loaded?)\n",
193 core);
194 return -1;
195 }
196 ci->cd[core].oob_enabled = 1;
197 }
198 return 0;
199}
200
201int
202remove_core_from_monitor(int core)
203{
204 struct core_info *ci;
205 char proc_file[UNIX_PATH_MAX];
206 int ret;
207
208 ci = get_core_info();
209
210 if (ci->cd[core].oob_enabled) {
211 long setup;
212
213 /*
214 * close the msr file, then reopen rw so we can
215 * disable the counters
216 */
217 if (ci->cd[core].msr_fd != 0)
218 close(ci->cd[core].msr_fd);
219 snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
220 ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC);
221 if (ci->cd[core].msr_fd < 0) {
222 RTE_LOG(ERR, POWER_MANAGER,
223 "Error opening MSR file for core %d "
224 "(is msr kernel module loaded?)\n",
225 core);
226 return -1;
227 }
228 setup = 0x0; /* clear event */
229 ret = pwrite(ci->cd[core].msr_fd, &setup,
230 sizeof(setup), IA32_PERFEVTSEL0);
231 if (ret < 0) {
232 RTE_LOG(ERR, POWER_MANAGER,
233 "unable to set counter for core %u\n",
234 core);
235 return ret;
236 }
237 setup = 0x0; /* clear event */
238 ret = pwrite(ci->cd[core].msr_fd, &setup,
239 sizeof(setup), IA32_PERFEVTSEL1);
240 if (ret < 0) {
241 RTE_LOG(ERR, POWER_MANAGER,
242 "unable to set counter for core %u\n",
243 core);
244 return ret;
245 }
246
247 close(ci->cd[core].msr_fd);
248 ci->cd[core].msr_fd = 0;
249 ci->cd[core].oob_enabled = 0;
250 }
251 return 0;
252}
253
/*
 * Initialise the branch monitor. Nothing needs setting up in this
 * implementation, so this always returns 0.
 */
int
branch_monitor_init(void)
{
	return 0;
}
259
260void
261run_branch_monitor(void)
262{
263 struct core_info *ci;
264 int print = 0;
265 float ratio;
266 int printed;
267 int reads = 0;
268
269 ci = get_core_info();
270
271 while (run_loop) {
272
273 if (!run_loop)
274 break;
275 usleep(INTERVAL);
276 int j;
277 print++;
278 printed = 0;
279 for (j = 0; j < ci->core_count; j++) {
280 if (ci->cd[j].oob_enabled) {
281 ratio = apply_policy(j);
282 if ((print > PRINT_LOOP_COUNT) && (g_active)) {
283 printf(" %d: %.4f {%lu} {%d}", j,
284 ratio, g_branches,
285 reads);
286 printed = 1;
287 reads = 0;
288 } else {
289 reads++;
290 }
291 }
292 }
293 if (print > PRINT_LOOP_COUNT) {
294 if (printed)
295 printf("\n");
296 print = 0;
297 }
298 }
299}