]> git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/dpdk/examples/vm_power_manager/oob_monitor_x86.c
import 15.2.0 Octopus source
[ceph.git] / ceph / src / seastar / dpdk / examples / vm_power_manager / oob_monitor_x86.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2018 Intel Corporation
3 */
4
5 #include <unistd.h>
6 #include <fcntl.h>
7 #include <rte_log.h>
8
9 #include "oob_monitor.h"
10 #include "power_manager.h"
11 #include "channel_manager.h"
12
/* Polling-loop flag: run_branch_monitor() spins while this is non-zero. */
static volatile unsigned int run_loop = 1;

/* Counter deltas stored by the last apply_policy() call that saw progress. */
static uint64_t g_branches;
static uint64_t g_branch_misses;

/* 1 when the last apply_policy() call scaled a core, 0 when it skipped. */
static int g_active;
16
17 void branch_monitor_exit(void)
18 {
19 run_loop = 0;
20 }
21
/* Polling period of the monitor loop, in microseconds */
#define INTERVAL 100
/*
 * Polls per second (1000000 us / INTERVAL); run_branch_monitor() uses
 * it to rate-limit its console output.
 */
#define PRINT_LOOP_COUNT (1000000 / INTERVAL)

/* MSR addresses, used as the file offset on the per-core msr device */
#define IA32_PERFEVTSEL0 0x186
#define IA32_PERFEVTSEL1 0x187
#define IA32_PERFCTR0 0xc1
#define IA32_PERFCTR1 0xc2

/* Event-select values written to IA32_PERFEVTSEL0 / IA32_PERFEVTSEL1 */
#define IA32_PERFEVT_BRANCH_HITS 0x5300c4
#define IA32_PERFEVT_BRANCH_MISS 0x5300c5
31
32 static float
33 apply_policy(int core)
34 {
35 struct core_info *ci;
36 uint64_t counter = 0;
37 uint64_t branches, branch_misses;
38 uint64_t last_branches, last_branch_misses;
39 int64_t hits_diff, miss_diff;
40 float ratio;
41 int ret;
42
43 g_active = 0;
44 ci = get_core_info();
45
46 last_branches = ci->cd[core].last_branches;
47 last_branch_misses = ci->cd[core].last_branch_misses;
48
49 ret = pread(ci->cd[core].msr_fd, &counter,
50 sizeof(counter), IA32_PERFCTR0);
51 if (ret < 0)
52 RTE_LOG(ERR, POWER_MANAGER,
53 "unable to read counter for core %u\n",
54 core);
55 branches = counter;
56
57 counter = 0;
58 ret = pread(ci->cd[core].msr_fd, &counter,
59 sizeof(counter), IA32_PERFCTR1);
60 if (ret < 0)
61 RTE_LOG(ERR, POWER_MANAGER,
62 "unable to read counter for core %u\n",
63 core);
64 branch_misses = counter;
65
66
67 ci->cd[core].last_branches = branches;
68 ci->cd[core].last_branch_misses = branch_misses;
69
70 /*
71 * Intentional right shift to make MSB 0 to avoid
72 * possible signed overflow or truncation.
73 */
74 branches >>= 1;
75 last_branches >>= 1;
76 hits_diff = (int64_t)branches - (int64_t)last_branches;
77 if (hits_diff <= 0) {
78 /* Likely a counter overflow condition, skip this round */
79 return -1.0;
80 }
81
82 /*
83 * Intentional right shift to make MSB 0 to avoid
84 * possible signed overflow or truncation.
85 */
86 branch_misses >>= 1;
87 last_branch_misses >>= 1;
88 miss_diff = (int64_t)branch_misses - (int64_t)last_branch_misses;
89 if (miss_diff <= 0) {
90 /* Likely a counter overflow condition, skip this round */
91 return -1.0;
92 }
93
94 g_branches = hits_diff;
95 g_branch_misses = miss_diff;
96
97 if (hits_diff < (INTERVAL*100)) {
98 /* Likely no workload running on this core. Skip. */
99 return -1.0;
100 }
101
102 ratio = (float)miss_diff * (float)100 / (float)hits_diff;
103
104 if (ratio < ci->branch_ratio_threshold)
105 power_manager_scale_core_min(core);
106 else
107 power_manager_scale_core_max(core);
108
109 g_active = 1;
110 return ratio;
111 }
112
113 int
114 add_core_to_monitor(int core)
115 {
116 struct core_info *ci;
117 char proc_file[UNIX_PATH_MAX];
118 int ret;
119
120 ci = get_core_info();
121
122 if (core < ci->core_count) {
123 long setup;
124
125 snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
126 ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC);
127 if (ci->cd[core].msr_fd < 0) {
128 RTE_LOG(ERR, POWER_MANAGER,
129 "Error opening MSR file for core %d "
130 "(is msr kernel module loaded?)\n",
131 core);
132 return -1;
133 }
134 /*
135 * Set up branch counters
136 */
137 setup = IA32_PERFEVT_BRANCH_HITS;
138 ret = pwrite(ci->cd[core].msr_fd, &setup,
139 sizeof(setup), IA32_PERFEVTSEL0);
140 if (ret < 0) {
141 RTE_LOG(ERR, POWER_MANAGER,
142 "unable to set counter for core %u\n",
143 core);
144 return ret;
145 }
146 setup = IA32_PERFEVT_BRANCH_MISS;
147 ret = pwrite(ci->cd[core].msr_fd, &setup,
148 sizeof(setup), IA32_PERFEVTSEL1);
149 if (ret < 0) {
150 RTE_LOG(ERR, POWER_MANAGER,
151 "unable to set counter for core %u\n",
152 core);
153 return ret;
154 }
155 /*
156 * Close the file and re-open as read only so
157 * as not to hog the resource
158 */
159 close(ci->cd[core].msr_fd);
160 ci->cd[core].msr_fd = open(proc_file, O_RDONLY);
161 if (ci->cd[core].msr_fd < 0) {
162 RTE_LOG(ERR, POWER_MANAGER,
163 "Error opening MSR file for core %d "
164 "(is msr kernel module loaded?)\n",
165 core);
166 return -1;
167 }
168 ci->cd[core].oob_enabled = 1;
169 }
170 return 0;
171 }
172
173 int
174 remove_core_from_monitor(int core)
175 {
176 struct core_info *ci;
177 char proc_file[UNIX_PATH_MAX];
178 int ret;
179
180 ci = get_core_info();
181
182 if (ci->cd[core].oob_enabled) {
183 long setup;
184
185 /*
186 * close the msr file, then reopen rw so we can
187 * disable the counters
188 */
189 if (ci->cd[core].msr_fd != 0)
190 close(ci->cd[core].msr_fd);
191 snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
192 ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC);
193 if (ci->cd[core].msr_fd < 0) {
194 RTE_LOG(ERR, POWER_MANAGER,
195 "Error opening MSR file for core %d "
196 "(is msr kernel module loaded?)\n",
197 core);
198 return -1;
199 }
200 setup = 0x0; /* clear event */
201 ret = pwrite(ci->cd[core].msr_fd, &setup,
202 sizeof(setup), IA32_PERFEVTSEL0);
203 if (ret < 0) {
204 RTE_LOG(ERR, POWER_MANAGER,
205 "unable to set counter for core %u\n",
206 core);
207 return ret;
208 }
209 setup = 0x0; /* clear event */
210 ret = pwrite(ci->cd[core].msr_fd, &setup,
211 sizeof(setup), IA32_PERFEVTSEL1);
212 if (ret < 0) {
213 RTE_LOG(ERR, POWER_MANAGER,
214 "unable to set counter for core %u\n",
215 core);
216 return ret;
217 }
218
219 close(ci->cd[core].msr_fd);
220 ci->cd[core].msr_fd = 0;
221 ci->cd[core].oob_enabled = 0;
222 }
223 return 0;
224 }
225
/*
 * Initialise the branch monitor. This implementation has nothing to
 * set up, so the call always reports success.
 *
 * Returns 0.
 */
int
branch_monitor_init(void)
{
	return 0;
}
231
232 void
233 run_branch_monitor(void)
234 {
235 struct core_info *ci;
236 int print = 0;
237 float ratio;
238 int printed;
239 int reads = 0;
240
241 ci = get_core_info();
242
243 while (run_loop) {
244
245 if (!run_loop)
246 break;
247 usleep(INTERVAL);
248 int j;
249 print++;
250 printed = 0;
251 for (j = 0; j < ci->core_count; j++) {
252 if (ci->cd[j].oob_enabled) {
253 ratio = apply_policy(j);
254 if ((print > PRINT_LOOP_COUNT) && (g_active)) {
255 printf(" %d: %.4f {%lu} {%d}", j,
256 ratio, g_branches,
257 reads);
258 printed = 1;
259 reads = 0;
260 } else {
261 reads++;
262 }
263 }
264 }
265 if (print > PRINT_LOOP_COUNT) {
266 if (printed)
267 printf("\n");
268 print = 0;
269 }
270 }
271 }