]>
Commit | Line | Data |
---|---|---|
40e983cc SP |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * DAMON-based LRU-lists Sorting | |
4 | * | |
5 | * Author: SeongJae Park <sj@kernel.org> | |
6 | */ | |
7 | ||
8 | #define pr_fmt(fmt) "damon-lru-sort: " fmt | |
9 | ||
10 | #include <linux/damon.h> | |
11 | #include <linux/ioport.h> | |
12 | #include <linux/module.h> | |
13 | #include <linux/sched.h> | |
14 | #include <linux/workqueue.h> | |
15 | ||
16 | #ifdef MODULE_PARAM_PREFIX | |
17 | #undef MODULE_PARAM_PREFIX | |
18 | #endif | |
19 | #define MODULE_PARAM_PREFIX "damon_lru_sort." | |
20 | ||
21 | /* | |
22 | * Enable or disable DAMON_LRU_SORT. | |
23 | * | |
24 | * You can enable DAMON_LRU_SORT by setting the value of this parameter as | |
25 | * ``Y``. Setting it as ``N`` disables DAMON_LRU_SORT. Note that | |
26 | * DAMON_LRU_SORT could do no real monitoring and LRU-lists sorting due to the | |
27 | * watermarks-based activation condition. Refer to below descriptions for the | |
28 | * watermarks parameter for this. | |
29 | */ | |
30 | static bool enabled __read_mostly; | |
31 | ||
32 | /* | |
33 | * Make DAMON_LRU_SORT read the input parameters again, except ``enabled``. | |
34 | * | |
35 | * Input parameters that are updated while DAMON_LRU_SORT is running are not | |
36 | * applied by default. Once this parameter is set as ``Y``, DAMON_LRU_SORT | |
37 | * reads values of parameters except ``enabled`` again. Once the re-reading is | |
38 | * done, this parameter is set as ``N``. If invalid parameters are found while | |
39 | * the re-reading, DAMON_LRU_SORT will be disabled. | |
40 | */ | |
41 | static bool commit_inputs __read_mostly; | |
42 | module_param(commit_inputs, bool, 0600); | |
43 | ||
44 | /* | |
45 | * Access frequency threshold for hot memory regions identification in permil. | |
46 | * | |
47 | * If a memory region is accessed in frequency of this or higher, | |
48 | * DAMON_LRU_SORT identifies the region as hot, and marks it as accessed on the | |
49 | * LRU list, so that it could not be reclaimed under memory pressure. 50% by | |
50 | * default. | |
51 | */ | |
52 | static unsigned long hot_thres_access_freq = 500; | |
53 | module_param(hot_thres_access_freq, ulong, 0600); | |
54 | ||
55 | /* | |
56 | * Time threshold for cold memory regions identification in microseconds. | |
57 | * | |
58 | * If a memory region is not accessed for this or longer time, DAMON_LRU_SORT | |
59 | * identifies the region as cold, and marks it as unaccessed on the LRU list, so | |
60 | * that it could be reclaimed first under memory pressure. 120 seconds by | |
61 | * default. | |
62 | */ | |
63 | static unsigned long cold_min_age __read_mostly = 120000000; | |
64 | module_param(cold_min_age, ulong, 0600); | |
65 | ||
66 | /* | |
67 | * Limit of time for trying the LRU lists sorting in milliseconds. | |
68 | * | |
69 | * DAMON_LRU_SORT tries to use only up to this time within a time window | |
70 | * (quota_reset_interval_ms) for trying LRU lists sorting. This can be used | |
71 | * for limiting CPU consumption of DAMON_LRU_SORT. If the value is zero, the | |
72 | * limit is disabled. | |
73 | * | |
74 | * 10 ms by default. | |
75 | */ | |
76 | static unsigned long quota_ms __read_mostly = 10; | |
77 | module_param(quota_ms, ulong, 0600); | |
78 | ||
79 | /* | |
80 | * The time quota charge reset interval in milliseconds. | |
81 | * | |
82 | * The charge reset interval for the quota of time (quota_ms). That is, | |
83 | * DAMON_LRU_SORT does not try LRU-lists sorting for more than quota_ms | |
84 | * milliseconds within quota_reset_interval_ms milliseconds. | |
85 | * | |
86 | * 1 second by default. | |
87 | */ | |
88 | static unsigned long quota_reset_interval_ms __read_mostly = 1000; | |
89 | module_param(quota_reset_interval_ms, ulong, 0600); | |
90 | ||
91 | /* | |
92 | * The watermarks check time interval in microseconds. | |
93 | * | |
94 | * Minimal time to wait before checking the watermarks, when DAMON_LRU_SORT is | |
95 | * enabled but inactive due to its watermarks rule. 5 seconds by default. | |
96 | */ | |
97 | static unsigned long wmarks_interval __read_mostly = 5000000; | |
98 | module_param(wmarks_interval, ulong, 0600); | |
99 | ||
100 | /* | |
101 | * Free memory rate (per thousand) for the high watermark. | |
102 | * | |
103 | * If free memory of the system in bytes per thousand bytes is higher than | |
104 | * this, DAMON_LRU_SORT becomes inactive, so it does nothing but periodically | |
105 | * checks the watermarks. 200 (20%) by default. | |
106 | */ | |
107 | static unsigned long wmarks_high __read_mostly = 200; | |
108 | module_param(wmarks_high, ulong, 0600); | |
109 | ||
110 | /* | |
111 | * Free memory rate (per thousand) for the middle watermark. | |
112 | * | |
113 | * If free memory of the system in bytes per thousand bytes is between this and | |
114 | * the low watermark, DAMON_LRU_SORT becomes active, so starts the monitoring | |
115 | * and the LRU-lists sorting. 150 (15%) by default. | |
116 | */ | |
117 | static unsigned long wmarks_mid __read_mostly = 150; | |
118 | module_param(wmarks_mid, ulong, 0600); | |
119 | ||
120 | /* | |
121 | * Free memory rate (per thousand) for the low watermark. | |
122 | * | |
123 | * If free memory of the system in bytes per thousand bytes is lower than this, | |
124 | * DAMON_LRU_SORT becomes inactive, so it does nothing but periodically checks | |
125 | * the watermarks. 50 (5%) by default. | |
126 | */ | |
127 | static unsigned long wmarks_low __read_mostly = 50; | |
128 | module_param(wmarks_low, ulong, 0600); | |
129 | ||
130 | /* | |
131 | * Sampling interval for the monitoring in microseconds. | |
132 | * | |
133 | * The sampling interval of DAMON for the hot/cold memory monitoring. Please | |
134 | * refer to the DAMON documentation for more detail. 5 ms by default. | |
135 | */ | |
136 | static unsigned long sample_interval __read_mostly = 5000; | |
137 | module_param(sample_interval, ulong, 0600); | |
138 | ||
139 | /* | |
140 | * Aggregation interval for the monitoring in microseconds. | |
141 | * | |
142 | * The aggregation interval of DAMON for the hot/cold memory monitoring. | |
143 | * Please refer to the DAMON documentation for more detail. 100 ms by default. | |
144 | */ | |
145 | static unsigned long aggr_interval __read_mostly = 100000; | |
146 | module_param(aggr_interval, ulong, 0600); | |
147 | ||
148 | /* | |
149 | * Minimum number of monitoring regions. | |
150 | * | |
151 | * The minimal number of monitoring regions of DAMON for the hot/cold memory | |
152 | * monitoring. This can be used to set lower-bound of the monitoring quality. | |
153 | * But, setting this too high could result in increased monitoring overhead. | |
154 | * Please refer to the DAMON documentation for more detail. 10 by default. | |
155 | */ | |
156 | static unsigned long min_nr_regions __read_mostly = 10; | |
157 | module_param(min_nr_regions, ulong, 0600); | |
158 | ||
159 | /* | |
160 | * Maximum number of monitoring regions. | |
161 | * | |
162 | * The maximum number of monitoring regions of DAMON for the hot/cold memory | |
163 | * monitoring. This can be used to set upper-bound of the monitoring overhead. | |
164 | * However, setting this too low could result in bad monitoring quality. | |
165 | * Please refer to the DAMON documentation for more detail. 1000 by default. | |
166 | */ | |
167 | static unsigned long max_nr_regions __read_mostly = 1000; | |
168 | module_param(max_nr_regions, ulong, 0600); | |
169 | ||
170 | /* | |
171 | * Start of the target memory region in physical address. | |
172 | * | |
173 | * The start physical address of memory region that DAMON_LRU_SORT will do work | |
174 | * against. By default, biggest System RAM is used as the region. | |
175 | */ | |
176 | static unsigned long monitor_region_start __read_mostly; | |
177 | module_param(monitor_region_start, ulong, 0600); | |
178 | ||
179 | /* | |
180 | * End of the target memory region in physical address. | |
181 | * | |
182 | * The end physical address of memory region that DAMON_LRU_SORT will do work | |
183 | * against. By default, biggest System RAM is used as the region. | |
184 | */ | |
185 | static unsigned long monitor_region_end __read_mostly; | |
186 | module_param(monitor_region_end, ulong, 0600); | |
187 | ||
188 | /* | |
189 | * PID of the DAMON thread | |
190 | * | |
191 | * If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread. | |
192 | * Else, -1. | |
193 | */ | |
194 | static int kdamond_pid __read_mostly = -1; | |
195 | module_param(kdamond_pid, int, 0400); | |
196 | ||
197 | /* | |
198 | * Number of hot memory regions that tried to be LRU-sorted. | |
199 | */ | |
200 | static unsigned long nr_lru_sort_tried_hot_regions __read_mostly; | |
201 | module_param(nr_lru_sort_tried_hot_regions, ulong, 0400); | |
202 | ||
203 | /* | |
204 | * Total bytes of hot memory regions that tried to be LRU-sorted. | |
205 | */ | |
206 | static unsigned long bytes_lru_sort_tried_hot_regions __read_mostly; | |
207 | module_param(bytes_lru_sort_tried_hot_regions, ulong, 0400); | |
208 | ||
209 | /* | |
210 | * Number of hot memory regions that were successfully LRU-sorted. | |
211 | */ | |
212 | static unsigned long nr_lru_sorted_hot_regions __read_mostly; | |
213 | module_param(nr_lru_sorted_hot_regions, ulong, 0400); | |
214 | ||
215 | /* | |
216 | * Total bytes of hot memory regions that were successfully LRU-sorted. | |
217 | */ | |
218 | static unsigned long bytes_lru_sorted_hot_regions __read_mostly; | |
219 | module_param(bytes_lru_sorted_hot_regions, ulong, 0400); | |
220 | ||
221 | /* | |
222 | * Number of times that the time quota limit for hot regions has been exceeded | |
223 | */ | |
224 | static unsigned long nr_hot_quota_exceeds __read_mostly; | |
225 | module_param(nr_hot_quota_exceeds, ulong, 0400); | |
226 | ||
227 | /* | |
228 | * Number of cold memory regions that tried to be LRU-sorted. | |
229 | */ | |
230 | static unsigned long nr_lru_sort_tried_cold_regions __read_mostly; | |
231 | module_param(nr_lru_sort_tried_cold_regions, ulong, 0400); | |
232 | ||
233 | /* | |
234 | * Total bytes of cold memory regions that tried to be LRU-sorted. | |
235 | */ | |
236 | static unsigned long bytes_lru_sort_tried_cold_regions __read_mostly; | |
237 | module_param(bytes_lru_sort_tried_cold_regions, ulong, 0400); | |
238 | ||
239 | /* | |
240 | * Number of cold memory regions that were successfully LRU-sorted. | |
241 | */ | |
242 | static unsigned long nr_lru_sorted_cold_regions __read_mostly; | |
243 | module_param(nr_lru_sorted_cold_regions, ulong, 0400); | |
244 | ||
245 | /* | |
246 | * Total bytes of cold memory regions that were successfully LRU-sorted. | |
247 | */ | |
248 | static unsigned long bytes_lru_sorted_cold_regions __read_mostly; | |
249 | module_param(bytes_lru_sorted_cold_regions, ulong, 0400); | |
250 | ||
251 | /* | |
252 | * Number of times that the time quota limit for cold regions has been exceeded | |
253 | */ | |
254 | static unsigned long nr_cold_quota_exceeds __read_mostly; | |
255 | module_param(nr_cold_quota_exceeds, ulong, 0400); | |
256 | ||
257 | static struct damon_ctx *ctx; | |
258 | static struct damon_target *target; | |
259 | ||
/*
 * Accumulator handed to walk_system_ram() while iterating 'System RAM'
 * resources: the bounds of the biggest resource seen so far.
 */
struct damon_lru_sort_ram_walk_arg {
	unsigned long start;	/* ->start of the biggest resource so far */
	unsigned long end;	/* ->end of the biggest resource so far */
};
264 | ||
265 | static int walk_system_ram(struct resource *res, void *arg) | |
266 | { | |
267 | struct damon_lru_sort_ram_walk_arg *a = arg; | |
268 | ||
269 | if (a->end - a->start < resource_size(res)) { | |
270 | a->start = res->start; | |
271 | a->end = res->end; | |
272 | } | |
273 | return 0; | |
274 | } | |
275 | ||
276 | /* | |
277 | * Find biggest 'System RAM' resource and store its start and end address in | |
278 | * @start and @end, respectively. If no System RAM is found, returns false. | |
279 | */ | |
280 | static bool get_monitoring_region(unsigned long *start, unsigned long *end) | |
281 | { | |
282 | struct damon_lru_sort_ram_walk_arg arg = {}; | |
283 | ||
284 | walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram); | |
285 | if (arg.end <= arg.start) | |
286 | return false; | |
287 | ||
288 | *start = arg.start; | |
289 | *end = arg.end; | |
290 | return true; | |
291 | } | |
292 | ||
293 | /* Create a DAMON-based operation scheme for hot memory regions */ | |
294 | static struct damos *damon_lru_sort_new_hot_scheme(unsigned int hot_thres) | |
295 | { | |
296 | struct damos_watermarks wmarks = { | |
297 | .metric = DAMOS_WMARK_FREE_MEM_RATE, | |
298 | .interval = wmarks_interval, | |
299 | .high = wmarks_high, | |
300 | .mid = wmarks_mid, | |
301 | .low = wmarks_low, | |
302 | }; | |
303 | struct damos_quota quota = { | |
304 | /* | |
305 | * Do not try LRU-lists sorting of hot pages for more than half | |
306 | * of quota_ms milliseconds within quota_reset_interval_ms. | |
307 | */ | |
308 | .ms = quota_ms / 2, | |
309 | .sz = 0, | |
310 | .reset_interval = quota_reset_interval_ms, | |
311 | /* Within the quota, mark hotter regions accessed first. */ | |
312 | .weight_sz = 0, | |
313 | .weight_nr_accesses = 1, | |
314 | .weight_age = 0, | |
315 | }; | |
316 | struct damos *scheme = damon_new_scheme( | |
317 | /* Find regions having PAGE_SIZE or larger size */ | |
318 | PAGE_SIZE, ULONG_MAX, | |
319 | /* and accessed for more than the threshold */ | |
320 | hot_thres, UINT_MAX, | |
321 | /* no matter its age */ | |
322 | 0, UINT_MAX, | |
323 | /* prioritize those on LRU lists, as soon as found */ | |
324 | DAMOS_LRU_PRIO, | |
325 | /* under the quota. */ | |
326 | "a, | |
327 | /* (De)activate this according to the watermarks. */ | |
328 | &wmarks); | |
329 | ||
330 | return scheme; | |
331 | } | |
332 | ||
333 | /* Create a DAMON-based operation scheme for cold memory regions */ | |
334 | static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres) | |
335 | { | |
336 | struct damos_watermarks wmarks = { | |
337 | .metric = DAMOS_WMARK_FREE_MEM_RATE, | |
338 | .interval = wmarks_interval, | |
339 | .high = wmarks_high, | |
340 | .mid = wmarks_mid, | |
341 | .low = wmarks_low, | |
342 | }; | |
343 | struct damos_quota quota = { | |
344 | /* | |
345 | * Do not try LRU-lists sorting of cold pages for more than | |
346 | * half of quota_ms milliseconds within | |
347 | * quota_reset_interval_ms. | |
348 | */ | |
349 | .ms = quota_ms / 2, | |
350 | .sz = 0, | |
351 | .reset_interval = quota_reset_interval_ms, | |
352 | /* Within the quota, mark colder regions not accessed first. */ | |
353 | .weight_sz = 0, | |
354 | .weight_nr_accesses = 0, | |
355 | .weight_age = 1, | |
356 | }; | |
357 | struct damos *scheme = damon_new_scheme( | |
358 | /* Find regions having PAGE_SIZE or larger size */ | |
359 | PAGE_SIZE, ULONG_MAX, | |
360 | /* and not accessed at all */ | |
361 | 0, 0, | |
362 | /* for cold_thres or more micro-seconds, and */ | |
363 | cold_thres, UINT_MAX, | |
364 | /* mark those as not accessed, as soon as found */ | |
365 | DAMOS_LRU_DEPRIO, | |
366 | /* under the quota. */ | |
367 | "a, | |
368 | /* (De)activate this according to the watermarks. */ | |
369 | &wmarks); | |
370 | ||
371 | return scheme; | |
372 | } | |
373 | ||
374 | static int damon_lru_sort_apply_parameters(void) | |
375 | { | |
376 | struct damos *scheme, *next_scheme; | |
377 | struct damon_addr_range addr_range; | |
378 | unsigned int hot_thres, cold_thres; | |
379 | int err = 0; | |
380 | ||
381 | err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0, | |
382 | min_nr_regions, max_nr_regions); | |
383 | if (err) | |
384 | return err; | |
385 | ||
386 | /* free previously set schemes */ | |
387 | damon_for_each_scheme_safe(scheme, next_scheme, ctx) | |
388 | damon_destroy_scheme(scheme); | |
389 | ||
390 | /* aggr_interval / sample_interval is the maximum nr_accesses */ | |
391 | hot_thres = aggr_interval / sample_interval * hot_thres_access_freq / | |
392 | 1000; | |
393 | scheme = damon_lru_sort_new_hot_scheme(hot_thres); | |
394 | if (!scheme) | |
395 | return -ENOMEM; | |
396 | damon_add_scheme(ctx, scheme); | |
397 | ||
398 | cold_thres = cold_min_age / aggr_interval; | |
399 | scheme = damon_lru_sort_new_cold_scheme(cold_thres); | |
400 | if (!scheme) | |
401 | return -ENOMEM; | |
402 | damon_add_scheme(ctx, scheme); | |
403 | ||
404 | if (monitor_region_start > monitor_region_end) | |
405 | return -EINVAL; | |
406 | if (!monitor_region_start && !monitor_region_end && | |
407 | !get_monitoring_region(&monitor_region_start, | |
408 | &monitor_region_end)) | |
409 | return -EINVAL; | |
410 | addr_range.start = monitor_region_start; | |
411 | addr_range.end = monitor_region_end; | |
412 | return damon_set_regions(target, &addr_range, 1); | |
413 | } | |
414 | ||
415 | static int damon_lru_sort_turn(bool on) | |
416 | { | |
417 | int err; | |
418 | ||
419 | if (!on) { | |
420 | err = damon_stop(&ctx, 1); | |
421 | if (!err) | |
422 | kdamond_pid = -1; | |
423 | return err; | |
424 | } | |
425 | ||
426 | err = damon_lru_sort_apply_parameters(); | |
427 | if (err) | |
428 | return err; | |
429 | ||
430 | err = damon_start(&ctx, 1, true); | |
431 | if (err) | |
432 | return err; | |
433 | kdamond_pid = ctx->kdamond->pid; | |
434 | return 0; | |
435 | } | |
436 | ||
437 | static struct delayed_work damon_lru_sort_timer; | |
438 | static void damon_lru_sort_timer_fn(struct work_struct *work) | |
439 | { | |
440 | static bool last_enabled; | |
441 | bool now_enabled; | |
442 | ||
443 | now_enabled = enabled; | |
444 | if (last_enabled != now_enabled) { | |
445 | if (!damon_lru_sort_turn(now_enabled)) | |
446 | last_enabled = now_enabled; | |
447 | else | |
448 | enabled = last_enabled; | |
449 | } | |
450 | } | |
451 | static DECLARE_DELAYED_WORK(damon_lru_sort_timer, damon_lru_sort_timer_fn); | |
452 | ||
453 | static bool damon_lru_sort_initialized; | |
454 | ||
455 | static int damon_lru_sort_enabled_store(const char *val, | |
456 | const struct kernel_param *kp) | |
457 | { | |
458 | int rc = param_set_bool(val, kp); | |
459 | ||
460 | if (rc < 0) | |
461 | return rc; | |
462 | ||
463 | if (!damon_lru_sort_initialized) | |
464 | return rc; | |
465 | ||
466 | schedule_delayed_work(&damon_lru_sort_timer, 0); | |
467 | ||
468 | return 0; | |
469 | } | |
470 | ||
471 | static const struct kernel_param_ops enabled_param_ops = { | |
472 | .set = damon_lru_sort_enabled_store, | |
473 | .get = param_get_bool, | |
474 | }; | |
475 | ||
476 | module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); | |
477 | MODULE_PARM_DESC(enabled, | |
478 | "Enable or disable DAMON_LRU_SORT (default: disabled)"); | |
479 | ||
480 | static int damon_lru_sort_handle_commit_inputs(void) | |
481 | { | |
482 | int err; | |
483 | ||
484 | if (!commit_inputs) | |
485 | return 0; | |
486 | ||
487 | err = damon_lru_sort_apply_parameters(); | |
488 | commit_inputs = false; | |
489 | return err; | |
490 | } | |
491 | ||
492 | static int damon_lru_sort_after_aggregation(struct damon_ctx *c) | |
493 | { | |
494 | struct damos *s; | |
495 | ||
496 | /* update the stats parameter */ | |
497 | damon_for_each_scheme(s, c) { | |
498 | if (s->action == DAMOS_LRU_PRIO) { | |
499 | nr_lru_sort_tried_hot_regions = s->stat.nr_tried; | |
500 | bytes_lru_sort_tried_hot_regions = s->stat.sz_tried; | |
501 | nr_lru_sorted_hot_regions = s->stat.nr_applied; | |
502 | bytes_lru_sorted_hot_regions = s->stat.sz_applied; | |
503 | nr_hot_quota_exceeds = s->stat.qt_exceeds; | |
504 | } else if (s->action == DAMOS_LRU_DEPRIO) { | |
505 | nr_lru_sort_tried_cold_regions = s->stat.nr_tried; | |
506 | bytes_lru_sort_tried_cold_regions = s->stat.sz_tried; | |
507 | nr_lru_sorted_cold_regions = s->stat.nr_applied; | |
508 | bytes_lru_sorted_cold_regions = s->stat.sz_applied; | |
509 | nr_cold_quota_exceeds = s->stat.qt_exceeds; | |
510 | } | |
511 | } | |
512 | ||
513 | return damon_lru_sort_handle_commit_inputs(); | |
514 | } | |
515 | ||
/*
 * after_wmarks_check callback: only handles a pending ``commit_inputs``
 * request; @c is not used.
 */
static int damon_lru_sort_after_wmarks_check(struct damon_ctx *c)
{
	int err = damon_lru_sort_handle_commit_inputs();

	return err;
}
520 | ||
521 | static int __init damon_lru_sort_init(void) | |
522 | { | |
523 | ctx = damon_new_ctx(); | |
524 | if (!ctx) | |
525 | return -ENOMEM; | |
526 | ||
ec1658f0 SP |
527 | if (damon_select_ops(ctx, DAMON_OPS_PADDR)) { |
528 | damon_destroy_ctx(ctx); | |
40e983cc | 529 | return -EINVAL; |
ec1658f0 | 530 | } |
40e983cc SP |
531 | |
532 | ctx->callback.after_wmarks_check = damon_lru_sort_after_wmarks_check; | |
533 | ctx->callback.after_aggregation = damon_lru_sort_after_aggregation; | |
534 | ||
535 | target = damon_new_target(); | |
536 | if (!target) { | |
537 | damon_destroy_ctx(ctx); | |
538 | return -ENOMEM; | |
539 | } | |
540 | damon_add_target(ctx, target); | |
541 | ||
542 | schedule_delayed_work(&damon_lru_sort_timer, 0); | |
543 | ||
544 | damon_lru_sort_initialized = true; | |
545 | return 0; | |
546 | } | |
547 | ||
548 | module_init(damon_lru_sort_init); |