[mirror_ubuntu-kernels.git] / mm / damon / lru_sort.c

// SPDX-License-Identifier: GPL-2.0
/*
 * DAMON-based LRU-lists Sorting
 *
 * Author: SeongJae Park <sj@kernel.org>
 */

#define pr_fmt(fmt) "damon-lru-sort: " fmt

#include <linux/damon.h>
#include <linux/ioport.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/workqueue.h>

#ifdef MODULE_PARAM_PREFIX
#undef MODULE_PARAM_PREFIX
#endif
#define MODULE_PARAM_PREFIX "damon_lru_sort."

/*
 * Enable or disable DAMON_LRU_SORT.
 *
 * You can enable DAMON_LRU_SORT by setting the value of this parameter as
 * ``Y``.  Setting it as ``N`` disables DAMON_LRU_SORT.  Note that
 * DAMON_LRU_SORT could do no real monitoring and LRU-lists sorting due to the
 * watermarks-based activation condition.  Refer to below descriptions for the
 * watermarks parameter for this.
 */
static bool enabled __read_mostly;

/*
 * Make DAMON_LRU_SORT read the input parameters again, except ``enabled``.
 *
 * Input parameters that are updated while DAMON_LRU_SORT is running are not
 * applied by default.  Once this parameter is set as ``Y``, DAMON_LRU_SORT
 * reads values of parameters except ``enabled`` again.  Once the re-reading is
 * done, this parameter is set as ``N``.  If invalid parameters are found
 * during the re-reading, DAMON_LRU_SORT will be disabled.
 */
static bool commit_inputs __read_mostly;
module_param(commit_inputs, bool, 0600);

/*
 * Access frequency threshold for hot memory regions identification in permil.
 *
 * If a memory region is accessed in frequency of this or higher,
 * DAMON_LRU_SORT identifies the region as hot, and mark it as accessed on the
 * LRU list, so that it could not be reclaimed under memory pressure.  50% by
 * default.
 */
static unsigned long hot_thres_access_freq = 500;
module_param(hot_thres_access_freq, ulong, 0600);

/*
 * Time threshold for cold memory regions identification in microseconds.
 *
 * If a memory region is not accessed for this or longer time, DAMON_LRU_SORT
 * identifies the region as cold, and mark it as unaccessed on the LRU list, so
 * that it could be reclaimed first under memory pressure.  120 seconds by
 * default.
 */
static unsigned long cold_min_age __read_mostly = 120000000;
module_param(cold_min_age, ulong, 0600);

/*
 * Limit of time for trying the LRU lists sorting in milliseconds.
 *
 * DAMON_LRU_SORT tries to use only up to this time within a time window
 * (quota_reset_interval_ms) for trying LRU lists sorting.  This can be used
 * for limiting CPU consumption of DAMON_LRU_SORT.  If the value is zero, the
 * limit is disabled.
 *
 * 10 ms by default.
 */
static unsigned long quota_ms __read_mostly = 10;
module_param(quota_ms, ulong, 0600);

/*
 * The time quota charge reset interval in milliseconds.
 *
 * The charge reset interval for the quota of time (quota_ms).  That is,
 * DAMON_LRU_SORT does not try LRU-lists sorting for more than quota_ms
 * milliseconds within quota_reset_interval_ms milliseconds.  Note that this
 * module has no quota_sz parameter; its schemes set the size quota to zero.
 *
 * 1 second by default.
 */
static unsigned long quota_reset_interval_ms __read_mostly = 1000;
module_param(quota_reset_interval_ms, ulong, 0600);

/*
 * The watermarks check time interval in microseconds.
 *
 * Minimal time to wait before checking the watermarks, when DAMON_LRU_SORT is
 * enabled but inactive due to its watermarks rule.  5 seconds by default.
 */
static unsigned long wmarks_interval __read_mostly = 5000000;
module_param(wmarks_interval, ulong, 0600);

/*
 * Free memory rate (per thousand) for the high watermark.
 *
 * If free memory of the system in bytes per thousand bytes is higher than
 * this, DAMON_LRU_SORT becomes inactive, so it does nothing but periodically
 * checks the watermarks.  200 (20%) by default.
 */
static unsigned long wmarks_high __read_mostly = 200;
module_param(wmarks_high, ulong, 0600);

/*
 * Free memory rate (per thousand) for the middle watermark.
 *
 * If free memory of the system in bytes per thousand bytes is between this and
 * the low watermark, DAMON_LRU_SORT becomes active, so starts the monitoring
 * and the LRU-lists sorting.  150 (15%) by default.
 */
static unsigned long wmarks_mid __read_mostly = 150;
module_param(wmarks_mid, ulong, 0600);

/*
 * Free memory rate (per thousand) for the low watermark.
 *
 * If free memory of the system in bytes per thousand bytes is lower than this,
 * DAMON_LRU_SORT becomes inactive, so it does nothing but periodically checks
 * the watermarks.  50 (5%) by default.
 */
static unsigned long wmarks_low __read_mostly = 50;
module_param(wmarks_low, ulong, 0600);

/*
 * Sampling interval for the monitoring in microseconds.
 *
 * The sampling interval of DAMON for the hot/cold memory monitoring.  Please
 * refer to the DAMON documentation for more detail.  5 ms by default.
 */
static unsigned long sample_interval __read_mostly = 5000;
module_param(sample_interval, ulong, 0600);

/*
 * Aggregation interval for the monitoring in microseconds.
 *
 * The aggregation interval of DAMON for the hot/cold memory monitoring.
 * Please refer to the DAMON documentation for more detail.  100 ms by default.
 */
static unsigned long aggr_interval __read_mostly = 100000;
module_param(aggr_interval, ulong, 0600);

/*
 * Minimum number of monitoring regions.
 *
 * The minimal number of monitoring regions of DAMON for the hot/cold memory
 * monitoring.  This can be used to set lower-bound of the monitoring quality.
 * But, setting this too high could result in increased monitoring overhead.
 * Please refer to the DAMON documentation for more detail.  10 by default.
 */
static unsigned long min_nr_regions __read_mostly = 10;
module_param(min_nr_regions, ulong, 0600);

/*
 * Maximum number of monitoring regions.
 *
 * The maximum number of monitoring regions of DAMON for the hot/cold memory
 * monitoring.  This can be used to set upper-bound of the monitoring overhead.
 * However, setting this too low could result in bad monitoring quality.
 * Please refer to the DAMON documentation for more detail.  1000 by default.
 */
static unsigned long max_nr_regions __read_mostly = 1000;
module_param(max_nr_regions, ulong, 0600);

/*
 * Start of the target memory region in physical address.
 *
 * The start physical address of memory region that DAMON_LRU_SORT will do work
 * against.  By default, biggest System RAM is used as the region.
 */
static unsigned long monitor_region_start __read_mostly;
module_param(monitor_region_start, ulong, 0600);

/*
 * End of the target memory region in physical address.
 *
 * The end physical address of memory region that DAMON_LRU_SORT will do work
 * against.  By default, biggest System RAM is used as the region.
 */
static unsigned long monitor_region_end __read_mostly;
module_param(monitor_region_end, ulong, 0600);

/*
 * PID of the DAMON thread
 *
 * If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread.
 * Else, -1.
 */
static int kdamond_pid __read_mostly = -1;
module_param(kdamond_pid, int, 0400);

/*
 * Number of hot memory regions that tried to be LRU-sorted.
 */
static unsigned long nr_lru_sort_tried_hot_regions __read_mostly;
module_param(nr_lru_sort_tried_hot_regions, ulong, 0400);

/*
 * Total bytes of hot memory regions that tried to be LRU-sorted.
 */
static unsigned long bytes_lru_sort_tried_hot_regions __read_mostly;
module_param(bytes_lru_sort_tried_hot_regions, ulong, 0400);

/*
 * Number of hot memory regions that were successfully LRU-sorted.
 */
static unsigned long nr_lru_sorted_hot_regions __read_mostly;
module_param(nr_lru_sorted_hot_regions, ulong, 0400);

/*
 * Total bytes of hot memory regions that were successfully LRU-sorted.
 */
static unsigned long bytes_lru_sorted_hot_regions __read_mostly;
module_param(bytes_lru_sorted_hot_regions, ulong, 0400);

/*
 * Number of times that the time quota limit for hot regions has been exceeded
 */
static unsigned long nr_hot_quota_exceeds __read_mostly;
module_param(nr_hot_quota_exceeds, ulong, 0400);

/*
 * Number of cold memory regions that tried to be LRU-sorted.
 */
static unsigned long nr_lru_sort_tried_cold_regions __read_mostly;
module_param(nr_lru_sort_tried_cold_regions, ulong, 0400);

/*
 * Total bytes of cold memory regions that tried to be LRU-sorted.
 */
static unsigned long bytes_lru_sort_tried_cold_regions __read_mostly;
module_param(bytes_lru_sort_tried_cold_regions, ulong, 0400);

/*
 * Number of cold memory regions that were successfully LRU-sorted.
 */
static unsigned long nr_lru_sorted_cold_regions __read_mostly;
module_param(nr_lru_sorted_cold_regions, ulong, 0400);

/*
 * Total bytes of cold memory regions that were successfully LRU-sorted.
 */
static unsigned long bytes_lru_sorted_cold_regions __read_mostly;
module_param(bytes_lru_sorted_cold_regions, ulong, 0400);

/*
 * Number of times that the time quota limit for cold regions has been exceeded
 */
static unsigned long nr_cold_quota_exceeds __read_mostly;
module_param(nr_cold_quota_exceeds, ulong, 0400);

static struct damon_ctx *ctx;
static struct damon_target *target;

/*
 * Accumulator passed to walk_system_ram() while searching for the biggest
 * 'System RAM' resource.  A zero-initialized instance means "nothing found
 * yet".
 */
struct damon_lru_sort_ram_walk_arg {
	/* res->start of the biggest resource seen so far */
	unsigned long start;
	/* res->end of the biggest resource seen so far */
	unsigned long end;
};

/*
 * Resource walk callback: record @res in @arg (a struct
 * damon_lru_sort_ram_walk_arg) when it is bigger than the range recorded so
 * far.  Always returns 0.
 */
static int walk_system_ram(struct resource *res, void *arg)
{
	struct damon_lru_sort_ram_walk_arg *biggest = arg;

	/* Keep the current record unless this resource is strictly bigger. */
	if (resource_size(res) <= biggest->end - biggest->start)
		return 0;

	biggest->start = res->start;
	biggest->end = res->end;
	return 0;
}

/*
 * Find biggest 'System RAM' resource and store its start and end address in
 * @start and @end, respectively.  If no System RAM is found, returns false.
 */
static bool get_monitoring_region(unsigned long *start, unsigned long *end)
{
	struct damon_lru_sort_ram_walk_arg arg = {};

	walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram);
	if (arg.end <= arg.start)
		return false;

	*start = arg.start;
	*end = arg.end;
	return true;
}

/* Create a DAMON-based operation scheme for hot memory regions */
static struct damos *damon_lru_sort_new_hot_scheme(unsigned int hot_thres)
{
	struct damos_watermarks wmarks = {
		.metric = DAMOS_WMARK_FREE_MEM_RATE,
		.interval = wmarks_interval,
		.high = wmarks_high,
		.mid = wmarks_mid,
		.low = wmarks_low,
	};
	struct damos_quota quota = {
		/*
		 * Do not try LRU-lists sorting of hot pages for more than half
		 * of quota_ms milliseconds within quota_reset_interval_ms.
		 */
		.ms = quota_ms / 2,
		.sz = 0,
		.reset_interval = quota_reset_interval_ms,
		/* Within the quota, mark hotter regions accessed first. */
		.weight_sz = 0,
		.weight_nr_accesses = 1,
		.weight_age = 0,
	};
	struct damos *scheme = damon_new_scheme(
			/* Find regions having PAGE_SIZE or larger size */
			PAGE_SIZE, ULONG_MAX,
			/* and accessed for more than the threshold */
			hot_thres, UINT_MAX,
			/* no matter its age */
			0, UINT_MAX,
			/* prioritize those on LRU lists, as soon as found */
			DAMOS_LRU_PRIO,
			/* under the quota. */
			&quota,
			/* (De)activate this according to the watermarks. */
			&wmarks);

	return scheme;
}

/* Create a DAMON-based operation scheme for cold memory regions */
static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres)
{
	struct damos_watermarks wmarks = {
		.metric = DAMOS_WMARK_FREE_MEM_RATE,
		.interval = wmarks_interval,
		.high = wmarks_high,
		.mid = wmarks_mid,
		.low = wmarks_low,
	};
	struct damos_quota quota = {
		/*
		 * Do not try LRU-lists sorting of cold pages for more than
		 * half of quota_ms milliseconds within
		 * quota_reset_interval_ms.
		 */
		.ms = quota_ms / 2,
		.sz = 0,
		.reset_interval = quota_reset_interval_ms,
		/* Within the quota, mark colder regions not accessed first. */
		.weight_sz = 0,
		.weight_nr_accesses = 0,
		.weight_age = 1,
	};
	struct damos *scheme = damon_new_scheme(
			/* Find regions having PAGE_SIZE or larger size */
			PAGE_SIZE, ULONG_MAX,
			/* and not accessed at all */
			0, 0,
			/* for cold_thres or more micro-seconds, and */
			cold_thres, UINT_MAX,
			/* mark those as not accessed, as soon as found */
			DAMOS_LRU_DEPRIO,
			/* under the quota. */
			&quota,
			/* (De)activate this according to the watermarks. */
			&wmarks);

	return scheme;
}

/*
 * Apply the current values of the module parameters to the DAMON context.
 *
 * Sets the monitoring attributes, replaces the schemes of 'ctx' with freshly
 * built hot and cold schemes, and sets the monitoring target region.  If both
 * monitor_region_start and monitor_region_end are zero, the biggest System RAM
 * resource is used and written back to those two parameters.
 *
 * Returns 0 on success, -EINVAL if a parameter is invalid or no System RAM
 * region could be found, -ENOMEM if a scheme could not be allocated, or the
 * error returned by damon_set_attrs() or damon_set_regions().
 */
static int damon_lru_sort_apply_parameters(void)
{
	struct damos *scheme, *next_scheme;
	struct damon_addr_range addr_range;
	unsigned int hot_thres, cold_thres;
	int err = 0;

	/*
	 * Both intervals are user-writable and are used as divisors for the
	 * threshold calculations below.  Reject zero before touching any
	 * state, so that a bad write cannot trigger a division by zero.
	 */
	if (!sample_interval || !aggr_interval)
		return -EINVAL;

	/*
	 * Reject an inverted region up front, too, rather than after the
	 * attributes and schemes have already been replaced.
	 */
	if (monitor_region_start > monitor_region_end)
		return -EINVAL;

	err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0,
			min_nr_regions, max_nr_regions);
	if (err)
		return err;

	/* free previously set schemes */
	damon_for_each_scheme_safe(scheme, next_scheme, ctx)
		damon_destroy_scheme(scheme);

	/* aggr_interval / sample_interval is the maximum nr_accesses */
	hot_thres = aggr_interval / sample_interval * hot_thres_access_freq /
		1000;
	scheme = damon_lru_sort_new_hot_scheme(hot_thres);
	if (!scheme)
		return -ENOMEM;
	damon_add_scheme(ctx, scheme);

	/* convert cold_min_age into a number of aggregation intervals */
	cold_thres = cold_min_age / aggr_interval;
	scheme = damon_lru_sort_new_cold_scheme(cold_thres);
	if (!scheme)
		return -ENOMEM;
	damon_add_scheme(ctx, scheme);

	/* no region given by the user: fall back to the biggest System RAM */
	if (!monitor_region_start && !monitor_region_end &&
			!get_monitoring_region(&monitor_region_start,
				&monitor_region_end))
		return -EINVAL;
	addr_range.start = monitor_region_start;
	addr_range.end = monitor_region_end;
	return damon_set_regions(target, &addr_range, 1);
}

/*
 * Start (@on is true) or stop (@on is false) DAMON for this module.
 *
 * Starting first applies the current parameters and, once DAMON is running,
 * publishes the pid of ctx->kdamond via the kdamond_pid parameter.  A
 * successful stop resets kdamond_pid to -1.  Returns 0 on success or the
 * first error met.
 */
static int damon_lru_sort_turn(bool on)
{
	int ret;

	if (on) {
		ret = damon_lru_sort_apply_parameters();
		if (!ret)
			ret = damon_start(&ctx, 1, true);
		if (!ret)
			kdamond_pid = ctx->kdamond->pid;
		return ret;
	}

	ret = damon_stop(&ctx, 1);
	if (ret)
		return ret;

	kdamond_pid = -1;
	return 0;
}

static struct delayed_work damon_lru_sort_timer;
static void damon_lru_sort_timer_fn(struct work_struct *work)
{
	static bool last_enabled;
	bool now_enabled;

	now_enabled = enabled;
	if (last_enabled != now_enabled) {
		if (!damon_lru_sort_turn(now_enabled))
			last_enabled = now_enabled;
		else
			enabled = last_enabled;
	}
}
static DECLARE_DELAYED_WORK(damon_lru_sort_timer, damon_lru_sort_timer_fn);

static bool damon_lru_sort_initialized;

/*
 * Setter of the 'enabled' parameter.
 *
 * Stores the boolean given in @val through param_set_bool().  Once the module
 * has finished initializing, it also schedules damon_lru_sort_timer so that
 * the new value gets acted upon.  Returns the param_set_bool() result if
 * parsing failed or initialization is not yet done, 0 otherwise.
 */
static int damon_lru_sort_enabled_store(const char *val,
		const struct kernel_param *kp)
{
	int ret = param_set_bool(val, kp);

	if (ret < 0 || !damon_lru_sort_initialized)
		return ret;

	schedule_delayed_work(&damon_lru_sort_timer, 0);
	return 0;
}

/*
 * Operations for the 'enabled' parameter: writes go through
 * damon_lru_sort_enabled_store(), reads use the stock boolean getter.
 */
static const struct kernel_param_ops enabled_param_ops = {
	.set = damon_lru_sort_enabled_store,
	.get = param_get_bool,
};

/* Register 'enabled' with the custom operations above. */
module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
MODULE_PARM_DESC(enabled,
	"Enable or disable DAMON_LRU_SORT (default: disabled)");

/*
 * Re-apply the module parameters if the user asked for it via commit_inputs.
 *
 * commit_inputs is cleared after the attempt whether or not it succeeded.
 * Returns 0 if there was nothing to do, else the result of
 * damon_lru_sort_apply_parameters().
 */
static int damon_lru_sort_handle_commit_inputs(void)
{
	int ret = 0;

	if (commit_inputs) {
		ret = damon_lru_sort_apply_parameters();
		commit_inputs = false;
	}

	return ret;
}

/*
 * after_aggregation callback of the DAMON context.
 *
 * Copies the statistics of the hot (DAMOS_LRU_PRIO) and cold
 * (DAMOS_LRU_DEPRIO) schemes of @c into the read-only stat parameters, then
 * handles a pending commit_inputs request and returns its result.
 */
static int damon_lru_sort_after_aggregation(struct damon_ctx *c)
{
	struct damos *scheme;

	damon_for_each_scheme(scheme, c) {
		switch (scheme->action) {
		case DAMOS_LRU_PRIO:
			nr_lru_sort_tried_hot_regions = scheme->stat.nr_tried;
			bytes_lru_sort_tried_hot_regions =
				scheme->stat.sz_tried;
			nr_lru_sorted_hot_regions = scheme->stat.nr_applied;
			bytes_lru_sorted_hot_regions = scheme->stat.sz_applied;
			nr_hot_quota_exceeds = scheme->stat.qt_exceeds;
			break;
		case DAMOS_LRU_DEPRIO:
			nr_lru_sort_tried_cold_regions = scheme->stat.nr_tried;
			bytes_lru_sort_tried_cold_regions =
				scheme->stat.sz_tried;
			nr_lru_sorted_cold_regions = scheme->stat.nr_applied;
			bytes_lru_sorted_cold_regions =
				scheme->stat.sz_applied;
			nr_cold_quota_exceeds = scheme->stat.qt_exceeds;
			break;
		default:
			/* schemes with other actions have no stat parameter */
			break;
		}
	}

	return damon_lru_sort_handle_commit_inputs();
}

/*
 * after_wmarks_check callback of the DAMON context.
 *
 * Only handles a pending commit_inputs request; @c is not used.
 */
static int damon_lru_sort_after_wmarks_check(struct damon_ctx *c)
{
	int ret = damon_lru_sort_handle_commit_inputs();

	return ret;
}

/*
 * Module initialization.
 *
 * Creates the DAMON context, selects the DAMON_OPS_PADDR operations set,
 * installs the callbacks and adds a single monitoring target.  It then
 * schedules damon_lru_sort_timer once and marks the module as initialized.
 *
 * Returns 0 on success, -ENOMEM if the context or target cannot be allocated,
 * or -EINVAL if the operations set cannot be selected.
 */
static int __init damon_lru_sort_init(void)
{
	int err;

	ctx = damon_new_ctx();
	if (!ctx)
		return -ENOMEM;

	err = -EINVAL;
	if (damon_select_ops(ctx, DAMON_OPS_PADDR))
		goto out_destroy_ctx;

	ctx->callback.after_wmarks_check = damon_lru_sort_after_wmarks_check;
	ctx->callback.after_aggregation = damon_lru_sort_after_aggregation;

	err = -ENOMEM;
	target = damon_new_target();
	if (!target)
		goto out_destroy_ctx;
	damon_add_target(ctx, target);

	schedule_delayed_work(&damon_lru_sort_timer, 0);

	damon_lru_sort_initialized = true;
	return 0;

out_destroy_ctx:
	damon_destroy_ctx(ctx);
	return err;
}

module_init(damon_lru_sort_init);
Commit	Line	Data
40e983cc SP	1	// SPDX-License-Identifier: GPL-2.0
	2	/*
	3	* DAMON-based LRU-lists Sorting
	4	*
	5	* Author: SeongJae Park <sj@kernel.org>
	6	*/
	7
	8	#define pr_fmt(fmt) "damon-lru-sort: " fmt
	9
	10	#include <linux/damon.h>
	11	#include <linux/ioport.h>
	12	#include <linux/module.h>
	13	#include <linux/sched.h>
	14	#include <linux/workqueue.h>
	15
	16	#ifdef MODULE_PARAM_PREFIX
	17	#undef MODULE_PARAM_PREFIX
	18	#endif
	19	#define MODULE_PARAM_PREFIX "damon_lru_sort."
	20
	21	/*
	22	* Enable or disable DAMON_LRU_SORT.
	23	*
	24	* You can enable DAMON_LRU_SORT by setting the value of this parameter as
	25	* ``Y``. Setting it as ``N`` disables DAMON_LRU_SORT. Note that
	26	* DAMON_LRU_SORT could do no real monitoring and LRU-lists sorting due to the
	27	* watermarks-based activation condition. Refer to below descriptions for the
	28	* watermarks parameter for this.
	29	*/
	30	static bool enabled __read_mostly;
	31
	32	/*
	33	* Make DAMON_LRU_SORT reads the input parameters again, except ``enabled``.
	34	*
	35	* Input parameters that updated while DAMON_LRU_SORT is running are not
	36	* applied by default. Once this parameter is set as ``Y``, DAMON_LRU_SORT
	37	* reads values of parametrs except ``enabled`` again. Once the re-reading is
	38	* done, this parameter is set as ``N``. If invalid parameters are found while
	39	* the re-reading, DAMON_LRU_SORT will be disabled.
	40	*/
	41	static bool commit_inputs __read_mostly;
	42	module_param(commit_inputs, bool, 0600);
	43
	44	/*
	45	* Access frequency threshold for hot memory regions identification in permil.
	46	*
	47	* If a memory region is accessed in frequency of this or higher,
	48	* DAMON_LRU_SORT identifies the region as hot, and mark it as accessed on the
	49	* LRU list, so that it could not be reclaimed under memory pressure. 50% by
	50	* default.
	51	*/
	52	static unsigned long hot_thres_access_freq = 500;
	53	module_param(hot_thres_access_freq, ulong, 0600);
	54
	55	/*
	56	* Time threshold for cold memory regions identification in microseconds.
	57	*
	58	* If a memory region is not accessed for this or longer time, DAMON_LRU_SORT
	59	* identifies the region as cold, and mark it as unaccessed on the LRU list, so
	60	* that it could be reclaimed first under memory pressure. 120 seconds by
	61	* default.
	62	*/
	63	static unsigned long cold_min_age __read_mostly = 120000000;
	64	module_param(cold_min_age, ulong, 0600);
65
66	/*
67	* Limit of time for trying the LRU lists sorting in milliseconds.
68	*
69	* DAMON_LRU_SORT tries to use only up to this time within a time window
70	* (quota_reset_interval_ms) for trying LRU lists sorting. This can be used
71	* for limiting CPU consumption of DAMON_LRU_SORT. If the value is zero, the
72	* limit is disabled.
73	*
74	* 10 ms by default.
75	*/
76	static unsigned long quota_ms __read_mostly = 10;
77	module_param(quota_ms, ulong, 0600);
78
79	/*
80	* The time quota charge reset interval in milliseconds.
81	*
82	* The charge reset interval for the quota of time (quota_ms). That is,
83	* DAMON_LRU_SORT does not try LRU-lists sorting for more than quota_ms
84	* milliseconds or quota_sz bytes within quota_reset_interval_ms milliseconds.
85	*
86	* 1 second by default.
87	*/
88	static unsigned long quota_reset_interval_ms __read_mostly = 1000;
89	module_param(quota_reset_interval_ms, ulong, 0600);
90
91	/*
92	* The watermarks check time interval in microseconds.
93	*
94	* Minimal time to wait before checking the watermarks, when DAMON_LRU_SORT is
95	* enabled but inactive due to its watermarks rule. 5 seconds by default.
96	*/
97	static unsigned long wmarks_interval __read_mostly = 5000000;
98	module_param(wmarks_interval, ulong, 0600);
99
100	/*
101	* Free memory rate (per thousand) for the high watermark.
102	*
103	* If free memory of the system in bytes per thousand bytes is higher than
104	* this, DAMON_LRU_SORT becomes inactive, so it does nothing but periodically
105	* checks the watermarks. 200 (20%) by default.
106	*/
107	static unsigned long wmarks_high __read_mostly = 200;
108	module_param(wmarks_high, ulong, 0600);
109
110	/*
111	* Free memory rate (per thousand) for the middle watermark.
112	*
113	* If free memory of the system in bytes per thousand bytes is between this and
114	* the low watermark, DAMON_LRU_SORT becomes active, so starts the monitoring
115	* and the LRU-lists sorting. 150 (15%) by default.
116	*/
117	static unsigned long wmarks_mid __read_mostly = 150;
118	module_param(wmarks_mid, ulong, 0600);
119
120	/*
121	* Free memory rate (per thousand) for the low watermark.
122	*
123	* If free memory of the system in bytes per thousand bytes is lower than this,
124	* DAMON_LRU_SORT becomes inactive, so it does nothing but periodically checks
125	* the watermarks. 50 (5%) by default.
126	*/
127	static unsigned long wmarks_low __read_mostly = 50;
128	module_param(wmarks_low, ulong, 0600);
129
130	/*
131	* Sampling interval for the monitoring in microseconds.
132	*
133	* The sampling interval of DAMON for the hot/cold memory monitoring. Please
134	* refer to the DAMON documentation for more detail. 5 ms by default.
135	*/
136	static unsigned long sample_interval __read_mostly = 5000;
137	module_param(sample_interval, ulong, 0600);
138
139	/*
140	* Aggregation interval for the monitoring in microseconds.
141	*
142	* The aggregation interval of DAMON for the hot/cold memory monitoring.
143	* Please refer to the DAMON documentation for more detail. 100 ms by default.
144	*/
145	static unsigned long aggr_interval __read_mostly = 100000;
146	module_param(aggr_interval, ulong, 0600);
147
148	/*
149	* Minimum number of monitoring regions.
150	*
151	* The minimal number of monitoring regions of DAMON for the hot/cold memory
152	* monitoring. This can be used to set lower-bound of the monitoring quality.
153	* But, setting this too high could result in increased monitoring overhead.
154	* Please refer to the DAMON documentation for more detail. 10 by default.
155	*/
156	static unsigned long min_nr_regions __read_mostly = 10;
157	module_param(min_nr_regions, ulong, 0600);
158
159	/*
160	* Maximum number of monitoring regions.
161	*
162	* The maximum number of monitoring regions of DAMON for the hot/cold memory
163	* monitoring. This can be used to set upper-bound of the monitoring overhead.
164	* However, setting this too low could result in bad monitoring quality.
165	* Please refer to the DAMON documentation for more detail. 1000 by default.
166	*/
167	static unsigned long max_nr_regions __read_mostly = 1000;
168	module_param(max_nr_regions, ulong, 0600);
169
170	/*
171	* Start of the target memory region in physical address.
172	*
173	* The start physical address of memory region that DAMON_LRU_SORT will do work
174	* against. By default, biggest System RAM is used as the region.
175	*/
176	static unsigned long monitor_region_start __read_mostly;
177	module_param(monitor_region_start, ulong, 0600);
178
179	/*
180	* End of the target memory region in physical address.
181	*
182	* The end physical address of memory region that DAMON_LRU_SORT will do work
183	* against. By default, biggest System RAM is used as the region.
184	*/
185	static unsigned long monitor_region_end __read_mostly;
186	module_param(monitor_region_end, ulong, 0600);
187
188	/*
189	* PID of the DAMON thread
190	*
191	* If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread.
192	* Else, -1.
193	*/
194	static int kdamond_pid __read_mostly = -1;
195	module_param(kdamond_pid, int, 0400);
196
197	/*
198	* Number of hot memory regions that tried to be LRU-sorted.
199	*/
200	static unsigned long nr_lru_sort_tried_hot_regions __read_mostly;
201	module_param(nr_lru_sort_tried_hot_regions, ulong, 0400);
202
203	/*
204	* Total bytes of hot memory regions that tried to be LRU-sorted.
205	*/
206	static unsigned long bytes_lru_sort_tried_hot_regions __read_mostly;
207	module_param(bytes_lru_sort_tried_hot_regions, ulong, 0400);
208
209	/*
210	* Number of hot memory regions that successfully be LRU-sorted.
211	*/
212	static unsigned long nr_lru_sorted_hot_regions __read_mostly;
213	module_param(nr_lru_sorted_hot_regions, ulong, 0400);
214
215	/*
216	* Total bytes of hot memory regions that successfully be LRU-sorted.
217	*/
218	static unsigned long bytes_lru_sorted_hot_regions __read_mostly;
219	module_param(bytes_lru_sorted_hot_regions, ulong, 0400);
220
221	/*
222	* Number of times that the time quota limit for hot regions have exceeded
223	*/
224	static unsigned long nr_hot_quota_exceeds __read_mostly;
225	module_param(nr_hot_quota_exceeds, ulong, 0400);
226
227	/*
228	* Number of cold memory regions that tried to be LRU-sorted.
229	*/
230	static unsigned long nr_lru_sort_tried_cold_regions __read_mostly;
231	module_param(nr_lru_sort_tried_cold_regions, ulong, 0400);
232
233	/*
234	* Total bytes of cold memory regions that tried to be LRU-sorted.
235	*/
236	static unsigned long bytes_lru_sort_tried_cold_regions __read_mostly;
237	module_param(bytes_lru_sort_tried_cold_regions, ulong, 0400);
238
239	/*
240	* Number of cold memory regions that successfully be LRU-sorted.
241	*/
242	static unsigned long nr_lru_sorted_cold_regions __read_mostly;
243	module_param(nr_lru_sorted_cold_regions, ulong, 0400);
244
245	/*
246	* Total bytes of cold memory regions that successfully be LRU-sorted.
247	*/
248	static unsigned long bytes_lru_sorted_cold_regions __read_mostly;
249	module_param(bytes_lru_sorted_cold_regions, ulong, 0400);
250
251	/*
252	* Number of times that the time quota limit for cold regions have exceeded
253	*/
254	static unsigned long nr_cold_quota_exceeds __read_mostly;
255	module_param(nr_cold_quota_exceeds, ulong, 0400);
256
257	static struct damon_ctx *ctx;
258	static struct damon_target *target;
259
260	struct damon_lru_sort_ram_walk_arg {
261	unsigned long start;
262	unsigned long end;
263	};
264
265	static int walk_system_ram(struct resource res, void arg)
266	{
267	struct damon_lru_sort_ram_walk_arg *a = arg;
268
269	if (a->end - a->start < resource_size(res)) {
270	a->start = res->start;
271	a->end = res->end;
272	}
273	return 0;
274	}
275
276	/*
277	* Find biggest 'System RAM' resource and store its start and end address in
278	* @start and @end, respectively. If no System RAM is found, returns false.
279	*/
280	static bool get_monitoring_region(unsigned long start, unsigned long end)
281	{
282	struct damon_lru_sort_ram_walk_arg arg = {};
283
284	walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram);
285	if (arg.end <= arg.start)
286	return false;
287
288	*start = arg.start;
289	*end = arg.end;
290	return true;
291	}
292
293	/* Create a DAMON-based operation scheme for hot memory regions */
294	static struct damos *damon_lru_sort_new_hot_scheme(unsigned int hot_thres)
295	{
296	struct damos_watermarks wmarks = {
297	.metric = DAMOS_WMARK_FREE_MEM_RATE,
298	.interval = wmarks_interval,
299	.high = wmarks_high,
300	.mid = wmarks_mid,
301	.low = wmarks_low,
302	};
303	struct damos_quota quota = {
304	/*
305	* Do not try LRU-lists sorting of hot pages for more than half
306	* of quota_ms milliseconds within quota_reset_interval_ms.
307	*/
308	.ms = quota_ms / 2,
309	.sz = 0,
310	.reset_interval = quota_reset_interval_ms,
311	/* Within the quota, mark hotter regions accessed first. */
312	.weight_sz = 0,
313	.weight_nr_accesses = 1,
314	.weight_age = 0,
315	};
316	struct damos *scheme = damon_new_scheme(
317	/* Find regions having PAGE_SIZE or larger size */
318	PAGE_SIZE, ULONG_MAX,
319	/* and accessed for more than the threshold */
320	hot_thres, UINT_MAX,
321	/* no matter its age */
322	0, UINT_MAX,
323	/* prioritize those on LRU lists, as soon as found */
324	DAMOS_LRU_PRIO,
325	/* under the quota. */
326	&quota,
327	/* (De)activate this according to the watermarks. */
328	&wmarks);
329
330	return scheme;
331	}
332
333	/* Create a DAMON-based operation scheme for cold memory regions */
334	static struct damos *damon_lru_sort_new_cold_scheme(unsigned int cold_thres)
335	{
336	struct damos_watermarks wmarks = {
337	.metric = DAMOS_WMARK_FREE_MEM_RATE,
338	.interval = wmarks_interval,
339	.high = wmarks_high,
340	.mid = wmarks_mid,
341	.low = wmarks_low,
342	};
343	struct damos_quota quota = {
344	/*
345	* Do not try LRU-lists sorting of cold pages for more than
346	* half of quota_ms milliseconds within
347	* quota_reset_interval_ms.
348	*/
349	.ms = quota_ms / 2,
350	.sz = 0,
351	.reset_interval = quota_reset_interval_ms,
352	/* Within the quota, mark colder regions not accessed first. */
353	.weight_sz = 0,
354	.weight_nr_accesses = 0,
355	.weight_age = 1,
356	};
357	struct damos *scheme = damon_new_scheme(
358	/* Find regions having PAGE_SIZE or larger size */
359	PAGE_SIZE, ULONG_MAX,
360	/* and not accessed at all */
361	0, 0,
362	/* for cold_thres or more micro-seconds, and */
363	cold_thres, UINT_MAX,
364	/* mark those as not accessed, as soon as found */
365	DAMOS_LRU_DEPRIO,
366	/* under the quota. */
367	&quota,
368	/* (De)activate this according to the watermarks. */
369	&wmarks);
370
371	return scheme;
372	}
373
374	static int damon_lru_sort_apply_parameters(void)
375	{
376	struct damos scheme, next_scheme;
377	struct damon_addr_range addr_range;
378	unsigned int hot_thres, cold_thres;
379	int err = 0;
380
381	err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0,
382	min_nr_regions, max_nr_regions);
383	if (err)
384	return err;
385
386	/* free previously set schemes */
387	damon_for_each_scheme_safe(scheme, next_scheme, ctx)
388	damon_destroy_scheme(scheme);
389
390	/* aggr_interval / sample_interval is the maximum nr_accesses */
391	hot_thres = aggr_interval / sample_interval * hot_thres_access_freq /
392	1000;
393	scheme = damon_lru_sort_new_hot_scheme(hot_thres);
394	if (!scheme)
395	return -ENOMEM;
396	damon_add_scheme(ctx, scheme);
397
398	cold_thres = cold_min_age / aggr_interval;
399	scheme = damon_lru_sort_new_cold_scheme(cold_thres);
400	if (!scheme)
401	return -ENOMEM;
402	damon_add_scheme(ctx, scheme);
403
404	if (monitor_region_start > monitor_region_end)
405	return -EINVAL;
406	if (!monitor_region_start && !monitor_region_end &&
407	!get_monitoring_region(&monitor_region_start,
408	&monitor_region_end))
409	return -EINVAL;
410	addr_range.start = monitor_region_start;
411	addr_range.end = monitor_region_end;
412	return damon_set_regions(target, &addr_range, 1);
413	}
414
415	static int damon_lru_sort_turn(bool on)
416	{
417	int err;
418
419	if (!on) {
420	err = damon_stop(&ctx, 1);
421	if (!err)
422	kdamond_pid = -1;
423	return err;
424	}
425
426	err = damon_lru_sort_apply_parameters();
427	if (err)
428	return err;
429
430	err = damon_start(&ctx, 1, true);
431	if (err)
432	return err;
433	kdamond_pid = ctx->kdamond->pid;
434	return 0;
435	}
436
437	static struct delayed_work damon_lru_sort_timer;
438	static void damon_lru_sort_timer_fn(struct work_struct *work)
439	{
440	static bool last_enabled;
441	bool now_enabled;
442
443	now_enabled = enabled;
444	if (last_enabled != now_enabled) {
445	if (!damon_lru_sort_turn(now_enabled))
446	last_enabled = now_enabled;
447	else
448	enabled = last_enabled;
449	}
450	}
451	static DECLARE_DELAYED_WORK(damon_lru_sort_timer, damon_lru_sort_timer_fn);
452
453	static bool damon_lru_sort_initialized;
454
455	static int damon_lru_sort_enabled_store(const char *val,
456	const struct kernel_param *kp)
457	{
458	int rc = param_set_bool(val, kp);
459
460	if (rc < 0)
461	return rc;
462
463	if (!damon_lru_sort_initialized)
464	return rc;
465
466	schedule_delayed_work(&damon_lru_sort_timer, 0);
467
468	return 0;
469	}
470
471	static const struct kernel_param_ops enabled_param_ops = {
472	.set = damon_lru_sort_enabled_store,
473	.get = param_get_bool,
474	};
475
476	module_param_cb(enabled, &enabled_param_ops, &enabled, 0600);
477	MODULE_PARM_DESC(enabled,
478	"Enable or disable DAMON_LRU_SORT (default: disabled)");
479
480	static int damon_lru_sort_handle_commit_inputs(void)
481	{
482	int err;
483
484	if (!commit_inputs)
485	return 0;
486
487	err = damon_lru_sort_apply_parameters();
488	commit_inputs = false;
489	return err;
490	}
491
492	static int damon_lru_sort_after_aggregation(struct damon_ctx *c)
493	{
494	struct damos *s;
495
496	/* update the stats parameter */
497	damon_for_each_scheme(s, c) {
498	if (s->action == DAMOS_LRU_PRIO) {
499	nr_lru_sort_tried_hot_regions = s->stat.nr_tried;
500	bytes_lru_sort_tried_hot_regions = s->stat.sz_tried;
501	nr_lru_sorted_hot_regions = s->stat.nr_applied;
502	bytes_lru_sorted_hot_regions = s->stat.sz_applied;
503	nr_hot_quota_exceeds = s->stat.qt_exceeds;
504	} else if (s->action == DAMOS_LRU_DEPRIO) {
505	nr_lru_sort_tried_cold_regions = s->stat.nr_tried;
506	bytes_lru_sort_tried_cold_regions = s->stat.sz_tried;
507	nr_lru_sorted_cold_regions = s->stat.nr_applied;
508	bytes_lru_sorted_cold_regions = s->stat.sz_applied;
509	nr_cold_quota_exceeds = s->stat.qt_exceeds;
510	}
511	}
512
513	return damon_lru_sort_handle_commit_inputs();
514	}
515
516	static int damon_lru_sort_after_wmarks_check(struct damon_ctx *c)
517	{
518	return damon_lru_sort_handle_commit_inputs();
519	}
520
521	static int __init damon_lru_sort_init(void)
522	{
523	ctx = damon_new_ctx();
524	if (!ctx)
525	return -ENOMEM;
526
ec1658f0 SP	527	if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
ec1658f0 SP	528	damon_destroy_ctx(ctx);
40e983cc	529	return -EINVAL;
ec1658f0	530	}
40e983cc SP	531
	532	ctx->callback.after_wmarks_check = damon_lru_sort_after_wmarks_check;
	533	ctx->callback.after_aggregation = damon_lru_sort_after_aggregation;
	534
	535	target = damon_new_target();
	536	if (!target) {
	537	damon_destroy_ctx(ctx);
	538	return -ENOMEM;
	539	}
	540	damon_add_target(ctx, target);
	541
	542	schedule_delayed_work(&damon_lru_sort_timer, 0);
	543
	544	damon_lru_sort_initialized = true;
	545	return 0;
	546	}
	547
	548	module_init(damon_lru_sort_init);