]>
Commit | Line | Data |
---|---|---|
c221c0b0 DH |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright(c) 2016-2019 Intel Corporation. All rights reserved. */ | |
3 | #include <linux/memremap.h> | |
4 | #include <linux/pagemap.h> | |
5 | #include <linux/memory.h> | |
6 | #include <linux/module.h> | |
7 | #include <linux/device.h> | |
8 | #include <linux/pfn_t.h> | |
9 | #include <linux/slab.h> | |
10 | #include <linux/dax.h> | |
11 | #include <linux/fs.h> | |
12 | #include <linux/mm.h> | |
13 | #include <linux/mman.h> | |
14 | #include "dax-private.h" | |
15 | #include "bus.h" | |
16 | ||
8a725e46 DH |
/*
 * Name handed to add_memory_driver_managed() for every range this driver
 * hot-adds. Allocated in dax_kmem_init() and released in dax_kmem_exit()
 * unless any_hotremove_failed is set.
 */
static const char *kmem_name;
/*
 * Becomes true once some hot-added memory could not be removed again, i.e.
 * memory (and therefore kmem_name) must outlive the driver.
 */
static bool any_hotremove_failed;
21 | ||
60e93dc0 | 22 | static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r) |
59bc8d10 | 23 | { |
60e93dc0 DW |
24 | struct dev_dax_range *dax_range = &dev_dax->ranges[i]; |
25 | struct range *range = &dax_range->range; | |
59bc8d10 DW |
26 | |
27 | /* memory-block align the hotplug range */ | |
60e93dc0 DW |
28 | r->start = ALIGN(range->start, memory_block_size_bytes()); |
29 | r->end = ALIGN_DOWN(range->end + 1, memory_block_size_bytes()) - 1; | |
30 | if (r->start >= r->end) { | |
31 | r->start = range->start; | |
32 | r->end = range->end; | |
33 | return -ENOSPC; | |
34 | } | |
35 | return 0; | |
59bc8d10 DW |
36 | } |
37 | ||
a455aa72 DW |
/*
 * Per-device driver state, stored as drvdata by dev_dax_kmem_probe().
 *
 * @res_name: private copy of the device name, used as the name of every
 *            region requested for this device
 * @mgid:     id returned by memory_group_register_static()
 * @res:      one slot per dev_dax range (same index); NULL when that range
 *            is not currently reserved by this driver
 */
struct dax_kmem_data {
	const char *res_name;
	int mgid;
	struct resource *res[];
};
43 | ||
f11cf813 | 44 | static int dev_dax_kmem_probe(struct dev_dax *dev_dax) |
c221c0b0 | 45 | { |
f11cf813 | 46 | struct device *dev = &dev_dax->dev; |
eedf634a | 47 | unsigned long total_len = 0; |
a455aa72 | 48 | struct dax_kmem_data *data; |
eedf634a | 49 | int i, rc, mapped = 0; |
c221c0b0 | 50 | int numa_node; |
c221c0b0 DH |
51 | |
52 | /* | |
53 | * Ensure good NUMA information for the persistent memory. | |
54 | * Without this check, there is a risk that slow memory | |
55 | * could be mixed in a node with faster memory, causing | |
56 | * unavoidable performance issues. | |
57 | */ | |
58 | numa_node = dev_dax->target_node; | |
59 | if (numa_node < 0) { | |
f5516ec5 DW |
60 | dev_warn(dev, "rejecting DAX region with invalid node: %d\n", |
61 | numa_node); | |
c221c0b0 DH |
62 | return -EINVAL; |
63 | } | |
64 | ||
eedf634a DH |
65 | for (i = 0; i < dev_dax->nr_range; i++) { |
66 | struct range range; | |
67 | ||
68 | rc = dax_kmem_range(dev_dax, i, &range); | |
69 | if (rc) { | |
70 | dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n", | |
71 | i, range.start, range.end); | |
72 | continue; | |
73 | } | |
74 | total_len += range_len(&range); | |
75 | } | |
76 | ||
77 | if (!total_len) { | |
78 | dev_warn(dev, "rejecting DAX region without any memory after alignment\n"); | |
79 | return -EINVAL; | |
80 | } | |
81 | ||
7d18dd75 | 82 | data = kzalloc(struct_size(data, res, dev_dax->nr_range), GFP_KERNEL); |
a455aa72 | 83 | if (!data) |
60858c00 DH |
84 | return -ENOMEM; |
85 | ||
eedf634a | 86 | rc = -ENOMEM; |
a455aa72 DW |
87 | data->res_name = kstrdup(dev_name(dev), GFP_KERNEL); |
88 | if (!data->res_name) | |
89 | goto err_res_name; | |
90 | ||
eedf634a DH |
91 | rc = memory_group_register_static(numa_node, total_len); |
92 | if (rc < 0) | |
93 | goto err_reg_mgid; | |
94 | data->mgid = rc; | |
95 | ||
60e93dc0 DW |
96 | for (i = 0; i < dev_dax->nr_range; i++) { |
97 | struct resource *res; | |
98 | struct range range; | |
c221c0b0 | 99 | |
60e93dc0 | 100 | rc = dax_kmem_range(dev_dax, i, &range); |
eedf634a | 101 | if (rc) |
60e93dc0 | 102 | continue; |
c221c0b0 | 103 | |
60e93dc0 | 104 | /* Region is permanently reserved if hotremove fails. */ |
a455aa72 | 105 | res = request_mem_region(range.start, range_len(&range), data->res_name); |
60e93dc0 DW |
106 | if (!res) { |
107 | dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n", | |
108 | i, range.start, range.end); | |
109 | /* | |
110 | * Once some memory has been onlined we can't | |
111 | * assume that it can be un-onlined safely. | |
112 | */ | |
113 | if (mapped) | |
114 | continue; | |
a455aa72 DW |
115 | rc = -EBUSY; |
116 | goto err_request_mem; | |
60e93dc0 | 117 | } |
a455aa72 | 118 | data->res[i] = res; |
60e93dc0 DW |
119 | |
120 | /* | |
121 | * Set flags appropriate for System RAM. Leave ..._BUSY clear | |
122 | * so that add_memory() can add a child resource. Do not | |
123 | * inherit flags from the parent since it may set new flags | |
124 | * unknown to us that will break add_memory() below. | |
125 | */ | |
126 | res->flags = IORESOURCE_SYSTEM_RAM; | |
127 | ||
128 | /* | |
129 | * Ensure that future kexec'd kernels will not treat | |
130 | * this as RAM automatically. | |
131 | */ | |
eedf634a DH |
132 | rc = add_memory_driver_managed(data->mgid, range.start, |
133 | range_len(&range), kmem_name, MHP_NID_IS_MGID); | |
60e93dc0 DW |
134 | |
135 | if (rc) { | |
136 | dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n", | |
137 | i, range.start, range.end); | |
a455aa72 DW |
138 | release_resource(res); |
139 | kfree(res); | |
140 | data->res[i] = NULL; | |
60e93dc0 DW |
141 | if (mapped) |
142 | continue; | |
a455aa72 | 143 | goto err_request_mem; |
60e93dc0 DW |
144 | } |
145 | mapped++; | |
31e4ca92 | 146 | } |
7e6b431a | 147 | |
a455aa72 | 148 | dev_set_drvdata(dev, data); |
c221c0b0 DH |
149 | |
150 | return 0; | |
a455aa72 DW |
151 | |
152 | err_request_mem: | |
eedf634a DH |
153 | memory_group_unregister(data->mgid); |
154 | err_reg_mgid: | |
a455aa72 DW |
155 | kfree(data->res_name); |
156 | err_res_name: | |
157 | kfree(data); | |
158 | return rc; | |
c221c0b0 DH |
159 | } |
160 | ||
9f960da7 | 161 | #ifdef CONFIG_MEMORY_HOTREMOVE |
0d519e0d | 162 | static void dev_dax_kmem_remove(struct dev_dax *dev_dax) |
9f960da7 | 163 | { |
60e93dc0 | 164 | int i, success = 0; |
f11cf813 | 165 | struct device *dev = &dev_dax->dev; |
a455aa72 | 166 | struct dax_kmem_data *data = dev_get_drvdata(dev); |
9f960da7 PT |
167 | |
168 | /* | |
169 | * We have one shot for removing memory, if some memory blocks were not | |
170 | * offline prior to calling this function remove_memory() will fail, and | |
171 | * there is no way to hotremove this memory until reboot because device | |
172 | * unbind will succeed even if we return failure. | |
173 | */ | |
60e93dc0 DW |
174 | for (i = 0; i < dev_dax->nr_range; i++) { |
175 | struct range range; | |
176 | int rc; | |
177 | ||
178 | rc = dax_kmem_range(dev_dax, i, &range); | |
179 | if (rc) | |
180 | continue; | |
181 | ||
e1c158e4 | 182 | rc = remove_memory(range.start, range_len(&range)); |
60e93dc0 | 183 | if (rc == 0) { |
a455aa72 DW |
184 | release_resource(data->res[i]); |
185 | kfree(data->res[i]); | |
186 | data->res[i] = NULL; | |
60e93dc0 DW |
187 | success++; |
188 | continue; | |
189 | } | |
8a725e46 | 190 | any_hotremove_failed = true; |
60e93dc0 DW |
191 | dev_err(dev, |
192 | "mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n", | |
193 | i, range.start, range.end); | |
9f960da7 PT |
194 | } |
195 | ||
60e93dc0 | 196 | if (success >= dev_dax->nr_range) { |
eedf634a | 197 | memory_group_unregister(data->mgid); |
a455aa72 DW |
198 | kfree(data->res_name); |
199 | kfree(data); | |
60e93dc0 DW |
200 | dev_set_drvdata(dev, NULL); |
201 | } | |
9f960da7 PT |
202 | } |
203 | #else | |
0d519e0d | 204 | static void dev_dax_kmem_remove(struct dev_dax *dev_dax) |
c221c0b0 DH |
205 | { |
206 | /* | |
9f960da7 PT |
207 | * Without hotremove purposely leak the request_mem_region() for the |
208 | * device-dax range and return '0' to ->remove() attempts. The removal | |
209 | * of the device from the driver always succeeds, but the region is | |
210 | * permanently pinned as reserved by the unreleased | |
c221c0b0 DH |
211 | * request_mem_region(). |
212 | */ | |
8a725e46 | 213 | any_hotremove_failed = true; |
c221c0b0 | 214 | } |
9f960da7 | 215 | #endif /* CONFIG_MEMORY_HOTREMOVE */ |
c221c0b0 DH |
216 | |
217 | static struct dax_device_driver device_dax_kmem_driver = { | |
f11cf813 DW |
218 | .probe = dev_dax_kmem_probe, |
219 | .remove = dev_dax_kmem_remove, | |
c221c0b0 DH |
220 | }; |
221 | ||
222 | static int __init dax_kmem_init(void) | |
223 | { | |
8a725e46 DH |
224 | int rc; |
225 | ||
226 | /* Resource name is permanently allocated if any hotremove fails. */ | |
227 | kmem_name = kstrdup_const("System RAM (kmem)", GFP_KERNEL); | |
228 | if (!kmem_name) | |
229 | return -ENOMEM; | |
230 | ||
231 | rc = dax_driver_register(&device_dax_kmem_driver); | |
232 | if (rc) | |
233 | kfree_const(kmem_name); | |
234 | return rc; | |
c221c0b0 DH |
235 | } |
236 | ||
237 | static void __exit dax_kmem_exit(void) | |
238 | { | |
239 | dax_driver_unregister(&device_dax_kmem_driver); | |
8a725e46 DH |
240 | if (!any_hotremove_failed) |
241 | kfree_const(kmem_name); | |
c221c0b0 DH |
242 | } |
243 | ||
244 | MODULE_AUTHOR("Intel Corporation"); | |
245 | MODULE_LICENSE("GPL v2"); | |
246 | module_init(dax_kmem_init); | |
247 | module_exit(dax_kmem_exit); | |
248 | MODULE_ALIAS_DAX_DEVICE(0); |