]>
Commit | Line | Data |
---|---|---|
b5eafe92 JF |
1 | /* |
2 | * Xen leaves the responsibility for maintaining p2m mappings to the | |
3 | * guests themselves, but it must also access and update the p2m array | |
4 | * during suspend/resume when all the pages are reallocated. | |
5 | * | |
054954eb JG |
6 | * The logical flat p2m table is mapped to a linear kernel memory area. |
7 | * For accesses by Xen a three-level tree linked via mfns only is set up to | |
8 | * allow the address space to be sparse. | |
b5eafe92 | 9 | * |
054954eb JG |
10 | * Xen |
11 | * | | |
12 | * p2m_top_mfn | |
13 | * / \ | |
14 | * p2m_mid_mfn p2m_mid_mfn | |
15 | * / / | |
16 | * p2m p2m p2m ... | |
b5eafe92 JF |
17 | * |
18 | * The p2m_mid_mfn pages are mapped by p2m_top_mfn_p. | |
19 | * | |
054954eb JG |
20 | * The p2m_top_mfn level is limited to 1 page, so the maximum representable |
21 | * pseudo-physical address space is: | |
b5eafe92 JF |
22 | * P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE pages |
23 | * | |
24 | * P2M_PER_PAGE depends on the architecture, as a mfn is always | |
25 | * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to | |
a3118beb | 26 | * 512 and 1024 entries respectively. |
f4cec35b KRW |
27 | * |
28 | * In short, these structures contain the Machine Frame Number (MFN) of the PFN. | |
29 | * | |
30 | * However not all entries are filled with MFNs. Specifically for all other | |
31 | * leaf entries, or for the top root, or middle one, for which there is a void | |
32 | * entry, we assume it is "missing". So (for example) | |
33 | * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. | |
054954eb JG |
34 | * We have a dedicated page p2m_missing with all entries being |
35 | * INVALID_P2M_ENTRY. This page may be referenced multiple times in the p2m | |
36 | * list/tree in case there are multiple areas with P2M_PER_PAGE invalid pfns. | |
f4cec35b KRW |
37 | * |
38 | * We also have the possibility of setting 1-1 mappings on certain regions, so | |
39 | * that: | |
40 | * pfn_to_mfn(0xc0000)=0xc0000 | |
41 | * | |
42 | * The benefit of this is, that we can assume for non-RAM regions (think | |
3cb83e46 | 43 | * PCI BARs, or ACPI spaces), we can create mappings easily because we |
f4cec35b KRW |
44 | * get the PFN value to match the MFN. |
45 | * | |
054954eb JG |
46 | * For this to work efficiently we have one new page p2m_identity. All entries |
47 | * in p2m_identity are set to INVALID_P2M_ENTRY type (Xen toolstack only | |
48 | * recognizes that and MFNs, no other fancy value). | |
f4cec35b KRW |
49 | * |
50 | * On lookup we spot that the entry points to p2m_identity and return the | |
51 | * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. | |
52 | * If the entry points to an allocated page, we just proceed as before and | |
054954eb | 53 | * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in |
f4cec35b KRW |
54 | * appropriate functions (pfn_to_mfn). |
55 | * | |
56 | * The reason for having the IDENTITY_FRAME_BIT instead of just returning the | |
57 | * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a | |
58 | * non-identity pfn. To protect ourselves against that, we elect to set (and get) the | |
59 | * IDENTITY_FRAME_BIT on all identity mapped PFNs. | |
b5eafe92 JF |
60 | */ |
61 | ||
62 | #include <linux/init.h> | |
7a2463dc | 63 | #include <linux/export.h> |
448f2831 JF |
64 | #include <linux/list.h> |
65 | #include <linux/hash.h> | |
87f1d40a | 66 | #include <linux/sched.h> |
2222e71b | 67 | #include <linux/seq_file.h> |
2c185687 | 68 | #include <linux/bootmem.h> |
7108c9ce | 69 | #include <linux/slab.h> |
d6472302 | 70 | #include <linux/vmalloc.h> |
b5eafe92 JF |
71 | |
72 | #include <asm/cache.h> | |
73 | #include <asm/setup.h> | |
7c0f6ba6 | 74 | #include <linux/uaccess.h> |
b5eafe92 JF |
75 | |
76 | #include <asm/xen/page.h> | |
77 | #include <asm/xen/hypercall.h> | |
78 | #include <asm/xen/hypervisor.h> | |
ee072640 | 79 | #include <xen/balloon.h> |
0930bba6 | 80 | #include <xen/grant_table.h> |
b5eafe92 | 81 | |
0930bba6 | 82 | #include "multicalls.h" |
b5eafe92 JF |
83 | #include "xen-ops.h" |
84 | ||
cb3eb850 JG |
85 | #define P2M_MID_PER_PAGE (PAGE_SIZE / sizeof(unsigned long *)) |
86 | #define P2M_TOP_PER_PAGE (PAGE_SIZE / sizeof(unsigned long **)) | |
87 | ||
88 | #define MAX_P2M_PFN (P2M_TOP_PER_PAGE * P2M_MID_PER_PAGE * P2M_PER_PAGE) | |
89 | ||
054954eb JG |
90 | #define PMDS_PER_MID_PAGE (P2M_MID_PER_PAGE / PTRS_PER_PTE) |
91 | ||
5b8e7d80 JG |
92 | unsigned long *xen_p2m_addr __read_mostly; |
93 | EXPORT_SYMBOL_GPL(xen_p2m_addr); | |
94 | unsigned long xen_p2m_size __read_mostly; | |
95 | EXPORT_SYMBOL_GPL(xen_p2m_size); | |
b5eafe92 | 96 | unsigned long xen_max_p2m_pfn __read_mostly; |
5b8e7d80 | 97 | EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); |
b5eafe92 | 98 | |
633d6f17 JG |
99 | #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT |
100 | #define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT | |
101 | #else | |
102 | #define P2M_LIMIT 0 | |
103 | #endif | |
104 | ||
054954eb JG |
105 | static DEFINE_SPINLOCK(p2m_update_lock); |
106 | ||
2c185687 JG |
107 | static unsigned long *p2m_mid_missing_mfn; |
108 | static unsigned long *p2m_top_mfn; | |
109 | static unsigned long **p2m_top_mfn_p; | |
054954eb JG |
110 | static unsigned long *p2m_missing; |
111 | static unsigned long *p2m_identity; | |
112 | static pte_t *p2m_missing_pte; | |
113 | static pte_t *p2m_identity_pte; | |
7108c9ce | 114 | |
98dd166e DV |
115 | /* |
116 | * Hint at last populated PFN. | |
117 | * | |
118 | * Used to set HYPERVISOR_shared_info->arch.max_pfn so the toolstack | |
119 | * can avoid scanning the whole P2M (which may be sized to account for | |
120 | * hotplugged memory). | |
121 | */ | |
122 | static unsigned long xen_p2m_last_pfn; | |
123 | ||
b5eafe92 JF |
124 | static inline unsigned p2m_top_index(unsigned long pfn) |
125 | { | |
126 | BUG_ON(pfn >= MAX_P2M_PFN); | |
127 | return pfn / (P2M_MID_PER_PAGE * P2M_PER_PAGE); | |
128 | } | |
129 | ||
130 | static inline unsigned p2m_mid_index(unsigned long pfn) | |
131 | { | |
132 | return (pfn / P2M_PER_PAGE) % P2M_MID_PER_PAGE; | |
133 | } | |
134 | ||
135 | static inline unsigned p2m_index(unsigned long pfn) | |
136 | { | |
137 | return pfn % P2M_PER_PAGE; | |
138 | } | |
139 | ||
b5eafe92 JF |
140 | static void p2m_top_mfn_init(unsigned long *top) |
141 | { | |
142 | unsigned i; | |
143 | ||
144 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | |
145 | top[i] = virt_to_mfn(p2m_mid_missing_mfn); | |
146 | } | |
147 | ||
148 | static void p2m_top_mfn_p_init(unsigned long **top) | |
149 | { | |
150 | unsigned i; | |
151 | ||
152 | for (i = 0; i < P2M_TOP_PER_PAGE; i++) | |
153 | top[i] = p2m_mid_missing_mfn; | |
154 | } | |
155 | ||
054954eb | 156 | static void p2m_mid_mfn_init(unsigned long *mid, unsigned long *leaf) |
b5eafe92 JF |
157 | { |
158 | unsigned i; | |
159 | ||
160 | for (i = 0; i < P2M_MID_PER_PAGE; i++) | |
054954eb | 161 | mid[i] = virt_to_mfn(leaf); |
b5eafe92 JF |
162 | } |
163 | ||
054954eb | 164 | static void p2m_init(unsigned long *p2m) |
b5eafe92 JF |
165 | { |
166 | unsigned i; | |
167 | ||
054954eb JG |
168 | for (i = 0; i < P2M_PER_PAGE; i++) |
169 | p2m[i] = INVALID_P2M_ENTRY; | |
b5eafe92 JF |
170 | } |
171 | ||
054954eb | 172 | static void p2m_init_identity(unsigned long *p2m, unsigned long pfn) |
b5eafe92 JF |
173 | { |
174 | unsigned i; | |
175 | ||
054954eb JG |
176 | for (i = 0; i < P2M_PER_PAGE; i++) |
177 | p2m[i] = IDENTITY_FRAME(pfn + i); | |
b5eafe92 JF |
178 | } |
179 | ||
7108c9ce JG |
180 | static void * __ref alloc_p2m_page(void) |
181 | { | |
7108c9ce JG |
182 | if (unlikely(!slab_is_available())) |
183 | return alloc_bootmem_align(PAGE_SIZE, PAGE_SIZE); | |
184 | ||
32d6bd90 | 185 | return (void *)__get_free_page(GFP_KERNEL); |
7108c9ce JG |
186 | } |
187 | ||
701a261a | 188 | static void __ref free_p2m_page(void *p) |
7108c9ce | 189 | { |
701a261a BO |
190 | if (unlikely(!slab_is_available())) { |
191 | free_bootmem((unsigned long)p, PAGE_SIZE); | |
192 | return; | |
193 | } | |
194 | ||
7108c9ce JG |
195 | free_page((unsigned long)p); |
196 | } | |
197 | ||
b5eafe92 JF |
198 | /* |
199 | * Build the parallel p2m_top_mfn and p2m_mid_mfn structures | |
200 | * | |
201 | * This is called both at boot time, and after resuming from suspend: | |
2c185687 | 202 | * - At boot time we're called rather early, and must use alloc_bootmem*() |
b5eafe92 JF |
203 | * to allocate memory. |
204 | * | |
205 | * - After resume we're called from within stop_machine, but the mfn | |
2c185687 | 206 | * tree should already be completely allocated. |
b5eafe92 | 207 | */ |
44b46c3e | 208 | void __ref xen_build_mfn_list_list(void) |
b5eafe92 | 209 | { |
054954eb JG |
210 | unsigned long pfn, mfn; |
211 | pte_t *ptep; | |
212 | unsigned int level, topidx, mididx; | |
213 | unsigned long *mid_mfn_p; | |
b5eafe92 | 214 | |
d51e8b3e JG |
215 | if (xen_feature(XENFEAT_auto_translated_physmap) || |
216 | xen_start_info->flags & SIF_VIRT_P2M_4TOOLS) | |
696fd7c5 KRW |
217 | return; |
218 | ||
b5eafe92 JF |
219 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
220 | if (p2m_top_mfn == NULL) { | |
7108c9ce | 221 | p2m_mid_missing_mfn = alloc_p2m_page(); |
3cb83e46 | 222 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
b5eafe92 | 223 | |
7108c9ce | 224 | p2m_top_mfn_p = alloc_p2m_page(); |
b5eafe92 JF |
225 | p2m_top_mfn_p_init(p2m_top_mfn_p); |
226 | ||
7108c9ce | 227 | p2m_top_mfn = alloc_p2m_page(); |
b5eafe92 JF |
228 | p2m_top_mfn_init(p2m_top_mfn); |
229 | } else { | |
230 | /* Reinitialise, mfn's all change after migration */ | |
3cb83e46 | 231 | p2m_mid_mfn_init(p2m_mid_missing_mfn, p2m_missing); |
b5eafe92 JF |
232 | } |
233 | ||
054954eb JG |
234 | for (pfn = 0; pfn < xen_max_p2m_pfn && pfn < MAX_P2M_PFN; |
235 | pfn += P2M_PER_PAGE) { | |
236 | topidx = p2m_top_index(pfn); | |
237 | mididx = p2m_mid_index(pfn); | |
b5eafe92 | 238 | |
b5eafe92 | 239 | mid_mfn_p = p2m_top_mfn_p[topidx]; |
054954eb JG |
240 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), |
241 | &level); | |
242 | BUG_ON(!ptep || level != PG_LEVEL_4K); | |
243 | mfn = pte_mfn(*ptep); | |
244 | ptep = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | |
b5eafe92 JF |
245 | |
246 | /* Don't bother allocating any mfn mid levels if | |
247 | * they're just missing, just update the stored mfn, | |
248 | * since all could have changed over a migrate. | |
249 | */ | |
054954eb | 250 | if (ptep == p2m_missing_pte || ptep == p2m_identity_pte) { |
b5eafe92 JF |
251 | BUG_ON(mididx); |
252 | BUG_ON(mid_mfn_p != p2m_mid_missing_mfn); | |
253 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_mid_missing_mfn); | |
254 | pfn += (P2M_MID_PER_PAGE - 1) * P2M_PER_PAGE; | |
255 | continue; | |
256 | } | |
257 | ||
258 | if (mid_mfn_p == p2m_mid_missing_mfn) { | |
7108c9ce | 259 | mid_mfn_p = alloc_p2m_page(); |
3cb83e46 | 260 | p2m_mid_mfn_init(mid_mfn_p, p2m_missing); |
b5eafe92 JF |
261 | |
262 | p2m_top_mfn_p[topidx] = mid_mfn_p; | |
263 | } | |
264 | ||
265 | p2m_top_mfn[topidx] = virt_to_mfn(mid_mfn_p); | |
054954eb | 266 | mid_mfn_p[mididx] = mfn; |
b5eafe92 JF |
267 | } |
268 | } | |
269 | ||
270 | void xen_setup_mfn_list_list(void) | |
271 | { | |
4dd322bc MR |
272 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
273 | return; | |
274 | ||
b5eafe92 JF |
275 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
276 | ||
d51e8b3e JG |
277 | if (xen_start_info->flags & SIF_VIRT_P2M_4TOOLS) |
278 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = ~0UL; | |
279 | else | |
280 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | |
281 | virt_to_mfn(p2m_top_mfn); | |
98dd166e | 282 | HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; |
4b9c9a11 JG |
283 | HYPERVISOR_shared_info->arch.p2m_generation = 0; |
284 | HYPERVISOR_shared_info->arch.p2m_vaddr = (unsigned long)xen_p2m_addr; | |
285 | HYPERVISOR_shared_info->arch.p2m_cr3 = | |
286 | xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); | |
b5eafe92 JF |
287 | } |
288 | ||
289 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | |
290 | void __init xen_build_dynamic_phys_to_machine(void) | |
291 | { | |
b5eafe92 JF |
292 | unsigned long pfn; |
293 | ||
696fd7c5 KRW |
294 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
295 | return; | |
296 | ||
5b8e7d80 | 297 | xen_p2m_addr = (unsigned long *)xen_start_info->mfn_list; |
054954eb | 298 | xen_p2m_size = ALIGN(xen_start_info->nr_pages, P2M_PER_PAGE); |
b5eafe92 | 299 | |
054954eb JG |
300 | for (pfn = xen_start_info->nr_pages; pfn < xen_p2m_size; pfn++) |
301 | xen_p2m_addr[pfn] = INVALID_P2M_ENTRY; | |
b5eafe92 | 302 | |
054954eb JG |
303 | xen_max_p2m_pfn = xen_p2m_size; |
304 | } | |
b5eafe92 | 305 | |
054954eb JG |
306 | #define P2M_TYPE_IDENTITY 0 |
307 | #define P2M_TYPE_MISSING 1 | |
308 | #define P2M_TYPE_PFN 2 | |
309 | #define P2M_TYPE_UNKNOWN 3 | |
b5eafe92 | 310 | |
054954eb JG |
311 | static int xen_p2m_elem_type(unsigned long pfn) |
312 | { | |
313 | unsigned long mfn; | |
b5eafe92 | 314 | |
054954eb JG |
315 | if (pfn >= xen_p2m_size) |
316 | return P2M_TYPE_IDENTITY; | |
b5eafe92 | 317 | |
054954eb | 318 | mfn = xen_p2m_addr[pfn]; |
b5eafe92 | 319 | |
054954eb JG |
320 | if (mfn == INVALID_P2M_ENTRY) |
321 | return P2M_TYPE_MISSING; | |
cf04d120 | 322 | |
054954eb JG |
323 | if (mfn & IDENTITY_FRAME_BIT) |
324 | return P2M_TYPE_IDENTITY; | |
325 | ||
326 | return P2M_TYPE_PFN; | |
b5eafe92 | 327 | } |
054954eb JG |
328 | |
329 | static void __init xen_rebuild_p2m_list(unsigned long *p2m) | |
357a3cfb | 330 | { |
054954eb | 331 | unsigned int i, chunk; |
357a3cfb | 332 | unsigned long pfn; |
054954eb JG |
333 | unsigned long *mfns; |
334 | pte_t *ptep; | |
335 | pmd_t *pmdp; | |
336 | int type; | |
357a3cfb | 337 | |
054954eb JG |
338 | p2m_missing = alloc_p2m_page(); |
339 | p2m_init(p2m_missing); | |
340 | p2m_identity = alloc_p2m_page(); | |
341 | p2m_init(p2m_identity); | |
b5eafe92 | 342 | |
054954eb JG |
343 | p2m_missing_pte = alloc_p2m_page(); |
344 | paravirt_alloc_pte(&init_mm, __pa(p2m_missing_pte) >> PAGE_SHIFT); | |
345 | p2m_identity_pte = alloc_p2m_page(); | |
346 | paravirt_alloc_pte(&init_mm, __pa(p2m_identity_pte) >> PAGE_SHIFT); | |
347 | for (i = 0; i < PTRS_PER_PTE; i++) { | |
348 | set_pte(p2m_missing_pte + i, | |
2e917175 | 349 | pfn_pte(PFN_DOWN(__pa(p2m_missing)), PAGE_KERNEL_RO)); |
054954eb | 350 | set_pte(p2m_identity_pte + i, |
2e917175 | 351 | pfn_pte(PFN_DOWN(__pa(p2m_identity)), PAGE_KERNEL_RO)); |
054954eb | 352 | } |
357a3cfb | 353 | |
054954eb JG |
354 | for (pfn = 0; pfn < xen_max_p2m_pfn; pfn += chunk) { |
355 | /* | |
356 | * Try to map missing/identity PMDs or p2m-pages if possible. | |
357 | * We have to respect the structure of the mfn_list_list | |
358 | * which will be built just afterwards. | |
359 | * Chunk size to test is one p2m page if we are in the middle | |
360 | * of a mfn_list_list mid page and the complete mid page area | |
361 | * if we are at index 0 of the mid page. Please note that a | |
362 | * mid page might cover more than one PMD, e.g. on 32 bit PAE | |
363 | * kernels. | |
364 | */ | |
365 | chunk = (pfn & (P2M_PER_PAGE * P2M_MID_PER_PAGE - 1)) ? | |
366 | P2M_PER_PAGE : P2M_PER_PAGE * P2M_MID_PER_PAGE; | |
367 | ||
368 | type = xen_p2m_elem_type(pfn); | |
369 | i = 0; | |
370 | if (type != P2M_TYPE_PFN) | |
371 | for (i = 1; i < chunk; i++) | |
372 | if (xen_p2m_elem_type(pfn + i) != type) | |
373 | break; | |
374 | if (i < chunk) | |
375 | /* Reset to minimal chunk size. */ | |
376 | chunk = P2M_PER_PAGE; | |
377 | ||
378 | if (type == P2M_TYPE_PFN || i < chunk) { | |
379 | /* Use initial p2m page contents. */ | |
380 | #ifdef CONFIG_X86_64 | |
381 | mfns = alloc_p2m_page(); | |
382 | copy_page(mfns, xen_p2m_addr + pfn); | |
383 | #else | |
384 | mfns = xen_p2m_addr + pfn; | |
385 | #endif | |
386 | ptep = populate_extra_pte((unsigned long)(p2m + pfn)); | |
387 | set_pte(ptep, | |
388 | pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL)); | |
357a3cfb | 389 | continue; |
054954eb | 390 | } |
b5eafe92 | 391 | |
054954eb JG |
392 | if (chunk == P2M_PER_PAGE) { |
393 | /* Map complete missing or identity p2m-page. */ | |
394 | mfns = (type == P2M_TYPE_MISSING) ? | |
395 | p2m_missing : p2m_identity; | |
396 | ptep = populate_extra_pte((unsigned long)(p2m + pfn)); | |
397 | set_pte(ptep, | |
2e917175 | 398 | pfn_pte(PFN_DOWN(__pa(mfns)), PAGE_KERNEL_RO)); |
357a3cfb | 399 | continue; |
054954eb | 400 | } |
357a3cfb | 401 | |
054954eb JG |
402 | /* Complete missing or identity PMD(s) can be mapped. */ |
403 | ptep = (type == P2M_TYPE_MISSING) ? | |
404 | p2m_missing_pte : p2m_identity_pte; | |
405 | for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | |
406 | pmdp = populate_extra_pmd( | |
82c92ed1 | 407 | (unsigned long)(p2m + pfn) + i * PMD_SIZE); |
054954eb JG |
408 | set_pmd(pmdp, __pmd(__pa(ptep) | _KERNPG_TABLE)); |
409 | } | |
410 | } | |
411 | } | |
357a3cfb | 412 | |
054954eb JG |
413 | void __init xen_vmalloc_p2m_tree(void) |
414 | { | |
415 | static struct vm_struct vm; | |
633d6f17 | 416 | unsigned long p2m_limit; |
357a3cfb | 417 | |
98dd166e DV |
418 | xen_p2m_last_pfn = xen_max_p2m_pfn; |
419 | ||
633d6f17 | 420 | p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE; |
054954eb | 421 | vm.flags = VM_ALLOC; |
633d6f17 | 422 | vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit), |
054954eb JG |
423 | PMD_SIZE * PMDS_PER_MID_PAGE); |
424 | vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE); | |
425 | pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size); | |
3fc509fc | 426 | |
054954eb | 427 | xen_max_p2m_pfn = vm.size / sizeof(unsigned long); |
357a3cfb | 428 | |
054954eb | 429 | xen_rebuild_p2m_list(vm.addr); |
357a3cfb | 430 | |
054954eb | 431 | xen_p2m_addr = vm.addr; |
5b8e7d80 | 432 | xen_p2m_size = xen_max_p2m_pfn; |
5b8e7d80 JG |
433 | |
434 | xen_inv_extra_mem(); | |
357a3cfb | 435 | } |
054954eb | 436 | |
b5eafe92 JF |
437 | unsigned long get_phys_to_machine(unsigned long pfn) |
438 | { | |
054954eb JG |
439 | pte_t *ptep; |
440 | unsigned int level; | |
b5eafe92 | 441 | |
5b8e7d80 JG |
442 | if (unlikely(pfn >= xen_p2m_size)) { |
443 | if (pfn < xen_max_p2m_pfn) | |
444 | return xen_chk_extra_mem(pfn); | |
445 | ||
25b884a8 | 446 | return IDENTITY_FRAME(pfn); |
5b8e7d80 | 447 | } |
b5eafe92 | 448 | |
054954eb JG |
449 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); |
450 | BUG_ON(!ptep || level != PG_LEVEL_4K); | |
b5eafe92 | 451 | |
f4cec35b KRW |
452 | /* |
453 | * The INVALID_P2M_ENTRY is filled in both p2m_*identity | |
454 | * and in p2m_*missing, so returning the INVALID_P2M_ENTRY | |
455 | * would be wrong. | |
456 | */ | |
054954eb | 457 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) |
f4cec35b KRW |
458 | return IDENTITY_FRAME(pfn); |
459 | ||
054954eb | 460 | return xen_p2m_addr[pfn]; |
b5eafe92 JF |
461 | } |
462 | EXPORT_SYMBOL_GPL(get_phys_to_machine); | |
463 | ||
054954eb JG |
464 | /* |
465 | * Allocate new pmd(s). It is checked whether the old pmd is still in place. | |
466 | * If not, nothing is changed. This is okay as the only reason for allocating | |
467 | * a new pmd is to replace p2m_missing_pte or p2m_identity_pte by a individual | |
468 | * pmd. In case of PAE/x86-32 there are multiple pmds to allocate! | |
469 | */ | |
f241b0b8 | 470 | static pte_t *alloc_p2m_pmd(unsigned long addr, pte_t *pte_pg) |
054954eb JG |
471 | { |
472 | pte_t *ptechk; | |
054954eb JG |
473 | pte_t *pte_newpg[PMDS_PER_MID_PAGE]; |
474 | pmd_t *pmdp; | |
475 | unsigned int level; | |
476 | unsigned long flags; | |
477 | unsigned long vaddr; | |
478 | int i; | |
479 | ||
480 | /* Do all allocations first to bail out in error case. */ | |
481 | for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | |
482 | pte_newpg[i] = alloc_p2m_page(); | |
483 | if (!pte_newpg[i]) { | |
484 | for (i--; i >= 0; i--) | |
485 | free_p2m_page(pte_newpg[i]); | |
486 | ||
487 | return NULL; | |
488 | } | |
489 | } | |
490 | ||
491 | vaddr = addr & ~(PMD_SIZE * PMDS_PER_MID_PAGE - 1); | |
492 | ||
493 | for (i = 0; i < PMDS_PER_MID_PAGE; i++) { | |
494 | copy_page(pte_newpg[i], pte_pg); | |
495 | paravirt_alloc_pte(&init_mm, __pa(pte_newpg[i]) >> PAGE_SHIFT); | |
496 | ||
497 | pmdp = lookup_pmd_address(vaddr); | |
498 | BUG_ON(!pmdp); | |
499 | ||
500 | spin_lock_irqsave(&p2m_update_lock, flags); | |
501 | ||
502 | ptechk = lookup_address(vaddr, &level); | |
503 | if (ptechk == pte_pg) { | |
4b9c9a11 JG |
504 | HYPERVISOR_shared_info->arch.p2m_generation++; |
505 | wmb(); /* Tools are synchronizing via p2m_generation. */ | |
054954eb JG |
506 | set_pmd(pmdp, |
507 | __pmd(__pa(pte_newpg[i]) | _KERNPG_TABLE)); | |
4b9c9a11 JG |
508 | wmb(); /* Tools are synchronizing via p2m_generation. */ |
509 | HYPERVISOR_shared_info->arch.p2m_generation++; | |
054954eb JG |
510 | pte_newpg[i] = NULL; |
511 | } | |
512 | ||
513 | spin_unlock_irqrestore(&p2m_update_lock, flags); | |
514 | ||
515 | if (pte_newpg[i]) { | |
516 | paravirt_release_pte(__pa(pte_newpg[i]) >> PAGE_SHIFT); | |
517 | free_p2m_page(pte_newpg[i]); | |
518 | } | |
519 | ||
520 | vaddr += PMD_SIZE; | |
521 | } | |
522 | ||
f241b0b8 | 523 | return lookup_address(addr, &level); |
054954eb JG |
524 | } |
525 | ||
a3118beb | 526 | /* |
b5eafe92 JF |
527 | * Fully allocate the p2m structure for a given pfn. We need to check |
528 | * that both the top and mid levels are allocated, and make sure the | |
529 | * parallel mfn tree is kept in sync. We may race with other cpus, so | |
530 | * the new pages are installed with cmpxchg; if we lose the race then | |
531 | * simply free the page we allocated and use the one that's there. | |
532 | */ | |
8edfcf88 | 533 | int xen_alloc_p2m_entry(unsigned long pfn) |
b5eafe92 | 534 | { |
c70727a5 | 535 | unsigned topidx; |
b5eafe92 | 536 | unsigned long *top_mfn_p, *mid_mfn; |
054954eb JG |
537 | pte_t *ptep, *pte_pg; |
538 | unsigned int level; | |
539 | unsigned long flags; | |
540 | unsigned long addr = (unsigned long)(xen_p2m_addr + pfn); | |
541 | unsigned long p2m_pfn; | |
b5eafe92 | 542 | |
8edfcf88 DV |
543 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
544 | return 0; | |
545 | ||
054954eb JG |
546 | ptep = lookup_address(addr, &level); |
547 | BUG_ON(!ptep || level != PG_LEVEL_4K); | |
548 | pte_pg = (pte_t *)((unsigned long)ptep & ~(PAGE_SIZE - 1)); | |
b5eafe92 | 549 | |
054954eb JG |
550 | if (pte_pg == p2m_missing_pte || pte_pg == p2m_identity_pte) { |
551 | /* PMD level is missing, allocate a new one */ | |
f241b0b8 | 552 | ptep = alloc_p2m_pmd(addr, pte_pg); |
054954eb | 553 | if (!ptep) |
8edfcf88 | 554 | return -ENOMEM; |
b5eafe92 JF |
555 | } |
556 | ||
c70727a5 JG |
557 | if (p2m_top_mfn && pfn < MAX_P2M_PFN) { |
558 | topidx = p2m_top_index(pfn); | |
054954eb JG |
559 | top_mfn_p = &p2m_top_mfn[topidx]; |
560 | mid_mfn = ACCESS_ONCE(p2m_top_mfn_p[topidx]); | |
b5eafe92 | 561 | |
054954eb | 562 | BUG_ON(virt_to_mfn(mid_mfn) != *top_mfn_p); |
b5eafe92 | 563 | |
054954eb JG |
564 | if (mid_mfn == p2m_mid_missing_mfn) { |
565 | /* Separately check the mid mfn level */ | |
566 | unsigned long missing_mfn; | |
567 | unsigned long mid_mfn_mfn; | |
568 | unsigned long old_mfn; | |
b5eafe92 | 569 | |
054954eb JG |
570 | mid_mfn = alloc_p2m_page(); |
571 | if (!mid_mfn) | |
8edfcf88 | 572 | return -ENOMEM; |
b5eafe92 | 573 | |
054954eb | 574 | p2m_mid_mfn_init(mid_mfn, p2m_missing); |
b5eafe92 | 575 | |
054954eb JG |
576 | missing_mfn = virt_to_mfn(p2m_mid_missing_mfn); |
577 | mid_mfn_mfn = virt_to_mfn(mid_mfn); | |
578 | old_mfn = cmpxchg(top_mfn_p, missing_mfn, mid_mfn_mfn); | |
579 | if (old_mfn != missing_mfn) { | |
580 | free_p2m_page(mid_mfn); | |
581 | mid_mfn = mfn_to_virt(old_mfn); | |
582 | } else { | |
583 | p2m_top_mfn_p[topidx] = mid_mfn; | |
584 | } | |
239af7c7 | 585 | } |
054954eb JG |
586 | } else { |
587 | mid_mfn = NULL; | |
b5eafe92 JF |
588 | } |
589 | ||
1760f1eb | 590 | p2m_pfn = pte_pfn(READ_ONCE(*ptep)); |
054954eb JG |
591 | if (p2m_pfn == PFN_DOWN(__pa(p2m_identity)) || |
592 | p2m_pfn == PFN_DOWN(__pa(p2m_missing))) { | |
b5eafe92 JF |
593 | /* p2m leaf page is missing */ |
594 | unsigned long *p2m; | |
595 | ||
596 | p2m = alloc_p2m_page(); | |
597 | if (!p2m) | |
8edfcf88 | 598 | return -ENOMEM; |
b5eafe92 | 599 | |
054954eb JG |
600 | if (p2m_pfn == PFN_DOWN(__pa(p2m_missing))) |
601 | p2m_init(p2m); | |
602 | else | |
b8f05c88 | 603 | p2m_init_identity(p2m, pfn & ~(P2M_PER_PAGE - 1)); |
054954eb JG |
604 | |
605 | spin_lock_irqsave(&p2m_update_lock, flags); | |
606 | ||
607 | if (pte_pfn(*ptep) == p2m_pfn) { | |
4b9c9a11 JG |
608 | HYPERVISOR_shared_info->arch.p2m_generation++; |
609 | wmb(); /* Tools are synchronizing via p2m_generation. */ | |
054954eb JG |
610 | set_pte(ptep, |
611 | pfn_pte(PFN_DOWN(__pa(p2m)), PAGE_KERNEL)); | |
4b9c9a11 JG |
612 | wmb(); /* Tools are synchronizing via p2m_generation. */ |
613 | HYPERVISOR_shared_info->arch.p2m_generation++; | |
054954eb | 614 | if (mid_mfn) |
c70727a5 | 615 | mid_mfn[p2m_mid_index(pfn)] = virt_to_mfn(p2m); |
054954eb JG |
616 | p2m = NULL; |
617 | } | |
618 | ||
619 | spin_unlock_irqrestore(&p2m_update_lock, flags); | |
b5eafe92 | 620 | |
054954eb | 621 | if (p2m) |
b5eafe92 | 622 | free_p2m_page(p2m); |
b5eafe92 JF |
623 | } |
624 | ||
98dd166e DV |
625 | /* Expanded the p2m? */ |
626 | if (pfn > xen_p2m_last_pfn) { | |
627 | xen_p2m_last_pfn = pfn; | |
628 | HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn; | |
629 | } | |
630 | ||
8edfcf88 | 631 | return 0; |
b5eafe92 | 632 | } |
8edfcf88 | 633 | EXPORT_SYMBOL(xen_alloc_p2m_entry); |
b5eafe92 | 634 | |
b83c6e55 | 635 | unsigned long __init set_phys_range_identity(unsigned long pfn_s, |
f4cec35b KRW |
636 | unsigned long pfn_e) |
637 | { | |
638 | unsigned long pfn; | |
639 | ||
5b8e7d80 | 640 | if (unlikely(pfn_s >= xen_p2m_size)) |
f4cec35b KRW |
641 | return 0; |
642 | ||
643 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) | |
644 | return pfn_e - pfn_s; | |
645 | ||
646 | if (pfn_s > pfn_e) | |
647 | return 0; | |
648 | ||
5b8e7d80 JG |
649 | if (pfn_e > xen_p2m_size) |
650 | pfn_e = xen_p2m_size; | |
f4cec35b | 651 | |
5b8e7d80 JG |
652 | for (pfn = pfn_s; pfn < pfn_e; pfn++) |
653 | xen_p2m_addr[pfn] = IDENTITY_FRAME(pfn); | |
f4cec35b KRW |
654 | |
655 | return pfn - pfn_s; | |
656 | } | |
657 | ||
b5eafe92 JF |
658 | bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) |
659 | { | |
054954eb JG |
660 | pte_t *ptep; |
661 | unsigned int level; | |
b5eafe92 | 662 | |
2f558d40 SS |
663 | /* don't track P2M changes in autotranslate guests */ |
664 | if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) | |
6eaa412f | 665 | return true; |
2f558d40 | 666 | |
5b8e7d80 | 667 | if (unlikely(pfn >= xen_p2m_size)) { |
b5eafe92 JF |
668 | BUG_ON(mfn != INVALID_P2M_ENTRY); |
669 | return true; | |
670 | } | |
671 | ||
4b9c9a11 JG |
672 | /* |
673 | * The interface requires atomic updates on p2m elements. | |
674 | * xen_safe_write_ulong() is using __put_user which does an atomic | |
675 | * store via asm(). | |
676 | */ | |
90fff3ea | 677 | if (likely(!xen_safe_write_ulong(xen_p2m_addr + pfn, mfn))) |
2e917175 JG |
678 | return true; |
679 | ||
054954eb JG |
680 | ptep = lookup_address((unsigned long)(xen_p2m_addr + pfn), &level); |
681 | BUG_ON(!ptep || level != PG_LEVEL_4K); | |
f4cec35b | 682 | |
054954eb | 683 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_missing))) |
b5eafe92 JF |
684 | return mfn == INVALID_P2M_ENTRY; |
685 | ||
054954eb JG |
686 | if (pte_pfn(*ptep) == PFN_DOWN(__pa(p2m_identity))) |
687 | return mfn == IDENTITY_FRAME(pfn); | |
688 | ||
2e917175 | 689 | return false; |
b5eafe92 JF |
690 | } |
691 | ||
692 | bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |
693 | { | |
054954eb | 694 | if (unlikely(!__set_phys_to_machine(pfn, mfn))) { |
8edfcf88 DV |
695 | int ret; |
696 | ||
697 | ret = xen_alloc_p2m_entry(pfn); | |
698 | if (ret < 0) | |
b5eafe92 JF |
699 | return false; |
700 | ||
054954eb | 701 | return __set_phys_to_machine(pfn, mfn); |
b5eafe92 JF |
702 | } |
703 | ||
704 | return true; | |
705 | } | |
448f2831 | 706 | |
820c4db2 JG |
707 | int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, |
708 | struct gnttab_map_grant_ref *kmap_ops, | |
709 | struct page **pages, unsigned int count) | |
1429d46d ZK |
710 | { |
711 | int i, ret = 0; | |
820c4db2 | 712 | pte_t *pte; |
1429d46d ZK |
713 | |
714 | if (xen_feature(XENFEAT_auto_translated_physmap)) | |
715 | return 0; | |
716 | ||
0bb599fd DV |
717 | if (kmap_ops) { |
718 | ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, | |
719 | kmap_ops, count); | |
720 | if (ret) | |
721 | goto out; | |
1429d46d ZK |
722 | } |
723 | ||
724 | for (i = 0; i < count; i++) { | |
820c4db2 | 725 | unsigned long mfn, pfn; |
1429d46d | 726 | |
820c4db2 JG |
727 | /* Do not add to override if the map failed. */ |
728 | if (map_ops[i].status) | |
729 | continue; | |
730 | ||
731 | if (map_ops[i].flags & GNTMAP_contains_pte) { | |
732 | pte = (pte_t *)(mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + | |
733 | (map_ops[i].host_addr & ~PAGE_MASK)); | |
734 | mfn = pte_mfn(*pte); | |
735 | } else { | |
736 | mfn = PFN_DOWN(map_ops[i].dev_bus_addr); | |
1429d46d | 737 | } |
820c4db2 | 738 | pfn = page_to_pfn(pages[i]); |
1429d46d | 739 | |
0ae65f49 JH |
740 | WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned"); |
741 | ||
820c4db2 JG |
742 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { |
743 | ret = -ENOMEM; | |
1429d46d | 744 | goto out; |
820c4db2 | 745 | } |
1429d46d ZK |
746 | } |
747 | ||
748 | out: | |
1429d46d ZK |
749 | return ret; |
750 | } | |
820c4db2 | 751 | EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); |
1429d46d | 752 | |
820c4db2 | 753 | int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, |
853d0289 | 754 | struct gnttab_unmap_grant_ref *kunmap_ops, |
820c4db2 | 755 | struct page **pages, unsigned int count) |
448f2831 | 756 | { |
820c4db2 | 757 | int i, ret = 0; |
448f2831 | 758 | |
820c4db2 JG |
759 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
760 | return 0; | |
448f2831 | 761 | |
820c4db2 | 762 | for (i = 0; i < count; i++) { |
0aad5689 | 763 | unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); |
820c4db2 JG |
764 | unsigned long pfn = page_to_pfn(pages[i]); |
765 | ||
766 | if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { | |
767 | ret = -EINVAL; | |
768 | goto out; | |
448f2831 | 769 | } |
448f2831 | 770 | |
0ae65f49 | 771 | set_phys_to_machine(pfn, INVALID_P2M_ENTRY); |
820c4db2 | 772 | } |
0bb599fd DV |
773 | if (kunmap_ops) |
774 | ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, | |
775 | kunmap_ops, count); | |
820c4db2 | 776 | out: |
448f2831 JF |
777 | return ret; |
778 | } | |
820c4db2 | 779 | EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); |
448f2831 | 780 | |
2222e71b | 781 | #ifdef CONFIG_XEN_DEBUG_FS |
a867db10 KRW |
782 | #include <linux/debugfs.h> |
783 | #include "debugfs.h" | |
784 | static int p2m_dump_show(struct seq_file *m, void *v) | |
2222e71b | 785 | { |
a491dbef | 786 | static const char * const type_name[] = { |
054954eb JG |
787 | [P2M_TYPE_IDENTITY] = "identity", |
788 | [P2M_TYPE_MISSING] = "missing", | |
789 | [P2M_TYPE_PFN] = "pfn", | |
790 | [P2M_TYPE_UNKNOWN] = "abnormal"}; | |
791 | unsigned long pfn, first_pfn; | |
792 | int type, prev_type; | |
793 | ||
794 | prev_type = xen_p2m_elem_type(0); | |
795 | first_pfn = 0; | |
796 | ||
797 | for (pfn = 0; pfn < xen_p2m_size; pfn++) { | |
798 | type = xen_p2m_elem_type(pfn); | |
799 | if (type != prev_type) { | |
800 | seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, | |
801 | type_name[prev_type]); | |
2222e71b | 802 | prev_type = type; |
054954eb | 803 | first_pfn = pfn; |
2222e71b KRW |
804 | } |
805 | } | |
054954eb JG |
806 | seq_printf(m, " [0x%lx->0x%lx] %s\n", first_pfn, pfn, |
807 | type_name[prev_type]); | |
2222e71b | 808 | return 0; |
2222e71b | 809 | } |
a867db10 KRW |
810 | |
811 | static int p2m_dump_open(struct inode *inode, struct file *filp) | |
812 | { | |
813 | return single_open(filp, p2m_dump_show, NULL); | |
814 | } | |
815 | ||
816 | static const struct file_operations p2m_dump_fops = { | |
817 | .open = p2m_dump_open, | |
818 | .read = seq_read, | |
819 | .llseek = seq_lseek, | |
820 | .release = single_release, | |
821 | }; | |
822 | ||
823 | static struct dentry *d_mmu_debug; | |
824 | ||
825 | static int __init xen_p2m_debugfs(void) | |
826 | { | |
827 | struct dentry *d_xen = xen_init_debugfs(); | |
828 | ||
829 | if (d_xen == NULL) | |
830 | return -ENOMEM; | |
831 | ||
832 | d_mmu_debug = debugfs_create_dir("mmu", d_xen); | |
833 | ||
834 | debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); | |
835 | return 0; | |
836 | } | |
837 | fs_initcall(xen_p2m_debugfs); | |
838 | #endif /* CONFIG_XEN_DEBUG_FS */ |