]> git.proxmox.com Git - mirror_ubuntu-artful-kernel.git/blame - arch/s390/mm/pgtable.c
[S390] uaccess: use might_fault() instead of might_sleep()
[mirror_ubuntu-artful-kernel.git] / arch / s390 / mm / pgtable.c
CommitLineData
3610cce8
MS
1/*
2 * arch/s390/mm/pgtable.c
3 *
4 * Copyright IBM Corp. 2007
5 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
6 */
7
8#include <linux/sched.h>
9#include <linux/kernel.h>
10#include <linux/errno.h>
11#include <linux/mm.h>
12#include <linux/swap.h>
13#include <linux/smp.h>
14#include <linux/highmem.h>
15#include <linux/slab.h>
16#include <linux/pagemap.h>
17#include <linux/spinlock.h>
18#include <linux/module.h>
19#include <linux/quicklist.h>
20
21#include <asm/system.h>
22#include <asm/pgtable.h>
23#include <asm/pgalloc.h>
24#include <asm/tlb.h>
25#include <asm/tlbflush.h>
6252d702 26#include <asm/mmu_context.h>
3610cce8
MS
27
28#ifndef CONFIG_64BIT
29#define ALLOC_ORDER 1
146e4b3c
MS
30#define TABLES_PER_PAGE 4
31#define FRAG_MASK 15UL
32#define SECOND_HALVES 10UL
402b0862
CO
33
34void clear_table_pgstes(unsigned long *table)
35{
36 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
37 memset(table + 256, 0, PAGE_SIZE/4);
38 clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4);
39 memset(table + 768, 0, PAGE_SIZE/4);
40}
41
3610cce8
MS
42#else
43#define ALLOC_ORDER 2
146e4b3c
MS
44#define TABLES_PER_PAGE 2
45#define FRAG_MASK 3UL
46#define SECOND_HALVES 2UL
402b0862
CO
47
48void clear_table_pgstes(unsigned long *table)
49{
50 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
51 memset(table + 256, 0, PAGE_SIZE/2);
52}
53
3610cce8
MS
54#endif
55
56unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec)
57{
58 struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
59
60 if (!page)
61 return NULL;
62 page->index = 0;
63 if (noexec) {
64 struct page *shadow = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
65 if (!shadow) {
66 __free_pages(page, ALLOC_ORDER);
67 return NULL;
68 }
69 page->index = page_to_phys(shadow);
70 }
146e4b3c
MS
71 spin_lock(&mm->page_table_lock);
72 list_add(&page->lru, &mm->context.crst_list);
73 spin_unlock(&mm->page_table_lock);
3610cce8
MS
74 return (unsigned long *) page_to_phys(page);
75}
76
146e4b3c 77void crst_table_free(struct mm_struct *mm, unsigned long *table)
3610cce8
MS
78{
79 unsigned long *shadow = get_shadow_table(table);
146e4b3c 80 struct page *page = virt_to_page(table);
3610cce8 81
146e4b3c
MS
82 spin_lock(&mm->page_table_lock);
83 list_del(&page->lru);
84 spin_unlock(&mm->page_table_lock);
3610cce8
MS
85 if (shadow)
86 free_pages((unsigned long) shadow, ALLOC_ORDER);
87 free_pages((unsigned long) table, ALLOC_ORDER);
88}
89
6252d702
MS
90#ifdef CONFIG_64BIT
91int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
92{
93 unsigned long *table, *pgd;
94 unsigned long entry;
95
96 BUG_ON(limit > (1UL << 53));
97repeat:
98 table = crst_table_alloc(mm, mm->context.noexec);
99 if (!table)
100 return -ENOMEM;
101 spin_lock(&mm->page_table_lock);
102 if (mm->context.asce_limit < limit) {
103 pgd = (unsigned long *) mm->pgd;
104 if (mm->context.asce_limit <= (1UL << 31)) {
105 entry = _REGION3_ENTRY_EMPTY;
106 mm->context.asce_limit = 1UL << 42;
107 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
108 _ASCE_USER_BITS |
109 _ASCE_TYPE_REGION3;
110 } else {
111 entry = _REGION2_ENTRY_EMPTY;
112 mm->context.asce_limit = 1UL << 53;
113 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
114 _ASCE_USER_BITS |
115 _ASCE_TYPE_REGION2;
116 }
117 crst_table_init(table, entry);
118 pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
119 mm->pgd = (pgd_t *) table;
f481bfaf 120 mm->task_size = mm->context.asce_limit;
6252d702
MS
121 table = NULL;
122 }
123 spin_unlock(&mm->page_table_lock);
124 if (table)
125 crst_table_free(mm, table);
126 if (mm->context.asce_limit < limit)
127 goto repeat;
128 update_mm(mm, current);
129 return 0;
130}
131
132void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
133{
134 pgd_t *pgd;
135
136 if (mm->context.asce_limit <= limit)
137 return;
138 __tlb_flush_mm(mm);
139 while (mm->context.asce_limit > limit) {
140 pgd = mm->pgd;
141 switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
142 case _REGION_ENTRY_TYPE_R2:
143 mm->context.asce_limit = 1UL << 42;
144 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
145 _ASCE_USER_BITS |
146 _ASCE_TYPE_REGION3;
147 break;
148 case _REGION_ENTRY_TYPE_R3:
149 mm->context.asce_limit = 1UL << 31;
150 mm->context.asce_bits = _ASCE_TABLE_LENGTH |
151 _ASCE_USER_BITS |
152 _ASCE_TYPE_SEGMENT;
153 break;
154 default:
155 BUG();
156 }
157 mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
f481bfaf 158 mm->task_size = mm->context.asce_limit;
6252d702
MS
159 crst_table_free(mm, (unsigned long *) pgd);
160 }
161 update_mm(mm, current);
162}
163#endif
164
3610cce8
MS
165/*
166 * page table entry allocation/free routines.
167 */
146e4b3c 168unsigned long *page_table_alloc(struct mm_struct *mm)
3610cce8 169{
146e4b3c 170 struct page *page;
3610cce8 171 unsigned long *table;
146e4b3c 172 unsigned long bits;
3610cce8 173
250cf776 174 bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
146e4b3c
MS
175 spin_lock(&mm->page_table_lock);
176 page = NULL;
177 if (!list_empty(&mm->context.pgtable_list)) {
178 page = list_first_entry(&mm->context.pgtable_list,
179 struct page, lru);
180 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
181 page = NULL;
182 }
183 if (!page) {
184 spin_unlock(&mm->page_table_lock);
185 page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
186 if (!page)
3610cce8 187 return NULL;
146e4b3c
MS
188 pgtable_page_ctor(page);
189 page->flags &= ~FRAG_MASK;
190 table = (unsigned long *) page_to_phys(page);
250cf776 191 if (mm->context.has_pgste)
402b0862
CO
192 clear_table_pgstes(table);
193 else
194 clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
146e4b3c
MS
195 spin_lock(&mm->page_table_lock);
196 list_add(&page->lru, &mm->context.pgtable_list);
3610cce8
MS
197 }
198 table = (unsigned long *) page_to_phys(page);
146e4b3c
MS
199 while (page->flags & bits) {
200 table += 256;
201 bits <<= 1;
202 }
203 page->flags |= bits;
204 if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1))
205 list_move_tail(&page->lru, &mm->context.pgtable_list);
206 spin_unlock(&mm->page_table_lock);
3610cce8
MS
207 return table;
208}
209
146e4b3c 210void page_table_free(struct mm_struct *mm, unsigned long *table)
3610cce8 211{
146e4b3c
MS
212 struct page *page;
213 unsigned long bits;
3610cce8 214
250cf776 215 bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL;
146e4b3c
MS
216 bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long);
217 page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
218 spin_lock(&mm->page_table_lock);
219 page->flags ^= bits;
220 if (page->flags & FRAG_MASK) {
221 /* Page now has some free pgtable fragments. */
222 list_move(&page->lru, &mm->context.pgtable_list);
223 page = NULL;
224 } else
225 /* All fragments of the 4K page have been freed. */
226 list_del(&page->lru);
227 spin_unlock(&mm->page_table_lock);
228 if (page) {
229 pgtable_page_dtor(page);
230 __free_page(page);
231 }
232}
3610cce8 233
146e4b3c
MS
234void disable_noexec(struct mm_struct *mm, struct task_struct *tsk)
235{
236 struct page *page;
237
238 spin_lock(&mm->page_table_lock);
239 /* Free shadow region and segment tables. */
240 list_for_each_entry(page, &mm->context.crst_list, lru)
241 if (page->index) {
242 free_pages((unsigned long) page->index, ALLOC_ORDER);
243 page->index = 0;
244 }
245 /* "Free" second halves of page tables. */
246 list_for_each_entry(page, &mm->context.pgtable_list, lru)
247 page->flags &= ~SECOND_HALVES;
248 spin_unlock(&mm->page_table_lock);
249 mm->context.noexec = 0;
250 update_mm(mm, tsk);
3610cce8 251}
402b0862
CO
252
253/*
254 * switch on pgstes for its userspace process (for kvm)
255 */
256int s390_enable_sie(void)
257{
258 struct task_struct *tsk = current;
74b6b522 259 struct mm_struct *mm, *old_mm;
402b0862 260
702d9e58
CO
261 /* Do we have switched amode? If no, we cannot do sie */
262 if (!switch_amode)
263 return -EINVAL;
264
74b6b522 265 /* Do we have pgstes? if yes, we are done */
250cf776 266 if (tsk->mm->context.has_pgste)
74b6b522 267 return 0;
402b0862 268
74b6b522
CB
269 /* lets check if we are allowed to replace the mm */
270 task_lock(tsk);
402b0862 271 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
abf137dd 272 tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
74b6b522
CB
273 task_unlock(tsk);
274 return -EINVAL;
275 }
276 task_unlock(tsk);
402b0862 277
250cf776
CB
278 /* we copy the mm and let dup_mm create the page tables with_pgstes */
279 tsk->mm->context.alloc_pgste = 1;
402b0862 280 mm = dup_mm(tsk);
250cf776 281 tsk->mm->context.alloc_pgste = 0;
402b0862 282 if (!mm)
74b6b522
CB
283 return -ENOMEM;
284
250cf776 285 /* Now lets check again if something happened */
74b6b522
CB
286 task_lock(tsk);
287 if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
abf137dd 288 tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
74b6b522
CB
289 mmput(mm);
290 task_unlock(tsk);
291 return -EINVAL;
292 }
293
294 /* ok, we are alone. No ptrace, no threads, etc. */
295 old_mm = tsk->mm;
402b0862
CO
296 tsk->mm = tsk->active_mm = mm;
297 preempt_disable();
298 update_mm(mm, tsk);
005f8eee 299 cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
402b0862 300 preempt_enable();
402b0862 301 task_unlock(tsk);
74b6b522
CB
302 mmput(old_mm);
303 return 0;
402b0862
CO
304}
305EXPORT_SYMBOL_GPL(s390_enable_sie);