]>
Commit | Line | Data |
---|---|---|
f204e0b8 IM |
1 | /* |
2 | * Copyright 2014 IBM Corp. | |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation; either version | |
7 | * 2 of the License, or (at your option) any later version. | |
8 | */ | |
9 | ||
10 | #include <linux/workqueue.h> | |
11 | #include <linux/sched.h> | |
12 | #include <linux/pid.h> | |
13 | #include <linux/mm.h> | |
14 | #include <linux/moduleparam.h> | |
15 | ||
16 | #undef MODULE_PARAM_PREFIX | |
17 | #define MODULE_PARAM_PREFIX "cxl" "." | |
18 | #include <asm/current.h> | |
19 | #include <asm/copro.h> | |
20 | #include <asm/mmu.h> | |
21 | ||
22 | #include "cxl.h" | |
9bcf28cd | 23 | #include "trace.h" |
f204e0b8 | 24 | |
eb01d4c2 IM |
25 | static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb) |
26 | { | |
27 | return ((sste->vsid_data == cpu_to_be64(slb->vsid)) && | |
28 | (sste->esid_data == cpu_to_be64(slb->esid))); | |
29 | } | |
30 | ||
31 | /* | |
32 | * This finds a free SSTE for the given SLB, or returns NULL if it's already in | |
33 | * the segment table. | |
34 | */ | |
b03a7f57 IM |
35 | static struct cxl_sste* find_free_sste(struct cxl_context *ctx, |
36 | struct copro_slb *slb) | |
f204e0b8 | 37 | { |
eb01d4c2 | 38 | struct cxl_sste *primary, *sste, *ret = NULL; |
b03a7f57 | 39 | unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */ |
5100a9d6 | 40 | unsigned int entry; |
b03a7f57 IM |
41 | unsigned int hash; |
42 | ||
43 | if (slb->vsid & SLB_VSID_B_1T) | |
44 | hash = (slb->esid >> SID_SHIFT_1T) & mask; | |
45 | else /* 256M */ | |
46 | hash = (slb->esid >> SID_SHIFT) & mask; | |
f204e0b8 | 47 | |
b03a7f57 IM |
48 | primary = ctx->sstp + (hash << 3); |
49 | ||
50 | for (entry = 0, sste = primary; entry < 8; entry++, sste++) { | |
eb01d4c2 IM |
51 | if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V)) |
52 | ret = sste; | |
53 | if (sste_matches(sste, slb)) | |
54 | return NULL; | |
f204e0b8 | 55 | } |
eb01d4c2 IM |
56 | if (ret) |
57 | return ret; | |
b03a7f57 | 58 | |
f204e0b8 | 59 | /* Nothing free, select an entry to cast out */ |
eb01d4c2 | 60 | ret = primary + ctx->sst_lru; |
b03a7f57 | 61 | ctx->sst_lru = (ctx->sst_lru + 1) & 0x7; |
f204e0b8 | 62 | |
eb01d4c2 | 63 | return ret; |
f204e0b8 IM |
64 | } |
65 | ||
66 | static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb) | |
67 | { | |
68 | /* mask is the group index, we search primary and secondary here. */ | |
f204e0b8 | 69 | struct cxl_sste *sste; |
f204e0b8 IM |
70 | unsigned long flags; |
71 | ||
f204e0b8 | 72 | spin_lock_irqsave(&ctx->sste_lock, flags); |
b03a7f57 | 73 | sste = find_free_sste(ctx, slb); |
eb01d4c2 IM |
74 | if (!sste) |
75 | goto out_unlock; | |
f204e0b8 IM |
76 | |
77 | pr_devel("CXL Populating SST[%li]: %#llx %#llx\n", | |
78 | sste - ctx->sstp, slb->vsid, slb->esid); | |
9bcf28cd | 79 | trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid); |
f204e0b8 IM |
80 | |
81 | sste->vsid_data = cpu_to_be64(slb->vsid); | |
82 | sste->esid_data = cpu_to_be64(slb->esid); | |
eb01d4c2 | 83 | out_unlock: |
f204e0b8 IM |
84 | spin_unlock_irqrestore(&ctx->sste_lock, flags); |
85 | } | |
86 | ||
87 | static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm, | |
88 | u64 ea) | |
89 | { | |
90 | struct copro_slb slb = {0,0}; | |
91 | int rc; | |
92 | ||
93 | if (!(rc = copro_calculate_slb(mm, ea, &slb))) { | |
94 | cxl_load_segment(ctx, &slb); | |
95 | } | |
96 | ||
97 | return rc; | |
98 | } | |
99 | ||
100 | static void cxl_ack_ae(struct cxl_context *ctx) | |
101 | { | |
102 | unsigned long flags; | |
103 | ||
5be587b1 | 104 | cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0); |
f204e0b8 IM |
105 | |
106 | spin_lock_irqsave(&ctx->lock, flags); | |
107 | ctx->pending_fault = true; | |
108 | ctx->fault_addr = ctx->dar; | |
109 | ctx->fault_dsisr = ctx->dsisr; | |
110 | spin_unlock_irqrestore(&ctx->lock, flags); | |
111 | ||
112 | wake_up_all(&ctx->wq); | |
113 | } | |
114 | ||
115 | static int cxl_handle_segment_miss(struct cxl_context *ctx, | |
116 | struct mm_struct *mm, u64 ea) | |
117 | { | |
118 | int rc; | |
119 | ||
120 | pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea); | |
9bcf28cd | 121 | trace_cxl_ste_miss(ctx, ea); |
f204e0b8 IM |
122 | |
123 | if ((rc = cxl_fault_segment(ctx, mm, ea))) | |
124 | cxl_ack_ae(ctx); | |
125 | else { | |
126 | ||
127 | mb(); /* Order seg table write to TFC MMIO write */ | |
5be587b1 | 128 | cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); |
f204e0b8 IM |
129 | } |
130 | ||
131 | return IRQ_HANDLED; | |
132 | } | |
133 | ||
134 | static void cxl_handle_page_fault(struct cxl_context *ctx, | |
135 | struct mm_struct *mm, u64 dsisr, u64 dar) | |
136 | { | |
137 | unsigned flt = 0; | |
138 | int result; | |
aefa5688 | 139 | unsigned long access, flags, inv_flags = 0; |
f204e0b8 | 140 | |
9bcf28cd IM |
141 | trace_cxl_pte_miss(ctx, dsisr, dar); |
142 | ||
f204e0b8 IM |
143 | if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { |
144 | pr_devel("copro_handle_mm_fault failed: %#x\n", result); | |
145 | return cxl_ack_ae(ctx); | |
146 | } | |
147 | ||
148 | /* | |
149 | * update_mmu_cache() will not have loaded the hash since current->trap | |
150 | * is not a 0x400 or 0x300, so just call hash_page_mm() here. | |
151 | */ | |
152 | access = _PAGE_PRESENT; | |
153 | if (dsisr & CXL_PSL_DSISR_An_S) | |
154 | access |= _PAGE_RW; | |
155 | if ((!ctx->kernel) || ~(dar & (1ULL << 63))) | |
156 | access |= _PAGE_USER; | |
aefa5688 AK |
157 | |
158 | if (dsisr & DSISR_NOHPTE) | |
159 | inv_flags |= HPTE_NOHPTE_UPDATE; | |
160 | ||
f204e0b8 | 161 | local_irq_save(flags); |
aefa5688 | 162 | hash_page_mm(mm, dar, access, 0x300, inv_flags); |
f204e0b8 IM |
163 | local_irq_restore(flags); |
164 | ||
165 | pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); | |
5be587b1 | 166 | cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0); |
f204e0b8 IM |
167 | } |
168 | ||
7b8ad495 VJ |
169 | /* |
170 | * Returns the mm_struct corresponding to the context ctx via ctx->pid | |
171 | * In case the task has exited we use the task group leader accessible | |
172 | * via ctx->glpid to find the next task in the thread group that has a | |
173 | * valid mm_struct associated with it. If a task with valid mm_struct | |
174 | * is found the ctx->pid is updated to use the task struct for subsequent | |
175 | * translations. In case no valid mm_struct is found in the task group to | |
176 | * service the fault a NULL is returned. | |
177 | */ | |
178 | static struct mm_struct *get_mem_context(struct cxl_context *ctx) | |
179 | { | |
180 | struct task_struct *task = NULL; | |
181 | struct mm_struct *mm = NULL; | |
182 | struct pid *old_pid = ctx->pid; | |
183 | ||
184 | if (old_pid == NULL) { | |
185 | pr_warn("%s: Invalid context for pe=%d\n", | |
186 | __func__, ctx->pe); | |
187 | return NULL; | |
188 | } | |
189 | ||
190 | task = get_pid_task(old_pid, PIDTYPE_PID); | |
191 | ||
192 | /* | |
193 | * pid_alive may look racy but this saves us from costly | |
194 | * get_task_mm when the task is a zombie. In worst case | |
195 | * we may think a task is alive, which is about to die | |
196 | * but get_task_mm will return NULL. | |
197 | */ | |
198 | if (task != NULL && pid_alive(task)) | |
199 | mm = get_task_mm(task); | |
200 | ||
201 | /* release the task struct that was taken earlier */ | |
202 | if (task) | |
203 | put_task_struct(task); | |
204 | else | |
205 | pr_devel("%s: Context owning pid=%i for pe=%i dead\n", | |
206 | __func__, pid_nr(old_pid), ctx->pe); | |
207 | ||
208 | /* | |
209 | * If we couldn't find the mm context then use the group | |
210 | * leader to iterate over the task group and find a task | |
211 | * that gives us mm_struct. | |
212 | */ | |
213 | if (unlikely(mm == NULL && ctx->glpid != NULL)) { | |
214 | ||
215 | rcu_read_lock(); | |
216 | task = pid_task(ctx->glpid, PIDTYPE_PID); | |
217 | if (task) | |
218 | do { | |
219 | mm = get_task_mm(task); | |
220 | if (mm) { | |
221 | ctx->pid = get_task_pid(task, | |
222 | PIDTYPE_PID); | |
223 | break; | |
224 | } | |
225 | task = next_thread(task); | |
226 | } while (task && !thread_group_leader(task)); | |
227 | rcu_read_unlock(); | |
228 | ||
229 | /* check if we switched pid */ | |
230 | if (ctx->pid != old_pid) { | |
231 | if (mm) | |
232 | pr_devel("%s:pe=%i switch pid %i->%i\n", | |
233 | __func__, ctx->pe, pid_nr(old_pid), | |
234 | pid_nr(ctx->pid)); | |
235 | else | |
236 | pr_devel("%s:Cannot find mm for pid=%i\n", | |
237 | __func__, pid_nr(old_pid)); | |
238 | ||
239 | /* drop the reference to older pid */ | |
240 | put_pid(old_pid); | |
241 | } | |
242 | } | |
243 | ||
244 | return mm; | |
245 | } | |
246 | ||
247 | ||
248 | ||
f204e0b8 IM |
249 | void cxl_handle_fault(struct work_struct *fault_work) |
250 | { | |
251 | struct cxl_context *ctx = | |
252 | container_of(fault_work, struct cxl_context, fault_work); | |
253 | u64 dsisr = ctx->dsisr; | |
254 | u64 dar = ctx->dar; | |
a6b07d82 | 255 | struct mm_struct *mm = NULL; |
f204e0b8 | 256 | |
ea2d1f95 FB |
257 | if (cpu_has_feature(CPU_FTR_HVMODE)) { |
258 | if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr || | |
259 | cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar || | |
260 | cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) { | |
261 | /* Most likely explanation is harmless - a dedicated | |
262 | * process has detached and these were cleared by the | |
263 | * PSL purge, but warn about it just in case | |
264 | */ | |
265 | dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n"); | |
266 | return; | |
267 | } | |
f204e0b8 IM |
268 | } |
269 | ||
13da7046 IM |
270 | /* Early return if the context is being / has been detached */ |
271 | if (ctx->status == CLOSED) { | |
272 | cxl_ack_ae(ctx); | |
273 | return; | |
274 | } | |
275 | ||
f204e0b8 IM |
276 | pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. " |
277 | "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar); | |
278 | ||
a6b07d82 | 279 | if (!ctx->kernel) { |
7b8ad495 VJ |
280 | |
281 | mm = get_mem_context(ctx); | |
282 | /* indicates all the thread in task group have exited */ | |
283 | if (mm == NULL) { | |
284 | pr_devel("%s: unable to get mm for pe=%d pid=%i\n", | |
285 | __func__, ctx->pe, pid_nr(ctx->pid)); | |
a6b07d82 MN |
286 | cxl_ack_ae(ctx); |
287 | return; | |
7b8ad495 VJ |
288 | } else { |
289 | pr_devel("Handling page fault for pe=%d pid=%i\n", | |
290 | ctx->pe, pid_nr(ctx->pid)); | |
a6b07d82 | 291 | } |
f204e0b8 IM |
292 | } |
293 | ||
294 | if (dsisr & CXL_PSL_DSISR_An_DS) | |
295 | cxl_handle_segment_miss(ctx, mm, dar); | |
296 | else if (dsisr & CXL_PSL_DSISR_An_DM) | |
297 | cxl_handle_page_fault(ctx, mm, dsisr, dar); | |
298 | else | |
299 | WARN(1, "cxl_handle_fault has nothing to handle\n"); | |
300 | ||
a6b07d82 MN |
301 | if (mm) |
302 | mmput(mm); | |
f204e0b8 IM |
303 | } |
304 | ||
305 | static void cxl_prefault_one(struct cxl_context *ctx, u64 ea) | |
306 | { | |
f204e0b8 IM |
307 | struct mm_struct *mm; |
308 | ||
7b8ad495 VJ |
309 | mm = get_mem_context(ctx); |
310 | if (mm == NULL) { | |
f204e0b8 IM |
311 | pr_devel("cxl_prefault_one unable to get mm %i\n", |
312 | pid_nr(ctx->pid)); | |
f204e0b8 IM |
313 | return; |
314 | } | |
315 | ||
7b8ad495 | 316 | cxl_fault_segment(ctx, mm, ea); |
f204e0b8 IM |
317 | |
318 | mmput(mm); | |
f204e0b8 IM |
319 | } |
320 | ||
321 | static u64 next_segment(u64 ea, u64 vsid) | |
322 | { | |
323 | if (vsid & SLB_VSID_B_1T) | |
324 | ea |= (1ULL << 40) - 1; | |
325 | else | |
326 | ea |= (1ULL << 28) - 1; | |
327 | ||
328 | return ea + 1; | |
329 | } | |
330 | ||
331 | static void cxl_prefault_vma(struct cxl_context *ctx) | |
332 | { | |
333 | u64 ea, last_esid = 0; | |
334 | struct copro_slb slb; | |
335 | struct vm_area_struct *vma; | |
336 | int rc; | |
f204e0b8 IM |
337 | struct mm_struct *mm; |
338 | ||
7b8ad495 VJ |
339 | mm = get_mem_context(ctx); |
340 | if (mm == NULL) { | |
f204e0b8 IM |
341 | pr_devel("cxl_prefault_vm unable to get mm %i\n", |
342 | pid_nr(ctx->pid)); | |
7b8ad495 | 343 | return; |
f204e0b8 IM |
344 | } |
345 | ||
346 | down_read(&mm->mmap_sem); | |
347 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | |
348 | for (ea = vma->vm_start; ea < vma->vm_end; | |
349 | ea = next_segment(ea, slb.vsid)) { | |
350 | rc = copro_calculate_slb(mm, ea, &slb); | |
351 | if (rc) | |
352 | continue; | |
353 | ||
354 | if (last_esid == slb.esid) | |
355 | continue; | |
356 | ||
357 | cxl_load_segment(ctx, &slb); | |
358 | last_esid = slb.esid; | |
359 | } | |
360 | } | |
361 | up_read(&mm->mmap_sem); | |
362 | ||
363 | mmput(mm); | |
f204e0b8 IM |
364 | } |
365 | ||
366 | void cxl_prefault(struct cxl_context *ctx, u64 wed) | |
367 | { | |
368 | switch (ctx->afu->prefault_mode) { | |
369 | case CXL_PREFAULT_WED: | |
370 | cxl_prefault_one(ctx, wed); | |
371 | break; | |
372 | case CXL_PREFAULT_ALL: | |
373 | cxl_prefault_vma(ctx); | |
374 | break; | |
375 | default: | |
376 | break; | |
377 | } | |
378 | } |