drivers/misc/cxl/fault.c

   1 /*
   2  * Copyright 2014 IBM Corp.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public License
   6  * as published by the Free Software Foundation; either version
   7  * 2 of the License, or (at your option) any later version.
   8  */
   9
  10 #include <linux/workqueue.h>
  11 #include <linux/sched/signal.h>
  12 #include <linux/sched/mm.h>
  13 #include <linux/pid.h>
  14 #include <linux/mm.h>
  15 #include <linux/moduleparam.h>
  16
  17 #undef MODULE_PARAM_PREFIX
  18 #define MODULE_PARAM_PREFIX "cxl" "."
  19 #include <asm/current.h>
  20 #include <asm/copro.h>
  21 #include <asm/mmu.h>
  22
  23 #include "cxl.h"
  24 #include "trace.h"
  25
  26 static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb)
  27 {
  28         return ((sste->vsid_data == cpu_to_be64(slb->vsid)) &&
  29                 (sste->esid_data == cpu_to_be64(slb->esid)));
  30 }
  31
  32 /*
  33  * This finds a free SSTE for the given SLB, or returns NULL if it's already in
  34  * the segment table.
  35  */
  36 static struct cxl_sste *find_free_sste(struct cxl_context *ctx,
  37                                        struct copro_slb *slb)
  38 {
  39         struct cxl_sste *primary, *sste, *ret = NULL;
  40         unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */
  41         unsigned int entry;
  42         unsigned int hash;
  43
  44         if (slb->vsid & SLB_VSID_B_1T)
  45                 hash = (slb->esid >> SID_SHIFT_1T) & mask;
  46         else /* 256M */
  47                 hash = (slb->esid >> SID_SHIFT) & mask;
  48
  49         primary = ctx->sstp + (hash << 3);
  50
  51         for (entry = 0, sste = primary; entry < 8; entry++, sste++) {
  52                 if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V))
  53                         ret = sste;
  54                 if (sste_matches(sste, slb))
  55                         return NULL;
  56         }
  57         if (ret)
  58                 return ret;
  59
  60         /* Nothing free, select an entry to cast out */
  61         ret = primary + ctx->sst_lru;
  62         ctx->sst_lru = (ctx->sst_lru + 1) & 0x7;
  63
  64         return ret;
  65 }
  66
  67 static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb)
  68 {
  69         /* mask is the group index, we search primary and secondary here. */
  70         struct cxl_sste *sste;
  71         unsigned long flags;
  72
  73         spin_lock_irqsave(&ctx->sste_lock, flags);
  74         sste = find_free_sste(ctx, slb);
  75         if (!sste)
  76                 goto out_unlock;
  77
  78         pr_devel("CXL Populating SST[%li]: %#llx %#llx\n",
  79                         sste - ctx->sstp, slb->vsid, slb->esid);
  80         trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid);
  81
  82         sste->vsid_data = cpu_to_be64(slb->vsid);
  83         sste->esid_data = cpu_to_be64(slb->esid);
  84 out_unlock:
  85         spin_unlock_irqrestore(&ctx->sste_lock, flags);
  86 }
  87
  88 static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm,
  89                              u64 ea)
  90 {
  91         struct copro_slb slb = {0,0};
  92         int rc;
  93
  94         if (!(rc = copro_calculate_slb(mm, ea, &slb))) {
  95                 cxl_load_segment(ctx, &slb);
  96         }
  97
  98         return rc;
  99 }
 100
 101 static void cxl_ack_ae(struct cxl_context *ctx)
 102 {
 103         unsigned long flags;
 104
 105         cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0);
 106
 107         spin_lock_irqsave(&ctx->lock, flags);
 108         ctx->pending_fault = true;
 109         ctx->fault_addr = ctx->dar;
 110         ctx->fault_dsisr = ctx->dsisr;
 111         spin_unlock_irqrestore(&ctx->lock, flags);
 112
 113         wake_up_all(&ctx->wq);
 114 }
 115
 116 static int cxl_handle_segment_miss(struct cxl_context *ctx,
 117                                    struct mm_struct *mm, u64 ea)
 118 {
 119         int rc;
 120
 121         pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea);
 122         trace_cxl_ste_miss(ctx, ea);
 123
 124         if ((rc = cxl_fault_segment(ctx, mm, ea)))
 125                 cxl_ack_ae(ctx);
 126         else {
 127
 128                 mb(); /* Order seg table write to TFC MMIO write */
 129                 cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
 130         }
 131
 132         return IRQ_HANDLED;
 133 }
 134
 135 int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar)
 136 {
 137         vm_fault_t flt = 0;
 138         int result;
 139         unsigned long access, flags, inv_flags = 0;
 140
 141         /*
 142          * Add the fault handling cpu to task mm cpumask so that we
 143          * can do a safe lockless page table walk when inserting the
 144          * hash page table entry. This function get called with a
 145          * valid mm for user space addresses. Hence using the if (mm)
 146          * check is sufficient here.
 147          */
 148         if (mm && !cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
 149                 cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
 150                 /*
 151                  * We need to make sure we walk the table only after
 152                  * we update the cpumask. The other side of the barrier
 153                  * is explained in serialize_against_pte_lookup()
 154                  */
 155                 smp_mb();
 156         }
 157         if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) {
 158                 pr_devel("copro_handle_mm_fault failed: %#x\n", result);
 159                 return result;
 160         }
 161
 162         if (!radix_enabled()) {
 163                 /*
 164                  * update_mmu_cache() will not have loaded the hash since current->trap
 165                  * is not a 0x400 or 0x300, so just call hash_page_mm() here.
 166                  */
 167                 access = _PAGE_PRESENT | _PAGE_READ;
 168                 if (dsisr & CXL_PSL_DSISR_An_S)
 169                         access |= _PAGE_WRITE;
 170
 171                 if (!mm && (get_region_id(dar) != USER_REGION_ID))
 172                         access |= _PAGE_PRIVILEGED;
 173
 174                 if (dsisr & DSISR_NOHPTE)
 175                         inv_flags |= HPTE_NOHPTE_UPDATE;
 176
 177                 local_irq_save(flags);
 178                 hash_page_mm(mm, dar, access, 0x300, inv_flags);
 179                 local_irq_restore(flags);
 180         }
 181         return 0;
 182 }
 183
 184 static void cxl_handle_page_fault(struct cxl_context *ctx,
 185                                   struct mm_struct *mm,
 186                                   u64 dsisr, u64 dar)
 187 {
 188         trace_cxl_pte_miss(ctx, dsisr, dar);
 189
 190         if (cxl_handle_mm_fault(mm, dsisr, dar)) {
 191                 cxl_ack_ae(ctx);
 192         } else {
 193                 pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe);
 194                 cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
 195         }
 196 }
 197
 198 /*
 199  * Returns the mm_struct corresponding to the context ctx.
 200  * mm_users == 0, the context may be in the process of being closed.
 201  */
 202 static struct mm_struct *get_mem_context(struct cxl_context *ctx)
 203 {
 204         if (ctx->mm == NULL)
 205                 return NULL;
 206
 207         if (!atomic_inc_not_zero(&ctx->mm->mm_users))
 208                 return NULL;
 209
 210         return ctx->mm;
 211 }
 212
 213 static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
 214 {
 215         if ((cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS)))
 216                 return true;
 217
 218         return false;
 219 }
 220
 221 static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr)
 222 {
 223         if ((cxl_is_power8()) && (dsisr & CXL_PSL_DSISR_An_DM))
 224                 return true;
 225
 226         if (cxl_is_power9())
 227                 return true;
 228
 229         return false;
 230 }
 231
 232 void cxl_handle_fault(struct work_struct *fault_work)
 233 {
 234         struct cxl_context *ctx =
 235                 container_of(fault_work, struct cxl_context, fault_work);
 236         u64 dsisr = ctx->dsisr;
 237         u64 dar = ctx->dar;
 238         struct mm_struct *mm = NULL;
 239
 240         if (cpu_has_feature(CPU_FTR_HVMODE)) {
 241                 if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
 242                     cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
 243                     cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) {
 244                         /* Most likely explanation is harmless - a dedicated
 245                          * process has detached and these were cleared by the
 246                          * PSL purge, but warn about it just in case
 247                          */
 248                         dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n");
 249                         return;
 250                 }
 251         }
 252
 253         /* Early return if the context is being / has been detached */
 254         if (ctx->status == CLOSED) {
 255                 cxl_ack_ae(ctx);
 256                 return;
 257         }
 258
 259         pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. "
 260                 "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar);
 261
 262         if (!ctx->kernel) {
 263
 264                 mm = get_mem_context(ctx);
 265                 if (mm == NULL) {
 266                         pr_devel("%s: unable to get mm for pe=%d pid=%i\n",
 267                                  __func__, ctx->pe, pid_nr(ctx->pid));
 268                         cxl_ack_ae(ctx);
 269                         return;
 270                 } else {
 271                         pr_devel("Handling page fault for pe=%d pid=%i\n",
 272                                  ctx->pe, pid_nr(ctx->pid));
 273                 }
 274         }
 275
 276         if (cxl_is_segment_miss(ctx, dsisr))
 277                 cxl_handle_segment_miss(ctx, mm, dar);
 278         else if (cxl_is_page_fault(ctx, dsisr))
 279                 cxl_handle_page_fault(ctx, mm, dsisr, dar);
 280         else
 281                 WARN(1, "cxl_handle_fault has nothing to handle\n");
 282
 283         if (mm)
 284                 mmput(mm);
 285 }
 286
 287 static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
 288 {
 289         struct mm_struct *mm;
 290
 291         mm = get_mem_context(ctx);
 292         if (mm == NULL) {
 293                 pr_devel("cxl_prefault_one unable to get mm %i\n",
 294                          pid_nr(ctx->pid));
 295                 return;
 296         }
 297
 298         cxl_fault_segment(ctx, mm, ea);
 299
 300         mmput(mm);
 301 }
 302
 303 static u64 next_segment(u64 ea, u64 vsid)
 304 {
 305         if (vsid & SLB_VSID_B_1T)
 306                 ea |= (1ULL << 40) - 1;
 307         else
 308                 ea |= (1ULL << 28) - 1;
 309
 310         return ea + 1;
 311 }
 312
 313 static void cxl_prefault_vma(struct cxl_context *ctx)
 314 {
 315         u64 ea, last_esid = 0;
 316         struct copro_slb slb;
 317         struct vm_area_struct *vma;
 318         int rc;
 319         struct mm_struct *mm;
 320
 321         mm = get_mem_context(ctx);
 322         if (mm == NULL) {
 323                 pr_devel("cxl_prefault_vm unable to get mm %i\n",
 324                          pid_nr(ctx->pid));
 325                 return;
 326         }
 327
 328         down_read(&mm->mmap_sem);
 329         for (vma = mm->mmap; vma; vma = vma->vm_next) {
 330                 for (ea = vma->vm_start; ea < vma->vm_end;
 331                                 ea = next_segment(ea, slb.vsid)) {
 332                         rc = copro_calculate_slb(mm, ea, &slb);
 333                         if (rc)
 334                                 continue;
 335
 336                         if (last_esid == slb.esid)
 337                                 continue;
 338
 339                         cxl_load_segment(ctx, &slb);
 340                         last_esid = slb.esid;
 341                 }
 342         }
 343         up_read(&mm->mmap_sem);
 344
 345         mmput(mm);
 346 }
 347
 348 void cxl_prefault(struct cxl_context *ctx, u64 wed)
 349 {
 350         switch (ctx->afu->prefault_mode) {
 351         case CXL_PREFAULT_WED:
 352                 cxl_prefault_one(ctx, wed);
 353                 break;
 354         case CXL_PREFAULT_ALL:
 355                 cxl_prefault_vma(ctx);
 356                 break;
 357         default:
 358                 break;
 359         }
 360 }