/*
 * Copyright 2017 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "vmm.h"

#define NVKM_VMM_LEVELS_MAX 5
static void
nvkm_vmm_pt_del(struct nvkm_vmm_pt **ppgt)
{
	struct nvkm_vmm_pt *pgt = *ppgt;
	if (pgt) {
		kvfree(pgt->pde);
		kfree(pgt);
		*ppgt = NULL;
	}
}
static struct nvkm_vmm_pt *
nvkm_vmm_pt_new(const struct nvkm_vmm_desc *desc, bool sparse,
		const struct nvkm_vmm_page *page)
{
	const u32 pten = 1 << desc->bits;
	struct nvkm_vmm_pt *pgt;
	u32 lpte = 0;

	if (desc->type > PGT) {
		if (desc->type == SPT) {
			const struct nvkm_vmm_desc *pair = page[-1].desc;
			lpte = pten >> (desc->bits - pair->bits);
		} else {
			lpte = pten;
		}
	}

	if (!(pgt = kzalloc(sizeof(*pgt) + lpte, GFP_KERNEL)))
		return NULL;
	pgt->page = page ? page->shift : 0;
	pgt->sparse = sparse;

	if (desc->type == PGD) {
		pgt->pde = kvzalloc(sizeof(*pgt->pde) * pten, GFP_KERNEL);
		if (!pgt->pde) {
			kfree(pgt);
			return NULL;
		}
	}

	return pgt;
}
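/* For illustration (the numbers are hypothetical, not from any particular
 * backend): an SPT with desc->bits = 8 (256 small PTEs) whose paired LPT
 * has pair->bits = 1 (2 large PTEs) gives lpte = 256 >> (8 - 1) = 2, i.e.
 * one byte of SPTE-tracking state per large PTE covered, allocated as the
 * pgt->pte[] tail of the nvkm_vmm_pt above.
 */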
struct nvkm_vmm_iter {
	const struct nvkm_vmm_page *page;
	const struct nvkm_vmm_desc *desc;
	struct nvkm_vmm *vmm;
	u64 cnt;
	u16 max, lvl;
	u32 pte[NVKM_VMM_LEVELS_MAX];
	struct nvkm_vmm_pt *pt[NVKM_VMM_LEVELS_MAX];
	int flush;
};
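/* Indexing convention, inferred from the usage below: level 0 is the leaf
 * page table and it->max is the root, so it->pt[it->max] == vmm->pd and
 * it->pte[0] is the PTE index within the leaf.  it->cnt counts remaining
 * leaf PTEs, and it->flush tracks the shallowest level modified since the
 * last flush (NVKM_VMM_LEVELS_MAX == nothing pending).
 */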
#ifdef CONFIG_NOUVEAU_DEBUG_MMU
static const char *
nvkm_vmm_desc_type(const struct nvkm_vmm_desc *desc)
{
	switch (desc->type) {
	case PGD: return "PGD";
	case PGT: return "PGT";
	case SPT: return "SPT";
	case LPT: return "LPT";
	default:
		return "UNKNOWN";
	}
}

static void
nvkm_vmm_trace(struct nvkm_vmm_iter *it, char *buf)
{
	int lvl;
	for (lvl = it->max; lvl >= 0; lvl--) {
		if (lvl >= it->lvl)
			buf += sprintf(buf, "%05x:", it->pte[lvl]);
		else
			buf += sprintf(buf, "xxxxx:");
	}
}

#define TRA(i,f,a...) do { \
	char _buf[NVKM_VMM_LEVELS_MAX * 7]; \
	struct nvkm_vmm_iter *_it = (i); \
	nvkm_vmm_trace(_it, _buf); \
	VMM_TRACE(_it->vmm, "%s "f, _buf, ##a); \
} while(0)
#else
#define TRA(i,f,a...)
#endif
static inline void
nvkm_vmm_flush_mark(struct nvkm_vmm_iter *it)
{
	it->flush = min(it->flush, it->max - it->lvl);
}
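/* The accumulated it->flush value is a depth hint: 0 means the root PD
 * itself changed, larger values mean only deeper levels did, which lets
 * func->flush() scope its TLB invalidate accordingly.  This is an
 * inference from the min() accumulation above, not a documented contract.
 */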
static void
nvkm_vmm_flush(struct nvkm_vmm_iter *it)
{
	if (it->flush != NVKM_VMM_LEVELS_MAX) {
		if (it->vmm->func->flush) {
			TRA(it, "flush: %d", it->flush);
			it->vmm->func->flush(it->vmm, it->flush);
		}
		it->flush = NVKM_VMM_LEVELS_MAX;
	}
}
static void
nvkm_vmm_unref_pdes(struct nvkm_vmm_iter *it)
{
	const struct nvkm_vmm_desc *desc = it->desc;
	const int type = desc[it->lvl].type == SPT;
	struct nvkm_vmm_pt *pgd = it->pt[it->lvl + 1];
	struct nvkm_vmm_pt *pgt = it->pt[it->lvl];
	struct nvkm_mmu_pt *pt = pgt->pt[type];
	struct nvkm_vmm *vmm = it->vmm;
	u32 pdei = it->pte[it->lvl + 1];

	/* Recurse up the tree, unreferencing/destroying unneeded PDs. */
	it->lvl++;
	if (--pgd->refs[0]) {
		const struct nvkm_vmm_desc_func *func = desc[it->lvl].func;
		/* PD has other valid PDEs, so we need a proper update. */
		TRA(it, "PDE unmap %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
		pgt->pt[type] = NULL;
		if (!pgt->refs[!type]) {
			/* PDE no longer required. */
			if (pgd->pt[0]) {
				if (pgt->sparse) {
					func->sparse(vmm, pgd->pt[0], pdei, 1);
					pgd->pde[pdei] = NVKM_VMM_PDE_SPARSE;
				} else {
					func->unmap(vmm, pgd->pt[0], pdei, 1);
					pgd->pde[pdei] = NULL;
				}
			} else {
				/* Special handling for Tesla-class GPUs,
				 * where there's no central PD, but each
				 * instance has its own embedded PD.
				 */
				func->pde(vmm, pgd, pdei);
				pgd->pde[pdei] = NULL;
			}
		} else {
			/* PDE was pointing at dual-PTs and we're removing
			 * one of them, leaving the other in place.
			 */
			func->pde(vmm, pgd, pdei);
		}

		/* GPU may have cached the PTs, flush before freeing. */
		nvkm_vmm_flush_mark(it);
		nvkm_vmm_flush(it);
	} else {
		/* PD has no valid PDEs left, so we can just destroy it. */
		nvkm_vmm_unref_pdes(it);
	}

	/* Destroy PD/PT. */
	TRA(it, "PDE free %s", nvkm_vmm_desc_type(&desc[it->lvl - 1]));
	nvkm_mmu_ptc_put(vmm->mmu, vmm->bootstrapped, &pt);
	if (!pgt->refs[!type])
		nvkm_vmm_pt_del(&pgt);
	it->lvl--;
}
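/* A sketch of the cascade above (control flow, not a trace from real
 * hardware): dropping the last reference on a leaf PT calls up into its
 * parent PD; if that PD's refs[0] also hits zero, the function recurses
 * another level before freeing, so an entire unused branch of the tree
 * is torn down by a single unmap.
 */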
static void
nvkm_vmm_unref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
		     const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *pair = it->page[-1].desc;
	const u32 sptb = desc->bits - pair->bits;
	const u32 sptn = 1 << sptb;
	struct nvkm_vmm *vmm = it->vmm;
	u32 spti = ptei & (sptn - 1), lpti, pteb;

	/* Determine how many SPTEs are being touched under each LPTE,
	 * and drop reference counts.
	 */
	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
		const u32 pten = min(sptn - spti, ptes);
		pgt->pte[lpti] -= pten;
		ptes -= pten;
	}

	/* We're done here if there's no corresponding LPT. */
	if (!pgt->refs[0])
		return;

	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
		/* Skip over any LPTEs that still have valid SPTEs. */
		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPTES) {
			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_SPTES))
					break;
			}
			continue;
		}

		/* As there are no more non-UNMAPPED SPTEs left in the range
		 * covered by a number of LPTEs, the LPTEs once again take
		 * control over their address range.
		 *
		 * Determine how many LPTEs need to transition state.
		 */
		pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
			if (pgt->pte[ptei] & NVKM_VMM_PTE_SPTES)
				break;
			pgt->pte[ptei] &= ~NVKM_VMM_PTE_VALID;
		}

		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
			TRA(it, "LPTE %05x: U -> S %d PTEs", pteb, ptes);
			pair->func->sparse(vmm, pgt->pt[0], pteb, ptes);
		} else
		if (pair->func->invalid) {
			/* If the MMU supports it, restore the LPTE to the
			 * INVALID state to tell the MMU there is no point
			 * trying to fetch the corresponding SPTEs.
			 */
			TRA(it, "LPTE %05x: U -> I %d PTEs", pteb, ptes);
			pair->func->invalid(vmm, pgt->pt[0], pteb, ptes);
		}
	}
}
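/* Worked example of the reference drop above, with hypothetical values:
 * sptb = 7 (128 SPTEs per LPTE), ptei = 5, ptes = 300.  The first loop
 * iteration has spti = 5 and removes min(128 - 5, 300) = 123 SPTEs from
 * pgt->pte[0]; the second removes 128 from pgt->pte[1]; the third removes
 * the remaining 49 from pgt->pte[2].  The second pass then walks LPTEs
 * 0..2, clearing NVKM_VMM_PTE_VALID on runs left with no SPTEs and
 * restoring them to the SPARSE or INVALID state.
 */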
static bool
nvkm_vmm_unref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *desc = it->desc;
	const int type = desc->type == SPT;
	struct nvkm_vmm_pt *pgt = it->pt[0];

	/* Drop PTE references. */
	pgt->refs[type] -= ptes;

	/* Dual-PTs need special handling, unless PDE becoming invalid. */
	if (desc->type == SPT && (pgt->refs[0] || pgt->refs[1]))
		nvkm_vmm_unref_sptes(it, pgt, desc, ptei, ptes);

	/* PT no longer needed?  Destroy it. */
	if (!pgt->refs[type]) {
		it->lvl++;
		TRA(it, "%s empty", nvkm_vmm_desc_type(desc));
		it->lvl--;
		nvkm_vmm_unref_pdes(it);
		return false; /* PTE writes for unmap() not necessary. */
	}

	return true;
}
static void
nvkm_vmm_ref_sptes(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgt,
		   const struct nvkm_vmm_desc *desc, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *pair = it->page[-1].desc;
	const u32 sptb = desc->bits - pair->bits;
	const u32 sptn = 1 << sptb;
	struct nvkm_vmm *vmm = it->vmm;
	u32 spti = ptei & (sptn - 1), lpti, pteb;

	/* Determine how many SPTEs are being touched under each LPTE,
	 * and increase reference counts.
	 */
	for (lpti = ptei >> sptb; ptes; spti = 0, lpti++) {
		const u32 pten = min(sptn - spti, ptes);
		pgt->pte[lpti] += pten;
		ptes -= pten;
	}

	/* We're done here if there's no corresponding LPT. */
	if (!pgt->refs[0])
		return;

	for (ptei = pteb = ptei >> sptb; ptei < lpti; pteb = ptei) {
		/* Skip over any LPTEs that already have valid SPTEs. */
		if (pgt->pte[pteb] & NVKM_VMM_PTE_VALID) {
			for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
				if (!(pgt->pte[ptei] & NVKM_VMM_PTE_VALID))
					break;
			}
			continue;
		}

		/* As there are now non-UNMAPPED SPTEs in the range covered
		 * by a number of LPTEs, we need to transfer control of the
		 * address range to the SPTEs.
		 *
		 * Determine how many LPTEs need to transition state.
		 */
		pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
		for (ptes = 1, ptei++; ptei < lpti; ptes++, ptei++) {
			if (pgt->pte[ptei] & NVKM_VMM_PTE_VALID)
				break;
			pgt->pte[ptei] |= NVKM_VMM_PTE_VALID;
		}

		if (pgt->pte[pteb] & NVKM_VMM_PTE_SPARSE) {
			const u32 spti = pteb * sptn;
			const u32 sptc = ptes * sptn;
			/* The entire LPTE is marked as sparse, we need
			 * to make sure that the SPTEs are too.
			 */
			TRA(it, "SPTE %05x: U -> S %d PTEs", spti, sptc);
			desc->func->sparse(vmm, pgt->pt[1], spti, sptc);
			/* Sparse LPTEs prevent SPTEs from being accessed. */
			TRA(it, "LPTE %05x: S -> U %d PTEs", pteb, ptes);
			pair->func->unmap(vmm, pgt->pt[0], pteb, ptes);
		} else
		if (pair->func->invalid) {
			/* MMU supports blocking SPTEs by marking an LPTE
			 * as INVALID.  We need to reverse that here.
			 */
			TRA(it, "LPTE %05x: I -> U %d PTEs", pteb, ptes);
			pair->func->unmap(vmm, pgt->pt[0], pteb, ptes);
		}
	}
}
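/* LPTE state transitions driven by the two paths above, using the letters
 * from the trace messages: U (unmapped), S (sparse), I (invalid),
 * V (valid).  Taking SPTE references moves covering LPTEs S -> U or
 * I -> U so the MMU will consult the SPT; dropping them
 * (nvkm_vmm_unref_sptes) moves the LPTEs back U -> S or U -> I.
 */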
static bool
nvkm_vmm_ref_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *desc = it->desc;
	const int type = desc->type == SPT;
	struct nvkm_vmm_pt *pgt = it->pt[0];

	/* Take PTE references. */
	pgt->refs[type] += ptes;

	/* Dual-PTs need special handling. */
	if (desc->type == SPT)
		nvkm_vmm_ref_sptes(it, pgt, desc, ptei, ptes);

	return true;
}
static void
nvkm_vmm_sparse_ptes(const struct nvkm_vmm_desc *desc,
		     struct nvkm_vmm_pt *pgt, u32 ptei, u32 ptes)
{
	if (desc->type == PGD) {
		while (ptes--)
			pgt->pde[ptei++] = NVKM_VMM_PDE_SPARSE;
	} else
	if (desc->type == LPT) {
		memset(&pgt->pte[ptei], NVKM_VMM_PTE_SPARSE, ptes);
	}
}
static bool
nvkm_vmm_ref_hwpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
{
	const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1];
	const int type = desc->type == SPT;
	struct nvkm_vmm_pt *pgt = pgd->pde[pdei];
	const bool zero = !pgt->sparse && !desc->func->invalid;
	struct nvkm_vmm *vmm = it->vmm;
	struct nvkm_mmu *mmu = vmm->mmu;
	struct nvkm_mmu_pt *pt;
	u32 pten = 1 << desc->bits;
	u32 pteb, ptei, ptes;
	u32 size = desc->size * pten;

	pgd->refs[0]++;

	pgt->pt[type] = nvkm_mmu_ptc_get(mmu, size, desc->align, zero);
	if (!pgt->pt[type]) {
		it->lvl--;
		nvkm_vmm_unref_pdes(it);
		return false;
	}

	if (zero)
		goto done;

	pt = pgt->pt[type];

	if (desc->type == LPT && pgt->refs[1]) {
		/* SPT already exists covering the same range as this LPT,
		 * which means we need to be careful that any LPTEs which
		 * overlap valid SPTEs are unmapped as opposed to invalid
		 * or sparse, which would prevent the MMU from looking at
		 * the SPTEs on some GPUs.
		 */
		for (ptei = pteb = 0; ptei < pten; pteb = ptei) {
			bool spte = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
			for (ptes = 1, ptei++; ptei < pten; ptes++, ptei++) {
				bool next = pgt->pte[ptei] & NVKM_VMM_PTE_SPTES;
				if (spte != next)
					break;
			}

			if (!spte) {
				if (pgt->sparse)
					desc->func->sparse(vmm, pt, pteb, ptes);
				else
					desc->func->invalid(vmm, pt, pteb, ptes);
				memset(&pgt->pte[pteb], 0x00, ptes);
			} else {
				desc->func->unmap(vmm, pt, pteb, ptes);
				while (ptes--)
					pgt->pte[pteb++] |= NVKM_VMM_PTE_VALID;
			}
		}
	} else {
		if (pgt->sparse) {
			nvkm_vmm_sparse_ptes(desc, pgt, 0, pten);
			desc->func->sparse(vmm, pt, 0, pten);
		} else {
			desc->func->invalid(vmm, pt, 0, pten);
		}
	}

done:
	TRA(it, "PDE write %s", nvkm_vmm_desc_type(desc));
	it->desc[it->lvl].func->pde(it->vmm, pgd, pdei);
	nvkm_vmm_flush_mark(it);
	return true;
}
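/* Note on the "zero" path above: when the new PT needs neither sparse nor
 * invalid PTEs, nvkm_mmu_ptc_get() is asked for zeroed memory and the PTE
 * initialisation loops are skipped entirely (the goto done), since an
 * all-zero table already encodes "unmapped" for every entry.
 */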
static bool
nvkm_vmm_ref_swpt(struct nvkm_vmm_iter *it, struct nvkm_vmm_pt *pgd, u32 pdei)
{
	const struct nvkm_vmm_desc *desc = &it->desc[it->lvl - 1];
	struct nvkm_vmm_pt *pgt = pgd->pde[pdei];

	pgt = nvkm_vmm_pt_new(desc, NVKM_VMM_PDE_SPARSED(pgt), it->page);
	if (!pgt) {
		if (!pgd->refs[0])
			nvkm_vmm_unref_pdes(it);
		return false;
	}

	pgd->pde[pdei] = pgt;
	return true;
}
static u64
nvkm_vmm_iter(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
	      u64 addr, u64 size, const char *name, bool ref,
	      bool (*REF_PTES)(struct nvkm_vmm_iter *, u32, u32),
	      nvkm_vmm_pte_func MAP_PTES, struct nvkm_vmm_map *map,
	      nvkm_vmm_pxe_func CLR_PTES)
{
	const struct nvkm_vmm_desc *desc = page->desc;
	struct nvkm_vmm_iter it;
	u64 bits = addr >> page->shift;

	it.page = page;
	it.desc = desc;
	it.vmm = vmm;
	it.cnt = size >> page->shift;
	it.flush = NVKM_VMM_LEVELS_MAX;

	/* Deconstruct address into PTE indices for each mapping level. */
	for (it.lvl = 0; desc[it.lvl].bits; it.lvl++) {
		it.pte[it.lvl] = bits & ((1 << desc[it.lvl].bits) - 1);
		bits >>= desc[it.lvl].bits;
	}
	it.max = --it.lvl;
	it.pt[it.max] = vmm->pd;

	it.lvl = 0;
	TRA(&it, "%s: %016llx %016llx %d %lld PTEs", name,
	    addr, size, page->shift, it.cnt);
	it.lvl = it.max;

	/* Depth-first traversal of page tables. */
	while (it.cnt) {
		struct nvkm_vmm_pt *pgt = it.pt[it.lvl];
		const int type = desc->type == SPT;
		const u32 pten = 1 << desc->bits;
		const u32 ptei = it.pte[0];
		const u32 ptes = min_t(u64, it.cnt, pten - ptei);

		/* Walk down the tree, finding page tables for each level. */
		for (; it.lvl; it.lvl--) {
			const u32 pdei = it.pte[it.lvl];
			struct nvkm_vmm_pt *pgd = pgt;

			/* Software PT. */
			if (ref && NVKM_VMM_PDE_INVALID(pgd->pde[pdei])) {
				if (!nvkm_vmm_ref_swpt(&it, pgd, pdei))
					goto fail;
			}
			it.pt[it.lvl - 1] = pgt = pgd->pde[pdei];

			/* Hardware PT.
			 *
			 * This is a separate step from above due to GF100 and
			 * newer having dual page tables at some levels, which
			 * are refcounted independently.
			 */
			if (ref && !pgt->refs[desc[it.lvl - 1].type == SPT]) {
				if (!nvkm_vmm_ref_hwpt(&it, pgd, pdei))
					goto fail;
			}
		}

		/* Handle PTE updates. */
		if (!REF_PTES || REF_PTES(&it, ptei, ptes)) {
			struct nvkm_mmu_pt *pt = pgt->pt[type];
			if (MAP_PTES || CLR_PTES) {
				if (MAP_PTES)
					MAP_PTES(vmm, pt, ptei, ptes, map);
				else
					CLR_PTES(vmm, pt, ptei, ptes);
				nvkm_vmm_flush_mark(&it);
			}
		}

		/* Walk back up the tree to the next position. */
		it.pte[it.lvl] += ptes;
		it.cnt -= ptes;
		if (it.cnt) {
			while (it.pte[it.lvl] == (1 << desc[it.lvl].bits)) {
				it.pte[it.lvl++] = 0;
				it.pte[it.lvl]++;
			}
		}
	};

	nvkm_vmm_flush(&it);
	return ~0ULL;

fail:
	/* Reconstruct the failure address so the caller is able to
	 * reverse any partially completed operations.
	 */
	addr = it.pte[it.max--];
	do {
		addr  = addr << desc[it.max].bits;
		addr |= it.pte[it.max];
	} while (it.max--);

	return addr << page->shift;
}
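/* Address deconstruction, with an illustrative (not hardware-accurate)
 * layout: page->shift = 12 and desc[].bits = { 8, 8, 8 }.  For
 * addr = 0x12345000: bits = addr >> 12 = 0x12345, so it.pte[0] = 0x45,
 * it.pte[1] = 0x23, it.pte[2] = 0x01, and it.max = 2 with it.pt[2] the
 * root PD.  The failure path simply reverses this shifting to rebuild
 * the address at which the walk stopped.
 */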
static void
nvkm_vmm_ptes_unmap(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		    u64 addr, u64 size, bool sparse)
{
	const struct nvkm_vmm_desc_func *func = page->desc->func;
	nvkm_vmm_iter(vmm, page, addr, size, "unmap", false, NULL, NULL, NULL,
		      sparse ? func->sparse : func->invalid ? func->invalid :
		      func->unmap);
}
static void
nvkm_vmm_ptes_map(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		  u64 addr, u64 size, struct nvkm_vmm_map *map,
		  nvkm_vmm_pte_func func)
{
	nvkm_vmm_iter(vmm, page, addr, size, "map", false,
		      NULL, func, map, NULL);
}
static void
nvkm_vmm_ptes_put(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		  u64 addr, u64 size)
{
	nvkm_vmm_iter(vmm, page, addr, size, "unref", false,
		      nvkm_vmm_unref_ptes, NULL, NULL, NULL);
}
static int
nvkm_vmm_ptes_get(struct nvkm_vmm *vmm, const struct nvkm_vmm_page *page,
		  u64 addr, u64 size)
{
	u64 fail = nvkm_vmm_iter(vmm, page, addr, size, "ref", true,
				 nvkm_vmm_ref_ptes, NULL, NULL, NULL);
	if (fail != ~0ULL) {
		if (fail != addr)
			nvkm_vmm_ptes_put(vmm, page, addr, fail - addr);
		return -ENOMEM;
	}
	return 0;
}
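/* nvkm_vmm_ptes_get() and nvkm_vmm_ptes_put() pair with one another: on a
 * partial failure, nvkm_vmm_iter() returns the address it stopped at, and
 * the code above unwinds only the [addr, fail) range that was actually
 * referenced before reporting -ENOMEM.
 */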
void
nvkm_vmm_dtor(struct nvkm_vmm *vmm)
{
	if (vmm->bootstrapped) {
		const struct nvkm_vmm_page *page = vmm->func->page;
		const u64 limit = vmm->limit - vmm->start;

		while (page[1].shift)
			page++;

		nvkm_mmu_ptc_dump(vmm->mmu);
		nvkm_vmm_ptes_put(vmm, page, vmm->start, limit);
	}

	if (vmm->nullp) {
		dma_free_coherent(vmm->mmu->subdev.device->dev, 16 * 1024,
				  vmm->nullp, vmm->null);
	}

	if (vmm->pd) {
		nvkm_mmu_ptc_put(vmm->mmu, true, &vmm->pd->pt[0]);
		nvkm_vmm_pt_del(&vmm->pd);
	}
}
int
nvkm_vmm_ctor(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
	      u32 pd_header, u64 addr, u64 size, struct lock_class_key *key,
	      const char *name, struct nvkm_vmm *vmm)
{
	static struct lock_class_key _key;
	const struct nvkm_vmm_page *page = func->page;
	const struct nvkm_vmm_desc *desc;
	int levels, bits = 0;

	vmm->func = func;
	vmm->mmu = mmu;
	vmm->name = name;
	vmm->debug = mmu->subdev.debug;
	kref_init(&vmm->kref);

	__mutex_init(&vmm->mutex, "&vmm->mutex", key ? key : &_key);

	/* Locate the smallest page size supported by the backend, it will
	 * have the deepest nesting of page tables.
	 */
	while (page[1].shift)
		page++;

	/* Locate the structure that describes the layout of the top-level
	 * page table, and determine the number of valid bits in a virtual
	 * address.
	 */
	for (levels = 0, desc = page->desc; desc->bits; desc++, levels++)
		bits += desc->bits;
	bits += page->shift;
	desc--;

	if (WARN_ON(levels > NVKM_VMM_LEVELS_MAX))
		return -EINVAL;

	vmm->start = addr;
	vmm->limit = size ? (addr + size) : (1ULL << bits);
	if (vmm->start > vmm->limit || vmm->limit > (1ULL << bits))
		return -EINVAL;

	/* Allocate top-level page table. */
	vmm->pd = nvkm_vmm_pt_new(desc, false, NULL);
	if (!vmm->pd)
		return -ENOMEM;
	vmm->pd->refs[0] = 1;
	INIT_LIST_HEAD(&vmm->join);

	/* ... and the GPU storage for it, except on Tesla-class GPUs that
	 * have the PD embedded in the instance structure.
	 */
	if (desc->size) {
		const u32 size = pd_header + desc->size * (1 << desc->bits);
		vmm->pd->pt[0] = nvkm_mmu_ptc_get(mmu, size, desc->align, true);
		if (!vmm->pd->pt[0])
			return -ENOMEM;
	}

	return 0;
}
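/* Illustrative sizing (a hypothetical layout, not a specific GPU): with
 * page->shift = 12 and per-level bits summing to 28, the VMM covers a
 * 40-bit virtual address space, so a zero "size" argument yields
 * vmm->limit = 1ULL << 40.
 */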
int
nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
	      u32 hdr, u64 addr, u64 size, struct lock_class_key *key,
	      const char *name, struct nvkm_vmm **pvmm)
{
	if (!(*pvmm = kzalloc(sizeof(**pvmm), GFP_KERNEL)))
		return -ENOMEM;
	return nvkm_vmm_ctor(func, mmu, hdr, addr, size, key, name, *pvmm);
}
static bool
nvkm_vmm_boot_ptes(struct nvkm_vmm_iter *it, u32 ptei, u32 ptes)
{
	const struct nvkm_vmm_desc *desc = it->desc;
	const int type = desc->type == SPT;
	nvkm_memory_boot(it->pt[0]->pt[type]->memory, it->vmm);
	return false;
}
int
nvkm_vmm_boot(struct nvkm_vmm *vmm)
{
	const struct nvkm_vmm_page *page = vmm->func->page;
	const u64 limit = vmm->limit - vmm->start;
	int ret;

	while (page[1].shift)
		page++;

	ret = nvkm_vmm_ptes_get(vmm, page, vmm->start, limit);
	if (ret)
		return ret;

	nvkm_vmm_iter(vmm, page, vmm->start, limit, "bootstrap", false,
		      nvkm_vmm_boot_ptes, NULL, NULL, NULL);
	vmm->bootstrapped = true;
	return 0;
}