// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/slab.h>

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);

static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = NULL;

	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
				(unsigned long) hop_addr)
		if (hop_addr == pgt_info->shadow_addr)
			break;

	return pgt_info;
}

static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
	struct hl_device *hdev = ctx->hdev;

	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
			hdev->asic_prop.mmu_hop_table_size);
	hash_del(&pgt_info->node);
	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
	kfree(pgt_info);
}

static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

	_free_hop(ctx, pgt_info);
}

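/*
 * alloc_hop() allocates a hop table in two parts: the real table inside the
 * device page-table area (taken from mmu_pgt_pool) and a zeroed shadow copy
 * in host memory. Both are tracked by a pgt_info entry that is hashed by the
 * shadow address, which is also the value returned to the caller.
 */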
static u64 alloc_hop(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pgt_info *pgt_info;
	u64 phys_addr, shadow_addr;

	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
	if (!pgt_info)
		return ULLONG_MAX;

	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
					prop->mmu_hop_table_size);
	if (!phys_addr) {
		dev_err(hdev->dev, "failed to allocate page\n");
		goto pool_add_err;
	}

	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
						GFP_KERNEL);
	if (!shadow_addr)
		goto shadow_err;

	pgt_info->phys_addr = phys_addr;
	pgt_info->shadow_addr = shadow_addr;
	pgt_info->ctx = ctx;
	pgt_info->num_of_ptes = 0;
	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

	return shadow_addr;

shadow_err:
	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr,
			prop->mmu_hop_table_size);
pool_add_err:
	kfree(pgt_info);

	return ULLONG_MAX;
}

static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
	return ctx->hdev->asic_prop.mmu_pgt_addr +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
	return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static void flush(struct hl_ctx *ctx)
{
	/* flush all writes from all cores to reach PCI */
	mb();
	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}

/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	/*
	 * The value to write is actually the address of the next shadow hop +
	 * flags at the 12 LSBs.
	 * Hence in order to get the value to write to the physical PTE, we
	 * clear the 12 LSBs and translate the shadow hop to its associated
	 * physical hop, and add back the original 12 LSBs.
	 */
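	/*
	 * Illustrative example (addresses are made up): if the next hop's
	 * shadow copy sits at kernel VA 0xffff888012345000 and its pgt_info
	 * holds physical address 0x20001000, then for
	 * val == 0xffff888012345000 | PAGE_PRESENT_MASK the H/W PTE is
	 * written with 0x20001000 | PAGE_PRESENT_MASK, while the shadow PTE
	 * keeps val as-is.
	 */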
	u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
			(val & FLAGS_MASK);

	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					phys_val);

	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
					u64 val)
{
	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					val);
	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
	/* no need to transform the value to physical address */
	write_final_pte(ctx, pte_addr, 0);
}

static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}

/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
	int num_of_ptes_left;

	pgt_info->num_of_ptes--;

	/*
	 * Need to save the number of ptes left because free_hop might free
	 * the pgt_info
	 */
	num_of_ptes_left = pgt_info->num_of_ptes;
	if (!num_of_ptes_left)
		_free_hop(ctx, pgt_info);

	return num_of_ptes_left;
}

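/*
 * A PTE address inside a hop is the hop base plus the PTE index encoded in
 * the virtual address. Illustrative example (values are made up): with
 * mmu_pte_size == 8, mask == 0x7FC0000000 and shift == 30, virtual address
 * 0x80000000 selects PTE index 2, i.e. hop_addr + 16.
 */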
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
					u64 virt_addr, u64 mask, u64 shift)
{
	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
			((virt_addr & mask) >> shift);
}

static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
					mmu_prop->hop0_shift);
}

static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
					mmu_prop->hop1_shift);
}

static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
					mmu_prop->hop2_shift);
}

static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
					mmu_prop->hop3_shift);
}

static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
					mmu_prop->hop4_shift);
}

static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
	if (curr_pte & PAGE_PRESENT_MASK)
		return curr_pte & HOP_PHYS_ADDR_MASK;
	else
		return ULLONG_MAX;
}

static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
						bool *is_new_hop)
{
	u64 hop_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop_addr == ULLONG_MAX) {
		hop_addr = alloc_hop(ctx);
		*is_new_hop = (hop_addr != ULLONG_MAX);
	}

	return hop_addr;
}

/* translates shadow address inside hop to a physical address */
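/*
 * Illustrative example (values are made up): with a 4 KB hop table, shadow
 * PTE address 0xffff888012345010 belongs to the shadow hop at
 * 0xffff888012345000; if that hop's physical address is 0x20001000, the
 * returned physical PTE address is 0x20001010.
 */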
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
	u64 shadow_hop_addr = shadow_addr & ~page_mask;
	u64 pte_offset = shadow_addr & page_mask;
	u64 phys_hop_addr;

	if (shadow_hop_addr != get_hop0_addr(ctx))
		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
	else
		phys_hop_addr = get_phys_hop0_addr(ctx);

	return phys_hop_addr + pte_offset;
}

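/*
 * dram_default_mapping_init() points every DRAM page of the context at the
 * default DRAM page: hop0 and hop1 use only PTE 0, a single hop2 fans out to
 * num_of_hop3 hop3 tables, and every hop3 PTE is set to
 * mmu_dram_default_page_addr. The allocated hops are kept in
 * ctx->dram_default_hops: indices [0..num_of_hop3 - 1] hold the hop3 tables,
 * [total_hops - 2] holds hop2 and [total_hops - 1] holds hop1.
 */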
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!prop->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

	hop0_addr = get_hop0_addr(ctx);

	hop1_addr = alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop1_addr, pte_val);
	get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, pte_val);
		get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			write_final_pte(ctx, hop3_pte_addr, pte_val);
			get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		free_hop(ctx, ctx->dram_default_hops[i]);

	free_hop(ctx, hop2_addr);
hop2_err:
	free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}

static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!prop->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	hop0_addr = get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			clear_pte(ctx, hop3_pte_addr);
			put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		clear_pte(ctx, hop2_pte_addr);
		put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	clear_pte(ctx, hop1_addr);
	put_pte(ctx, hop1_addr);
	clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	flush(ctx);
}

/**
 * hl_mmu_v1_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for the MMU page tables.
 * - Allocate the shadow hop0 tables.
 *
 * Return: 0 for success, non-zero for failure.
 */
static int hl_mmu_v1_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	hdev->mmu_priv.dr.mmu_pgt_pool =
			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

	if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
		dev_err(hdev->dev, "Failed to create page gen pool\n");
		return -ENOMEM;
	}

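	/*
	 * The hop0 tables at the start of the page-table area are statically
	 * assigned per ASID (see get_phys_hop0_addr()), so only the region
	 * after them is handed to the pool for dynamic hop allocations.
	 */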
	rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
			prop->mmu_hop0_tables_total_size,
			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
			-1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
		goto err_pool_add;
	}

	hdev->mmu_priv.dr.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
						prop->mmu_hop_table_size,
						GFP_KERNEL | __GFP_ZERO);
	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
		rc = -ENOMEM;
		goto err_pool_add;
	}

	/* MMU H/W init will be done in device hw_init() */

	return 0;

err_pool_add:
	gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);

	return rc;
}

/**
 * hl_mmu_v1_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Free the shadow hop0 tables.
 * - Destroy the page tables pool.
 *
 * All contexts should be freed before calling this function.
 */
static void hl_mmu_v1_fini(struct hl_device *hdev)
{
	/* MMU H/W fini was already done in device hw_fini() */

	kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
	gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
}

/**
 * hl_mmu_v1_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize the hash that holds all the page-table hops related to this
 * context, and set up the DRAM default page mapping if needed.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
{
	hash_init(ctx->mmu_shadow_hash);
	return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_v1_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the DRAM default page mapping hops
 */
static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		_free_hop(ctx, pgt_info);
	}
}

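/*
 * _hl_mmu_v1_unmap() walks the shadow page tables from hop0 down to the PTE
 * that maps virt_addr (hop3 for huge pages, hop4 otherwise), clears it and
 * then releases every hop whose reference count drops to zero. For DRAM
 * addresses with default page mapping enabled, the PTE is rewritten to point
 * at the default DRAM page instead of being cleared.
 */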
static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
				u64 virt_addr, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte;
	bool is_huge, clear_hop3 = true;

	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop1_addr == ULLONG_MAX)
		goto not_mapped;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop2_addr == ULLONG_MAX)
		goto not_mapped;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop3_addr == ULLONG_MAX)
		goto not_mapped;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	is_huge = curr_pte & LAST_MASK;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev,
				"DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop4_addr = get_next_hop_addr(ctx, curr_pte);

		if (hop4_addr == ULLONG_MAX)
			goto not_mapped;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);

		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;

		clear_hop3 = false;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				HOP_PHYS_ADDR_MASK) | LAST_MASK |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		write_final_pte(ctx, hop3_pte_addr, default_pte);
		put_pte(ctx, hop3_addr);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop4_addr)
			clear_pte(ctx, hop4_pte_addr);
		else
			clear_pte(ctx, hop3_pte_addr);

		if (hop4_addr && !put_pte(ctx, hop4_addr))
			clear_hop3 = true;

		if (!clear_hop3)
			goto mapped;

		clear_pte(ctx, hop3_pte_addr);

		if (put_pte(ctx, hop3_addr))
			goto mapped;

		clear_pte(ctx, hop2_pte_addr);

		if (put_pte(ctx, hop2_addr))
			goto mapped;

		clear_pte(ctx, hop1_pte_addr);

		if (put_pte(ctx, hop1_addr))
			goto mapped;

		clear_pte(ctx, hop0_pte_addr);
	}

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}

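/*
 * _hl_mmu_v1_map() walks the shadow page tables for virt_addr, allocating any
 * missing hop on the way, writes the final PTE (hop3 for huge pages, hop4
 * otherwise) and only then links newly allocated hops into their parents,
 * taking a reference on every hop that gained a PTE. On failure, the hops
 * allocated by this call are freed.
 */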
static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
			u32 page_size, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte = 0;
	bool hop1_new = false, hop2_new = false, hop3_new = false,
		hop4_new = false, is_huge;
	int rc = -ENOMEM;

	/*
	 * This mapping function can map a page or a huge page. For a huge page
	 * there are only 3 hops rather than 4. Currently the DRAM allocation
	 * uses huge pages only but user memory could have been allocated with
	 * one of the two page sizes. Since this is common code for all the
	 * three cases, we need this huge page check.
	 */
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (page_size == prop->pmmu_huge.page_size) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	}

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
	if (hop1_addr == ULLONG_MAX)
		goto err;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
	if (hop2_addr == ULLONG_MAX)
		goto err;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
	if (hop3_addr == ULLONG_MAX)
		goto err;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	if (!is_huge) {
		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
		if (hop4_addr == ULLONG_MAX)
			goto err;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);
		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					HOP_PHYS_ADDR_MASK) | LAST_MASK |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
					virt_addr);
			rc = -EINVAL;
			goto err;
		}

		if (hop1_new || hop2_new || hop3_new || hop4_new) {
			dev_err(hdev->dev,
				"DRAM mapping should not allocate more hops\n");
			rc = -EFAULT;
			goto err;
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
				virt_addr);

		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);

		if (!is_huge)
			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
				*(u64 *) (uintptr_t) hop4_pte_addr,
				hop4_pte_addr);

		rc = -EINVAL;
		goto err;
	}

	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
			| PAGE_PRESENT_MASK;

	if (is_huge)
		write_final_pte(ctx, hop3_pte_addr, curr_pte);
	else
		write_final_pte(ctx, hop4_pte_addr, curr_pte);

	if (hop1_new) {
		curr_pte =
			(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop0_pte_addr, curr_pte);
	}
	if (hop2_new) {
		curr_pte =
			(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop1_pte_addr, curr_pte);
		get_pte(ctx, hop1_addr);
	}
	if (hop3_new) {
		curr_pte =
			(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, curr_pte);
		get_pte(ctx, hop2_addr);
	}

	if (!is_huge) {
		if (hop4_new) {
			curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
					PAGE_PRESENT_MASK;
			write_pte(ctx, hop3_pte_addr, curr_pte);
			get_pte(ctx, hop3_addr);
		}

		get_pte(ctx, hop4_addr);
	} else {
		get_pte(ctx, hop3_addr);
	}

	return 0;

err:
	if (hop4_new)
		free_hop(ctx, hop4_addr);
	if (hop3_new)
		free_hop(ctx, hop3_addr);
	if (hop2_new)
		free_hop(ctx, hop2_addr);
	if (hop1_new)
		free_hop(ctx, hop1_addr);

	return rc;
}

/*
 * hl_mmu_v1_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{

}

static inline u64 get_hop_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					int hop_num, u64 hop_addr, u64 virt_addr)
{
	switch (hop_num) {
	case 0:
		return get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
	case 1:
		return get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
	case 2:
		return get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
	case 3:
		return get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
	case 4:
		return get_hop4_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
	default:
		break;
	}
	return U64_MAX;
}

static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
				struct hl_mmu_hop_info *hops)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_dram_addr, is_pmmu_addr, is_pmmu_h_addr, is_huge;
	int i, used_hops;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
						prop->dmmu.start_addr,
						prop->dmmu.end_addr);
	is_pmmu_addr = hl_mem_area_inside_range(virt_addr, prop->pmmu.page_size,
						prop->pmmu.start_addr,
						prop->pmmu.end_addr);
	is_pmmu_h_addr = hl_mem_area_inside_range(virt_addr,
						prop->pmmu_huge.page_size,
						prop->pmmu_huge.start_addr,
						prop->pmmu_huge.end_addr);
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (is_pmmu_addr) {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	} else if (is_pmmu_h_addr) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		return -EINVAL;
	}

	used_hops = mmu_prop->num_hops;

	/* huge pages use fewer hops */
	if (is_huge)
		used_hops--;

	hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
	hops->hop_info[0].hop_pte_addr =
			get_hop_pte_addr(ctx, mmu_prop, 0,
					hops->hop_info[0].hop_addr, virt_addr);
	hops->hop_info[0].hop_pte_val =
			hdev->asic_funcs->read_pte(hdev,
						hops->hop_info[0].hop_pte_addr);

	for (i = 1 ; i < used_hops ; i++) {
		hops->hop_info[i].hop_addr =
			get_next_hop_addr(ctx,
					hops->hop_info[i - 1].hop_pte_val);
		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
			return -EFAULT;

		hops->hop_info[i].hop_pte_addr =
				get_hop_pte_addr(ctx, mmu_prop, i,
						hops->hop_info[i].hop_addr,
						virt_addr);
		hops->hop_info[i].hop_pte_val =
				hdev->asic_funcs->read_pte(hdev,
					hops->hop_info[i].hop_pte_addr);

		if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
			return -EFAULT;

		if (hops->hop_info[i].hop_pte_val & LAST_MASK)
			break;
	}

	/* if passed over all hops then no last hop was found */
	if (i == mmu_prop->num_hops)
		return -EFAULT;

	if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
		return -EFAULT;

	hops->used_hops = i + 1;

	return 0;
}

/*
 * hl_mmu_v1_set_funcs - set the MMU functions to match MMU v1
 *
 * @hdev: pointer to the device structure
 * @mmu: pointer to the MMU functions structure to fill
 */
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
	mmu->init = hl_mmu_v1_init;
	mmu->fini = hl_mmu_v1_fini;
	mmu->ctx_init = hl_mmu_v1_ctx_init;
	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
	mmu->map = _hl_mmu_v1_map;
	mmu->unmap = _hl_mmu_v1_unmap;
	mmu->flush = flush;
	mmu->swap_out = hl_mmu_v1_swap_out;
	mmu->swap_in = hl_mmu_v1_swap_in;
	mmu->get_tlb_info = hl_mmu_v1_get_tlb_info;
}