git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/commitdiff
habanalabs: split MMU properties to PCI/DRAM
author Omer Shpigelman <oshpigelman@habana.ai>
Thu, 14 Nov 2019 18:23:55 +0000 (18:23 +0000)
committer Oded Gabbay <oded.gabbay@gmail.com>
Thu, 21 Nov 2019 09:35:46 +0000 (11:35 +0200)
Split the properties used for MMU mappings to DRAM and PCI (host) types.
This is a prerequisite for future ASICs support.
Note that in the Goya ASIC, the PMMU and DMMU are the same (except for page
sizes) as only one MMU mechanism is used for both of the mapping types.
Hence this patch should not have any effect on current behavior.

Signed-off-by: Omer Shpigelman <oshpigelman@habana.ai>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/misc/habanalabs/debugfs.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/habanalabs.h
drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
drivers/misc/habanalabs/memory.c
drivers/misc/habanalabs/mmu.c

index 1e1fa619a225d6bab1ca3ffb19aa0834279c00d3..1cf75010a379cf84fa2fa26bd2402618e81f8333 100644 (file)
@@ -307,39 +307,51 @@ static inline u64 get_hop0_addr(struct hl_ctx *ctx)
                        (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
 }
 
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-               u64 virt_addr)
+static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
+                                       u64 virt_addr, u64 mask, u64 shift)
 {
        return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-                       ((virt_addr & HOP0_MASK) >> HOP0_SHIFT);
+                       ((virt_addr & mask) >> shift);
 }
 
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-               u64 virt_addr)
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_specs,
+                                       u64 hop_addr, u64 vaddr)
 {
-       return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-                       ((virt_addr & HOP1_MASK) >> HOP1_SHIFT);
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask,
+                                       mmu_specs->hop0_shift);
 }
 
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-               u64 virt_addr)
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_specs,
+                                       u64 hop_addr, u64 vaddr)
 {
-       return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-                       ((virt_addr & HOP2_MASK) >> HOP2_SHIFT);
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask,
+                                       mmu_specs->hop1_shift);
 }
 
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-               u64 virt_addr)
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_specs,
+                                       u64 hop_addr, u64 vaddr)
 {
-       return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-                       ((virt_addr & HOP3_MASK) >> HOP3_SHIFT);
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask,
+                                       mmu_specs->hop2_shift);
 }
 
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
-               u64 virt_addr)
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_specs,
+                                       u64 hop_addr, u64 vaddr)
 {
-       return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
-                       ((virt_addr & HOP4_MASK) >> HOP4_SHIFT);
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask,
+                                       mmu_specs->hop3_shift);
+}
+
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_specs,
+                                       u64 hop_addr, u64 vaddr)
+{
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask,
+                                       mmu_specs->hop4_shift);
 }
 
 static inline u64 get_next_hop_addr(u64 curr_pte)
@@ -355,7 +367,10 @@ static int mmu_show(struct seq_file *s, void *data)
        struct hl_debugfs_entry *entry = s->private;
        struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
        struct hl_device *hdev = dev_entry->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_mmu_properties *mmu_prop;
        struct hl_ctx *ctx;
+       bool is_dram_addr;
 
        u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0,
                hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0,
@@ -377,33 +392,39 @@ static int mmu_show(struct seq_file *s, void *data)
                return 0;
        }
 
+       is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+                               prop->va_space_dram_start_address,
+                               prop->va_space_dram_end_address);
+
+       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
        mutex_lock(&ctx->mmu_lock);
 
        /* the following lookup is copied from unmap() in mmu.c */
 
        hop0_addr = get_hop0_addr(ctx);
-       hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+       hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
        hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
        hop1_addr = get_next_hop_addr(hop0_pte);
 
        if (hop1_addr == ULLONG_MAX)
                goto not_mapped;
 
-       hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+       hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
        hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
        hop2_addr = get_next_hop_addr(hop1_pte);
 
        if (hop2_addr == ULLONG_MAX)
                goto not_mapped;
 
-       hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+       hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
        hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
        hop3_addr = get_next_hop_addr(hop2_pte);
 
        if (hop3_addr == ULLONG_MAX)
                goto not_mapped;
 
-       hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+       hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
        hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
 
        if (!(hop3_pte & LAST_MASK)) {
@@ -412,7 +433,8 @@ static int mmu_show(struct seq_file *s, void *data)
                if (hop4_addr == ULLONG_MAX)
                        goto not_mapped;
 
-               hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+               hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+                                                       virt_addr);
                hop4_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
                if (!(hop4_pte & PAGE_PRESENT_MASK))
                        goto not_mapped;
@@ -534,41 +556,50 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
                                u64 *phys_addr)
 {
        struct hl_ctx *ctx = hdev->compute_ctx;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_mmu_properties *mmu_prop;
        u64 hop_addr, hop_pte_addr, hop_pte;
        u64 offset_mask = HOP4_MASK | FLAGS_MASK;
        int rc = 0;
+       bool is_dram_addr;
 
        if (!ctx) {
                dev_err(hdev->dev, "no ctx available\n");
                return -EINVAL;
        }
 
+       is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+                               prop->va_space_dram_start_address,
+                               prop->va_space_dram_end_address);
+
+       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
        mutex_lock(&ctx->mmu_lock);
 
        /* hop 0 */
        hop_addr = get_hop0_addr(ctx);
-       hop_pte_addr = get_hop0_pte_addr(ctx, hop_addr, virt_addr);
+       hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
        hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
        /* hop 1 */
        hop_addr = get_next_hop_addr(hop_pte);
        if (hop_addr == ULLONG_MAX)
                goto not_mapped;
-       hop_pte_addr = get_hop1_pte_addr(ctx, hop_addr, virt_addr);
+       hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
        hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
        /* hop 2 */
        hop_addr = get_next_hop_addr(hop_pte);
        if (hop_addr == ULLONG_MAX)
                goto not_mapped;
-       hop_pte_addr = get_hop2_pte_addr(ctx, hop_addr, virt_addr);
+       hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
        hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
        /* hop 3 */
        hop_addr = get_next_hop_addr(hop_pte);
        if (hop_addr == ULLONG_MAX)
                goto not_mapped;
-       hop_pte_addr = get_hop3_pte_addr(ctx, hop_addr, virt_addr);
+       hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
        hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
        if (!(hop_pte & LAST_MASK)) {
@@ -576,7 +607,8 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
                hop_addr = get_next_hop_addr(hop_pte);
                if (hop_addr == ULLONG_MAX)
                        goto not_mapped;
-               hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr);
+               hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr,
+                                                       virt_addr);
                hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
 
                offset_mask = FLAGS_MASK;
index 3c22fb96a26fa9b383d4d72e1f3a6430ab79bdc2..3294a6a92f75bd7481e0f33e2a1df70a6667e377 100644 (file)
@@ -380,6 +380,23 @@ void goya_get_fixed_properties(struct hl_device *hdev)
        prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
        prop->dram_page_size = PAGE_SIZE_2MB;
 
+       prop->dmmu.hop0_shift = HOP0_SHIFT;
+       prop->dmmu.hop1_shift = HOP1_SHIFT;
+       prop->dmmu.hop2_shift = HOP2_SHIFT;
+       prop->dmmu.hop3_shift = HOP3_SHIFT;
+       prop->dmmu.hop4_shift = HOP4_SHIFT;
+       prop->dmmu.hop0_mask = HOP0_MASK;
+       prop->dmmu.hop1_mask = HOP1_MASK;
+       prop->dmmu.hop2_mask = HOP2_MASK;
+       prop->dmmu.hop3_mask = HOP3_MASK;
+       prop->dmmu.hop4_mask = HOP4_MASK;
+       prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
+
+       /* No difference between PMMU and DMMU except of page size */
+       memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
+       prop->dmmu.page_size = PAGE_SIZE_2MB;
+       prop->pmmu.page_size = PAGE_SIZE_4KB;
+
        prop->va_space_host_start_address = VA_HOST_SPACE_START;
        prop->va_space_host_end_address = VA_HOST_SPACE_END;
        prop->va_space_dram_start_address = VA_DDR_SPACE_START;
index 36d05c32f7ec866392433ed33bb1ca54bcdff038..00c949f4ccd1c56438722b119e70e56beeac6035 100644 (file)
@@ -130,6 +130,36 @@ enum hl_device_hw_state {
        HL_DEVICE_HW_STATE_DIRTY
 };
 
+/**
+ * struct hl_mmu_properties - ASIC specific MMU address translation properties.
+ * @hop0_shift: shift of hop 0 mask.
+ * @hop1_shift: shift of hop 1 mask.
+ * @hop2_shift: shift of hop 2 mask.
+ * @hop3_shift: shift of hop 3 mask.
+ * @hop4_shift: shift of hop 4 mask.
+ * @hop0_mask: mask to get the PTE address in hop 0.
+ * @hop1_mask: mask to get the PTE address in hop 1.
+ * @hop2_mask: mask to get the PTE address in hop 2.
+ * @hop3_mask: mask to get the PTE address in hop 3.
+ * @hop4_mask: mask to get the PTE address in hop 4.
+ * @page_size: default page size used to allocate memory.
+ * @huge_page_size: page size used to allocate memory with huge pages.
+ */
+struct hl_mmu_properties {
+       u64     hop0_shift;
+       u64     hop1_shift;
+       u64     hop2_shift;
+       u64     hop3_shift;
+       u64     hop4_shift;
+       u64     hop0_mask;
+       u64     hop1_mask;
+       u64     hop2_mask;
+       u64     hop3_mask;
+       u64     hop4_mask;
+       u32     page_size;
+       u32     huge_page_size;
+};
+
 /**
  * struct asic_fixed_properties - ASIC specific immutable properties.
  * @hw_queues_props: H/W queues properties.
@@ -137,6 +167,8 @@ enum hl_device_hw_state {
  *             available sensors.
  * @uboot_ver: F/W U-boot version.
  * @preboot_ver: F/W Preboot version.
+ * @dmmu: DRAM MMU address translation properties.
+ * @pmmu: PCI (host) MMU address translation properties.
  * @sram_base_address: SRAM physical start address.
  * @sram_end_address: SRAM physical end address.
  * @sram_user_base_address - SRAM physical start address for user access.
@@ -173,53 +205,55 @@ enum hl_device_hw_state {
  * @psoc_pci_pll_nf: PCI PLL NF value.
  * @psoc_pci_pll_od: PCI PLL OD value.
  * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
- * @completion_queues_count: number of completion queues.
  * @high_pll: high PLL frequency used by the device.
  * @cb_pool_cb_cnt: number of CBs in the CB pool.
  * @cb_pool_cb_size: size of each CB in the CB pool.
  * @tpc_enabled_mask: which TPCs are enabled.
+ * @completion_queues_count: number of completion queues.
  */
 struct asic_fixed_properties {
        struct hw_queue_properties      hw_queues_props[HL_MAX_QUEUES];
-       struct armcp_info       armcp_info;
-       char                    uboot_ver[VERSION_MAX_LEN];
-       char                    preboot_ver[VERSION_MAX_LEN];
-       u64                     sram_base_address;
-       u64                     sram_end_address;
-       u64                     sram_user_base_address;
-       u64                     dram_base_address;
-       u64                     dram_end_address;
-       u64                     dram_user_base_address;
-       u64                     dram_size;
-       u64                     dram_pci_bar_size;
-       u64                     max_power_default;
-       u64                     va_space_host_start_address;
-       u64                     va_space_host_end_address;
-       u64                     va_space_dram_start_address;
-       u64                     va_space_dram_end_address;
-       u64                     dram_size_for_default_page_mapping;
-       u64                     pcie_dbi_base_address;
-       u64                     pcie_aux_dbi_reg_addr;
-       u64                     mmu_pgt_addr;
-       u64                     mmu_dram_default_page_addr;
-       u32                     mmu_pgt_size;
-       u32                     mmu_pte_size;
-       u32                     mmu_hop_table_size;
-       u32                     mmu_hop0_tables_total_size;
-       u32                     dram_page_size;
-       u32                     cfg_size;
-       u32                     sram_size;
-       u32                     max_asid;
-       u32                     num_of_events;
-       u32                     psoc_pci_pll_nr;
-       u32                     psoc_pci_pll_nf;
-       u32                     psoc_pci_pll_od;
-       u32                     psoc_pci_pll_div_factor;
-       u32                     high_pll;
-       u32                     cb_pool_cb_cnt;
-       u32                     cb_pool_cb_size;
-       u8                      completion_queues_count;
-       u8                      tpc_enabled_mask;
+       struct armcp_info               armcp_info;
+       char                            uboot_ver[VERSION_MAX_LEN];
+       char                            preboot_ver[VERSION_MAX_LEN];
+       struct hl_mmu_properties        dmmu;
+       struct hl_mmu_properties        pmmu;
+       u64                             sram_base_address;
+       u64                             sram_end_address;
+       u64                             sram_user_base_address;
+       u64                             dram_base_address;
+       u64                             dram_end_address;
+       u64                             dram_user_base_address;
+       u64                             dram_size;
+       u64                             dram_pci_bar_size;
+       u64                             max_power_default;
+       u64                             va_space_host_start_address;
+       u64                             va_space_host_end_address;
+       u64                             va_space_dram_start_address;
+       u64                             va_space_dram_end_address;
+       u64                             dram_size_for_default_page_mapping;
+       u64                             pcie_dbi_base_address;
+       u64                             pcie_aux_dbi_reg_addr;
+       u64                             mmu_pgt_addr;
+       u64                             mmu_dram_default_page_addr;
+       u32                             mmu_pgt_size;
+       u32                             mmu_pte_size;
+       u32                             mmu_hop_table_size;
+       u32                             mmu_hop0_tables_total_size;
+       u32                             dram_page_size;
+       u32                             cfg_size;
+       u32                             sram_size;
+       u32                             max_asid;
+       u32                             num_of_events;
+       u32                             psoc_pci_pll_nr;
+       u32                             psoc_pci_pll_nf;
+       u32                             psoc_pci_pll_od;
+       u32                             psoc_pci_pll_div_factor;
+       u32                             high_pll;
+       u32                             cb_pool_cb_cnt;
+       u32                             cb_pool_cb_size;
+       u8                              tpc_enabled_mask;
+       u8                              completion_queues_count;
 };
 
 /**
index 74a5502b8c4ec87e8f8db6f4a8616aef7abc75c7..a6851a9d3f03dea45d957b1340ae201f348cb0b3 100644 (file)
@@ -12,7 +12,6 @@
 #define PAGE_SHIFT_2MB                 21
 #define PAGE_SIZE_2MB                  (_AC(1, UL) << PAGE_SHIFT_2MB)
 #define PAGE_SIZE_4KB                  (_AC(1, UL) << PAGE_SHIFT_4KB)
-#define PAGE_MASK_2MB                  (~(PAGE_SIZE_2MB - 1))
 
 #define PAGE_PRESENT_MASK              0x0000000000001ull
 #define SWAP_OUT_MASK                  0x0000000000004ull
index 12db6609da27e1cc347f17847f64f8a88b1c0276..cce6bdb6e655406548404d719a0ef54bb4f1ac5d 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/slab.h>
 #include <linux/genalloc.h>
 
-#define PGS_IN_2MB_PAGE        (PAGE_SIZE_2MB >> PAGE_SHIFT)
 #define HL_MMU_DEBUG   0
 
 /*
@@ -516,8 +515,8 @@ static inline int add_va_block(struct hl_device *hdev,
  * - Return the start address of the virtual block
  */
 static u64 get_va_block(struct hl_device *hdev,
-               struct hl_va_range *va_range, u64 size, u64 hint_addr,
-               bool is_userptr)
+                       struct hl_va_range *va_range, u64 size, u64 hint_addr,
+                       bool is_userptr)
 {
        struct hl_vm_va_block *va_block, *new_va_block = NULL;
        u64 valid_start, valid_size, prev_start, prev_end, page_mask,
@@ -525,18 +524,17 @@ static u64 get_va_block(struct hl_device *hdev,
        u32 page_size;
        bool add_prev = false;
 
-       if (is_userptr) {
+       if (is_userptr)
                /*
                 * We cannot know if the user allocated memory with huge pages
                 * or not, hence we continue with the biggest possible
                 * granularity.
                 */
-               page_size = PAGE_SIZE_2MB;
-               page_mask = PAGE_MASK_2MB;
-       } else {
-               page_size = hdev->asic_prop.dram_page_size;
-               page_mask = ~((u64)page_size - 1);
-       }
+               page_size = hdev->asic_prop.pmmu.huge_page_size;
+       else
+               page_size = hdev->asic_prop.dmmu.page_size;
+
+       page_mask = ~((u64)page_size - 1);
 
        mutex_lock(&va_range->lock);
 
@@ -558,7 +556,6 @@ static u64 get_va_block(struct hl_device *hdev,
 
                if (valid_size >= size &&
                        (!new_va_block || valid_size < res_valid_size)) {
-
                        new_va_block = va_block;
                        res_valid_start = valid_start;
                        res_valid_size = valid_size;
@@ -629,7 +626,7 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
 /*
  * init_phys_pg_pack_from_userptr - initialize physical page pack from host
  *                                  memory
- * @asid: current context ASID
+ * @ctx: current context
  * @userptr: userptr to initialize from
  * @pphys_pg_pack: result pointer
  *
@@ -638,16 +635,20 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
  * - Create a physical page pack from the physical pages related to the given
  *   virtual block
  */
-static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
+static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
+                               struct hl_userptr *userptr,
                                struct hl_vm_phys_pg_pack **pphys_pg_pack)
 {
+       struct hl_mmu_properties *mmu_prop = &ctx->hdev->asic_prop.pmmu;
        struct hl_vm_phys_pg_pack *phys_pg_pack;
        struct scatterlist *sg;
        dma_addr_t dma_addr;
        u64 page_mask, total_npages;
-       u32 npages, page_size = PAGE_SIZE;
+       u32 npages, page_size = PAGE_SIZE,
+               huge_page_size = mmu_prop->huge_page_size;
        bool first = true, is_huge_page_opt = true;
        int rc, i, j;
+       u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
 
        phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
        if (!phys_pg_pack)
@@ -655,7 +656,7 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
 
        phys_pg_pack->vm_type = userptr->vm_type;
        phys_pg_pack->created_from_userptr = true;
-       phys_pg_pack->asid = asid;
+       phys_pg_pack->asid = ctx->asid;
        atomic_set(&phys_pg_pack->mapping_cnt, 1);
 
        /* Only if all dma_addrs are aligned to 2MB and their
@@ -670,14 +671,14 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
 
                total_npages += npages;
 
-               if ((npages % PGS_IN_2MB_PAGE) ||
-                                       (dma_addr & (PAGE_SIZE_2MB - 1)))
+               if ((npages % pgs_in_huge_page) ||
+                                       (dma_addr & (huge_page_size - 1)))
                        is_huge_page_opt = false;
        }
 
        if (is_huge_page_opt) {
-               page_size = PAGE_SIZE_2MB;
-               total_npages /= PGS_IN_2MB_PAGE;
+               page_size = huge_page_size;
+               do_div(total_npages, pgs_in_huge_page);
        }
 
        page_mask = ~(((u64) page_size) - 1);
@@ -709,7 +710,7 @@ static int init_phys_pg_pack_from_userptr(u32 asid, struct hl_userptr *userptr,
                        dma_addr += page_size;
 
                        if (is_huge_page_opt)
-                               npages -= PGS_IN_2MB_PAGE;
+                               npages -= pgs_in_huge_page;
                        else
                                npages--;
                }
@@ -872,7 +873,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
                        return rc;
                }
 
-               rc = init_phys_pg_pack_from_userptr(ctx->asid, userptr,
+               rc = init_phys_pg_pack_from_userptr(ctx, userptr,
                                &phys_pg_pack);
                if (rc) {
                        dev_err(hdev->dev,
@@ -1029,7 +1030,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
        if (*vm_type == VM_TYPE_USERPTR) {
                is_userptr = true;
                userptr = hnode->ptr;
-               rc = init_phys_pg_pack_from_userptr(ctx->asid, userptr,
+               rc = init_phys_pg_pack_from_userptr(ctx, userptr,
                                                        &phys_pg_pack);
                if (rc) {
                        dev_err(hdev->dev,
index 21b4e3281b3e50cfd0f29e746e2d5238fadaa03e..3a7f8ff19eb2393c4b7d82f99f8a38944fbd01c8 100644 (file)
@@ -171,29 +171,44 @@ static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
                        ((virt_addr & mask) >> shift);
 }
 
-static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_prop,
+                                       u64 hop_addr, u64 vaddr)
 {
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT);
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
+                                       mmu_prop->hop0_shift);
 }
 
-static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_prop,
+                                       u64 hop_addr, u64 vaddr)
 {
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT);
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
+                                       mmu_prop->hop1_shift);
 }
 
-static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_prop,
+                                       u64 hop_addr, u64 vaddr)
 {
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT);
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
+                                       mmu_prop->hop2_shift);
 }
 
-static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_prop,
+                                       u64 hop_addr, u64 vaddr)
 {
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT);
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
+                                       mmu_prop->hop3_shift);
 }
 
-static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
+static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
+                                       struct hl_mmu_properties *mmu_prop,
+                                       u64 hop_addr, u64 vaddr)
 {
-       return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT);
+       return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
+                                       mmu_prop->hop4_shift);
 }
 
 static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
@@ -513,24 +528,23 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
        mutex_destroy(&ctx->mmu_lock);
 }
 
-static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
+static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
 {
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_mmu_properties *mmu_prop;
        u64 hop0_addr = 0, hop0_pte_addr = 0,
                hop1_addr = 0, hop1_pte_addr = 0,
                hop2_addr = 0, hop2_pte_addr = 0,
                hop3_addr = 0, hop3_pte_addr = 0,
                hop4_addr = 0, hop4_pte_addr = 0,
                curr_pte;
-       bool is_dram_addr, is_huge, clear_hop3 = true;
+       bool is_huge, clear_hop3 = true;
 
-       is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB,
-                               prop->va_space_dram_start_address,
-                               prop->va_space_dram_end_address);
+       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
 
        hop0_addr = get_hop0_addr(ctx);
-       hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+       hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
 
        curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
 
@@ -539,7 +553,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
        if (hop1_addr == ULLONG_MAX)
                goto not_mapped;
 
-       hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+       hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
 
        curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
 
@@ -548,7 +562,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
        if (hop2_addr == ULLONG_MAX)
                goto not_mapped;
 
-       hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+       hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
 
        curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
 
@@ -557,7 +571,7 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
        if (hop3_addr == ULLONG_MAX)
                goto not_mapped;
 
-       hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+       hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
 
        curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
 
@@ -575,7 +589,8 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
                if (hop4_addr == ULLONG_MAX)
                        goto not_mapped;
 
-               hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+               hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+                                                       virt_addr);
 
                curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
 
@@ -667,25 +682,36 @@ not_mapped:
 int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
 {
        struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_mmu_properties *mmu_prop;
        u64 real_virt_addr;
        u32 real_page_size, npages;
        int i, rc;
+       bool is_dram_addr;
 
        if (!hdev->mmu_enable)
                return 0;
 
+       is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+                               prop->va_space_dram_start_address,
+                               prop->va_space_dram_end_address);
+
+       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
        /*
-        * The H/W handles mapping of 4KB/2MB page. Hence if the host page size
-        * is bigger, we break it to sub-pages and unmap them separately.
+        * The H/W handles mapping of specific page sizes. Hence if the page
+        * size is bigger, we break it to sub-pages and unmap them separately.
         */
-       if ((page_size % PAGE_SIZE_2MB) == 0) {
-               real_page_size = PAGE_SIZE_2MB;
-       } else if ((page_size % PAGE_SIZE_4KB) == 0) {
-               real_page_size = PAGE_SIZE_4KB;
+       if ((page_size % mmu_prop->huge_page_size) == 0) {
+               real_page_size = mmu_prop->huge_page_size;
+       } else if ((page_size % mmu_prop->page_size) == 0) {
+               real_page_size = mmu_prop->page_size;
        } else {
                dev_err(hdev->dev,
-                       "page size of %u is not 4KB nor 2MB aligned, can't unmap\n",
-                               page_size);
+                       "page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
+                       page_size,
+                       mmu_prop->page_size >> 10,
+                       mmu_prop->huge_page_size >> 20);
 
                return -EFAULT;
        }
@@ -694,7 +720,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
        real_virt_addr = virt_addr;
 
        for (i = 0 ; i < npages ; i++) {
-               rc = _hl_mmu_unmap(ctx, real_virt_addr);
+               rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
                if (rc)
                        return rc;
 
@@ -705,10 +731,11 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
 }
 
 static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
-               u32 page_size)
+                       u32 page_size, bool is_dram_addr)
 {
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_mmu_properties *mmu_prop;
        u64 hop0_addr = 0, hop0_pte_addr = 0,
                hop1_addr = 0, hop1_pte_addr = 0,
                hop2_addr = 0, hop2_pte_addr = 0,
@@ -716,21 +743,19 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
                hop4_addr = 0, hop4_pte_addr = 0,
                curr_pte = 0;
        bool hop1_new = false, hop2_new = false, hop3_new = false,
-               hop4_new = false, is_huge, is_dram_addr;
+               hop4_new = false, is_huge;
        int rc = -ENOMEM;
 
+       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
        /*
-        * This mapping function can map a 4KB/2MB page. For 2MB page there are
-        * only 3 hops rather than 4. Currently the DRAM allocation uses 2MB
-        * pages only but user memory could have been allocated with one of the
-        * two page sizes. Since this is a common code for all the three cases,
-        * we need this hugs page check.
+        * This mapping function can map a page or a huge page. For huge page
+        * there are only 3 hops rather than 4. Currently the DRAM allocation
+        * uses huge pages only but user memory could have been allocated with
+        * one of the two page sizes. Since this is a common code for all the
+        * three cases, we need this huge page check.
         */
-       is_huge = page_size == PAGE_SIZE_2MB;
-
-       is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size,
-                               prop->va_space_dram_start_address,
-                               prop->va_space_dram_end_address);
+       is_huge = page_size == mmu_prop->huge_page_size;
 
        if (is_dram_addr && !is_huge) {
                dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
@@ -738,28 +763,28 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
        }
 
        hop0_addr = get_hop0_addr(ctx);
-       hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
+       hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
        curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
 
        hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
        if (hop1_addr == ULLONG_MAX)
                goto err;
 
-       hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
+       hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
        curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
 
        hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
        if (hop2_addr == ULLONG_MAX)
                goto err;
 
-       hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
+       hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
        curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
 
        hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
        if (hop3_addr == ULLONG_MAX)
                goto err;
 
-       hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
+       hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
        curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
 
        if (!is_huge) {
@@ -767,7 +792,8 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
                if (hop4_addr == ULLONG_MAX)
                        goto err;
 
-               hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
+               hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
+                                                       virt_addr);
                curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
        }
 
@@ -890,25 +916,36 @@ err:
 int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 {
        struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       struct hl_mmu_properties *mmu_prop;
        u64 real_virt_addr, real_phys_addr;
        u32 real_page_size, npages;
        int i, rc, mapped_cnt = 0;
+       bool is_dram_addr;
 
        if (!hdev->mmu_enable)
                return 0;
 
+       is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
+                               prop->va_space_dram_start_address,
+                               prop->va_space_dram_end_address);
+
+       mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;
+
        /*
-        * The H/W handles mapping of 4KB/2MB page. Hence if the host page size
-        * is bigger, we break it to sub-pages and map them separately.
+        * The H/W handles mapping of specific page sizes. Hence if the page
+        * size is bigger, we break it to sub-pages and map them separately.
         */
-       if ((page_size % PAGE_SIZE_2MB) == 0) {
-               real_page_size = PAGE_SIZE_2MB;
-       } else if ((page_size % PAGE_SIZE_4KB) == 0) {
-               real_page_size = PAGE_SIZE_4KB;
+       if ((page_size % mmu_prop->huge_page_size) == 0) {
+               real_page_size = mmu_prop->huge_page_size;
+       } else if ((page_size % mmu_prop->page_size) == 0) {
+               real_page_size = mmu_prop->page_size;
        } else {
                dev_err(hdev->dev,
-                       "page size of %u is not 4KB nor 2MB aligned, can't map\n",
-                               page_size);
+                       "page size of %u is not %dKB nor %dMB aligned, can't unmap\n",
+                       page_size,
+                       mmu_prop->page_size >> 10,
+                       mmu_prop->huge_page_size >> 20);
 
                return -EFAULT;
        }
@@ -923,7 +960,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 
        for (i = 0 ; i < npages ; i++) {
                rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
-                               real_page_size);
+                               real_page_size, is_dram_addr);
                if (rc)
                        goto err;
 
@@ -937,7 +974,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
 err:
        real_virt_addr = virt_addr;
        for (i = 0 ; i < mapped_cnt ; i++) {
-               if (_hl_mmu_unmap(ctx, real_virt_addr))
+               if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr))
                        dev_warn_ratelimited(hdev->dev,
                                "failed to unmap va: 0x%llx\n", real_virt_addr);