/* MMU */
#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
+/**
+ * enum hl_mmu_page_table_location - mmu page table location
+ * @MMU_DR_PGT: page-table is located in device DRAM.
+ * @MMU_HR_PGT: page-table is located in host memory.
+ * @MMU_NUM_PGT_LOCATIONS: number of page-table locations currently supported.
+ */
+enum hl_mmu_page_table_location {
+ MMU_DR_PGT = 0, /* device-dram-resident MMU PGT */
+ MMU_HR_PGT, /* host resident MMU PGT */
+ MMU_NUM_PGT_LOCATIONS /* num of PGT locations */
+};
+
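/*
 * Editor's sketch (not part of the patch; the helper name is hypothetical),
 * assuming the mmu_func array this patch adds to struct hl_device: the enum
 * above lets common code run one operation per registered page-table
 * location. The NULL checks matter because an ASIC may register only one of
 * the two slots.
 */
static void hl_mmu_sketch_fini_all(struct hl_device *hdev)
{
	int i;

	for (i = 0 ; i < MMU_NUM_PGT_LOCATIONS ; i++)
		if (hdev->mmu_func[i].fini != NULL)
			hdev->mmu_func[i].fini(hdev);
}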
/*
* HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
* HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
* @hop5_mask: mask to get the PTE address in hop 5.
* @page_size: default page size used to allocate memory.
* @num_hops: The amount of hops supported by the translation table.
+ * @host_resident: whether the MMU page table resides in host memory
+ * (true) or in device DRAM (false).
*/
struct hl_mmu_properties {
u64 start_addr;
u64 hop5_mask;
u32 page_size;
u32 num_hops;
+ u8 host_resident;
};
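/*
 * Editor's sketch (not part of the patch; the helper name is hypothetical):
 * host_resident is what the map/unmap paths below key on when choosing a
 * slot in the mmu_func array.
 */
static inline int hl_mmu_sketch_pgt_residency(struct hl_mmu_properties *mmu_prop)
{
	return mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
}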
/**
ktime_t busy_to_idle_ts;
};
+/**
+ * struct hr_mmu_hop_addrs - used for holding per-device host-resident mmu hop
+ * information.
+ * @virt_addr: the virtual address of the hop.
+ * @phys_addr: the physical address of the hop (used by the device MMU).
+ * @shadow_addr: the shadow of the hop, used by the driver for walking the hops.
+ */
+struct hr_mmu_hop_addrs {
+ u64 virt_addr;
+ u64 phys_addr;
+ u64 shadow_addr;
+};
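/*
 * Editor's sketch (not part of the patch): assuming mmu_shadow_hop0 in
 * struct hl_mmu_hr_priv below is indexed per ASID, mirroring
 * get_hop0_addr() in the device-resident code, a host-resident hop0
 * lookup would be:
 *
 *	struct hr_mmu_hop_addrs *hop0 =
 *		&ctx->hdev->mmu_priv.hr.mmu_shadow_hop0[ctx->asid];
 */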
/**
- * struct hl_mmu_priv - used for holding per-device mmu internal information.
+ * struct hl_mmu_hr_priv - used for holding per-device mmu host-resident
+ * page-table internal information.
* @mmu_pgt_pool: pool of page tables used by MMU for allocating hops.
* @mmu_shadow_hop0: shadow array of hop0 tables.
*/
-struct hl_mmu_priv {
+struct hl_mmu_hr_priv {
+ struct gen_pool *mmu_pgt_pool;
+ struct hr_mmu_hop_addrs *mmu_shadow_hop0;
+};
+
+/**
+ * struct hl_mmu_dr_priv - used for holding per-device mmu device-resident
+ * page-table internal information.
+ * @mmu_pgt_pool: pool of page tables used by MMU for allocating hops.
+ * @mmu_shadow_hop0: shadow array of hop0 tables.
+ */
+struct hl_mmu_dr_priv {
struct gen_pool *mmu_pgt_pool;
void *mmu_shadow_hop0;
};
+/**
+ * struct hl_mmu_priv - used for holding per-device mmu internal information.
+ * @dr: information on the device-resident MMU, when it exists.
+ * @hr: information on the host-resident MMU, when it exists.
+ */
+struct hl_mmu_priv {
+ struct hl_mmu_dr_priv dr;
+ struct hl_mmu_hr_priv hr;
+};
+
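/*
 * Editor's note (not part of the patch): after this split, the v1
 * (device-resident) code reaches its state through mmu_priv.dr, e.g.
 * gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool, ...), while a future
 * host-resident implementation would keep its state in mmu_priv.hr.
 */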
/**
* struct hl_mmu_funcs - Device related MMU functions.
* @init: initialize the MMU module.
struct hl_cs_counters_atomic aggregated_cs_counters;
struct hl_mmu_priv mmu_priv;
- struct hl_mmu_funcs mmu_func;
+ struct hl_mmu_funcs mmu_func[MMU_NUM_PGT_LOCATIONS];
atomic64_t dram_used_mem;
u64 timeout_jiffies;
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);
int hl_mmu_if_set_funcs(struct hl_device *hdev);
-void hl_mmu_v1_set_funcs(struct hl_device *hdev);
+void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
void __iomem *dst, u32 src_offset, u32 size);
* hl_mmu_init() - initialize the MMU module.
* @hdev: habanalabs device structure.
*
- * This function does the following:
- * - Create a pool of pages for pgt_infos.
- * - Create a shadow table for pgt
- *
* Return: 0 for success, non-zero for failure.
*/
int hl_mmu_init(struct hl_device *hdev)
{
- if (hdev->mmu_enable)
- return hdev->mmu_func.init(hdev);
+ int rc = -EOPNOTSUPP;
- return 0;
+ if (!hdev->mmu_enable)
+ return 0;
+
+ if (hdev->mmu_func[MMU_DR_PGT].init != NULL) {
+ rc = hdev->mmu_func[MMU_DR_PGT].init(hdev);
+ if (rc)
+ return rc;
+ }
+
+ if (hdev->mmu_func[MMU_HR_PGT].init != NULL)
+ rc = hdev->mmu_func[MMU_HR_PGT].init(hdev);
+
+ return rc;
}
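/*
 * Editor's note (not part of the patch): with the MMU enabled, rc stays
 * -EOPNOTSUPP only if neither location registered an init callback; a
 * device-resident init failure returns early and the host-resident init
 * is skipped.
 */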
/**
*/
void hl_mmu_fini(struct hl_device *hdev)
{
- if (hdev->mmu_enable)
- hdev->mmu_func.fini(hdev);
+ if (!hdev->mmu_enable)
+ return;
+
+ if (hdev->mmu_func[MMU_DR_PGT].fini != NULL)
+ hdev->mmu_func[MMU_DR_PGT].fini(hdev);
+
+ if (hdev->mmu_func[MMU_HR_PGT].fini != NULL)
+ hdev->mmu_func[MMU_HR_PGT].fini(hdev);
}
/**
int hl_mmu_ctx_init(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
+ int rc = -EOPNOTSUPP;
- if (hdev->mmu_enable)
- return hdev->mmu_func.ctx_init(ctx);
+ if (!hdev->mmu_enable)
+ return 0;
- return 0;
+ if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) {
+ rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx);
+ if (rc)
+ return rc;
+ }
+
+ if (hdev->mmu_func[MMU_HR_PGT].ctx_init != NULL)
+ rc = hdev->mmu_func[MMU_HR_PGT].ctx_init(ctx);
+
+ return rc;
}
/*
{
struct hl_device *hdev = ctx->hdev;
- if (hdev->mmu_enable)
- hdev->mmu_func.ctx_fini(ctx);
+ if (!hdev->mmu_enable)
+ return;
+
+ if (hdev->mmu_func[MMU_DR_PGT].ctx_fini != NULL)
+ hdev->mmu_func[MMU_DR_PGT].ctx_fini(ctx);
+
+ if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL)
+ hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx);
}
/*
struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr;
u32 real_page_size, npages;
- int i, rc = 0;
+ int i, rc = 0, pgt_residency;
bool is_dram_addr;
if (!hdev->mmu_enable)
else
mmu_prop = &prop->pmmu;
+ pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
+
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and unmap them separately.
real_virt_addr = virt_addr;
for (i = 0 ; i < npages ; i++) {
- rc = hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr);
+ rc = hdev->mmu_func[pgt_residency].unmap(ctx,
+ real_virt_addr, is_dram_addr);
if (rc)
break;
}
if (flush_pte)
- hdev->mmu_func.flush(ctx);
+ hdev->mmu_func[pgt_residency].flush(ctx);
return rc;
}
struct hl_mmu_properties *mmu_prop;
u64 real_virt_addr, real_phys_addr;
u32 real_page_size, npages;
- int i, rc, mapped_cnt = 0;
+ int i, rc, pgt_residency, mapped_cnt = 0;
bool is_dram_addr;
+
if (!hdev->mmu_enable)
return 0;
else
mmu_prop = &prop->pmmu;
+ pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
+
/*
* The H/W handles mapping of specific page sizes. Hence if the page
* size is bigger, we break it to sub-pages and map them separately.
real_phys_addr = phys_addr;
for (i = 0 ; i < npages ; i++) {
- rc = hdev->mmu_func.map(ctx, real_virt_addr, real_phys_addr,
- real_page_size, is_dram_addr);
+ rc = hdev->mmu_func[pgt_residency].map(ctx,
+ real_virt_addr, real_phys_addr,
+ real_page_size, is_dram_addr);
if (rc)
goto err;
}
if (flush_pte)
- hdev->mmu_func.flush(ctx);
+ hdev->mmu_func[pgt_residency].flush(ctx);
return 0;
err:
real_virt_addr = virt_addr;
for (i = 0 ; i < mapped_cnt ; i++) {
- if (hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr))
+ if (hdev->mmu_func[pgt_residency].unmap(ctx,
+ real_virt_addr, is_dram_addr))
dev_warn_ratelimited(hdev->dev,
"failed to unmap va: 0x%llx\n", real_virt_addr);
real_virt_addr += real_page_size;
}
- hdev->mmu_func.flush(ctx);
+ hdev->mmu_func[pgt_residency].flush(ctx);
return rc;
}
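/*
 * Editor's note (not part of the patch): flush_pte lets callers that map
 * or unmap several ranges back to back defer the PTE flush to the last
 * call; the error path above flushes unconditionally because some pages
 * may already have been mapped.
 */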
{
struct hl_device *hdev = ctx->hdev;
- if (hdev->mmu_enable)
- hdev->mmu_func.swap_out(ctx);
+ if (!hdev->mmu_enable)
+ return;
+
+ if (hdev->mmu_func[MMU_DR_PGT].swap_out != NULL)
+ hdev->mmu_func[MMU_DR_PGT].swap_out(ctx);
+
+ if (hdev->mmu_func[MMU_HR_PGT].swap_out != NULL)
+ hdev->mmu_func[MMU_HR_PGT].swap_out(ctx);
}
/*
{
struct hl_device *hdev = ctx->hdev;
- if (hdev->mmu_enable)
- hdev->mmu_func.swap_in(ctx);
+ if (!hdev->mmu_enable)
+ return;
+
+ if (hdev->mmu_func[MMU_DR_PGT].swap_in != NULL)
+ hdev->mmu_func[MMU_DR_PGT].swap_in(ctx);
+
+ if (hdev->mmu_func[MMU_HR_PGT].swap_in != NULL)
+ hdev->mmu_func[MMU_HR_PGT].swap_in(ctx);
}
int hl_mmu_if_set_funcs(struct hl_device *hdev)
switch (hdev->asic_type) {
case ASIC_GOYA:
case ASIC_GAUDI:
- hl_mmu_v1_set_funcs(hdev);
+ hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
break;
default:
dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
{
struct hl_device *hdev = ctx->hdev;
- gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr,
+ gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
hdev->asic_prop.mmu_hop_table_size);
hash_del(&pgt_info->node);
kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
if (!pgt_info)
return ULLONG_MAX;
- phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool,
+ phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
prop->mmu_hop_table_size);
if (!phys_addr) {
dev_err(hdev->dev, "failed to allocate page\n");
return shadow_addr;
shadow_err:
- gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr,
+ gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr,
prop->mmu_hop_table_size);
pool_add_err:
kfree(pgt_info);
static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
- return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 +
+ return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}
struct asic_fixed_properties *prop = &hdev->asic_prop;
int rc;
- hdev->mmu_priv.mmu_pgt_pool =
+ hdev->mmu_priv.dr.mmu_pgt_pool =
gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
- if (!hdev->mmu_priv.mmu_pgt_pool) {
+ if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
dev_err(hdev->dev, "Failed to create page gen pool\n");
return -ENOMEM;
}
- rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr +
+ rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
prop->mmu_hop0_tables_total_size,
prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
-1);
goto err_pool_add;
}
- hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
+ hdev->mmu_priv.dr.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
prop->mmu_hop_table_size,
GFP_KERNEL | __GFP_ZERO);
- if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) {
+ if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
rc = -ENOMEM;
goto err_pool_add;
}
return 0;
err_pool_add:
- gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
+ gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
return rc;
}
{
/* MMU H/W fini was already done in device hw_fini() */
- kvfree(hdev->mmu_priv.mmu_shadow_hop0);
- gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
+ kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
+ gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
}
/**
*
* @hdev: pointer to the device structure
*/
-void hl_mmu_v1_set_funcs(struct hl_device *hdev)
+void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
- struct hl_mmu_funcs *mmu = &hdev->mmu_func;
-
mmu->init = hl_mmu_v1_init;
mmu->fini = hl_mmu_v1_fini;
mmu->ctx_init = hl_mmu_v1_ctx_init;
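/*
 * Editor's sketch (not part of the patch; the setter name is
 * hypothetical): a host-resident implementation would register itself
 * into the other slot the same way, from hl_mmu_if_set_funcs():
 *
 *	hl_mmu_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
 */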