// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"
#include "../include/hw_ip/mmu/mmu_general.h"

#include <linux/slab.h>

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);

static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
        struct pgt_info *pgt_info = NULL;

        hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
                                (unsigned long) hop_addr)
                if (hop_addr == pgt_info->shadow_addr)
                        break;

        return pgt_info;
}

static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
        struct hl_device *hdev = ctx->hdev;

        gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
                        hdev->asic_prop.mmu_hop_table_size);
        hash_del(&pgt_info->node);
        kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
        kfree(pgt_info);
}

static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
        struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

        _free_hop(ctx, pgt_info);
}

static u64 alloc_hop(struct hl_ctx *ctx)
{
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct pgt_info *pgt_info;
        u64 phys_addr, shadow_addr;

        pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
        if (!pgt_info)
                return ULLONG_MAX;

        phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
                                        prop->mmu_hop_table_size);
        if (!phys_addr) {
                dev_err(hdev->dev, "failed to allocate page\n");
                goto pool_add_err;
        }

        shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
                                                GFP_KERNEL);
        if (!shadow_addr)
                goto shadow_err;

        pgt_info->phys_addr = phys_addr;
        pgt_info->shadow_addr = shadow_addr;
        pgt_info->ctx = ctx;
        pgt_info->num_of_ptes = 0;
        hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

        return shadow_addr;

shadow_err:
        gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr,
                        prop->mmu_hop_table_size);
pool_add_err:
        kfree(pgt_info);

        return ULLONG_MAX;
}

static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
        return ctx->hdev->asic_prop.mmu_pgt_addr +
                        (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
        return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
                        (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}
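
/*
 * Illustrative example (numbers are hypothetical): with
 * mmu_hop_table_size = 4KB and asid = 3, the physical hop0 table sits at
 * mmu_pgt_addr + 3 * 0x1000 and its host-resident shadow sits at the
 * same offset from mmu_shadow_hop0. Hop0 tables are pre-allocated per
 * ASID and are never handed back to the page tables pool.
 */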

static void flush(struct hl_ctx *ctx)
{
        /* flush all writes from all cores to reach PCI */
        mb();
        ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}

/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
        /*
         * The value to write is actually the address of the next shadow hop +
         * flags at the 12 LSBs.
         * Hence in order to get the value to write to the physical PTE, we
         * clear the 12 LSBs and translate the shadow hop to its associated
         * physical hop, and add back the original 12 LSBs.
         */
        u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
                                (val & FLAGS_MASK);

        ctx->hdev->asic_funcs->write_pte(ctx->hdev,
                                        get_phys_addr(ctx, shadow_pte_addr),
                                        phys_val);

        *(u64 *) (uintptr_t) shadow_pte_addr = val;
}
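
/*
 * Illustrative example of the translation above (addresses are
 * hypothetical): if val holds shadow hop address 0xffff888012345000 plus
 * flags, and that shadow hop is backed by physical hop 0x20001000, the
 * PTE written to H/W is 0x20001000 | flags, while the shadow PTE keeps
 * the host address so later walks stay in host memory.
 */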

/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
                                        u64 val)
{
        ctx->hdev->asic_funcs->write_pte(ctx->hdev,
                                        get_phys_addr(ctx, shadow_pte_addr),
                                        val);
        *(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
        /* no need to transform the value to physical address */
        write_final_pte(ctx, pte_addr, 0);
}

static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
        get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}

/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
        struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
        int num_of_ptes_left;

        pgt_info->num_of_ptes--;

        /*
         * Need to save the number of ptes left because free_hop might free
         * the pgt_info
         */
        num_of_ptes_left = pgt_info->num_of_ptes;
        if (!num_of_ptes_left)
                _free_hop(ctx, pgt_info);

        return num_of_ptes_left;
}

static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
                                        u64 virt_addr, u64 mask, u64 shift)
{
        return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
                        ((virt_addr & mask) >> shift);
}
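
/*
 * Illustrative example (hypothetical values): with mmu_pte_size = 8,
 * mask = 0x7FC0000000 and shift = 30, bits [38:30] of virt_addr select
 * the entry; virt_addr = 0x80000000 gives index 2, so the PTE address is
 * hop_addr + 16.
 */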

static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
                                        struct hl_mmu_properties *mmu_prop,
                                        u64 hop_addr, u64 vaddr)
{
        return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
                                mmu_prop->hop0_shift);
}

static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
                                        struct hl_mmu_properties *mmu_prop,
                                        u64 hop_addr, u64 vaddr)
{
        return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
                                mmu_prop->hop1_shift);
}

static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
                                        struct hl_mmu_properties *mmu_prop,
                                        u64 hop_addr, u64 vaddr)
{
        return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
                                mmu_prop->hop2_shift);
}

static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
                                        struct hl_mmu_properties *mmu_prop,
                                        u64 hop_addr, u64 vaddr)
{
        return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
                                mmu_prop->hop3_shift);
}

static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
                                        struct hl_mmu_properties *mmu_prop,
                                        u64 hop_addr, u64 vaddr)
{
        return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
                                mmu_prop->hop4_shift);
}

static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
        if (curr_pte & PAGE_PRESENT_MASK)
                return curr_pte & HOP_PHYS_ADDR_MASK;
        else
                return ULLONG_MAX;
}

static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
                                                bool *is_new_hop)
{
        u64 hop_addr = get_next_hop_addr(ctx, curr_pte);

        if (hop_addr == ULLONG_MAX) {
                hop_addr = alloc_hop(ctx);
                *is_new_hop = (hop_addr != ULLONG_MAX);
        }

        return hop_addr;
}

/* translates shadow address inside hop to a physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
        u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
        u64 shadow_hop_addr = shadow_addr & ~page_mask;
        u64 pte_offset = shadow_addr & page_mask;
        u64 phys_hop_addr;

        if (shadow_hop_addr != get_hop0_addr(ctx))
                phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
        else
                phys_hop_addr = get_phys_hop0_addr(ctx);

        return phys_hop_addr + pte_offset;
}
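
/*
 * Illustrative example (hypothetical address): with 4KB hop tables,
 * shadow_addr = 0xffff888012345678 splits into hop base
 * 0xffff888012345000 and offset 0x678; the base is translated via the
 * shadow hash (or the hop0 special case) and the offset is carried over
 * unchanged.
 */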

static int dram_default_mapping_init(struct hl_ctx *ctx)
{
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
                hop2_pte_addr, hop3_pte_addr, pte_val;
        int rc, i, j, hop3_allocated = 0;

        if ((!prop->dram_supports_virtual_memory) ||
                        (!hdev->dram_default_page_mapping) ||
                        (ctx->asid == HL_KERNEL_ASID_ID))
                return 0;

        num_of_hop3 = prop->dram_size_for_default_page_mapping;
        do_div(num_of_hop3, prop->dram_page_size);
        do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

        /* add hop1 and hop2 */
        total_hops = num_of_hop3 + 2;

        ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
        if (!ctx->dram_default_hops)
                return -ENOMEM;

        hop0_addr = get_hop0_addr(ctx);

        hop1_addr = alloc_hop(ctx);
        if (hop1_addr == ULLONG_MAX) {
                dev_err(hdev->dev, "failed to alloc hop 1\n");
                rc = -ENOMEM;
                goto hop1_err;
        }

        ctx->dram_default_hops[total_hops - 1] = hop1_addr;

        hop2_addr = alloc_hop(ctx);
        if (hop2_addr == ULLONG_MAX) {
                dev_err(hdev->dev, "failed to alloc hop 2\n");
                rc = -ENOMEM;
                goto hop2_err;
        }

        ctx->dram_default_hops[total_hops - 2] = hop2_addr;

        for (i = 0 ; i < num_of_hop3 ; i++) {
                ctx->dram_default_hops[i] = alloc_hop(ctx);
                if (ctx->dram_default_hops[i] == ULLONG_MAX) {
                        dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
                        rc = -ENOMEM;
                        goto hop3_err;
                }
                hop3_allocated++;
        }

        /* need only pte 0 in hops 0 and 1 */
        pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
        write_pte(ctx, hop0_addr, pte_val);

        pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
        write_pte(ctx, hop1_addr, pte_val);
        get_pte(ctx, hop1_addr);

        hop2_pte_addr = hop2_addr;
        for (i = 0 ; i < num_of_hop3 ; i++) {
                pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
                                PAGE_PRESENT_MASK;
                write_pte(ctx, hop2_pte_addr, pte_val);
                get_pte(ctx, hop2_addr);
                hop2_pte_addr += HL_PTE_SIZE;
        }

        pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
                        LAST_MASK | PAGE_PRESENT_MASK;

        for (i = 0 ; i < num_of_hop3 ; i++) {
                hop3_pte_addr = ctx->dram_default_hops[i];
                for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
                        write_final_pte(ctx, hop3_pte_addr, pte_val);
                        get_pte(ctx, ctx->dram_default_hops[i]);
                        hop3_pte_addr += HL_PTE_SIZE;
                }
        }

        flush(ctx);

        return 0;

hop3_err:
        for (i = 0 ; i < hop3_allocated ; i++)
                free_hop(ctx, ctx->dram_default_hops[i]);

        free_hop(ctx, hop2_addr);
hop2_err:
        free_hop(ctx, hop1_addr);
hop1_err:
        kfree(ctx->dram_default_hops);

        return rc;
}
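
/*
 * Sizing example (illustrative numbers, not taken from a specific ASIC):
 * with dram_size_for_default_page_mapping = 16GB, dram_page_size = 2MB
 * and PTE_ENTRIES_IN_HOP = 512, num_of_hop3 = 16GB / 2MB / 512 = 16, so
 * the default mapping keeps 16 hop3 tables plus one hop1 and one hop2.
 */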

static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
                hop2_pte_addr, hop3_pte_addr;
        int i, j;

        if ((!prop->dram_supports_virtual_memory) ||
                        (!hdev->dram_default_page_mapping) ||
                        (ctx->asid == HL_KERNEL_ASID_ID))
                return;

        num_of_hop3 = prop->dram_size_for_default_page_mapping;
        do_div(num_of_hop3, prop->dram_page_size);
        do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

        hop0_addr = get_hop0_addr(ctx);
        /* add hop1 and hop2 */
        total_hops = num_of_hop3 + 2;
        hop1_addr = ctx->dram_default_hops[total_hops - 1];
        hop2_addr = ctx->dram_default_hops[total_hops - 2];

        for (i = 0 ; i < num_of_hop3 ; i++) {
                hop3_pte_addr = ctx->dram_default_hops[i];
                for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
                        clear_pte(ctx, hop3_pte_addr);
                        put_pte(ctx, ctx->dram_default_hops[i]);
                        hop3_pte_addr += HL_PTE_SIZE;
                }
        }

        hop2_pte_addr = hop2_addr;
        for (i = 0 ; i < num_of_hop3 ; i++) {
                clear_pte(ctx, hop2_pte_addr);
                put_pte(ctx, hop2_addr);
                hop2_pte_addr += HL_PTE_SIZE;
        }

        clear_pte(ctx, hop1_addr);
        put_pte(ctx, hop1_addr);
        clear_pte(ctx, hop0_addr);

        kfree(ctx->dram_default_hops);

        flush(ctx);
}

/**
 * hl_mmu_v1_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for pgt_infos.
 * - Create a shadow table for pgt.
 *
 * Return: 0 for success, non-zero for failure.
 */
static int hl_mmu_v1_init(struct hl_device *hdev)
{
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        int rc;

        hdev->mmu_priv.dr.mmu_pgt_pool =
                        gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

        if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
                dev_err(hdev->dev, "Failed to create page gen pool\n");
                return -ENOMEM;
        }

        rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
                        prop->mmu_hop0_tables_total_size,
                        prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
                        -1);
        if (rc) {
                dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
                goto err_pool_add;
        }

        hdev->mmu_priv.dr.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
                                                prop->mmu_hop_table_size,
                                                GFP_KERNEL | __GFP_ZERO);
        if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
                rc = -ENOMEM;
                goto err_pool_add;
        }

        /* MMU H/W init will be done in device hw_init() */

        return 0;

err_pool_add:
        gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);

        return rc;
}
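
/*
 * Layout example (illustrative numbers): if max_asid = 1024 and
 * mmu_hop_table_size = 4KB, the first 4MB at mmu_pgt_addr hold the
 * per-ASID hop0 tables, and only the remainder of mmu_pgt_size is added
 * to the gen pool for dynamically allocated hops.
 */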

/**
 * hl_mmu_v1_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Disable MMU in H/W.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
static void hl_mmu_v1_fini(struct hl_device *hdev)
{
        /* MMU H/W fini was already done in device hw_fini() */

        if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
                kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
                gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
        }

        /* Make sure that if we arrive here again without init being called,
         * we won't cause a kernel panic. This can happen, for example, if we
         * fail during the hard reset code at certain points
         */
        hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
}

/**
 * hl_mmu_v1_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a hash to hold all the page-table hops related to this
 * context, and set up the DRAM default page mapping.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
{
        hash_init(ctx->mmu_shadow_hash);
        return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_v1_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free DRAM default page mapping hops
 */
static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
{
        struct hl_device *hdev = ctx->hdev;
        struct pgt_info *pgt_info;
        struct hlist_node *tmp;
        int i;

        dram_default_mapping_fini(ctx);

        if (!hash_empty(ctx->mmu_shadow_hash))
                dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
                        ctx->asid);

        hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
                dev_err_ratelimited(hdev->dev,
                        "pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
                        pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
                _free_hop(ctx, pgt_info);
        }
}

static int _hl_mmu_v1_unmap(struct hl_ctx *ctx,
                                u64 virt_addr, bool is_dram_addr)
{
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_mmu_properties *mmu_prop;
        u64 hop0_addr = 0, hop0_pte_addr = 0,
                hop1_addr = 0, hop1_pte_addr = 0,
                hop2_addr = 0, hop2_pte_addr = 0,
                hop3_addr = 0, hop3_pte_addr = 0,
                hop4_addr = 0, hop4_pte_addr = 0,
                curr_pte;
        bool is_huge, clear_hop3 = true;

        /* shifts and masks are the same in PMMU and HPMMU, use one of them */
        mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

        hop0_addr = get_hop0_addr(ctx);
        hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);

        curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

        hop1_addr = get_next_hop_addr(ctx, curr_pte);

        if (hop1_addr == ULLONG_MAX)
                goto not_mapped;

        hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);

        curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

        hop2_addr = get_next_hop_addr(ctx, curr_pte);

        if (hop2_addr == ULLONG_MAX)
                goto not_mapped;

        hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);

        curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

        hop3_addr = get_next_hop_addr(ctx, curr_pte);

        if (hop3_addr == ULLONG_MAX)
                goto not_mapped;

        hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);

        curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

        is_huge = curr_pte & LAST_MASK;

        if (is_dram_addr && !is_huge) {
                dev_err(hdev->dev,
                        "DRAM unmapping should use huge pages only\n");
                return -EFAULT;
        }

        if (!is_huge) {
                hop4_addr = get_next_hop_addr(ctx, curr_pte);

                if (hop4_addr == ULLONG_MAX)
                        goto not_mapped;

                hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
                                                        virt_addr);

                curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;

                clear_hop3 = false;
        }

        if (hdev->dram_default_page_mapping && is_dram_addr) {
                u64 default_pte = (prop->mmu_dram_default_page_addr &
                                HOP_PHYS_ADDR_MASK) | LAST_MASK |
                                        PAGE_PRESENT_MASK;
                if (curr_pte == default_pte) {
                        dev_err(hdev->dev,
                                "DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
                                virt_addr);
                        goto not_mapped;
                }

                if (!(curr_pte & PAGE_PRESENT_MASK)) {
                        dev_err(hdev->dev,
                                "DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
                                virt_addr);
                        goto not_mapped;
                }

                write_final_pte(ctx, hop3_pte_addr, default_pte);
                put_pte(ctx, hop3_addr);
        } else {
                if (!(curr_pte & PAGE_PRESENT_MASK))
                        goto not_mapped;

                if (hop4_addr)
                        clear_pte(ctx, hop4_pte_addr);
                else
                        clear_pte(ctx, hop3_pte_addr);

                if (hop4_addr && !put_pte(ctx, hop4_addr))
                        clear_hop3 = true;

                if (!clear_hop3)
                        goto mapped;

                clear_pte(ctx, hop3_pte_addr);

                if (put_pte(ctx, hop3_addr))
                        goto mapped;

                clear_pte(ctx, hop2_pte_addr);

                if (put_pte(ctx, hop2_addr))
                        goto mapped;

                clear_pte(ctx, hop1_pte_addr);

                if (put_pte(ctx, hop1_addr))
                        goto mapped;

                clear_pte(ctx, hop0_pte_addr);
        }

mapped:
        return 0;

not_mapped:
        dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
                virt_addr);

        return -EINVAL;
}
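
/*
 * Unmap flow in words: clearing a leaf PTE drops a reference on its hop;
 * when a hop's num_of_ptes reaches zero the hop is freed and its PTE in
 * the parent hop is cleared in turn, so a single unmap can collapse
 * empty hops from hop4 all the way up to hop0.
 */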

static int _hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
                        u32 page_size, bool is_dram_addr)
{
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_mmu_properties *mmu_prop;
        u64 hop0_addr = 0, hop0_pte_addr = 0,
                hop1_addr = 0, hop1_pte_addr = 0,
                hop2_addr = 0, hop2_pte_addr = 0,
                hop3_addr = 0, hop3_pte_addr = 0,
                hop4_addr = 0, hop4_pte_addr = 0,
                curr_pte = 0;
        bool hop1_new = false, hop2_new = false, hop3_new = false,
                hop4_new = false, is_huge;
        int rc = -ENOMEM;

        /*
         * This mapping function can map a page or a huge page. For a huge
         * page there are only 3 hops rather than 4. Currently the DRAM
         * allocation uses huge pages only but user memory could have been
         * allocated with one of the two page sizes. Since this is common
         * code for all three cases, we need this huge page check.
         */
        if (is_dram_addr) {
                mmu_prop = &prop->dmmu;
                is_huge = true;
        } else if (page_size == prop->pmmu_huge.page_size) {
                mmu_prop = &prop->pmmu_huge;
                is_huge = true;
        } else {
                mmu_prop = &prop->pmmu;
                is_huge = false;
        }

        hop0_addr = get_hop0_addr(ctx);
        hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
        curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

        hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
        if (hop1_addr == ULLONG_MAX)
                goto err;

        hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
        curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

        hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
        if (hop2_addr == ULLONG_MAX)
                goto err;

        hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
        curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

        hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
        if (hop3_addr == ULLONG_MAX)
                goto err;

        hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
        curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

        if (!is_huge) {
                hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
                if (hop4_addr == ULLONG_MAX)
                        goto err;

                hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
                                                        virt_addr);
                curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
        }

        if (hdev->dram_default_page_mapping && is_dram_addr) {
                u64 default_pte = (prop->mmu_dram_default_page_addr &
                                        HOP_PHYS_ADDR_MASK) | LAST_MASK |
                                                PAGE_PRESENT_MASK;

                if (curr_pte != default_pte) {
                        dev_err(hdev->dev,
                                "DRAM: mapping already exists for virt_addr 0x%llx\n",
                                virt_addr);
                        rc = -EINVAL;
                        goto err;
                }

                if (hop1_new || hop2_new || hop3_new || hop4_new) {
                        dev_err(hdev->dev,
                                "DRAM mapping should not allocate more hops\n");
                        rc = -EFAULT;
                        goto err;
                }
        } else if (curr_pte & PAGE_PRESENT_MASK) {
                dev_err(hdev->dev,
                        "mapping already exists for virt_addr 0x%llx\n",
                        virt_addr);

                dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
                        *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
                dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
                        *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
                dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
                        *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
                dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
                        *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);

                if (!is_huge)
                        dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
                                *(u64 *) (uintptr_t) hop4_pte_addr,
                                hop4_pte_addr);

                rc = -EINVAL;
                goto err;
        }

        curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
                        | PAGE_PRESENT_MASK;

        if (is_huge)
                write_final_pte(ctx, hop3_pte_addr, curr_pte);
        else
                write_final_pte(ctx, hop4_pte_addr, curr_pte);

        if (hop1_new) {
                curr_pte =
                        (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
                write_pte(ctx, hop0_pte_addr, curr_pte);
        }
        if (hop2_new) {
                curr_pte =
                        (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
                write_pte(ctx, hop1_pte_addr, curr_pte);
                get_pte(ctx, hop1_addr);
        }
        if (hop3_new) {
                curr_pte =
                        (hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
                write_pte(ctx, hop2_pte_addr, curr_pte);
                get_pte(ctx, hop2_addr);
        }

        if (!is_huge) {
                if (hop4_new) {
                        curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
                                        PAGE_PRESENT_MASK;
                        write_pte(ctx, hop3_pte_addr, curr_pte);
                        get_pte(ctx, hop3_addr);
                }

                get_pte(ctx, hop4_addr);
        } else {
                get_pte(ctx, hop3_addr);
        }

        return 0;

err:
        if (hop4_new)
                free_hop(ctx, hop4_addr);
        if (hop3_new)
                free_hop(ctx, hop3_addr);
        if (hop2_new)
                free_hop(ctx, hop2_addr);
        if (hop1_new)
                free_hop(ctx, hop1_addr);

        return rc;
}
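
/*
 * Illustrative example: mapping a regular page walks hop0..hop4 and sets
 * the leaf PTE in hop4, while a huge page (and any DRAM page) stops at
 * hop3 with LAST_MASK set, so a single hop3 PTE covers the whole huge
 * page.
 */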

/*
 * hl_mmu_v1_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{

}

static inline u64 get_hop_pte_addr(struct hl_ctx *ctx,
                                struct hl_mmu_properties *mmu_prop,
                                int hop_num, u64 hop_addr, u64 virt_addr)
{
        switch (hop_num) {
        case 0:
                return get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
        case 1:
                return get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
        case 2:
                return get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
        case 3:
                return get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
        case 4:
                return get_hop4_pte_addr(ctx, mmu_prop, hop_addr, virt_addr);
        default:
                break;
        }
        return U64_MAX;
}

static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
                                struct hl_mmu_hop_info *hops)
{
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_mmu_properties *mmu_prop;
        bool is_dram_addr, is_pmmu_addr, is_pmmu_h_addr, is_huge;
        int i, used_hops;

        is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
                                                prop->dmmu.start_addr,
                                                prop->dmmu.end_addr);
        is_pmmu_addr = hl_mem_area_inside_range(virt_addr, prop->pmmu.page_size,
                                                prop->pmmu.start_addr,
                                                prop->pmmu.end_addr);
        is_pmmu_h_addr = hl_mem_area_inside_range(virt_addr,
                                                prop->pmmu_huge.page_size,
                                                prop->pmmu_huge.start_addr,
                                                prop->pmmu_huge.end_addr);
        if (is_dram_addr) {
                mmu_prop = &prop->dmmu;
                is_huge = true;
        } else if (is_pmmu_addr) {
                mmu_prop = &prop->pmmu;
                is_huge = false;
        } else if (is_pmmu_h_addr) {
                mmu_prop = &prop->pmmu_huge;
                is_huge = true;
        } else {
                return -EINVAL;
        }

        used_hops = mmu_prop->num_hops;

        /* huge pages use one less hop */
        if (is_huge)
                used_hops--;

        hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx);
        hops->hop_info[0].hop_pte_addr =
                        get_hop_pte_addr(ctx, mmu_prop, 0,
                                        hops->hop_info[0].hop_addr, virt_addr);
        hops->hop_info[0].hop_pte_val =
                        hdev->asic_funcs->read_pte(hdev,
                                                hops->hop_info[0].hop_pte_addr);

        for (i = 1 ; i < used_hops ; i++) {
                hops->hop_info[i].hop_addr =
                        get_next_hop_addr(ctx,
                                        hops->hop_info[i - 1].hop_pte_val);
                if (hops->hop_info[i].hop_addr == ULLONG_MAX)
                        return -EFAULT;

                hops->hop_info[i].hop_pte_addr =
                                get_hop_pte_addr(ctx, mmu_prop, i,
                                                hops->hop_info[i].hop_addr,
                                                virt_addr);
                hops->hop_info[i].hop_pte_val =
                                hdev->asic_funcs->read_pte(hdev,
                                                hops->hop_info[i].hop_pte_addr);

                if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
                        return -EFAULT;

                if (hops->hop_info[i].hop_pte_val & LAST_MASK)
                        break;
        }

        /* if the walk passed over all hops then no last hop was found */
        if (i == mmu_prop->num_hops)
                return -EFAULT;

        if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
                return -EFAULT;

        hops->used_hops = i + 1;

        return 0;
}

/*
 * hl_mmu_v1_set_funcs - set the MMU function pointers for MMU v1
 *
 * @hdev: pointer to the device structure
 * @mmu: pointer to the MMU functions structure to fill
 */
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
        mmu->init = hl_mmu_v1_init;
        mmu->fini = hl_mmu_v1_fini;
        mmu->ctx_init = hl_mmu_v1_ctx_init;
        mmu->ctx_fini = hl_mmu_v1_ctx_fini;
        mmu->map = _hl_mmu_v1_map;
        mmu->unmap = _hl_mmu_v1_unmap;
        mmu->flush = flush;
        mmu->swap_out = hl_mmu_v1_swap_out;
        mmu->swap_in = hl_mmu_v1_swap_in;
        mmu->get_tlb_info = hl_mmu_v1_get_tlb_info;
}