drivers/misc/habanalabs/gaudi/gaudi.c
1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2020 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#include "gaudiP.h"
9#include "include/hw_ip/mmu/mmu_general.h"
10#include "include/hw_ip/mmu/mmu_v1_1.h"
11#include "include/gaudi/gaudi_masks.h"
12#include "include/gaudi/gaudi_fw_if.h"
13#include "include/gaudi/gaudi_reg_map.h"
14#include "include/gaudi/gaudi_async_ids_map_extended.h"
15
16#include <linux/module.h>
17#include <linux/pci.h>
18#include <linux/firmware.h>
19#include <linux/hwmon.h>
20#include <linux/genalloc.h>
21#include <linux/io-64-nonatomic-lo-hi.h>
22#include <linux/iommu.h>
23#include <linux/seq_file.h>
24
25/*
26 * Gaudi security scheme:
27 *
28 * 1. Host is protected by:
29 * - Range registers
30 * - MMU
31 *
32 * 2. DDR is protected by:
33 * - Range registers (protect the first 512MB)
34 *
35 * 3. Configuration is protected by:
36 * - Range registers
37 * - Protection bits
38 *
39 * MMU is always enabled.
40 *
41 * QMAN DMA channels 0,1,5 (PCI DMA):
42 * - DMA is not secured.
43 * - PQ and CQ are secured.
44 * - CP is secured: The driver needs to parse CB but WREG should be allowed
45 * because of TDMA (tensor DMA). Hence, WREG is never
46 * secured.
47 *
48 * When the driver needs to use DMA it will check that Gaudi is idle, set DMA
49 * channel 0 to be secured, execute the DMA and change it back to not secured.
50 * Currently, the driver doesn't use the DMA while there are compute jobs
51 * running.
52 *
53 * The current use cases for the driver to use the DMA are:
54 * - Clear SRAM on context switch (happens on context switch when device is
55 * idle)
56 * - MMU page tables area clear (happens on init)
57 *
58 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
59 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
60 * CQ, CP and the engine are not secured
61 *
62 */
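/*
 * A minimal sketch of the secure-DMA flow described above (illustrative
 * only; gaudi_qman0_set_security() and the idle check are assumed helper
 * names and are a simplification of the actual code further below):
 *
 *	if (hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
 *		gaudi_qman0_set_security(hdev, true);	 // secure DMA channel 0
 *		rc = gaudi_send_job_on_qman0(hdev, job); // run the DMA job
 *		gaudi_qman0_set_security(hdev, false);	 // back to not secured
 *	}
 */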
63
64#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
65#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
66#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
67
68#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
69
70#define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */
71#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
72#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
73#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
74
75#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
76#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
77#define GAUDI_PLDM_SRESET_TIMEOUT_MSEC 14000 /* 14s */
78#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
79#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
80#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
81#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
82#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
83
84#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
85
86#define GAUDI_MAX_STRING_LEN 20
87
88#define GAUDI_CB_POOL_CB_CNT 512
89#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
90
91#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
92
93#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
94
95#define GAUDI_NUM_OF_QM_ERR_CAUSE 16
96
97#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
98
99#define GAUDI_ARB_WDT_TIMEOUT 0x400000
100
101static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
102 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
103 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
104 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
105 "gaudi cpu eq"
106};
107
108static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
109 [GAUDI_PCI_DMA_1] = 0,
110 [GAUDI_PCI_DMA_2] = 1,
111 [GAUDI_PCI_DMA_3] = 5,
112 [GAUDI_HBM_DMA_1] = 2,
113 [GAUDI_HBM_DMA_2] = 3,
114 [GAUDI_HBM_DMA_3] = 4,
115 [GAUDI_HBM_DMA_4] = 6,
116 [GAUDI_HBM_DMA_5] = 7
117};
118
119static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
120 [0] = GAUDI_QUEUE_ID_DMA_0_0,
121 [1] = GAUDI_QUEUE_ID_DMA_0_1,
122 [2] = GAUDI_QUEUE_ID_DMA_0_2,
123 [3] = GAUDI_QUEUE_ID_DMA_0_3,
124 [4] = GAUDI_QUEUE_ID_DMA_1_0,
125 [5] = GAUDI_QUEUE_ID_DMA_1_1,
126 [6] = GAUDI_QUEUE_ID_DMA_1_2,
127 [7] = GAUDI_QUEUE_ID_DMA_1_3,
128 [8] = GAUDI_QUEUE_ID_DMA_5_0,
129 [9] = GAUDI_QUEUE_ID_DMA_5_1,
130 [10] = GAUDI_QUEUE_ID_DMA_5_2,
131 [11] = GAUDI_QUEUE_ID_DMA_5_3
132};
133
134static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
135 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
136 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
137 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
138 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
139 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
140 [PACKET_REPEAT] = sizeof(struct packet_repeat),
141 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
142 [PACKET_FENCE] = sizeof(struct packet_fence),
143 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
144 [PACKET_NOP] = sizeof(struct packet_nop),
145 [PACKET_STOP] = sizeof(struct packet_stop),
146 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
147 [PACKET_WAIT] = sizeof(struct packet_wait),
148 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
149};
150
151static const char * const
152gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
153 "tpc_address_exceed_slm",
154 "tpc_div_by_0",
155 "tpc_spu_mac_overflow",
156 "tpc_spu_addsub_overflow",
157 "tpc_spu_abs_overflow",
158 "tpc_spu_fp_dst_nan_inf",
159 "tpc_spu_fp_dst_denorm",
160 "tpc_vpu_mac_overflow",
161 "tpc_vpu_addsub_overflow",
162 "tpc_vpu_abs_overflow",
163 "tpc_vpu_fp_dst_nan_inf",
164 "tpc_vpu_fp_dst_denorm",
165 "tpc_assertions",
166 "tpc_illegal_instruction",
167 "tpc_pc_wrap_around",
168 "tpc_qm_sw_err",
169 "tpc_hbw_rresp_err",
170 "tpc_hbw_bresp_err",
171 "tpc_lbw_rresp_err",
172 "tpc_lbw_bresp_err"
173};
174
175static const char * const
176gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
177 "PQ AXI HBW error",
178 "CQ AXI HBW error",
179 "CP AXI HBW error",
180 "CP error due to undefined OPCODE",
181 "CP encountered STOP OPCODE",
182 "CP AXI LBW error",
183 "CP WRREG32 or WRBULK returned error",
184 "N/A",
185 "FENCE 0 inc over max value and clipped",
186 "FENCE 1 inc over max value and clipped",
187 "FENCE 2 inc over max value and clipped",
188 "FENCE 3 inc over max value and clipped",
189 "FENCE 0 dec under min value and clipped",
190 "FENCE 1 dec under min value and clipped",
191 "FENCE 2 dec under min value and clipped",
192 "FENCE 3 dec under min value and clipped"
193};
194
195static const char * const
196gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
197 "Choice push while full error",
198 "Choice Q watchdog error",
199 "MSG AXI LBW returned with error"
200};
201
202static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
203 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
204 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
205 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
206 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
207 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
208 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
209 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
210 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
211 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
212 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
213 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
214 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
215 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
216 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
217 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
218 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
219 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
220 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
221 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
222 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
223 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
224 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
225 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
226 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
227 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
228 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
229 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
230 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
231 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
232 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
233 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
234 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
235 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
236 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
237 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
238 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
239 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
240 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
241 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
242 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
243 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
244 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
245 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
246 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
247 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
248 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
249 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
250 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
251 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
252 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
253 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
254 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
255 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
256 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
257 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
258 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
259 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
260 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
261 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
262 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
263 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
264 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
265 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
266 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
267 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
268 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
269 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
270 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
271 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
272 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
273 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
274 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
275 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
276 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */
277 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */
278 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */
279 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */
280 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */
281 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */
282 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */
283 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */
284 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */
285 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */
286 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */
287 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */
288 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */
289 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */
290 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */
291 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */
292 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */
293 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */
294 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */
295 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */
296 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */
297 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */
298 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */
299 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */
300 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */
301 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */
302 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */
303 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */
304 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */
305 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */
306 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */
307 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */
308 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */
309 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */
310 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */
311 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */
312 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */
313 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */
314 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */
315 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */
316};
317
318static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
319 u64 phys_addr);
320static int gaudi_send_job_on_qman0(struct hl_device *hdev,
321 struct hl_cs_job *job);
322static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
323 u32 size, u64 val);
324static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
325 u32 tpc_id);
326static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
327static int gaudi_armcp_info_get(struct hl_device *hdev);
328static void gaudi_disable_clock_gating(struct hl_device *hdev);
329static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
330
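/*
 * gaudi_get_fixed_properties - fill hdev->asic_prop with the queue layout,
 * DRAM/SRAM address ranges, MMU hop geometry and the other constants that
 * do not change at runtime.
 */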
331static int gaudi_get_fixed_properties(struct hl_device *hdev)
332{
333 struct asic_fixed_properties *prop = &hdev->asic_prop;
334 int i;
335
336 if (GAUDI_QUEUE_ID_SIZE >= HL_MAX_QUEUES) {
337 dev_err(hdev->dev,
338 "Number of H/W queues must be smaller than %d\n",
339 HL_MAX_QUEUES);
340 return -EFAULT;
341 }
342
343 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
344 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
345 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
346 prop->hw_queues_props[i].driver_only = 0;
347 prop->hw_queues_props[i].requires_kernel_cb = 1;
348 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
349 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
350 prop->hw_queues_props[i].driver_only = 1;
351 prop->hw_queues_props[i].requires_kernel_cb = 0;
352 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
353 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
354 prop->hw_queues_props[i].driver_only = 0;
355 prop->hw_queues_props[i].requires_kernel_cb = 0;
356 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
357 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
358 prop->hw_queues_props[i].driver_only = 0;
359 prop->hw_queues_props[i].requires_kernel_cb = 0;
360 }
361 }
362
363 for (; i < HL_MAX_QUEUES; i++)
364 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
365
366 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
367
368 prop->dram_base_address = DRAM_PHYS_BASE;
369 prop->dram_size = GAUDI_HBM_SIZE_32GB;
370 prop->dram_end_address = prop->dram_base_address +
371 prop->dram_size;
372 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
373
374 prop->sram_base_address = SRAM_BASE_ADDR;
375 prop->sram_size = SRAM_SIZE;
376 prop->sram_end_address = prop->sram_base_address +
377 prop->sram_size;
378 prop->sram_user_base_address = prop->sram_base_address +
379 SRAM_USER_BASE_OFFSET;
380
381 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
382 if (hdev->pldm)
383 prop->mmu_pgt_size = 0x800000; /* 8MB */
384 else
385 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
386 prop->mmu_pte_size = HL_PTE_SIZE;
387 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
388 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
389 prop->dram_page_size = PAGE_SIZE_2MB;
390
391 prop->pmmu.hop0_shift = HOP0_SHIFT;
392 prop->pmmu.hop1_shift = HOP1_SHIFT;
393 prop->pmmu.hop2_shift = HOP2_SHIFT;
394 prop->pmmu.hop3_shift = HOP3_SHIFT;
395 prop->pmmu.hop4_shift = HOP4_SHIFT;
396 prop->pmmu.hop0_mask = HOP0_MASK;
397 prop->pmmu.hop1_mask = HOP1_MASK;
398 prop->pmmu.hop2_mask = HOP2_MASK;
399 prop->pmmu.hop3_mask = HOP3_MASK;
400 prop->pmmu.hop4_mask = HOP4_MASK;
401 prop->pmmu.start_addr = VA_HOST_SPACE_START;
402 prop->pmmu.end_addr =
403 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
404 prop->pmmu.page_size = PAGE_SIZE_4KB;
405
406 /* PMMU and HPMMU are the same except for the page size */
407 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
408 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
409
410 /* shifts and masks are the same in PMMU and DMMU */
411 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
412 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
413 prop->dmmu.end_addr = VA_HOST_SPACE_END;
414 prop->dmmu.page_size = PAGE_SIZE_2MB;
415
416 prop->cfg_size = CFG_SIZE;
417 prop->max_asid = MAX_ASID;
418 prop->num_of_events = GAUDI_EVENT_SIZE;
419 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
420
421 prop->max_power_default = MAX_POWER_DEFAULT;
422
423 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
424 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
425
426 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
427 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
428
429 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
430 CARD_NAME_MAX_LEN);
431
432 return 0;
433}
434
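/*
 * gaudi_pci_bars_map - map the SRAM, CFG and HBM PCI BARs (HBM is mapped
 * write-combined) and derive hdev->rmmio, the kernel virtual address used
 * for configuration register access.
 */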
435static int gaudi_pci_bars_map(struct hl_device *hdev)
436{
437 static const char * const name[] = {"SRAM", "CFG", "HBM"};
438 bool is_wc[3] = {false, false, true};
439 int rc;
440
441 rc = hl_pci_bars_map(hdev, name, is_wc);
442 if (rc)
443 return rc;
444
445 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
446 (CFG_BASE - SPI_FLASH_BASE_ADDR);
447
448 return 0;
449}
450
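/*
 * gaudi_set_hbm_bar_base - move the HBM BAR window so it points at @addr.
 * Returns the previous base address on success, or U64_MAX if reprogramming
 * the iATU region failed.
 */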
451static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
452{
453 struct gaudi_device *gaudi = hdev->asic_specific;
454 u64 old_addr = addr;
455 int rc;
456
457 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
458 return old_addr;
459
460 /* Inbound Region 2 - Bar 4 - Point to HBM */
461 rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr);
462 if (rc)
463 return U64_MAX;
464
465 if (gaudi) {
466 old_addr = gaudi->hbm_bar_cur_addr;
467 gaudi->hbm_bar_cur_addr = addr;
468 }
469
470 return old_addr;
471}
472
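/*
 * gaudi_init_iatu - program the PCIe iATU inbound regions: region 1 (BAR 2)
 * is set to BAR-match mode to expose the SPI flash, and the common helper
 * then configures the SRAM, DRAM and host regions.
 */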
473static int gaudi_init_iatu(struct hl_device *hdev)
474{
475 int rc = 0;
476
477 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
478 rc = hl_pci_iatu_write(hdev, 0x314,
479 lower_32_bits(SPI_FLASH_BASE_ADDR));
480 rc |= hl_pci_iatu_write(hdev, 0x318,
481 upper_32_bits(SPI_FLASH_BASE_ADDR));
482 rc |= hl_pci_iatu_write(hdev, 0x300, 0);
483 /* Enable + Bar match + match enable */
484 rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200);
485
486 if (rc)
487 return -EIO;
488
489 return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
490 HOST_PHYS_BASE, HOST_PHYS_SIZE);
491}
492
493static int gaudi_early_init(struct hl_device *hdev)
494{
495 struct asic_fixed_properties *prop = &hdev->asic_prop;
496 struct pci_dev *pdev = hdev->pdev;
497 int rc;
498
499 rc = gaudi_get_fixed_properties(hdev);
500 if (rc) {
501 dev_err(hdev->dev, "Failed to get fixed properties\n");
502 return rc;
503 }
504
505 /* Check BAR sizes */
506 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
507 dev_err(hdev->dev,
508 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
509 SRAM_BAR_ID,
510 (unsigned long long) pci_resource_len(pdev,
511 SRAM_BAR_ID),
512 SRAM_BAR_SIZE);
513 return -ENODEV;
514 }
515
516 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
517 dev_err(hdev->dev,
518 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
519 CFG_BAR_ID,
520 (unsigned long long) pci_resource_len(pdev,
521 CFG_BAR_ID),
522 CFG_BAR_SIZE);
523 return -ENODEV;
524 }
525
526 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
527
528 rc = hl_pci_init(hdev);
529 if (rc)
530 return rc;
531
532 return 0;
533}
534
535static int gaudi_early_fini(struct hl_device *hdev)
536{
537 hl_pci_fini(hdev);
538
539 return 0;
540}
541
542/**
543 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
544 *
545 * @hdev: pointer to hl_device structure
546 *
547 */
548static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
549{
550 struct asic_fixed_properties *prop = &hdev->asic_prop;
551
552 prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
553 prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
554 prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
555 prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
556}
557
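/*
 * _gaudi_init_tpc_mem - build a single LIN_DMA packet in a kernel CB that
 * copies the TPC kernel from host memory to the user SRAM area, send it on
 * QMAN0, and then execute the kernel on every TPC engine.
 */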
558static int _gaudi_init_tpc_mem(struct hl_device *hdev,
559 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
560{
561 struct asic_fixed_properties *prop = &hdev->asic_prop;
562 struct packet_lin_dma *init_tpc_mem_pkt;
563 struct hl_cs_job *job;
564 struct hl_cb *cb;
565 u64 dst_addr;
566 u32 cb_size, ctl;
567 u8 tpc_id;
568 int rc;
569
570 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
571 if (!cb)
572 return -EFAULT;
573
574 init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
575 cb->kernel_address;
576 cb_size = sizeof(*init_tpc_mem_pkt);
577 memset(init_tpc_mem_pkt, 0, cb_size);
578
579 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
580
581 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
582 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
583 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
584 (1 << GAUDI_PKT_CTL_MB_SHIFT));
585
586 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
587
588 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
589 dst_addr = (prop->sram_user_base_address &
590 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
591 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
592 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
593
594 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
595 if (!job) {
596 dev_err(hdev->dev, "Failed to allocate a new job\n");
597 rc = -ENOMEM;
598 goto release_cb;
599 }
600
601 job->id = 0;
602 job->user_cb = cb;
603 job->user_cb->cs_cnt++;
604 job->user_cb_size = cb_size;
605 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
606 job->patched_cb = job->user_cb;
607 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
608
609 hl_debugfs_add_job(hdev, job);
610
611 rc = gaudi_send_job_on_qman0(hdev, job);
612
613 if (rc)
614 goto free_job;
615
616 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
617 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
618 if (rc)
619 break;
620 }
621
622free_job:
623 hl_userptr_delete_list(hdev, &job->userptr_list);
624 hl_debugfs_remove_job(hdev, job);
625 kfree(job);
626 cb->cs_cnt--;
627
628release_cb:
629 hl_cb_put(cb);
630 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
631
632 return rc;
633}
634
635/*
636 * gaudi_init_tpc_mem() - Initialize TPC memories.
637 * @hdev: Pointer to hl_device structure.
638 *
639 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
640 *
641 * Return: 0 for success, negative value for error.
642 */
643static int gaudi_init_tpc_mem(struct hl_device *hdev)
644{
645 const struct firmware *fw;
646 size_t fw_size;
647 void *cpu_addr;
648 dma_addr_t dma_handle;
649 int rc;
650
651 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
652 if (rc) {
653 dev_err(hdev->dev, "Firmware file %s is not found!\n",
654 GAUDI_TPC_FW_FILE);
655 goto out;
656 }
657
658 fw_size = fw->size;
659 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
660 &dma_handle, GFP_KERNEL | __GFP_ZERO);
661 if (!cpu_addr) {
662 dev_err(hdev->dev,
663 "Failed to allocate %zu of dma memory for TPC kernel\n",
664 fw_size);
665 rc = -ENOMEM;
666 goto out;
667 }
668
669 memcpy(cpu_addr, fw->data, fw_size);
670
671 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
672
673 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
674 dma_handle);
675
676out:
677 release_firmware(fw);
678 return rc;
679}
680
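/*
 * gaudi_late_init - post-boot initialization that requires the embedded CPU:
 * fetch ArmCP info, enable PCI access from the device CPU, read the PSOC PLL
 * registers, clear the MMU page tables area and load the TPC memories.
 */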
681static int gaudi_late_init(struct hl_device *hdev)
682{
683 struct gaudi_device *gaudi = hdev->asic_specific;
684 int rc;
685
686 rc = gaudi->armcp_info_get(hdev);
687 if (rc) {
688 dev_err(hdev->dev, "Failed to get armcp info\n");
689 return rc;
690 }
691
692 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
693 if (rc) {
694 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
695 return rc;
696 }
697
698 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
699
700 gaudi_fetch_psoc_frequency(hdev);
701
702 rc = gaudi_mmu_clear_pgt_range(hdev);
703 if (rc) {
704 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
705 goto disable_pci_access;
706 }
707
708 rc = gaudi_init_tpc_mem(hdev);
709 if (rc) {
710 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
711 goto disable_pci_access;
712 }
713
714 return 0;
715
716disable_pci_access:
717 hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
718
719 return rc;
720}
721
722static void gaudi_late_fini(struct hl_device *hdev)
723{
724 const struct hwmon_channel_info **channel_info_arr;
725 int i = 0;
726
727 if (!hdev->hl_chip_info->info)
728 return;
729
730 channel_info_arr = hdev->hl_chip_info->info;
731
732 while (channel_info_arr[i]) {
733 kfree(channel_info_arr[i]->config);
734 kfree(channel_info_arr[i]);
735 i++;
736 }
737
738 kfree(channel_info_arr);
739
740 hdev->hl_chip_info->info = NULL;
741}
742
743static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
744{
745 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
746 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
747 int i, j, rc = 0;
748
749 /*
750 * The device CPU works with 40-bit addresses, and bit 39 must be set
751 * to '1' when accessing the host.
752 * Bits 49:39 of the full host address are saved for a later
753 * configuration of the HW to perform extension to 50 bits.
754 * Because there is a single HW register that holds the extension bits,
755 * these bits must be identical across the entire allocated range.
756 */
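	/*
	 * Illustrative example (addresses and size are made up, assuming
	 * GAUDI_CPU_PCI_MSB_ADDR() extracts bits 49:39 as described above):
	 * an allocation returned at dma address 0x7f_ffe0_0000 with a 4MB
	 * size would end at 0x80_001f_ffff, bits 49:39 of the start and end
	 * would differ, and the loop below would retry the allocation.
	 */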
757
758 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
759 virt_addr_arr[i] =
760 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
761 HL_CPU_ACCESSIBLE_MEM_SIZE,
762 &dma_addr_arr[i],
763 GFP_KERNEL | __GFP_ZERO);
764 if (!virt_addr_arr[i]) {
765 rc = -ENOMEM;
766 goto free_dma_mem_arr;
767 }
768
769 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
770 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
771 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
772 break;
773 }
774
775 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
776 dev_err(hdev->dev,
777 "MSB of CPU accessible DMA memory are not identical in all range\n");
778 rc = -EFAULT;
779 goto free_dma_mem_arr;
780 }
781
782 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
783 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
784 hdev->cpu_pci_msb_addr =
785 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
786
787 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
788
789free_dma_mem_arr:
790 for (j = 0 ; j < i ; j++)
791 hdev->asic_funcs->asic_dma_free_coherent(hdev,
792 HL_CPU_ACCESSIBLE_MEM_SIZE,
793 virt_addr_arr[j],
794 dma_addr_arr[j]);
795
796 return rc;
797}
798
799static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
800{
801 struct gaudi_device *gaudi = hdev->asic_specific;
802 struct gaudi_internal_qman_info *q;
803 u32 i;
804
805 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
806 q = &gaudi->internal_qmans[i];
807 if (!q->pq_kernel_addr)
808 continue;
809 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
810 q->pq_kernel_addr,
811 q->pq_dma_addr);
812 }
813}
814
815static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
816{
817 struct gaudi_device *gaudi = hdev->asic_specific;
818 struct gaudi_internal_qman_info *q;
819 int rc, i;
820
821 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
822 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
823 continue;
824
825 q = &gaudi->internal_qmans[i];
826
827 switch (i) {
828 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
829 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
830 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
831 break;
832 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
833 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
834 break;
835 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
836 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
837 break;
838 default:
839 dev_err(hdev->dev, "Bad internal queue index %d", i);
840 rc = -EINVAL;
841 goto free_internal_qmans_pq_mem;
842 }
843
844 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
845 hdev, q->pq_size,
846 &q->pq_dma_addr,
847 GFP_KERNEL | __GFP_ZERO);
848 if (!q->pq_kernel_addr) {
849 rc = -ENOMEM;
850 goto free_internal_qmans_pq_mem;
851 }
852 }
853
854 return 0;
855
856free_internal_qmans_pq_mem:
857 gaudi_free_internal_qmans_pq_mem(hdev);
858 return rc;
859}
860
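/*
 * gaudi_sw_init - software-only initialization: allocate the gaudi device
 * structure, build the event table from gaudi_irq_map_table, create the DMA
 * pool and the CPU accessible memory region, and allocate the internal
 * queues' PQ buffers.
 */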
861static int gaudi_sw_init(struct hl_device *hdev)
862{
863 struct gaudi_device *gaudi;
864 u32 i, event_id = 0;
865 int rc;
866
867 /* Allocate device structure */
868 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
869 if (!gaudi)
870 return -ENOMEM;
871
872 for (i = 0 ; i < ARRAY_SIZE(gaudi_irq_map_table) ; i++) {
873 if (gaudi_irq_map_table[i].valid) {
874 if (event_id == GAUDI_EVENT_SIZE) {
875 dev_err(hdev->dev,
876 "Event array exceeds the limit of %u events\n",
877 GAUDI_EVENT_SIZE);
878 rc = -EINVAL;
879 goto free_gaudi_device;
880 }
881
882 gaudi->events[event_id++] =
883 gaudi_irq_map_table[i].fc_id;
884 }
885 }
886
887 gaudi->armcp_info_get = gaudi_armcp_info_get;
888
889 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
890
891 hdev->asic_specific = gaudi;
892
893 /* Create DMA pool for small allocations */
894 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
895 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
896 if (!hdev->dma_pool) {
897 dev_err(hdev->dev, "failed to create DMA pool\n");
898 rc = -ENOMEM;
899 goto free_gaudi_device;
900 }
901
902 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
903 if (rc)
904 goto free_dma_pool;
905
906 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
907 if (!hdev->cpu_accessible_dma_pool) {
908 dev_err(hdev->dev,
909 "Failed to create CPU accessible DMA pool\n");
910 rc = -ENOMEM;
911 goto free_cpu_dma_mem;
912 }
913
914 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
915 (uintptr_t) hdev->cpu_accessible_dma_mem,
916 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
917 if (rc) {
918 dev_err(hdev->dev,
919 "Failed to add memory to CPU accessible DMA pool\n");
920 rc = -EFAULT;
921 goto free_cpu_accessible_dma_pool;
922 }
923
924 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
925 if (rc)
926 goto free_cpu_accessible_dma_pool;
927
928 spin_lock_init(&gaudi->hw_queues_lock);
929 mutex_init(&gaudi->clk_gate_mutex);
930
931 hdev->supports_sync_stream = true;
932 hdev->supports_coresight = true;
933
934 return 0;
935
936free_cpu_accessible_dma_pool:
937 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
938free_cpu_dma_mem:
939 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
940 hdev->cpu_pci_msb_addr);
941 hdev->asic_funcs->asic_dma_free_coherent(hdev,
942 HL_CPU_ACCESSIBLE_MEM_SIZE,
943 hdev->cpu_accessible_dma_mem,
944 hdev->cpu_accessible_dma_address);
945free_dma_pool:
946 dma_pool_destroy(hdev->dma_pool);
947free_gaudi_device:
948 kfree(gaudi);
949 return rc;
950}
951
952static int gaudi_sw_fini(struct hl_device *hdev)
953{
954 struct gaudi_device *gaudi = hdev->asic_specific;
955
956 gaudi_free_internal_qmans_pq_mem(hdev);
957
958 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
959
960 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
961 hdev->cpu_pci_msb_addr);
962 hdev->asic_funcs->asic_dma_free_coherent(hdev,
963 HL_CPU_ACCESSIBLE_MEM_SIZE,
964 hdev->cpu_accessible_dma_mem,
965 hdev->cpu_accessible_dma_address);
966
967 dma_pool_destroy(hdev->dma_pool);
968
969 mutex_destroy(&gaudi->clk_gate_mutex);
970
971 kfree(gaudi);
972
973 return 0;
974}
975
976static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
977{
978 struct hl_device *hdev = arg;
979 int i;
980
981 if (hdev->disabled)
982 return IRQ_HANDLED;
983
984 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
985 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
986
987 hl_irq_handler_eq(irq, &hdev->event_queue);
988
989 return IRQ_HANDLED;
990}
991
992/*
993 * For backward compatibility, new MSI interrupts should be set after the
994 * existing CPU and NIC interrupts.
995 */
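/*
 * For example (indices are illustrative and depend on the values of
 * GAUDI_EVENT_QUEUE_MSI_IDX and NIC_NUMBER_OF_ENGINES): a completion queue
 * whose nr is below the event-queue index maps 1:1 to its MSI vector, the
 * CPU event queue uses vector GAUDI_EVENT_QUEUE_MSI_IDX, and any newer
 * interrupt with a higher nr is shifted up by NIC_NUMBER_OF_ENGINES + 1.
 */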
996static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
997 bool cpu_eq)
998{
999 int msi_vec;
1000
1001 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1002 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1003 GAUDI_EVENT_QUEUE_MSI_IDX);
1004
1005 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1006 (nr + NIC_NUMBER_OF_ENGINES + 1);
1007
1008 return pci_irq_vector(hdev->pdev, msi_vec);
1009}
1010
1011static int gaudi_enable_msi_single(struct hl_device *hdev)
1012{
1013 int rc, irq;
1014
1015 dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1016
1017 irq = gaudi_pci_irq_vector(hdev, 0, false);
1018 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1019 "gaudi single msi", hdev);
1020 if (rc)
1021 dev_err(hdev->dev,
1022 "Failed to request single MSI IRQ\n");
1023
1024 return rc;
1025}
1026
1027static int gaudi_enable_msi_multi(struct hl_device *hdev)
1028{
1029 int cq_cnt = hdev->asic_prop.completion_queues_count;
1030 int rc, i, irq_cnt_init, irq;
1031
1032 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1033 irq = gaudi_pci_irq_vector(hdev, i, false);
1034 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1035 &hdev->completion_queue[i]);
1036 if (rc) {
1037 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1038 goto free_irqs;
1039 }
1040 }
1041
1042 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1043 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1044 &hdev->event_queue);
1045 if (rc) {
1046 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1047 goto free_irqs;
1048 }
1049
1050 return 0;
1051
1052free_irqs:
1053 for (i = 0 ; i < irq_cnt_init ; i++)
1054 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1055 &hdev->completion_queue[i]);
1056 return rc;
1057}
1058
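/*
 * gaudi_enable_msi - allocate MSI vectors and request IRQs. If fewer than
 * NUMBER_OF_INTERRUPTS vectors were granted, fall back to a single shared
 * handler; otherwise use one vector per completion queue plus one for the
 * event queue.
 */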
1059static int gaudi_enable_msi(struct hl_device *hdev)
1060{
1061 struct gaudi_device *gaudi = hdev->asic_specific;
1062 int rc;
1063
1064 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1065 return 0;
1066
1067 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1068 PCI_IRQ_MSI);
1069 if (rc < 0) {
1070 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1071 return rc;
1072 }
1073
1074 if (rc < NUMBER_OF_INTERRUPTS) {
1075 gaudi->multi_msi_mode = false;
1076 rc = gaudi_enable_msi_single(hdev);
1077 } else {
1078 gaudi->multi_msi_mode = true;
1079 rc = gaudi_enable_msi_multi(hdev);
1080 }
1081
1082 if (rc)
1083 goto free_pci_irq_vectors;
1084
1085 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1086
1087 return 0;
1088
1089free_pci_irq_vectors:
1090 pci_free_irq_vectors(hdev->pdev);
1091 return rc;
1092}
1093
1094static void gaudi_sync_irqs(struct hl_device *hdev)
1095{
1096 struct gaudi_device *gaudi = hdev->asic_specific;
1097 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1098
1099 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1100 return;
1101
1102 /* Wait for all pending IRQs to be finished */
1103 if (gaudi->multi_msi_mode) {
1104 for (i = 0 ; i < cq_cnt ; i++)
1105 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1106
1107 synchronize_irq(gaudi_pci_irq_vector(hdev,
1108 GAUDI_EVENT_QUEUE_MSI_IDX,
1109 true));
1110 } else {
1111 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1112 }
1113}
1114
1115static void gaudi_disable_msi(struct hl_device *hdev)
1116{
1117 struct gaudi_device *gaudi = hdev->asic_specific;
1118 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1119
1120 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1121 return;
1122
1123 gaudi_sync_irqs(hdev);
1124
1125 if (gaudi->multi_msi_mode) {
1126 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1127 true);
1128 free_irq(irq, &hdev->event_queue);
1129
1130 for (i = 0 ; i < cq_cnt ; i++) {
1131 irq = gaudi_pci_irq_vector(hdev, i, false);
1132 free_irq(irq, &hdev->completion_queue[i]);
1133 }
1134 } else {
1135 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1136 }
1137
1138 pci_free_irq_vectors(hdev->pdev);
1139
1140 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1141}
1142
1143static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1144{
1145 struct gaudi_device *gaudi = hdev->asic_specific;
1146
1147 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1148 return;
1149
1150 if (!hdev->sram_scrambler_enable)
1151 return;
1152
1153 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1154 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1155 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1156 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1157 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1158 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1159 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1160 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1161 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1162 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1163 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1164 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1165 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1166 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1167 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1168 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1169
1170 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1171 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1172 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1173 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1174 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1175 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1176 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1177 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1178 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1179 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1180 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1181 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1182 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1183 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1184 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1185 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1186
1187 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1188 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1189 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1190 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1191 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1192 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1193 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1194 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1195 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1196 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1197 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1198 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1199 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1200 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1201 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1202 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1203
1204 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1205}
1206
1207static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1208{
1209 struct gaudi_device *gaudi = hdev->asic_specific;
1210
1211 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1212 return;
1213
1214 if (!hdev->dram_scrambler_enable)
1215 return;
1216
1217 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1218 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1219 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1220 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1221 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1222 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1223 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1224 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1225 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1226 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1227 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1228 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1229 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1230 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1231 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1232 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1233
1234 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1235 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1236 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1237 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1238 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1239 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1240 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1241 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1242 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1243 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1244 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1245 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1246 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1247 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1248 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1249 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1250
1251 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1252 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1253 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1254 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1255 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1256 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1257 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1258 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1259 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1260 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1261 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1262 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1263 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1264 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1265 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1266 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1267
1268 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1269}
1270
1271static void gaudi_init_e2e(struct hl_device *hdev)
1272{
1273 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1274 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1275 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1276 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1277
1278 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1279 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1280 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1281 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1282
1283 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1284 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1285 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1286 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1287
1288 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1289 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1290 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1291 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1292
1293 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1294 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1295 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1296 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1297
1298 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1299 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1300 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1301 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1302
1303 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1304 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1305 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1306 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1307
1308 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1309 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1310 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1311 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1312
1313 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1314 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1315 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1316 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1317
1318 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1319 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1320 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1321 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1322
1323 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1324 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1325 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1326 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1327
1328 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1329 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1330 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1331 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1332
1333 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1334 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1335 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1336 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1337
1338 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1339 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1340 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1341 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1342
1343 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1344 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1345 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1346 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1347
1348 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1349 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1350 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1351 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1352
1353 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1354 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1355 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1356 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1357
1358 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1359 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1360 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1361 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1362
1363 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1364 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1365 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1366 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1367
1368 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1369 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1370 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1371 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1372
1373 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1374 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1375 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1376 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1377
1378 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1379 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1380 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1381 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1382
1383 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1384 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1385 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1386 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1387
1388 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1389 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1390 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1391 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1392
1393 if (!hdev->dram_scrambler_enable) {
1394 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1395 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1396 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1397 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1398
1399 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1400 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1401 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1402 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1403
1404 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1405 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1406 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1407 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1408
1409 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1410 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1411 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1412 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1413
1414 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1415 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1416 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1417 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1418
1419 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1420 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1421 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1422 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1423
1424 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1425 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1426 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1427 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1428
1429 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1430 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1431 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1432 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1433
1434 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1435 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1436 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1437 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1438
1439 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1440 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1441 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1442 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1443
1444 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1445 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1446 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1447 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1448
1449 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1450 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1451 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1452 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1453
1454 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1455 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1456 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1457 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1458
1459 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1460 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1461 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1462 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1463
1464 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1465 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1466 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1467 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1468
1469 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1470 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1471 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1472 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1473
1474 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1475 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1476 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1477 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1478
1479 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1480 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1481 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1482 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1483
1484 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1485 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1486 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1487 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1488
1489 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1490 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1491 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1492 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1493
1494 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1495 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1496 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1497 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1498
1499 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1500 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1501 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1502 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1503
1504 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1505 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1506 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1507 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1508
1509 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1510 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1511 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1512 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1513 }
1514
1515 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1516 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1517 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1518 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1519
1520 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1521 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1522 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1523 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1524
1525 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1526 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1527 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1528 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1529
1530 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1531 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1532 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1533 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1534
1535 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1536 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1537 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1538 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1539
1540 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1541 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1542 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1543 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1544
1545 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1546 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1547 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1548 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1549
1550 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1551 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1552 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1553 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1554
1555 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1556 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1557 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1558 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1559
1560 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1561 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1562 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1563 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1564
1565 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1566 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1567 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1568 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1569
1570 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1571 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1572 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1573 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1574
1575 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1576 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1577 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1578 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1579
1580 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1581 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1582 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1583 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1584
1585 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1586 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1587 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1588 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1589
1590 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1591 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1592 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1593 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1594
1595 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1596 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1597 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1598 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1599
1600 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1601 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1602 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1603 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1604
1605 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1606 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1607 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1608 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1609
1610 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1611 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1612 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1613 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1614
1615 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1616 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1617 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1618 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1619
1620 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1621 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1622 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1623 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1624
1625 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1626 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1627 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1628 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1629
1630 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1631 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1632 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1633 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1634}
1635
1636static void gaudi_init_hbm_cred(struct hl_device *hdev)
1637{
1638 uint32_t hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1639
1640 hbm0_wr = 0x33333333;
1641 hbm1_wr = 0x33333333;
1642 hbm0_rd = 0x77777777;
1643 hbm1_rd = 0xDDDDDDDD;
1644
1645 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1646 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1647 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1648 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1649
1650 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1651 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1652 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1653 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1654
1655 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1656 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1657 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1658 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1659
1660 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1661 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1662 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1663 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1664
1665 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1666 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1667 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1668 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1669 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1670 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1671 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1672 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1673 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1674 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1675 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1676 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1677
1678 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1679 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1680 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1681 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1682 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1683 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1684 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1685 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1686 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1687 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1688 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1689 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1690}
1691
1692static void gaudi_init_rate_limiter(struct hl_device *hdev)
1693{
1694 u32 nr, nf, od, sat, rst, timeout;
1695 u64 freq;
1696
1697 nr = RREG32(mmPSOC_HBM_PLL_NR);
1698 nf = RREG32(mmPSOC_HBM_PLL_NF);
1699 od = RREG32(mmPSOC_HBM_PLL_OD);
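/*
 * Assuming a 50MHz PLL reference clock, the output frequency is
 * freq = 50 * (NF + 1) / ((NR + 1) * (OD + 1)). For example, with
 * hypothetical register values NF = 31, NR = 0 and OD = 1 this
 * evaluates to 50 * 32 / (1 * 2) = 800MHz.
 */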
1700 freq = (50 * (nf + 1)) / ((nr + 1) * (od + 1));
1701
1702 dev_dbg(hdev->dev, "HBM frequency is %lluMHz\n", freq);
1703
1704 /* Configuration is for 5 DDMA channels */
1705 if (freq == 800) {
1706 sat = 4;
1707 rst = 11;
1708 timeout = 15;
1709 } else if (freq == 900) {
1710 sat = 4;
1711 rst = 15;
1712 timeout = 16;
1713 } else if (freq == 950) {
1714 sat = 4;
1715 rst = 15;
1716 timeout = 15;
1717 } else {
1718 dev_warn(hdev->dev,
1719 "unsupported HBM frequency %lluMHz, no rate-limiters\n",
1720 freq);
1721 return;
1722 }
1723
1724 WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_0, 0x111);
1725 WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_1, 0x111);
1726 WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_0, 0x111);
1727 WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_1, 0x111);
1728 WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_0, 0x111);
1729 WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_1, 0x111);
1730 WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_0, 0x111);
1731 WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_1, 0x111);
1732
1733 if (!hdev->rl_enable) {
1734 dev_info(hdev->dev, "Rate limiters disabled\n");
1735 return;
1736 }
1737
1738 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_SAT, sat);
1739 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_SAT, sat);
1740 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_SAT, sat);
1741 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_SAT, sat);
1742 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_SAT, sat);
1743 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_SAT, sat);
1744 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_SAT, sat);
1745 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_SAT, sat);
1746
1747 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_RST, rst);
1748 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_RST, rst);
1749 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_RST, rst);
1750 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_RST, rst);
1751 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_RST, rst);
1752 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_RST, rst);
1753 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_RST, rst);
1754 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_RST, rst);
1755
1756 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
1757 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
1758 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
1759 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
1760 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
1761 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
1762 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
1763 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
1764
1765 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_EN, 1);
1766 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_EN, 1);
1767 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_EN, 1);
1768 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_EN, 1);
1769 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_EN, 1);
1770 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_EN, 1);
1771 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_EN, 1);
1772 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_EN, 1);
1773
1774 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_SAT, sat);
1775 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_SAT, sat);
1776 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_SAT, sat);
1777 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_SAT, sat);
1778 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_SAT, sat);
1779 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_SAT, sat);
1780 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_SAT, sat);
1781 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_SAT, sat);
1782
1783 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_RST, rst);
1784 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_RST, rst);
1785 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_RST, rst);
1786 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_RST, rst);
1787 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_RST, rst);
1788 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_RST, rst);
1789 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_RST, rst);
1790 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_RST, rst);
1791
1792 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
1793 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
1794 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
1795 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
1796 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
1797 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
1798 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
1799 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
1800
1801 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_EN, 1);
1802 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_EN, 1);
1803 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_EN, 1);
1804 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_EN, 1);
1805 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_EN, 1);
1806 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_EN, 1);
1807 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_EN, 1);
1808 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_EN, 1);
1809}
1810
1811static void gaudi_init_golden_registers(struct hl_device *hdev)
1812{
1813 u32 tpc_offset;
1814 int tpc_id, i;
1815
1816 gaudi_init_e2e(hdev);
1817
1818 gaudi_init_hbm_cred(hdev);
1819
1820 gaudi_init_rate_limiter(hdev);
1821
1822 gaudi_disable_clock_gating(hdev);
1823
1824 for (tpc_id = 0, tpc_offset = 0;
1825 tpc_id < TPC_NUMBER_OF_ENGINES;
1826 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
1827 /* Mask all arithmetic interrupts from TPC */
1828 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
1829 /* Set 16 cache lines */
1830 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
1831 ICACHE_FETCH_LINE_NUM, 2);
1832 }
1833
1834 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
1835 for (i = 0 ; i < 128 ; i += 8)
1836 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
1837
1838 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1839 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1840 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1841 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
1842
1843 /* WA for H3-2081 */
1844 WREG32(mmPCIE_WRAP_MAX_OUTSTAND, 0x10ff);
1845}
1846
1847static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
1848 int qman_id, dma_addr_t qman_pq_addr)
1849{
1850 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
1851 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
1852 u32 q_off, dma_qm_offset;
1853 u32 dma_qm_err_cfg;
1854
1855 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1856
1857 mtr_base_en_lo = lower_32_bits(CFG_BASE +
1858 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1859 mtr_base_en_hi = upper_32_bits(CFG_BASE +
1860 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1861 so_base_en_lo = lower_32_bits(CFG_BASE +
1862 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1863 so_base_en_hi = upper_32_bits(CFG_BASE +
1864 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
1865 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
1866 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1867 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
1868 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
1869 so_base_ws_lo = lower_32_bits(CFG_BASE +
1870 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1871 so_base_ws_hi = upper_32_bits(CFG_BASE +
1872 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
1873
1874 q_off = dma_qm_offset + qman_id * 4;
1875
1876 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
1877 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
1878
1879 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
1880 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
1881 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
1882
1883 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
1884 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
1885 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
1886
1887 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
1888 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
1889 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
1890 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
1891 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
1892 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
1893 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
1894 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
1895
1896 /* The following configuration is needed only once per QMAN */
1897 if (qman_id == 0) {
1898 /* Configure RAZWI IRQ */
1899 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
1900 if (hdev->stop_on_err) {
1901 dma_qm_err_cfg |=
1902 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
1903 }
1904
1905 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
1906 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
1907 lower_32_bits(CFG_BASE +
1908 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1909 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
1910 upper_32_bits(CFG_BASE +
1911 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1912 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
1913 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
1914 dma_id);
1915
1916 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
1917 QM_ARB_ERR_MSG_EN_MASK);
1918
1919 /* Increase ARB WDT to support streams architecture */
1920 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
1921 GAUDI_ARB_WDT_TIMEOUT);
1922
1923 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
1924 QMAN_EXTERNAL_MAKE_TRUSTED);
1925
1926 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
1927 }
1928}
1929
1930static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
1931{
1932 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
1933 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
1934
1935 /* Set to maximum possible according to physical size */
1936 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
1937 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
1938
1939 /* STOP_ON bit implies no completion of the operation in case of RAZWI */
1940 if (hdev->stop_on_err)
1941 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
1942
1943 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
1944 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
1945 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1946 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
1947 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
1948 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
1949 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
1950 WREG32(mmDMA0_CORE_PROT + dma_offset,
1951 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
1952 /* If the channel is secured, it should be in MMU bypass mode */
1953 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
1954 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
1955 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
1956}
1957
1958static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
1959 u32 enable_mask)
1960{
1961 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
1962
1963 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
1964}
1965
1966static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
1967{
1968 struct gaudi_device *gaudi = hdev->asic_specific;
1969 struct hl_hw_queue *q;
1970 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
1971
1972 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
1973 return;
1974
1975 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
1976 dma_id = gaudi_dma_assignment[i];
1977 /*
1978 * For queues after the CPU Q, add 1 to get the correct queue
1979 * index. In addition, add the CPU EQ and NIC IRQs in order to
1980 * get the correct MSI register.
1981 */
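/*
 * Illustrative example: PCI DMA channel dma_id 5 with stream j = 2
 * maps to kernel queue index 4 * 5 + 2 + 1 = 23, since the CPU queue
 * occupies one slot after the first two DMA channels.
 */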
1982 if (dma_id > 1) {
1983 cpu_skip = 1;
1984 nic_skip = NIC_NUMBER_OF_ENGINES;
1985 } else {
1986 cpu_skip = 0;
1987 nic_skip = 0;
1988 }
1989
1990 for (j = 0 ; j < QMAN_STREAMS ; j++) {
1991 q_idx = 4 * dma_id + j + cpu_skip;
1992 q = &hdev->kernel_queues[q_idx];
1993 q->cq_id = cq_id++;
1994 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
1995 gaudi_init_pci_dma_qman(hdev, dma_id, j,
1996 q->bus_address);
1997 }
1998
1999 gaudi_init_dma_core(hdev, dma_id);
2000
2001 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2002 }
2003
2004 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2005}
2006
2007static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2008 int qman_id, u64 qman_base_addr)
2009{
2010 u32 mtr_base_lo, mtr_base_hi;
2011 u32 so_base_lo, so_base_hi;
2012 u32 q_off, dma_qm_offset;
2013 u32 dma_qm_err_cfg;
2014
2015 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2016
2017 mtr_base_lo = lower_32_bits(CFG_BASE +
2018 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2019 mtr_base_hi = upper_32_bits(CFG_BASE +
2020 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2021 so_base_lo = lower_32_bits(CFG_BASE +
2022 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2023 so_base_hi = upper_32_bits(CFG_BASE +
2024 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2025
2026 q_off = dma_qm_offset + qman_id * 4;
2027
2028 if (qman_id < 4) {
2029 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2030 lower_32_bits(qman_base_addr));
2031 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2032 upper_32_bits(qman_base_addr));
2033
2034 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2035 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2036 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2037
2038 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2039 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2040 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2041 } else {
2042 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2043 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2044 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2045
2046 /* Configure RAZWI IRQ */
2047 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2048 if (hdev->stop_on_err) {
2049 dma_qm_err_cfg |=
2050 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2051 }
2052 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2053
2054 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2055 lower_32_bits(CFG_BASE +
2056 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2057 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2058 upper_32_bits(CFG_BASE +
2059 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2060 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2061 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2062 dma_id);
2063
2064 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2065 QM_ARB_ERR_MSG_EN_MASK);
2066
2067 /* Increase ARB WDT to support streams architecture */
2068 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2069 GAUDI_ARB_WDT_TIMEOUT);
2070
2071 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2072 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2073 QMAN_INTERNAL_MAKE_TRUSTED);
2074 }
2075
2076 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2077 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2078 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2079 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2080}
2081
2082static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2083{
2084 struct gaudi_device *gaudi = hdev->asic_specific;
2085 struct gaudi_internal_qman_info *q;
2086 u64 qman_base_addr;
2087 int i, j, dma_id, internal_q_index;
2088
2089 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2090 return;
2091
2092 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2093 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2094
2095 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2096 /*
2097 * Add the CPU queue in order to get the correct queue
2098 * number, as all internal queues are placed after it
2099 */
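/*
 * Illustrative example (assuming QMAN_STREAMS is 4): HBM DMA channel
 * dma_id 2 with stream j = 0 uses internal queue index
 * 2 * 4 + 0 + 1 = 9.
 */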
2100 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2101
2102 q = &gaudi->internal_qmans[internal_q_index];
2103 qman_base_addr = (u64) q->pq_dma_addr;
2104 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2105 qman_base_addr);
2106 }
2107
2108 /* Initializing lower CP for HBM DMA QMAN */
2109 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2110
2111 gaudi_init_dma_core(hdev, dma_id);
2112
2113 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2114 }
2115
2116 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2117}
2118
2119static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2120 int qman_id, u64 qman_base_addr)
2121{
2122 u32 mtr_base_lo, mtr_base_hi;
2123 u32 so_base_lo, so_base_hi;
2124 u32 q_off, mme_id;
2125 u32 mme_qm_err_cfg;
2126
2127 mtr_base_lo = lower_32_bits(CFG_BASE +
2128 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2129 mtr_base_hi = upper_32_bits(CFG_BASE +
2130 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2131 so_base_lo = lower_32_bits(CFG_BASE +
2132 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2133 so_base_hi = upper_32_bits(CFG_BASE +
2134 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2135
2136 q_off = mme_offset + qman_id * 4;
2137
2138 if (qman_id < 4) {
2139 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2140 lower_32_bits(qman_base_addr));
2141 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2142 upper_32_bits(qman_base_addr));
2143
2144 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2145 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2146 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2147
2148 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2149 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2150 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2151 } else {
2152 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2153 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2154 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2155
2156 /* Configure RAZWI IRQ */
2157 mme_id = mme_offset /
2158 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
2159
2160 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2161 if (hdev->stop_on_err) {
2162 mme_qm_err_cfg |=
2163 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2164 }
2165 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2166 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2167 lower_32_bits(CFG_BASE +
2168 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2169 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2170 upper_32_bits(CFG_BASE +
2171 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2172 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2173 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2174 mme_id);
2175
2176 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2177 QM_ARB_ERR_MSG_EN_MASK);
2178
2179 /* Increase ARB WDT to support streams architecture */
2180 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2181 GAUDI_ARB_WDT_TIMEOUT);
2182
2183 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2184 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2185 QMAN_INTERNAL_MAKE_TRUSTED);
2186 }
2187
2188 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2189 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2190 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2191 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2192}
2193
2194static void gaudi_init_mme_qmans(struct hl_device *hdev)
2195{
2196 struct gaudi_device *gaudi = hdev->asic_specific;
2197 struct gaudi_internal_qman_info *q;
2198 u64 qman_base_addr;
2199 u32 mme_offset;
2200 int i, internal_q_index;
2201
2202 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2203 return;
2204
2205 /*
2206 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2207 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2208 */
2209
2210 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2211
2212 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2213 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2214 q = &gaudi->internal_qmans[internal_q_index];
2215 qman_base_addr = (u64) q->pq_dma_addr;
2216 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2217 qman_base_addr);
2218 if (i == 3)
2219 mme_offset = 0;
2220 }
2221
2222 /* Initializing lower CP for MME QMANs */
2223 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2224 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2225 gaudi_init_mme_qman(hdev, 0, 4, 0);
2226
2227 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2228 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2229
2230 gaudi->hw_cap_initialized |= HW_CAP_MME;
2231}
2232
2233static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2234 int qman_id, u64 qman_base_addr)
2235{
2236 u32 mtr_base_lo, mtr_base_hi;
2237 u32 so_base_lo, so_base_hi;
2238 u32 q_off, tpc_id;
2239 u32 tpc_qm_err_cfg;
2240
2241 mtr_base_lo = lower_32_bits(CFG_BASE +
2242 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2243 mtr_base_hi = upper_32_bits(CFG_BASE +
2244 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2245 so_base_lo = lower_32_bits(CFG_BASE +
2246 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2247 so_base_hi = upper_32_bits(CFG_BASE +
2248 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2249
2250 q_off = tpc_offset + qman_id * 4;
2251
2252 if (qman_id < 4) {
2253 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2254 lower_32_bits(qman_base_addr));
2255 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2256 upper_32_bits(qman_base_addr));
2257
2258 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2259 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2260 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2261
2262 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2263 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2264 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2265 } else {
2266 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2267 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2268 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2269
2270 /* Configure RAZWI IRQ */
2271 tpc_id = tpc_offset /
2272 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2273
2274 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2275 if (hdev->stop_on_err) {
2276 tpc_qm_err_cfg |=
2277 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2278 }
2279
2280 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2281 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2282 lower_32_bits(CFG_BASE +
2283 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2284 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2285 upper_32_bits(CFG_BASE +
2286 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2287 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2288 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2289 tpc_id);
2290
2291 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2292 QM_ARB_ERR_MSG_EN_MASK);
2293
2294 /* Increase ARB WDT to support streams architecture */
2295 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2296 GAUDI_ARB_WDT_TIMEOUT);
2297
2298 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2299 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2300 QMAN_INTERNAL_MAKE_TRUSTED);
2301 }
2302
2303 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2304 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2305 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2306 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2307}
2308
2309static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2310{
2311 struct gaudi_device *gaudi = hdev->asic_specific;
2312 struct gaudi_internal_qman_info *q;
2313 u64 qman_base_addr;
2314 u32 so_base_hi, tpc_offset = 0;
2315 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2316 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2317 int i, tpc_id, internal_q_index;
2318
2319 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2320 return;
2321
2322 so_base_hi = upper_32_bits(CFG_BASE +
2323 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2324
2325 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2326 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2327 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2328 tpc_id * QMAN_STREAMS + i;
2329 q = &gaudi->internal_qmans[internal_q_index];
2330 qman_base_addr = (u64) q->pq_dma_addr;
2331 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2332 qman_base_addr);
2333
2334 if (i == 3) {
2335 /* Initializing lower CP for TPC QMAN */
2336 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2337
2338 /* Enable the QMAN and TPC channel */
2339 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2340 QMAN_TPC_ENABLE);
2341 }
2342 }
2343
2344 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2345 so_base_hi);
2346
2347 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2348
2349 gaudi->hw_cap_initialized |= 1 << (HW_CAP_TPC_SHIFT + tpc_id);
2350 }
2351}
2352
2353static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2354{
2355 struct gaudi_device *gaudi = hdev->asic_specific;
2356
2357 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2358 return;
2359
2360 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2361 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2362 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2363}
2364
2365static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2366{
2367 struct gaudi_device *gaudi = hdev->asic_specific;
2368
2369 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2370 return;
2371
2372 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2373 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2374 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2375 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2376 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2377}
2378
2379static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2380{
2381 struct gaudi_device *gaudi = hdev->asic_specific;
2382
2383 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2384 return;
2385
2386 WREG32(mmMME2_QM_GLBL_CFG0, 0);
2387 WREG32(mmMME0_QM_GLBL_CFG0, 0);
2388}
2389
2390static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2391{
2392 struct gaudi_device *gaudi = hdev->asic_specific;
2393 u32 tpc_offset = 0;
2394 int tpc_id;
2395
2396 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2397 return;
2398
2399 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2400 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2401 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2402 }
2403}
2404
2405static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2406{
2407 struct gaudi_device *gaudi = hdev->asic_specific;
2408
2409 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2410 return;
2411
2412 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
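/*
 * 0xF presumably stops only the four upper CPs; the internal QMANs
 * below use 0x1F, which covers the lower CP as well.
 */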
2413 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2414 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2415 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2416}
2417
2418static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2419{
2420 struct gaudi_device *gaudi = hdev->asic_specific;
2421
2422 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2423 return;
2424
2425 /* Stop CPs of HBM DMA QMANs */
2426
2427 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2428 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2429 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2430 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2431 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2432}
2433
2434static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2435{
2436 struct gaudi_device *gaudi = hdev->asic_specific;
2437
2438 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2439 return;
2440
2441 /* Stop CPs of MME QMANs */
2442 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2443 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2444}
2445
2446static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2447{
2448 struct gaudi_device *gaudi = hdev->asic_specific;
2449
2450 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2451 return;
2452
2453 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2454 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2455 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2456 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2457 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2458 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2459 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2460 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2461}
2462
2463static void gaudi_pci_dma_stall(struct hl_device *hdev)
2464{
2465 struct gaudi_device *gaudi = hdev->asic_specific;
2466
2467 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2468 return;
2469
2470 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2471 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2472 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2473}
2474
2475static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2476{
2477 struct gaudi_device *gaudi = hdev->asic_specific;
2478
2479 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2480 return;
2481
2482 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2483 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2484 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2485 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2486 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2487}
2488
2489static void gaudi_mme_stall(struct hl_device *hdev)
2490{
2491 struct gaudi_device *gaudi = hdev->asic_specific;
2492
2493 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2494 return;
2495
2496 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2497 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2498 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2499 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2500 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2501 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2502 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2503 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2504 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2505 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2506 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2507 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2508 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2509 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2510 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2511 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2512 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2513}
2514
2515static void gaudi_tpc_stall(struct hl_device *hdev)
2516{
2517 struct gaudi_device *gaudi = hdev->asic_specific;
2518
2519 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2520 return;
2521
2522 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2523 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2524 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2525 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2526 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2527 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2528 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2529 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2530}
2531
2532static void gaudi_enable_clock_gating(struct hl_device *hdev)
2533{
2534 struct gaudi_device *gaudi = hdev->asic_specific;
2535 u32 qman_offset;
2536 int i;
2537
2538 if (!hdev->clock_gating)
2539 return;
2540
2541 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)
2542 return;
2543
2544 /* In case we are in a debug session, don't enable the clock gate
2545 * as it may interfere
2546 */
2547 if (hdev->in_debug)
2548 return;
2549
2550 for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2551 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2552 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
2553 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2554 QMAN_UPPER_CP_CGM_PWR_GATE_EN);
2555 }
2556
2557 for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2558 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2559 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
2560 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2561 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2562 }
2563
2564 WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2565 WREG32(mmMME0_QM_CGM_CFG,
2566 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2567 WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2568 WREG32(mmMME2_QM_CGM_CFG,
2569 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2570
2571 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2572 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2573 QMAN_CGM1_PWR_GATE_EN);
2574 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2575 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2576
2577 qman_offset += TPC_QMAN_OFFSET;
2578 }
2579
2580 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2581}
2582
2583static void gaudi_disable_clock_gating(struct hl_device *hdev)
2584{
2585 struct gaudi_device *gaudi = hdev->asic_specific;
2586 u32 qman_offset;
2587 int i;
2588
2589 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2590 return;
2591
2592 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2593 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2594 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2595
2596 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2597 }
2598
2599 WREG32(mmMME0_QM_CGM_CFG, 0);
2600 WREG32(mmMME0_QM_CGM_CFG1, 0);
2601 WREG32(mmMME2_QM_CGM_CFG, 0);
2602 WREG32(mmMME2_QM_CGM_CFG1, 0);
2603
2604 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2605 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2606 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2607
2608 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2609 }
2610
2611 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2612}
2613
2614static void gaudi_enable_timestamp(struct hl_device *hdev)
2615{
2616 /* Disable the timestamp counter */
2617 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2618
2619 /* Zero the lower/upper parts of the 64-bit counter */
2620 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2621 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2622
2623 /* Enable the counter */
2624 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2625}
2626
2627static void gaudi_disable_timestamp(struct hl_device *hdev)
2628{
2629 /* Disable the timestamp counter */
2630 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2631}
2632
2633static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2634{
2635 u32 wait_timeout_ms, cpu_timeout_ms;
2636
2637 dev_info(hdev->dev,
2638 "Halting compute engines and disabling interrupts\n");
2639
2640 if (hdev->pldm) {
2641 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2642 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2643 } else {
2644 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2645 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
2646 }
2647
2648 if (hard_reset) {
2649 /*
2650 * We don't know the state of the CPU, so make sure it is
2651 * stopped by any means necessary
2652 */
2653 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
2654 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2655 GAUDI_EVENT_HALT_MACHINE);
2656 msleep(cpu_timeout_ms);
2657 }
2658
2659 gaudi_stop_mme_qmans(hdev);
2660 gaudi_stop_tpc_qmans(hdev);
2661 gaudi_stop_hbm_dma_qmans(hdev);
2662 gaudi_stop_pci_dma_qmans(hdev);
2663
2664 gaudi_disable_clock_gating(hdev);
2665
2666 msleep(wait_timeout_ms);
2667
2668 gaudi_pci_dma_stall(hdev);
2669 gaudi_hbm_dma_stall(hdev);
2670 gaudi_tpc_stall(hdev);
2671 gaudi_mme_stall(hdev);
2672
2673 msleep(wait_timeout_ms);
2674
2675 gaudi_disable_mme_qmans(hdev);
2676 gaudi_disable_tpc_qmans(hdev);
2677 gaudi_disable_hbm_dma_qmans(hdev);
2678 gaudi_disable_pci_dma_qmans(hdev);
2679
2680 gaudi_disable_timestamp(hdev);
2681
2682 if (hard_reset)
2683 gaudi_disable_msi(hdev);
2684 else
2685 gaudi_sync_irqs(hdev);
2686}
2687
2688static int gaudi_mmu_init(struct hl_device *hdev)
2689{
2690 struct asic_fixed_properties *prop = &hdev->asic_prop;
2691 struct gaudi_device *gaudi = hdev->asic_specific;
2692 u64 hop0_addr;
2693 int rc, i;
2694
2695 if (!hdev->mmu_enable)
2696 return 0;
2697
2698 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2699 return 0;
2700
2701 hdev->dram_supports_virtual_memory = false;
2702
2703 for (i = 0 ; i < prop->max_asid ; i++) {
2704 hop0_addr = prop->mmu_pgt_addr +
2705 (i * prop->mmu_hop_table_size);
2706
2707 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2708 if (rc) {
2709 dev_err(hdev->dev,
2710 "failed to set hop0 addr for asid %d\n", i);
2711 goto err;
2712 }
2713 }
2714
2715 /* init MMU cache manage page */
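/*
 * The invalidation base address is split across two registers: bits
 * [39:8] and bits [49:40] of MMU_CACHE_MNG_ADDR respectively.
 */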
2716 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2717 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
2718
2719 hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2720 VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2721
2722 WREG32(mmMMU_UP_MMU_ENABLE, 1);
2723 WREG32(mmMMU_UP_SPI_MASK, 0xF);
2724
2725 WREG32(mmSTLB_HOP_CONFIGURATION,
2726 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2727
2728 /*
2729 * The H/W expects the first PI after init to be 1. After wraparound
2730 * we'll write 0.
2731 */
2732 gaudi->mmu_cache_inv_pi = 1;
2733
2734 gaudi->hw_cap_initialized |= HW_CAP_MMU;
2735
2736 return 0;
2737
2738err:
2739 return rc;
2740}
2741
2742static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2743{
2744 void __iomem *dst;
2745
2746 /* HBM scrambler must be initialized before pushing F/W to HBM */
2747 gaudi_init_scrambler_hbm(hdev);
2748
2749 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
2750
2751 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
2752}
2753
2754static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
2755{
2756 void __iomem *dst;
2757
2758 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
2759
2760 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
2761}
2762
2763static void gaudi_read_device_fw_version(struct hl_device *hdev,
2764 enum hl_fw_component fwc)
2765{
2766 const char *name;
2767 u32 ver_off;
2768 char *dest;
2769
2770 switch (fwc) {
2771 case FW_COMP_UBOOT:
2772 ver_off = RREG32(mmUBOOT_VER_OFFSET);
2773 dest = hdev->asic_prop.uboot_ver;
2774 name = "U-Boot";
2775 break;
2776 case FW_COMP_PREBOOT:
2777 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
2778 dest = hdev->asic_prop.preboot_ver;
2779 name = "Preboot";
2780 break;
2781 default:
2782 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
2783 return;
2784 }
2785
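/*
 * The version offset read from the register is an absolute SRAM
 * address; masking off SRAM_BASE_ADDR converts it to an offset within
 * the SRAM BAR.
 */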
2786 ver_off &= ~((u32)SRAM_BASE_ADDR);
2787
2788 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
2789 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
2790 VERSION_MAX_LEN);
2791 } else {
2792 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
2793 name, ver_off);
2794 strcpy(dest, "unavailable");
2795 }
2796}
2797
2798static int gaudi_init_cpu(struct hl_device *hdev)
2799{
2800 struct gaudi_device *gaudi = hdev->asic_specific;
2801 int rc;
2802
2803 if (!hdev->cpu_enable)
2804 return 0;
2805
2806 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
2807 return 0;
2808
2809 /*
2810 * The device CPU works with 40-bit addresses.
2811 * This register sets the extension to 50 bits.
2812 */
2813 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
2814
2815 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
2816 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
2817 mmCPU_CMD_STATUS_TO_HOST,
2818 mmCPU_BOOT_ERR0,
2819 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
2820 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
2821
2822 if (rc)
2823 return rc;
2824
2825 gaudi->hw_cap_initialized |= HW_CAP_CPU;
2826
2827 return 0;
2828}
2829
2830static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
2831{
2832 struct gaudi_device *gaudi = hdev->asic_specific;
2833 struct hl_eq *eq;
2834 u32 status;
2835 struct hl_hw_queue *cpu_pq =
2836 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
2837 int err;
2838
2839 if (!hdev->cpu_queues_enable)
2840 return 0;
2841
2842 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
2843 return 0;
2844
2845 eq = &hdev->event_queue;
2846
2847 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
2848 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
2849
2850 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
2851 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
2852
2853 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
2854 lower_32_bits(hdev->cpu_accessible_dma_address));
2855 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
2856 upper_32_bits(hdev->cpu_accessible_dma_address));
2857
2858 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
2859 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
2860 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
2861
2862 /* Used for EQ CI */
2863 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
2864
2865 WREG32(mmCPU_IF_PF_PQ_PI, 0);
2866
2867 if (gaudi->multi_msi_mode)
2868 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
2869 else
2870 WREG32(mmCPU_IF_QUEUE_INIT,
2871 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
2872
2873 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
2874
2875 err = hl_poll_timeout(
2876 hdev,
2877 mmCPU_IF_QUEUE_INIT,
2878 status,
2879 (status == PQ_INIT_STATUS_READY_FOR_HOST),
2880 1000,
2881 cpu_timeout);
2882
2883 if (err) {
2884 dev_err(hdev->dev,
2885 "Failed to communicate with ARM CPU (ArmCP timeout)\n");
2886 return -EIO;
2887 }
2888
2889 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
2890 return 0;
2891}
2892
2893static void gaudi_pre_hw_init(struct hl_device *hdev)
2894{
2895 /* Perform read from the device to make sure device is up */
2896 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
2897
2898 /*
2899 * Let's mark in the H/W that we have reached this point. We check
2900 * this value in the reset_before_init function to understand whether
2901 * we need to reset the chip before doing H/W init. This register is
2902 * cleared by the H/W upon H/W reset
2903 */
2904 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
2905
2906 /* Set the access through PCI bars (Linux driver only) as secured */
2907 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
2908 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
2909
2910 /* Perform read to flush the waiting writes to ensure configuration
2911 * was set in the device
2912 */
2913 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
2914
2915 if (hdev->axi_drain) {
2916 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
2917 1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
2918 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
2919 1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
2920
2921 /* Perform read to flush the DRAIN cfg */
2922 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2923 } else {
2924 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
2925 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
2926
2927 /* Perform read to flush the DRAIN cfg */
2928 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
2929 }
2930
2931 /* Configure the reset registers. Must be done as early as possible
2932 * in case we fail during H/W initialization
2933 */
2934 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
2935 (CFG_RST_H_DMA_MASK |
2936 CFG_RST_H_MME_MASK |
2937 CFG_RST_H_SM_MASK |
2938 CFG_RST_H_TPC_MASK));
2939
2940 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
2941
2942 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
2943 (CFG_RST_H_HBM_MASK |
2944 CFG_RST_H_TPC_MASK |
2945 CFG_RST_H_NIC_MASK |
2946 CFG_RST_H_SM_MASK |
2947 CFG_RST_H_DMA_MASK |
2948 CFG_RST_H_MME_MASK |
2949 CFG_RST_H_CPU_MASK |
2950 CFG_RST_H_MMU_MASK));
2951
2952 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
2953 (CFG_RST_L_IF_MASK |
2954 CFG_RST_L_PSOC_MASK |
2955 CFG_RST_L_TPC_MASK));
2956}
2957
2958static int gaudi_hw_init(struct hl_device *hdev)
2959{
2960 int rc;
2961
2962 dev_info(hdev->dev, "Starting initialization of H/W\n");
2963
2964 gaudi_pre_hw_init(hdev);
2965
2966 gaudi_init_pci_dma_qmans(hdev);
2967
2968 gaudi_init_hbm_dma_qmans(hdev);
2969
2970 /*
2971 * Before pushing u-boot/Linux to the device, we need to set the HBM
2972 * BAR to the base address of the DRAM
2973 */
2974 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
2975 dev_err(hdev->dev,
2976 "failed to map HBM bar to DRAM base address\n");
2977 return -EIO;
2978 }
2979
2980 rc = gaudi_init_cpu(hdev);
2981 if (rc) {
2982 dev_err(hdev->dev, "failed to initialize CPU\n");
2983 return rc;
2984 }
2985
2986 /* SRAM scrambler must be initialized after CPU is running from HBM */
2987 gaudi_init_scrambler_sram(hdev);
2988
2989 /* This is here just in case we are working without CPU */
2990 gaudi_init_scrambler_hbm(hdev);
2991
2992 gaudi_init_golden_registers(hdev);
2993
2994 rc = gaudi_mmu_init(hdev);
2995 if (rc)
2996 return rc;
2997
2998 gaudi_init_security(hdev);
2999
3000 gaudi_init_mme_qmans(hdev);
3001
3002 gaudi_init_tpc_qmans(hdev);
3003
3004 gaudi_enable_clock_gating(hdev);
3005
3006 gaudi_enable_timestamp(hdev);
3007
3008 /* MSI must be enabled before CPU queues are initialized */
3009 rc = gaudi_enable_msi(hdev);
3010 if (rc)
3011 goto disable_queues;
3012
3013 /* must be called after MSI was enabled */
3014 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3015 if (rc) {
3016 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3017 rc);
3018 goto disable_msi;
3019 }
3020
3021 /* Perform read from the device to flush all configuration */
3022 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3023
3024 return 0;
3025
3026disable_msi:
3027 gaudi_disable_msi(hdev);
3028disable_queues:
3029 gaudi_disable_mme_qmans(hdev);
3030 gaudi_disable_pci_dma_qmans(hdev);
3031
3032 return rc;
3033}
3034
3035static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3036{
3037 struct gaudi_device *gaudi = hdev->asic_specific;
3038 u32 status, reset_timeout_ms, boot_strap = 0;
3039
3040 if (hdev->pldm) {
3041 if (hard_reset)
3042 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3043 else
3044 reset_timeout_ms = GAUDI_PLDM_SRESET_TIMEOUT_MSEC;
3045 } else {
3046 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3047 }
3048
3049 if (hard_reset) {
3050 /* Tell ASIC not to re-initialize PCIe */
3051 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3052
3053 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3054 /* H/W bug WA:
3055 * rdata[31:0] = strap_read_val;
3056 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3057 */
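/*
 * In other words: bits [30:21] of the strap value are shifted up by
 * one position, a zero is inserted at bit 21 and bits [20:0] are
 * preserved.
 */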
3058 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3059 (boot_strap & 0x001FFFFF));
3060 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3061
3062 /* Restart BTL/BLR upon hard-reset */
3063 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3064
3065 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3066 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3067 dev_info(hdev->dev,
3068 "Issued HARD reset command, going to wait %dms\n",
3069 reset_timeout_ms);
3070 } else {
3071 /* Don't restart BTL/BLR upon soft-reset */
3072 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 0);
3073
3074 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST,
3075 1 << PSOC_GLOBAL_CONF_SOFT_RST_IND_SHIFT);
3076 dev_info(hdev->dev,
3077 "Issued SOFT reset command, going to wait %dms\n",
3078 reset_timeout_ms);
3079 }
3080
3081 /*
3082 * After hard reset, we can't poll the BTM_FSM register because the PSOC
3083 * itself is in reset. We need to wait until the reset is deasserted
3084 */
3085 msleep(reset_timeout_ms);
3086
3087 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3088 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3089 dev_err(hdev->dev,
3090 "Timeout while waiting for device to reset 0x%x\n",
3091 status);
3092
3093 if (!hard_reset) {
3094 gaudi->hw_cap_initialized &= ~(HW_CAP_PCI_DMA | HW_CAP_MME |
3095 HW_CAP_TPC_MASK |
3096 HW_CAP_HBM_DMA);
3097
3098 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3099 GAUDI_EVENT_SOFT_RESET);
3100 return;
3101 }
3102
3103 /* We continue here only for hard-reset */
3104
3105 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3106
3107 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3108 HW_CAP_HBM | HW_CAP_PCI_DMA |
3109 HW_CAP_MME | HW_CAP_TPC_MASK |
3110 HW_CAP_HBM_DMA | HW_CAP_PLL |
3111 HW_CAP_MMU |
3112 HW_CAP_SRAM_SCRAMBLER |
3113 HW_CAP_HBM_SCRAMBLER);
3114 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3115}
3116
3117static int gaudi_suspend(struct hl_device *hdev)
3118{
3119 int rc;
3120
3121 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
3122 if (rc)
3123 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3124
3125 return rc;
3126}
3127
3128static int gaudi_resume(struct hl_device *hdev)
3129{
3130 return gaudi_init_iatu(hdev);
3131}
3132
3133static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3134 u64 kaddress, phys_addr_t paddress, u32 size)
3135{
3136 int rc;
3137
3138 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3139 VM_DONTCOPY | VM_NORESERVE;
3140
3141 rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
3142 size, vma->vm_page_prot);
3143 if (rc)
3144 dev_err(hdev->dev, "remap_pfn_range error %d", rc);
3145
3146 return rc;
3147}
3148
3149static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3150{
3151 struct gaudi_device *gaudi = hdev->asic_specific;
3152 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3153 int dma_id;
3154 bool invalid_queue = false;
3155
3156 switch (hw_queue_id) {
3157 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3158 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3159 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3160 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3161 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3162 break;
3163
3164 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3165 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3166 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3167 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3168 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3169 break;
3170
3171 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3172 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3173 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3174 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3175 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3176 break;
3177
3178 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3179 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3180 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3181 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3182 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3183 break;
3184
3185 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3186 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3187 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3188 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3189 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3190 break;
3191
3192 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3193 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3194 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3195 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3196 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3197 break;
3198
3199 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3200 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3201 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3202 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3203 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3204 break;
3205
3206 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3207 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3208 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3209 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3210 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3211 break;
3212
3213 case GAUDI_QUEUE_ID_CPU_PQ:
3214 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3215 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3216 else
3217 invalid_queue = true;
3218 break;
3219
3220 case GAUDI_QUEUE_ID_MME_0_0:
3221 db_reg_offset = mmMME2_QM_PQ_PI_0;
3222 break;
3223
3224 case GAUDI_QUEUE_ID_MME_0_1:
3225 db_reg_offset = mmMME2_QM_PQ_PI_1;
3226 break;
3227
3228 case GAUDI_QUEUE_ID_MME_0_2:
3229 db_reg_offset = mmMME2_QM_PQ_PI_2;
3230 break;
3231
3232 case GAUDI_QUEUE_ID_MME_0_3:
3233 db_reg_offset = mmMME2_QM_PQ_PI_3;
3234 break;
3235
3236 case GAUDI_QUEUE_ID_MME_1_0:
3237 db_reg_offset = mmMME0_QM_PQ_PI_0;
3238 break;
3239
3240 case GAUDI_QUEUE_ID_MME_1_1:
3241 db_reg_offset = mmMME0_QM_PQ_PI_1;
3242 break;
3243
3244 case GAUDI_QUEUE_ID_MME_1_2:
3245 db_reg_offset = mmMME0_QM_PQ_PI_2;
3246 break;
3247
3248 case GAUDI_QUEUE_ID_MME_1_3:
3249 db_reg_offset = mmMME0_QM_PQ_PI_3;
3250 break;
3251
3252 case GAUDI_QUEUE_ID_TPC_0_0:
3253 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3254 break;
3255
3256 case GAUDI_QUEUE_ID_TPC_0_1:
3257 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3258 break;
3259
3260 case GAUDI_QUEUE_ID_TPC_0_2:
3261 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3262 break;
3263
3264 case GAUDI_QUEUE_ID_TPC_0_3:
3265 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3266 break;
3267
3268 case GAUDI_QUEUE_ID_TPC_1_0:
3269 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3270 break;
3271
3272 case GAUDI_QUEUE_ID_TPC_1_1:
3273 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3274 break;
3275
3276 case GAUDI_QUEUE_ID_TPC_1_2:
3277 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3278 break;
3279
3280 case GAUDI_QUEUE_ID_TPC_1_3:
3281 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3282 break;
3283
3284 case GAUDI_QUEUE_ID_TPC_2_0:
3285 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3286 break;
3287
3288 case GAUDI_QUEUE_ID_TPC_2_1:
3289 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3290 break;
3291
3292 case GAUDI_QUEUE_ID_TPC_2_2:
3293 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3294 break;
3295
3296 case GAUDI_QUEUE_ID_TPC_2_3:
3297 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3298 break;
3299
3300 case GAUDI_QUEUE_ID_TPC_3_0:
3301 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3302 break;
3303
3304 case GAUDI_QUEUE_ID_TPC_3_1:
3305 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3306 break;
3307
3308 case GAUDI_QUEUE_ID_TPC_3_2:
3309 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3310 break;
3311
3312 case GAUDI_QUEUE_ID_TPC_3_3:
3313 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3314 break;
3315
3316 case GAUDI_QUEUE_ID_TPC_4_0:
3317 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3318 break;
3319
3320 case GAUDI_QUEUE_ID_TPC_4_1:
3321 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3322 break;
3323
3324 case GAUDI_QUEUE_ID_TPC_4_2:
3325 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3326 break;
3327
3328 case GAUDI_QUEUE_ID_TPC_4_3:
3329 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3330 break;
3331
3332 case GAUDI_QUEUE_ID_TPC_5_0:
3333 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3334 break;
3335
3336 case GAUDI_QUEUE_ID_TPC_5_1:
3337 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3338 break;
3339
3340 case GAUDI_QUEUE_ID_TPC_5_2:
3341 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3342 break;
3343
3344 case GAUDI_QUEUE_ID_TPC_5_3:
3345 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3346 break;
3347
3348 case GAUDI_QUEUE_ID_TPC_6_0:
3349 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3350 break;
3351
3352 case GAUDI_QUEUE_ID_TPC_6_1:
3353 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3354 break;
3355
3356 case GAUDI_QUEUE_ID_TPC_6_2:
3357 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3358 break;
3359
3360 case GAUDI_QUEUE_ID_TPC_6_3:
3361 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3362 break;
3363
3364 case GAUDI_QUEUE_ID_TPC_7_0:
3365 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3366 break;
3367
3368 case GAUDI_QUEUE_ID_TPC_7_1:
3369 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3370 break;
3371
3372 case GAUDI_QUEUE_ID_TPC_7_2:
3373 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3374 break;
3375
3376 case GAUDI_QUEUE_ID_TPC_7_3:
3377 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3378 break;
3379
3380 default:
3381 invalid_queue = true;
3382 }
3383
3384 if (invalid_queue) {
3385 /* Should never get here */
3386 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3387 hw_queue_id);
3388 return;
3389 }
3390
3391 db_value = pi;
3392
3393 /* ring the doorbell */
3394 WREG32(db_reg_offset, db_value);
3395
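	/*
	 * For the CPU queue, updating the PI register alone is not enough;
	 * the driver also signals the embedded CPU through the GIC
	 * distributor with the PI-update event ID, presumably so the
	 * firmware notices the new producer index without having to poll.
	 */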
3396 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3397 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3398 GAUDI_EVENT_PI_UPDATE);
3399}
3400
3401static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3402 struct hl_bd *bd)
3403{
3404 __le64 *pbd = (__le64 *) bd;
3405
3406	/* The QMANs are on host memory so a simple copy suffices */
3407 pqe[0] = pbd[0];
3408 pqe[1] = pbd[1];
3409}
3410
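/*
 * Host DMA addresses handed to the device are offset by HOST_PHYS_BASE so
 * the device-side address decoders can tell host memory apart from device
 * memory. The allocation/mapping helpers below add this offset and the
 * matching free/unmap helpers subtract it again before calling into the
 * generic DMA API.
 */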
3411static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3412 dma_addr_t *dma_handle, gfp_t flags)
3413{
3414 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3415 dma_handle, flags);
3416
3417 /* Shift to the device's base physical address of host memory */
3418 if (kernel_addr)
3419 *dma_handle += HOST_PHYS_BASE;
3420
3421 return kernel_addr;
3422}
3423
3424static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3425 void *cpu_addr, dma_addr_t dma_handle)
3426{
3427 /* Cancel the device's base physical address of host memory */
3428 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3429
3430 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3431}
3432
3433static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3434 u32 queue_id, dma_addr_t *dma_handle,
3435 u16 *queue_len)
3436{
3437 struct gaudi_device *gaudi = hdev->asic_specific;
3438 struct gaudi_internal_qman_info *q;
3439
3440 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3441 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3442 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3443 return NULL;
3444 }
3445
3446 q = &gaudi->internal_qmans[queue_id];
3447 *dma_handle = q->pq_dma_addr;
3448 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3449
3450 return q->pq_kernel_addr;
3451}
3452
3453static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3454 u16 len, u32 timeout, long *result)
3455{
3456 struct gaudi_device *gaudi = hdev->asic_specific;
3457
3458 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3459 if (result)
3460 *result = 0;
3461 return 0;
3462 }
3463
3464 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3465 timeout, result);
3466}
3467
3468static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3469{
3470 struct packet_msg_prot *fence_pkt;
3471 dma_addr_t pkt_dma_addr;
3472 u32 fence_val, tmp, timeout_usec;
3473 dma_addr_t fence_dma_addr;
3474 u32 *fence_ptr;
3475 int rc;
3476
3477 if (hdev->pldm)
3478 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3479 else
3480 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3481
3482 fence_val = GAUDI_QMAN0_FENCE_VAL;
3483
3484 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3485 &fence_dma_addr);
3486 if (!fence_ptr) {
3487 dev_err(hdev->dev,
3488 "Failed to allocate memory for queue testing\n");
3489 return -ENOMEM;
3490 }
3491
3492 *fence_ptr = 0;
3493
3494 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3495 sizeof(struct packet_msg_prot),
3496 GFP_KERNEL, &pkt_dma_addr);
3497 if (!fence_pkt) {
3498 dev_err(hdev->dev,
3499 "Failed to allocate packet for queue testing\n");
3500 rc = -ENOMEM;
3501 goto free_fence_ptr;
3502 }
3503
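	/*
	 * The queue is tested with a single MSG_PROT packet: when the QMAN
	 * executes it, it writes fence_val to the host buffer at
	 * fence_dma_addr. Seeing that value appear while polling below
	 * proves the whole submission path works. EB/MB are the barrier
	 * bits of the packet control word.
	 */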
3504 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
3505 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
3506 (1 << GAUDI_PKT_CTL_MB_SHIFT);
3507 fence_pkt->ctl = cpu_to_le32(tmp);
3508 fence_pkt->value = cpu_to_le32(fence_val);
3509 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3510
3511 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3512 sizeof(struct packet_msg_prot),
3513 pkt_dma_addr);
3514 if (rc) {
3515 dev_err(hdev->dev,
3516 "Failed to send fence packet\n");
3517 goto free_pkt;
3518 }
3519
3520 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3521 1000, timeout_usec, true);
3522
3523 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3524
3525 if (rc == -ETIMEDOUT) {
3526 dev_err(hdev->dev,
3527 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3528 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3529 rc = -EIO;
3530 }
3531
3532free_pkt:
3533 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3534 pkt_dma_addr);
3535free_fence_ptr:
3536 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3537 fence_dma_addr);
3538 return rc;
3539}
3540
3541static int gaudi_test_cpu_queue(struct hl_device *hdev)
3542{
3543 struct gaudi_device *gaudi = hdev->asic_specific;
3544
3545 /*
3546	 * Check the capability here because send_cpu_message() won't update
3547	 * the result value if the capability is not set
3548 */
3549 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3550 return 0;
3551
3552 return hl_fw_test_cpu_queue(hdev);
3553}
3554
3555static int gaudi_test_queues(struct hl_device *hdev)
3556{
3557 int i, rc, ret_val = 0;
3558
3559 for (i = 0 ; i < HL_MAX_QUEUES ; i++) {
3560 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3561 rc = gaudi_test_queue(hdev, i);
3562 if (rc)
3563 ret_val = -EINVAL;
3564 }
3565 }
3566
3567 rc = gaudi_test_cpu_queue(hdev);
3568 if (rc)
3569 ret_val = -EINVAL;
3570
3571 return ret_val;
3572}
3573
3574static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3575 gfp_t mem_flags, dma_addr_t *dma_handle)
3576{
3577 void *kernel_addr;
3578
3579 if (size > GAUDI_DMA_POOL_BLK_SIZE)
3580 return NULL;
3581
3582 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3583
3584 /* Shift to the device's base physical address of host memory */
3585 if (kernel_addr)
3586 *dma_handle += HOST_PHYS_BASE;
3587
3588 return kernel_addr;
3589}
3590
3591static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3592 dma_addr_t dma_addr)
3593{
3594 /* Cancel the device's base physical address of host memory */
3595 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3596
3597 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3598}
3599
3600static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3601 size_t size, dma_addr_t *dma_handle)
3602{
3603 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3604}
3605
3606static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3607 size_t size, void *vaddr)
3608{
3609 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3610}
3611
3612static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3613 int nents, enum dma_data_direction dir)
3614{
3615 struct scatterlist *sg;
3616 int i;
3617
3618 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3619 return -ENOMEM;
3620
3621 /* Shift to the device's base physical address of host memory */
3622 for_each_sg(sgl, sg, nents, i)
3623 sg->dma_address += HOST_PHYS_BASE;
3624
3625 return 0;
3626}
3627
3628static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3629 int nents, enum dma_data_direction dir)
3630{
3631 struct scatterlist *sg;
3632 int i;
3633
3634 /* Cancel the device's base physical address of host memory */
3635 for_each_sg(sgl, sg, nents, i)
3636 sg->dma_address -= HOST_PHYS_BASE;
3637
3638 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3639}
3640
3641static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3642 struct sg_table *sgt)
3643{
3644 struct scatterlist *sg, *sg_next_iter;
3645 u32 count, dma_desc_cnt;
3646 u64 len, len_next;
3647 dma_addr_t addr, addr_next;
3648
3649 dma_desc_cnt = 0;
3650
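	/*
	 * Each (possibly merged) SG chunk becomes one LIN_DMA packet.
	 * Physically contiguous SG entries are merged as long as the
	 * combined length does not exceed DMA_MAX_TRANSFER_SIZE, so the
	 * returned size is the number of resulting chunks times the size
	 * of a LIN_DMA packet.
	 */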
3651 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3652
3653 len = sg_dma_len(sg);
3654 addr = sg_dma_address(sg);
3655
3656 if (len == 0)
3657 break;
3658
3659 while ((count + 1) < sgt->nents) {
3660 sg_next_iter = sg_next(sg);
3661 len_next = sg_dma_len(sg_next_iter);
3662 addr_next = sg_dma_address(sg_next_iter);
3663
3664 if (len_next == 0)
3665 break;
3666
3667 if ((addr + len == addr_next) &&
3668 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3669 len += len_next;
3670 count++;
3671 sg = sg_next_iter;
3672 } else {
3673 break;
3674 }
3675 }
3676
3677 dma_desc_cnt++;
3678 }
3679
3680 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3681}
3682
3683static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3684 struct hl_cs_parser *parser,
3685 struct packet_lin_dma *user_dma_pkt,
3686 u64 addr, enum dma_data_direction dir)
3687{
3688 struct hl_userptr *userptr;
3689 int rc;
3690
3691 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3692 parser->job_userptr_list, &userptr))
3693 goto already_pinned;
3694
3695 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3696 if (!userptr)
3697 return -ENOMEM;
3698
3699 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3700 userptr);
3701 if (rc)
3702 goto free_userptr;
3703
3704 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3705
3706 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3707 userptr->sgt->nents, dir);
3708 if (rc) {
3709 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3710 goto unpin_memory;
3711 }
3712
3713 userptr->dma_mapped = true;
3714 userptr->dir = dir;
3715
3716already_pinned:
3717 parser->patched_cb_size +=
3718 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3719
3720 return 0;
3721
3722unpin_memory:
3723 hl_unpin_host_memory(hdev, userptr);
3724free_userptr:
3725 kfree(userptr);
3726 return rc;
3727}
3728
3729static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3730 struct hl_cs_parser *parser,
3731 struct packet_lin_dma *user_dma_pkt,
3732 bool src_in_host)
3733{
3734 enum dma_data_direction dir;
3735 bool skip_host_mem_pin = false, user_memset;
3736 u64 addr;
3737 int rc = 0;
3738
3739 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3740 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3741 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3742
3743 if (src_in_host) {
3744 if (user_memset)
3745 skip_host_mem_pin = true;
3746
3747 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3748 dir = DMA_TO_DEVICE;
3749 addr = le64_to_cpu(user_dma_pkt->src_addr);
3750 } else {
3751 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
3752 dir = DMA_FROM_DEVICE;
3753 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3754 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3755 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3756 }
3757
3758 if (skip_host_mem_pin)
3759 parser->patched_cb_size += sizeof(*user_dma_pkt);
3760 else
3761 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
3762 addr, dir);
3763
3764 return rc;
3765}
3766
3767static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
3768 struct hl_cs_parser *parser,
3769 struct packet_lin_dma *user_dma_pkt)
3770{
3771 bool src_in_host = false;
3772 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
3773 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
3774 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
3775
3776 dev_dbg(hdev->dev, "DMA packet details:\n");
3777 dev_dbg(hdev->dev, "source == 0x%llx\n",
3778 le64_to_cpu(user_dma_pkt->src_addr));
3779 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
3780 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
3781
3782 /*
3783 * Special handling for DMA with size 0. Bypass all validations
3784 * because no transactions will be done except for WR_COMP, which
3785 * is not a security issue
3786 */
3787 if (!le32_to_cpu(user_dma_pkt->tsize)) {
3788 parser->patched_cb_size += sizeof(*user_dma_pkt);
3789 return 0;
3790 }
3791
3792 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3793 src_in_host = true;
3794
3795 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
3796 src_in_host);
3797}
3798
3799static int gaudi_validate_load_and_exe_pkt(struct hl_device *hdev,
3800 struct hl_cs_parser *parser,
3801 struct packet_load_and_exe *user_pkt)
3802{
3803 u32 cfg;
3804
3805 cfg = le32_to_cpu(user_pkt->cfg);
3806
3807 if (cfg & GAUDI_PKT_LOAD_AND_EXE_CFG_DST_MASK) {
3808 dev_err(hdev->dev,
3809 "User not allowed to use Load and Execute\n");
3810 return -EPERM;
3811 }
3812
3813 parser->patched_cb_size += sizeof(struct packet_load_and_exe);
3814
3815 return 0;
3816}
3817
3818static int gaudi_validate_cb(struct hl_device *hdev,
3819 struct hl_cs_parser *parser, bool is_mmu)
3820{
3821 u32 cb_parsed_length = 0;
3822 int rc = 0;
3823
3824 parser->patched_cb_size = 0;
3825
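	/*
	 * Walk the user CB packet by packet: privileged packets (MSG_PROT,
	 * CP_DMA, STOP) are rejected, LOAD_AND_EXE is restricted, LIN_DMA
	 * packets may require host memory pinning when the MMU is not used,
	 * and all other packets simply contribute their size to the patched
	 * CB.
	 */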
3826	/* cb_user_size is more than 0 so the loop will always be executed */
3827 while (cb_parsed_length < parser->user_cb_size) {
3828 enum packet_id pkt_id;
3829 u16 pkt_size;
3830 struct gaudi_packet *user_pkt;
3831
3832 user_pkt = (struct gaudi_packet *) (uintptr_t)
3833 (parser->user_cb->kernel_address + cb_parsed_length);
3834
3835 pkt_id = (enum packet_id) (
3836 (le64_to_cpu(user_pkt->header) &
3837 PACKET_HEADER_PACKET_ID_MASK) >>
3838 PACKET_HEADER_PACKET_ID_SHIFT);
3839
3840 pkt_size = gaudi_packet_sizes[pkt_id];
3841 cb_parsed_length += pkt_size;
3842 if (cb_parsed_length > parser->user_cb_size) {
3843 dev_err(hdev->dev,
3844 "packet 0x%x is out of CB boundary\n", pkt_id);
3845 rc = -EINVAL;
3846 break;
3847 }
3848
3849 switch (pkt_id) {
3850 case PACKET_MSG_PROT:
3851 dev_err(hdev->dev,
3852 "User not allowed to use MSG_PROT\n");
3853 rc = -EPERM;
3854 break;
3855
3856 case PACKET_CP_DMA:
3857 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
3858 rc = -EPERM;
3859 break;
3860
3861 case PACKET_STOP:
3862 dev_err(hdev->dev, "User not allowed to use STOP\n");
3863 rc = -EPERM;
3864 break;
3865
3866 case PACKET_LOAD_AND_EXE:
3867 rc = gaudi_validate_load_and_exe_pkt(hdev, parser,
3868 (struct packet_load_and_exe *) user_pkt);
3869 break;
3870
3871 case PACKET_LIN_DMA:
3872 parser->contains_dma_pkt = true;
3873 if (is_mmu)
3874 parser->patched_cb_size += pkt_size;
3875 else
3876 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
3877 (struct packet_lin_dma *) user_pkt);
3878 break;
3879
3880 case PACKET_WREG_32:
3881 case PACKET_WREG_BULK:
3882 case PACKET_MSG_LONG:
3883 case PACKET_MSG_SHORT:
3884 case PACKET_REPEAT:
3885 case PACKET_FENCE:
3886 case PACKET_NOP:
3887 case PACKET_ARB_POINT:
3888 parser->patched_cb_size += pkt_size;
3889 break;
3890
3891 default:
3892 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
3893 pkt_id);
3894 rc = -EINVAL;
3895 break;
3896 }
3897
3898 if (rc)
3899 break;
3900 }
3901
3902 /*
3903 * The new CB should have space at the end for two MSG_PROT packets:
3904 * 1. A packet that will act as a completion packet
3905	 * 2. A packet that will generate an MSI-X interrupt
3906 */
3907 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
3908
3909 return rc;
3910}
3911
3912static int gaudi_patch_dma_packet(struct hl_device *hdev,
3913 struct hl_cs_parser *parser,
3914 struct packet_lin_dma *user_dma_pkt,
3915 struct packet_lin_dma *new_dma_pkt,
3916 u32 *new_dma_pkt_size)
3917{
3918 struct hl_userptr *userptr;
3919 struct scatterlist *sg, *sg_next_iter;
3920 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
3921 u64 len, len_next;
3922 dma_addr_t dma_addr, dma_addr_next;
3923 u64 device_memory_addr, addr;
3924 enum dma_data_direction dir;
3925 struct sg_table *sgt;
3926 bool src_in_host = false;
3927 bool skip_host_mem_pin = false;
3928 bool user_memset;
3929
3930 ctl = le32_to_cpu(user_dma_pkt->ctl);
3931
3932 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
3933 src_in_host = true;
3934
3935 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3936 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3937
3938 if (src_in_host) {
3939 addr = le64_to_cpu(user_dma_pkt->src_addr);
3940 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
3941 dir = DMA_TO_DEVICE;
3942 if (user_memset)
3943 skip_host_mem_pin = true;
3944 } else {
3945 addr = le64_to_cpu(user_dma_pkt->dst_addr);
3946 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
3947 dir = DMA_FROM_DEVICE;
3948 }
3949
3950 if ((!skip_host_mem_pin) &&
3951 (!hl_userptr_is_pinned(hdev, addr,
3952 le32_to_cpu(user_dma_pkt->tsize),
3953 parser->job_userptr_list, &userptr))) {
3954 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
3955 addr, user_dma_pkt->tsize);
3956 return -EFAULT;
3957 }
3958
3959 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
3960 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
3961 *new_dma_pkt_size = sizeof(*user_dma_pkt);
3962 return 0;
3963 }
3964
3965 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
3966
3967 sgt = userptr->sgt;
3968 dma_desc_cnt = 0;
3969
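	/*
	 * Expand the single user LIN_DMA packet into one packet per
	 * (coalesced) SG chunk, using the same merge rule as
	 * gaudi_get_dma_desc_list_size(). The engine-barrier bit is cleared
	 * on every packet except the first, and the user's WR_COMP setting
	 * is stripped here and restored only on the last packet (see the
	 * fix-up after the loop) so completion is signalled just once.
	 */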
3970 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3971 len = sg_dma_len(sg);
3972 dma_addr = sg_dma_address(sg);
3973
3974 if (len == 0)
3975 break;
3976
3977 while ((count + 1) < sgt->nents) {
3978 sg_next_iter = sg_next(sg);
3979 len_next = sg_dma_len(sg_next_iter);
3980 dma_addr_next = sg_dma_address(sg_next_iter);
3981
3982 if (len_next == 0)
3983 break;
3984
3985 if ((dma_addr + len == dma_addr_next) &&
3986 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3987 len += len_next;
3988 count++;
3989 sg = sg_next_iter;
3990 } else {
3991 break;
3992 }
3993 }
3994
3995 new_dma_pkt->ctl = user_dma_pkt->ctl;
3996
3997 ctl = le32_to_cpu(user_dma_pkt->ctl);
3998 if (likely(dma_desc_cnt))
3999 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
4000 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
4001 new_dma_pkt->ctl = cpu_to_le32(ctl);
4002 new_dma_pkt->tsize = cpu_to_le32(len);
4003
4004 if (dir == DMA_TO_DEVICE) {
4005 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
4006 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
4007 } else {
4008 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
4009 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
4010 }
4011
4012 if (!user_memset)
4013 device_memory_addr += len;
4014 dma_desc_cnt++;
4015 new_dma_pkt++;
4016 }
4017
4018 if (!dma_desc_cnt) {
4019 dev_err(hdev->dev,
4020 "Error of 0 SG entries when patching DMA packet\n");
4021 return -EFAULT;
4022 }
4023
4024 /* Fix the last dma packet - wrcomp must be as user set it */
4025 new_dma_pkt--;
4026 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4027
4028 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4029
4030 return 0;
4031}
4032
4033static int gaudi_patch_cb(struct hl_device *hdev,
4034 struct hl_cs_parser *parser)
4035{
4036 u32 cb_parsed_length = 0;
4037 u32 cb_patched_cur_length = 0;
4038 int rc = 0;
4039
4040	/* cb_user_size is more than 0 so the loop will always be executed */
4041 while (cb_parsed_length < parser->user_cb_size) {
4042 enum packet_id pkt_id;
4043 u16 pkt_size;
4044 u32 new_pkt_size = 0;
4045 struct gaudi_packet *user_pkt, *kernel_pkt;
4046
4047 user_pkt = (struct gaudi_packet *) (uintptr_t)
4048 (parser->user_cb->kernel_address + cb_parsed_length);
4049 kernel_pkt = (struct gaudi_packet *) (uintptr_t)
4050 (parser->patched_cb->kernel_address +
4051 cb_patched_cur_length);
4052
4053 pkt_id = (enum packet_id) (
4054 (le64_to_cpu(user_pkt->header) &
4055 PACKET_HEADER_PACKET_ID_MASK) >>
4056 PACKET_HEADER_PACKET_ID_SHIFT);
4057
4058 pkt_size = gaudi_packet_sizes[pkt_id];
4059 cb_parsed_length += pkt_size;
4060 if (cb_parsed_length > parser->user_cb_size) {
4061 dev_err(hdev->dev,
4062 "packet 0x%x is out of CB boundary\n", pkt_id);
4063 rc = -EINVAL;
4064 break;
4065 }
4066
4067 switch (pkt_id) {
4068 case PACKET_LIN_DMA:
4069 rc = gaudi_patch_dma_packet(hdev, parser,
4070 (struct packet_lin_dma *) user_pkt,
4071 (struct packet_lin_dma *) kernel_pkt,
4072 &new_pkt_size);
4073 cb_patched_cur_length += new_pkt_size;
4074 break;
4075
4076 case PACKET_MSG_PROT:
4077 dev_err(hdev->dev,
4078 "User not allowed to use MSG_PROT\n");
4079 rc = -EPERM;
4080 break;
4081
4082 case PACKET_CP_DMA:
4083 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4084 rc = -EPERM;
4085 break;
4086
4087 case PACKET_STOP:
4088 dev_err(hdev->dev, "User not allowed to use STOP\n");
4089 rc = -EPERM;
4090 break;
4091
4092 case PACKET_WREG_32:
4093 case PACKET_WREG_BULK:
4094 case PACKET_MSG_LONG:
4095 case PACKET_MSG_SHORT:
4096 case PACKET_REPEAT:
4097 case PACKET_FENCE:
4098 case PACKET_NOP:
4099 case PACKET_ARB_POINT:
4100 case PACKET_LOAD_AND_EXE:
4101 memcpy(kernel_pkt, user_pkt, pkt_size);
4102 cb_patched_cur_length += pkt_size;
4103 break;
4104
4105 default:
4106 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4107 pkt_id);
4108 rc = -EINVAL;
4109 break;
4110 }
4111
4112 if (rc)
4113 break;
4114 }
4115
4116 return rc;
4117}
4118
4119static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4120 struct hl_cs_parser *parser)
4121{
4122 u64 patched_cb_handle;
4123 u32 patched_cb_size;
4124 struct hl_cb *user_cb;
4125 int rc;
4126
4127 /*
4128	 * The new CB should have space at the end for two MSG_PROT packets:
4129	 * 1. A packet that will act as a completion packet
4130	 * 2. A packet that will generate an MSI interrupt
4131 */
4132 parser->patched_cb_size = parser->user_cb_size +
4133 sizeof(struct packet_msg_prot) * 2;
4134
4135 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4136 parser->patched_cb_size,
4137 &patched_cb_handle, HL_KERNEL_ASID_ID);
4138
4139 if (rc) {
4140 dev_err(hdev->dev,
4141 "Failed to allocate patched CB for DMA CS %d\n",
4142 rc);
4143 return rc;
4144 }
4145
4146 patched_cb_handle >>= PAGE_SHIFT;
4147 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4148 (u32) patched_cb_handle);
4149 /* hl_cb_get should never fail here so use kernel WARN */
4150 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4151 (u32) patched_cb_handle);
4152 if (!parser->patched_cb) {
4153 rc = -EFAULT;
4154 goto out;
4155 }
4156
4157 /*
4158 * The check that parser->user_cb_size <= parser->user_cb->size was done
4159 * in validate_queue_index().
4160 */
4161 memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4162 (void *) (uintptr_t) parser->user_cb->kernel_address,
4163 parser->user_cb_size);
4164
4165 patched_cb_size = parser->patched_cb_size;
4166
4167 /* Validate patched CB instead of user CB */
4168 user_cb = parser->user_cb;
4169 parser->user_cb = parser->patched_cb;
4170 rc = gaudi_validate_cb(hdev, parser, true);
4171 parser->user_cb = user_cb;
4172
4173 if (rc) {
4174 hl_cb_put(parser->patched_cb);
4175 goto out;
4176 }
4177
4178 if (patched_cb_size != parser->patched_cb_size) {
4179 dev_err(hdev->dev, "user CB size mismatch\n");
4180 hl_cb_put(parser->patched_cb);
4181 rc = -EINVAL;
4182 goto out;
4183 }
4184
4185out:
4186 /*
4187	 * Always call cb destroy here because we still hold one reference
4188	 * to it from the earlier cb_get. After the job completes, cb_put
4189	 * will release it, but here we want to remove it from the
4190	 * IDR
4191 */
4192 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4193 patched_cb_handle << PAGE_SHIFT);
4194
4195 return rc;
4196}
4197
4198static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4199 struct hl_cs_parser *parser)
4200{
4201 u64 patched_cb_handle;
4202 int rc;
4203
4204 rc = gaudi_validate_cb(hdev, parser, false);
4205
4206 if (rc)
4207 goto free_userptr;
4208
4209 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4210 parser->patched_cb_size,
4211 &patched_cb_handle, HL_KERNEL_ASID_ID);
4212 if (rc) {
4213 dev_err(hdev->dev,
4214 "Failed to allocate patched CB for DMA CS %d\n", rc);
4215 goto free_userptr;
4216 }
4217
4218 patched_cb_handle >>= PAGE_SHIFT;
4219 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4220 (u32) patched_cb_handle);
4221 /* hl_cb_get should never fail here so use kernel WARN */
4222 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4223 (u32) patched_cb_handle);
4224 if (!parser->patched_cb) {
4225 rc = -EFAULT;
4226 goto out;
4227 }
4228
4229 rc = gaudi_patch_cb(hdev, parser);
4230
4231 if (rc)
4232 hl_cb_put(parser->patched_cb);
4233
4234out:
4235 /*
4236	 * Always call cb destroy here because we still hold one reference
4237	 * to it from the earlier cb_get. After the job completes, cb_put
4238	 * will release it, but here we want to remove it from the
4239	 * IDR
4240 */
4241 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4242 patched_cb_handle << PAGE_SHIFT);
4243
4244free_userptr:
4245 if (rc)
4246 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4247 return rc;
4248}
4249
4250static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4251 struct hl_cs_parser *parser)
4252{
4253 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4254
4255 /* For internal queue jobs just check if CB address is valid */
4256 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4257 parser->user_cb_size,
4258 asic_prop->sram_user_base_address,
4259 asic_prop->sram_end_address))
4260 return 0;
4261
4262 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4263 parser->user_cb_size,
4264 asic_prop->dram_user_base_address,
4265 asic_prop->dram_end_address))
4266 return 0;
4267
4268 /* PMMU and HPMMU addresses are equal, check only one of them */
4269 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4270 parser->user_cb_size,
4271 asic_prop->pmmu.start_addr,
4272 asic_prop->pmmu.end_addr))
4273 return 0;
4274
4275 dev_err(hdev->dev,
4276 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4277 parser->user_cb, parser->user_cb_size);
4278
4279 return -EFAULT;
4280}
4281
4282static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4283{
4284 struct gaudi_device *gaudi = hdev->asic_specific;
4285
4286 if (parser->queue_type == QUEUE_TYPE_INT)
4287 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4288
4289 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4290 return gaudi_parse_cb_mmu(hdev, parser);
4291 else
4292 return gaudi_parse_cb_no_mmu(hdev, parser);
4293}
4294
4295static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4296 u64 kernel_address, u32 len,
4297 u64 cq_addr, u32 cq_val, u32 msi_vec,
4298 bool eb)
4299{
4300 struct gaudi_device *gaudi = hdev->asic_specific;
4301 struct packet_msg_prot *cq_pkt;
4302 u32 tmp;
4303
4304 cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4305 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4306
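	/*
	 * Two MSG_PROT packets are appended at the end of the CB: the first
	 * writes cq_val to the completion queue address and the second
	 * writes 1 to the PCIe MSI register to raise the completion
	 * interrupt. When multi-MSI mode is disabled, everything is steered
	 * to vector 0.
	 */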
4307 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4308 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4309
4310 if (eb)
4311 tmp |= (1 << GAUDI_PKT_CTL_EB_SHIFT);
4312
4313 cq_pkt->ctl = cpu_to_le32(tmp);
4314 cq_pkt->value = cpu_to_le32(cq_val);
4315 cq_pkt->addr = cpu_to_le64(cq_addr);
4316
4317 cq_pkt++;
4318
4319 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4320 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4321 cq_pkt->ctl = cpu_to_le32(tmp);
4322 cq_pkt->value = cpu_to_le32(1);
4323
4324 if (!gaudi->multi_msi_mode)
4325 msi_vec = 0;
4326
4327 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4328}
4329
4330static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4331{
4332 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4333}
4334
4335static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4336 u32 size, u64 val)
4337{
4338 struct packet_lin_dma *lin_dma_pkt;
4339 struct hl_cs_job *job;
4340 u32 cb_size, ctl;
4341 struct hl_cb *cb;
4342 int rc;
4343
4344 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
4345 if (!cb)
4346 return -EFAULT;
4347
4348 lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4349 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4350 cb_size = sizeof(*lin_dma_pkt);
4351
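	/*
	 * With the MEMSET bit set, the LIN_DMA packet treats src_addr as the
	 * 64-bit fill pattern rather than as a source address, so a single
	 * packet fills 'size' bytes at 'addr' with 'val'.
	 */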
4352 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4353 (1 << GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4354 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
4355 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
4356 (1 << GAUDI_PKT_CTL_MB_SHIFT));
4357 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4358 lin_dma_pkt->src_addr = cpu_to_le64(val);
4359 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4360 lin_dma_pkt->tsize = cpu_to_le32(size);
4361
4362 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4363 if (!job) {
4364 dev_err(hdev->dev, "Failed to allocate a new job\n");
4365 rc = -ENOMEM;
4366 goto release_cb;
4367 }
4368
4369 job->id = 0;
4370 job->user_cb = cb;
4371 job->user_cb->cs_cnt++;
4372 job->user_cb_size = cb_size;
4373 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4374 job->patched_cb = job->user_cb;
4375 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4376
4377 hl_debugfs_add_job(hdev, job);
4378
4379 rc = gaudi_send_job_on_qman0(hdev, job);
4380
4381 hl_debugfs_remove_job(hdev, job);
4382 kfree(job);
4383 cb->cs_cnt--;
4384
4385release_cb:
4386 hl_cb_put(cb);
4387 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4388
4389 return rc;
4390}
4391
4392static void gaudi_restore_sm_registers(struct hl_device *hdev)
4393{
4394 int i;
4395
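	/*
	 * Sync objects and monitor status registers are 4 bytes each, hence
	 * the "<< 2" bounds and the 4-byte stride. The west-south block is
	 * only cleared from the first available object/monitor onwards,
	 * presumably because the lower ones are reserved for driver use.
	 */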
4396 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4397 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4398 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4399 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4400 }
4401
4402 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4403 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4404 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4405 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4406 }
4407
4408 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4409
4410 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4411 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4412
4413 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4414
4415 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4416 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4417}
4418
4419static void gaudi_restore_dma_registers(struct hl_device *hdev)
4420{
4421 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4422 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4423 int i;
4424
4425 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4426 u64 sob_addr = CFG_BASE +
4427 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4428 (i * sob_delta);
4429 u32 dma_offset = i * DMA_CORE_OFFSET;
4430
4431 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4432 lower_32_bits(sob_addr));
4433 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4434 upper_32_bits(sob_addr));
4435 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4436
4437 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4438 * modified by the user for SRAM reduction
4439 */
4440 if (i > 1)
4441 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4442 0x00000001);
4443 }
4444}
4445
4446static void gaudi_restore_qm_registers(struct hl_device *hdev)
4447{
4448 u32 qman_offset;
4449 int i;
4450
4451 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4452 qman_offset = i * DMA_QMAN_OFFSET;
4453 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4454 }
4455
4456 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4457 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4458 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4459 }
4460
4461 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4462 qman_offset = i * TPC_QMAN_OFFSET;
4463 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4464 }
4465}
4466
4467static void gaudi_restore_user_registers(struct hl_device *hdev)
4468{
4469 gaudi_restore_sm_registers(hdev);
4470 gaudi_restore_dma_registers(hdev);
4471 gaudi_restore_qm_registers(hdev);
4472}
4473
4474static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4475{
4476 struct asic_fixed_properties *prop = &hdev->asic_prop;
4477 u64 addr = prop->sram_user_base_address;
4478 u32 size = hdev->pldm ? 0x10000 :
4479 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4480 u64 val = 0x7777777777777777ull;
4481 int rc;
4482
4483 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4484 if (rc) {
4485 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4486 return rc;
4487 }
4488
4489 gaudi_mmu_prepare(hdev, asid);
4490
4491 gaudi_restore_user_registers(hdev);
4492
4493 return 0;
4494}
4495
4496static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4497{
4498 struct asic_fixed_properties *prop = &hdev->asic_prop;
4499 struct gaudi_device *gaudi = hdev->asic_specific;
4500 u64 addr = prop->mmu_pgt_addr;
4501 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4502
4503 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4504 return 0;
4505
4506 return gaudi_memset_device_memory(hdev, addr, size, 0);
4507}
4508
4509static void gaudi_restore_phase_topology(struct hl_device *hdev)
4510{
4511
4512}
4513
4514static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4515{
4516 struct asic_fixed_properties *prop = &hdev->asic_prop;
4517 struct gaudi_device *gaudi = hdev->asic_specific;
4518 u64 hbm_bar_addr;
4519 int rc = 0;
4520
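	/*
	 * Route the access by address range: CFG registers are read through
	 * RREG32 (refused while clock gating is enabled), SRAM goes through
	 * its PCI BAR, DRAM/HBM is reached by re-pointing the HBM BAR at the
	 * window containing the address and restoring it afterwards, and
	 * host physical memory is accessed directly when no IOMMU is
	 * present.
	 */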
4521 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4522 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4523 dev_err_ratelimited(hdev->dev,
4524 "Can't read register - clock gating is enabled!\n");
4525 rc = -EFAULT;
4526 } else {
4527 *val = RREG32(addr - CFG_BASE);
4528 }
4529 } else if ((addr >= SRAM_BASE_ADDR) &&
4530 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4531 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4532 (addr - SRAM_BASE_ADDR));
4533 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4534 u64 bar_base_addr = DRAM_PHYS_BASE +
4535 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4536
4537 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4538 if (hbm_bar_addr != U64_MAX) {
4539 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4540 (addr - bar_base_addr));
4541
4542 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4543 hbm_bar_addr);
4544 }
4545 if (hbm_bar_addr == U64_MAX)
4546 rc = -EIO;
4547 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4548 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4549 } else {
4550 rc = -EFAULT;
4551 }
4552
4553 return rc;
4554}
4555
4556static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4557{
4558 struct asic_fixed_properties *prop = &hdev->asic_prop;
4559 struct gaudi_device *gaudi = hdev->asic_specific;
4560 u64 hbm_bar_addr;
4561 int rc = 0;
4562
4563 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4564 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4565 dev_err_ratelimited(hdev->dev,
4566 "Can't write register - clock gating is enabled!\n");
4567 rc = -EFAULT;
4568 } else {
4569 WREG32(addr - CFG_BASE, val);
4570 }
4571 } else if ((addr >= SRAM_BASE_ADDR) &&
4572 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4573 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4574 (addr - SRAM_BASE_ADDR));
4575 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4576 u64 bar_base_addr = DRAM_PHYS_BASE +
4577 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4578
4579 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4580 if (hbm_bar_addr != U64_MAX) {
4581 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4582 (addr - bar_base_addr));
4583
4584 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4585 hbm_bar_addr);
4586 }
4587 if (hbm_bar_addr == U64_MAX)
4588 rc = -EIO;
4589 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4590 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4591 } else {
4592 rc = -EFAULT;
4593 }
4594
4595 return rc;
4596}
4597
4598static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4599{
4600 struct asic_fixed_properties *prop = &hdev->asic_prop;
4601 struct gaudi_device *gaudi = hdev->asic_specific;
4602 u64 hbm_bar_addr;
4603 int rc = 0;
4604
4605 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4606 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4607 dev_err_ratelimited(hdev->dev,
4608 "Can't read register - clock gating is enabled!\n");
4609 rc = -EFAULT;
4610 } else {
4611 u32 val_l = RREG32(addr - CFG_BASE);
4612 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4613
4614 *val = (((u64) val_h) << 32) | val_l;
4615 }
4616 } else if ((addr >= SRAM_BASE_ADDR) &&
4617 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4618 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4619 (addr - SRAM_BASE_ADDR));
4620 } else if (addr <=
4621 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4622 u64 bar_base_addr = DRAM_PHYS_BASE +
4623 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4624
4625 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4626 if (hbm_bar_addr != U64_MAX) {
4627 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4628 (addr - bar_base_addr));
4629
4630 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4631 hbm_bar_addr);
4632 }
4633 if (hbm_bar_addr == U64_MAX)
4634 rc = -EIO;
4635 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4636 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4637 } else {
4638 rc = -EFAULT;
4639 }
4640
4641 return rc;
4642}
4643
4644static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4645{
4646 struct asic_fixed_properties *prop = &hdev->asic_prop;
4647 struct gaudi_device *gaudi = hdev->asic_specific;
4648 u64 hbm_bar_addr;
4649 int rc = 0;
4650
4651 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4652 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4653 dev_err_ratelimited(hdev->dev,
4654 "Can't write register - clock gating is enabled!\n");
4655 rc = -EFAULT;
4656 } else {
4657 WREG32(addr - CFG_BASE, lower_32_bits(val));
4658 WREG32(addr + sizeof(u32) - CFG_BASE,
4659 upper_32_bits(val));
4660 }
4661 } else if ((addr >= SRAM_BASE_ADDR) &&
4662 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4663 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4664 (addr - SRAM_BASE_ADDR));
4665 } else if (addr <=
4666 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4667 u64 bar_base_addr = DRAM_PHYS_BASE +
4668 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4669
4670 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4671 if (hbm_bar_addr != U64_MAX) {
4672 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4673 (addr - bar_base_addr));
4674
4675 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4676 hbm_bar_addr);
4677 }
4678 if (hbm_bar_addr == U64_MAX)
4679 rc = -EIO;
4680 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4681 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4682 } else {
4683 rc = -EFAULT;
4684 }
4685
4686 return rc;
4687}
4688
4689static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4690{
4691 struct gaudi_device *gaudi = hdev->asic_specific;
4692
4693 if (hdev->hard_reset_pending)
4694 return U64_MAX;
4695
4696 return readq(hdev->pcie_bar[HBM_BAR_ID] +
4697 (addr - gaudi->hbm_bar_cur_addr));
4698}
4699
4700static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4701{
4702 struct gaudi_device *gaudi = hdev->asic_specific;
4703
4704 if (hdev->hard_reset_pending)
4705 return;
4706
4707 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4708 (addr - gaudi->hbm_bar_cur_addr));
4709}
4710
4711static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4712{
4713 /* mask to zero the MMBP and ASID bits */
4714 WREG32_AND(reg, ~0x7FF);
4715 WREG32_OR(reg, asid);
4716}
4717
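/*
 * Stamp the context's ASID into the non-secure properties and AxUSER
 * registers of every engine so that their transactions are translated
 * under this context's MMU mappings. Clock gating is disabled around the
 * register writes because configuration registers of a clock-gated block
 * are not guaranteed to be accessible.
 */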
4718static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4719{
4720 struct gaudi_device *gaudi = hdev->asic_specific;
4721
4722 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4723 return;
4724
4725 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4726 WARN(1, "asid %u is too big\n", asid);
4727 return;
4728 }
4729
4730 mutex_lock(&gaudi->clk_gate_mutex);
4731
4732 hdev->asic_funcs->disable_clock_gating(hdev);
4733
4734 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4735 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4736 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4737 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4738 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4739
4740 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4741 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4742 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4743 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4744 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4745
4746 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4747 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4748 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4749 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4750 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4751
4752 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4753 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4754 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4755 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4756 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4757
4758 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4759 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4760 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4761 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4762 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4763
4764 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4765 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4766 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4767 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4768 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4769
4770 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4771 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4772 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4773 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4774 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4775
4776 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4777 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4778 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4779 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4780 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4781
4782 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
4783 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
4784 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
4785 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
4786 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
4787 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
4788 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
4789 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
4790
4791 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4792 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4793 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4794 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4795 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4796 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
4797 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
4798
4799 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4800 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4801 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4802 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4803 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4804 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
4805 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
4806
4807 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4808 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4809 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4810 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4811 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4812 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
4813 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
4814
4815 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4816 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4817 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4818 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4819 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4820 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
4821 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
4822
4823 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4824 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4825 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4826 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4827 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4828 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
4829 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
4830
4831 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4832 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4833 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4834 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4835 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4836 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
4837 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
4838
4839 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4840 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4841 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
4842 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
4843 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
4844 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
4845 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
4846
4847 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
4848 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
4849 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
4850 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
4851 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
4852 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
4853 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
4854
4855 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4856 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4857 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4858 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4859 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4860 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4861 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4862 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4863 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4864 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4865
4866 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
4867 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
4868 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
4869 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
4870 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
4871 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
4872 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
4873 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
4874 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
4875 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
4876 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
4877 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
4878
4879 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
4880 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
4881
4882 hdev->asic_funcs->enable_clock_gating(hdev);
4883
4884 mutex_unlock(&gaudi->clk_gate_mutex);
4885}
4886
4887static int gaudi_send_job_on_qman0(struct hl_device *hdev,
4888 struct hl_cs_job *job)
4889{
4890 struct packet_msg_prot *fence_pkt;
4891 u32 *fence_ptr;
4892 dma_addr_t fence_dma_addr;
4893 struct hl_cb *cb;
4894 u32 tmp, timeout, dma_offset;
4895 int rc;
4896
4897 if (hdev->pldm)
4898 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
4899 else
4900 timeout = HL_DEVICE_TIMEOUT_USEC;
4901
4902 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
4903 dev_err_ratelimited(hdev->dev,
4904 "Can't send driver job on QMAN0 because the device is not idle\n");
4905 return -EBUSY;
4906 }
4907
4908 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
4909 &fence_dma_addr);
4910 if (!fence_ptr) {
4911 dev_err(hdev->dev,
4912 "Failed to allocate fence memory for QMAN0\n");
4913 return -ENOMEM;
4914 }
4915
4916 cb = job->patched_cb;
4917
4918 fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
4919 job->job_cb_size - sizeof(struct packet_msg_prot));
4920
4921 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4922 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
4923 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4924 fence_pkt->ctl = cpu_to_le32(tmp);
4925 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
4926 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
4927
4928 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
4929
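	/*
	 * Temporarily raise the protection bit of the PCI DMA channel so the
	 * driver-issued job runs as a secured/privileged transaction; the
	 * bit is cleared again on the cleanup path below.
	 */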
4930 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
4931
4932 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
4933 job->job_cb_size, cb->bus_address);
4934 if (rc) {
4935 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
4936 goto free_fence_ptr;
4937 }
4938
4939 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
4940 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
4941 timeout, true);
4942
4943 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
4944
4945 if (rc == -ETIMEDOUT) {
4946 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
4947 goto free_fence_ptr;
4948 }
4949
4950free_fence_ptr:
4951 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
4952 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
4953
4954 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
4955 fence_dma_addr);
4956 return rc;
4957}
4958
4959static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
4960{
4961 if (event_type >= GAUDI_EVENT_SIZE)
4962 goto event_not_supported;
4963
4964 if (!gaudi_irq_map_table[event_type].valid)
4965 goto event_not_supported;
4966
4967	snprintf(desc, size, "%s", gaudi_irq_map_table[event_type].name);
4968
4969 return;
4970
4971event_not_supported:
4972 snprintf(desc, size, "N/A");
4973}
4974
4975static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
4976 u32 x_y, bool is_write)
4977{
4978 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
4979
4980 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
4981 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
4982
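	/*
	 * Each DMA_IF routing point is shared by two DMA cores, so the
	 * RAZWI initiator coordinates alone are ambiguous. The per-core
	 * error-cause registers are read to disambiguate; if neither (or
	 * both) of them flags the error, both candidates are reported.
	 */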
4983 switch (x_y) {
4984 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
4985 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
4986 dma_id[0] = 0;
4987 dma_id[1] = 2;
4988 break;
4989 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
4990 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
4991 dma_id[0] = 1;
4992 dma_id[1] = 3;
4993 break;
4994 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
4995 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
4996 dma_id[0] = 4;
4997 dma_id[1] = 6;
4998 break;
4999 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5000 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5001 dma_id[0] = 5;
5002 dma_id[1] = 7;
5003 break;
5004 default:
5005 goto unknown_initiator;
5006 }
5007
5008 for (i = 0 ; i < 2 ; i++) {
5009 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5010 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5011 }
5012
5013 switch (x_y) {
5014 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5015 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5016 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5017 return "DMA0";
5018 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5019 return "DMA2";
5020 else
5021 return "DMA0 or DMA2";
5022 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5023 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5024 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5025 return "DMA1";
5026 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5027 return "DMA3";
5028 else
5029 return "DMA1 or DMA3";
5030 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5031 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5032 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5033 return "DMA4";
5034 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5035 return "DMA6";
5036 else
5037 return "DMA4 or DMA6";
5038 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5039 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5040 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5041 return "DMA5";
5042 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5043 return "DMA7";
5044 else
5045 return "DMA5 or DMA7";
5046 }
5047
5048unknown_initiator:
5049 return "unknown initiator";
5050}
5051
5052static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5053 bool is_write)
5054{
5055 u32 val, x_y, axi_id;
5056
5057 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5058 RREG32(mmMMU_UP_RAZWI_READ_ID);
5059 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5060 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5061 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5062 RAZWI_INITIATOR_AXI_ID_SHIFT);
5063
5064 switch (x_y) {
5065 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5066 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5067 return "TPC0";
5068 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5069 return "NIC0";
5070 break;
5071 case RAZWI_INITIATOR_ID_X_Y_TPC1:
5072 return "TPC1";
5073 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5074 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5075 return "MME0";
5076 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5077 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5078 return "MME1";
5079 case RAZWI_INITIATOR_ID_X_Y_TPC2:
5080 return "TPC2";
5081 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5082 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5083 return "TPC3";
5084 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5085 return "PCI";
5086 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5087 return "CPU";
5088 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5089 return "PSOC";
5090 break;
5091 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5092 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5093 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5094 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5095 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5096 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5097 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5098 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5099 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5100 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5101 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5102 return "TPC4";
5103 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5104 return "NIC1";
5105 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5106 return "NIC2";
5107 break;
5108 case RAZWI_INITIATOR_ID_X_Y_TPC5:
5109 return "TPC5";
5110 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5111 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5112 return "MME2";
5113 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5114 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5115 return "MME3";
5116 case RAZWI_INITIATOR_ID_X_Y_TPC6:
5117 return "TPC6";
5118 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5119 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5120 return "TPC7";
5121 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5122 return "NIC4";
5123 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5124 return "NIC5";
5125 break;
5126 default:
5127 break;
5128 }
5129
5130 dev_err(hdev->dev,
5131 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5132 val,
5133 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5134 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5135 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5136 RAZWI_INITIATOR_AXI_ID_MASK);
5137
5138 return "unknown initiator";
5139}
5140
5141static void gaudi_print_razwi_info(struct hl_device *hdev)
5142{
5143 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5144 dev_err_ratelimited(hdev->dev,
5145 "RAZWI event caused by illegal write of %s\n",
5146 gaudi_get_razwi_initiator_name(hdev, true));
5147 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5148 }
5149
5150 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5151 dev_err_ratelimited(hdev->dev,
5152 "RAZWI event caused by illegal read of %s\n",
5153 gaudi_get_razwi_initiator_name(hdev, false));
5154 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5155 }
5156}
5157
5158static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5159{
5160 struct gaudi_device *gaudi = hdev->asic_specific;
5161 u64 addr;
5162 u32 val;
5163
5164 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5165 return;
5166
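	/*
	 * The faulting VA is captured split across two registers: bits 49:32
	 * in the low bits of the capture register and bits 31:0 in the
	 * dedicated VA register, hence the shift-and-OR below.
	 */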
5167 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5168 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5169 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5170 addr <<= 32;
5171 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5172
5173 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5174 addr);
5175
5176 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5177 }
5178
5179 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5180 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5181 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5182 addr <<= 32;
5183 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5184
5185 dev_err_ratelimited(hdev->dev,
5186 "MMU access error on va 0x%llx\n", addr);
5187
5188 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5189 }
5190}
5191
5192/*
5193 * +-------------------+------------------------------------------------------+
5194 * | Configuration Reg | Description |
5195 * | Address | |
5196 * +-------------------+------------------------------------------------------+
5197 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
5198 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
5199 * | |0xF34 memory wrappers 63:32 |
5200 * | |0xF38 memory wrappers 95:64 |
5201 * | |0xF3C memory wrappers 127:96 |
5202 * +-------------------+------------------------------------------------------+
5203 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
5204 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
5205 * | |0xF44 memory wrappers 63:32 |
5206 * | |0xF48 memory wrappers 95:64 |
5207 * | |0xF4C memory wrappers 127:96 |
5208 * +-------------------+------------------------------------------------------+
5209 */
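/*
 * Worked example: a block with 90 memory wrappers needs 90/32 + 1 = 3
 * indication registers, so a single-error report reads offsets 0xF30,
 * 0xF34 and 0xF38 relative to the block base (0xF40/0xF44/0xF48 for a
 * double error).
 */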
5210static void gaudi_print_ecc_info_generic(struct hl_device *hdev,
5211 const char *block_name,
5212 u64 block_address, int num_memories,
5213 bool derr, bool disable_clock_gating)
5214{
5215 struct gaudi_device *gaudi = hdev->asic_specific;
5216 int num_mem_regs = num_memories / 32 + ((num_memories % 32) ? 1 : 0);
5217
5218 if (block_address >= CFG_BASE)
5219 block_address -= CFG_BASE;
5220
5221 if (derr)
5222 block_address += GAUDI_ECC_DERR0_OFFSET;
5223 else
5224 block_address += GAUDI_ECC_SERR0_OFFSET;
5225
5226 if (disable_clock_gating) {
5227 mutex_lock(&gaudi->clk_gate_mutex);
5228 hdev->asic_funcs->disable_clock_gating(hdev);
5229 }
5230
5231 switch (num_mem_regs) {
5232 case 1:
5233 dev_err(hdev->dev,
5234 "%s ECC indication: 0x%08x\n",
5235 block_name, RREG32(block_address));
5236 break;
5237 case 2:
5238 dev_err(hdev->dev,
5239 "%s ECC indication: 0x%08x 0x%08x\n",
5240 block_name,
5241 RREG32(block_address), RREG32(block_address + 4));
5242 break;
5243 case 3:
5244 dev_err(hdev->dev,
5245 "%s ECC indication: 0x%08x 0x%08x 0x%08x\n",
5246 block_name,
5247 RREG32(block_address), RREG32(block_address + 4),
5248 RREG32(block_address + 8));
5249 break;
5250 case 4:
5251 dev_err(hdev->dev,
5252 "%s ECC indication: 0x%08x 0x%08x 0x%08x 0x%08x\n",
5253 block_name,
5254 RREG32(block_address), RREG32(block_address + 4),
5255 RREG32(block_address + 8), RREG32(block_address + 0xc));
5256 break;
5257 default:
5258 break;
5259
5260 }
5261
5262 if (disable_clock_gating) {
5263 hdev->asic_funcs->enable_clock_gating(hdev);
5264 mutex_unlock(&gaudi->clk_gate_mutex);
5265 }
5266}
5267
5268static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5269 const char *qm_name,
5270 u64 glbl_sts_addr,
5271 u64 arb_err_addr)
5272{
5273 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5274 char reg_desc[32];
5275
5276 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5277 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5278 glbl_sts_clr_val = 0;
5279 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5280
5281 if (!glbl_sts_val)
5282 continue;
5283
5284 if (i == QMAN_STREAMS)
5285 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5286 else
5287 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5288
5289 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5290 if (glbl_sts_val & BIT(j)) {
5291 dev_err_ratelimited(hdev->dev,
5292 "%s %s. err cause: %s\n",
5293 qm_name, reg_desc,
5294 gaudi_qman_error_cause[j]);
5295 glbl_sts_clr_val |= BIT(j);
5296 }
5297 }
5298
5299		/* Write 1 to clear errors */
5300 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5301 }
5302
5303 arb_err_val = RREG32(arb_err_addr);
5304
5305 if (!arb_err_val)
5306 return;
5307
5308 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5309 if (arb_err_val & BIT(j)) {
5310 dev_err_ratelimited(hdev->dev,
5311 "%s ARB_ERR. err cause: %s\n",
5312 qm_name,
5313 gaudi_qman_arb_error_cause[j]);
5314 }
5315 }
5316}
5317
5318static void gaudi_print_ecc_info(struct hl_device *hdev, u16 event_type)
5319{
5320 u64 block_address;
5321 u8 index;
5322 int num_memories;
5323 char desc[32];
5324 bool derr;
5325 bool disable_clock_gating;
5326
5327 switch (event_type) {
5328 case GAUDI_EVENT_PCIE_CORE_SERR:
5329 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
5330 block_address = mmPCIE_CORE_BASE;
5331 num_memories = 51;
5332 derr = false;
5333 disable_clock_gating = false;
5334 break;
5335 case GAUDI_EVENT_PCIE_CORE_DERR:
5336 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
5337 block_address = mmPCIE_CORE_BASE;
5338 num_memories = 51;
5339 derr = true;
5340 disable_clock_gating = false;
5341 break;
5342 case GAUDI_EVENT_PCIE_IF_SERR:
5343 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
5344 block_address = mmPCIE_WRAP_BASE;
5345 num_memories = 11;
5346 derr = false;
5347 disable_clock_gating = false;
5348 break;
5349 case GAUDI_EVENT_PCIE_IF_DERR:
5350 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
5351 block_address = mmPCIE_WRAP_BASE;
5352 num_memories = 11;
5353 derr = true;
5354 disable_clock_gating = false;
5355 break;
5356 case GAUDI_EVENT_PCIE_PHY_SERR:
5357 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
5358 block_address = mmPCIE_PHY_BASE;
5359 num_memories = 4;
5360 derr = false;
5361 disable_clock_gating = false;
5362 break;
5363 case GAUDI_EVENT_PCIE_PHY_DERR:
5364 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
5365 block_address = mmPCIE_PHY_BASE;
5366 num_memories = 4;
5367 derr = true;
5368 disable_clock_gating = false;
5369 break;
5370 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5371 index = event_type - GAUDI_EVENT_TPC0_SERR;
5372 block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5373 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
5374 num_memories = 90;
5375 derr = false;
5376 disable_clock_gating = true;
5377 break;
5378 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5379 index = event_type - GAUDI_EVENT_TPC0_DERR;
5380 block_address =
5381 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5382 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
5383 num_memories = 90;
5384 derr = true;
5385 disable_clock_gating = true;
5386 break;
5387 case GAUDI_EVENT_MME0_ACC_SERR:
5388 case GAUDI_EVENT_MME1_ACC_SERR:
5389 case GAUDI_EVENT_MME2_ACC_SERR:
5390 case GAUDI_EVENT_MME3_ACC_SERR:
5391 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5392 block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5393 snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
5394 num_memories = 128;
5395 derr = false;
5396 disable_clock_gating = true;
5397 break;
5398 case GAUDI_EVENT_MME0_ACC_DERR:
5399 case GAUDI_EVENT_MME1_ACC_DERR:
5400 case GAUDI_EVENT_MME2_ACC_DERR:
5401 case GAUDI_EVENT_MME3_ACC_DERR:
5402 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5403 block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5404 snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
5405 num_memories = 128;
5406 derr = true;
5407 disable_clock_gating = true;
5408 break;
5409 case GAUDI_EVENT_MME0_SBAB_SERR:
5410 case GAUDI_EVENT_MME1_SBAB_SERR:
5411 case GAUDI_EVENT_MME2_SBAB_SERR:
5412 case GAUDI_EVENT_MME3_SBAB_SERR:
5413 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5414 block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5415 snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
5416 num_memories = 33;
5417 derr = false;
5418 disable_clock_gating = true;
5419 break;
5420 case GAUDI_EVENT_MME0_SBAB_DERR:
5421 case GAUDI_EVENT_MME1_SBAB_DERR:
5422 case GAUDI_EVENT_MME2_SBAB_DERR:
5423 case GAUDI_EVENT_MME3_SBAB_DERR:
5424 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5425 block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5426 snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
5427 num_memories = 33;
5428 derr = true;
5429 disable_clock_gating = true;
5430 break;
5431 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5432 index = event_type - GAUDI_EVENT_DMA0_SERR_ECC;
5433 block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
5434 snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
5435 num_memories = 16;
5436 derr = false;
5437 disable_clock_gating = false;
5438 break;
5439 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5440 index = event_type - GAUDI_EVENT_DMA0_DERR_ECC;
5441 block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
5442 snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
5443 num_memories = 16;
5444 derr = true;
5445 disable_clock_gating = false;
5446 break;
5447 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5448 block_address = mmCPU_IF_BASE;
5449 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5450 num_memories = 4;
5451 derr = false;
5452 disable_clock_gating = false;
5453 break;
5454 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5455 block_address = mmCPU_IF_BASE;
5456 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5457 num_memories = 4;
5458 derr = true;
5459 disable_clock_gating = false;
5460 break;
5461 case GAUDI_EVENT_PSOC_MEM_SERR:
5462 block_address = mmPSOC_GLOBAL_CONF_BASE;
5463 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5464 num_memories = 4;
5465 derr = false;
5466 disable_clock_gating = false;
5467 break;
5468 case GAUDI_EVENT_PSOC_MEM_DERR:
5469 block_address = mmPSOC_GLOBAL_CONF_BASE;
5470 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5471 num_memories = 4;
5472 derr = true;
5473 disable_clock_gating = false;
5474 break;
5475 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5476 block_address = mmPSOC_CS_TRACE_BASE;
5477 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5478 num_memories = 2;
5479 derr = false;
5480 disable_clock_gating = false;
5481 break;
5482 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5483 block_address = mmPSOC_CS_TRACE_BASE;
5484 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5485 num_memories = 2;
5486 derr = true;
5487 disable_clock_gating = false;
5488 break;
5489 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5490 index = event_type - GAUDI_EVENT_SRAM0_SERR;
5491 block_address =
5492 mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
5493 snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
5494 num_memories = 2;
5495 derr = false;
5496 disable_clock_gating = false;
5497 break;
5498 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5499 index = event_type - GAUDI_EVENT_SRAM0_DERR;
5500 block_address =
5501 mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
5502 snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
5503 num_memories = 2;
5504 derr = true;
5505 disable_clock_gating = false;
5506 break;
5507 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5508 index = event_type - GAUDI_EVENT_DMA_IF0_SERR;
5509 block_address = mmDMA_IF_W_S_BASE +
5510 index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
5511 snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
5512 num_memories = 60;
5513 derr = false;
5514 disable_clock_gating = false;
5515 break;
5516 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5517 index = event_type - GAUDI_EVENT_DMA_IF0_DERR;
5518 block_address = mmDMA_IF_W_S_BASE +
5519 index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
5520 snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
5521 derr = true;
5522 num_memories = 60;
5523 disable_clock_gating = false;
5524 break;
5525 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5526 index = event_type - GAUDI_EVENT_HBM_0_SERR;
5527 /* HBM Registers are at different offsets */
5528 block_address = mmHBM0_BASE + 0x8000 +
5529 index * (mmHBM1_BASE - mmHBM0_BASE);
5530 snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
5531 derr = false;
5532 num_memories = 64;
5533 disable_clock_gating = false;
5534 break;
5535 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5536		index = event_type - GAUDI_EVENT_HBM_0_DERR;
5537 /* HBM Registers are at different offsets */
5538 block_address = mmHBM0_BASE + 0x8000 +
5539 index * (mmHBM1_BASE - mmHBM0_BASE);
5540 snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
5541 derr = true;
5542 num_memories = 64;
5543 disable_clock_gating = false;
5544 break;
5545 default:
5546 return;
5547 }
5548
5549 gaudi_print_ecc_info_generic(hdev, desc, block_address, num_memories,
5550 derr, disable_clock_gating);
5551}
5552
5553static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
5554{
5555 u64 glbl_sts_addr, arb_err_addr;
5556 u8 index;
5557 char desc[32];
5558
5559 switch (event_type) {
5560 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5561 index = event_type - GAUDI_EVENT_TPC0_QM;
5562 glbl_sts_addr =
5563 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
5564 arb_err_addr =
5565 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
5566 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
5567 break;
5568 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5569 index = event_type - GAUDI_EVENT_MME0_QM;
5570 glbl_sts_addr =
5571 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
5572 arb_err_addr =
5573 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
5574 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
5575 break;
5576 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5577 index = event_type - GAUDI_EVENT_DMA0_QM;
5578 glbl_sts_addr =
5579 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
5580 arb_err_addr =
5581 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
5582 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
5583 break;
5584 default:
5585 return;
5586 }
5587
5588 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
5589}
5590
5591static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
5592 bool razwi)
5593{
ebd8d122 5594 char desc[64] = "";
5595
5596 gaudi_get_event_desc(event_type, desc, sizeof(desc));
5597 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
5598 event_type, desc);
5599
5600 gaudi_print_ecc_info(hdev, event_type);
5601
5602 if (razwi) {
5603 gaudi_print_razwi_info(hdev);
5604 gaudi_print_mmu_error_info(hdev);
5605 }
5606}
5607
5608static int gaudi_soft_reset_late_init(struct hl_device *hdev)
5609{
5610 struct gaudi_device *gaudi = hdev->asic_specific;
5611
5612 /* Unmask all IRQs since some could have been received
5613 * during the soft reset
5614 */
ebd8d122 5615 return hl_fw_unmask_irq_arr(hdev, gaudi->events, sizeof(gaudi->events));
5616}
5617
5618static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
5619{
5620 int ch, err = 0;
5621 u32 base, val, val2;
5622
5623 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
5624 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
5625 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
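		/*
		 * Fold the two status bytes together so a cause bit pending
		 * in either byte is detected and reported below.
		 */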
5626 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5627 if (val) {
5628 err = 1;
5629 dev_err(hdev->dev,
5630 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5631 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
5632 (val >> 2) & 0x1, (val >> 3) & 0x1,
5633 (val >> 4) & 0x1);
5634
5635 val2 = RREG32(base + ch * 0x1000 + 0x060);
5636 dev_err(hdev->dev,
5637 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5638 device, ch * 2,
5639 RREG32(base + ch * 0x1000 + 0x064),
5640 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5641 (val2 & 0xFF0000) >> 16,
5642 (val2 & 0xFF000000) >> 24);
5643 }
5644
5645 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
5646 val = (val & 0xFF) | ((val >> 8) & 0xFF);
5647 if (val) {
5648 err = 1;
5649 dev_err(hdev->dev,
5650 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
5651 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
5652 (val >> 2) & 0x1, (val >> 3) & 0x1,
5653 (val >> 4) & 0x1);
5654
5655 val2 = RREG32(base + ch * 0x1000 + 0x070);
5656 dev_err(hdev->dev,
5657 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
5658 device, ch * 2 + 1,
5659 RREG32(base + ch * 0x1000 + 0x074),
5660 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
5661 (val2 & 0xFF0000) >> 16,
5662 (val2 & 0xFF000000) >> 24);
5663 }
5664
5665 /* Clear interrupts */
5666 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
5667 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
5668 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
5669 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
5670 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
5671 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
5672 }
5673
5674 val = RREG32(base + 0x8F30);
5675 val2 = RREG32(base + 0x8F34);
5676 if (val | val2) {
5677 err = 1;
5678 dev_err(hdev->dev,
5679 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
5680 device, val, val2);
5681 }
5682 val = RREG32(base + 0x8F40);
5683 val2 = RREG32(base + 0x8F44);
5684 if (val | val2) {
5685 err = 1;
5686 dev_err(hdev->dev,
5687 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
5688 device, val, val2);
5689 }
5690
5691 return err;
5692}
5693
5694static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
5695{
5696 switch (hbm_event_type) {
5697 case GAUDI_EVENT_HBM0_SPI_0:
5698 case GAUDI_EVENT_HBM0_SPI_1:
5699 return 0;
5700 case GAUDI_EVENT_HBM1_SPI_0:
5701 case GAUDI_EVENT_HBM1_SPI_1:
5702 return 1;
5703 case GAUDI_EVENT_HBM2_SPI_0:
5704 case GAUDI_EVENT_HBM2_SPI_1:
5705 return 2;
5706 case GAUDI_EVENT_HBM3_SPI_0:
5707 case GAUDI_EVENT_HBM3_SPI_1:
5708 return 3;
5709 default:
5710 break;
5711 }
5712
5713 /* Should never happen */
5714 return 0;
5715}
5716
5717static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
5718 char *interrupt_name)
5719{
5720 struct gaudi_device *gaudi = hdev->asic_specific;
5721 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
5722 bool soft_reset_required = false;
5723
5724	/* Accessing the TPC_INTR_CAUSE registers requires disabling clock
5725	 * gating, so this cannot be done by ArmCP and is done by the driver
5726	 * instead.
5727	 */
5728
5729 mutex_lock(&gaudi->clk_gate_mutex);
5730
5731 hdev->asic_funcs->disable_clock_gating(hdev);
5732
5733 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
5734 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
5735
5736 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
5737 if (tpc_interrupts_cause & BIT(i)) {
5738 dev_err_ratelimited(hdev->dev,
5739 "TPC%d_%s interrupt cause: %s\n",
5740 tpc_id, interrupt_name,
5741 gaudi_tpc_interrupts_cause[i]);
5742			/* If this is a QM error, we need to soft-reset */
5743 if (i == 15)
5744 soft_reset_required = true;
5745 }
5746
5747 /* Clear interrupts */
5748 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
5749
5750 hdev->asic_funcs->enable_clock_gating(hdev);
5751
5752 mutex_unlock(&gaudi->clk_gate_mutex);
5753
5754 return soft_reset_required;
5755}
5756
5757static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
5758{
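	/* DEC events are spaced two per TPC in the event map, hence the
	 * divide by two (shift right by one)
	 */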
5759 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
5760}
5761
5762static int tpc_krn_event_to_tpc_id(u16 tpc_dec_event_type)
5763{
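	/* each TPC owns six consecutive error events in the event map, so
	 * the TPC index is the offset from TPC0_KRN_ERR divided by six
	 */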
5764 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5765}
5766
5767static void gaudi_print_clk_change_info(struct hl_device *hdev,
5768 u16 event_type)
5769{
5770 switch (event_type) {
5771 case GAUDI_EVENT_FIX_POWER_ENV_S:
5772 dev_info_ratelimited(hdev->dev,
5773 "Clock throttling due to power consumption\n");
5774 break;
5775
5776 case GAUDI_EVENT_FIX_POWER_ENV_E:
5777 dev_info_ratelimited(hdev->dev,
5778 "Power envelop is safe, back to optimal clock\n");
5779 break;
5780
5781 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5782 dev_info_ratelimited(hdev->dev,
5783 "Clock throttling due to overheating\n");
5784 break;
5785
5786 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5787 dev_info_ratelimited(hdev->dev,
5788 "Thermal envelop is safe, back to optimal clock\n");
5789 break;
5790
5791 default:
5792 dev_err(hdev->dev, "Received invalid clock change event %d\n",
5793 event_type);
5794 break;
5795 }
5796}
5797
5798static void gaudi_handle_eqe(struct hl_device *hdev,
5799 struct hl_eq_entry *eq_entry)
5800{
5801 struct gaudi_device *gaudi = hdev->asic_specific;
5802 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
5803 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
5804 >> EQ_CTL_EVENT_TYPE_SHIFT);
5805 u8 cause;
66446820 5806 bool reset_required;
5807
5808 gaudi->events_stat[event_type]++;
5809 gaudi->events_stat_aggregate[event_type]++;
5810
5811 switch (event_type) {
5812 case GAUDI_EVENT_PCIE_CORE_DERR:
5813 case GAUDI_EVENT_PCIE_IF_DERR:
5814 case GAUDI_EVENT_PCIE_PHY_DERR:
5815 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5816 case GAUDI_EVENT_MME0_ACC_DERR:
5817 case GAUDI_EVENT_MME0_SBAB_DERR:
5818 case GAUDI_EVENT_MME1_ACC_DERR:
5819 case GAUDI_EVENT_MME1_SBAB_DERR:
5820 case GAUDI_EVENT_MME2_ACC_DERR:
5821 case GAUDI_EVENT_MME2_SBAB_DERR:
5822 case GAUDI_EVENT_MME3_ACC_DERR:
5823 case GAUDI_EVENT_MME3_SBAB_DERR:
5824 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5825 fallthrough;
5826 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5827 case GAUDI_EVENT_PSOC_MEM_DERR:
5828 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5829 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5830 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5831 fallthrough;
5832 case GAUDI_EVENT_GIC500:
5833 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5834 case GAUDI_EVENT_MMU_DERR:
5835 case GAUDI_EVENT_AXI_ECC:
5836 case GAUDI_EVENT_L2_RAM_ECC:
5837 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5838 gaudi_print_irq_info(hdev, event_type, false);
5839 if (hdev->hard_reset_on_fw_events)
5840 hl_device_reset(hdev, true, false);
5841 break;
5842
5843 case GAUDI_EVENT_HBM0_SPI_0:
5844 case GAUDI_EVENT_HBM1_SPI_0:
5845 case GAUDI_EVENT_HBM2_SPI_0:
5846 case GAUDI_EVENT_HBM3_SPI_0:
5847 gaudi_print_irq_info(hdev, event_type, false);
5848 gaudi_hbm_read_interrupts(hdev,
5849 gaudi_hbm_event_to_dev(event_type));
5850 if (hdev->hard_reset_on_fw_events)
5851 hl_device_reset(hdev, true, false);
5852 break;
5853
5854 case GAUDI_EVENT_HBM0_SPI_1:
5855 case GAUDI_EVENT_HBM1_SPI_1:
5856 case GAUDI_EVENT_HBM2_SPI_1:
5857 case GAUDI_EVENT_HBM3_SPI_1:
5858 gaudi_print_irq_info(hdev, event_type, false);
5859 gaudi_hbm_read_interrupts(hdev,
5860 gaudi_hbm_event_to_dev(event_type));
5861 break;
5862
5863 case GAUDI_EVENT_TPC0_DEC:
5864 case GAUDI_EVENT_TPC1_DEC:
5865 case GAUDI_EVENT_TPC2_DEC:
5866 case GAUDI_EVENT_TPC3_DEC:
5867 case GAUDI_EVENT_TPC4_DEC:
5868 case GAUDI_EVENT_TPC5_DEC:
5869 case GAUDI_EVENT_TPC6_DEC:
5870 case GAUDI_EVENT_TPC7_DEC:
5871 gaudi_print_irq_info(hdev, event_type, true);
66446820 5872 reset_required = gaudi_tpc_read_interrupts(hdev,
5873 tpc_dec_event_to_tpc_id(event_type),
5874 "AXI_SLV_DEC_Error");
5875 if (reset_required) {
5876 dev_err(hdev->dev, "hard reset required due to %s\n",
5877 gaudi_irq_map_table[event_type].name);
5878
5879 if (hdev->hard_reset_on_fw_events)
5880 hl_device_reset(hdev, true, false);
5881 } else {
5882 hl_fw_unmask_irq(hdev, event_type);
d7985079 5883 }
5884 break;
5885
5886 case GAUDI_EVENT_TPC0_KRN_ERR:
5887 case GAUDI_EVENT_TPC1_KRN_ERR:
5888 case GAUDI_EVENT_TPC2_KRN_ERR:
5889 case GAUDI_EVENT_TPC3_KRN_ERR:
5890 case GAUDI_EVENT_TPC4_KRN_ERR:
5891 case GAUDI_EVENT_TPC5_KRN_ERR:
5892 case GAUDI_EVENT_TPC6_KRN_ERR:
5893 case GAUDI_EVENT_TPC7_KRN_ERR:
5894 gaudi_print_irq_info(hdev, event_type, true);
66446820 5895 reset_required = gaudi_tpc_read_interrupts(hdev,
5896 tpc_krn_event_to_tpc_id(event_type),
5897 "KRN_ERR");
5898 if (reset_required) {
5899 dev_err(hdev->dev, "hard reset required due to %s\n",
5900 gaudi_irq_map_table[event_type].name);
5901
5902 if (hdev->hard_reset_on_fw_events)
5903 hl_device_reset(hdev, true, false);
5904 } else {
5905 hl_fw_unmask_irq(hdev, event_type);
d7985079 5906 }
5907 break;
5908
5909 case GAUDI_EVENT_PCIE_CORE_SERR:
5910 case GAUDI_EVENT_PCIE_IF_SERR:
5911 case GAUDI_EVENT_PCIE_PHY_SERR:
5912 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5913 case GAUDI_EVENT_MME0_ACC_SERR:
5914 case GAUDI_EVENT_MME0_SBAB_SERR:
5915 case GAUDI_EVENT_MME1_ACC_SERR:
5916 case GAUDI_EVENT_MME1_SBAB_SERR:
5917 case GAUDI_EVENT_MME2_ACC_SERR:
5918 case GAUDI_EVENT_MME2_SBAB_SERR:
5919 case GAUDI_EVENT_MME3_ACC_SERR:
5920 case GAUDI_EVENT_MME3_SBAB_SERR:
5921 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5922 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5923 case GAUDI_EVENT_PSOC_MEM_SERR:
5924 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5925 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5926 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5927 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5928 fallthrough;
5929 case GAUDI_EVENT_MMU_SERR:
5930 case GAUDI_EVENT_PCIE_DEC:
5931 case GAUDI_EVENT_MME0_WBC_RSP:
5932 case GAUDI_EVENT_MME0_SBAB0_RSP:
5933 case GAUDI_EVENT_MME1_WBC_RSP:
5934 case GAUDI_EVENT_MME1_SBAB0_RSP:
5935 case GAUDI_EVENT_MME2_WBC_RSP:
5936 case GAUDI_EVENT_MME2_SBAB0_RSP:
5937 case GAUDI_EVENT_MME3_WBC_RSP:
5938 case GAUDI_EVENT_MME3_SBAB0_RSP:
5939 case GAUDI_EVENT_CPU_AXI_SPLITTER:
5940 case GAUDI_EVENT_PSOC_AXI_DEC:
5941 case GAUDI_EVENT_PSOC_PRSTN_FALL:
5942 case GAUDI_EVENT_MMU_PAGE_FAULT:
5943 case GAUDI_EVENT_MMU_WR_PERM:
5944 case GAUDI_EVENT_RAZWI_OR_ADC:
5945 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5946 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5947 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5948 fallthrough;
5949 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5950 gaudi_print_irq_info(hdev, event_type, true);
5951 gaudi_handle_qman_err(hdev, event_type);
ebd8d122 5952 hl_fw_unmask_irq(hdev, event_type);
5953 break;
5954
5955 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5956 gaudi_print_irq_info(hdev, event_type, true);
5957 if (hdev->hard_reset_on_fw_events)
5958 hl_device_reset(hdev, true, false);
5959 break;
5960
5961 case GAUDI_EVENT_TPC0_BMON_SPMU:
5962 case GAUDI_EVENT_TPC1_BMON_SPMU:
5963 case GAUDI_EVENT_TPC2_BMON_SPMU:
5964 case GAUDI_EVENT_TPC3_BMON_SPMU:
5965 case GAUDI_EVENT_TPC4_BMON_SPMU:
5966 case GAUDI_EVENT_TPC5_BMON_SPMU:
5967 case GAUDI_EVENT_TPC6_BMON_SPMU:
5968 case GAUDI_EVENT_TPC7_BMON_SPMU:
5969 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5970 gaudi_print_irq_info(hdev, event_type, false);
ebd8d122 5971 hl_fw_unmask_irq(hdev, event_type);
5972 break;
5973
5974 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
5975 gaudi_print_clk_change_info(hdev, event_type);
ebd8d122 5976 hl_fw_unmask_irq(hdev, event_type);
5977 break;
5978
5979 case GAUDI_EVENT_PSOC_GPIO_U16_0:
5980 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
5981 dev_err(hdev->dev,
5982 "Received high temp H/W interrupt %d (cause %d)\n",
5983 event_type, cause);
5984 break;
5985
5986 default:
5987 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
5988 event_type);
5989 break;
5990 }
5991}
5992
5993static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
5994 u32 *size)
5995{
5996 struct gaudi_device *gaudi = hdev->asic_specific;
5997
5998 if (aggregate) {
5999 *size = (u32) sizeof(gaudi->events_stat_aggregate);
6000 return gaudi->events_stat_aggregate;
6001 }
6002
6003 *size = (u32) sizeof(gaudi->events_stat);
6004 return gaudi->events_stat;
6005}
6006
8ff5f4fd 6007static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
6008 u32 flags)
6009{
6010 struct gaudi_device *gaudi = hdev->asic_specific;
6011 u32 status, timeout_usec;
6012 int rc;
6013
6014 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
6015 hdev->hard_reset_pending)
8ff5f4fd 6016 return 0;
6017
6018 if (hdev->pldm)
6019 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6020 else
6021 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6022
6023 mutex_lock(&hdev->mmu_cache_lock);
6024
ac0ae6a9 6025 /* L0 & L1 invalidation */
6026 WREG32(mmSTLB_INV_PS, 3);
6027 WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
42d0b0b9 6028 WREG32(mmSTLB_INV_PS, 2);
6029
6030 rc = hl_poll_timeout(
6031 hdev,
42d0b0b9 6032 mmSTLB_INV_PS,
6033 status,
6034 !status,
6035 1000,
6036 timeout_usec);
6037
6038 WREG32(mmSTLB_INV_SET, 0);
6039
ac0ae6a9 6040 mutex_unlock(&hdev->mmu_cache_lock);
6041
6042 if (rc) {
6043 dev_err_ratelimited(hdev->dev,
6044 "MMU cache invalidation timeout\n");
6045 hl_device_reset(hdev, true, false);
6046 }
6047
6048 return rc;
6049}
6050
8ff5f4fd 6051static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
6052 bool is_hard, u32 asid, u64 va, u64 size)
6053{
6054 struct gaudi_device *gaudi = hdev->asic_specific;
6055 u32 status, timeout_usec;
6056 u32 inv_data;
6057 u32 pi;
6058 int rc;
6059
6060 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
6061 hdev->hard_reset_pending)
8ff5f4fd 6062 return 0;
6063
6064 mutex_lock(&hdev->mmu_cache_lock);
6065
6066 if (hdev->pldm)
6067 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6068 else
6069 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6070
6071 /*
6072 * TODO: currently invalidate entire L0 & L1 as in regular hard
6073 * invalidation. Need to apply invalidation of specific cache
6074 * lines with mask of ASID & VA & size.
6075	 * Note that L1 will be flushed entirely in any case.
6076 */
6077
6078 /* L0 & L1 invalidation */
6079 inv_data = RREG32(mmSTLB_CACHE_INV);
6080 /* PI is 8 bit */
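	/* e.g. a current PI of 0xFF wraps back to 0x00 after the increment */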
6081 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
6082 WREG32(mmSTLB_CACHE_INV,
6083 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
6084
6085 rc = hl_poll_timeout(
6086 hdev,
6087 mmSTLB_INV_CONSUMER_INDEX,
6088 status,
6089 status == pi,
6090 1000,
6091 timeout_usec);
6092
ac0ae6a9 6093 mutex_unlock(&hdev->mmu_cache_lock);
6094
6095 if (rc) {
6096 dev_err_ratelimited(hdev->dev,
6097 "MMU cache invalidation timeout\n");
6098 hl_device_reset(hdev, true, false);
6099 }
6100
6101 return rc;
6102}
6103
6104static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6105 u32 asid, u64 phys_addr)
6106{
6107 u32 status, timeout_usec;
6108 int rc;
6109
6110 if (hdev->pldm)
6111 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6112 else
6113 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6114
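	/*
	 * Program the hop0 page-table physical address for this ASID: the PA
	 * is split across two registers (bits 43:12 and 49:44) and MMU_BUSY
	 * is polled until the MMU latches the new configuration.
	 */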
6115 WREG32(MMU_ASID, asid);
6116 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6117 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6118 WREG32(MMU_BUSY, 0x80000000);
6119
6120 rc = hl_poll_timeout(
6121 hdev,
6122 MMU_BUSY,
6123 status,
6124 !(status & 0x80000000),
6125 1000,
6126 timeout_usec);
6127
6128 if (rc) {
6129 dev_err(hdev->dev,
6130 "Timeout during MMU hop0 config of asid %d\n", asid);
6131 return rc;
6132 }
6133
6134 return 0;
6135}
6136
6137static int gaudi_send_heartbeat(struct hl_device *hdev)
6138{
6139 struct gaudi_device *gaudi = hdev->asic_specific;
6140
6141 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6142 return 0;
6143
6144 return hl_fw_send_heartbeat(hdev);
6145}
6146
6147static int gaudi_armcp_info_get(struct hl_device *hdev)
6148{
6149 struct gaudi_device *gaudi = hdev->asic_specific;
6150 struct asic_fixed_properties *prop = &hdev->asic_prop;
6151 int rc;
6152
6153 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6154 return 0;
6155
6156 rc = hl_fw_armcp_info_get(hdev);
6157 if (rc)
6158 return rc;
6159
6160 if (!strlen(prop->armcp_info.card_name))
6161 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6162 CARD_NAME_MAX_LEN);
6163
6164 return 0;
6165}
6166
6167static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
6168 struct seq_file *s)
6169{
6170 struct gaudi_device *gaudi = hdev->asic_specific;
6171 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6172 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6173 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6174 bool is_idle = true, is_eng_idle, is_slave;
6175 u64 offset;
6176 int i, dma_id;
6177
6178 mutex_lock(&gaudi->clk_gate_mutex);
6179
6180 hdev->asic_funcs->disable_clock_gating(hdev);
6181
6182 if (s)
6183 seq_puts(s,
6184 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6185 "--- ------- ------------ ---------- -------------\n");
6186
6187 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6188 dma_id = gaudi_dma_assignment[i];
6189 offset = dma_id * DMA_QMAN_OFFSET;
6190
6191 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6192 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6193 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6194 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6195 IS_DMA_IDLE(dma_core_sts0);
6196 is_idle &= is_eng_idle;
6197
6198 if (mask)
6199 *mask |= !is_eng_idle <<
6200 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6201 if (s)
6202 seq_printf(s, fmt, dma_id,
6203 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6204 qm_cgm_sts, dma_core_sts0);
6205 }
6206
6207 if (s)
6208 seq_puts(s,
6209 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
6210 "--- ------- ------------ ---------- ----------\n");
6211
6212 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6213 offset = i * TPC_QMAN_OFFSET;
6214 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6215 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6216 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6217 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6218 IS_TPC_IDLE(tpc_cfg_sts);
6219 is_idle &= is_eng_idle;
6220
6221 if (mask)
6222 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_TPC_0 + i);
6223 if (s)
6224 seq_printf(s, fmt, i,
6225 is_eng_idle ? "Y" : "N",
6226 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6227 }
6228
6229 if (s)
6230 seq_puts(s,
6231 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
6232 "--- ------- ------------ ---------- -----------\n");
6233
6234 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6235 offset = i * MME_QMAN_OFFSET;
6236 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6237 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6238
6239 /* MME 1 & 3 are slaves, no need to check their QMANs */
6240 is_slave = i % 2;
6241 if (!is_slave) {
6242 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6243 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6244 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6245 }
6246
6247 is_idle &= is_eng_idle;
6248
6249 if (mask)
6250 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_MME_0 + i);
6251 if (s) {
6252 if (!is_slave)
6253 seq_printf(s, fmt, i,
6254 is_eng_idle ? "Y" : "N",
6255 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6256 else
6257 seq_printf(s, mme_slave_fmt, i,
6258 is_eng_idle ? "Y" : "N", "-",
6259 "-", mme_arch_sts);
6260 }
6261 }
6262
6263 if (s)
6264 seq_puts(s, "\n");
6265
6266 hdev->asic_funcs->enable_clock_gating(hdev);
6267
6268 mutex_unlock(&gaudi->clk_gate_mutex);
6269
6270 return is_idle;
6271}
6272
6273static void gaudi_hw_queues_lock(struct hl_device *hdev)
6274 __acquires(&gaudi->hw_queues_lock)
6275{
6276 struct gaudi_device *gaudi = hdev->asic_specific;
6277
6278 spin_lock(&gaudi->hw_queues_lock);
6279}
6280
6281static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6282 __releases(&gaudi->hw_queues_lock)
6283{
6284 struct gaudi_device *gaudi = hdev->asic_specific;
6285
6286 spin_unlock(&gaudi->hw_queues_lock);
6287}
6288
6289static u32 gaudi_get_pci_id(struct hl_device *hdev)
6290{
6291 return hdev->pdev->device;
6292}
6293
6294static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6295 size_t max_size)
6296{
6297 struct gaudi_device *gaudi = hdev->asic_specific;
6298
6299 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6300 return 0;
6301
6302 return hl_fw_get_eeprom_data(hdev, data, max_size);
6303}
6304
6305/*
6306 * this function should be used only during initialization and/or after reset,
6307 * when there are no active users.
6308 */
6309static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6310 u32 tpc_id)
6311{
6312 struct gaudi_device *gaudi = hdev->asic_specific;
6313 u64 kernel_timeout;
6314 u32 status, offset;
6315 int rc;
6316
6317 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6318
6319 if (hdev->pldm)
6320 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6321 else
6322 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6323
6324 mutex_lock(&gaudi->clk_gate_mutex);
6325
6326 hdev->asic_funcs->disable_clock_gating(hdev);
6327
6328 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6329 lower_32_bits(tpc_kernel));
6330 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6331 upper_32_bits(tpc_kernel));
6332
6333 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6334 lower_32_bits(tpc_kernel));
6335 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6336 upper_32_bits(tpc_kernel));
6337 /* set a valid LUT pointer, content is of no significance */
6338 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6339 lower_32_bits(tpc_kernel));
6340 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6341 upper_32_bits(tpc_kernel));
6342
6343 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6344 lower_32_bits(CFG_BASE +
6345 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6346
6347 WREG32(mmTPC0_CFG_TPC_CMD + offset,
6348 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6349 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
6350 /* wait a bit for the engine to start executing */
6351 usleep_range(1000, 1500);
6352
6353 /* wait until engine has finished executing */
6354 rc = hl_poll_timeout(
6355 hdev,
6356 mmTPC0_CFG_STATUS + offset,
6357 status,
6358 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6359 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6360 1000,
6361 kernel_timeout);
6362
6363 if (rc) {
6364 dev_err(hdev->dev,
6365 "Timeout while waiting for TPC%d icache prefetch\n",
6366 tpc_id);
6367 hdev->asic_funcs->enable_clock_gating(hdev);
6368 mutex_unlock(&gaudi->clk_gate_mutex);
6369 return -EIO;
6370 }
6371
6372 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6373 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6374
6375 /* wait a bit for the engine to start executing */
6376 usleep_range(1000, 1500);
6377
6378 /* wait until engine has finished executing */
6379 rc = hl_poll_timeout(
6380 hdev,
6381 mmTPC0_CFG_STATUS + offset,
6382 status,
6383 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6384 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6385 1000,
6386 kernel_timeout);
6387
6388 rc = hl_poll_timeout(
6389 hdev,
6390 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6391 status,
6392 (status == 0),
6393 1000,
6394 kernel_timeout);
6395
6396 hdev->asic_funcs->enable_clock_gating(hdev);
6397 mutex_unlock(&gaudi->clk_gate_mutex);
6398
6399 if (rc) {
6400 dev_err(hdev->dev,
6401 "Timeout while waiting for TPC%d kernel to execute\n",
6402 tpc_id);
6403 return -EIO;
6404 }
6405
6406 return 0;
6407}
6408
6409static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6410{
6411 return RREG32(mmHW_STATE);
6412}
6413
6414static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6415{
6416 return gaudi_cq_assignment[cq_idx];
6417}
6418
6419static void gaudi_ext_queue_init(struct hl_device *hdev, u32 q_idx)
6420{
6421 struct gaudi_device *gaudi = hdev->asic_specific;
6422 struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
6423 struct hl_hw_sob *hw_sob;
6424 int sob, ext_idx = gaudi->ext_queue_idx++;
6425
6426 /*
6427 * The external queues might not sit sequentially, hence use the
6428 * real external queue index for the SOB/MON base id.
6429 */
6430 hw_queue->base_sob_id = ext_idx * HL_RSVD_SOBS;
6431 hw_queue->base_mon_id = ext_idx * HL_RSVD_MONS;
6432 hw_queue->next_sob_val = 1;
6433 hw_queue->curr_sob_offset = 0;
6434
6435 for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
6436 hw_sob = &hw_queue->hw_sob[sob];
6437 hw_sob->hdev = hdev;
6438 hw_sob->sob_id = hw_queue->base_sob_id + sob;
6439 hw_sob->q_idx = q_idx;
6440 kref_init(&hw_sob->kref);
6441 }
6442}
6443
6444static void gaudi_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
6445{
6446 struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
6447
6448 /*
6449 * In case we got here due to a stuck CS, the refcnt might be bigger
6450 * than 1 and therefore we reset it.
6451 */
6452 kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
6453 hw_queue->curr_sob_offset = 0;
6454 hw_queue->next_sob_val = 1;
6455}
6456
6457static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
6458{
6459 return sizeof(struct packet_msg_short) +
6460 sizeof(struct packet_msg_prot) * 2;
6461}
6462
6463static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
6464{
6465 return sizeof(struct packet_msg_short) * 4 +
6466 sizeof(struct packet_fence) +
6467 sizeof(struct packet_msg_prot) * 2;
6468}
6469
6470static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
6471{
6472 struct hl_cb *cb = (struct hl_cb *) data;
6473 struct packet_msg_short *pkt;
6474 u32 value, ctl;
6475
6476 pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
6477 memset(pkt, 0, sizeof(*pkt));
6478
6479 value = 1 << GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_SHIFT; /* inc by 1 */
6480 value |= 1 << GAUDI_PKT_SHORT_VAL_SOB_MOD_SHIFT; /* add mode */
6481
6482 ctl = (sob_id * 4) << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT; /* SOB id */
6483 ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
6484 ctl |= 3 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S SOB base */
6485 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6486 ctl |= 1 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6487 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6488 ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
6489
6490 pkt->value = cpu_to_le32(value);
6491 pkt->ctl = cpu_to_le32(ctl);
6492}
6493
6494static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
6495 u16 addr)
6496{
6497 u32 ctl, pkt_size = sizeof(*pkt);
6498
6499 memset(pkt, 0, pkt_size);
6500
6501 ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
6502 ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
6503 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6504 ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6505 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6506 ctl |= 0 << GAUDI_PKT_SHORT_CTL_MB_SHIFT; /* only last pkt needs MB */
6507
6508 pkt->value = cpu_to_le32(value);
6509 pkt->ctl = cpu_to_le32(ctl);
6510
6511 return pkt_size;
6512}
6513
6514static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
6515 u16 sob_val, u16 addr)
6516{
6517 u32 ctl, value, pkt_size = sizeof(*pkt);
6518 u8 mask = ~(1 << (sob_id & 0x7));
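	/*
	 * SOBs are armed in groups of eight: sob_id / 8 selects the sync
	 * group and the mask clears only bit (sob_id % 8), e.g. sob_id 10
	 * maps to group 1 with mask 0xFB.
	 */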
6519
6520 memset(pkt, 0, pkt_size);
6521
6522 value = (sob_id / 8) << GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_SHIFT;
6523 value |= sob_val << GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_SHIFT;
6524 value |= 0 << GAUDI_PKT_SHORT_VAL_MON_MODE_SHIFT; /* GREATER_OR_EQUAL */
6525 value |= mask << GAUDI_PKT_SHORT_VAL_MON_MASK_SHIFT;
6526
6527 ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
6528 ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
6529 ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
6530 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
6531 ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
6532 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
6533 ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
6534
6535 pkt->value = cpu_to_le32(value);
6536 pkt->ctl = cpu_to_le32(ctl);
6537
6538 return pkt_size;
6539}
6540
6541static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
6542{
6543 u32 ctl, cfg, pkt_size = sizeof(*pkt);
6544
6545 memset(pkt, 0, pkt_size);
6546
6547 cfg = 1 << GAUDI_PKT_FENCE_CFG_DEC_VAL_SHIFT;
6548 cfg |= 1 << GAUDI_PKT_FENCE_CFG_TARGET_VAL_SHIFT;
6549 cfg |= 2 << GAUDI_PKT_FENCE_CFG_ID_SHIFT;
6550
6551 ctl = 0 << GAUDI_PKT_FENCE_CTL_PRED_SHIFT;
6552 ctl |= PACKET_FENCE << GAUDI_PKT_FENCE_CTL_OPCODE_SHIFT;
6553 ctl |= 0 << GAUDI_PKT_FENCE_CTL_EB_SHIFT;
6554 ctl |= 1 << GAUDI_PKT_FENCE_CTL_RB_SHIFT;
6555 ctl |= 1 << GAUDI_PKT_FENCE_CTL_MB_SHIFT;
6556
6557 pkt->cfg = cpu_to_le32(cfg);
6558 pkt->ctl = cpu_to_le32(ctl);
6559
6560 return pkt_size;
6561}
6562
6563static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
6564 u16 sob_val, u16 mon_id, u32 q_idx)
6565{
6566 struct hl_cb *cb = (struct hl_cb *) data;
6567 void *buf = (void *) (uintptr_t) cb->kernel_address;
6568 u64 monitor_base, fence_addr = 0;
6569 u32 size = 0;
6570 u16 msg_addr_offset;
6571
6572 switch (q_idx) {
6573 case GAUDI_QUEUE_ID_DMA_0_0:
6574 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
6575 break;
6576 case GAUDI_QUEUE_ID_DMA_0_1:
6577 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
6578 break;
6579 case GAUDI_QUEUE_ID_DMA_0_2:
6580 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
6581 break;
6582 case GAUDI_QUEUE_ID_DMA_0_3:
6583 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
6584 break;
6585 case GAUDI_QUEUE_ID_DMA_1_0:
6586 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
6587 break;
6588 case GAUDI_QUEUE_ID_DMA_1_1:
6589 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
6590 break;
6591 case GAUDI_QUEUE_ID_DMA_1_2:
6592 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
6593 break;
6594 case GAUDI_QUEUE_ID_DMA_1_3:
6595 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
6596 break;
6597 case GAUDI_QUEUE_ID_DMA_5_0:
6598 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
6599 break;
6600 case GAUDI_QUEUE_ID_DMA_5_1:
6601 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
6602 break;
6603 case GAUDI_QUEUE_ID_DMA_5_2:
6604 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
6605 break;
6606 case GAUDI_QUEUE_ID_DMA_5_3:
6607 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
6608 break;
6609 default:
6610 /* queue index should be valid here */
6611 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
6612 q_idx);
6613 return;
6614 }
6615
6616 fence_addr += CFG_BASE;
6617
6618 /*
6619 * monitor_base should be the content of the base0 address registers,
6620 * so it will be added to the msg short offsets
6621 */
6622 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
6623
6624 /* First monitor config packet: low address of the sync */
6625 msg_addr_offset =
6626 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
6627 monitor_base;
6628
6629 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
6630 msg_addr_offset);
6631
6632 /* Second monitor config packet: high address of the sync */
6633 msg_addr_offset =
6634 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
6635 monitor_base;
6636
6637 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
6638 msg_addr_offset);
6639
6640 /*
6641 * Third monitor config packet: the payload, i.e. what to write when the
6642 * sync triggers
6643 */
6644 msg_addr_offset =
6645 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
6646 monitor_base;
6647
6648 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
6649
6650 /* Fourth monitor config packet: bind the monitor to a sync object */
6651 msg_addr_offset =
6652 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
6653 monitor_base;
6654 size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
6655 msg_addr_offset);
6656
6657 /* Fence packet */
6658 size += gaudi_add_fence_pkt(buf + size);
6659}
6660
6661static void gaudi_reset_sob(struct hl_device *hdev, void *data)
6662{
6663 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
6664
6665 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
6666 hw_sob->sob_id);
6667
6668 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
6669 0);
6670
6671 kref_init(&hw_sob->kref);
6672}
6673
6674static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
6675{
6676 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
6677 HL_POWER9_HOST_MAGIC) {
6678 hdev->power9_64bit_dma_enable = 1;
6679 hdev->dma_mask = 64;
6680 } else {
6681 hdev->power9_64bit_dma_enable = 0;
6682 hdev->dma_mask = 48;
6683 }
6684}
6685
6686static u64 gaudi_get_device_time(struct hl_device *hdev)
6687{
6688 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
6689
6690 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
6691}
6692
6693static const struct hl_asic_funcs gaudi_funcs = {
6694 .early_init = gaudi_early_init,
6695 .early_fini = gaudi_early_fini,
6696 .late_init = gaudi_late_init,
6697 .late_fini = gaudi_late_fini,
6698 .sw_init = gaudi_sw_init,
6699 .sw_fini = gaudi_sw_fini,
6700 .hw_init = gaudi_hw_init,
6701 .hw_fini = gaudi_hw_fini,
6702 .halt_engines = gaudi_halt_engines,
6703 .suspend = gaudi_suspend,
6704 .resume = gaudi_resume,
6705 .cb_mmap = gaudi_cb_mmap,
6706 .ring_doorbell = gaudi_ring_doorbell,
6707 .pqe_write = gaudi_pqe_write,
6708 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
6709 .asic_dma_free_coherent = gaudi_dma_free_coherent,
6710 .get_int_queue_base = gaudi_get_int_queue_base,
6711 .test_queues = gaudi_test_queues,
6712 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
6713 .asic_dma_pool_free = gaudi_dma_pool_free,
6714 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
6715 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
6716 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
6717 .cs_parser = gaudi_cs_parser,
6718 .asic_dma_map_sg = gaudi_dma_map_sg,
6719 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
6720 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
6721 .update_eq_ci = gaudi_update_eq_ci,
6722 .context_switch = gaudi_context_switch,
6723 .restore_phase_topology = gaudi_restore_phase_topology,
6724 .debugfs_read32 = gaudi_debugfs_read32,
6725 .debugfs_write32 = gaudi_debugfs_write32,
6726 .debugfs_read64 = gaudi_debugfs_read64,
6727 .debugfs_write64 = gaudi_debugfs_write64,
bcaf4152 6728 .add_device_attr = gaudi_add_device_attr,
ac0ae6a9 6729 .handle_eqe = gaudi_handle_eqe,
bcaf4152 6730 .set_pll_profile = gaudi_set_pll_profile,
6731 .get_events_stat = gaudi_get_events_stat,
6732 .read_pte = gaudi_read_pte,
6733 .write_pte = gaudi_write_pte,
6734 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
6735 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
6736 .send_heartbeat = gaudi_send_heartbeat,
6737 .enable_clock_gating = gaudi_enable_clock_gating,
6738 .disable_clock_gating = gaudi_disable_clock_gating,
79fc7a9f 6739 .debug_coresight = gaudi_debug_coresight,
6740 .is_device_idle = gaudi_is_device_idle,
6741 .soft_reset_late_init = gaudi_soft_reset_late_init,
6742 .hw_queues_lock = gaudi_hw_queues_lock,
6743 .hw_queues_unlock = gaudi_hw_queues_unlock,
6744 .get_pci_id = gaudi_get_pci_id,
6745 .get_eeprom_data = gaudi_get_eeprom_data,
6746 .send_cpu_message = gaudi_send_cpu_message,
6747 .get_hw_state = gaudi_get_hw_state,
6748 .pci_bars_map = gaudi_pci_bars_map,
6749 .set_dram_bar_base = gaudi_set_hbm_bar_base,
6750 .init_iatu = gaudi_init_iatu,
6751 .rreg = hl_rreg,
6752 .wreg = hl_wreg,
79fc7a9f 6753 .halt_coresight = gaudi_halt_coresight,
bcaf4152 6754 .get_clk_rate = gaudi_get_clk_rate,
6755 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
6756 .read_device_fw_version = gaudi_read_device_fw_version,
6757 .load_firmware_to_device = gaudi_load_firmware_to_device,
6758 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
6759 .ext_queue_init = gaudi_ext_queue_init,
6760 .ext_queue_reset = gaudi_ext_queue_reset,
6761 .get_signal_cb_size = gaudi_get_signal_cb_size,
6762 .get_wait_cb_size = gaudi_get_wait_cb_size,
6763 .gen_signal_cb = gaudi_gen_signal_cb,
6764 .gen_wait_cb = gaudi_gen_wait_cb,
6765 .reset_sob = gaudi_reset_sob,
6766 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
6767 .get_device_time = gaudi_get_device_time
6768};
6769
6770/**
6771 * gaudi_set_asic_funcs - set GAUDI function pointers
6772 *
6773 * @hdev: pointer to hl_device structure
6774 *
6775 */
6776void gaudi_set_asic_funcs(struct hl_device *hdev)
6777{
6778 hdev->asic_funcs = &gaudi_funcs;
6779}