1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2020 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#include "gaudiP.h"
9#include "include/hw_ip/mmu/mmu_general.h"
10#include "include/hw_ip/mmu/mmu_v1_1.h"
11#include "include/gaudi/gaudi_masks.h"
12#include "include/gaudi/gaudi_fw_if.h"
13#include "include/gaudi/gaudi_reg_map.h"
14#include "include/gaudi/gaudi_async_ids_map.h"
15
16#include <linux/module.h>
17#include <linux/pci.h>
18#include <linux/firmware.h>
19#include <linux/hwmon.h>
20#include <linux/genalloc.h>
21#include <linux/io-64-nonatomic-lo-hi.h>
22#include <linux/iommu.h>
23#include <linux/seq_file.h>
24
25/*
26 * Gaudi security scheme:
27 *
28 * 1. Host is protected by:
29 * - Range registers
30 * - MMU
31 *
32 * 2. DDR is protected by:
33 * - Range registers (protect the first 512MB)
34 *
35 * 3. Configuration is protected by:
36 * - Range registers
37 * - Protection bits
38 *
39 * MMU is always enabled.
40 *
 41 * QMAN DMA channels 0,1,5 (PCI DMA):
42 * - DMA is not secured.
43 * - PQ and CQ are secured.
 44 * - CP is secured: the driver needs to parse the CB, but WREG must be allowed
 45 * because of TDMA (tensor DMA). Hence, WREG is never
 46 * secured.
47 *
 48 * When the driver needs to use DMA, it checks that Gaudi is idle, sets DMA
 49 * channel 0 to secured, executes the DMA and changes it back to non-secured.
 50 * Currently, the driver does not use DMA while compute jobs are
 51 * running.
52 *
53 * The current use cases for the driver to use the DMA are:
54 * - Clear SRAM on context switch (happens on context switch when device is
55 * idle)
56 * - MMU page tables area clear (happens on init)
57 *
58 * QMAN DMA 2-4,6,7, TPC, MME, NIC:
59 * PQ is secured and is located on the Host (HBM CON TPC3 bug)
60 * CQ, CP and the engine are not secured
61 *
62 */
63
64#define GAUDI_BOOT_FIT_FILE "habanalabs/gaudi/gaudi-boot-fit.itb"
65#define GAUDI_LINUX_FW_FILE "habanalabs/gaudi/gaudi-fit.itb"
66#define GAUDI_TPC_FW_FILE "habanalabs/gaudi/gaudi_tpc.bin"
67
68#define GAUDI_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
69
70#define GAUDI_RESET_TIMEOUT_MSEC 1000 /* 1000ms */
71#define GAUDI_RESET_WAIT_MSEC 1 /* 1ms */
72#define GAUDI_CPU_RESET_WAIT_MSEC 200 /* 200ms */
73#define GAUDI_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
74
75#define GAUDI_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
76#define GAUDI_PLDM_HRESET_TIMEOUT_MSEC 20000 /* 20s */
77#define GAUDI_PLDM_SRESET_TIMEOUT_MSEC 14000 /* 14s */
78#define GAUDI_PLDM_TEST_QUEUE_WAIT_USEC 1000000 /* 1s */
79#define GAUDI_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
80#define GAUDI_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
81#define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
82#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC 1000000 /* 1s */
83
84#define GAUDI_QMAN0_FENCE_VAL 0x72E91AB9
85
86#define GAUDI_MAX_STRING_LEN 20
87
88#define GAUDI_CB_POOL_CB_CNT 512
89#define GAUDI_CB_POOL_CB_SIZE 0x20000 /* 128KB */
90
91#define GAUDI_ALLOC_CPU_MEM_RETRY_CNT 3
92
93#define GAUDI_NUM_OF_TPC_INTR_CAUSE 20
94
95#define GAUDI_NUM_OF_QM_ERR_CAUSE 16
96
97#define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3
98
99#define GAUDI_ARB_WDT_TIMEOUT 0x400000
100
101static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
102 "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
103 "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
104 "gaudi cq 5_0", "gaudi cq 5_1", "gaudi cq 5_2", "gaudi cq 5_3",
105 "gaudi cpu eq"
106};
107
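/*
 * Map logical DMA engines to physical DMA channels. Channels 0, 1 and 5
 * serve as PCI DMA engines and the remaining channels (2-4, 6, 7) serve as
 * HBM DMA engines, matching the QMAN split described in the header comment.
 */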
108static const u8 gaudi_dma_assignment[GAUDI_DMA_MAX] = {
109 [GAUDI_PCI_DMA_1] = 0,
110 [GAUDI_PCI_DMA_2] = 1,
111 [GAUDI_PCI_DMA_3] = 5,
112 [GAUDI_HBM_DMA_1] = 2,
113 [GAUDI_HBM_DMA_2] = 3,
114 [GAUDI_HBM_DMA_3] = 4,
115 [GAUDI_HBM_DMA_4] = 6,
116 [GAUDI_HBM_DMA_5] = 7
117};
118
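/*
 * Completion queues are attached only to the externally visible PCI DMA
 * queues (DMA 0, 1 and 5), four completion queues per DMA engine.
 */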
119static const u8 gaudi_cq_assignment[NUMBER_OF_CMPLT_QUEUES] = {
120 [0] = GAUDI_QUEUE_ID_DMA_0_0,
121 [1] = GAUDI_QUEUE_ID_DMA_0_1,
122 [2] = GAUDI_QUEUE_ID_DMA_0_2,
123 [3] = GAUDI_QUEUE_ID_DMA_0_3,
124 [4] = GAUDI_QUEUE_ID_DMA_1_0,
125 [5] = GAUDI_QUEUE_ID_DMA_1_1,
126 [6] = GAUDI_QUEUE_ID_DMA_1_2,
127 [7] = GAUDI_QUEUE_ID_DMA_1_3,
128 [8] = GAUDI_QUEUE_ID_DMA_5_0,
129 [9] = GAUDI_QUEUE_ID_DMA_5_1,
130 [10] = GAUDI_QUEUE_ID_DMA_5_2,
131 [11] = GAUDI_QUEUE_ID_DMA_5_3
132};
133
134static const u16 gaudi_packet_sizes[MAX_PACKET_ID] = {
135 [PACKET_WREG_32] = sizeof(struct packet_wreg32),
136 [PACKET_WREG_BULK] = sizeof(struct packet_wreg_bulk),
137 [PACKET_MSG_LONG] = sizeof(struct packet_msg_long),
138 [PACKET_MSG_SHORT] = sizeof(struct packet_msg_short),
139 [PACKET_CP_DMA] = sizeof(struct packet_cp_dma),
140 [PACKET_REPEAT] = sizeof(struct packet_repeat),
141 [PACKET_MSG_PROT] = sizeof(struct packet_msg_prot),
142 [PACKET_FENCE] = sizeof(struct packet_fence),
143 [PACKET_LIN_DMA] = sizeof(struct packet_lin_dma),
144 [PACKET_NOP] = sizeof(struct packet_nop),
145 [PACKET_STOP] = sizeof(struct packet_stop),
146 [PACKET_ARB_POINT] = sizeof(struct packet_arb_point),
147 [PACKET_WAIT] = sizeof(struct packet_wait),
148 [PACKET_LOAD_AND_EXE] = sizeof(struct packet_load_and_exe)
149};
150
151static const u32 gaudi_all_events[] = {
152 GAUDI_EVENT_PCIE_CORE_SERR,
153 GAUDI_EVENT_PCIE_CORE_DERR,
154 GAUDI_EVENT_PCIE_IF_SERR,
155 GAUDI_EVENT_PCIE_IF_DERR,
156 GAUDI_EVENT_PCIE_PHY_SERR,
157 GAUDI_EVENT_PCIE_PHY_DERR,
158 GAUDI_EVENT_TPC0_SERR,
159 GAUDI_EVENT_TPC1_SERR,
160 GAUDI_EVENT_TPC2_SERR,
161 GAUDI_EVENT_TPC3_SERR,
162 GAUDI_EVENT_TPC4_SERR,
163 GAUDI_EVENT_TPC5_SERR,
164 GAUDI_EVENT_TPC6_SERR,
165 GAUDI_EVENT_TPC7_SERR,
166 GAUDI_EVENT_TPC0_DERR,
167 GAUDI_EVENT_TPC1_DERR,
168 GAUDI_EVENT_TPC2_DERR,
169 GAUDI_EVENT_TPC3_DERR,
170 GAUDI_EVENT_TPC4_DERR,
171 GAUDI_EVENT_TPC5_DERR,
172 GAUDI_EVENT_TPC6_DERR,
173 GAUDI_EVENT_TPC7_DERR,
174 GAUDI_EVENT_MME0_ACC_SERR,
175 GAUDI_EVENT_MME0_ACC_DERR,
176 GAUDI_EVENT_MME0_SBAB_SERR,
177 GAUDI_EVENT_MME0_SBAB_DERR,
178 GAUDI_EVENT_MME1_ACC_SERR,
179 GAUDI_EVENT_MME1_ACC_DERR,
180 GAUDI_EVENT_MME1_SBAB_SERR,
181 GAUDI_EVENT_MME1_SBAB_DERR,
182 GAUDI_EVENT_MME2_ACC_SERR,
183 GAUDI_EVENT_MME2_ACC_DERR,
184 GAUDI_EVENT_MME2_SBAB_SERR,
185 GAUDI_EVENT_MME2_SBAB_DERR,
186 GAUDI_EVENT_MME3_ACC_SERR,
187 GAUDI_EVENT_MME3_ACC_DERR,
188 GAUDI_EVENT_MME3_SBAB_SERR,
189 GAUDI_EVENT_MME3_SBAB_DERR,
190 GAUDI_EVENT_DMA0_SERR_ECC,
191 GAUDI_EVENT_DMA1_SERR_ECC,
192 GAUDI_EVENT_DMA2_SERR_ECC,
193 GAUDI_EVENT_DMA3_SERR_ECC,
194 GAUDI_EVENT_DMA4_SERR_ECC,
195 GAUDI_EVENT_DMA5_SERR_ECC,
196 GAUDI_EVENT_DMA6_SERR_ECC,
197 GAUDI_EVENT_DMA7_SERR_ECC,
198 GAUDI_EVENT_DMA0_DERR_ECC,
199 GAUDI_EVENT_DMA1_DERR_ECC,
200 GAUDI_EVENT_DMA2_DERR_ECC,
201 GAUDI_EVENT_DMA3_DERR_ECC,
202 GAUDI_EVENT_DMA4_DERR_ECC,
203 GAUDI_EVENT_DMA5_DERR_ECC,
204 GAUDI_EVENT_DMA6_DERR_ECC,
205 GAUDI_EVENT_DMA7_DERR_ECC,
206 GAUDI_EVENT_CPU_IF_ECC_SERR,
207 GAUDI_EVENT_CPU_IF_ECC_DERR,
208 GAUDI_EVENT_PSOC_MEM_SERR,
209 GAUDI_EVENT_PSOC_CORESIGHT_SERR,
210 GAUDI_EVENT_PSOC_MEM_DERR,
211 GAUDI_EVENT_PSOC_CORESIGHT_DERR,
212 GAUDI_EVENT_SRAM0_SERR,
213 GAUDI_EVENT_SRAM1_SERR,
214 GAUDI_EVENT_SRAM2_SERR,
215 GAUDI_EVENT_SRAM3_SERR,
216 GAUDI_EVENT_SRAM7_SERR,
217 GAUDI_EVENT_SRAM6_SERR,
218 GAUDI_EVENT_SRAM5_SERR,
219 GAUDI_EVENT_SRAM4_SERR,
220 GAUDI_EVENT_SRAM8_SERR,
221 GAUDI_EVENT_SRAM9_SERR,
222 GAUDI_EVENT_SRAM10_SERR,
223 GAUDI_EVENT_SRAM11_SERR,
224 GAUDI_EVENT_SRAM15_SERR,
225 GAUDI_EVENT_SRAM14_SERR,
226 GAUDI_EVENT_SRAM13_SERR,
227 GAUDI_EVENT_SRAM12_SERR,
228 GAUDI_EVENT_SRAM16_SERR,
229 GAUDI_EVENT_SRAM17_SERR,
230 GAUDI_EVENT_SRAM18_SERR,
231 GAUDI_EVENT_SRAM19_SERR,
232 GAUDI_EVENT_SRAM23_SERR,
233 GAUDI_EVENT_SRAM22_SERR,
234 GAUDI_EVENT_SRAM21_SERR,
235 GAUDI_EVENT_SRAM20_SERR,
236 GAUDI_EVENT_SRAM24_SERR,
237 GAUDI_EVENT_SRAM25_SERR,
238 GAUDI_EVENT_SRAM26_SERR,
239 GAUDI_EVENT_SRAM27_SERR,
240 GAUDI_EVENT_SRAM31_SERR,
241 GAUDI_EVENT_SRAM30_SERR,
242 GAUDI_EVENT_SRAM29_SERR,
243 GAUDI_EVENT_SRAM28_SERR,
244 GAUDI_EVENT_SRAM0_DERR,
245 GAUDI_EVENT_SRAM1_DERR,
246 GAUDI_EVENT_SRAM2_DERR,
247 GAUDI_EVENT_SRAM3_DERR,
248 GAUDI_EVENT_SRAM7_DERR,
249 GAUDI_EVENT_SRAM6_DERR,
250 GAUDI_EVENT_SRAM5_DERR,
251 GAUDI_EVENT_SRAM4_DERR,
252 GAUDI_EVENT_SRAM8_DERR,
253 GAUDI_EVENT_SRAM9_DERR,
254 GAUDI_EVENT_SRAM10_DERR,
255 GAUDI_EVENT_SRAM11_DERR,
256 GAUDI_EVENT_SRAM15_DERR,
257 GAUDI_EVENT_SRAM14_DERR,
258 GAUDI_EVENT_SRAM13_DERR,
259 GAUDI_EVENT_SRAM12_DERR,
260 GAUDI_EVENT_SRAM16_DERR,
261 GAUDI_EVENT_SRAM17_DERR,
262 GAUDI_EVENT_SRAM18_DERR,
263 GAUDI_EVENT_SRAM19_DERR,
264 GAUDI_EVENT_SRAM23_DERR,
265 GAUDI_EVENT_SRAM22_DERR,
266 GAUDI_EVENT_SRAM21_DERR,
267 GAUDI_EVENT_SRAM20_DERR,
268 GAUDI_EVENT_SRAM24_DERR,
269 GAUDI_EVENT_SRAM25_DERR,
270 GAUDI_EVENT_SRAM26_DERR,
271 GAUDI_EVENT_SRAM27_DERR,
272 GAUDI_EVENT_SRAM31_DERR,
273 GAUDI_EVENT_SRAM30_DERR,
274 GAUDI_EVENT_SRAM29_DERR,
275 GAUDI_EVENT_SRAM28_DERR,
276 GAUDI_EVENT_NIC0_SERR,
277 GAUDI_EVENT_NIC1_SERR,
278 GAUDI_EVENT_NIC2_SERR,
279 GAUDI_EVENT_NIC3_SERR,
280 GAUDI_EVENT_NIC4_SERR,
281 GAUDI_EVENT_NIC0_DERR,
282 GAUDI_EVENT_NIC1_DERR,
283 GAUDI_EVENT_NIC2_DERR,
284 GAUDI_EVENT_NIC3_DERR,
285 GAUDI_EVENT_NIC4_DERR,
286 GAUDI_EVENT_DMA_IF0_SERR,
287 GAUDI_EVENT_DMA_IF1_SERR,
288 GAUDI_EVENT_DMA_IF2_SERR,
289 GAUDI_EVENT_DMA_IF3_SERR,
290 GAUDI_EVENT_DMA_IF0_DERR,
291 GAUDI_EVENT_DMA_IF1_DERR,
292 GAUDI_EVENT_DMA_IF2_DERR,
293 GAUDI_EVENT_DMA_IF3_DERR,
294 GAUDI_EVENT_GIC500,
295 GAUDI_EVENT_HBM_0_SERR,
296 GAUDI_EVENT_HBM_1_SERR,
297 GAUDI_EVENT_HBM_2_SERR,
298 GAUDI_EVENT_HBM_3_SERR,
299 GAUDI_EVENT_HBM_0_DERR,
300 GAUDI_EVENT_HBM_1_DERR,
301 GAUDI_EVENT_HBM_2_DERR,
302 GAUDI_EVENT_HBM_3_DERR,
303 GAUDI_EVENT_MMU_SERR,
304 GAUDI_EVENT_MMU_DERR,
305 GAUDI_EVENT_PCIE_DEC,
306 GAUDI_EVENT_TPC0_DEC,
307 GAUDI_EVENT_TPC1_DEC,
308 GAUDI_EVENT_TPC2_DEC,
309 GAUDI_EVENT_TPC3_DEC,
310 GAUDI_EVENT_TPC4_DEC,
311 GAUDI_EVENT_TPC5_DEC,
312 GAUDI_EVENT_TPC6_DEC,
313 GAUDI_EVENT_TPC7_DEC,
314 GAUDI_EVENT_AXI_ECC,
315 GAUDI_EVENT_L2_RAM_ECC,
316 GAUDI_EVENT_MME0_WBC_RSP,
317 GAUDI_EVENT_MME0_SBAB0_RSP,
318 GAUDI_EVENT_MME1_WBC_RSP,
319 GAUDI_EVENT_MME1_SBAB0_RSP,
320 GAUDI_EVENT_MME2_WBC_RSP,
321 GAUDI_EVENT_MME2_SBAB0_RSP,
322 GAUDI_EVENT_MME3_WBC_RSP,
323 GAUDI_EVENT_MME3_SBAB0_RSP,
324 GAUDI_EVENT_PLL0,
325 GAUDI_EVENT_PLL1,
326 GAUDI_EVENT_PLL2,
327 GAUDI_EVENT_PLL3,
328 GAUDI_EVENT_PLL4,
329 GAUDI_EVENT_PLL5,
330 GAUDI_EVENT_PLL6,
331 GAUDI_EVENT_PLL7,
332 GAUDI_EVENT_PLL8,
333 GAUDI_EVENT_PLL9,
334 GAUDI_EVENT_PLL10,
335 GAUDI_EVENT_PLL11,
336 GAUDI_EVENT_PLL12,
337 GAUDI_EVENT_PLL13,
338 GAUDI_EVENT_PLL14,
339 GAUDI_EVENT_PLL15,
340 GAUDI_EVENT_PLL16,
341 GAUDI_EVENT_PLL17,
342 GAUDI_EVENT_CPU_AXI_SPLITTER,
343 GAUDI_EVENT_PSOC_AXI_DEC,
344 GAUDI_EVENT_PSOC_PRSTN_FALL,
345 GAUDI_EVENT_TPC0_BMON_SPMU,
346 GAUDI_EVENT_TPC0_KRN_ERR,
347 GAUDI_EVENT_TPC1_BMON_SPMU,
348 GAUDI_EVENT_TPC1_KRN_ERR,
349 GAUDI_EVENT_TPC2_BMON_SPMU,
350 GAUDI_EVENT_TPC2_KRN_ERR,
351 GAUDI_EVENT_TPC3_BMON_SPMU,
352 GAUDI_EVENT_TPC3_KRN_ERR,
353 GAUDI_EVENT_TPC4_BMON_SPMU,
354 GAUDI_EVENT_TPC4_KRN_ERR,
355 GAUDI_EVENT_TPC5_BMON_SPMU,
356 GAUDI_EVENT_TPC5_KRN_ERR,
357 GAUDI_EVENT_TPC6_BMON_SPMU,
358 GAUDI_EVENT_TPC6_KRN_ERR,
359 GAUDI_EVENT_TPC7_BMON_SPMU,
360 GAUDI_EVENT_TPC7_KRN_ERR,
361 GAUDI_EVENT_MMU_PAGE_FAULT,
362 GAUDI_EVENT_MMU_WR_PERM,
363 GAUDI_EVENT_DMA_BM_CH0,
364 GAUDI_EVENT_DMA_BM_CH1,
365 GAUDI_EVENT_DMA_BM_CH2,
366 GAUDI_EVENT_DMA_BM_CH3,
367 GAUDI_EVENT_DMA_BM_CH4,
368 GAUDI_EVENT_DMA_BM_CH5,
369 GAUDI_EVENT_DMA_BM_CH6,
370 GAUDI_EVENT_DMA_BM_CH7,
371 GAUDI_EVENT_HBM0_SPI_0,
372 GAUDI_EVENT_HBM0_SPI_1,
373 GAUDI_EVENT_HBM1_SPI_0,
374 GAUDI_EVENT_HBM1_SPI_1,
375 GAUDI_EVENT_HBM2_SPI_0,
376 GAUDI_EVENT_HBM2_SPI_1,
377 GAUDI_EVENT_HBM3_SPI_0,
378 GAUDI_EVENT_HBM3_SPI_1,
379 GAUDI_EVENT_RAZWI_OR_ADC,
380 GAUDI_EVENT_TPC0_QM,
381 GAUDI_EVENT_TPC1_QM,
382 GAUDI_EVENT_TPC2_QM,
383 GAUDI_EVENT_TPC3_QM,
384 GAUDI_EVENT_TPC4_QM,
385 GAUDI_EVENT_TPC5_QM,
386 GAUDI_EVENT_TPC6_QM,
387 GAUDI_EVENT_TPC7_QM,
388 GAUDI_EVENT_MME0_QM,
389 GAUDI_EVENT_MME2_QM,
390 GAUDI_EVENT_DMA0_QM,
391 GAUDI_EVENT_DMA1_QM,
392 GAUDI_EVENT_DMA2_QM,
393 GAUDI_EVENT_DMA3_QM,
394 GAUDI_EVENT_DMA4_QM,
395 GAUDI_EVENT_DMA5_QM,
396 GAUDI_EVENT_DMA6_QM,
397 GAUDI_EVENT_DMA7_QM,
398 GAUDI_EVENT_NIC0_QM0,
399 GAUDI_EVENT_NIC0_QM1,
400 GAUDI_EVENT_NIC1_QM0,
401 GAUDI_EVENT_NIC1_QM1,
402 GAUDI_EVENT_NIC2_QM0,
403 GAUDI_EVENT_NIC2_QM1,
404 GAUDI_EVENT_NIC3_QM0,
405 GAUDI_EVENT_NIC3_QM1,
406 GAUDI_EVENT_NIC4_QM0,
407 GAUDI_EVENT_NIC4_QM1,
408 GAUDI_EVENT_DMA0_CORE,
409 GAUDI_EVENT_DMA1_CORE,
410 GAUDI_EVENT_DMA2_CORE,
411 GAUDI_EVENT_DMA3_CORE,
412 GAUDI_EVENT_DMA4_CORE,
413 GAUDI_EVENT_DMA5_CORE,
414 GAUDI_EVENT_DMA6_CORE,
415 GAUDI_EVENT_DMA7_CORE,
416 GAUDI_EVENT_FIX_POWER_ENV_S,
417 GAUDI_EVENT_FIX_POWER_ENV_E,
418 GAUDI_EVENT_FIX_THERMAL_ENV_S,
419 GAUDI_EVENT_FIX_THERMAL_ENV_E,
420 GAUDI_EVENT_RAZWI_OR_ADC_SW
421};
422
423static const char * const
424gaudi_tpc_interrupts_cause[GAUDI_NUM_OF_TPC_INTR_CAUSE] = {
425 "tpc_address_exceed_slm",
426 "tpc_div_by_0",
427 "tpc_spu_mac_overflow",
428 "tpc_spu_addsub_overflow",
429 "tpc_spu_abs_overflow",
430 "tpc_spu_fp_dst_nan_inf",
431 "tpc_spu_fp_dst_denorm",
432 "tpc_vpu_mac_overflow",
433 "tpc_vpu_addsub_overflow",
434 "tpc_vpu_abs_overflow",
435 "tpc_vpu_fp_dst_nan_inf",
436 "tpc_vpu_fp_dst_denorm",
437 "tpc_assertions",
438 "tpc_illegal_instruction",
439 "tpc_pc_wrap_around",
440 "tpc_qm_sw_err",
441 "tpc_hbw_rresp_err",
442 "tpc_hbw_bresp_err",
443 "tpc_lbw_rresp_err",
444 "tpc_lbw_bresp_err"
445};
446
447static const char * const
448gaudi_qman_error_cause[GAUDI_NUM_OF_QM_ERR_CAUSE] = {
449 "PQ AXI HBW error",
450 "CQ AXI HBW error",
451 "CP AXI HBW error",
452 "CP error due to undefined OPCODE",
453 "CP encountered STOP OPCODE",
454 "CP AXI LBW error",
455 "CP WRREG32 or WRBULK returned error",
456 "N/A",
457 "FENCE 0 inc over max value and clipped",
458 "FENCE 1 inc over max value and clipped",
459 "FENCE 2 inc over max value and clipped",
460 "FENCE 3 inc over max value and clipped",
461 "FENCE 0 dec under min value and clipped",
462 "FENCE 1 dec under min value and clipped",
463 "FENCE 2 dec under min value and clipped",
464 "FENCE 3 dec under min value and clipped"
465};
466
467static const char * const
468gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
469 "Choice push while full error",
470 "Choice Q watchdog error",
471 "MSG AXI LBW returned with error"
472};
473
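/*
 * Queue type per H/W queue ID. EXT queues are exposed to user-space and
 * require kernel command buffers, the CPU queue is driver-only, INT queues
 * are internal queues used by the device engines, and NA marks queues (the
 * NIC queues here) that this version of the driver does not expose.
 */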
474static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
475 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
476 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
477 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_2 */
478 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_3 */
479 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_0 */
480 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_1 */
481 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_2 */
482 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_1_3 */
483 QUEUE_TYPE_CPU, /* GAUDI_QUEUE_ID_CPU_PQ */
484 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_0 */
485 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_1 */
486 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_2 */
487 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_2_3 */
488 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_0 */
489 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_1 */
490 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_2 */
491 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_3_3 */
492 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_0 */
493 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_1 */
494 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_2 */
495 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_4_3 */
496 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_0 */
497 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_1 */
498 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_2 */
499 QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_5_3 */
500 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_0 */
501 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_1 */
502 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_2 */
503 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_6_3 */
504 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_0 */
505 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_1 */
506 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_2 */
507 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_DMA_7_3 */
508 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_0 */
509 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_1 */
510 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_2 */
511 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_0_3 */
512 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_0 */
513 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_1 */
514 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_2 */
515 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_MME_1_3 */
516 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_0 */
517 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_1 */
518 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_2 */
519 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_0_3 */
520 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_0 */
521 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_1 */
522 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_2 */
523 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_1_3 */
524 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_0 */
525 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_1 */
526 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_2 */
527 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_2_3 */
528 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_0 */
529 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_1 */
530 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_2 */
531 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_3_3 */
532 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_0 */
533 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_1 */
534 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_2 */
535 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_4_3 */
536 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_0 */
537 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_1 */
538 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_2 */
539 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_5_3 */
540 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_0 */
541 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_1 */
542 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_2 */
543 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_6_3 */
544 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_0 */
545 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_1 */
546 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_2 */
547 QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_TPC_7_3 */
548 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_0 */
549 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_1 */
550 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_2 */
551 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_0_3 */
552 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_0 */
553 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_1 */
554 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_2 */
555 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_1_3 */
556 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_0 */
557 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_1 */
558 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_2 */
559 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_2_3 */
560 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_0 */
561 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_1 */
562 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_2 */
563 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_3_3 */
564 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_0 */
565 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_1 */
566 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_2 */
567 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_4_3 */
568 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_0 */
569 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_1 */
570 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_2 */
571 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_5_3 */
572 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_0 */
573 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_1 */
574 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_2 */
575 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_6_3 */
576 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_0 */
577 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_1 */
578 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_2 */
579 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_7_3 */
580 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_0 */
581 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_1 */
582 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_2 */
583 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_8_3 */
584 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_0 */
585 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_1 */
586 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_2 */
587 QUEUE_TYPE_NA, /* GAUDI_QUEUE_ID_NIC_9_3 */
588};
589
590static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
591 u64 phys_addr);
592static int gaudi_send_job_on_qman0(struct hl_device *hdev,
593 struct hl_cs_job *job);
594static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
595 u32 size, u64 val);
596static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
597 u32 tpc_id);
598static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
599static int gaudi_armcp_info_get(struct hl_device *hdev);
600static void gaudi_disable_clock_gating(struct hl_device *hdev);
601static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid);
602
603static int gaudi_get_fixed_properties(struct hl_device *hdev)
604{
605 struct asic_fixed_properties *prop = &hdev->asic_prop;
606 int i;
607
608 if (GAUDI_QUEUE_ID_SIZE >= HL_MAX_QUEUES) {
609 dev_err(hdev->dev,
610 "Number of H/W queues must be smaller than %d\n",
611 HL_MAX_QUEUES);
612 return -EFAULT;
613 }
614
615 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
616 if (gaudi_queue_type[i] == QUEUE_TYPE_EXT) {
617 prop->hw_queues_props[i].type = QUEUE_TYPE_EXT;
618 prop->hw_queues_props[i].driver_only = 0;
619 prop->hw_queues_props[i].requires_kernel_cb = 1;
620 } else if (gaudi_queue_type[i] == QUEUE_TYPE_CPU) {
621 prop->hw_queues_props[i].type = QUEUE_TYPE_CPU;
622 prop->hw_queues_props[i].driver_only = 1;
623 prop->hw_queues_props[i].requires_kernel_cb = 0;
624 } else if (gaudi_queue_type[i] == QUEUE_TYPE_INT) {
625 prop->hw_queues_props[i].type = QUEUE_TYPE_INT;
626 prop->hw_queues_props[i].driver_only = 0;
627 prop->hw_queues_props[i].requires_kernel_cb = 0;
628 } else if (gaudi_queue_type[i] == QUEUE_TYPE_NA) {
629 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
630 prop->hw_queues_props[i].driver_only = 0;
631 prop->hw_queues_props[i].requires_kernel_cb = 0;
632 }
633 }
634
635 for (; i < HL_MAX_QUEUES; i++)
636 prop->hw_queues_props[i].type = QUEUE_TYPE_NA;
637
638 prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
639
640 prop->dram_base_address = DRAM_PHYS_BASE;
641 prop->dram_size = GAUDI_HBM_SIZE_32GB;
642 prop->dram_end_address = prop->dram_base_address +
643 prop->dram_size;
644 prop->dram_user_base_address = DRAM_BASE_ADDR_USER;
645
646 prop->sram_base_address = SRAM_BASE_ADDR;
647 prop->sram_size = SRAM_SIZE;
648 prop->sram_end_address = prop->sram_base_address +
649 prop->sram_size;
650 prop->sram_user_base_address = prop->sram_base_address +
651 SRAM_USER_BASE_OFFSET;
652
653 prop->mmu_pgt_addr = MMU_PAGE_TABLES_ADDR;
654 if (hdev->pldm)
655 prop->mmu_pgt_size = 0x800000; /* 8MB */
656 else
657 prop->mmu_pgt_size = MMU_PAGE_TABLES_SIZE;
658 prop->mmu_pte_size = HL_PTE_SIZE;
659 prop->mmu_hop_table_size = HOP_TABLE_SIZE;
660 prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE;
661 prop->dram_page_size = PAGE_SIZE_2MB;
662
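 /*
  * The host virtual address space is split below: the PMMU covers the
  * lower half with 4KB pages (with a "huge" variant over the same range
  * using 2MB pages), while the DMMU covers the upper half with 2MB pages.
  */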
663 prop->pmmu.hop0_shift = HOP0_SHIFT;
664 prop->pmmu.hop1_shift = HOP1_SHIFT;
665 prop->pmmu.hop2_shift = HOP2_SHIFT;
666 prop->pmmu.hop3_shift = HOP3_SHIFT;
667 prop->pmmu.hop4_shift = HOP4_SHIFT;
668 prop->pmmu.hop0_mask = HOP0_MASK;
669 prop->pmmu.hop1_mask = HOP1_MASK;
670 prop->pmmu.hop2_mask = HOP2_MASK;
671 prop->pmmu.hop3_mask = HOP3_MASK;
672 prop->pmmu.hop4_mask = HOP4_MASK;
673 prop->pmmu.start_addr = VA_HOST_SPACE_START;
674 prop->pmmu.end_addr =
675 (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1;
676 prop->pmmu.page_size = PAGE_SIZE_4KB;
677
 678 /* PMMU and HPMMU are the same except for page size */
679 memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
680 prop->pmmu_huge.page_size = PAGE_SIZE_2MB;
681
682 /* shifts and masks are the same in PMMU and DMMU */
683 memcpy(&prop->dmmu, &prop->pmmu, sizeof(prop->pmmu));
684 prop->dmmu.start_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2);
685 prop->dmmu.end_addr = VA_HOST_SPACE_END;
686 prop->dmmu.page_size = PAGE_SIZE_2MB;
687
688 prop->cfg_size = CFG_SIZE;
689 prop->max_asid = MAX_ASID;
690 prop->num_of_events = GAUDI_EVENT_SIZE;
691 prop->tpc_enabled_mask = TPC_ENABLED_MASK;
692
693 prop->max_power_default = MAX_POWER_DEFAULT;
694
695 prop->cb_pool_cb_cnt = GAUDI_CB_POOL_CB_CNT;
696 prop->cb_pool_cb_size = GAUDI_CB_POOL_CB_SIZE;
697
698 prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
699 prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
700
701 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
702 CARD_NAME_MAX_LEN);
703
704 return 0;
705}
706
707static int gaudi_pci_bars_map(struct hl_device *hdev)
708{
709 static const char * const name[] = {"SRAM", "CFG", "HBM"};
710 bool is_wc[3] = {false, false, true};
711 int rc;
712
713 rc = hl_pci_bars_map(hdev, name, is_wc);
714 if (rc)
715 return rc;
716
717 hdev->rmmio = hdev->pcie_bar[CFG_BAR_ID] +
718 (CFG_BASE - SPI_FLASH_BASE_ADDR);
719
720 return 0;
721}
722
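/*
 * gaudi_set_hbm_bar_base() - move the HBM BAR window.
 *
 * Re-points inbound PCI region 2 (BAR 4) to @addr inside the HBM and
 * returns the previous base address, or U64_MAX on failure.
 */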
723static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
724{
725 struct gaudi_device *gaudi = hdev->asic_specific;
726 u64 old_addr = addr;
727 int rc;
728
729 if ((gaudi) && (gaudi->hbm_bar_cur_addr == addr))
730 return old_addr;
731
732 /* Inbound Region 2 - Bar 4 - Point to HBM */
733 rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr);
734 if (rc)
735 return U64_MAX;
736
737 if (gaudi) {
738 old_addr = gaudi->hbm_bar_cur_addr;
739 gaudi->hbm_bar_cur_addr = addr;
740 }
741
742 return old_addr;
743}
744
745static int gaudi_init_iatu(struct hl_device *hdev)
746{
747 int rc = 0;
748
749 /* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
750 rc = hl_pci_iatu_write(hdev, 0x314,
751 lower_32_bits(SPI_FLASH_BASE_ADDR));
752 rc |= hl_pci_iatu_write(hdev, 0x318,
753 upper_32_bits(SPI_FLASH_BASE_ADDR));
754 rc |= hl_pci_iatu_write(hdev, 0x300, 0);
755 /* Enable + Bar match + match enable */
756 rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200);
757
758 if (rc)
759 return -EIO;
760
761 return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
762 HOST_PHYS_BASE, HOST_PHYS_SIZE);
763}
764
765static int gaudi_early_init(struct hl_device *hdev)
766{
767 struct asic_fixed_properties *prop = &hdev->asic_prop;
768 struct pci_dev *pdev = hdev->pdev;
769 int rc;
770
771 rc = gaudi_get_fixed_properties(hdev);
772 if (rc) {
773 dev_err(hdev->dev, "Failed to get fixed properties\n");
774 return rc;
775 }
776
777 /* Check BAR sizes */
778 if (pci_resource_len(pdev, SRAM_BAR_ID) != SRAM_BAR_SIZE) {
779 dev_err(hdev->dev,
780 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
781 SRAM_BAR_ID,
782 (unsigned long long) pci_resource_len(pdev,
783 SRAM_BAR_ID),
784 SRAM_BAR_SIZE);
785 return -ENODEV;
786 }
787
788 if (pci_resource_len(pdev, CFG_BAR_ID) != CFG_BAR_SIZE) {
789 dev_err(hdev->dev,
790 "Not " HL_NAME "? BAR %d size %llu, expecting %llu\n",
791 CFG_BAR_ID,
792 (unsigned long long) pci_resource_len(pdev,
793 CFG_BAR_ID),
794 CFG_BAR_SIZE);
795 return -ENODEV;
796 }
797
798 prop->dram_pci_bar_size = pci_resource_len(pdev, HBM_BAR_ID);
799
800 rc = hl_pci_init(hdev);
801 if (rc)
802 return rc;
803
804 return 0;
805}
806
807static int gaudi_early_fini(struct hl_device *hdev)
808{
809 hl_pci_fini(hdev);
810
811 return 0;
812}
813
814/**
815 * gaudi_fetch_psoc_frequency - Fetch PSOC frequency values
816 *
817 * @hdev: pointer to hl_device structure
818 *
819 */
820static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
821{
822 struct asic_fixed_properties *prop = &hdev->asic_prop;
823
824 prop->psoc_pci_pll_nr = RREG32(mmPSOC_PCI_PLL_NR);
825 prop->psoc_pci_pll_nf = RREG32(mmPSOC_PCI_PLL_NF);
826 prop->psoc_pci_pll_od = RREG32(mmPSOC_PCI_PLL_OD);
827 prop->psoc_pci_pll_div_factor = RREG32(mmPSOC_PCI_PLL_DIV_FACTOR_1);
828}
829
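/*
 * _gaudi_init_tpc_mem() - helper for gaudi_init_tpc_mem().
 *
 * Builds a single LIN_DMA packet on QMAN0 that copies the TPC kernel from
 * host memory at @tpc_kernel_src_addr to the user SRAM region, then runs
 * the kernel on every TPC engine.
 */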
830static int _gaudi_init_tpc_mem(struct hl_device *hdev,
831 dma_addr_t tpc_kernel_src_addr, u32 tpc_kernel_size)
832{
833 struct asic_fixed_properties *prop = &hdev->asic_prop;
834 struct packet_lin_dma *init_tpc_mem_pkt;
835 struct hl_cs_job *job;
836 struct hl_cb *cb;
837 u64 dst_addr;
838 u32 cb_size, ctl;
839 u8 tpc_id;
840 int rc;
841
842 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
843 if (!cb)
844 return -EFAULT;
845
846 init_tpc_mem_pkt = (struct packet_lin_dma *) (uintptr_t)
847 cb->kernel_address;
848 cb_size = sizeof(*init_tpc_mem_pkt);
849 memset(init_tpc_mem_pkt, 0, cb_size);
850
851 init_tpc_mem_pkt->tsize = cpu_to_le32(tpc_kernel_size);
852
853 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
854 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
855 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
856 (1 << GAUDI_PKT_CTL_MB_SHIFT));
857
858 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl);
859
860 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr);
861 dst_addr = (prop->sram_user_base_address &
862 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
863 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
864 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr);
865
866 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
867 if (!job) {
868 dev_err(hdev->dev, "Failed to allocate a new job\n");
869 rc = -ENOMEM;
870 goto release_cb;
871 }
872
873 job->id = 0;
874 job->user_cb = cb;
875 job->user_cb->cs_cnt++;
876 job->user_cb_size = cb_size;
877 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
878 job->patched_cb = job->user_cb;
879 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
880
881 hl_debugfs_add_job(hdev, job);
882
883 rc = gaudi_send_job_on_qman0(hdev, job);
884
885 if (rc)
886 goto free_job;
887
888 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
889 rc = gaudi_run_tpc_kernel(hdev, dst_addr, tpc_id);
890 if (rc)
891 break;
892 }
893
894free_job:
895 hl_userptr_delete_list(hdev, &job->userptr_list);
896 hl_debugfs_remove_job(hdev, job);
897 kfree(job);
898 cb->cs_cnt--;
899
900release_cb:
901 hl_cb_put(cb);
902 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
903
904 return rc;
905}
906
907/*
908 * gaudi_init_tpc_mem() - Initialize TPC memories.
909 * @hdev: Pointer to hl_device structure.
910 *
911 * Copy TPC kernel fw from firmware file and run it to initialize TPC memories.
912 *
913 * Return: 0 for success, negative value for error.
914 */
915static int gaudi_init_tpc_mem(struct hl_device *hdev)
916{
917 const struct firmware *fw;
918 size_t fw_size;
919 void *cpu_addr;
920 dma_addr_t dma_handle;
921 int rc;
922
923 rc = request_firmware(&fw, GAUDI_TPC_FW_FILE, hdev->dev);
924 if (rc) {
 925 dev_err(hdev->dev, "Firmware file %s was not found!\n",
926 GAUDI_TPC_FW_FILE);
927 goto out;
928 }
929
930 fw_size = fw->size;
931 cpu_addr = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, fw_size,
932 &dma_handle, GFP_KERNEL | __GFP_ZERO);
933 if (!cpu_addr) {
934 dev_err(hdev->dev,
935 "Failed to allocate %zu of dma memory for TPC kernel\n",
936 fw_size);
937 rc = -ENOMEM;
938 goto out;
939 }
940
941 memcpy(cpu_addr, fw->data, fw_size);
942
943 rc = _gaudi_init_tpc_mem(hdev, dma_handle, fw_size);
944
945 hdev->asic_funcs->asic_dma_free_coherent(hdev, fw->size, cpu_addr,
946 dma_handle);
947
948out:
949 release_firmware(fw);
950 return rc;
951}
952
953static int gaudi_late_init(struct hl_device *hdev)
954{
955 struct gaudi_device *gaudi = hdev->asic_specific;
956 int rc;
957
958 rc = gaudi->armcp_info_get(hdev);
959 if (rc) {
960 dev_err(hdev->dev, "Failed to get armcp info\n");
961 return rc;
962 }
963
964 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
965 if (rc) {
966 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
967 return rc;
968 }
969
970 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
971
972 gaudi_fetch_psoc_frequency(hdev);
973
974 rc = gaudi_mmu_clear_pgt_range(hdev);
975 if (rc) {
976 dev_err(hdev->dev, "Failed to clear MMU page tables range\n");
977 goto disable_pci_access;
978 }
979
980 rc = gaudi_init_tpc_mem(hdev);
981 if (rc) {
982 dev_err(hdev->dev, "Failed to initialize TPC memories\n");
983 goto disable_pci_access;
984 }
985
986 return 0;
987
988disable_pci_access:
989 hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
990
991 return rc;
992}
993
994static void gaudi_late_fini(struct hl_device *hdev)
995{
996 const struct hwmon_channel_info **channel_info_arr;
997 int i = 0;
998
999 if (!hdev->hl_chip_info->info)
1000 return;
1001
1002 channel_info_arr = hdev->hl_chip_info->info;
1003
1004 while (channel_info_arr[i]) {
1005 kfree(channel_info_arr[i]->config);
1006 kfree(channel_info_arr[i]);
1007 i++;
1008 }
1009
1010 kfree(channel_info_arr);
1011
1012 hdev->hl_chip_info->info = NULL;
1013}
1014
1015static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev)
1016{
1017 dma_addr_t dma_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {}, end_addr;
1018 void *virt_addr_arr[GAUDI_ALLOC_CPU_MEM_RETRY_CNT] = {};
1019 int i, j, rc = 0;
1020
1021 /*
 1022 * The device CPU works with 40-bit addresses, and bit 39 must be set
 1023 * to '1' when accessing the host.
 1024 * Bits 49:39 of the full host address are saved for a later
 1025 * configuration of the HW that extends the address to 50 bits.
 1026 * Because a single HW register holds the extension bits,
 1027 * they must be identical across the entire allocated range.
1028 */
1029
1030 for (i = 0 ; i < GAUDI_ALLOC_CPU_MEM_RETRY_CNT ; i++) {
1031 virt_addr_arr[i] =
1032 hdev->asic_funcs->asic_dma_alloc_coherent(hdev,
1033 HL_CPU_ACCESSIBLE_MEM_SIZE,
1034 &dma_addr_arr[i],
1035 GFP_KERNEL | __GFP_ZERO);
1036 if (!virt_addr_arr[i]) {
1037 rc = -ENOMEM;
1038 goto free_dma_mem_arr;
1039 }
1040
1041 end_addr = dma_addr_arr[i] + HL_CPU_ACCESSIBLE_MEM_SIZE - 1;
1042 if (GAUDI_CPU_PCI_MSB_ADDR(dma_addr_arr[i]) ==
1043 GAUDI_CPU_PCI_MSB_ADDR(end_addr))
1044 break;
1045 }
1046
1047 if (i == GAUDI_ALLOC_CPU_MEM_RETRY_CNT) {
1048 dev_err(hdev->dev,
1049 "MSB of CPU accessible DMA memory are not identical in all range\n");
1050 rc = -EFAULT;
1051 goto free_dma_mem_arr;
1052 }
1053
1054 hdev->cpu_accessible_dma_mem = virt_addr_arr[i];
1055 hdev->cpu_accessible_dma_address = dma_addr_arr[i];
1056 hdev->cpu_pci_msb_addr =
1057 GAUDI_CPU_PCI_MSB_ADDR(hdev->cpu_accessible_dma_address);
1058
1059 GAUDI_PCI_TO_CPU_ADDR(hdev->cpu_accessible_dma_address);
1060
1061free_dma_mem_arr:
1062 for (j = 0 ; j < i ; j++)
1063 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1064 HL_CPU_ACCESSIBLE_MEM_SIZE,
1065 virt_addr_arr[j],
1066 dma_addr_arr[j]);
1067
1068 return rc;
1069}
1070
1071static void gaudi_free_internal_qmans_pq_mem(struct hl_device *hdev)
1072{
1073 struct gaudi_device *gaudi = hdev->asic_specific;
1074 struct gaudi_internal_qman_info *q;
1075 u32 i;
1076
1077 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1078 q = &gaudi->internal_qmans[i];
1079 if (!q->pq_kernel_addr)
1080 continue;
1081 hdev->asic_funcs->asic_dma_free_coherent(hdev, q->pq_size,
1082 q->pq_kernel_addr,
1083 q->pq_dma_addr);
1084 }
1085}
1086
1087static int gaudi_alloc_internal_qmans_pq_mem(struct hl_device *hdev)
1088{
1089 struct gaudi_device *gaudi = hdev->asic_specific;
1090 struct gaudi_internal_qman_info *q;
1091 int rc, i;
1092
1093 for (i = 0 ; i < GAUDI_QUEUE_ID_SIZE ; i++) {
1094 if (gaudi_queue_type[i] != QUEUE_TYPE_INT)
1095 continue;
1096
1097 q = &gaudi->internal_qmans[i];
1098
1099 switch (i) {
1100 case GAUDI_QUEUE_ID_DMA_2_0 ... GAUDI_QUEUE_ID_DMA_4_3:
1101 case GAUDI_QUEUE_ID_DMA_6_0 ... GAUDI_QUEUE_ID_DMA_7_3:
1102 q->pq_size = HBM_DMA_QMAN_SIZE_IN_BYTES;
1103 break;
1104 case GAUDI_QUEUE_ID_MME_0_0 ... GAUDI_QUEUE_ID_MME_1_3:
1105 q->pq_size = MME_QMAN_SIZE_IN_BYTES;
1106 break;
1107 case GAUDI_QUEUE_ID_TPC_0_0 ... GAUDI_QUEUE_ID_TPC_7_3:
1108 q->pq_size = TPC_QMAN_SIZE_IN_BYTES;
1109 break;
1110 default:
1111 dev_err(hdev->dev, "Bad internal queue index %d", i);
1112 rc = -EINVAL;
1113 goto free_internal_qmans_pq_mem;
1114 }
1115
1116 q->pq_kernel_addr = hdev->asic_funcs->asic_dma_alloc_coherent(
1117 hdev, q->pq_size,
1118 &q->pq_dma_addr,
1119 GFP_KERNEL | __GFP_ZERO);
1120 if (!q->pq_kernel_addr) {
1121 rc = -ENOMEM;
1122 goto free_internal_qmans_pq_mem;
1123 }
1124 }
1125
1126 return 0;
1127
1128free_internal_qmans_pq_mem:
1129 gaudi_free_internal_qmans_pq_mem(hdev);
1130 return rc;
1131}
1132
1133static int gaudi_sw_init(struct hl_device *hdev)
1134{
1135 struct gaudi_device *gaudi;
1136 int rc;
1137
1138 /* Allocate device structure */
1139 gaudi = kzalloc(sizeof(*gaudi), GFP_KERNEL);
1140 if (!gaudi)
1141 return -ENOMEM;
1142
1143 gaudi->armcp_info_get = gaudi_armcp_info_get;
1144
1145 gaudi->max_freq_value = GAUDI_MAX_CLK_FREQ;
1146
1147 hdev->asic_specific = gaudi;
1148
1149 /* Create DMA pool for small allocations */
1150 hdev->dma_pool = dma_pool_create(dev_name(hdev->dev),
1151 &hdev->pdev->dev, GAUDI_DMA_POOL_BLK_SIZE, 8, 0);
1152 if (!hdev->dma_pool) {
1153 dev_err(hdev->dev, "failed to create DMA pool\n");
1154 rc = -ENOMEM;
1155 goto free_gaudi_device;
1156 }
1157
1158 rc = gaudi_alloc_cpu_accessible_dma_mem(hdev);
1159 if (rc)
1160 goto free_dma_pool;
1161
1162 hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
1163 if (!hdev->cpu_accessible_dma_pool) {
1164 dev_err(hdev->dev,
1165 "Failed to create CPU accessible DMA pool\n");
1166 rc = -ENOMEM;
1167 goto free_cpu_dma_mem;
1168 }
1169
1170 rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
1171 (uintptr_t) hdev->cpu_accessible_dma_mem,
1172 HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
1173 if (rc) {
1174 dev_err(hdev->dev,
1175 "Failed to add memory to CPU accessible DMA pool\n");
1176 rc = -EFAULT;
1177 goto free_cpu_accessible_dma_pool;
1178 }
1179
1180 rc = gaudi_alloc_internal_qmans_pq_mem(hdev);
1181 if (rc)
1182 goto free_cpu_accessible_dma_pool;
1183
1184 spin_lock_init(&gaudi->hw_queues_lock);
1185 mutex_init(&gaudi->clk_gate_mutex);
1186
1187 hdev->supports_sync_stream = true;
1188 hdev->supports_coresight = true;
1189
1190 return 0;
1191
1192free_cpu_accessible_dma_pool:
1193 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1194free_cpu_dma_mem:
1195 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1196 hdev->cpu_pci_msb_addr);
1197 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1198 HL_CPU_ACCESSIBLE_MEM_SIZE,
1199 hdev->cpu_accessible_dma_mem,
1200 hdev->cpu_accessible_dma_address);
1201free_dma_pool:
1202 dma_pool_destroy(hdev->dma_pool);
1203free_gaudi_device:
1204 kfree(gaudi);
1205 return rc;
1206}
1207
1208static int gaudi_sw_fini(struct hl_device *hdev)
1209{
1210 struct gaudi_device *gaudi = hdev->asic_specific;
1211
1212 gaudi_free_internal_qmans_pq_mem(hdev);
1213
1214 gen_pool_destroy(hdev->cpu_accessible_dma_pool);
1215
1216 GAUDI_CPU_TO_PCI_ADDR(hdev->cpu_accessible_dma_address,
1217 hdev->cpu_pci_msb_addr);
1218 hdev->asic_funcs->asic_dma_free_coherent(hdev,
1219 HL_CPU_ACCESSIBLE_MEM_SIZE,
1220 hdev->cpu_accessible_dma_mem,
1221 hdev->cpu_accessible_dma_address);
1222
1223 dma_pool_destroy(hdev->dma_pool);
1224
1225 mutex_destroy(&gaudi->clk_gate_mutex);
1226
1227 kfree(gaudi);
1228
1229 return 0;
1230}
1231
1232static irqreturn_t gaudi_irq_handler_single(int irq, void *arg)
1233{
1234 struct hl_device *hdev = arg;
1235 int i;
1236
1237 if (hdev->disabled)
1238 return IRQ_HANDLED;
1239
1240 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1241 hl_irq_handler_cq(irq, &hdev->completion_queue[i]);
1242
1243 hl_irq_handler_eq(irq, &hdev->event_queue);
1244
1245 return IRQ_HANDLED;
1246}
1247
1248/*
1249 * For backward compatibility, new MSI interrupts should be set after the
1250 * existing CPU and NIC interrupts.
1251 */
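/*
 * For example, a completion-queue interrupt (nr below the CPU EQ index,
 * cpu_eq false) keeps its legacy vector number, while any newer interrupt
 * is shifted past the block reserved for the CPU EQ and the NIC engines
 * (nr + NIC_NUMBER_OF_ENGINES + 1).
 */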
1252static int gaudi_pci_irq_vector(struct hl_device *hdev, unsigned int nr,
1253 bool cpu_eq)
1254{
1255 int msi_vec;
1256
1257 if ((nr != GAUDI_EVENT_QUEUE_MSI_IDX) && (cpu_eq))
1258 dev_crit(hdev->dev, "CPU EQ must use IRQ %d\n",
1259 GAUDI_EVENT_QUEUE_MSI_IDX);
1260
1261 msi_vec = ((nr < GAUDI_EVENT_QUEUE_MSI_IDX) || (cpu_eq)) ? nr :
1262 (nr + NIC_NUMBER_OF_ENGINES + 1);
1263
1264 return pci_irq_vector(hdev->pdev, msi_vec);
1265}
1266
1267static int gaudi_enable_msi_single(struct hl_device *hdev)
1268{
1269 int rc, irq;
1270
1271 dev_info(hdev->dev, "Working in single MSI IRQ mode\n");
1272
1273 irq = gaudi_pci_irq_vector(hdev, 0, false);
1274 rc = request_irq(irq, gaudi_irq_handler_single, 0,
1275 "gaudi single msi", hdev);
1276 if (rc)
1277 dev_err(hdev->dev,
1278 "Failed to request single MSI IRQ\n");
1279
1280 return rc;
1281}
1282
1283static int gaudi_enable_msi_multi(struct hl_device *hdev)
1284{
1285 int cq_cnt = hdev->asic_prop.completion_queues_count;
1286 int rc, i, irq_cnt_init, irq;
1287
1288 for (i = 0, irq_cnt_init = 0 ; i < cq_cnt ; i++, irq_cnt_init++) {
1289 irq = gaudi_pci_irq_vector(hdev, i, false);
1290 rc = request_irq(irq, hl_irq_handler_cq, 0, gaudi_irq_name[i],
1291 &hdev->completion_queue[i]);
1292 if (rc) {
1293 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1294 goto free_irqs;
1295 }
1296 }
1297
1298 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX, true);
1299 rc = request_irq(irq, hl_irq_handler_eq, 0, gaudi_irq_name[cq_cnt],
1300 &hdev->event_queue);
1301 if (rc) {
1302 dev_err(hdev->dev, "Failed to request IRQ %d", irq);
1303 goto free_irqs;
1304 }
1305
1306 return 0;
1307
1308free_irqs:
1309 for (i = 0 ; i < irq_cnt_init ; i++)
1310 free_irq(gaudi_pci_irq_vector(hdev, i, false),
1311 &hdev->completion_queue[i]);
1312 return rc;
1313}
1314
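/*
 * Enable MSI and register the interrupt handlers. If the full vector set
 * cannot be allocated, fall back to a single shared vector serviced by
 * gaudi_irq_handler_single().
 */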
1315static int gaudi_enable_msi(struct hl_device *hdev)
1316{
1317 struct gaudi_device *gaudi = hdev->asic_specific;
1318 int rc;
1319
1320 if (gaudi->hw_cap_initialized & HW_CAP_MSI)
1321 return 0;
1322
1323 rc = pci_alloc_irq_vectors(hdev->pdev, 1, GAUDI_MSI_ENTRIES,
1324 PCI_IRQ_MSI);
1325 if (rc < 0) {
1326 dev_err(hdev->dev, "MSI: Failed to enable support %d\n", rc);
1327 return rc;
1328 }
1329
1330 if (rc < NUMBER_OF_INTERRUPTS) {
1331 gaudi->multi_msi_mode = false;
1332 rc = gaudi_enable_msi_single(hdev);
1333 } else {
1334 gaudi->multi_msi_mode = true;
1335 rc = gaudi_enable_msi_multi(hdev);
1336 }
1337
1338 if (rc)
1339 goto free_pci_irq_vectors;
1340
1341 gaudi->hw_cap_initialized |= HW_CAP_MSI;
1342
1343 return 0;
1344
1345free_pci_irq_vectors:
1346 pci_free_irq_vectors(hdev->pdev);
1347 return rc;
1348}
1349
1350static void gaudi_sync_irqs(struct hl_device *hdev)
1351{
1352 struct gaudi_device *gaudi = hdev->asic_specific;
1353 int i, cq_cnt = hdev->asic_prop.completion_queues_count;
1354
1355 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1356 return;
1357
 1358 /* Wait for all pending IRQs to finish */
1359 if (gaudi->multi_msi_mode) {
1360 for (i = 0 ; i < cq_cnt ; i++)
1361 synchronize_irq(gaudi_pci_irq_vector(hdev, i, false));
1362
1363 synchronize_irq(gaudi_pci_irq_vector(hdev,
1364 GAUDI_EVENT_QUEUE_MSI_IDX,
1365 true));
1366 } else {
1367 synchronize_irq(gaudi_pci_irq_vector(hdev, 0, false));
1368 }
1369}
1370
1371static void gaudi_disable_msi(struct hl_device *hdev)
1372{
1373 struct gaudi_device *gaudi = hdev->asic_specific;
1374 int i, irq, cq_cnt = hdev->asic_prop.completion_queues_count;
1375
1376 if (!(gaudi->hw_cap_initialized & HW_CAP_MSI))
1377 return;
1378
1379 gaudi_sync_irqs(hdev);
1380
1381 if (gaudi->multi_msi_mode) {
1382 irq = gaudi_pci_irq_vector(hdev, GAUDI_EVENT_QUEUE_MSI_IDX,
1383 true);
1384 free_irq(irq, &hdev->event_queue);
1385
1386 for (i = 0 ; i < cq_cnt ; i++) {
1387 irq = gaudi_pci_irq_vector(hdev, i, false);
1388 free_irq(irq, &hdev->completion_queue[i]);
1389 }
1390 } else {
1391 free_irq(gaudi_pci_irq_vector(hdev, 0, false), hdev);
1392 }
1393
1394 pci_free_irq_vectors(hdev->pdev);
1395
1396 gaudi->hw_cap_initialized &= ~HW_CAP_MSI;
1397}
1398
1399static void gaudi_init_scrambler_sram(struct hl_device *hdev)
1400{
1401 struct gaudi_device *gaudi = hdev->asic_specific;
1402
1403 if (gaudi->hw_cap_initialized & HW_CAP_SRAM_SCRAMBLER)
1404 return;
1405
1406 if (!hdev->sram_scrambler_enable)
1407 return;
1408
1409 WREG32(mmNIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1410 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1411 WREG32(mmNIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1412 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1413 WREG32(mmNIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1414 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1415 WREG32(mmNIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1416 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1417 WREG32(mmNIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1418 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1419 WREG32(mmNIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1420 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1421 WREG32(mmNIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1422 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1423 WREG32(mmNIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1424 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1425
1426 WREG32(mmSIF_RTR_CTRL_0_SCRAM_SRAM_EN,
1427 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1428 WREG32(mmSIF_RTR_CTRL_1_SCRAM_SRAM_EN,
1429 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1430 WREG32(mmSIF_RTR_CTRL_2_SCRAM_SRAM_EN,
1431 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1432 WREG32(mmSIF_RTR_CTRL_3_SCRAM_SRAM_EN,
1433 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1434 WREG32(mmSIF_RTR_CTRL_4_SCRAM_SRAM_EN,
1435 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1436 WREG32(mmSIF_RTR_CTRL_5_SCRAM_SRAM_EN,
1437 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1438 WREG32(mmSIF_RTR_CTRL_6_SCRAM_SRAM_EN,
1439 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1440 WREG32(mmSIF_RTR_CTRL_7_SCRAM_SRAM_EN,
1441 1 << IF_RTR_CTRL_SCRAM_SRAM_EN_VAL_SHIFT);
1442
1443 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_SRAM_EN,
1444 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1445 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_SRAM_EN,
1446 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1447 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_SRAM_EN,
1448 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1449 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_SRAM_EN,
1450 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1451 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_SRAM_EN,
1452 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1453 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_SRAM_EN,
1454 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1455 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_SRAM_EN,
1456 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1457 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_SRAM_EN,
1458 1 << DMA_IF_DOWN_CHX_SCRAM_SRAM_EN_VAL_SHIFT);
1459
1460 gaudi->hw_cap_initialized |= HW_CAP_SRAM_SCRAMBLER;
1461}
1462
1463static void gaudi_init_scrambler_hbm(struct hl_device *hdev)
1464{
1465 struct gaudi_device *gaudi = hdev->asic_specific;
1466
1467 if (gaudi->hw_cap_initialized & HW_CAP_HBM_SCRAMBLER)
1468 return;
1469
1470 if (!hdev->dram_scrambler_enable)
1471 return;
1472
1473 WREG32(mmNIF_RTR_CTRL_0_SCRAM_HBM_EN,
1474 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1475 WREG32(mmNIF_RTR_CTRL_1_SCRAM_HBM_EN,
1476 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1477 WREG32(mmNIF_RTR_CTRL_2_SCRAM_HBM_EN,
1478 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1479 WREG32(mmNIF_RTR_CTRL_3_SCRAM_HBM_EN,
1480 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1481 WREG32(mmNIF_RTR_CTRL_4_SCRAM_HBM_EN,
1482 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1483 WREG32(mmNIF_RTR_CTRL_5_SCRAM_HBM_EN,
1484 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1485 WREG32(mmNIF_RTR_CTRL_6_SCRAM_HBM_EN,
1486 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1487 WREG32(mmNIF_RTR_CTRL_7_SCRAM_HBM_EN,
1488 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1489
1490 WREG32(mmSIF_RTR_CTRL_0_SCRAM_HBM_EN,
1491 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1492 WREG32(mmSIF_RTR_CTRL_1_SCRAM_HBM_EN,
1493 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1494 WREG32(mmSIF_RTR_CTRL_2_SCRAM_HBM_EN,
1495 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1496 WREG32(mmSIF_RTR_CTRL_3_SCRAM_HBM_EN,
1497 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1498 WREG32(mmSIF_RTR_CTRL_4_SCRAM_HBM_EN,
1499 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1500 WREG32(mmSIF_RTR_CTRL_5_SCRAM_HBM_EN,
1501 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1502 WREG32(mmSIF_RTR_CTRL_6_SCRAM_HBM_EN,
1503 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1504 WREG32(mmSIF_RTR_CTRL_7_SCRAM_HBM_EN,
1505 1 << IF_RTR_CTRL_SCRAM_HBM_EN_VAL_SHIFT);
1506
1507 WREG32(mmDMA_IF_E_N_DOWN_CH0_SCRAM_HBM_EN,
1508 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1509 WREG32(mmDMA_IF_E_N_DOWN_CH1_SCRAM_HBM_EN,
1510 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1511 WREG32(mmDMA_IF_E_S_DOWN_CH0_SCRAM_HBM_EN,
1512 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1513 WREG32(mmDMA_IF_E_S_DOWN_CH1_SCRAM_HBM_EN,
1514 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1515 WREG32(mmDMA_IF_W_N_DOWN_CH0_SCRAM_HBM_EN,
1516 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1517 WREG32(mmDMA_IF_W_N_DOWN_CH1_SCRAM_HBM_EN,
1518 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1519 WREG32(mmDMA_IF_W_S_DOWN_CH0_SCRAM_HBM_EN,
1520 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1521 WREG32(mmDMA_IF_W_S_DOWN_CH1_SCRAM_HBM_EN,
1522 1 << DMA_IF_DOWN_CHX_SCRAM_HBM_EN_VAL_SHIFT);
1523
1524 gaudi->hw_cap_initialized |= HW_CAP_HBM_SCRAMBLER;
1525}
1526
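/*
 * Program the per-router E2E (end-to-end) size settings for the HBM and
 * PCI paths. The HBM byte counts appear to be programmed in 8-byte units,
 * hence the ">> 3" on the values below.
 */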
1527static void gaudi_init_e2e(struct hl_device *hdev)
1528{
1529 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 247 >> 3);
1530 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 785 >> 3);
1531 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 49);
1532 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 101);
1533
1534 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1535 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1536 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1537 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1538
1539 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1540 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1541 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1542 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1543
1544 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1545 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1546 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1547 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1548
1549 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1550 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1551 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1552 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1553
1554 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1555 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1556 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1557 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1558
1559 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1560 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1561 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1562 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1563
1564 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 297 >> 3);
1565 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 908 >> 3);
1566 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 19);
1567 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 19);
1568
1569 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_WR_SIZE, 318 >> 3);
1570 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_RD_SIZE, 956 >> 3);
1571 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_WR_SIZE, 79);
1572 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_RD_SIZE, 163);
1573
1574 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_WR_SIZE, 275 >> 3);
1575 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_RD_SIZE, 614 >> 3);
1576 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_WR_SIZE, 1);
1577 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_RD_SIZE, 39);
1578
1579 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_WR_SIZE, 1);
1580 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_RD_SIZE, 1);
1581 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_WR_SIZE, 1);
1582 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_RD_SIZE, 32);
1583
1584 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_WR_SIZE, 176 >> 3);
1585 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_RD_SIZE, 32 >> 3);
1586 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_WR_SIZE, 19);
1587 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_RD_SIZE, 32);
1588
1589 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_WR_SIZE, 176 >> 3);
1590 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_RD_SIZE, 32 >> 3);
1591 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_WR_SIZE, 19);
1592 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_RD_SIZE, 32);
1593
1594 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_WR_SIZE, 1);
1595 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_RD_SIZE, 1);
1596 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_WR_SIZE, 1);
1597 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_RD_SIZE, 32);
1598
1599 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_WR_SIZE, 275 >> 3);
1600 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_RD_SIZE, 614 >> 3);
1601 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_WR_SIZE, 1);
1602 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_RD_SIZE, 39);
1603
1604 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_WR_SIZE, 318 >> 3);
1605 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_RD_SIZE, 956 >> 3);
1606 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_WR_SIZE, 79);
1607 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_RD_SIZE, 79);
1608
1609 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1610 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1611 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1612 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1613
1614 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1615 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1616 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1617 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1618
1619 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1620 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1621 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1622 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1623
1624 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1625 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1626 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1627 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1628
1629 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1630 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1631 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1632 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1633
1634 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1635 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1636 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1637 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1638
1639 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_WR_SIZE, 344 >> 3);
1640 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_RD_SIZE, 1000 >> 3);
1641 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_WR_SIZE, 162);
1642 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_RD_SIZE, 338);
1643
1644 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_WR_SIZE, 344 >> 3);
1645 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_RD_SIZE, 1000 >> 3);
1646 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_WR_SIZE, 162);
1647 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_RD_SIZE, 338);
1648
1649 if (!hdev->dram_scrambler_enable) {
1650 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1651 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1652 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1653 WREG32(mmSIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1654
1655 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1656 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1657 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1658 WREG32(mmSIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1659
1660 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1661 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1662 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1663 WREG32(mmSIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1664
1665 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1666 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1667 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1668 WREG32(mmSIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1669
1670 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1671 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1672 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1673 WREG32(mmSIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1674
1675 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1676 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1677 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1678 WREG32(mmSIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1679
1680 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1681 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1682 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1683 WREG32(mmSIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1684
1685 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1686 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1687 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1688 WREG32(mmSIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1689
1690 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_0, 0x21);
1691 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_SEL_1, 0x22);
1692 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_OFFSET_18, 0x1F);
1693 WREG32(mmNIF_RTR_CTRL_0_NL_HBM_PC_SEL_3, 0x20);
1694
1695 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_0, 0x21);
1696 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_SEL_1, 0x22);
1697 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_OFFSET_18, 0x1F);
1698 WREG32(mmNIF_RTR_CTRL_1_NL_HBM_PC_SEL_3, 0x20);
1699
1700 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_0, 0x21);
1701 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_SEL_1, 0x22);
1702 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_OFFSET_18, 0x1F);
1703 WREG32(mmNIF_RTR_CTRL_2_NL_HBM_PC_SEL_3, 0x20);
1704
1705 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_0, 0x21);
1706 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_SEL_1, 0x22);
1707 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_OFFSET_18, 0x1F);
1708 WREG32(mmNIF_RTR_CTRL_3_NL_HBM_PC_SEL_3, 0x20);
1709
1710 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_0, 0x21);
1711 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_SEL_1, 0x22);
1712 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_OFFSET_18, 0x1F);
1713 WREG32(mmNIF_RTR_CTRL_4_NL_HBM_PC_SEL_3, 0x20);
1714
1715 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_0, 0x21);
1716 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_SEL_1, 0x22);
1717 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_OFFSET_18, 0x1F);
1718 WREG32(mmNIF_RTR_CTRL_5_NL_HBM_PC_SEL_3, 0x20);
1719
1720 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_0, 0x21);
1721 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_SEL_1, 0x22);
1722 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_OFFSET_18, 0x1F);
1723 WREG32(mmNIF_RTR_CTRL_6_NL_HBM_PC_SEL_3, 0x20);
1724
1725 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_0, 0x21);
1726 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_SEL_1, 0x22);
1727 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_OFFSET_18, 0x1F);
1728 WREG32(mmNIF_RTR_CTRL_7_NL_HBM_PC_SEL_3, 0x20);
1729
1730 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1731 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1732 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1733 WREG32(mmDMA_IF_E_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1734
1735 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1736 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1737 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1738 WREG32(mmDMA_IF_E_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1739
1740 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1741 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1742 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1743 WREG32(mmDMA_IF_E_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1744
1745 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1746 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1747 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1748 WREG32(mmDMA_IF_E_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1749
1750 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1751 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1752 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1753 WREG32(mmDMA_IF_W_N_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1754
1755 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1756 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1757 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1758 WREG32(mmDMA_IF_W_N_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1759
1760 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_0, 0x21);
1761 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_SEL_1, 0x22);
1762 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_OFFSET_18, 0x1F);
1763 WREG32(mmDMA_IF_W_S_DOWN_CH0_NL_HBM_PC_SEL_3, 0x20);
1764
1765 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_0, 0x21);
1766 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_SEL_1, 0x22);
1767 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_OFFSET_18, 0x1F);
1768 WREG32(mmDMA_IF_W_S_DOWN_CH1_NL_HBM_PC_SEL_3, 0x20);
1769 }
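	/*
	 * Regardless of the scrambler setting handled above, turn on the
	 * end-to-end (E2E) enable bits towards HBM and PCI on all SIF/NIF
	 * routers and DMA_IF down channels below; presumably this is what
	 * arms the E2E size/credit values programmed earlier in this
	 * function (inferred from the register naming, not documented here).
	 */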
1770
1771 WREG32(mmSIF_RTR_CTRL_0_E2E_HBM_EN,
1772 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1773 WREG32(mmSIF_RTR_CTRL_0_E2E_PCI_EN,
1774 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1775
1776 WREG32(mmSIF_RTR_CTRL_1_E2E_HBM_EN,
1777 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1778 WREG32(mmSIF_RTR_CTRL_1_E2E_PCI_EN,
1779 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1780
1781 WREG32(mmSIF_RTR_CTRL_2_E2E_HBM_EN,
1782 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1783 WREG32(mmSIF_RTR_CTRL_2_E2E_PCI_EN,
1784 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1785
1786 WREG32(mmSIF_RTR_CTRL_3_E2E_HBM_EN,
1787 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1788 WREG32(mmSIF_RTR_CTRL_3_E2E_PCI_EN,
1789 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1790
1791 WREG32(mmSIF_RTR_CTRL_4_E2E_HBM_EN,
1792 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1793 WREG32(mmSIF_RTR_CTRL_4_E2E_PCI_EN,
1794 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1795
1796 WREG32(mmSIF_RTR_CTRL_5_E2E_HBM_EN,
1797 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1798 WREG32(mmSIF_RTR_CTRL_5_E2E_PCI_EN,
1799 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1800
1801 WREG32(mmSIF_RTR_CTRL_6_E2E_HBM_EN,
1802 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1803 WREG32(mmSIF_RTR_CTRL_6_E2E_PCI_EN,
1804 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1805
1806 WREG32(mmSIF_RTR_CTRL_7_E2E_HBM_EN,
1807 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1808 WREG32(mmSIF_RTR_CTRL_7_E2E_PCI_EN,
1809 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1810
1811 WREG32(mmNIF_RTR_CTRL_0_E2E_HBM_EN,
1812 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1813 WREG32(mmNIF_RTR_CTRL_0_E2E_PCI_EN,
1814 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1815
1816 WREG32(mmNIF_RTR_CTRL_1_E2E_HBM_EN,
1817 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1818 WREG32(mmNIF_RTR_CTRL_1_E2E_PCI_EN,
1819 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1820
1821 WREG32(mmNIF_RTR_CTRL_2_E2E_HBM_EN,
1822 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1823 WREG32(mmNIF_RTR_CTRL_2_E2E_PCI_EN,
1824 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1825
1826 WREG32(mmNIF_RTR_CTRL_3_E2E_HBM_EN,
1827 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1828 WREG32(mmNIF_RTR_CTRL_3_E2E_PCI_EN,
1829 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1830
1831 WREG32(mmNIF_RTR_CTRL_4_E2E_HBM_EN,
1832 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1833 WREG32(mmNIF_RTR_CTRL_4_E2E_PCI_EN,
1834 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1835
1836 WREG32(mmNIF_RTR_CTRL_5_E2E_HBM_EN,
1837 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1838 WREG32(mmNIF_RTR_CTRL_5_E2E_PCI_EN,
1839 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1840
1841 WREG32(mmNIF_RTR_CTRL_6_E2E_HBM_EN,
1842 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1843 WREG32(mmNIF_RTR_CTRL_6_E2E_PCI_EN,
1844 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1845
1846 WREG32(mmNIF_RTR_CTRL_7_E2E_HBM_EN,
1847 1 << IF_RTR_CTRL_E2E_HBM_EN_VAL_SHIFT);
1848 WREG32(mmNIF_RTR_CTRL_7_E2E_PCI_EN,
1849 1 << IF_RTR_CTRL_E2E_PCI_EN_VAL_SHIFT);
1850
1851 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_HBM_EN,
1852 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1853 WREG32(mmDMA_IF_E_N_DOWN_CH0_E2E_PCI_EN,
1854 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1855
1856 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_HBM_EN,
1857 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1858 WREG32(mmDMA_IF_E_N_DOWN_CH1_E2E_PCI_EN,
1859 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1860
1861 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_HBM_EN,
1862 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1863 WREG32(mmDMA_IF_E_S_DOWN_CH0_E2E_PCI_EN,
1864 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1865
1866 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_HBM_EN,
1867 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1868 WREG32(mmDMA_IF_E_S_DOWN_CH1_E2E_PCI_EN,
1869 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1870
1871 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_HBM_EN,
1872 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1873 WREG32(mmDMA_IF_W_N_DOWN_CH0_E2E_PCI_EN,
1874 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1875
1876 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_HBM_EN,
1877 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1878 WREG32(mmDMA_IF_W_N_DOWN_CH1_E2E_PCI_EN,
1879 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1880
1881 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_HBM_EN,
1882 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1883 WREG32(mmDMA_IF_W_S_DOWN_CH0_E2E_PCI_EN,
1884 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1885
1886 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_HBM_EN,
1887 1 << DMA_IF_DOWN_CHX_E2E_HBM_EN_VAL_SHIFT);
1888 WREG32(mmDMA_IF_W_S_DOWN_CH1_E2E_PCI_EN,
1889 1 << DMA_IF_DOWN_CHX_E2E_PCI_EN_VAL_SHIFT);
1890}
1891
1892static void gaudi_init_hbm_cred(struct hl_device *hdev)
1893{
 1894	u32 hbm0_wr, hbm1_wr, hbm0_rd, hbm1_rd;
1895
1896 hbm0_wr = 0x33333333;
1897 hbm1_wr = 0x33333333;
1898 hbm0_rd = 0x77777777;
1899 hbm1_rd = 0xDDDDDDDD;
1900
1901 WREG32(mmDMA_IF_E_N_HBM0_WR_CRED_CNT, hbm0_wr);
1902 WREG32(mmDMA_IF_E_N_HBM1_WR_CRED_CNT, hbm1_wr);
1903 WREG32(mmDMA_IF_E_N_HBM0_RD_CRED_CNT, hbm0_rd);
1904 WREG32(mmDMA_IF_E_N_HBM1_RD_CRED_CNT, hbm1_rd);
1905
1906 WREG32(mmDMA_IF_E_S_HBM0_WR_CRED_CNT, hbm0_wr);
1907 WREG32(mmDMA_IF_E_S_HBM1_WR_CRED_CNT, hbm1_wr);
1908 WREG32(mmDMA_IF_E_S_HBM0_RD_CRED_CNT, hbm0_rd);
1909 WREG32(mmDMA_IF_E_S_HBM1_RD_CRED_CNT, hbm1_rd);
1910
1911 WREG32(mmDMA_IF_W_N_HBM0_WR_CRED_CNT, hbm0_wr);
1912 WREG32(mmDMA_IF_W_N_HBM1_WR_CRED_CNT, hbm1_wr);
1913 WREG32(mmDMA_IF_W_N_HBM0_RD_CRED_CNT, hbm0_rd);
1914 WREG32(mmDMA_IF_W_N_HBM1_RD_CRED_CNT, hbm1_rd);
1915
1916 WREG32(mmDMA_IF_W_S_HBM0_WR_CRED_CNT, hbm0_wr);
1917 WREG32(mmDMA_IF_W_S_HBM1_WR_CRED_CNT, hbm1_wr);
1918 WREG32(mmDMA_IF_W_S_HBM0_RD_CRED_CNT, hbm0_rd);
1919 WREG32(mmDMA_IF_W_S_HBM1_RD_CRED_CNT, hbm1_rd);
1920
1921 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_0,
1922 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1923 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1924 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_0,
1925 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1926 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1927 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_0,
1928 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1929 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1930 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_0,
1931 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1932 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1933
1934 WREG32(mmDMA_IF_E_N_HBM_CRED_EN_1,
1935 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1936 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1937 WREG32(mmDMA_IF_E_S_HBM_CRED_EN_1,
1938 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1939 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1940 WREG32(mmDMA_IF_W_N_HBM_CRED_EN_1,
1941 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1942 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1943 WREG32(mmDMA_IF_W_S_HBM_CRED_EN_1,
1944 (1 << DMA_IF_HBM_CRED_EN_READ_CREDIT_EN_SHIFT) |
1945 (1 << DMA_IF_HBM_CRED_EN_WRITE_CREDIT_EN_SHIFT));
1946}
1947
1948static void gaudi_init_rate_limiter(struct hl_device *hdev)
1949{
1950 u32 nr, nf, od, sat, rst, timeout;
1951 u64 freq;
1952
1953 nr = RREG32(mmPSOC_HBM_PLL_NR);
1954 nf = RREG32(mmPSOC_HBM_PLL_NF);
1955 od = RREG32(mmPSOC_HBM_PLL_OD);
1956 freq = (50 * (nf + 1)) / ((nr + 1) * (od + 1));
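	/*
	 * A worked example of the PLL formula above (illustrative values
	 * only, not necessarily what firmware programs): with NR = 0,
	 * NF = 31 and OD = 1, freq = (50 * 32) / (1 * 2) = 800 MHz, which
	 * matches the first supported bin below. The constant 50 is assumed
	 * to be the PLL reference clock in MHz.
	 */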
1957
1958 dev_dbg(hdev->dev, "HBM frequency is %lluMHz\n", freq);
1959
 1960	/* Configuration is for five DDMA channels */
1961 if (freq == 800) {
1962 sat = 4;
1963 rst = 11;
1964 timeout = 15;
1965 } else if (freq == 900) {
1966 sat = 4;
1967 rst = 15;
1968 timeout = 16;
1969 } else if (freq == 950) {
1970 sat = 4;
1971 rst = 15;
1972 timeout = 15;
1973 } else {
1974 dev_warn(hdev->dev,
1975 "unsupported HBM frequency %lluMHz, no rate-limiters\n",
1976 freq);
1977 return;
1978 }
1979
1980 WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_0, 0x111);
1981 WREG32(mmDMA_IF_W_S_DOWN_RSP_MID_WGHT_1, 0x111);
1982 WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_0, 0x111);
1983 WREG32(mmDMA_IF_E_S_DOWN_RSP_MID_WGHT_1, 0x111);
1984 WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_0, 0x111);
1985 WREG32(mmDMA_IF_W_N_DOWN_RSP_MID_WGHT_1, 0x111);
1986 WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_0, 0x111);
1987 WREG32(mmDMA_IF_E_N_DOWN_RSP_MID_WGHT_1, 0x111);
1988
1989 if (!hdev->rl_enable) {
1990 dev_info(hdev->dev, "Rate limiters disabled\n");
1991 return;
1992 }
1993
1994 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_SAT, sat);
1995 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_SAT, sat);
1996 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_SAT, sat);
1997 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_SAT, sat);
1998 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_SAT, sat);
1999 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_SAT, sat);
2000 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_SAT, sat);
2001 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_SAT, sat);
2002
2003 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_RST, rst);
2004 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_RST, rst);
2005 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_RST, rst);
2006 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_RST, rst);
2007 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_RST, rst);
2008 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_RST, rst);
2009 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_RST, rst);
2010 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_RST, rst);
2011
2012 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
2013 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
2014 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
2015 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
2016 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
2017 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
2018 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_TIMEOUT, timeout);
2019 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_TIMEOUT, timeout);
2020
2021 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_HBM_EN, 1);
2022 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_HBM_EN, 1);
2023 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_HBM_EN, 1);
2024 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_HBM_EN, 1);
2025 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_HBM_EN, 1);
2026 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_HBM_EN, 1);
2027 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_HBM_EN, 1);
2028 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_HBM_EN, 1);
2029
2030 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_SAT, sat);
2031 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_SAT, sat);
2032 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_SAT, sat);
2033 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_SAT, sat);
2034 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_SAT, sat);
2035 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_SAT, sat);
2036 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_SAT, sat);
2037 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_SAT, sat);
2038
2039 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_RST, rst);
2040 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_RST, rst);
2041 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_RST, rst);
2042 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_RST, rst);
2043 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_RST, rst);
2044 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_RST, rst);
2045 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_RST, rst);
2046 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_RST, rst);
2047
2048 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
2049 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
2050 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
2051 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
2052 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
2053 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
2054 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_TIMEOUT, timeout);
2055 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_TIMEOUT, timeout);
2056
2057 WREG32(mmDMA_IF_W_S_DOWN_CH0_RL_SRAM_EN, 1);
2058 WREG32(mmDMA_IF_W_S_DOWN_CH1_RL_SRAM_EN, 1);
2059 WREG32(mmDMA_IF_E_S_DOWN_CH0_RL_SRAM_EN, 1);
2060 WREG32(mmDMA_IF_E_S_DOWN_CH1_RL_SRAM_EN, 1);
2061 WREG32(mmDMA_IF_W_N_DOWN_CH0_RL_SRAM_EN, 1);
2062 WREG32(mmDMA_IF_W_N_DOWN_CH1_RL_SRAM_EN, 1);
2063 WREG32(mmDMA_IF_E_N_DOWN_CH0_RL_SRAM_EN, 1);
2064 WREG32(mmDMA_IF_E_N_DOWN_CH1_RL_SRAM_EN, 1);
2065}
2066
2067static void gaudi_init_golden_registers(struct hl_device *hdev)
2068{
2069 u32 tpc_offset;
2070 int tpc_id, i;
2071
2072 gaudi_init_e2e(hdev);
2073
2074 gaudi_init_hbm_cred(hdev);
2075
2076 gaudi_init_rate_limiter(hdev);
2077
2078 gaudi_disable_clock_gating(hdev);
2079
2080 for (tpc_id = 0, tpc_offset = 0;
2081 tpc_id < TPC_NUMBER_OF_ENGINES;
2082 tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
2083 /* Mask all arithmetic interrupts from TPC */
2084 WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
2085 /* Set 16 cache lines */
2086 WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
2087 ICACHE_FETCH_LINE_NUM, 2);
2088 }
2089
2090 /* Make sure 1st 128 bytes in SRAM are 0 for Tensor DMA */
2091 for (i = 0 ; i < 128 ; i += 8)
2092 writeq(0, hdev->pcie_bar[SRAM_BAR_ID] + i);
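	/*
	 * Note: writeq() stores 8 bytes per iteration, so the 16 iterations
	 * of the loop above zero exactly the first 128 bytes of SRAM through
	 * the SRAM PCI BAR.
	 */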
2093
2094 WREG32(mmMME0_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2095 WREG32(mmMME1_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2096 WREG32(mmMME2_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2097 WREG32(mmMME3_CTRL_EUS_ROLLUP_CNT_ADD, 3);
2098
2099 /* WA for H3-2081 */
2100 WREG32(mmPCIE_WRAP_MAX_OUTSTAND, 0x10ff);
2101}
2102
2103static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id,
2104 int qman_id, dma_addr_t qman_pq_addr)
2105{
2106 u32 mtr_base_en_lo, mtr_base_en_hi, mtr_base_ws_lo, mtr_base_ws_hi;
2107 u32 so_base_en_lo, so_base_en_hi, so_base_ws_lo, so_base_ws_hi;
2108 u32 q_off, dma_qm_offset;
2109 u32 dma_qm_err_cfg;
2110
2111 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2112
2113 mtr_base_en_lo = lower_32_bits(CFG_BASE +
2114 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2115 mtr_base_en_hi = upper_32_bits(CFG_BASE +
2116 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2117 so_base_en_lo = lower_32_bits(CFG_BASE +
2118 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2119 so_base_en_hi = upper_32_bits(CFG_BASE +
2120 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2121 mtr_base_ws_lo = lower_32_bits(CFG_BASE +
2122 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2123 mtr_base_ws_hi = upper_32_bits(CFG_BASE +
2124 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2125 so_base_ws_lo = lower_32_bits(CFG_BASE +
2126 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2127 so_base_ws_hi = upper_32_bits(CFG_BASE +
2128 mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0);
2129
2130 q_off = dma_qm_offset + qman_id * 4;
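	/*
	 * The per-stream QMAN registers (PQ_BASE, PQ_PI, CP_MSG_BASE, etc.)
	 * are laid out as arrays of four 32-bit entries, so multiplying the
	 * stream index by 4 selects this stream's register instance within
	 * the QMAN block (based on the _0.._3 register naming used below).
	 */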
2131
2132 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off, lower_32_bits(qman_pq_addr));
2133 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off, upper_32_bits(qman_pq_addr));
2134
2135 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HL_QUEUE_LENGTH));
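	/*
	 * PQ_SIZE takes the log2 of the number of queue entries; e.g. if
	 * HL_QUEUE_LENGTH is 256, the register is programmed with 8. (The
	 * actual value of HL_QUEUE_LENGTH is defined elsewhere in the
	 * driver.)
	 */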
2136 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2137 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2138
2139 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2140 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2141 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2142
2143 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_en_lo);
2144 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_en_hi);
2145 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_en_lo);
2146 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_en_hi);
2147 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_LO_0 + q_off, mtr_base_ws_lo);
2148 WREG32(mmDMA0_QM_CP_MSG_BASE2_ADDR_HI_0 + q_off, mtr_base_ws_hi);
2149 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_LO_0 + q_off, so_base_ws_lo);
2150 WREG32(mmDMA0_QM_CP_MSG_BASE3_ADDR_HI_0 + q_off, so_base_ws_hi);
2151
2152 /* The following configuration is needed only once per QMAN */
2153 if (qman_id == 0) {
2154 /* Configure RAZWI IRQ */
2155 dma_qm_err_cfg = PCI_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2156 if (hdev->stop_on_err) {
2157 dma_qm_err_cfg |=
2158 PCI_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2159 }
2160
2161 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2162 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2163 lower_32_bits(CFG_BASE +
2164 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2165 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2166 upper_32_bits(CFG_BASE +
2167 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2168 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2169 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2170 dma_id);
2171
2172 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2173 QM_ARB_ERR_MSG_EN_MASK);
2174
2175 /* Increase ARB WDT to support streams architecture */
2176 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2177 GAUDI_ARB_WDT_TIMEOUT);
2178
2179 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2180 QMAN_EXTERNAL_MAKE_TRUSTED);
2181
2182 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2183 }
2184}
2185
2186static void gaudi_init_dma_core(struct hl_device *hdev, int dma_id)
2187{
2188 u32 dma_offset = dma_id * DMA_CORE_OFFSET;
2189 u32 dma_err_cfg = 1 << DMA0_CORE_ERR_CFG_ERR_MSG_EN_SHIFT;
2190
2191 /* Set to maximum possible according to physical size */
2192 WREG32(mmDMA0_CORE_RD_MAX_OUTSTAND + dma_offset, 0);
2193 WREG32(mmDMA0_CORE_RD_MAX_SIZE + dma_offset, 0);
2194
 2195	/* STOP_ON bit implies no completion is sent for the operation in case of RAZWI */
2196 if (hdev->stop_on_err)
2197 dma_err_cfg |= 1 << DMA0_CORE_ERR_CFG_STOP_ON_ERR_SHIFT;
2198
2199 WREG32(mmDMA0_CORE_ERR_CFG + dma_offset, dma_err_cfg);
2200 WREG32(mmDMA0_CORE_ERRMSG_ADDR_LO + dma_offset,
2201 lower_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2202 WREG32(mmDMA0_CORE_ERRMSG_ADDR_HI + dma_offset,
2203 upper_32_bits(CFG_BASE + mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2204 WREG32(mmDMA0_CORE_ERRMSG_WDATA + dma_offset,
2205 gaudi_irq_map_table[GAUDI_EVENT_DMA0_CORE].cpu_id + dma_id);
2206 WREG32(mmDMA0_CORE_PROT + dma_offset,
2207 1 << DMA0_CORE_PROT_ERR_VAL_SHIFT);
2208 /* If the channel is secured, it should be in MMU bypass mode */
2209 WREG32(mmDMA0_CORE_SECURE_PROPS + dma_offset,
2210 1 << DMA0_CORE_SECURE_PROPS_MMBP_SHIFT);
2211 WREG32(mmDMA0_CORE_CFG_0 + dma_offset, 1 << DMA0_CORE_CFG_0_EN_SHIFT);
2212}
2213
2214static void gaudi_enable_qman(struct hl_device *hdev, int dma_id,
2215 u32 enable_mask)
2216{
2217 u32 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2218
2219 WREG32(mmDMA0_QM_GLBL_CFG0 + dma_qm_offset, enable_mask);
2220}
2221
2222static void gaudi_init_pci_dma_qmans(struct hl_device *hdev)
2223{
2224 struct gaudi_device *gaudi = hdev->asic_specific;
2225 struct hl_hw_queue *q;
2226 int i, j, dma_id, cpu_skip, nic_skip, cq_id = 0, q_idx, msi_vec = 0;
2227
2228 if (gaudi->hw_cap_initialized & HW_CAP_PCI_DMA)
2229 return;
2230
2231 for (i = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2232 dma_id = gaudi_dma_assignment[i];
2233 /*
 2234		 * For queues after the CPU Q, we need to add 1 to get the
 2235		 * correct queue index. In addition, we need to add the CPU EQ
 2236		 * and NIC IRQs in order to get the correct MSI register.
2237 */
2238 if (dma_id > 1) {
2239 cpu_skip = 1;
2240 nic_skip = NIC_NUMBER_OF_ENGINES;
2241 } else {
2242 cpu_skip = 0;
2243 nic_skip = 0;
2244 }
2245
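		/*
		 * Illustrative index arithmetic: if the third PCI DMA channel
		 * maps to dma_id 5 (per gaudi_dma_assignment), stream 0 gives
		 * q_idx = 4 * 5 + 0 + 1 = 21, where the "+ 1" accounts for the
		 * CPU queue that sits after the DMA 1 queues in the kernel
		 * queue list.
		 */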
2246 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2247 q_idx = 4 * dma_id + j + cpu_skip;
2248 q = &hdev->kernel_queues[q_idx];
2249 q->cq_id = cq_id++;
2250 q->msi_vec = nic_skip + cpu_skip + msi_vec++;
2251 gaudi_init_pci_dma_qman(hdev, dma_id, j,
2252 q->bus_address);
2253 }
2254
2255 gaudi_init_dma_core(hdev, dma_id);
2256
2257 gaudi_enable_qman(hdev, dma_id, PCI_DMA_QMAN_ENABLE);
2258 }
2259
2260 gaudi->hw_cap_initialized |= HW_CAP_PCI_DMA;
2261}
2262
2263static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id,
2264 int qman_id, u64 qman_base_addr)
2265{
2266 u32 mtr_base_lo, mtr_base_hi;
2267 u32 so_base_lo, so_base_hi;
2268 u32 q_off, dma_qm_offset;
2269 u32 dma_qm_err_cfg;
2270
2271 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
2272
2273 mtr_base_lo = lower_32_bits(CFG_BASE +
2274 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2275 mtr_base_hi = upper_32_bits(CFG_BASE +
2276 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2277 so_base_lo = lower_32_bits(CFG_BASE +
2278 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2279 so_base_hi = upper_32_bits(CFG_BASE +
2280 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2281
2282 q_off = dma_qm_offset + qman_id * 4;
2283
2284 if (qman_id < 4) {
2285 WREG32(mmDMA0_QM_PQ_BASE_LO_0 + q_off,
2286 lower_32_bits(qman_base_addr));
2287 WREG32(mmDMA0_QM_PQ_BASE_HI_0 + q_off,
2288 upper_32_bits(qman_base_addr));
2289
2290 WREG32(mmDMA0_QM_PQ_SIZE_0 + q_off, ilog2(HBM_DMA_QMAN_LENGTH));
2291 WREG32(mmDMA0_QM_PQ_PI_0 + q_off, 0);
2292 WREG32(mmDMA0_QM_PQ_CI_0 + q_off, 0);
2293
2294 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2295 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2296 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2297 } else {
2298 WREG32(mmDMA0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2299 WREG32(mmDMA0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2300 WREG32(mmDMA0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2301
2302 /* Configure RAZWI IRQ */
2303 dma_qm_err_cfg = HBM_DMA_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2304 if (hdev->stop_on_err) {
2305 dma_qm_err_cfg |=
2306 HBM_DMA_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2307 }
2308 WREG32(mmDMA0_QM_GLBL_ERR_CFG + dma_qm_offset, dma_qm_err_cfg);
2309
2310 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_LO + dma_qm_offset,
2311 lower_32_bits(CFG_BASE +
2312 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2313 WREG32(mmDMA0_QM_GLBL_ERR_ADDR_HI + dma_qm_offset,
2314 upper_32_bits(CFG_BASE +
2315 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2316 WREG32(mmDMA0_QM_GLBL_ERR_WDATA + dma_qm_offset,
2317 gaudi_irq_map_table[GAUDI_EVENT_DMA0_QM].cpu_id +
2318 dma_id);
2319
2320 WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset,
2321 QM_ARB_ERR_MSG_EN_MASK);
2322
2323 /* Increase ARB WDT to support streams architecture */
2324 WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset,
2325 GAUDI_ARB_WDT_TIMEOUT);
2326
2327 WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0);
2328 WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset,
2329 QMAN_INTERNAL_MAKE_TRUSTED);
2330 }
2331
2332 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2333 WREG32(mmDMA0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2334 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2335 WREG32(mmDMA0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2336}
2337
2338static void gaudi_init_hbm_dma_qmans(struct hl_device *hdev)
2339{
2340 struct gaudi_device *gaudi = hdev->asic_specific;
2341 struct gaudi_internal_qman_info *q;
2342 u64 qman_base_addr;
2343 int i, j, dma_id, internal_q_index;
2344
2345 if (gaudi->hw_cap_initialized & HW_CAP_HBM_DMA)
2346 return;
2347
2348 for (i = 0 ; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2349 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1 + i];
2350
2351 for (j = 0 ; j < QMAN_STREAMS ; j++) {
2352 /*
2353 * Add the CPU queue in order to get the correct queue
 2354			 * number, as all internal queues are placed after it
2355 */
2356 internal_q_index = dma_id * QMAN_STREAMS + j + 1;
2357
2358 q = &gaudi->internal_qmans[internal_q_index];
2359 qman_base_addr = (u64) q->pq_dma_addr;
2360 gaudi_init_hbm_dma_qman(hdev, dma_id, j,
2361 qman_base_addr);
2362 }
2363
2364 /* Initializing lower CP for HBM DMA QMAN */
2365 gaudi_init_hbm_dma_qman(hdev, dma_id, 4, 0);
2366
2367 gaudi_init_dma_core(hdev, dma_id);
2368
2369 gaudi_enable_qman(hdev, dma_id, HBM_DMA_QMAN_ENABLE);
2370 }
2371
2372 gaudi->hw_cap_initialized |= HW_CAP_HBM_DMA;
2373}
2374
2375static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset,
2376 int qman_id, u64 qman_base_addr)
2377{
2378 u32 mtr_base_lo, mtr_base_hi;
2379 u32 so_base_lo, so_base_hi;
2380 u32 q_off, mme_id;
2381 u32 mme_qm_err_cfg;
2382
2383 mtr_base_lo = lower_32_bits(CFG_BASE +
2384 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2385 mtr_base_hi = upper_32_bits(CFG_BASE +
2386 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2387 so_base_lo = lower_32_bits(CFG_BASE +
2388 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2389 so_base_hi = upper_32_bits(CFG_BASE +
2390 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2391
2392 q_off = mme_offset + qman_id * 4;
2393
2394 if (qman_id < 4) {
2395 WREG32(mmMME0_QM_PQ_BASE_LO_0 + q_off,
2396 lower_32_bits(qman_base_addr));
2397 WREG32(mmMME0_QM_PQ_BASE_HI_0 + q_off,
2398 upper_32_bits(qman_base_addr));
2399
2400 WREG32(mmMME0_QM_PQ_SIZE_0 + q_off, ilog2(MME_QMAN_LENGTH));
2401 WREG32(mmMME0_QM_PQ_PI_0 + q_off, 0);
2402 WREG32(mmMME0_QM_PQ_CI_0 + q_off, 0);
2403
2404 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2405 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2406 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2407 } else {
2408 WREG32(mmMME0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2409 WREG32(mmMME0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2410 WREG32(mmMME0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2411
2412 /* Configure RAZWI IRQ */
2413 mme_id = mme_offset /
2414 (mmMME1_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0);
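		/*
		 * Dividing the offset by the per-MME register stride recovers
		 * the engine index: the caller below passes either 0 (MME0) or
		 * the MME2-MME0 distance, so mme_id resolves to 0 or 2,
		 * assuming the MME register blocks are evenly spaced.
		 */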
2415
2416 mme_qm_err_cfg = MME_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2417 if (hdev->stop_on_err) {
2418 mme_qm_err_cfg |=
2419 MME_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2420 }
2421 WREG32(mmMME0_QM_GLBL_ERR_CFG + mme_offset, mme_qm_err_cfg);
2422 WREG32(mmMME0_QM_GLBL_ERR_ADDR_LO + mme_offset,
2423 lower_32_bits(CFG_BASE +
2424 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2425 WREG32(mmMME0_QM_GLBL_ERR_ADDR_HI + mme_offset,
2426 upper_32_bits(CFG_BASE +
2427 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2428 WREG32(mmMME0_QM_GLBL_ERR_WDATA + mme_offset,
2429 gaudi_irq_map_table[GAUDI_EVENT_MME0_QM].cpu_id +
2430 mme_id);
2431
2432 WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset,
2433 QM_ARB_ERR_MSG_EN_MASK);
2434
2435 /* Increase ARB WDT to support streams architecture */
2436 WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset,
2437 GAUDI_ARB_WDT_TIMEOUT);
2438
2439 WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0);
2440 WREG32(mmMME0_QM_GLBL_PROT + mme_offset,
2441 QMAN_INTERNAL_MAKE_TRUSTED);
2442 }
2443
2444 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2445 WREG32(mmMME0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2446 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2447 WREG32(mmMME0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2448}
2449
2450static void gaudi_init_mme_qmans(struct hl_device *hdev)
2451{
2452 struct gaudi_device *gaudi = hdev->asic_specific;
2453 struct gaudi_internal_qman_info *q;
2454 u64 qman_base_addr;
2455 u32 mme_offset;
2456 int i, internal_q_index;
2457
2458 if (gaudi->hw_cap_initialized & HW_CAP_MME)
2459 return;
2460
2461 /*
2462 * map GAUDI_QUEUE_ID_MME_0_X to the N_W_MME (mmMME2_QM_BASE)
2463 * and GAUDI_QUEUE_ID_MME_1_X to the S_W_MME (mmMME0_QM_BASE)
2464 */
2465
2466 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2467
2468 for (i = 0 ; i < MME_NUMBER_OF_QMANS ; i++) {
2469 internal_q_index = GAUDI_QUEUE_ID_MME_0_0 + i;
2470 q = &gaudi->internal_qmans[internal_q_index];
2471 qman_base_addr = (u64) q->pq_dma_addr;
2472 gaudi_init_mme_qman(hdev, mme_offset, (i & 0x3),
2473 qman_base_addr);
2474 if (i == 3)
2475 mme_offset = 0;
2476 }
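	/*
	 * The loop above programs the upper CPs: the first four queues go to
	 * the north-west MME (mmMME2_QM) and, once mme_offset is reset to 0
	 * after the fourth iteration, the last four go to the south-west MME
	 * (mmMME0_QM), matching the queue-ID mapping described above.
	 */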
2477
2478 /* Initializing lower CP for MME QMANs */
2479 mme_offset = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0;
2480 gaudi_init_mme_qman(hdev, mme_offset, 4, 0);
2481 gaudi_init_mme_qman(hdev, 0, 4, 0);
2482
2483 WREG32(mmMME2_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2484 WREG32(mmMME0_QM_GLBL_CFG0, QMAN_MME_ENABLE);
2485
2486 gaudi->hw_cap_initialized |= HW_CAP_MME;
2487}
2488
2489static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset,
2490 int qman_id, u64 qman_base_addr)
2491{
2492 u32 mtr_base_lo, mtr_base_hi;
2493 u32 so_base_lo, so_base_hi;
2494 u32 q_off, tpc_id;
2495 u32 tpc_qm_err_cfg;
2496
2497 mtr_base_lo = lower_32_bits(CFG_BASE +
2498 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2499 mtr_base_hi = upper_32_bits(CFG_BASE +
2500 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0);
2501 so_base_lo = lower_32_bits(CFG_BASE +
2502 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2503 so_base_hi = upper_32_bits(CFG_BASE +
2504 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2505
2506 q_off = tpc_offset + qman_id * 4;
2507
2508 if (qman_id < 4) {
2509 WREG32(mmTPC0_QM_PQ_BASE_LO_0 + q_off,
2510 lower_32_bits(qman_base_addr));
2511 WREG32(mmTPC0_QM_PQ_BASE_HI_0 + q_off,
2512 upper_32_bits(qman_base_addr));
2513
2514 WREG32(mmTPC0_QM_PQ_SIZE_0 + q_off, ilog2(TPC_QMAN_LENGTH));
2515 WREG32(mmTPC0_QM_PQ_PI_0 + q_off, 0);
2516 WREG32(mmTPC0_QM_PQ_CI_0 + q_off, 0);
2517
2518 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x81BC);
2519 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x81B4);
2520 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2521 } else {
2522 WREG32(mmTPC0_QM_CP_LDMA_TSIZE_OFFSET_0 + q_off, 0x74);
2523 WREG32(mmTPC0_QM_CP_LDMA_SRC_BASE_LO_OFFSET_0 + q_off, 0x14);
2524 WREG32(mmTPC0_QM_CP_LDMA_DST_BASE_LO_OFFSET_0 + q_off, 0x1C);
2525
2526 /* Configure RAZWI IRQ */
2527 tpc_id = tpc_offset /
2528 (mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0);
2529
2530 tpc_qm_err_cfg = TPC_QMAN_GLBL_ERR_CFG_MSG_EN_MASK;
2531 if (hdev->stop_on_err) {
2532 tpc_qm_err_cfg |=
2533 TPC_QMAN_GLBL_ERR_CFG_STOP_ON_ERR_EN_MASK;
2534 }
2535
2536 WREG32(mmTPC0_QM_GLBL_ERR_CFG + tpc_offset, tpc_qm_err_cfg);
2537 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_LO + tpc_offset,
2538 lower_32_bits(CFG_BASE +
2539 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2540 WREG32(mmTPC0_QM_GLBL_ERR_ADDR_HI + tpc_offset,
2541 upper_32_bits(CFG_BASE +
2542 mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR));
2543 WREG32(mmTPC0_QM_GLBL_ERR_WDATA + tpc_offset,
2544 gaudi_irq_map_table[GAUDI_EVENT_TPC0_QM].cpu_id +
2545 tpc_id);
2546
2547 WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset,
2548 QM_ARB_ERR_MSG_EN_MASK);
2549
2550 /* Increase ARB WDT to support streams architecture */
2551 WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset,
2552 GAUDI_ARB_WDT_TIMEOUT);
2553
2554 WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0);
2555 WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset,
2556 QMAN_INTERNAL_MAKE_TRUSTED);
2557 }
2558
2559 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_LO_0 + q_off, mtr_base_lo);
2560 WREG32(mmTPC0_QM_CP_MSG_BASE0_ADDR_HI_0 + q_off, mtr_base_hi);
2561 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_LO_0 + q_off, so_base_lo);
2562 WREG32(mmTPC0_QM_CP_MSG_BASE1_ADDR_HI_0 + q_off, so_base_hi);
2563}
2564
2565static void gaudi_init_tpc_qmans(struct hl_device *hdev)
2566{
2567 struct gaudi_device *gaudi = hdev->asic_specific;
2568 struct gaudi_internal_qman_info *q;
2569 u64 qman_base_addr;
2570 u32 so_base_hi, tpc_offset = 0;
2571 u32 tpc_delta = mmTPC1_CFG_SM_BASE_ADDRESS_HIGH -
2572 mmTPC0_CFG_SM_BASE_ADDRESS_HIGH;
2573 int i, tpc_id, internal_q_index;
2574
2575 if (gaudi->hw_cap_initialized & HW_CAP_TPC_MASK)
2576 return;
2577
2578 so_base_hi = upper_32_bits(CFG_BASE +
2579 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0);
2580
2581 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2582 for (i = 0 ; i < QMAN_STREAMS ; i++) {
2583 internal_q_index = GAUDI_QUEUE_ID_TPC_0_0 +
2584 tpc_id * QMAN_STREAMS + i;
2585 q = &gaudi->internal_qmans[internal_q_index];
2586 qman_base_addr = (u64) q->pq_dma_addr;
2587 gaudi_init_tpc_qman(hdev, tpc_offset, i,
2588 qman_base_addr);
2589
2590 if (i == 3) {
2591 /* Initializing lower CP for TPC QMAN */
2592 gaudi_init_tpc_qman(hdev, tpc_offset, 4, 0);
2593
2594 /* Enable the QMAN and TPC channel */
2595 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset,
2596 QMAN_TPC_ENABLE);
2597 }
2598 }
2599
2600 WREG32(mmTPC0_CFG_SM_BASE_ADDRESS_HIGH + tpc_id * tpc_delta,
2601 so_base_hi);
2602
2603 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2604
2605 gaudi->hw_cap_initialized |= 1 << (HW_CAP_TPC_SHIFT + tpc_id);
2606 }
2607}
2608
2609static void gaudi_disable_pci_dma_qmans(struct hl_device *hdev)
2610{
2611 struct gaudi_device *gaudi = hdev->asic_specific;
2612
2613 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2614 return;
2615
2616 WREG32(mmDMA0_QM_GLBL_CFG0, 0);
2617 WREG32(mmDMA1_QM_GLBL_CFG0, 0);
2618 WREG32(mmDMA5_QM_GLBL_CFG0, 0);
2619}
2620
2621static void gaudi_disable_hbm_dma_qmans(struct hl_device *hdev)
2622{
2623 struct gaudi_device *gaudi = hdev->asic_specific;
2624
2625 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2626 return;
2627
2628 WREG32(mmDMA2_QM_GLBL_CFG0, 0);
2629 WREG32(mmDMA3_QM_GLBL_CFG0, 0);
2630 WREG32(mmDMA4_QM_GLBL_CFG0, 0);
2631 WREG32(mmDMA6_QM_GLBL_CFG0, 0);
2632 WREG32(mmDMA7_QM_GLBL_CFG0, 0);
2633}
2634
2635static void gaudi_disable_mme_qmans(struct hl_device *hdev)
2636{
2637 struct gaudi_device *gaudi = hdev->asic_specific;
2638
2639 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2640 return;
2641
2642 WREG32(mmMME2_QM_GLBL_CFG0, 0);
2643 WREG32(mmMME0_QM_GLBL_CFG0, 0);
2644}
2645
2646static void gaudi_disable_tpc_qmans(struct hl_device *hdev)
2647{
2648 struct gaudi_device *gaudi = hdev->asic_specific;
2649 u32 tpc_offset = 0;
2650 int tpc_id;
2651
2652 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2653 return;
2654
2655 for (tpc_id = 0 ; tpc_id < TPC_NUMBER_OF_ENGINES ; tpc_id++) {
2656 WREG32(mmTPC0_QM_GLBL_CFG0 + tpc_offset, 0);
2657 tpc_offset += mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0;
2658 }
2659}
2660
2661static void gaudi_stop_pci_dma_qmans(struct hl_device *hdev)
2662{
2663 struct gaudi_device *gaudi = hdev->asic_specific;
2664
2665 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2666 return;
2667
2668 /* Stop upper CPs of QMANs 0.0 to 1.3 and 5.0 to 5.3 */
2669 WREG32(mmDMA0_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2670 WREG32(mmDMA1_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2671 WREG32(mmDMA5_QM_GLBL_CFG1, 0xF << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2672}
2673
2674static void gaudi_stop_hbm_dma_qmans(struct hl_device *hdev)
2675{
2676 struct gaudi_device *gaudi = hdev->asic_specific;
2677
2678 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2679 return;
2680
2681 /* Stop CPs of HBM DMA QMANs */
2682
2683 WREG32(mmDMA2_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2684 WREG32(mmDMA3_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2685 WREG32(mmDMA4_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2686 WREG32(mmDMA6_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2687 WREG32(mmDMA7_QM_GLBL_CFG1, 0x1F << DMA0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2688}
2689
2690static void gaudi_stop_mme_qmans(struct hl_device *hdev)
2691{
2692 struct gaudi_device *gaudi = hdev->asic_specific;
2693
2694 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2695 return;
2696
2697 /* Stop CPs of MME QMANs */
2698 WREG32(mmMME2_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2699 WREG32(mmMME0_QM_GLBL_CFG1, 0x1F << MME0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2700}
2701
2702static void gaudi_stop_tpc_qmans(struct hl_device *hdev)
2703{
2704 struct gaudi_device *gaudi = hdev->asic_specific;
2705
2706 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2707 return;
2708
2709 WREG32(mmTPC0_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2710 WREG32(mmTPC1_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2711 WREG32(mmTPC2_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2712 WREG32(mmTPC3_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2713 WREG32(mmTPC4_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2714 WREG32(mmTPC5_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2715 WREG32(mmTPC6_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2716 WREG32(mmTPC7_QM_GLBL_CFG1, 0x1F << TPC0_QM_GLBL_CFG1_CP_STOP_SHIFT);
2717}
2718
2719static void gaudi_pci_dma_stall(struct hl_device *hdev)
2720{
2721 struct gaudi_device *gaudi = hdev->asic_specific;
2722
2723 if (!(gaudi->hw_cap_initialized & HW_CAP_PCI_DMA))
2724 return;
2725
2726 WREG32(mmDMA0_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2727 WREG32(mmDMA1_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2728 WREG32(mmDMA5_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2729}
2730
2731static void gaudi_hbm_dma_stall(struct hl_device *hdev)
2732{
2733 struct gaudi_device *gaudi = hdev->asic_specific;
2734
2735 if (!(gaudi->hw_cap_initialized & HW_CAP_HBM_DMA))
2736 return;
2737
2738 WREG32(mmDMA2_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2739 WREG32(mmDMA3_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2740 WREG32(mmDMA4_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2741 WREG32(mmDMA6_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2742 WREG32(mmDMA7_CORE_CFG_1, 1 << DMA0_CORE_CFG_1_HALT_SHIFT);
2743}
2744
2745static void gaudi_mme_stall(struct hl_device *hdev)
2746{
2747 struct gaudi_device *gaudi = hdev->asic_specific;
2748
2749 if (!(gaudi->hw_cap_initialized & HW_CAP_MME))
2750 return;
2751
2752 /* WA for H3-1800 bug: do ACC and SBAB writes twice */
2753 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2754 WREG32(mmMME0_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2755 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2756 WREG32(mmMME0_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2757 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2758 WREG32(mmMME1_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2759 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2760 WREG32(mmMME1_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2761 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2762 WREG32(mmMME2_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2763 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2764 WREG32(mmMME2_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2765 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2766 WREG32(mmMME3_ACC_ACC_STALL, 1 << MME_ACC_ACC_STALL_R_SHIFT);
2767 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2768 WREG32(mmMME3_SBAB_SB_STALL, 1 << MME_SBAB_SB_STALL_R_SHIFT);
2769}
2770
2771static void gaudi_tpc_stall(struct hl_device *hdev)
2772{
2773 struct gaudi_device *gaudi = hdev->asic_specific;
2774
2775 if (!(gaudi->hw_cap_initialized & HW_CAP_TPC_MASK))
2776 return;
2777
2778 WREG32(mmTPC0_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2779 WREG32(mmTPC1_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2780 WREG32(mmTPC2_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2781 WREG32(mmTPC3_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2782 WREG32(mmTPC4_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2783 WREG32(mmTPC5_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2784 WREG32(mmTPC6_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2785 WREG32(mmTPC7_CFG_TPC_STALL, 1 << TPC0_CFG_TPC_STALL_V_SHIFT);
2786}
2787
2788static void gaudi_enable_clock_gating(struct hl_device *hdev)
2789{
2790 struct gaudi_device *gaudi = hdev->asic_specific;
2791 u32 qman_offset;
2792 int i;
2793
2794 if (!hdev->clock_gating)
2795 return;
2796
2797 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE)
2798 return;
2799
 2800	/* If we are in a debug session, don't enable clock gating
 2801	 * as it may interfere
2802 */
2803 if (hdev->in_debug)
2804 return;
2805
2806 for (i = 0, qman_offset = 0 ; i < PCI_DMA_NUMBER_OF_CHNLS ; i++) {
2807 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2808 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
2809 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2810 QMAN_UPPER_CP_CGM_PWR_GATE_EN);
2811 }
2812
2813 for (; i < HBM_DMA_NUMBER_OF_CHNLS ; i++) {
2814 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
2815 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, QMAN_CGM1_PWR_GATE_EN);
2816 WREG32(mmDMA0_QM_CGM_CFG + qman_offset,
2817 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2818 }
2819
2820 WREG32(mmMME0_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2821 WREG32(mmMME0_QM_CGM_CFG,
2822 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2823 WREG32(mmMME2_QM_CGM_CFG1, QMAN_CGM1_PWR_GATE_EN);
2824 WREG32(mmMME2_QM_CGM_CFG,
2825 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2826
2827 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2828 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset,
2829 QMAN_CGM1_PWR_GATE_EN);
2830 WREG32(mmTPC0_QM_CGM_CFG + qman_offset,
2831 QMAN_COMMON_CP_CGM_PWR_GATE_EN);
2832
2833 qman_offset += TPC_QMAN_OFFSET;
2834 }
2835
2836 gaudi->hw_cap_initialized |= HW_CAP_CLK_GATE;
2837}
2838
2839static void gaudi_disable_clock_gating(struct hl_device *hdev)
2840{
2841 struct gaudi_device *gaudi = hdev->asic_specific;
2842 u32 qman_offset;
2843 int i;
2844
2845 if (!(gaudi->hw_cap_initialized & HW_CAP_CLK_GATE))
2846 return;
2847
2848 for (i = 0, qman_offset = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
2849 WREG32(mmDMA0_QM_CGM_CFG + qman_offset, 0);
2850 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 0);
2851
2852 qman_offset += (mmDMA1_QM_CGM_CFG - mmDMA0_QM_CGM_CFG);
2853 }
2854
2855 WREG32(mmMME0_QM_CGM_CFG, 0);
2856 WREG32(mmMME0_QM_CGM_CFG1, 0);
2857 WREG32(mmMME2_QM_CGM_CFG, 0);
2858 WREG32(mmMME2_QM_CGM_CFG1, 0);
2859
2860 for (i = 0, qman_offset = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
2861 WREG32(mmTPC0_QM_CGM_CFG + qman_offset, 0);
2862 WREG32(mmTPC0_QM_CGM_CFG1 + qman_offset, 0);
2863
2864 qman_offset += (mmTPC1_QM_CGM_CFG - mmTPC0_QM_CGM_CFG);
2865 }
2866
2867 gaudi->hw_cap_initialized &= ~(HW_CAP_CLK_GATE);
2868}
2869
2870static void gaudi_enable_timestamp(struct hl_device *hdev)
2871{
2872 /* Disable the timestamp counter */
2873 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2874
2875 /* Zero the lower/upper parts of the 64-bit counter */
2876 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
2877 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
2878
2879 /* Enable the counter */
2880 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
2881}
2882
2883static void gaudi_disable_timestamp(struct hl_device *hdev)
2884{
2885 /* Disable the timestamp counter */
2886 WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
2887}
2888
2889static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
2890{
2891 u32 wait_timeout_ms, cpu_timeout_ms;
2892
2893 dev_info(hdev->dev,
2894 "Halting compute engines and disabling interrupts\n");
2895
2896 if (hdev->pldm) {
2897 wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2898 cpu_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC;
2899 } else {
2900 wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
2901 cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
2902 }
2903
2904 if (hard_reset) {
2905 /*
 2906		 * We don't know the state of the CPU, so make sure it is
 2907		 * stopped by any means necessary
2908 */
2909 WREG32(mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU, KMD_MSG_GOTO_WFE);
2910 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
2911 GAUDI_EVENT_HALT_MACHINE);
2912 msleep(cpu_timeout_ms);
2913 }
2914
2915 gaudi_stop_mme_qmans(hdev);
2916 gaudi_stop_tpc_qmans(hdev);
2917 gaudi_stop_hbm_dma_qmans(hdev);
2918 gaudi_stop_pci_dma_qmans(hdev);
2919
2920 gaudi_disable_clock_gating(hdev);
2921
2922 msleep(wait_timeout_ms);
2923
2924 gaudi_pci_dma_stall(hdev);
2925 gaudi_hbm_dma_stall(hdev);
2926 gaudi_tpc_stall(hdev);
2927 gaudi_mme_stall(hdev);
2928
2929 msleep(wait_timeout_ms);
2930
2931 gaudi_disable_mme_qmans(hdev);
2932 gaudi_disable_tpc_qmans(hdev);
2933 gaudi_disable_hbm_dma_qmans(hdev);
2934 gaudi_disable_pci_dma_qmans(hdev);
2935
2936 gaudi_disable_timestamp(hdev);
2937
2938 if (hard_reset)
2939 gaudi_disable_msi(hdev);
2940 else
2941 gaudi_sync_irqs(hdev);
2942}
2943
2944static int gaudi_mmu_init(struct hl_device *hdev)
2945{
2946 struct asic_fixed_properties *prop = &hdev->asic_prop;
2947 struct gaudi_device *gaudi = hdev->asic_specific;
2948 u64 hop0_addr;
2949 int rc, i;
2950
2951 if (!hdev->mmu_enable)
2952 return 0;
2953
2954 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
2955 return 0;
2956
2957 hdev->dram_supports_virtual_memory = false;
2958
2959 for (i = 0 ; i < prop->max_asid ; i++) {
2960 hop0_addr = prop->mmu_pgt_addr +
2961 (i * prop->mmu_hop_table_size);
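		/*
		 * Each ASID gets its own hop-0 page table, carved out of one
		 * contiguous page-table region at fixed mmu_hop_table_size
		 * intervals starting from mmu_pgt_addr.
		 */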
2962
2963 rc = gaudi_mmu_update_asid_hop0_addr(hdev, i, hop0_addr);
2964 if (rc) {
2965 dev_err(hdev->dev,
2966 "failed to set hop0 addr for asid %d\n", i);
2967 goto err;
2968 }
2969 }
2970
 2971	/* init MMU cache management page */
2972 WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
2973 WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
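	/*
	 * The cache-invalidation base address is split across two registers,
	 * holding bits [39:8] and [49:40] respectively (per the register
	 * names), hence the two shifts of the management address.
	 */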
2974
2975 hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
2976 VM_TYPE_USERPTR | VM_TYPE_PHYS_PACK);
2977
2978 WREG32(mmMMU_UP_MMU_ENABLE, 1);
2979 WREG32(mmMMU_UP_SPI_MASK, 0xF);
2980
2981 WREG32(mmSTLB_HOP_CONFIGURATION,
2982 hdev->mmu_huge_page_opt ? 0x30440 : 0x40440);
2983
2984 gaudi->hw_cap_initialized |= HW_CAP_MMU;
2985
2986 return 0;
2987
2988err:
2989 return rc;
2990}
2991
2992static int gaudi_load_firmware_to_device(struct hl_device *hdev)
2993{
2994 void __iomem *dst;
2995
2996 /* HBM scrambler must be initialized before pushing F/W to HBM */
2997 gaudi_init_scrambler_hbm(hdev);
2998
2999 dst = hdev->pcie_bar[HBM_BAR_ID] + LINUX_FW_OFFSET;
3000
3001 return hl_fw_load_fw_to_device(hdev, GAUDI_LINUX_FW_FILE, dst);
3002}
3003
3004static int gaudi_load_boot_fit_to_device(struct hl_device *hdev)
3005{
3006 void __iomem *dst;
3007
3008 dst = hdev->pcie_bar[SRAM_BAR_ID] + BOOT_FIT_SRAM_OFFSET;
3009
3010 return hl_fw_load_fw_to_device(hdev, GAUDI_BOOT_FIT_FILE, dst);
3011}
3012
3013static void gaudi_read_device_fw_version(struct hl_device *hdev,
3014 enum hl_fw_component fwc)
3015{
3016 const char *name;
3017 u32 ver_off;
3018 char *dest;
3019
3020 switch (fwc) {
3021 case FW_COMP_UBOOT:
3022 ver_off = RREG32(mmUBOOT_VER_OFFSET);
3023 dest = hdev->asic_prop.uboot_ver;
3024 name = "U-Boot";
3025 break;
3026 case FW_COMP_PREBOOT:
3027 ver_off = RREG32(mmPREBOOT_VER_OFFSET);
3028 dest = hdev->asic_prop.preboot_ver;
3029 name = "Preboot";
3030 break;
3031 default:
3032 dev_warn(hdev->dev, "Undefined FW component: %d\n", fwc);
3033 return;
3034 }
3035
3036 ver_off &= ~((u32)SRAM_BASE_ADDR);
3037
3038 if (ver_off < SRAM_SIZE - VERSION_MAX_LEN) {
3039 memcpy_fromio(dest, hdev->pcie_bar[SRAM_BAR_ID] + ver_off,
3040 VERSION_MAX_LEN);
3041 } else {
3042 dev_err(hdev->dev, "%s version offset (0x%x) is above SRAM\n",
3043 name, ver_off);
3044 strcpy(dest, "unavailable");
3045 }
3046}
3047
3048static int gaudi_init_cpu(struct hl_device *hdev)
3049{
3050 struct gaudi_device *gaudi = hdev->asic_specific;
3051 int rc;
3052
3053 if (!hdev->cpu_enable)
3054 return 0;
3055
3056 if (gaudi->hw_cap_initialized & HW_CAP_CPU)
3057 return 0;
3058
3059 /*
 3060	 * The device CPU works with 40-bit addresses.
3061 * This register sets the extension to 50 bits.
3062 */
3063 WREG32(mmCPU_IF_CPU_MSB_ADDR, hdev->cpu_pci_msb_addr);
3064
3065 rc = hl_fw_init_cpu(hdev, mmPSOC_GLOBAL_CONF_CPU_BOOT_STATUS,
3066 mmPSOC_GLOBAL_CONF_KMD_MSG_TO_CPU,
3067 mmCPU_CMD_STATUS_TO_HOST,
3068 mmCPU_BOOT_ERR0,
3069 !hdev->bmc_enable, GAUDI_CPU_TIMEOUT_USEC,
3070 GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
3071
3072 if (rc)
3073 return rc;
3074
3075 gaudi->hw_cap_initialized |= HW_CAP_CPU;
3076
3077 return 0;
3078}
3079
3080static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
3081{
3082 struct gaudi_device *gaudi = hdev->asic_specific;
3083 struct hl_eq *eq;
3084 u32 status;
3085 struct hl_hw_queue *cpu_pq =
3086 &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
3087 int err;
3088
3089 if (!hdev->cpu_queues_enable)
3090 return 0;
3091
3092 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3093 return 0;
3094
3095 eq = &hdev->event_queue;
3096
3097 WREG32(mmCPU_IF_PQ_BASE_ADDR_LOW, lower_32_bits(cpu_pq->bus_address));
3098 WREG32(mmCPU_IF_PQ_BASE_ADDR_HIGH, upper_32_bits(cpu_pq->bus_address));
3099
3100 WREG32(mmCPU_IF_EQ_BASE_ADDR_LOW, lower_32_bits(eq->bus_address));
3101 WREG32(mmCPU_IF_EQ_BASE_ADDR_HIGH, upper_32_bits(eq->bus_address));
3102
3103 WREG32(mmCPU_IF_CQ_BASE_ADDR_LOW,
3104 lower_32_bits(hdev->cpu_accessible_dma_address));
3105 WREG32(mmCPU_IF_CQ_BASE_ADDR_HIGH,
3106 upper_32_bits(hdev->cpu_accessible_dma_address));
3107
3108 WREG32(mmCPU_IF_PQ_LENGTH, HL_QUEUE_SIZE_IN_BYTES);
3109 WREG32(mmCPU_IF_EQ_LENGTH, HL_EQ_SIZE_IN_BYTES);
3110 WREG32(mmCPU_IF_CQ_LENGTH, HL_CPU_ACCESSIBLE_MEM_SIZE);
3111
3112 /* Used for EQ CI */
3113 WREG32(mmCPU_IF_EQ_RD_OFFS, 0);
3114
3115 WREG32(mmCPU_IF_PF_PQ_PI, 0);
3116
3117 if (gaudi->multi_msi_mode)
3118 WREG32(mmCPU_IF_QUEUE_INIT, PQ_INIT_STATUS_READY_FOR_CP);
3119 else
3120 WREG32(mmCPU_IF_QUEUE_INIT,
3121 PQ_INIT_STATUS_READY_FOR_CP_SINGLE_MSI);
3122
3123 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_PI_UPDATE);
3124
3125 err = hl_poll_timeout(
3126 hdev,
3127 mmCPU_IF_QUEUE_INIT,
3128 status,
3129 (status == PQ_INIT_STATUS_READY_FOR_HOST),
3130 1000,
3131 cpu_timeout);
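	/*
	 * Assuming the usual hl_poll_timeout() semantics in this driver, the
	 * call above re-reads CPU_IF_QUEUE_INIT (here with a 1000 usec poll
	 * interval) until the embedded CPU reports
	 * PQ_INIT_STATUS_READY_FOR_HOST or cpu_timeout expires, in which
	 * case err is non-zero.
	 */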
3132
3133 if (err) {
3134 dev_err(hdev->dev,
3135 "Failed to communicate with ARM CPU (ArmCP timeout)\n");
3136 return -EIO;
3137 }
3138
3139 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
3140 return 0;
3141}
3142
3143static void gaudi_pre_hw_init(struct hl_device *hdev)
3144{
3145 /* Perform read from the device to make sure device is up */
3146 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3147
3148 /*
3149 * Let's mark in the H/W that we have reached this point. We check
3150 * this value in the reset_before_init function to understand whether
3151 * we need to reset the chip before doing H/W init. This register is
3152 * cleared by the H/W upon H/W reset
3153 */
3154 WREG32(mmHW_STATE, HL_DEVICE_HW_STATE_DIRTY);
3155
3156 /* Set the access through PCI bars (Linux driver only) as secured */
3157 WREG32(mmPCIE_WRAP_LBW_PROT_OVR, (PCIE_WRAP_LBW_PROT_OVR_RD_EN_MASK |
3158 PCIE_WRAP_LBW_PROT_OVR_WR_EN_MASK));
3159
3160 /* Perform read to flush the waiting writes to ensure configuration
3161 * was set in the device
3162 */
3163 RREG32(mmPCIE_WRAP_LBW_PROT_OVR);
3164
3165 if (hdev->axi_drain) {
3166 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG,
3167 1 << PCIE_WRAP_LBW_DRAIN_CFG_EN_SHIFT);
3168 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG,
3169 1 << PCIE_WRAP_HBW_DRAIN_CFG_EN_SHIFT);
3170
3171 /* Perform read to flush the DRAIN cfg */
3172 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
3173 } else {
3174 WREG32(mmPCIE_WRAP_LBW_DRAIN_CFG, 0);
3175 WREG32(mmPCIE_WRAP_HBW_DRAIN_CFG, 0);
3176
3177 /* Perform read to flush the DRAIN cfg */
3178 RREG32(mmPCIE_WRAP_HBW_DRAIN_CFG);
3179 }
3180
3181 /* Configure the reset registers. Must be done as early as possible
3182 * in case we fail during H/W initialization
3183 */
3184 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_H,
3185 (CFG_RST_H_DMA_MASK |
3186 CFG_RST_H_MME_MASK |
3187 CFG_RST_H_SM_MASK |
3188 CFG_RST_H_TPC_MASK));
3189
3190 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST_CFG_L, CFG_RST_L_TPC_MASK);
3191
3192 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_H,
3193 (CFG_RST_H_HBM_MASK |
3194 CFG_RST_H_TPC_MASK |
3195 CFG_RST_H_NIC_MASK |
3196 CFG_RST_H_SM_MASK |
3197 CFG_RST_H_DMA_MASK |
3198 CFG_RST_H_MME_MASK |
3199 CFG_RST_H_CPU_MASK |
3200 CFG_RST_H_MMU_MASK));
3201
3202 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST_CFG_L,
3203 (CFG_RST_L_IF_MASK |
3204 CFG_RST_L_PSOC_MASK |
3205 CFG_RST_L_TPC_MASK));
3206}
3207
3208static int gaudi_hw_init(struct hl_device *hdev)
3209{
3210 int rc;
3211
3212 dev_info(hdev->dev, "Starting initialization of H/W\n");
3213
3214 gaudi_pre_hw_init(hdev);
3215
3216 gaudi_init_pci_dma_qmans(hdev);
3217
3218 gaudi_init_hbm_dma_qmans(hdev);
3219
3220 /*
 3221	 * Before pushing u-boot/linux to the device, we need to set the HBM
 3222	 * BAR to the base address of DRAM
3223 */
3224 if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
3225 dev_err(hdev->dev,
3226 "failed to map HBM bar to DRAM base address\n");
3227 return -EIO;
3228 }
3229
3230 rc = gaudi_init_cpu(hdev);
3231 if (rc) {
3232 dev_err(hdev->dev, "failed to initialize CPU\n");
3233 return rc;
3234 }
3235
3236 /* SRAM scrambler must be initialized after CPU is running from HBM */
3237 gaudi_init_scrambler_sram(hdev);
3238
3239 /* This is here just in case we are working without CPU */
3240 gaudi_init_scrambler_hbm(hdev);
3241
3242 gaudi_init_golden_registers(hdev);
3243
3244 rc = gaudi_mmu_init(hdev);
3245 if (rc)
3246 return rc;
3247
3248 gaudi_init_security(hdev);
3249
3250 gaudi_init_mme_qmans(hdev);
3251
3252 gaudi_init_tpc_qmans(hdev);
3253
3254 gaudi_enable_clock_gating(hdev);
3255
3256 gaudi_enable_timestamp(hdev);
3257
3258 /* MSI must be enabled before CPU queues are initialized */
3259 rc = gaudi_enable_msi(hdev);
3260 if (rc)
3261 goto disable_queues;
3262
3263 /* must be called after MSI was enabled */
3264 rc = gaudi_init_cpu_queues(hdev, GAUDI_CPU_TIMEOUT_USEC);
3265 if (rc) {
3266 dev_err(hdev->dev, "failed to initialize CPU H/W queues %d\n",
3267 rc);
3268 goto disable_msi;
3269 }
3270
3271 /* Perform read from the device to flush all configuration */
3272 RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
3273
3274 return 0;
3275
3276disable_msi:
3277 gaudi_disable_msi(hdev);
3278disable_queues:
3279 gaudi_disable_mme_qmans(hdev);
3280 gaudi_disable_pci_dma_qmans(hdev);
3281
3282 return rc;
3283}
3284
3285static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
3286{
3287 struct gaudi_device *gaudi = hdev->asic_specific;
3288 u32 status, reset_timeout_ms, boot_strap = 0;
3289
3290 if (hdev->pldm) {
3291 if (hard_reset)
3292 reset_timeout_ms = GAUDI_PLDM_HRESET_TIMEOUT_MSEC;
3293 else
3294 reset_timeout_ms = GAUDI_PLDM_SRESET_TIMEOUT_MSEC;
3295 } else {
3296 reset_timeout_ms = GAUDI_RESET_TIMEOUT_MSEC;
3297 }
3298
3299 if (hard_reset) {
3300 /* Tell ASIC not to re-initialize PCIe */
3301 WREG32(mmPREBOOT_PCIE_EN, LKD_HARD_RESET_MAGIC);
3302
3303 boot_strap = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
3304 /* H/W bug WA:
3305 * rdata[31:0] = strap_read_val;
3306 * wdata[31:0] = rdata[30:21],1'b0,rdata[20:0]
3307 */
3308 boot_strap = (((boot_strap & 0x7FE00000) << 1) |
3309 (boot_strap & 0x001FFFFF));
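		/*
		 * In other words, bits [30:21] of the strap value are shifted
		 * up to [31:22] and a zero bit is inserted at position 21,
		 * while bits [20:0] are kept in place; this is a direct
		 * translation of the wdata expression in the comment above.
		 */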
3310 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap & ~0x2);
3311
3312 /* Restart BTL/BLR upon hard-reset */
3313 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 1);
3314
3315 WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST,
3316 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT);
3317 dev_info(hdev->dev,
3318 "Issued HARD reset command, going to wait %dms\n",
3319 reset_timeout_ms);
3320 } else {
3321 /* Don't restart BTL/BLR upon soft-reset */
3322 WREG32(mmPSOC_GLOBAL_CONF_BOOT_SEQ_RE_START, 0);
3323
3324 WREG32(mmPSOC_GLOBAL_CONF_SOFT_RST,
3325 1 << PSOC_GLOBAL_CONF_SOFT_RST_IND_SHIFT);
3326 dev_info(hdev->dev,
3327 "Issued SOFT reset command, going to wait %dms\n",
3328 reset_timeout_ms);
3329 }
3330
3331 /*
3332 * After hard reset, we can't poll the BTM_FSM register because the PSOC
 3333	 * itself is in reset. We need to wait until the reset is deasserted.
3334 */
3335 msleep(reset_timeout_ms);
3336
3337 status = RREG32(mmPSOC_GLOBAL_CONF_BTM_FSM);
3338 if (status & PSOC_GLOBAL_CONF_BTM_FSM_STATE_MASK)
3339 dev_err(hdev->dev,
3340 "Timeout while waiting for device to reset 0x%x\n",
3341 status);
3342
3343 if (!hard_reset) {
3344 gaudi->hw_cap_initialized &= ~(HW_CAP_PCI_DMA | HW_CAP_MME |
3345 HW_CAP_TPC_MASK |
3346 HW_CAP_HBM_DMA);
3347
3348 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3349 GAUDI_EVENT_SOFT_RESET);
3350 return;
3351 }
3352
3353 /* We continue here only for hard-reset */
3354
3355 WREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS, boot_strap);
3356
3357 gaudi->hw_cap_initialized &= ~(HW_CAP_CPU | HW_CAP_CPU_Q |
3358 HW_CAP_HBM | HW_CAP_PCI_DMA |
3359 HW_CAP_MME | HW_CAP_TPC_MASK |
3360 HW_CAP_HBM_DMA | HW_CAP_PLL |
3361 HW_CAP_MMU |
3362 HW_CAP_SRAM_SCRAMBLER |
3363 HW_CAP_HBM_SCRAMBLER);
3364 memset(gaudi->events_stat, 0, sizeof(gaudi->events_stat));
3365}
3366
3367static int gaudi_suspend(struct hl_device *hdev)
3368{
3369 int rc;
3370
3371 rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
3372 if (rc)
3373 dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
3374
3375 return rc;
3376}
3377
3378static int gaudi_resume(struct hl_device *hdev)
3379{
3380 return gaudi_init_iatu(hdev);
3381}
3382
3383static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
3384 u64 kaddress, phys_addr_t paddress, u32 size)
3385{
3386 int rc;
3387
3388 vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
3389 VM_DONTCOPY | VM_NORESERVE;
3390
3391 rc = remap_pfn_range(vma, vma->vm_start, paddress >> PAGE_SHIFT,
3392 size, vma->vm_page_prot);
3393 if (rc)
3394		dev_err(hdev->dev, "remap_pfn_range error %d\n", rc);
3395
3396 return rc;
3397}
3398
3399static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
3400{
3401 struct gaudi_device *gaudi = hdev->asic_specific;
3402 u32 db_reg_offset, db_value, dma_qm_offset, q_off;
3403 int dma_id;
3404 bool invalid_queue = false;
3405
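	/*
	 * Translate the logical queue ID to the PQ_PI doorbell register of
	 * the QMAN that owns it. DMA queues go through gaudi_dma_assignment
	 * because logical DMA queues are mapped to physical DMA engines via
	 * a fixed assignment table.
	 */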
3406 switch (hw_queue_id) {
3407 case GAUDI_QUEUE_ID_DMA_0_0...GAUDI_QUEUE_ID_DMA_0_3:
3408 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_1];
3409 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3410 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3411 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3412 break;
3413
3414 case GAUDI_QUEUE_ID_DMA_1_0...GAUDI_QUEUE_ID_DMA_1_3:
3415 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_2];
3416 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3417 q_off = dma_qm_offset + (hw_queue_id & 0x3) * 4;
3418 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3419 break;
3420
3421 case GAUDI_QUEUE_ID_DMA_2_0...GAUDI_QUEUE_ID_DMA_2_3:
3422 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_1];
3423 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3424 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3425 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3426 break;
3427
3428 case GAUDI_QUEUE_ID_DMA_3_0...GAUDI_QUEUE_ID_DMA_3_3:
3429 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_2];
3430 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3431 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3432 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3433 break;
3434
3435 case GAUDI_QUEUE_ID_DMA_4_0...GAUDI_QUEUE_ID_DMA_4_3:
3436 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_3];
3437 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3438 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3439 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3440 break;
3441
3442 case GAUDI_QUEUE_ID_DMA_5_0...GAUDI_QUEUE_ID_DMA_5_3:
3443 dma_id = gaudi_dma_assignment[GAUDI_PCI_DMA_3];
3444 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3445 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3446 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3447 break;
3448
3449 case GAUDI_QUEUE_ID_DMA_6_0...GAUDI_QUEUE_ID_DMA_6_3:
3450 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_4];
3451 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3452 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3453 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3454 break;
3455
3456 case GAUDI_QUEUE_ID_DMA_7_0...GAUDI_QUEUE_ID_DMA_7_3:
3457 dma_id = gaudi_dma_assignment[GAUDI_HBM_DMA_5];
3458 dma_qm_offset = dma_id * DMA_QMAN_OFFSET;
3459 q_off = dma_qm_offset + ((hw_queue_id - 1) & 0x3) * 4;
3460 db_reg_offset = mmDMA0_QM_PQ_PI_0 + q_off;
3461 break;
3462
3463 case GAUDI_QUEUE_ID_CPU_PQ:
3464 if (gaudi->hw_cap_initialized & HW_CAP_CPU_Q)
3465 db_reg_offset = mmCPU_IF_PF_PQ_PI;
3466 else
3467 invalid_queue = true;
3468 break;
3469
3470 case GAUDI_QUEUE_ID_MME_0_0:
3471 db_reg_offset = mmMME2_QM_PQ_PI_0;
3472 break;
3473
3474 case GAUDI_QUEUE_ID_MME_0_1:
3475 db_reg_offset = mmMME2_QM_PQ_PI_1;
3476 break;
3477
3478 case GAUDI_QUEUE_ID_MME_0_2:
3479 db_reg_offset = mmMME2_QM_PQ_PI_2;
3480 break;
3481
3482 case GAUDI_QUEUE_ID_MME_0_3:
3483 db_reg_offset = mmMME2_QM_PQ_PI_3;
3484 break;
3485
3486 case GAUDI_QUEUE_ID_MME_1_0:
3487 db_reg_offset = mmMME0_QM_PQ_PI_0;
3488 break;
3489
3490 case GAUDI_QUEUE_ID_MME_1_1:
3491 db_reg_offset = mmMME0_QM_PQ_PI_1;
3492 break;
3493
3494 case GAUDI_QUEUE_ID_MME_1_2:
3495 db_reg_offset = mmMME0_QM_PQ_PI_2;
3496 break;
3497
3498 case GAUDI_QUEUE_ID_MME_1_3:
3499 db_reg_offset = mmMME0_QM_PQ_PI_3;
3500 break;
3501
3502 case GAUDI_QUEUE_ID_TPC_0_0:
3503 db_reg_offset = mmTPC0_QM_PQ_PI_0;
3504 break;
3505
3506 case GAUDI_QUEUE_ID_TPC_0_1:
3507 db_reg_offset = mmTPC0_QM_PQ_PI_1;
3508 break;
3509
3510 case GAUDI_QUEUE_ID_TPC_0_2:
3511 db_reg_offset = mmTPC0_QM_PQ_PI_2;
3512 break;
3513
3514 case GAUDI_QUEUE_ID_TPC_0_3:
3515 db_reg_offset = mmTPC0_QM_PQ_PI_3;
3516 break;
3517
3518 case GAUDI_QUEUE_ID_TPC_1_0:
3519 db_reg_offset = mmTPC1_QM_PQ_PI_0;
3520 break;
3521
3522 case GAUDI_QUEUE_ID_TPC_1_1:
3523 db_reg_offset = mmTPC1_QM_PQ_PI_1;
3524 break;
3525
3526 case GAUDI_QUEUE_ID_TPC_1_2:
3527 db_reg_offset = mmTPC1_QM_PQ_PI_2;
3528 break;
3529
3530 case GAUDI_QUEUE_ID_TPC_1_3:
3531 db_reg_offset = mmTPC1_QM_PQ_PI_3;
3532 break;
3533
3534 case GAUDI_QUEUE_ID_TPC_2_0:
3535 db_reg_offset = mmTPC2_QM_PQ_PI_0;
3536 break;
3537
3538 case GAUDI_QUEUE_ID_TPC_2_1:
3539 db_reg_offset = mmTPC2_QM_PQ_PI_1;
3540 break;
3541
3542 case GAUDI_QUEUE_ID_TPC_2_2:
3543 db_reg_offset = mmTPC2_QM_PQ_PI_2;
3544 break;
3545
3546 case GAUDI_QUEUE_ID_TPC_2_3:
3547 db_reg_offset = mmTPC2_QM_PQ_PI_3;
3548 break;
3549
3550 case GAUDI_QUEUE_ID_TPC_3_0:
3551 db_reg_offset = mmTPC3_QM_PQ_PI_0;
3552 break;
3553
3554 case GAUDI_QUEUE_ID_TPC_3_1:
3555 db_reg_offset = mmTPC3_QM_PQ_PI_1;
3556 break;
3557
3558 case GAUDI_QUEUE_ID_TPC_3_2:
3559 db_reg_offset = mmTPC3_QM_PQ_PI_2;
3560 break;
3561
3562 case GAUDI_QUEUE_ID_TPC_3_3:
3563 db_reg_offset = mmTPC3_QM_PQ_PI_3;
3564 break;
3565
3566 case GAUDI_QUEUE_ID_TPC_4_0:
3567 db_reg_offset = mmTPC4_QM_PQ_PI_0;
3568 break;
3569
3570 case GAUDI_QUEUE_ID_TPC_4_1:
3571 db_reg_offset = mmTPC4_QM_PQ_PI_1;
3572 break;
3573
3574 case GAUDI_QUEUE_ID_TPC_4_2:
3575 db_reg_offset = mmTPC4_QM_PQ_PI_2;
3576 break;
3577
3578 case GAUDI_QUEUE_ID_TPC_4_3:
3579 db_reg_offset = mmTPC4_QM_PQ_PI_3;
3580 break;
3581
3582 case GAUDI_QUEUE_ID_TPC_5_0:
3583 db_reg_offset = mmTPC5_QM_PQ_PI_0;
3584 break;
3585
3586 case GAUDI_QUEUE_ID_TPC_5_1:
3587 db_reg_offset = mmTPC5_QM_PQ_PI_1;
3588 break;
3589
3590 case GAUDI_QUEUE_ID_TPC_5_2:
3591 db_reg_offset = mmTPC5_QM_PQ_PI_2;
3592 break;
3593
3594 case GAUDI_QUEUE_ID_TPC_5_3:
3595 db_reg_offset = mmTPC5_QM_PQ_PI_3;
3596 break;
3597
3598 case GAUDI_QUEUE_ID_TPC_6_0:
3599 db_reg_offset = mmTPC6_QM_PQ_PI_0;
3600 break;
3601
3602 case GAUDI_QUEUE_ID_TPC_6_1:
3603 db_reg_offset = mmTPC6_QM_PQ_PI_1;
3604 break;
3605
3606 case GAUDI_QUEUE_ID_TPC_6_2:
3607 db_reg_offset = mmTPC6_QM_PQ_PI_2;
3608 break;
3609
3610 case GAUDI_QUEUE_ID_TPC_6_3:
3611 db_reg_offset = mmTPC6_QM_PQ_PI_3;
3612 break;
3613
3614 case GAUDI_QUEUE_ID_TPC_7_0:
3615 db_reg_offset = mmTPC7_QM_PQ_PI_0;
3616 break;
3617
3618 case GAUDI_QUEUE_ID_TPC_7_1:
3619 db_reg_offset = mmTPC7_QM_PQ_PI_1;
3620 break;
3621
3622 case GAUDI_QUEUE_ID_TPC_7_2:
3623 db_reg_offset = mmTPC7_QM_PQ_PI_2;
3624 break;
3625
3626 case GAUDI_QUEUE_ID_TPC_7_3:
3627 db_reg_offset = mmTPC7_QM_PQ_PI_3;
3628 break;
3629
3630 default:
3631 invalid_queue = true;
3632 }
3633
3634 if (invalid_queue) {
3635 /* Should never get here */
3636 dev_err(hdev->dev, "h/w queue %d is invalid. Can't set pi\n",
3637 hw_queue_id);
3638 return;
3639 }
3640
3641 db_value = pi;
3642
3643 /* ring the doorbell */
3644 WREG32(db_reg_offset, db_value);
3645
3646 if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
3647 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
3648 GAUDI_EVENT_PI_UPDATE);
3649}
3650
3651static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
3652 struct hl_bd *bd)
3653{
3654 __le64 *pbd = (__le64 *) bd;
3655
3656	/* The QMANs are in host memory so a simple copy suffices */
3657 pqe[0] = pbd[0];
3658 pqe[1] = pbd[1];
3659}
3660
3661static void *gaudi_dma_alloc_coherent(struct hl_device *hdev, size_t size,
3662 dma_addr_t *dma_handle, gfp_t flags)
3663{
3664 void *kernel_addr = dma_alloc_coherent(&hdev->pdev->dev, size,
3665 dma_handle, flags);
3666
3667 /* Shift to the device's base physical address of host memory */
3668 if (kernel_addr)
3669 *dma_handle += HOST_PHYS_BASE;
3670
3671 return kernel_addr;
3672}
3673
3674static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size,
3675 void *cpu_addr, dma_addr_t dma_handle)
3676{
3677 /* Cancel the device's base physical address of host memory */
3678 dma_addr_t fixed_dma_handle = dma_handle - HOST_PHYS_BASE;
3679
3680 dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle);
3681}
3682
3683static void *gaudi_get_int_queue_base(struct hl_device *hdev,
3684 u32 queue_id, dma_addr_t *dma_handle,
3685 u16 *queue_len)
3686{
3687 struct gaudi_device *gaudi = hdev->asic_specific;
3688 struct gaudi_internal_qman_info *q;
3689
3690 if (queue_id >= GAUDI_QUEUE_ID_SIZE ||
3691 gaudi_queue_type[queue_id] != QUEUE_TYPE_INT) {
3692 dev_err(hdev->dev, "Got invalid queue id %d\n", queue_id);
3693 return NULL;
3694 }
3695
3696 q = &gaudi->internal_qmans[queue_id];
3697 *dma_handle = q->pq_dma_addr;
3698 *queue_len = q->pq_size / QMAN_PQ_ENTRY_SIZE;
3699
3700 return q->pq_kernel_addr;
3701}
3702
3703static int gaudi_send_cpu_message(struct hl_device *hdev, u32 *msg,
3704 u16 len, u32 timeout, long *result)
3705{
3706 struct gaudi_device *gaudi = hdev->asic_specific;
3707
3708 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) {
3709 if (result)
3710 *result = 0;
3711 return 0;
3712 }
3713
3714 return hl_fw_send_cpu_message(hdev, GAUDI_QUEUE_ID_CPU_PQ, msg, len,
3715 timeout, result);
3716}
3717
3718static int gaudi_test_queue(struct hl_device *hdev, u32 hw_queue_id)
3719{
3720 struct packet_msg_prot *fence_pkt;
3721 dma_addr_t pkt_dma_addr;
3722 u32 fence_val, tmp, timeout_usec;
3723 dma_addr_t fence_dma_addr;
3724 u32 *fence_ptr;
3725 int rc;
3726
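	/*
	 * Queue self-test: send a MSG_PROT packet through the tested queue
	 * that writes a known fence value to host memory, then poll that
	 * memory to verify the queue actually processed the packet.
	 */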
3727 if (hdev->pldm)
3728 timeout_usec = GAUDI_PLDM_TEST_QUEUE_WAIT_USEC;
3729 else
3730 timeout_usec = GAUDI_TEST_QUEUE_WAIT_USEC;
3731
3732 fence_val = GAUDI_QMAN0_FENCE_VAL;
3733
3734 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
3735 &fence_dma_addr);
3736 if (!fence_ptr) {
3737 dev_err(hdev->dev,
3738 "Failed to allocate memory for queue testing\n");
3739 return -ENOMEM;
3740 }
3741
3742 *fence_ptr = 0;
3743
3744 fence_pkt = hdev->asic_funcs->asic_dma_pool_zalloc(hdev,
3745 sizeof(struct packet_msg_prot),
3746 GFP_KERNEL, &pkt_dma_addr);
3747 if (!fence_pkt) {
3748 dev_err(hdev->dev,
3749 "Failed to allocate packet for queue testing\n");
3750 rc = -ENOMEM;
3751 goto free_fence_ptr;
3752 }
3753
3754 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
3755 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
3756 (1 << GAUDI_PKT_CTL_MB_SHIFT);
3757 fence_pkt->ctl = cpu_to_le32(tmp);
3758 fence_pkt->value = cpu_to_le32(fence_val);
3759 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
3760
3761 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id,
3762 sizeof(struct packet_msg_prot),
3763 pkt_dma_addr);
3764 if (rc) {
3765 dev_err(hdev->dev,
3766 "Failed to send fence packet\n");
3767 goto free_pkt;
3768 }
3769
3770 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp, (tmp == fence_val),
3771 1000, timeout_usec, true);
3772
3773 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
3774
3775 if (rc == -ETIMEDOUT) {
3776 dev_err(hdev->dev,
3777 "H/W queue %d test failed (scratch(0x%08llX) == 0x%08X)\n",
3778 hw_queue_id, (unsigned long long) fence_dma_addr, tmp);
3779 rc = -EIO;
3780 }
3781
3782free_pkt:
3783 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_pkt,
3784 pkt_dma_addr);
3785free_fence_ptr:
3786 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
3787 fence_dma_addr);
3788 return rc;
3789}
3790
3791static int gaudi_test_cpu_queue(struct hl_device *hdev)
3792{
3793 struct gaudi_device *gaudi = hdev->asic_specific;
3794
3795 /*
3796	 * check the capability here because send_cpu_message() won't update
3797	 * the result value if the capability is missing
3798 */
3799 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
3800 return 0;
3801
3802 return hl_fw_test_cpu_queue(hdev);
3803}
3804
3805static int gaudi_test_queues(struct hl_device *hdev)
3806{
3807 int i, rc, ret_val = 0;
3808
3809 for (i = 0 ; i < HL_MAX_QUEUES ; i++) {
3810 if (hdev->asic_prop.hw_queues_props[i].type == QUEUE_TYPE_EXT) {
3811 rc = gaudi_test_queue(hdev, i);
3812 if (rc)
3813 ret_val = -EINVAL;
3814 }
3815 }
3816
3817 rc = gaudi_test_cpu_queue(hdev);
3818 if (rc)
3819 ret_val = -EINVAL;
3820
3821 return ret_val;
3822}
3823
3824static void *gaudi_dma_pool_zalloc(struct hl_device *hdev, size_t size,
3825 gfp_t mem_flags, dma_addr_t *dma_handle)
3826{
3827 void *kernel_addr;
3828
3829 if (size > GAUDI_DMA_POOL_BLK_SIZE)
3830 return NULL;
3831
3832 kernel_addr = dma_pool_zalloc(hdev->dma_pool, mem_flags, dma_handle);
3833
3834 /* Shift to the device's base physical address of host memory */
3835 if (kernel_addr)
3836 *dma_handle += HOST_PHYS_BASE;
3837
3838 return kernel_addr;
3839}
3840
3841static void gaudi_dma_pool_free(struct hl_device *hdev, void *vaddr,
3842 dma_addr_t dma_addr)
3843{
3844 /* Cancel the device's base physical address of host memory */
3845 dma_addr_t fixed_dma_addr = dma_addr - HOST_PHYS_BASE;
3846
3847 dma_pool_free(hdev->dma_pool, vaddr, fixed_dma_addr);
3848}
3849
3850static void *gaudi_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
3851 size_t size, dma_addr_t *dma_handle)
3852{
3853 return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
3854}
3855
3856static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev,
3857 size_t size, void *vaddr)
3858{
3859 hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
3860}
3861
3862static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl,
3863 int nents, enum dma_data_direction dir)
3864{
3865 struct scatterlist *sg;
3866 int i;
3867
3868 if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir))
3869 return -ENOMEM;
3870
3871 /* Shift to the device's base physical address of host memory */
3872 for_each_sg(sgl, sg, nents, i)
3873 sg->dma_address += HOST_PHYS_BASE;
3874
3875 return 0;
3876}
3877
3878static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl,
3879 int nents, enum dma_data_direction dir)
3880{
3881 struct scatterlist *sg;
3882 int i;
3883
3884 /* Cancel the device's base physical address of host memory */
3885 for_each_sg(sgl, sg, nents, i)
3886 sg->dma_address -= HOST_PHYS_BASE;
3887
3888 dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir);
3889}
3890
3891static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev,
3892 struct sg_table *sgt)
3893{
3894 struct scatterlist *sg, *sg_next_iter;
3895 u32 count, dma_desc_cnt;
3896 u64 len, len_next;
3897 dma_addr_t addr, addr_next;
3898
3899 dma_desc_cnt = 0;
3900
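	/*
	 * Count how many LIN_DMA descriptors are needed. Physically
	 * contiguous SG entries are merged as long as their combined length
	 * does not exceed DMA_MAX_TRANSFER_SIZE.
	 */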
3901 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
3902
3903 len = sg_dma_len(sg);
3904 addr = sg_dma_address(sg);
3905
3906 if (len == 0)
3907 break;
3908
3909 while ((count + 1) < sgt->nents) {
3910 sg_next_iter = sg_next(sg);
3911 len_next = sg_dma_len(sg_next_iter);
3912 addr_next = sg_dma_address(sg_next_iter);
3913
3914 if (len_next == 0)
3915 break;
3916
3917 if ((addr + len == addr_next) &&
3918 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
3919 len += len_next;
3920 count++;
3921 sg = sg_next_iter;
3922 } else {
3923 break;
3924 }
3925 }
3926
3927 dma_desc_cnt++;
3928 }
3929
3930 return dma_desc_cnt * sizeof(struct packet_lin_dma);
3931}
3932
3933static int gaudi_pin_memory_before_cs(struct hl_device *hdev,
3934 struct hl_cs_parser *parser,
3935 struct packet_lin_dma *user_dma_pkt,
3936 u64 addr, enum dma_data_direction dir)
3937{
3938 struct hl_userptr *userptr;
3939 int rc;
3940
3941 if (hl_userptr_is_pinned(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3942 parser->job_userptr_list, &userptr))
3943 goto already_pinned;
3944
3945 userptr = kzalloc(sizeof(*userptr), GFP_ATOMIC);
3946 if (!userptr)
3947 return -ENOMEM;
3948
3949 rc = hl_pin_host_memory(hdev, addr, le32_to_cpu(user_dma_pkt->tsize),
3950 userptr);
3951 if (rc)
3952 goto free_userptr;
3953
3954 list_add_tail(&userptr->job_node, parser->job_userptr_list);
3955
3956 rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl,
3957 userptr->sgt->nents, dir);
3958 if (rc) {
3959 dev_err(hdev->dev, "failed to map sgt with DMA region\n");
3960 goto unpin_memory;
3961 }
3962
3963 userptr->dma_mapped = true;
3964 userptr->dir = dir;
3965
3966already_pinned:
3967 parser->patched_cb_size +=
3968 gaudi_get_dma_desc_list_size(hdev, userptr->sgt);
3969
3970 return 0;
3971
3972unpin_memory:
3973 hl_unpin_host_memory(hdev, userptr);
3974free_userptr:
3975 kfree(userptr);
3976 return rc;
3977}
3978
3979static int gaudi_validate_dma_pkt_host(struct hl_device *hdev,
3980 struct hl_cs_parser *parser,
3981 struct packet_lin_dma *user_dma_pkt,
3982 bool src_in_host)
3983{
3984 enum dma_data_direction dir;
3985 bool skip_host_mem_pin = false, user_memset;
3986 u64 addr;
3987 int rc = 0;
3988
3989 user_memset = (le32_to_cpu(user_dma_pkt->ctl) &
3990 GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
3991 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
3992
3993 if (src_in_host) {
3994 if (user_memset)
3995 skip_host_mem_pin = true;
3996
3997 dev_dbg(hdev->dev, "DMA direction is HOST --> DEVICE\n");
3998 dir = DMA_TO_DEVICE;
3999 addr = le64_to_cpu(user_dma_pkt->src_addr);
4000 } else {
4001 dev_dbg(hdev->dev, "DMA direction is DEVICE --> HOST\n");
4002 dir = DMA_FROM_DEVICE;
4003 addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4004 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4005 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4006 }
4007
4008 if (skip_host_mem_pin)
4009 parser->patched_cb_size += sizeof(*user_dma_pkt);
4010 else
4011 rc = gaudi_pin_memory_before_cs(hdev, parser, user_dma_pkt,
4012 addr, dir);
4013
4014 return rc;
4015}
4016
4017static int gaudi_validate_dma_pkt_no_mmu(struct hl_device *hdev,
4018 struct hl_cs_parser *parser,
4019 struct packet_lin_dma *user_dma_pkt)
4020{
4021 bool src_in_host = false;
4022 u64 dst_addr = (le64_to_cpu(user_dma_pkt->dst_addr) &
4023 GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >>
4024 GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT;
4025
4026 dev_dbg(hdev->dev, "DMA packet details:\n");
4027 dev_dbg(hdev->dev, "source == 0x%llx\n",
4028 le64_to_cpu(user_dma_pkt->src_addr));
4029 dev_dbg(hdev->dev, "destination == 0x%llx\n", dst_addr);
4030 dev_dbg(hdev->dev, "size == %u\n", le32_to_cpu(user_dma_pkt->tsize));
4031
4032 /*
4033 * Special handling for DMA with size 0. Bypass all validations
4034 * because no transactions will be done except for WR_COMP, which
4035 * is not a security issue
4036 */
4037 if (!le32_to_cpu(user_dma_pkt->tsize)) {
4038 parser->patched_cb_size += sizeof(*user_dma_pkt);
4039 return 0;
4040 }
4041
4042 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4043 src_in_host = true;
4044
4045 return gaudi_validate_dma_pkt_host(hdev, parser, user_dma_pkt,
4046 src_in_host);
4047}
4048
4049static int gaudi_validate_cb(struct hl_device *hdev,
4050 struct hl_cs_parser *parser, bool is_mmu)
4051{
4052 u32 cb_parsed_length = 0;
4053 int rc = 0;
4054
4055 parser->patched_cb_size = 0;
4056
4057	/* user_cb_size is greater than 0, so the loop will always execute */
4058 while (cb_parsed_length < parser->user_cb_size) {
4059 enum packet_id pkt_id;
4060 u16 pkt_size;
4061 struct gaudi_packet *user_pkt;
4062
4063 user_pkt = (struct gaudi_packet *) (uintptr_t)
4064 (parser->user_cb->kernel_address + cb_parsed_length);
4065
4066 pkt_id = (enum packet_id) (
4067 (le64_to_cpu(user_pkt->header) &
4068 PACKET_HEADER_PACKET_ID_MASK) >>
4069 PACKET_HEADER_PACKET_ID_SHIFT);
4070
4071 pkt_size = gaudi_packet_sizes[pkt_id];
4072 cb_parsed_length += pkt_size;
4073 if (cb_parsed_length > parser->user_cb_size) {
4074 dev_err(hdev->dev,
4075 "packet 0x%x is out of CB boundary\n", pkt_id);
4076 rc = -EINVAL;
4077 break;
4078 }
4079
4080 switch (pkt_id) {
4081 case PACKET_MSG_PROT:
4082 dev_err(hdev->dev,
4083 "User not allowed to use MSG_PROT\n");
4084 rc = -EPERM;
4085 break;
4086
4087 case PACKET_CP_DMA:
4088 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4089 rc = -EPERM;
4090 break;
4091
4092 case PACKET_STOP:
4093 dev_err(hdev->dev, "User not allowed to use STOP\n");
4094 rc = -EPERM;
4095 break;
4096
4097 case PACKET_LIN_DMA:
4098 parser->contains_dma_pkt = true;
4099 if (is_mmu)
4100 parser->patched_cb_size += pkt_size;
4101 else
4102 rc = gaudi_validate_dma_pkt_no_mmu(hdev, parser,
4103 (struct packet_lin_dma *) user_pkt);
4104 break;
4105
4106 case PACKET_WREG_32:
4107 case PACKET_WREG_BULK:
4108 case PACKET_MSG_LONG:
4109 case PACKET_MSG_SHORT:
4110 case PACKET_REPEAT:
4111 case PACKET_FENCE:
4112 case PACKET_NOP:
4113 case PACKET_ARB_POINT:
4114 case PACKET_LOAD_AND_EXE:
4115 parser->patched_cb_size += pkt_size;
4116 break;
4117
4118 default:
4119 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4120 pkt_id);
4121 rc = -EINVAL;
4122 break;
4123 }
4124
4125 if (rc)
4126 break;
4127 }
4128
4129 /*
4130 * The new CB should have space at the end for two MSG_PROT packets:
4131 * 1. A packet that will act as a completion packet
4132	 * 2. A packet that will generate an MSI interrupt
4133 */
4134 parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
4135
4136 return rc;
4137}
4138
4139static int gaudi_patch_dma_packet(struct hl_device *hdev,
4140 struct hl_cs_parser *parser,
4141 struct packet_lin_dma *user_dma_pkt,
4142 struct packet_lin_dma *new_dma_pkt,
4143 u32 *new_dma_pkt_size)
4144{
4145 struct hl_userptr *userptr;
4146 struct scatterlist *sg, *sg_next_iter;
4147 u32 count, dma_desc_cnt, user_wrcomp_en_mask, ctl;
4148 u64 len, len_next;
4149 dma_addr_t dma_addr, dma_addr_next;
4150 u64 device_memory_addr, addr;
4151 enum dma_data_direction dir;
4152 struct sg_table *sgt;
4153 bool src_in_host = false;
4154 bool skip_host_mem_pin = false;
4155 bool user_memset;
4156
4157 ctl = le32_to_cpu(user_dma_pkt->ctl);
4158
4159 if (parser->hw_queue_id <= GAUDI_QUEUE_ID_DMA_0_3)
4160 src_in_host = true;
4161
4162 user_memset = (ctl & GAUDI_PKT_LIN_DMA_CTL_MEMSET_MASK) >>
4163 GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT;
4164
4165 if (src_in_host) {
4166 addr = le64_to_cpu(user_dma_pkt->src_addr);
4167 device_memory_addr = le64_to_cpu(user_dma_pkt->dst_addr);
4168 dir = DMA_TO_DEVICE;
4169 if (user_memset)
4170 skip_host_mem_pin = true;
4171 } else {
4172 addr = le64_to_cpu(user_dma_pkt->dst_addr);
4173 device_memory_addr = le64_to_cpu(user_dma_pkt->src_addr);
4174 dir = DMA_FROM_DEVICE;
4175 }
4176
4177 if ((!skip_host_mem_pin) &&
4178 (!hl_userptr_is_pinned(hdev, addr,
4179 le32_to_cpu(user_dma_pkt->tsize),
4180 parser->job_userptr_list, &userptr))) {
4181 dev_err(hdev->dev, "Userptr 0x%llx + 0x%x NOT mapped\n",
4182 addr, user_dma_pkt->tsize);
4183 return -EFAULT;
4184 }
4185
4186 if ((user_memset) && (dir == DMA_TO_DEVICE)) {
4187 memcpy(new_dma_pkt, user_dma_pkt, sizeof(*user_dma_pkt));
4188 *new_dma_pkt_size = sizeof(*user_dma_pkt);
4189 return 0;
4190 }
4191
4192 user_wrcomp_en_mask = ctl & GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
4193
4194 sgt = userptr->sgt;
4195 dma_desc_cnt = 0;
4196
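	/*
	 * Split the user LIN_DMA packet into one packet per (coalesced) SG
	 * chunk. The engine-barrier bit is kept only on the first packet and
	 * the user's WR_COMP setting is restored only on the last one, below.
	 */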
4197 for_each_sg(sgt->sgl, sg, sgt->nents, count) {
4198 len = sg_dma_len(sg);
4199 dma_addr = sg_dma_address(sg);
4200
4201 if (len == 0)
4202 break;
4203
4204 while ((count + 1) < sgt->nents) {
4205 sg_next_iter = sg_next(sg);
4206 len_next = sg_dma_len(sg_next_iter);
4207 dma_addr_next = sg_dma_address(sg_next_iter);
4208
4209 if (len_next == 0)
4210 break;
4211
4212 if ((dma_addr + len == dma_addr_next) &&
4213 (len + len_next <= DMA_MAX_TRANSFER_SIZE)) {
4214 len += len_next;
4215 count++;
4216 sg = sg_next_iter;
4217 } else {
4218 break;
4219 }
4220 }
4221
4222 new_dma_pkt->ctl = user_dma_pkt->ctl;
4223
4224 ctl = le32_to_cpu(user_dma_pkt->ctl);
4225 if (likely(dma_desc_cnt))
4226 ctl &= ~GAUDI_PKT_CTL_EB_MASK;
4227 ctl &= ~GAUDI_PKT_LIN_DMA_CTL_WRCOMP_EN_MASK;
4228 new_dma_pkt->ctl = cpu_to_le32(ctl);
4229 new_dma_pkt->tsize = cpu_to_le32(len);
4230
4231 if (dir == DMA_TO_DEVICE) {
4232 new_dma_pkt->src_addr = cpu_to_le64(dma_addr);
4233 new_dma_pkt->dst_addr = cpu_to_le64(device_memory_addr);
4234 } else {
4235 new_dma_pkt->src_addr = cpu_to_le64(device_memory_addr);
4236 new_dma_pkt->dst_addr = cpu_to_le64(dma_addr);
4237 }
4238
4239 if (!user_memset)
4240 device_memory_addr += len;
4241 dma_desc_cnt++;
4242 new_dma_pkt++;
4243 }
4244
4245 if (!dma_desc_cnt) {
4246 dev_err(hdev->dev,
4247			"No SG entries found while patching DMA packet\n");
4248 return -EFAULT;
4249 }
4250
4251 /* Fix the last dma packet - wrcomp must be as user set it */
4252 new_dma_pkt--;
4253 new_dma_pkt->ctl |= cpu_to_le32(user_wrcomp_en_mask);
4254
4255 *new_dma_pkt_size = dma_desc_cnt * sizeof(struct packet_lin_dma);
4256
4257 return 0;
4258}
4259
4260static int gaudi_patch_cb(struct hl_device *hdev,
4261 struct hl_cs_parser *parser)
4262{
4263 u32 cb_parsed_length = 0;
4264 u32 cb_patched_cur_length = 0;
4265 int rc = 0;
4266
4267	/* user_cb_size is greater than 0, so the loop will always execute */
4268 while (cb_parsed_length < parser->user_cb_size) {
4269 enum packet_id pkt_id;
4270 u16 pkt_size;
4271 u32 new_pkt_size = 0;
4272 struct gaudi_packet *user_pkt, *kernel_pkt;
4273
4274 user_pkt = (struct gaudi_packet *) (uintptr_t)
4275 (parser->user_cb->kernel_address + cb_parsed_length);
4276 kernel_pkt = (struct gaudi_packet *) (uintptr_t)
4277 (parser->patched_cb->kernel_address +
4278 cb_patched_cur_length);
4279
4280 pkt_id = (enum packet_id) (
4281 (le64_to_cpu(user_pkt->header) &
4282 PACKET_HEADER_PACKET_ID_MASK) >>
4283 PACKET_HEADER_PACKET_ID_SHIFT);
4284
4285 pkt_size = gaudi_packet_sizes[pkt_id];
4286 cb_parsed_length += pkt_size;
4287 if (cb_parsed_length > parser->user_cb_size) {
4288 dev_err(hdev->dev,
4289 "packet 0x%x is out of CB boundary\n", pkt_id);
4290 rc = -EINVAL;
4291 break;
4292 }
4293
4294 switch (pkt_id) {
4295 case PACKET_LIN_DMA:
4296 rc = gaudi_patch_dma_packet(hdev, parser,
4297 (struct packet_lin_dma *) user_pkt,
4298 (struct packet_lin_dma *) kernel_pkt,
4299 &new_pkt_size);
4300 cb_patched_cur_length += new_pkt_size;
4301 break;
4302
4303 case PACKET_MSG_PROT:
4304 dev_err(hdev->dev,
4305 "User not allowed to use MSG_PROT\n");
4306 rc = -EPERM;
4307 break;
4308
4309 case PACKET_CP_DMA:
4310 dev_err(hdev->dev, "User not allowed to use CP_DMA\n");
4311 rc = -EPERM;
4312 break;
4313
4314 case PACKET_STOP:
4315 dev_err(hdev->dev, "User not allowed to use STOP\n");
4316 rc = -EPERM;
4317 break;
4318
4319 case PACKET_WREG_32:
4320 case PACKET_WREG_BULK:
4321 case PACKET_MSG_LONG:
4322 case PACKET_MSG_SHORT:
4323 case PACKET_REPEAT:
4324 case PACKET_FENCE:
4325 case PACKET_NOP:
4326 case PACKET_ARB_POINT:
4327 case PACKET_LOAD_AND_EXE:
4328 memcpy(kernel_pkt, user_pkt, pkt_size);
4329 cb_patched_cur_length += pkt_size;
4330 break;
4331
4332 default:
4333 dev_err(hdev->dev, "Invalid packet header 0x%x\n",
4334 pkt_id);
4335 rc = -EINVAL;
4336 break;
4337 }
4338
4339 if (rc)
4340 break;
4341 }
4342
4343 return rc;
4344}
4345
4346static int gaudi_parse_cb_mmu(struct hl_device *hdev,
4347 struct hl_cs_parser *parser)
4348{
4349 u64 patched_cb_handle;
4350 u32 patched_cb_size;
4351 struct hl_cb *user_cb;
4352 int rc;
4353
4354 /*
4355	 * The new CB should have space at the end for two MSG_PROT packets:
4356	 * 1. A packet that will act as a completion packet
4357	 * 2. A packet that will generate an MSI interrupt
4358 */
4359 parser->patched_cb_size = parser->user_cb_size +
4360 sizeof(struct packet_msg_prot) * 2;
4361
4362 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4363 parser->patched_cb_size,
4364 &patched_cb_handle, HL_KERNEL_ASID_ID);
4365
4366 if (rc) {
4367 dev_err(hdev->dev,
4368 "Failed to allocate patched CB for DMA CS %d\n",
4369 rc);
4370 return rc;
4371 }
4372
4373 patched_cb_handle >>= PAGE_SHIFT;
4374 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4375 (u32) patched_cb_handle);
4376 /* hl_cb_get should never fail here so use kernel WARN */
4377 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4378 (u32) patched_cb_handle);
4379 if (!parser->patched_cb) {
4380 rc = -EFAULT;
4381 goto out;
4382 }
4383
4384 /*
4385 * The check that parser->user_cb_size <= parser->user_cb->size was done
4386 * in validate_queue_index().
4387 */
4388 memcpy((void *) (uintptr_t) parser->patched_cb->kernel_address,
4389 (void *) (uintptr_t) parser->user_cb->kernel_address,
4390 parser->user_cb_size);
4391
4392 patched_cb_size = parser->patched_cb_size;
4393
4394 /* Validate patched CB instead of user CB */
4395 user_cb = parser->user_cb;
4396 parser->user_cb = parser->patched_cb;
4397 rc = gaudi_validate_cb(hdev, parser, true);
4398 parser->user_cb = user_cb;
4399
4400 if (rc) {
4401 hl_cb_put(parser->patched_cb);
4402 goto out;
4403 }
4404
4405 if (patched_cb_size != parser->patched_cb_size) {
4406 dev_err(hdev->dev, "user CB size mismatch\n");
4407 hl_cb_put(parser->patched_cb);
4408 rc = -EINVAL;
4409 goto out;
4410 }
4411
4412out:
4413 /*
4414	 * Always call cb destroy here because we still hold one reference
4415	 * to the CB from the earlier cb_get. After the job completes,
4416	 * cb_put will release it, but here we want to remove it from the
4417	 * IDR
4418 */
4419 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4420 patched_cb_handle << PAGE_SHIFT);
4421
4422 return rc;
4423}
4424
4425static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
4426 struct hl_cs_parser *parser)
4427{
4428 u64 patched_cb_handle;
4429 int rc;
4430
4431 rc = gaudi_validate_cb(hdev, parser, false);
4432
4433 if (rc)
4434 goto free_userptr;
4435
4436 rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr,
4437 parser->patched_cb_size,
4438 &patched_cb_handle, HL_KERNEL_ASID_ID);
4439 if (rc) {
4440 dev_err(hdev->dev,
4441 "Failed to allocate patched CB for DMA CS %d\n", rc);
4442 goto free_userptr;
4443 }
4444
4445 patched_cb_handle >>= PAGE_SHIFT;
4446 parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
4447 (u32) patched_cb_handle);
4448 /* hl_cb_get should never fail here so use kernel WARN */
4449 WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
4450 (u32) patched_cb_handle);
4451 if (!parser->patched_cb) {
4452 rc = -EFAULT;
4453 goto out;
4454 }
4455
4456 rc = gaudi_patch_cb(hdev, parser);
4457
4458 if (rc)
4459 hl_cb_put(parser->patched_cb);
4460
4461out:
4462 /*
4463	 * Always call cb destroy here because we still hold one reference
4464	 * to the CB from the earlier cb_get. After the job completes,
4465	 * cb_put will release it, but here we want to remove it from the
4466	 * IDR
4467 */
4468 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr,
4469 patched_cb_handle << PAGE_SHIFT);
4470
4471free_userptr:
4472 if (rc)
4473 hl_userptr_delete_list(hdev, parser->job_userptr_list);
4474 return rc;
4475}
4476
4477static int gaudi_parse_cb_no_ext_queue(struct hl_device *hdev,
4478 struct hl_cs_parser *parser)
4479{
4480 struct asic_fixed_properties *asic_prop = &hdev->asic_prop;
4481
4482 /* For internal queue jobs just check if CB address is valid */
4483 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4484 parser->user_cb_size,
4485 asic_prop->sram_user_base_address,
4486 asic_prop->sram_end_address))
4487 return 0;
4488
4489 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4490 parser->user_cb_size,
4491 asic_prop->dram_user_base_address,
4492 asic_prop->dram_end_address))
4493 return 0;
4494
4495 /* PMMU and HPMMU addresses are equal, check only one of them */
4496 if (hl_mem_area_inside_range((u64) (uintptr_t) parser->user_cb,
4497 parser->user_cb_size,
4498 asic_prop->pmmu.start_addr,
4499 asic_prop->pmmu.end_addr))
4500 return 0;
4501
4502 dev_err(hdev->dev,
4503 "CB address 0x%px + 0x%x for internal QMAN is not valid\n",
4504 parser->user_cb, parser->user_cb_size);
4505
4506 return -EFAULT;
4507}
4508
4509static int gaudi_cs_parser(struct hl_device *hdev, struct hl_cs_parser *parser)
4510{
4511 struct gaudi_device *gaudi = hdev->asic_specific;
4512
4513 if (parser->queue_type == QUEUE_TYPE_INT)
4514 return gaudi_parse_cb_no_ext_queue(hdev, parser);
4515
4516 if (gaudi->hw_cap_initialized & HW_CAP_MMU)
4517 return gaudi_parse_cb_mmu(hdev, parser);
4518 else
4519 return gaudi_parse_cb_no_mmu(hdev, parser);
4520}
4521
4522static void gaudi_add_end_of_cb_packets(struct hl_device *hdev,
4523 u64 kernel_address, u32 len,
4524 u64 cq_addr, u32 cq_val, u32 msi_vec,
4525 bool eb)
4526{
4527 struct gaudi_device *gaudi = hdev->asic_specific;
4528 struct packet_msg_prot *cq_pkt;
4529 u32 tmp;
4530
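	/*
	 * Fill the two MSG_PROT packets reserved at the end of the CB: the
	 * first writes the completion value to the CQ, the second writes to
	 * the MSI interrupt register to notify the host.
	 */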
4531 cq_pkt = (struct packet_msg_prot *) (uintptr_t)
4532 (kernel_address + len - (sizeof(struct packet_msg_prot) * 2));
4533
4534 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4535 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4536
4537 if (eb)
4538 tmp |= (1 << GAUDI_PKT_CTL_EB_SHIFT);
4539
4540 cq_pkt->ctl = cpu_to_le32(tmp);
4541 cq_pkt->value = cpu_to_le32(cq_val);
4542 cq_pkt->addr = cpu_to_le64(cq_addr);
4543
4544 cq_pkt++;
4545
4546 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4547 (1 << GAUDI_PKT_CTL_MB_SHIFT);
4548 cq_pkt->ctl = cpu_to_le32(tmp);
4549 cq_pkt->value = cpu_to_le32(1);
4550
4551 if (!gaudi->multi_msi_mode)
4552 msi_vec = 0;
4553
4554 cq_pkt->addr = cpu_to_le64(CFG_BASE + mmPCIE_MSI_INTR_0 + msi_vec * 4);
4555}
4556
4557static void gaudi_update_eq_ci(struct hl_device *hdev, u32 val)
4558{
4559 WREG32(mmCPU_IF_EQ_RD_OFFS, val);
4560}
4561
4562static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
4563 u32 size, u64 val)
4564{
4565 struct packet_lin_dma *lin_dma_pkt;
4566 struct hl_cs_job *job;
4567 u32 cb_size, ctl;
4568 struct hl_cb *cb;
4569 int rc;
4570
4571 cb = hl_cb_kernel_create(hdev, PAGE_SIZE);
4572 if (!cb)
4573 return -EFAULT;
4574
4575 lin_dma_pkt = (struct packet_lin_dma *) (uintptr_t) cb->kernel_address;
4576 memset(lin_dma_pkt, 0, sizeof(*lin_dma_pkt));
4577 cb_size = sizeof(*lin_dma_pkt);
4578
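	/*
	 * Build a memset-mode LIN_DMA packet: with the MEMSET bit set, the
	 * src_addr field carries the 64-bit fill value while dst_addr and
	 * tsize describe the device memory range to fill.
	 */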
4579 ctl = ((PACKET_LIN_DMA << GAUDI_PKT_CTL_OPCODE_SHIFT) |
4580 (1 << GAUDI_PKT_LIN_DMA_CTL_MEMSET_SHIFT) |
4581 (1 << GAUDI_PKT_LIN_DMA_CTL_LIN_SHIFT) |
4582 (1 << GAUDI_PKT_CTL_RB_SHIFT) |
4583 (1 << GAUDI_PKT_CTL_MB_SHIFT));
4584 lin_dma_pkt->ctl = cpu_to_le32(ctl);
4585 lin_dma_pkt->src_addr = cpu_to_le64(val);
4586 lin_dma_pkt->dst_addr |= cpu_to_le64(addr);
4587 lin_dma_pkt->tsize = cpu_to_le32(size);
4588
4589 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
4590 if (!job) {
4591 dev_err(hdev->dev, "Failed to allocate a new job\n");
4592 rc = -ENOMEM;
4593 goto release_cb;
4594 }
4595
4596 job->id = 0;
4597 job->user_cb = cb;
4598 job->user_cb->cs_cnt++;
4599 job->user_cb_size = cb_size;
4600 job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
4601 job->patched_cb = job->user_cb;
4602 job->job_cb_size = job->user_cb_size + sizeof(struct packet_msg_prot);
4603
4604 hl_debugfs_add_job(hdev, job);
4605
4606 rc = gaudi_send_job_on_qman0(hdev, job);
4607
4608 hl_debugfs_remove_job(hdev, job);
4609 kfree(job);
4610 cb->cs_cnt--;
4611
4612release_cb:
4613 hl_cb_put(cb);
4614 hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
4615
4616 return rc;
4617}
4618
4619static void gaudi_restore_sm_registers(struct hl_device *hdev)
4620{
4621 int i;
4622
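	/*
	 * The SOB and monitor status registers are 4 bytes apart, so the
	 * loops walk byte offsets: (count << 2) bytes total, in steps of 4.
	 */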
4623 for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
4624 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4625 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4626 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4627 }
4628
4629 for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
4630 WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4631 WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4632 WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4633 }
4634
4635 i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
4636
4637 for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
4638 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
4639
4640 i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
4641
4642 for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
4643 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
4644}
4645
4646static void gaudi_restore_dma_registers(struct hl_device *hdev)
4647{
4648 u32 sob_delta = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1 -
4649 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
4650 int i;
4651
4652 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4653 u64 sob_addr = CFG_BASE +
4654 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 +
4655 (i * sob_delta);
4656 u32 dma_offset = i * DMA_CORE_OFFSET;
4657
4658 WREG32(mmDMA0_CORE_WR_COMP_ADDR_LO + dma_offset,
4659 lower_32_bits(sob_addr));
4660 WREG32(mmDMA0_CORE_WR_COMP_ADDR_HI + dma_offset,
4661 upper_32_bits(sob_addr));
4662 WREG32(mmDMA0_CORE_WR_COMP_WDATA + dma_offset, 0x80000001);
4663
4664 /* For DMAs 2-7, need to restore WR_AWUSER_31_11 as it can be
4665 * modified by the user for SRAM reduction
4666 */
4667 if (i > 1)
4668 WREG32(mmDMA0_CORE_WR_AWUSER_31_11 + dma_offset,
4669 0x00000001);
4670 }
4671}
4672
4673static void gaudi_restore_qm_registers(struct hl_device *hdev)
4674{
4675 u32 qman_offset;
4676 int i;
4677
4678 for (i = 0 ; i < DMA_NUMBER_OF_CHANNELS ; i++) {
4679 qman_offset = i * DMA_QMAN_OFFSET;
4680 WREG32(mmDMA0_QM_ARB_CFG_0 + qman_offset, 0);
4681 }
4682
4683 for (i = 0 ; i < MME_NUMBER_OF_MASTER_ENGINES ; i++) {
4684 qman_offset = i * (mmMME2_QM_BASE - mmMME0_QM_BASE);
4685 WREG32(mmMME0_QM_ARB_CFG_0 + qman_offset, 0);
4686 }
4687
4688 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
4689 qman_offset = i * TPC_QMAN_OFFSET;
4690 WREG32(mmTPC0_QM_ARB_CFG_0 + qman_offset, 0);
4691 }
4692}
4693
4694static void gaudi_restore_user_registers(struct hl_device *hdev)
4695{
4696 gaudi_restore_sm_registers(hdev);
4697 gaudi_restore_dma_registers(hdev);
4698 gaudi_restore_qm_registers(hdev);
4699}
4700
4701static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
4702{
4703 struct asic_fixed_properties *prop = &hdev->asic_prop;
4704 u64 addr = prop->sram_user_base_address;
4705 u32 size = hdev->pldm ? 0x10000 :
4706 (prop->sram_size - SRAM_USER_BASE_OFFSET);
4707 u64 val = 0x7777777777777777ull;
4708 int rc;
4709
4710 rc = gaudi_memset_device_memory(hdev, addr, size, val);
4711 if (rc) {
4712 dev_err(hdev->dev, "Failed to clear SRAM in context switch\n");
4713 return rc;
4714 }
4715
4716 gaudi_mmu_prepare(hdev, asid);
4717
4718 gaudi_restore_user_registers(hdev);
4719
4720 return 0;
4721}
4722
4723static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
4724{
4725 struct asic_fixed_properties *prop = &hdev->asic_prop;
4726 struct gaudi_device *gaudi = hdev->asic_specific;
4727 u64 addr = prop->mmu_pgt_addr;
4728 u32 size = prop->mmu_pgt_size + MMU_CACHE_MNG_SIZE;
4729
4730 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4731 return 0;
4732
4733 return gaudi_memset_device_memory(hdev, addr, size, 0);
4734}
4735
4736static void gaudi_restore_phase_topology(struct hl_device *hdev)
4737{
4738
4739}
4740
4741static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
4742{
4743 struct asic_fixed_properties *prop = &hdev->asic_prop;
4744 struct gaudi_device *gaudi = hdev->asic_specific;
4745 u64 hbm_bar_addr;
4746 int rc = 0;
4747
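	/*
	 * Route the access by address range: CFG space through register
	 * reads, SRAM through its PCI BAR, HBM/DRAM by moving the HBM BAR
	 * window, and host physical memory directly (only when no IOMMU is
	 * present).
	 */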
4748 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4749 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4750 dev_err_ratelimited(hdev->dev,
4751 "Can't read register - clock gating is enabled!\n");
4752 rc = -EFAULT;
4753 } else {
4754 *val = RREG32(addr - CFG_BASE);
4755 }
4756 } else if ((addr >= SRAM_BASE_ADDR) &&
4757 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4758 *val = readl(hdev->pcie_bar[SRAM_BAR_ID] +
4759 (addr - SRAM_BASE_ADDR));
4760 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4761 u64 bar_base_addr = DRAM_PHYS_BASE +
4762 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4763
4764 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4765 if (hbm_bar_addr != U64_MAX) {
4766 *val = readl(hdev->pcie_bar[HBM_BAR_ID] +
4767 (addr - bar_base_addr));
4768
4769 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4770 hbm_bar_addr);
4771 }
4772 if (hbm_bar_addr == U64_MAX)
4773 rc = -EIO;
4774 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4775 *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
4776 } else {
4777 rc = -EFAULT;
4778 }
4779
4780 return rc;
4781}
4782
4783static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
4784{
4785 struct asic_fixed_properties *prop = &hdev->asic_prop;
4786 struct gaudi_device *gaudi = hdev->asic_specific;
4787 u64 hbm_bar_addr;
4788 int rc = 0;
4789
4790 if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) {
4791 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4792 dev_err_ratelimited(hdev->dev,
4793 "Can't write register - clock gating is enabled!\n");
4794 rc = -EFAULT;
4795 } else {
4796 WREG32(addr - CFG_BASE, val);
4797 }
4798 } else if ((addr >= SRAM_BASE_ADDR) &&
4799 (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) {
4800 writel(val, hdev->pcie_bar[SRAM_BAR_ID] +
4801 (addr - SRAM_BASE_ADDR));
4802 } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) {
4803 u64 bar_base_addr = DRAM_PHYS_BASE +
4804 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4805
4806 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4807 if (hbm_bar_addr != U64_MAX) {
4808 writel(val, hdev->pcie_bar[HBM_BAR_ID] +
4809 (addr - bar_base_addr));
4810
4811 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4812 hbm_bar_addr);
4813 }
4814 if (hbm_bar_addr == U64_MAX)
4815 rc = -EIO;
4816 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4817 *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4818 } else {
4819 rc = -EFAULT;
4820 }
4821
4822 return rc;
4823}
4824
4825static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
4826{
4827 struct asic_fixed_properties *prop = &hdev->asic_prop;
4828 struct gaudi_device *gaudi = hdev->asic_specific;
4829 u64 hbm_bar_addr;
4830 int rc = 0;
4831
4832 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4833 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4834 dev_err_ratelimited(hdev->dev,
4835 "Can't read register - clock gating is enabled!\n");
4836 rc = -EFAULT;
4837 } else {
4838 u32 val_l = RREG32(addr - CFG_BASE);
4839 u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);
4840
4841 *val = (((u64) val_h) << 32) | val_l;
4842 }
4843 } else if ((addr >= SRAM_BASE_ADDR) &&
4844 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4845 *val = readq(hdev->pcie_bar[SRAM_BAR_ID] +
4846 (addr - SRAM_BASE_ADDR));
4847 } else if (addr <=
4848 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4849 u64 bar_base_addr = DRAM_PHYS_BASE +
4850 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4851
4852 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4853 if (hbm_bar_addr != U64_MAX) {
4854 *val = readq(hdev->pcie_bar[HBM_BAR_ID] +
4855 (addr - bar_base_addr));
4856
4857 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4858 hbm_bar_addr);
4859 }
4860 if (hbm_bar_addr == U64_MAX)
4861 rc = -EIO;
4862 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4863 *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
4864 } else {
4865 rc = -EFAULT;
4866 }
4867
4868 return rc;
4869}
4870
4871static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
4872{
4873 struct asic_fixed_properties *prop = &hdev->asic_prop;
4874 struct gaudi_device *gaudi = hdev->asic_specific;
4875 u64 hbm_bar_addr;
4876 int rc = 0;
4877
4878 if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
4879 if (gaudi->hw_cap_initialized & HW_CAP_CLK_GATE) {
4880 dev_err_ratelimited(hdev->dev,
4881 "Can't write register - clock gating is enabled!\n");
4882 rc = -EFAULT;
4883 } else {
4884 WREG32(addr - CFG_BASE, lower_32_bits(val));
4885 WREG32(addr + sizeof(u32) - CFG_BASE,
4886 upper_32_bits(val));
4887 }
4888 } else if ((addr >= SRAM_BASE_ADDR) &&
4889 (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) {
4890 writeq(val, hdev->pcie_bar[SRAM_BAR_ID] +
4891 (addr - SRAM_BASE_ADDR));
4892 } else if (addr <=
4893 DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) {
4894 u64 bar_base_addr = DRAM_PHYS_BASE +
4895 (addr & ~(prop->dram_pci_bar_size - 0x1ull));
4896
4897 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr);
4898 if (hbm_bar_addr != U64_MAX) {
4899 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4900 (addr - bar_base_addr));
4901
4902 hbm_bar_addr = gaudi_set_hbm_bar_base(hdev,
4903 hbm_bar_addr);
4904 }
4905 if (hbm_bar_addr == U64_MAX)
4906 rc = -EIO;
4907 } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
4908 *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
4909 } else {
4910 rc = -EFAULT;
4911 }
4912
4913 return rc;
4914}
4915
4916static u64 gaudi_read_pte(struct hl_device *hdev, u64 addr)
4917{
4918 struct gaudi_device *gaudi = hdev->asic_specific;
4919
4920 if (hdev->hard_reset_pending)
4921 return U64_MAX;
4922
4923 return readq(hdev->pcie_bar[HBM_BAR_ID] +
4924 (addr - gaudi->hbm_bar_cur_addr));
4925}
4926
4927static void gaudi_write_pte(struct hl_device *hdev, u64 addr, u64 val)
4928{
4929 struct gaudi_device *gaudi = hdev->asic_specific;
4930
4931 if (hdev->hard_reset_pending)
4932 return;
4933
4934 writeq(val, hdev->pcie_bar[HBM_BAR_ID] +
4935 (addr - gaudi->hbm_bar_cur_addr));
4936}
4937
4938static void gaudi_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
4939{
4940 /* mask to zero the MMBP and ASID bits */
4941 WREG32_AND(reg, ~0x7FF);
4942 WREG32_OR(reg, asid);
4943}
4944
4945static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
4946{
4947 struct gaudi_device *gaudi = hdev->asic_specific;
4948
4949 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
4950 return;
4951
4952 if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
4953 WARN(1, "asid %u is too big\n", asid);
4954 return;
4955 }
4956
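	/*
	 * Disable clock gating (under clk_gate_mutex) while the per-engine
	 * ASID/MMBP registers are written, and re-enable it when done.
	 */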
4957 mutex_lock(&gaudi->clk_gate_mutex);
4958
4959 hdev->asic_funcs->disable_clock_gating(hdev);
4960
4961 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_0, asid);
4962 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_1, asid);
4963 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_2, asid);
4964 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_3, asid);
4965 gaudi_mmu_prepare_reg(hdev, mmDMA0_QM_GLBL_NON_SECURE_PROPS_4, asid);
4966
4967 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_0, asid);
4968 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_1, asid);
4969 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_2, asid);
4970 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_3, asid);
4971 gaudi_mmu_prepare_reg(hdev, mmDMA1_QM_GLBL_NON_SECURE_PROPS_4, asid);
4972
4973 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_0, asid);
4974 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_1, asid);
4975 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_2, asid);
4976 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_3, asid);
4977 gaudi_mmu_prepare_reg(hdev, mmDMA2_QM_GLBL_NON_SECURE_PROPS_4, asid);
4978
4979 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_0, asid);
4980 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_1, asid);
4981 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_2, asid);
4982 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_3, asid);
4983 gaudi_mmu_prepare_reg(hdev, mmDMA3_QM_GLBL_NON_SECURE_PROPS_4, asid);
4984
4985 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_0, asid);
4986 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_1, asid);
4987 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_2, asid);
4988 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_3, asid);
4989 gaudi_mmu_prepare_reg(hdev, mmDMA4_QM_GLBL_NON_SECURE_PROPS_4, asid);
4990
4991 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_0, asid);
4992 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_1, asid);
4993 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_2, asid);
4994 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_3, asid);
4995 gaudi_mmu_prepare_reg(hdev, mmDMA5_QM_GLBL_NON_SECURE_PROPS_4, asid);
4996
4997 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_0, asid);
4998 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_1, asid);
4999 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_2, asid);
5000 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_3, asid);
5001 gaudi_mmu_prepare_reg(hdev, mmDMA6_QM_GLBL_NON_SECURE_PROPS_4, asid);
5002
5003 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_0, asid);
5004 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_1, asid);
5005 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_2, asid);
5006 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_3, asid);
5007 gaudi_mmu_prepare_reg(hdev, mmDMA7_QM_GLBL_NON_SECURE_PROPS_4, asid);
5008
5009 gaudi_mmu_prepare_reg(hdev, mmDMA0_CORE_NON_SECURE_PROPS, asid);
5010 gaudi_mmu_prepare_reg(hdev, mmDMA1_CORE_NON_SECURE_PROPS, asid);
5011 gaudi_mmu_prepare_reg(hdev, mmDMA2_CORE_NON_SECURE_PROPS, asid);
5012 gaudi_mmu_prepare_reg(hdev, mmDMA3_CORE_NON_SECURE_PROPS, asid);
5013 gaudi_mmu_prepare_reg(hdev, mmDMA4_CORE_NON_SECURE_PROPS, asid);
5014 gaudi_mmu_prepare_reg(hdev, mmDMA5_CORE_NON_SECURE_PROPS, asid);
5015 gaudi_mmu_prepare_reg(hdev, mmDMA6_CORE_NON_SECURE_PROPS, asid);
5016 gaudi_mmu_prepare_reg(hdev, mmDMA7_CORE_NON_SECURE_PROPS, asid);
5017
5018 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_0, asid);
5019 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_1, asid);
5020 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_2, asid);
5021 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_3, asid);
5022 gaudi_mmu_prepare_reg(hdev, mmTPC0_QM_GLBL_NON_SECURE_PROPS_4, asid);
5023 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_ARUSER_LO, asid);
5024 gaudi_mmu_prepare_reg(hdev, mmTPC0_CFG_AWUSER_LO, asid);
5025
5026 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_0, asid);
5027 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_1, asid);
5028 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_2, asid);
5029 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_3, asid);
5030 gaudi_mmu_prepare_reg(hdev, mmTPC1_QM_GLBL_NON_SECURE_PROPS_4, asid);
5031 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_ARUSER_LO, asid);
5032 gaudi_mmu_prepare_reg(hdev, mmTPC1_CFG_AWUSER_LO, asid);
5033
5034 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_0, asid);
5035 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_1, asid);
5036 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_2, asid);
5037 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_3, asid);
5038 gaudi_mmu_prepare_reg(hdev, mmTPC2_QM_GLBL_NON_SECURE_PROPS_4, asid);
5039 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_ARUSER_LO, asid);
5040 gaudi_mmu_prepare_reg(hdev, mmTPC2_CFG_AWUSER_LO, asid);
5041
5042 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_0, asid);
5043 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_1, asid);
5044 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_2, asid);
5045 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_3, asid);
5046 gaudi_mmu_prepare_reg(hdev, mmTPC3_QM_GLBL_NON_SECURE_PROPS_4, asid);
5047 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_ARUSER_LO, asid);
5048 gaudi_mmu_prepare_reg(hdev, mmTPC3_CFG_AWUSER_LO, asid);
5049
5050 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_0, asid);
5051 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_1, asid);
5052 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_2, asid);
5053 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_3, asid);
5054 gaudi_mmu_prepare_reg(hdev, mmTPC4_QM_GLBL_NON_SECURE_PROPS_4, asid);
5055 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_ARUSER_LO, asid);
5056 gaudi_mmu_prepare_reg(hdev, mmTPC4_CFG_AWUSER_LO, asid);
5057
5058 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_0, asid);
5059 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_1, asid);
5060 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_2, asid);
5061 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_3, asid);
5062 gaudi_mmu_prepare_reg(hdev, mmTPC5_QM_GLBL_NON_SECURE_PROPS_4, asid);
5063 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_ARUSER_LO, asid);
5064 gaudi_mmu_prepare_reg(hdev, mmTPC5_CFG_AWUSER_LO, asid);
5065
5066 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_0, asid);
5067 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_1, asid);
5068 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_2, asid);
5069 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_3, asid);
5070 gaudi_mmu_prepare_reg(hdev, mmTPC6_QM_GLBL_NON_SECURE_PROPS_4, asid);
5071 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_ARUSER_LO, asid);
5072 gaudi_mmu_prepare_reg(hdev, mmTPC6_CFG_AWUSER_LO, asid);
5073
5074 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_0, asid);
5075 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_1, asid);
5076 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_2, asid);
5077 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_3, asid);
5078 gaudi_mmu_prepare_reg(hdev, mmTPC7_QM_GLBL_NON_SECURE_PROPS_4, asid);
5079 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_ARUSER_LO, asid);
5080 gaudi_mmu_prepare_reg(hdev, mmTPC7_CFG_AWUSER_LO, asid);
5081
5082 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_0, asid);
5083 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_1, asid);
5084 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_2, asid);
5085 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_3, asid);
5086 gaudi_mmu_prepare_reg(hdev, mmMME0_QM_GLBL_NON_SECURE_PROPS_4, asid);
5087 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_0, asid);
5088 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_1, asid);
5089 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_2, asid);
5090 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_3, asid);
5091 gaudi_mmu_prepare_reg(hdev, mmMME2_QM_GLBL_NON_SECURE_PROPS_4, asid);
5092
5093 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER0, asid);
5094 gaudi_mmu_prepare_reg(hdev, mmMME0_SBAB_ARUSER1, asid);
5095 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER0, asid);
5096 gaudi_mmu_prepare_reg(hdev, mmMME1_SBAB_ARUSER1, asid);
5097 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER0, asid);
5098 gaudi_mmu_prepare_reg(hdev, mmMME2_SBAB_ARUSER1, asid);
5099 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER0, asid);
5100 gaudi_mmu_prepare_reg(hdev, mmMME3_SBAB_ARUSER1, asid);
5101 gaudi_mmu_prepare_reg(hdev, mmMME0_ACC_WBC, asid);
5102 gaudi_mmu_prepare_reg(hdev, mmMME1_ACC_WBC, asid);
5103 gaudi_mmu_prepare_reg(hdev, mmMME2_ACC_WBC, asid);
5104 gaudi_mmu_prepare_reg(hdev, mmMME3_ACC_WBC, asid);
5105
5106 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
5107 gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
5108
5109 hdev->asic_funcs->enable_clock_gating(hdev);
5110
5111 mutex_unlock(&gaudi->clk_gate_mutex);
5112}
5113
5114static int gaudi_send_job_on_qman0(struct hl_device *hdev,
5115 struct hl_cs_job *job)
5116{
5117 struct packet_msg_prot *fence_pkt;
5118 u32 *fence_ptr;
5119 dma_addr_t fence_dma_addr;
5120 struct hl_cb *cb;
5121 u32 tmp, timeout, dma_offset;
5122 int rc;
5123
5124 if (hdev->pldm)
5125 timeout = GAUDI_PLDM_QMAN0_TIMEOUT_USEC;
5126 else
5127 timeout = HL_DEVICE_TIMEOUT_USEC;
5128
5129 if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
5130 dev_err_ratelimited(hdev->dev,
5131 "Can't send driver job on QMAN0 because the device is not idle\n");
5132 return -EBUSY;
5133 }
5134
5135 fence_ptr = hdev->asic_funcs->asic_dma_pool_zalloc(hdev, 4, GFP_KERNEL,
5136 &fence_dma_addr);
5137 if (!fence_ptr) {
5138 dev_err(hdev->dev,
5139 "Failed to allocate fence memory for QMAN0\n");
5140 return -ENOMEM;
5141 }
5142
5143 cb = job->patched_cb;
5144
5145 fence_pkt = (struct packet_msg_prot *) (uintptr_t) (cb->kernel_address +
5146 job->job_cb_size - sizeof(struct packet_msg_prot));
5147
5148 tmp = (PACKET_MSG_PROT << GAUDI_PKT_CTL_OPCODE_SHIFT) |
5149 (1 << GAUDI_PKT_CTL_EB_SHIFT) |
5150 (1 << GAUDI_PKT_CTL_MB_SHIFT);
5151 fence_pkt->ctl = cpu_to_le32(tmp);
5152 fence_pkt->value = cpu_to_le32(GAUDI_QMAN0_FENCE_VAL);
5153 fence_pkt->addr = cpu_to_le64(fence_dma_addr);
5154
5155 dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
5156
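	/*
	 * Temporarily raise the DMA core protection bit for the driver's
	 * job; it is cleared again below once the job finishes or times out.
	 */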
5157 WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
5158
5159 rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
5160 job->job_cb_size, cb->bus_address);
5161 if (rc) {
5162 dev_err(hdev->dev, "Failed to send CB on QMAN0, %d\n", rc);
5163 goto free_fence_ptr;
5164 }
5165
5166 rc = hl_poll_timeout_memory(hdev, fence_ptr, tmp,
5167 (tmp == GAUDI_QMAN0_FENCE_VAL), 1000,
5168 timeout, true);
5169
5170 hl_hw_queue_inc_ci_kernel(hdev, GAUDI_QUEUE_ID_DMA_0_0);
5171
5172 if (rc == -ETIMEDOUT) {
5173 dev_err(hdev->dev, "QMAN0 Job timeout (0x%x)\n", tmp);
5174 goto free_fence_ptr;
5175 }
5176
5177free_fence_ptr:
5178 WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
5179 ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
5180
5181 hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
5182 fence_dma_addr);
5183 return rc;
5184}
5185
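/*
 * Note: several of the strings returned below contain a "%d" placeholder;
 * gaudi_get_event_desc() fills it in with the engine/bank index derived from
 * the event type.
 */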
5186static const char *_gaudi_get_event_desc(u16 event_type)
5187{
5188 switch (event_type) {
5189 case GAUDI_EVENT_PCIE_CORE_SERR:
5190 return "PCIe_core_serr";
5191 case GAUDI_EVENT_PCIE_CORE_DERR:
5192 return "PCIe_core_derr";
5193 case GAUDI_EVENT_PCIE_IF_SERR:
5194 return "PCIe_if_serr";
5195 case GAUDI_EVENT_PCIE_IF_DERR:
5196 return "PCIe_if_derr";
5197 case GAUDI_EVENT_PCIE_PHY_SERR:
5198 return "PCIe_phy_serr";
5199 case GAUDI_EVENT_PCIE_PHY_DERR:
5200 return "PCIe_phy_derr";
5201 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5202 return "TPC%d_Serr";
5203 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5204 return "TPC%d_Derr";
5205 case GAUDI_EVENT_MME0_ACC_SERR:
5206 case GAUDI_EVENT_MME1_ACC_SERR:
5207 case GAUDI_EVENT_MME2_ACC_SERR:
5208 case GAUDI_EVENT_MME3_ACC_SERR:
5209 return "MME%d_acc_serr";
5210 case GAUDI_EVENT_MME0_ACC_DERR:
5211 case GAUDI_EVENT_MME1_ACC_DERR:
5212 case GAUDI_EVENT_MME2_ACC_DERR:
5213 case GAUDI_EVENT_MME3_ACC_DERR:
5214 return "MME%d_acc_derr";
5215 case GAUDI_EVENT_MME0_SBAB_SERR:
5216 case GAUDI_EVENT_MME1_SBAB_SERR:
5217 case GAUDI_EVENT_MME2_SBAB_SERR:
5218 case GAUDI_EVENT_MME3_SBAB_SERR:
5219 return "MME%d_sbab_serr";
5220 case GAUDI_EVENT_MME0_SBAB_DERR:
5221 case GAUDI_EVENT_MME1_SBAB_DERR:
5222 case GAUDI_EVENT_MME2_SBAB_DERR:
5223 case GAUDI_EVENT_MME3_SBAB_DERR:
5224 return "MME%d_sbab_derr";
5225 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5226 return "DMA%d_serr_ecc";
5227 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5228 return "DMA%d_derr_ecc";
5229 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5230 return "CPU_if_ecc_serr";
5231 case GAUDI_EVENT_CPU_IF_ECC_DERR:
5232 return "CPU_if_ecc_derr";
5233 case GAUDI_EVENT_PSOC_MEM_SERR:
5234 return "PSOC_mem_serr";
5235 case GAUDI_EVENT_PSOC_MEM_DERR:
5236 return "PSOC_mem_derr";
5237 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
5238 return "PSOC_coresight_serr";
5239 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
5240 return "PSOC_coresight_derr";
5241 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5242 return "SRAM%d_serr";
5243 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5244 return "SRAM%d_derr";
5245 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5246 return "DMA%d_if_serr";
5247 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5248 return "DMA%d_if_derr";
5249 case GAUDI_EVENT_GIC500:
5250 return "GIC500";
5251 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5252 return "HBM%d_serr";
5253 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5254 return "HBM%d_derr";
5255 case GAUDI_EVENT_MMU_SERR:
5256 return "MMU_serr";
5257 case GAUDI_EVENT_MMU_DERR:
5258 return "MMU_derr";
5259 case GAUDI_EVENT_PCIE_DEC:
5260 return "PCIe_dec";
5261 case GAUDI_EVENT_TPC0_DEC:
5262 case GAUDI_EVENT_TPC1_DEC:
5263 case GAUDI_EVENT_TPC2_DEC:
5264 case GAUDI_EVENT_TPC3_DEC:
5265 case GAUDI_EVENT_TPC4_DEC:
5266 case GAUDI_EVENT_TPC5_DEC:
5267 case GAUDI_EVENT_TPC6_DEC:
5268 case GAUDI_EVENT_TPC7_DEC:
5269 return "TPC%d_dec";
5270 case GAUDI_EVENT_AXI_ECC:
5271 return "AXI_ecc";
5272 case GAUDI_EVENT_L2_RAM_ECC:
5273 return "L2_ram_ecc";
5274 case GAUDI_EVENT_MME0_WBC_RSP:
5275 case GAUDI_EVENT_MME1_WBC_RSP:
5276 case GAUDI_EVENT_MME2_WBC_RSP:
5277 case GAUDI_EVENT_MME3_WBC_RSP:
5278 return "MME%d_wbc_rsp";
5279 case GAUDI_EVENT_MME0_SBAB0_RSP:
5280 case GAUDI_EVENT_MME1_SBAB0_RSP:
5281 case GAUDI_EVENT_MME2_SBAB0_RSP:
5282 case GAUDI_EVENT_MME3_SBAB0_RSP:
5283 return "MME%d_sbab0_rsp";
5284 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5285 return "PLL%d";
5286 case GAUDI_EVENT_CPU_AXI_SPLITTER:
5287 return "CPU_axi_splitter";
5288 case GAUDI_EVENT_PSOC_AXI_DEC:
5289 return "CPU_axi_dec";
5290 case GAUDI_EVENT_PSOC_PRSTN_FALL:
5291 return "PSOC_prstn_fall";
5292 case GAUDI_EVENT_TPC0_BMON_SPMU:
5293 case GAUDI_EVENT_TPC1_BMON_SPMU:
5294 case GAUDI_EVENT_TPC2_BMON_SPMU:
5295 case GAUDI_EVENT_TPC3_BMON_SPMU:
5296 case GAUDI_EVENT_TPC4_BMON_SPMU:
5297 case GAUDI_EVENT_TPC5_BMON_SPMU:
5298 case GAUDI_EVENT_TPC6_BMON_SPMU:
5299 case GAUDI_EVENT_TPC7_BMON_SPMU:
5300 return "TPC%d_bmon_spmu";
5301 case GAUDI_EVENT_TPC0_KRN_ERR:
5302 case GAUDI_EVENT_TPC1_KRN_ERR:
5303 case GAUDI_EVENT_TPC2_KRN_ERR:
5304 case GAUDI_EVENT_TPC3_KRN_ERR:
5305 case GAUDI_EVENT_TPC4_KRN_ERR:
5306 case GAUDI_EVENT_TPC5_KRN_ERR:
5307 case GAUDI_EVENT_TPC6_KRN_ERR:
5308 case GAUDI_EVENT_TPC7_KRN_ERR:
5309 return "TPC%d_krn_err";
5310 case GAUDI_EVENT_MMU_PAGE_FAULT:
5311 return "MMU_page_fault";
5312 case GAUDI_EVENT_MMU_WR_PERM:
5313 return "MMU_write_permission";
5314 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5315 return "DMA_bm_ch%d";
5316 case GAUDI_EVENT_HBM0_SPI_0:
5317 case GAUDI_EVENT_HBM1_SPI_0:
5318 case GAUDI_EVENT_HBM2_SPI_0:
5319 case GAUDI_EVENT_HBM3_SPI_0:
5320 return "HBM%d_spi_0";
5321 case GAUDI_EVENT_HBM0_SPI_1:
5322 case GAUDI_EVENT_HBM1_SPI_1:
5323 case GAUDI_EVENT_HBM2_SPI_1:
5324 case GAUDI_EVENT_HBM3_SPI_1:
5325 return "HBM%d_spi_1";
5326 case GAUDI_EVENT_FIX_POWER_ENV_S:
5327 return "POWER_ENV_S";
5328 case GAUDI_EVENT_FIX_POWER_ENV_E:
5329 return "POWER_ENV_E";
5330 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
5331 return "THERMAL_ENV_S";
5332 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
5333 return "THERMAL_ENV_E";
5334 case GAUDI_EVENT_RAZWI_OR_ADC:
5335 return "PSOC_razwi_or_adc";
5336 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5337 return "TPC%d_qm";
5338 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5339 return "MME%d_qm";
5340 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5341 return "DMA%d_qm";
5342 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5343 return "DMA%d_core";
5344 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
5345 return "PSOC_razwi_or_adc_sw";
5346 default:
5347 return "N/A";
5348 }
5349}
5350
5351static void gaudi_get_event_desc(u16 event_type, char *desc, size_t size)
5352{
5353 u8 index;
5354
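	/*
	 * The divisors used below (/2, /4, /5, /6) match the number of
	 * consecutive event IDs each engine instance occupies in the event
	 * list, so subtracting the base event and dividing yields the engine
	 * index that the "%d" placeholder expects.
	 */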
5355 switch (event_type) {
5356 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5357 index = event_type - GAUDI_EVENT_TPC0_SERR;
5358 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5359 break;
5360 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5361 index = event_type - GAUDI_EVENT_TPC0_DERR;
5362 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5363 break;
5364 case GAUDI_EVENT_MME0_ACC_SERR:
5365 case GAUDI_EVENT_MME1_ACC_SERR:
5366 case GAUDI_EVENT_MME2_ACC_SERR:
5367 case GAUDI_EVENT_MME3_ACC_SERR:
5368 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5369 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5370 break;
5371 case GAUDI_EVENT_MME0_ACC_DERR:
5372 case GAUDI_EVENT_MME1_ACC_DERR:
5373 case GAUDI_EVENT_MME2_ACC_DERR:
5374 case GAUDI_EVENT_MME3_ACC_DERR:
5375 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5376 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5377 break;
5378 case GAUDI_EVENT_MME0_SBAB_SERR:
5379 case GAUDI_EVENT_MME1_SBAB_SERR:
5380 case GAUDI_EVENT_MME2_SBAB_SERR:
5381 case GAUDI_EVENT_MME3_SBAB_SERR:
5382 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5383 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5384 break;
5385 case GAUDI_EVENT_MME0_SBAB_DERR:
5386 case GAUDI_EVENT_MME1_SBAB_DERR:
5387 case GAUDI_EVENT_MME2_SBAB_DERR:
5388 case GAUDI_EVENT_MME3_SBAB_DERR:
5389 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5390 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5391 break;
5392 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5393 index = event_type - GAUDI_EVENT_DMA0_SERR_ECC;
5394 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5395 break;
5396 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5397 index = event_type - GAUDI_EVENT_DMA0_DERR_ECC;
5398 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5399 break;
5400 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
5401 index = event_type - GAUDI_EVENT_SRAM0_SERR;
5402 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5403 break;
5404 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
5405 index = event_type - GAUDI_EVENT_SRAM0_DERR;
5406 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5407 break;
5408 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
5409 index = event_type - GAUDI_EVENT_DMA_IF0_SERR;
5410 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5411 break;
5412 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
5413 index = event_type - GAUDI_EVENT_DMA_IF0_DERR;
5414 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5415 break;
5416 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
5417 index = event_type - GAUDI_EVENT_HBM_0_SERR;
5418 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5419 break;
5420 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
5421 index = event_type - GAUDI_EVENT_HBM_0_DERR;
5422 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5423 break;
5424 case GAUDI_EVENT_TPC0_DEC:
5425 case GAUDI_EVENT_TPC1_DEC:
5426 case GAUDI_EVENT_TPC2_DEC:
5427 case GAUDI_EVENT_TPC3_DEC:
5428 case GAUDI_EVENT_TPC4_DEC:
5429 case GAUDI_EVENT_TPC5_DEC:
5430 case GAUDI_EVENT_TPC6_DEC:
5431 case GAUDI_EVENT_TPC7_DEC:
5432 index = (event_type - GAUDI_EVENT_TPC0_DEC) / 2;
5433 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5434 break;
5435 case GAUDI_EVENT_MME0_WBC_RSP:
5436 case GAUDI_EVENT_MME1_WBC_RSP:
5437 case GAUDI_EVENT_MME2_WBC_RSP:
5438 case GAUDI_EVENT_MME3_WBC_RSP:
5439 index = (event_type - GAUDI_EVENT_MME0_WBC_RSP) / 5;
5440 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5441 break;
5442 case GAUDI_EVENT_MME0_SBAB0_RSP:
5443 case GAUDI_EVENT_MME1_SBAB0_RSP:
5444 case GAUDI_EVENT_MME2_SBAB0_RSP:
5445 case GAUDI_EVENT_MME3_SBAB0_RSP:
5446 index = (event_type - GAUDI_EVENT_MME0_SBAB0_RSP) / 5;
5447 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5448 break;
5449 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
5450 index = event_type - GAUDI_EVENT_PLL0;
5451 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5452 break;
5453 case GAUDI_EVENT_TPC0_BMON_SPMU:
5454 case GAUDI_EVENT_TPC1_BMON_SPMU:
5455 case GAUDI_EVENT_TPC2_BMON_SPMU:
5456 case GAUDI_EVENT_TPC3_BMON_SPMU:
5457 case GAUDI_EVENT_TPC4_BMON_SPMU:
5458 case GAUDI_EVENT_TPC5_BMON_SPMU:
5459 case GAUDI_EVENT_TPC6_BMON_SPMU:
5460 case GAUDI_EVENT_TPC7_BMON_SPMU:
5461 index = (event_type - GAUDI_EVENT_TPC0_BMON_SPMU) / 6;
5462 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5463 break;
5464 case GAUDI_EVENT_TPC0_KRN_ERR:
5465 case GAUDI_EVENT_TPC1_KRN_ERR:
5466 case GAUDI_EVENT_TPC2_KRN_ERR:
5467 case GAUDI_EVENT_TPC3_KRN_ERR:
5468 case GAUDI_EVENT_TPC4_KRN_ERR:
5469 case GAUDI_EVENT_TPC5_KRN_ERR:
5470 case GAUDI_EVENT_TPC6_KRN_ERR:
5471 case GAUDI_EVENT_TPC7_KRN_ERR:
5472 index = (event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
5473 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5474 break;
5475 case GAUDI_EVENT_MMU_PAGE_FAULT:
5476 snprintf(desc, size, _gaudi_get_event_desc(event_type));
5477 break;
5478 case GAUDI_EVENT_MMU_WR_PERM:
5479 snprintf(desc, size, _gaudi_get_event_desc(event_type));
5480 break;
5481 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
5482 index = event_type - GAUDI_EVENT_DMA_BM_CH0;
5483 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5484 break;
5485 case GAUDI_EVENT_HBM0_SPI_0:
5486 case GAUDI_EVENT_HBM1_SPI_0:
5487 case GAUDI_EVENT_HBM2_SPI_0:
5488 case GAUDI_EVENT_HBM3_SPI_0:
5489 index = (event_type - GAUDI_EVENT_HBM0_SPI_0) / 4;
5490 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5491 break;
5492 case GAUDI_EVENT_HBM0_SPI_1:
5493 case GAUDI_EVENT_HBM1_SPI_1:
5494 case GAUDI_EVENT_HBM2_SPI_1:
5495 case GAUDI_EVENT_HBM3_SPI_1:
5496 index = (event_type - GAUDI_EVENT_HBM0_SPI_1) / 4;
5497 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5498 break;
5499 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
5500 index = event_type - GAUDI_EVENT_TPC0_QM;
5501 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5502 break;
5503 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
5504 index = event_type - GAUDI_EVENT_MME0_QM;
5505 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5506 break;
5507 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
5508 index = event_type - GAUDI_EVENT_DMA0_QM;
5509 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5510 break;
5511 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
5512 index = event_type - GAUDI_EVENT_DMA0_CORE;
5513 snprintf(desc, size, _gaudi_get_event_desc(event_type), index);
5514 break;
5515 default:
5516 snprintf(desc, size, _gaudi_get_event_desc(event_type));
5517 break;
5518 }
5519}
5520
5521static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev,
5522 u32 x_y, bool is_write)
5523{
5524 u32 dma_id[2], dma_offset, err_cause[2], mask, i;
5525
5526 mask = is_write ? DMA0_CORE_ERR_CAUSE_HBW_WR_ERR_MASK :
5527 DMA0_CORE_ERR_CAUSE_HBW_RD_ERR_MASK;
5528
5529 switch (x_y) {
5530 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5531 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5532 dma_id[0] = 0;
5533 dma_id[1] = 2;
5534 break;
5535 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5536 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5537 dma_id[0] = 1;
5538 dma_id[1] = 3;
5539 break;
5540 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5541 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5542 dma_id[0] = 4;
5543 dma_id[1] = 6;
5544 break;
5545 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5546 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5547 dma_id[0] = 5;
5548 dma_id[1] = 7;
5549 break;
5550 default:
5551 goto unknown_initiator;
5552 }
5553
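	/*
	 * Each DMA_IF router serves two DMA cores, so read both candidates'
	 * ERR_CAUSE registers and use the HBW read/write error bits to decide
	 * which core actually triggered the RAZWI.
	 */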
5554 for (i = 0 ; i < 2 ; i++) {
5555 dma_offset = dma_id[i] * DMA_CORE_OFFSET;
5556 err_cause[i] = RREG32(mmDMA0_CORE_ERR_CAUSE + dma_offset);
5557 }
5558
5559 switch (x_y) {
5560 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5561 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5562 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5563 return "DMA0";
5564 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5565 return "DMA2";
5566 else
5567 return "DMA0 or DMA2";
5568 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5569 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5570 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5571 return "DMA1";
5572 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5573 return "DMA3";
5574 else
5575 return "DMA1 or DMA3";
5576 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5577 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5578 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5579 return "DMA4";
5580 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5581 return "DMA6";
5582 else
5583 return "DMA4 or DMA6";
5584 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5585 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5586 if ((err_cause[0] & mask) && !(err_cause[1] & mask))
5587 return "DMA5";
5588 else if (!(err_cause[0] & mask) && (err_cause[1] & mask))
5589 return "DMA7";
5590 else
5591 return "DMA5 or DMA7";
5592 }
5593
5594unknown_initiator:
5595 return "unknown initiator";
5596}
5597
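/*
 * The RAZWI capture register encodes the initiator's X/Y grid location and an
 * AXI ID; together these identify the engine that issued the offending
 * transaction.
 */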
5598static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev,
5599 bool is_write)
5600{
5601 u32 val, x_y, axi_id;
5602
5603 val = is_write ? RREG32(mmMMU_UP_RAZWI_WRITE_ID) :
5604 RREG32(mmMMU_UP_RAZWI_READ_ID);
5605 x_y = val & ((RAZWI_INITIATOR_Y_MASK << RAZWI_INITIATOR_Y_SHIFT) |
5606 (RAZWI_INITIATOR_X_MASK << RAZWI_INITIATOR_X_SHIFT));
5607 axi_id = val & (RAZWI_INITIATOR_AXI_ID_MASK <<
5608 RAZWI_INITIATOR_AXI_ID_SHIFT);
5609
5610 switch (x_y) {
5611 case RAZWI_INITIATOR_ID_X_Y_TPC0_NIC0:
5612 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5613 return "TPC0";
5614 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5615 return "NIC0";
5616 break;
5617 case RAZWI_INITIATOR_ID_X_Y_TPC1:
5618 return "TPC1";
5619 case RAZWI_INITIATOR_ID_X_Y_MME0_0:
5620 case RAZWI_INITIATOR_ID_X_Y_MME0_1:
5621 return "MME0";
5622 case RAZWI_INITIATOR_ID_X_Y_MME1_0:
5623 case RAZWI_INITIATOR_ID_X_Y_MME1_1:
5624 return "MME1";
5625 case RAZWI_INITIATOR_ID_X_Y_TPC2:
5626 return "TPC2";
5627 case RAZWI_INITIATOR_ID_X_Y_TPC3_PCI_CPU_PSOC:
5628 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5629 return "TPC3";
5630 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PCI))
5631 return "PCI";
5632 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_CPU))
5633 return "CPU";
5634 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_PSOC))
5635 return "PSOC";
5636 break;
5637 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_0:
5638 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_S_1:
5639 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_0:
5640 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_S_1:
5641 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_0:
5642 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_W_N_1:
5643 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_0:
5644 case RAZWI_INITIATOR_ID_X_Y_DMA_IF_E_N_1:
5645 return gaudi_get_razwi_initiator_dma_name(hdev, x_y, is_write);
5646 case RAZWI_INITIATOR_ID_X_Y_TPC4_NIC1_NIC2:
5647 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5648 return "TPC4";
5649 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5650 return "NIC1";
5651 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5652 return "NIC2";
5653 break;
5654 case RAZWI_INITIATOR_ID_X_Y_TPC5:
5655 return "TPC5";
5656 case RAZWI_INITIATOR_ID_X_Y_MME2_0:
5657 case RAZWI_INITIATOR_ID_X_Y_MME2_1:
5658 return "MME2";
5659 case RAZWI_INITIATOR_ID_X_Y_MME3_0:
5660 case RAZWI_INITIATOR_ID_X_Y_MME3_1:
5661 return "MME3";
5662 case RAZWI_INITIATOR_ID_X_Y_TPC6:
5663 return "TPC6";
5664 case RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5:
5665 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_TPC))
5666 return "TPC7";
5667 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC))
5668 return "NIC4";
5669 if (axi_id == RAZWI_INITIATOR_ID_AXI_ID(AXI_ID_NIC_FT))
5670 return "NIC5";
5671 break;
5672 default:
5673 break;
5674 }
5675
5676 dev_err(hdev->dev,
5677 "Unknown RAZWI initiator ID 0x%x [Y=%d, X=%d, AXI_ID=%d]\n",
5678 val,
5679 (val >> RAZWI_INITIATOR_Y_SHIFT) & RAZWI_INITIATOR_Y_MASK,
5680 (val >> RAZWI_INITIATOR_X_SHIFT) & RAZWI_INITIATOR_X_MASK,
5681 (val >> RAZWI_INITIATOR_AXI_ID_SHIFT) &
5682 RAZWI_INITIATOR_AXI_ID_MASK);
5683
5684 return "unknown initiator";
5685}
5686
5687static void gaudi_print_razwi_info(struct hl_device *hdev)
5688{
5689 if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
5690 dev_err_ratelimited(hdev->dev,
5691 "RAZWI event caused by illegal write of %s\n",
5692 gaudi_get_razwi_initiator_name(hdev, true));
5693 WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
5694 }
5695
5696 if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
5697 dev_err_ratelimited(hdev->dev,
5698 "RAZWI event caused by illegal read of %s\n",
5699 gaudi_get_razwi_initiator_name(hdev, false));
5700 WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
5701 }
5702}
5703
5704static void gaudi_print_mmu_error_info(struct hl_device *hdev)
5705{
5706 struct gaudi_device *gaudi = hdev->asic_specific;
5707 u64 addr;
5708 u32 val;
5709
5710 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
5711 return;
5712
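	/*
	 * The faulting virtual address is split across two registers: bits
	 * 49:32 are held in the capture register itself and bits 31:0 in the
	 * companion *_CAPTURE_VA register.
	 */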
5713 val = RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE);
5714 if (val & MMU_UP_PAGE_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5715 addr = val & MMU_UP_PAGE_ERROR_CAPTURE_VA_49_32_MASK;
5716 addr <<= 32;
5717 addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
5718
5719 dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
5720 addr);
5721
5722 WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
5723 }
5724
5725 val = RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE);
5726 if (val & MMU_UP_ACCESS_ERROR_CAPTURE_ENTRY_VALID_MASK) {
5727 addr = val & MMU_UP_ACCESS_ERROR_CAPTURE_VA_49_32_MASK;
5728 addr <<= 32;
5729 addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
5730
5731 dev_err_ratelimited(hdev->dev,
5732 "MMU access error on va 0x%llx\n", addr);
5733
5734 WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
5735 }
5736}
5737
5738/*
5739 * +-------------------+------------------------------------------------------+
5740 * | Configuration Reg | Description |
5741 * | Address | |
5742 * +-------------------+------------------------------------------------------+
5743 * | 0xF30 - 0xF3F |ECC single error indication (1 bit per memory wrapper)|
5744 * | |0xF30 memory wrappers 31:0 (MSB to LSB) |
5745 * | |0xF34 memory wrappers 63:32 |
5746 * | |0xF38 memory wrappers 95:64 |
5747 * | |0xF3C memory wrappers 127:96 |
5748 * +-------------------+------------------------------------------------------+
5749 * | 0xF40 - 0xF4F |ECC double error indication (1 bit per memory wrapper)|
5750 * | |0xF40 memory wrappers 31:0 (MSB to LSB) |
5751 * | |0xF44 memory wrappers 63:32 |
5752 * | |0xF48 memory wrappers 95:64 |
5753 * | |0xF4C memory wrappers 127:96 |
5754 * +-------------------+------------------------------------------------------+
5755 */
5756static void gaudi_print_ecc_info_generic(struct hl_device *hdev,
5757 const char *block_name,
5758 u64 block_address, int num_memories,
5759 bool derr, bool disable_clock_gating)
5760{
5761 struct gaudi_device *gaudi = hdev->asic_specific;
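	/* Each 32-bit indication register covers 32 memory wrappers */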
5762 	int num_mem_regs = DIV_ROUND_UP(num_memories, 32);
5763
5764 if (block_address >= CFG_BASE)
5765 block_address -= CFG_BASE;
5766
5767 if (derr)
5768 block_address += GAUDI_ECC_DERR0_OFFSET;
5769 else
5770 block_address += GAUDI_ECC_SERR0_OFFSET;
5771
5772 if (disable_clock_gating) {
5773 mutex_lock(&gaudi->clk_gate_mutex);
5774 hdev->asic_funcs->disable_clock_gating(hdev);
5775 }
5776
5777 switch (num_mem_regs) {
5778 case 1:
5779 dev_err(hdev->dev,
5780 "%s ECC indication: 0x%08x\n",
5781 block_name, RREG32(block_address));
5782 break;
5783 case 2:
5784 dev_err(hdev->dev,
5785 "%s ECC indication: 0x%08x 0x%08x\n",
5786 block_name,
5787 RREG32(block_address), RREG32(block_address + 4));
5788 break;
5789 case 3:
5790 dev_err(hdev->dev,
5791 "%s ECC indication: 0x%08x 0x%08x 0x%08x\n",
5792 block_name,
5793 RREG32(block_address), RREG32(block_address + 4),
5794 RREG32(block_address + 8));
5795 break;
5796 case 4:
5797 dev_err(hdev->dev,
5798 "%s ECC indication: 0x%08x 0x%08x 0x%08x 0x%08x\n",
5799 block_name,
5800 RREG32(block_address), RREG32(block_address + 4),
5801 RREG32(block_address + 8), RREG32(block_address + 0xc));
5802 break;
5803 default:
5804 break;
5805
5806 }
5807
5808 if (disable_clock_gating) {
5809 hdev->asic_funcs->enable_clock_gating(hdev);
5810 mutex_unlock(&gaudi->clk_gate_mutex);
5811 }
5812}
5813
5814static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
5815 const char *qm_name,
5816 u64 glbl_sts_addr,
5817 u64 arb_err_addr)
5818{
5819 u32 i, j, glbl_sts_val, arb_err_val, glbl_sts_clr_val;
5820 char reg_desc[32];
5821
5822 /* Iterate through all stream GLBL_STS1 registers + Lower CP */
5823 for (i = 0 ; i < QMAN_STREAMS + 1 ; i++) {
5824 glbl_sts_clr_val = 0;
5825 glbl_sts_val = RREG32(glbl_sts_addr + 4 * i);
5826
5827 if (!glbl_sts_val)
5828 continue;
5829
5830 if (i == QMAN_STREAMS)
5831 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "LowerCP");
5832 else
5833 snprintf(reg_desc, ARRAY_SIZE(reg_desc), "stream%u", i);
5834
5835 for (j = 0 ; j < GAUDI_NUM_OF_QM_ERR_CAUSE ; j++) {
5836 if (glbl_sts_val & BIT(j)) {
5837 dev_err_ratelimited(hdev->dev,
5838 "%s %s. err cause: %s\n",
5839 qm_name, reg_desc,
5840 gaudi_qman_error_cause[j]);
5841 glbl_sts_clr_val |= BIT(j);
5842 }
5843 }
5844
5845 		/* Write 1 to clear errors */
5846 WREG32(glbl_sts_addr + 4 * i, glbl_sts_clr_val);
5847 }
5848
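	/* Arbiter errors are reported via a separate ARB_ERR cause register */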
5849 arb_err_val = RREG32(arb_err_addr);
5850
5851 if (!arb_err_val)
5852 return;
5853
5854 for (j = 0 ; j < GAUDI_NUM_OF_QM_ARB_ERR_CAUSE ; j++) {
5855 if (arb_err_val & BIT(j)) {
5856 dev_err_ratelimited(hdev->dev,
5857 "%s ARB_ERR. err cause: %s\n",
5858 qm_name,
5859 gaudi_qman_arb_error_cause[j]);
5860 }
5861 }
5862}
5863
5864static void gaudi_print_ecc_info(struct hl_device *hdev, u16 event_type)
5865{
5866 u64 block_address;
5867 u8 index;
5868 int num_memories;
5869 char desc[32];
5870 bool derr;
5871 bool disable_clock_gating;
5872
5873 switch (event_type) {
5874 case GAUDI_EVENT_PCIE_CORE_SERR:
5875 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
5876 block_address = mmPCIE_CORE_BASE;
5877 num_memories = 51;
5878 derr = false;
5879 disable_clock_gating = false;
5880 break;
5881 case GAUDI_EVENT_PCIE_CORE_DERR:
5882 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_CORE");
5883 block_address = mmPCIE_CORE_BASE;
5884 num_memories = 51;
5885 derr = true;
5886 disable_clock_gating = false;
5887 break;
5888 case GAUDI_EVENT_PCIE_IF_SERR:
5889 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
5890 block_address = mmPCIE_WRAP_BASE;
5891 num_memories = 11;
5892 derr = false;
5893 disable_clock_gating = false;
5894 break;
5895 case GAUDI_EVENT_PCIE_IF_DERR:
5896 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_WRAP");
5897 block_address = mmPCIE_WRAP_BASE;
5898 num_memories = 11;
5899 derr = true;
5900 disable_clock_gating = false;
5901 break;
5902 case GAUDI_EVENT_PCIE_PHY_SERR:
5903 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
5904 block_address = mmPCIE_PHY_BASE;
5905 num_memories = 4;
5906 derr = false;
5907 disable_clock_gating = false;
5908 break;
5909 case GAUDI_EVENT_PCIE_PHY_DERR:
5910 snprintf(desc, ARRAY_SIZE(desc), "%s", "PCIE_PHY");
5911 block_address = mmPCIE_PHY_BASE;
5912 num_memories = 4;
5913 derr = true;
5914 disable_clock_gating = false;
5915 break;
5916 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
5917 index = event_type - GAUDI_EVENT_TPC0_SERR;
5918 block_address = mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5919 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
5920 num_memories = 90;
5921 derr = false;
5922 disable_clock_gating = true;
5923 break;
5924 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
5925 index = event_type - GAUDI_EVENT_TPC0_DERR;
5926 block_address =
5927 mmTPC0_CFG_BASE + index * TPC_CFG_OFFSET;
5928 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC", index);
5929 num_memories = 90;
5930 derr = true;
5931 disable_clock_gating = true;
5932 break;
5933 case GAUDI_EVENT_MME0_ACC_SERR:
5934 case GAUDI_EVENT_MME1_ACC_SERR:
5935 case GAUDI_EVENT_MME2_ACC_SERR:
5936 case GAUDI_EVENT_MME3_ACC_SERR:
5937 index = (event_type - GAUDI_EVENT_MME0_ACC_SERR) / 4;
5938 block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5939 snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
5940 num_memories = 128;
5941 derr = false;
5942 disable_clock_gating = true;
5943 break;
5944 case GAUDI_EVENT_MME0_ACC_DERR:
5945 case GAUDI_EVENT_MME1_ACC_DERR:
5946 case GAUDI_EVENT_MME2_ACC_DERR:
5947 case GAUDI_EVENT_MME3_ACC_DERR:
5948 index = (event_type - GAUDI_EVENT_MME0_ACC_DERR) / 4;
5949 block_address = mmMME0_ACC_BASE + index * MME_ACC_OFFSET;
5950 snprintf(desc, ARRAY_SIZE(desc), "MME%d_ACC", index);
5951 num_memories = 128;
5952 derr = true;
5953 disable_clock_gating = true;
5954 break;
5955 case GAUDI_EVENT_MME0_SBAB_SERR:
5956 case GAUDI_EVENT_MME1_SBAB_SERR:
5957 case GAUDI_EVENT_MME2_SBAB_SERR:
5958 case GAUDI_EVENT_MME3_SBAB_SERR:
5959 index = (event_type - GAUDI_EVENT_MME0_SBAB_SERR) / 4;
5960 block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5961 snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
5962 num_memories = 33;
5963 derr = false;
5964 disable_clock_gating = true;
5965 break;
5966 case GAUDI_EVENT_MME0_SBAB_DERR:
5967 case GAUDI_EVENT_MME1_SBAB_DERR:
5968 case GAUDI_EVENT_MME2_SBAB_DERR:
5969 case GAUDI_EVENT_MME3_SBAB_DERR:
5970 index = (event_type - GAUDI_EVENT_MME0_SBAB_DERR) / 4;
5971 block_address = mmMME0_SBAB_BASE + index * MME_ACC_OFFSET;
5972 snprintf(desc, ARRAY_SIZE(desc), "MME%d_SBAB", index);
5973 num_memories = 33;
5974 derr = true;
5975 disable_clock_gating = true;
5976 break;
5977 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
5978 index = event_type - GAUDI_EVENT_DMA0_SERR_ECC;
5979 block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
5980 snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
5981 num_memories = 16;
5982 derr = false;
5983 disable_clock_gating = false;
5984 break;
5985 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
5986 index = event_type - GAUDI_EVENT_DMA0_DERR_ECC;
5987 block_address = mmDMA0_CORE_BASE + index * DMA_CORE_OFFSET;
5988 snprintf(desc, ARRAY_SIZE(desc), "DMA%d_CORE", index);
5989 num_memories = 16;
5990 derr = true;
5991 disable_clock_gating = false;
5992 break;
5993 case GAUDI_EVENT_CPU_IF_ECC_SERR:
5994 block_address = mmCPU_IF_BASE;
5995 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
5996 num_memories = 4;
5997 derr = false;
5998 disable_clock_gating = false;
5999 break;
6000 case GAUDI_EVENT_CPU_IF_ECC_DERR:
6001 block_address = mmCPU_IF_BASE;
6002 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
6003 num_memories = 4;
6004 derr = true;
6005 disable_clock_gating = false;
6006 break;
6007 case GAUDI_EVENT_PSOC_MEM_SERR:
6008 block_address = mmPSOC_GLOBAL_CONF_BASE;
6009 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
6010 num_memories = 4;
6011 derr = false;
6012 disable_clock_gating = false;
6013 break;
6014 case GAUDI_EVENT_PSOC_MEM_DERR:
6015 block_address = mmPSOC_GLOBAL_CONF_BASE;
6016 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
6017 num_memories = 4;
6018 derr = true;
6019 disable_clock_gating = false;
6020 break;
6021 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
6022 block_address = mmPSOC_CS_TRACE_BASE;
6023 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
6024 num_memories = 2;
6025 derr = false;
6026 disable_clock_gating = false;
6027 break;
6028 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
6029 block_address = mmPSOC_CS_TRACE_BASE;
6030 snprintf(desc, ARRAY_SIZE(desc), "%s", "CPU");
6031 num_memories = 2;
6032 derr = true;
6033 disable_clock_gating = false;
6034 break;
6035 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
6036 index = event_type - GAUDI_EVENT_SRAM0_SERR;
6037 block_address =
6038 mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
6039 snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
6040 num_memories = 2;
6041 derr = false;
6042 disable_clock_gating = false;
6043 break;
6044 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
6045 index = event_type - GAUDI_EVENT_SRAM0_DERR;
6046 block_address =
6047 mmSRAM_Y0_X0_BANK_BASE + index * SRAM_BANK_OFFSET;
6048 snprintf(desc, ARRAY_SIZE(desc), "SRAM%d", index);
6049 num_memories = 2;
6050 derr = true;
6051 disable_clock_gating = false;
6052 break;
6053 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
6054 index = event_type - GAUDI_EVENT_DMA_IF0_SERR;
6055 block_address = mmDMA_IF_W_S_BASE +
6056 index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
6057 snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
6058 num_memories = 60;
6059 derr = false;
6060 disable_clock_gating = false;
6061 break;
6062 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
6063 index = event_type - GAUDI_EVENT_DMA_IF0_DERR;
6064 block_address = mmDMA_IF_W_S_BASE +
6065 index * (mmDMA_IF_E_S_BASE - mmDMA_IF_W_S_BASE);
6066 snprintf(desc, ARRAY_SIZE(desc), "DMA_IF%d", index);
6067 derr = true;
6068 num_memories = 60;
6069 disable_clock_gating = false;
6070 break;
6071 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
6072 index = event_type - GAUDI_EVENT_HBM_0_SERR;
6073 		/* HBM ECC registers sit at a 0x8000 offset within each HBM block */
6074 block_address = mmHBM0_BASE + 0x8000 +
6075 index * (mmHBM1_BASE - mmHBM0_BASE);
6076 snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
6077 derr = false;
6078 num_memories = 64;
6079 disable_clock_gating = false;
6080 break;
6081 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
6082 		index = event_type - GAUDI_EVENT_HBM_0_DERR;
6083 		/* HBM ECC registers sit at a 0x8000 offset within each HBM block */
6084 block_address = mmHBM0_BASE + 0x8000 +
6085 index * (mmHBM1_BASE - mmHBM0_BASE);
6086 snprintf(desc, ARRAY_SIZE(desc), "HBM%d", index);
6087 derr = true;
6088 num_memories = 64;
6089 disable_clock_gating = false;
6090 break;
6091 default:
6092 return;
6093 }
6094
6095 gaudi_print_ecc_info_generic(hdev, desc, block_address, num_memories,
6096 derr, disable_clock_gating);
6097}
6098
6099static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type)
6100{
6101 u64 glbl_sts_addr, arb_err_addr;
6102 u8 index;
6103 char desc[32];
6104
6105 switch (event_type) {
6106 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
6107 index = event_type - GAUDI_EVENT_TPC0_QM;
6108 glbl_sts_addr =
6109 mmTPC0_QM_GLBL_STS1_0 + index * TPC_QMAN_OFFSET;
6110 arb_err_addr =
6111 mmTPC0_QM_ARB_ERR_CAUSE + index * TPC_QMAN_OFFSET;
6112 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "TPC_QM", index);
6113 break;
6114 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
6115 index = event_type - GAUDI_EVENT_MME0_QM;
6116 glbl_sts_addr =
6117 mmMME0_QM_GLBL_STS1_0 + index * MME_QMAN_OFFSET;
6118 arb_err_addr =
6119 mmMME0_QM_ARB_ERR_CAUSE + index * MME_QMAN_OFFSET;
6120 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "MME_QM", index);
6121 break;
6122 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
6123 index = event_type - GAUDI_EVENT_DMA0_QM;
6124 glbl_sts_addr =
6125 mmDMA0_QM_GLBL_STS1_0 + index * DMA_QMAN_OFFSET;
6126 arb_err_addr =
6127 mmDMA0_QM_ARB_ERR_CAUSE + index * DMA_QMAN_OFFSET;
6128 snprintf(desc, ARRAY_SIZE(desc), "%s%d", "DMA_QM", index);
6129 break;
6130 default:
6131 return;
6132 }
6133
6134 gaudi_handle_qman_err_generic(hdev, desc, glbl_sts_addr, arb_err_addr);
6135}
6136
6137static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
6138 bool razwi)
6139{
6140 char desc[20] = "";
6141
6142 gaudi_get_event_desc(event_type, desc, sizeof(desc));
6143 dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
6144 event_type, desc);
6145
6146 gaudi_print_ecc_info(hdev, event_type);
6147
6148 if (razwi) {
6149 gaudi_print_razwi_info(hdev);
6150 gaudi_print_mmu_error_info(hdev);
6151 }
6152}
6153
6154static int gaudi_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
6155 size_t irq_arr_size)
6156{
6157 struct armcp_unmask_irq_arr_packet *pkt;
6158 size_t total_pkt_size;
6159 long result;
6160 int rc;
6161
6162 total_pkt_size = sizeof(struct armcp_unmask_irq_arr_packet) +
6163 irq_arr_size;
6164
6165 	/* data should be aligned to 8 bytes so that ArmCP can copy it */
6166 total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
6167
6168 	/* total_pkt_size is cast to u16 later on */
6169 if (total_pkt_size > USHRT_MAX) {
6170 dev_err(hdev->dev, "too many elements in IRQ array\n");
6171 return -EINVAL;
6172 }
6173
6174 pkt = kzalloc(total_pkt_size, GFP_KERNEL);
6175 if (!pkt)
6176 return -ENOMEM;
6177
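	/* The length field counts IRQ entries, not bytes */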
6178 pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
6179 memcpy(&pkt->irqs, irq_arr, irq_arr_size);
6180
6181 pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
6182 ARMCP_PKT_CTL_OPCODE_SHIFT);
6183
6184 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
6185 total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
6186
6187 if (rc)
6188 dev_err(hdev->dev, "failed to unmask IRQ array\n");
6189
6190 kfree(pkt);
6191
6192 return rc;
6193}
6194
6195static int gaudi_soft_reset_late_init(struct hl_device *hdev)
6196{
6197 /* Unmask all IRQs since some could have been received
6198 * during the soft reset
6199 */
6200 return gaudi_unmask_irq_arr(hdev, gaudi_all_events,
6201 sizeof(gaudi_all_events));
6202}
6203
6204static int gaudi_unmask_irq(struct hl_device *hdev, u16 event_type)
6205{
6206 struct armcp_packet pkt;
6207 long result;
6208 int rc;
6209
6210 memset(&pkt, 0, sizeof(pkt));
6211
6212 pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ <<
6213 ARMCP_PKT_CTL_OPCODE_SHIFT);
6214 pkt.value = cpu_to_le64(event_type);
6215
6216 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
6217 HL_DEVICE_TIMEOUT_USEC, &result);
6218
6219 if (rc)
6220 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
6221
6222 return rc;
6223}
6224
6225static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device)
6226{
6227 int ch, err = 0;
6228 u32 base, val, val2;
6229
6230 base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
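	/*
	 * Each HBM channel occupies a 0x1000 register stride and reports two
	 * sub-channels (the "pc" index in the messages below): interrupt info
	 * at 0x06C/0x07C and ECC counters at 0x060/0x070.
	 */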
6231 for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
6232 val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
6233 val = (val & 0xFF) | ((val >> 8) & 0xFF);
6234 if (val) {
6235 err = 1;
6236 dev_err(hdev->dev,
6237 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6238 device, ch * 2, val & 0x1, (val >> 1) & 0x1,
6239 (val >> 2) & 0x1, (val >> 3) & 0x1,
6240 (val >> 4) & 0x1);
6241
6242 val2 = RREG32(base + ch * 0x1000 + 0x060);
6243 dev_err(hdev->dev,
6244 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
6245 device, ch * 2,
6246 RREG32(base + ch * 0x1000 + 0x064),
6247 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
6248 (val2 & 0xFF0000) >> 16,
6249 (val2 & 0xFF000000) >> 24);
6250 }
6251
6252 val = RREG32_MASK(base + ch * 0x1000 + 0x07C, 0x0000FFFF);
6253 val = (val & 0xFF) | ((val >> 8) & 0xFF);
6254 if (val) {
6255 err = 1;
6256 dev_err(hdev->dev,
6257 "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
6258 device, ch * 2 + 1, val & 0x1, (val >> 1) & 0x1,
6259 (val >> 2) & 0x1, (val >> 3) & 0x1,
6260 (val >> 4) & 0x1);
6261
6262 val2 = RREG32(base + ch * 0x1000 + 0x070);
6263 dev_err(hdev->dev,
6264 "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%d, SEC_CNT=%d, DED_CNT=%d\n",
6265 device, ch * 2 + 1,
6266 RREG32(base + ch * 0x1000 + 0x074),
6267 (val2 & 0x200) >> 9, (val2 & 0xFC00) >> 10,
6268 (val2 & 0xFF0000) >> 16,
6269 (val2 & 0xFF000000) >> 24);
6270 }
6271
6272 /* Clear interrupts */
6273 RMWREG32(base + (ch * 0x1000) + 0x060, 0x1C8, 0x1FF);
6274 RMWREG32(base + (ch * 0x1000) + 0x070, 0x1C8, 0x1FF);
6275 WREG32(base + (ch * 0x1000) + 0x06C, 0x1F1F);
6276 WREG32(base + (ch * 0x1000) + 0x07C, 0x1F1F);
6277 RMWREG32(base + (ch * 0x1000) + 0x060, 0x0, 0xF);
6278 RMWREG32(base + (ch * 0x1000) + 0x070, 0x0, 0xF);
6279 }
6280
6281 val = RREG32(base + 0x8F30);
6282 val2 = RREG32(base + 0x8F34);
6283 if (val | val2) {
6284 err = 1;
6285 dev_err(hdev->dev,
6286 "HBM %d MC SRAM SERR info: Reg 0x8F30=0x%x, Reg 0x8F34=0x%x\n",
6287 device, val, val2);
6288 }
6289 val = RREG32(base + 0x8F40);
6290 val2 = RREG32(base + 0x8F44);
6291 if (val | val2) {
6292 err = 1;
6293 dev_err(hdev->dev,
6294 "HBM %d MC SRAM DERR info: Reg 0x8F40=0x%x, Reg 0x8F44=0x%x\n",
6295 device, val, val2);
6296 }
6297
6298 return err;
6299}
6300
6301static int gaudi_hbm_event_to_dev(u16 hbm_event_type)
6302{
6303 switch (hbm_event_type) {
6304 case GAUDI_EVENT_HBM0_SPI_0:
6305 case GAUDI_EVENT_HBM0_SPI_1:
6306 return 0;
6307 case GAUDI_EVENT_HBM1_SPI_0:
6308 case GAUDI_EVENT_HBM1_SPI_1:
6309 return 1;
6310 case GAUDI_EVENT_HBM2_SPI_0:
6311 case GAUDI_EVENT_HBM2_SPI_1:
6312 return 2;
6313 case GAUDI_EVENT_HBM3_SPI_0:
6314 case GAUDI_EVENT_HBM3_SPI_1:
6315 return 3;
6316 default:
6317 break;
6318 }
6319
6320 /* Should never happen */
6321 return 0;
6322}
6323
6324static bool gaudi_tpc_read_interrupts(struct hl_device *hdev, u8 tpc_id,
6325 char *interrupt_name)
6326{
6327 struct gaudi_device *gaudi = hdev->asic_specific;
6328 u32 tpc_offset = tpc_id * TPC_CFG_OFFSET, tpc_interrupts_cause, i;
6329 bool soft_reset_required = false;
6330
6331 /* Accessing the TPC_INTR_CAUSE registers requires disabling the clock
6332 	 * gating, which cannot be done by ArmCP, so the driver handles it
6333 	 * instead.
6334 */
6335
6336 mutex_lock(&gaudi->clk_gate_mutex);
6337
6338 hdev->asic_funcs->disable_clock_gating(hdev);
6339
6340 tpc_interrupts_cause = RREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset) &
6341 TPC0_CFG_TPC_INTR_CAUSE_CAUSE_MASK;
6342
6343 for (i = 0 ; i < GAUDI_NUM_OF_TPC_INTR_CAUSE ; i++)
6344 if (tpc_interrupts_cause & BIT(i)) {
6345 dev_err_ratelimited(hdev->dev,
6346 "TPC%d_%s interrupt cause: %s\n",
6347 tpc_id, interrupt_name,
6348 gaudi_tpc_interrupts_cause[i]);
6349 /* If this is QM error, we need to soft-reset */
6350 if (i == 15)
6351 soft_reset_required = true;
6352 }
6353
6354 /* Clear interrupts */
6355 WREG32(mmTPC0_CFG_TPC_INTR_CAUSE + tpc_offset, 0);
6356
6357 hdev->asic_funcs->enable_clock_gating(hdev);
6358
6359 mutex_unlock(&gaudi->clk_gate_mutex);
6360
6361 return soft_reset_required;
6362}
6363
6364static int tpc_dec_event_to_tpc_id(u16 tpc_dec_event_type)
6365{
6366 return (tpc_dec_event_type - GAUDI_EVENT_TPC0_DEC) >> 1;
6367}
6368
6369 static int tpc_krn_event_to_tpc_id(u16 tpc_krn_event_type)
6370 {
6371 	return (tpc_krn_event_type - GAUDI_EVENT_TPC0_KRN_ERR) / 6;
6372}
6373
6374static void gaudi_print_clk_change_info(struct hl_device *hdev,
6375 u16 event_type)
6376{
6377 switch (event_type) {
6378 case GAUDI_EVENT_FIX_POWER_ENV_S:
6379 dev_info_ratelimited(hdev->dev,
6380 "Clock throttling due to power consumption\n");
6381 break;
6382
6383 case GAUDI_EVENT_FIX_POWER_ENV_E:
6384 dev_info_ratelimited(hdev->dev,
6385 			"Power envelope is safe, back to optimal clock\n");
6386 break;
6387
6388 case GAUDI_EVENT_FIX_THERMAL_ENV_S:
6389 dev_info_ratelimited(hdev->dev,
6390 "Clock throttling due to overheating\n");
6391 break;
6392
6393 case GAUDI_EVENT_FIX_THERMAL_ENV_E:
6394 dev_info_ratelimited(hdev->dev,
6395 			"Thermal envelope is safe, back to optimal clock\n");
6396 break;
6397
6398 default:
6399 dev_err(hdev->dev, "Received invalid clock change event %d\n",
6400 event_type);
6401 break;
6402 }
6403}
6404
6405static void gaudi_handle_eqe(struct hl_device *hdev,
6406 struct hl_eq_entry *eq_entry)
6407{
6408 struct gaudi_device *gaudi = hdev->asic_specific;
6409 u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
6410 u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
6411 >> EQ_CTL_EVENT_TYPE_SHIFT);
6412 u8 cause;
6413 bool soft_reset_required;
6414
6415 gaudi->events_stat[event_type]++;
6416 gaudi->events_stat_aggregate[event_type]++;
6417
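	/*
	 * Double-bit (DERR) ECC errors and other fatal events lead to a hard
	 * reset when hard_reset_on_fw_events is set. Most other events are
	 * only reported and then re-unmasked in ArmCP; TPC DEC/KRN errors can
	 * trigger a soft reset and the SW RAZWI/ADC event always does.
	 */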
6418 switch (event_type) {
6419 case GAUDI_EVENT_PCIE_CORE_DERR:
6420 case GAUDI_EVENT_PCIE_IF_DERR:
6421 case GAUDI_EVENT_PCIE_PHY_DERR:
6422 case GAUDI_EVENT_TPC0_DERR ... GAUDI_EVENT_TPC7_DERR:
6423 case GAUDI_EVENT_MME0_ACC_DERR:
6424 case GAUDI_EVENT_MME0_SBAB_DERR:
6425 case GAUDI_EVENT_MME1_ACC_DERR:
6426 case GAUDI_EVENT_MME1_SBAB_DERR:
6427 case GAUDI_EVENT_MME2_ACC_DERR:
6428 case GAUDI_EVENT_MME2_SBAB_DERR:
6429 case GAUDI_EVENT_MME3_ACC_DERR:
6430 case GAUDI_EVENT_MME3_SBAB_DERR:
6431 case GAUDI_EVENT_DMA0_DERR_ECC ... GAUDI_EVENT_DMA7_DERR_ECC:
6432 fallthrough;
6433 case GAUDI_EVENT_CPU_IF_ECC_DERR:
6434 case GAUDI_EVENT_PSOC_MEM_DERR:
6435 case GAUDI_EVENT_PSOC_CORESIGHT_DERR:
6436 case GAUDI_EVENT_SRAM0_DERR ... GAUDI_EVENT_SRAM28_DERR:
6437 case GAUDI_EVENT_DMA_IF0_DERR ... GAUDI_EVENT_DMA_IF3_DERR:
6438 fallthrough;
6439 case GAUDI_EVENT_GIC500:
6440 case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
6441 case GAUDI_EVENT_MMU_DERR:
6442 case GAUDI_EVENT_AXI_ECC:
6443 case GAUDI_EVENT_L2_RAM_ECC:
6444 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
6445 gaudi_print_irq_info(hdev, event_type, false);
6446 if (hdev->hard_reset_on_fw_events)
6447 hl_device_reset(hdev, true, false);
6448 break;
6449
6450 case GAUDI_EVENT_HBM0_SPI_0:
6451 case GAUDI_EVENT_HBM1_SPI_0:
6452 case GAUDI_EVENT_HBM2_SPI_0:
6453 case GAUDI_EVENT_HBM3_SPI_0:
6454 gaudi_print_irq_info(hdev, event_type, false);
6455 gaudi_hbm_read_interrupts(hdev,
6456 gaudi_hbm_event_to_dev(event_type));
6457 if (hdev->hard_reset_on_fw_events)
6458 hl_device_reset(hdev, true, false);
6459 break;
6460
6461 case GAUDI_EVENT_HBM0_SPI_1:
6462 case GAUDI_EVENT_HBM1_SPI_1:
6463 case GAUDI_EVENT_HBM2_SPI_1:
6464 case GAUDI_EVENT_HBM3_SPI_1:
6465 gaudi_print_irq_info(hdev, event_type, false);
6466 gaudi_hbm_read_interrupts(hdev,
6467 gaudi_hbm_event_to_dev(event_type));
6468 break;
6469
6470 case GAUDI_EVENT_TPC0_DEC:
6471 case GAUDI_EVENT_TPC1_DEC:
6472 case GAUDI_EVENT_TPC2_DEC:
6473 case GAUDI_EVENT_TPC3_DEC:
6474 case GAUDI_EVENT_TPC4_DEC:
6475 case GAUDI_EVENT_TPC5_DEC:
6476 case GAUDI_EVENT_TPC6_DEC:
6477 case GAUDI_EVENT_TPC7_DEC:
6478 gaudi_print_irq_info(hdev, event_type, true);
6479 soft_reset_required = gaudi_tpc_read_interrupts(hdev,
6480 tpc_dec_event_to_tpc_id(event_type),
6481 "AXI_SLV_DEC_Error");
6482 if (soft_reset_required)
6483 hl_device_reset(hdev, false, false);
6484 gaudi_unmask_irq(hdev, event_type);
6485 break;
6486
6487 case GAUDI_EVENT_TPC0_KRN_ERR:
6488 case GAUDI_EVENT_TPC1_KRN_ERR:
6489 case GAUDI_EVENT_TPC2_KRN_ERR:
6490 case GAUDI_EVENT_TPC3_KRN_ERR:
6491 case GAUDI_EVENT_TPC4_KRN_ERR:
6492 case GAUDI_EVENT_TPC5_KRN_ERR:
6493 case GAUDI_EVENT_TPC6_KRN_ERR:
6494 case GAUDI_EVENT_TPC7_KRN_ERR:
6495 gaudi_print_irq_info(hdev, event_type, true);
6496 soft_reset_required = gaudi_tpc_read_interrupts(hdev,
6497 tpc_krn_event_to_tpc_id(event_type),
6498 "KRN_ERR");
6499 if (soft_reset_required)
6500 hl_device_reset(hdev, false, false);
6501 gaudi_unmask_irq(hdev, event_type);
6502 break;
6503
6504 case GAUDI_EVENT_PCIE_CORE_SERR:
6505 case GAUDI_EVENT_PCIE_IF_SERR:
6506 case GAUDI_EVENT_PCIE_PHY_SERR:
6507 case GAUDI_EVENT_TPC0_SERR ... GAUDI_EVENT_TPC7_SERR:
6508 case GAUDI_EVENT_MME0_ACC_SERR:
6509 case GAUDI_EVENT_MME0_SBAB_SERR:
6510 case GAUDI_EVENT_MME1_ACC_SERR:
6511 case GAUDI_EVENT_MME1_SBAB_SERR:
6512 case GAUDI_EVENT_MME2_ACC_SERR:
6513 case GAUDI_EVENT_MME2_SBAB_SERR:
6514 case GAUDI_EVENT_MME3_ACC_SERR:
6515 case GAUDI_EVENT_MME3_SBAB_SERR:
6516 case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_DMA7_SERR_ECC:
6517 case GAUDI_EVENT_CPU_IF_ECC_SERR:
6518 case GAUDI_EVENT_PSOC_MEM_SERR:
6519 case GAUDI_EVENT_PSOC_CORESIGHT_SERR:
6520 case GAUDI_EVENT_SRAM0_SERR ... GAUDI_EVENT_SRAM28_SERR:
6521 case GAUDI_EVENT_DMA_IF0_SERR ... GAUDI_EVENT_DMA_IF3_SERR:
6522 case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
6523 fallthrough;
6524 case GAUDI_EVENT_MMU_SERR:
6525 case GAUDI_EVENT_PCIE_DEC:
6526 case GAUDI_EVENT_MME0_WBC_RSP:
6527 case GAUDI_EVENT_MME0_SBAB0_RSP:
6528 case GAUDI_EVENT_MME1_WBC_RSP:
6529 case GAUDI_EVENT_MME1_SBAB0_RSP:
6530 case GAUDI_EVENT_MME2_WBC_RSP:
6531 case GAUDI_EVENT_MME2_SBAB0_RSP:
6532 case GAUDI_EVENT_MME3_WBC_RSP:
6533 case GAUDI_EVENT_MME3_SBAB0_RSP:
6534 case GAUDI_EVENT_CPU_AXI_SPLITTER:
6535 case GAUDI_EVENT_PSOC_AXI_DEC:
6536 case GAUDI_EVENT_PSOC_PRSTN_FALL:
6537 case GAUDI_EVENT_MMU_PAGE_FAULT:
6538 case GAUDI_EVENT_MMU_WR_PERM:
6539 case GAUDI_EVENT_RAZWI_OR_ADC:
6540 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
6541 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM:
6542 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM:
6543 fallthrough;
6544 case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
6545 gaudi_print_irq_info(hdev, event_type, true);
6546 gaudi_handle_qman_err(hdev, event_type);
6547 gaudi_unmask_irq(hdev, event_type);
6548 break;
6549
6550 case GAUDI_EVENT_RAZWI_OR_ADC_SW:
6551 gaudi_print_irq_info(hdev, event_type, true);
6552 hl_device_reset(hdev, false, false);
6553 gaudi_unmask_irq(hdev, event_type);
6554 break;
6555
6556 case GAUDI_EVENT_TPC0_BMON_SPMU:
6557 case GAUDI_EVENT_TPC1_BMON_SPMU:
6558 case GAUDI_EVENT_TPC2_BMON_SPMU:
6559 case GAUDI_EVENT_TPC3_BMON_SPMU:
6560 case GAUDI_EVENT_TPC4_BMON_SPMU:
6561 case GAUDI_EVENT_TPC5_BMON_SPMU:
6562 case GAUDI_EVENT_TPC6_BMON_SPMU:
6563 case GAUDI_EVENT_TPC7_BMON_SPMU:
6564 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
6565 gaudi_print_irq_info(hdev, event_type, false);
6566 gaudi_unmask_irq(hdev, event_type);
6567 break;
6568
6569 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
6570 gaudi_print_clk_change_info(hdev, event_type);
6571 gaudi_unmask_irq(hdev, event_type);
6572 break;
6573
6574 case GAUDI_EVENT_PSOC_GPIO_U16_0:
6575 cause = le64_to_cpu(eq_entry->data[0]) & 0xFF;
6576 dev_err(hdev->dev,
6577 "Received high temp H/W interrupt %d (cause %d)\n",
6578 event_type, cause);
6579 break;
6580
6581 default:
6582 dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
6583 event_type);
6584 break;
6585 }
6586}
6587
6588static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
6589 u32 *size)
6590{
6591 struct gaudi_device *gaudi = hdev->asic_specific;
6592
6593 if (aggregate) {
6594 *size = (u32) sizeof(gaudi->events_stat_aggregate);
6595 return gaudi->events_stat_aggregate;
6596 }
6597
6598 *size = (u32) sizeof(gaudi->events_stat);
6599 return gaudi->events_stat;
6600}
6601
6602static void gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
6603 u32 flags)
6604{
6605 struct gaudi_device *gaudi = hdev->asic_specific;
6606 u32 status, timeout_usec;
6607 int rc;
6608
6609 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
6610 hdev->hard_reset_pending)
6611 return;
6612
6613 mutex_lock(&hdev->mmu_cache_lock);
6614
6615 if (hdev->pldm)
6616 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6617 else
6618 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6619
6620 /* L0 & L1 invalidation */
6621 WREG32(mmSTLB_INV_ALL_START, 1);
6622
6623 rc = hl_poll_timeout(
6624 hdev,
6625 mmSTLB_INV_ALL_START,
6626 status,
6627 !status,
6628 1000,
6629 timeout_usec);
6630
6631 if (rc)
6632 dev_notice_ratelimited(hdev->dev,
6633 "Timeout when waiting for MMU cache invalidation\n");
6634
6635 mutex_unlock(&hdev->mmu_cache_lock);
6636}
6637
6638static void gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
6639 bool is_hard, u32 asid, u64 va, u64 size)
6640{
6641 struct gaudi_device *gaudi = hdev->asic_specific;
6642 u32 status, timeout_usec;
6643 u32 inv_data;
6644 u32 pi;
6645 int rc;
6646
6647 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
6648 hdev->hard_reset_pending)
6649 return;
6650
6651 mutex_lock(&hdev->mmu_cache_lock);
6652
6653 if (hdev->pldm)
6654 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6655 else
6656 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6657
6658 /*
6659 * TODO: currently invalidate entire L0 & L1 as in regular hard
6660 * invalidation. Need to apply invalidation of specific cache
6661 * lines with mask of ASID & VA & size.
6662 	 * Note that L1 will be flushed entirely in any case.
6663 */
6664
6665 /* L0 & L1 invalidation */
6666 inv_data = RREG32(mmSTLB_CACHE_INV);
6667 /* PI is 8 bit */
6668 pi = ((inv_data & STLB_CACHE_INV_PRODUCER_INDEX_MASK) + 1) & 0xFF;
6669 WREG32(mmSTLB_CACHE_INV,
6670 (inv_data & STLB_CACHE_INV_INDEX_MASK_MASK) | pi);
6671
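	/*
	 * The invalidation is complete once HW advances the consumer index to
	 * the producer index written above.
	 */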
6672 rc = hl_poll_timeout(
6673 hdev,
6674 mmSTLB_INV_CONSUMER_INDEX,
6675 status,
6676 status == pi,
6677 1000,
6678 timeout_usec);
6679
6680 if (rc)
6681 dev_notice_ratelimited(hdev->dev,
6682 "Timeout when waiting for MMU cache invalidation\n");
6683
6684 mutex_unlock(&hdev->mmu_cache_lock);
6685}
6686
6687static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
6688 u32 asid, u64 phys_addr)
6689{
6690 u32 status, timeout_usec;
6691 int rc;
6692
6693 if (hdev->pldm)
6694 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
6695 else
6696 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
6697
6698 WREG32(MMU_ASID, asid);
6699 WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
6700 WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
6701 WREG32(MMU_BUSY, 0x80000000);
6702
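	/* Setting the busy bit kicks the update; HW clears it when done */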
6703 rc = hl_poll_timeout(
6704 hdev,
6705 MMU_BUSY,
6706 status,
6707 !(status & 0x80000000),
6708 1000,
6709 timeout_usec);
6710
6711 if (rc) {
6712 dev_err(hdev->dev,
6713 "Timeout during MMU hop0 config of asid %d\n", asid);
6714 return rc;
6715 }
6716
6717 return 0;
6718}
6719
6720static int gaudi_send_heartbeat(struct hl_device *hdev)
6721{
6722 struct gaudi_device *gaudi = hdev->asic_specific;
6723
6724 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6725 return 0;
6726
6727 return hl_fw_send_heartbeat(hdev);
6728}
6729
6730static int gaudi_armcp_info_get(struct hl_device *hdev)
6731{
6732 struct gaudi_device *gaudi = hdev->asic_specific;
6733 struct asic_fixed_properties *prop = &hdev->asic_prop;
6734 int rc;
6735
6736 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6737 return 0;
6738
6739 rc = hl_fw_armcp_info_get(hdev);
6740 if (rc)
6741 return rc;
6742
6743 if (!strlen(prop->armcp_info.card_name))
6744 strncpy(prop->armcp_info.card_name, GAUDI_DEFAULT_CARD_NAME,
6745 CARD_NAME_MAX_LEN);
6746
6747 return 0;
6748}
6749
6750static bool gaudi_is_device_idle(struct hl_device *hdev, u32 *mask,
6751 struct seq_file *s)
6752{
6753 struct gaudi_device *gaudi = hdev->asic_specific;
6754 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
6755 const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
6756 u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
6757 bool is_idle = true, is_eng_idle, is_slave;
6758 u64 offset;
6759 int i, dma_id;
6760
6761 mutex_lock(&gaudi->clk_gate_mutex);
6762
6763 hdev->asic_funcs->disable_clock_gating(hdev);
6764
6765 if (s)
6766 seq_puts(s,
6767 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n"
6768 "--- ------- ------------ ---------- -------------\n");
6769
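	/*
	 * Every engine found busy sets its GAUDI_ENGINE_ID_* bit in the
	 * caller-supplied mask.
	 */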
6770 for (i = 0 ; i < DMA_NUMBER_OF_CHNLS ; i++) {
6771 dma_id = gaudi_dma_assignment[i];
6772 offset = dma_id * DMA_QMAN_OFFSET;
6773
6774 qm_glbl_sts0 = RREG32(mmDMA0_QM_GLBL_STS0 + offset);
6775 qm_cgm_sts = RREG32(mmDMA0_QM_CGM_STS + offset);
6776 dma_core_sts0 = RREG32(mmDMA0_CORE_STS0 + offset);
6777 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6778 IS_DMA_IDLE(dma_core_sts0);
6779 is_idle &= is_eng_idle;
6780
6781 if (mask)
6782 *mask |= !is_eng_idle <<
6783 (GAUDI_ENGINE_ID_DMA_0 + dma_id);
6784 if (s)
6785 seq_printf(s, fmt, dma_id,
6786 is_eng_idle ? "Y" : "N", qm_glbl_sts0,
6787 qm_cgm_sts, dma_core_sts0);
6788 }
6789
6790 if (s)
6791 seq_puts(s,
6792 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n"
6793 "--- ------- ------------ ---------- ----------\n");
6794
6795 for (i = 0 ; i < TPC_NUMBER_OF_ENGINES ; i++) {
6796 offset = i * TPC_QMAN_OFFSET;
6797 qm_glbl_sts0 = RREG32(mmTPC0_QM_GLBL_STS0 + offset);
6798 qm_cgm_sts = RREG32(mmTPC0_QM_CGM_STS + offset);
6799 tpc_cfg_sts = RREG32(mmTPC0_CFG_STATUS + offset);
6800 is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts) &&
6801 IS_TPC_IDLE(tpc_cfg_sts);
6802 is_idle &= is_eng_idle;
6803
6804 if (mask)
6805 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_TPC_0 + i);
6806 if (s)
6807 seq_printf(s, fmt, i,
6808 is_eng_idle ? "Y" : "N",
6809 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts);
6810 }
6811
6812 if (s)
6813 seq_puts(s,
6814 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n"
6815 "--- ------- ------------ ---------- -----------\n");
6816
6817 for (i = 0 ; i < MME_NUMBER_OF_ENGINES ; i++) {
6818 offset = i * MME_QMAN_OFFSET;
6819 mme_arch_sts = RREG32(mmMME0_CTRL_ARCH_STATUS + offset);
6820 is_eng_idle = IS_MME_IDLE(mme_arch_sts);
6821
6822 /* MME 1 & 3 are slaves, no need to check their QMANs */
6823 is_slave = i % 2;
6824 if (!is_slave) {
6825 qm_glbl_sts0 = RREG32(mmMME0_QM_GLBL_STS0 + offset);
6826 qm_cgm_sts = RREG32(mmMME0_QM_CGM_STS + offset);
6827 is_eng_idle &= IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
6828 }
6829
6830 is_idle &= is_eng_idle;
6831
6832 if (mask)
6833 *mask |= !is_eng_idle << (GAUDI_ENGINE_ID_MME_0 + i);
6834 if (s) {
6835 if (!is_slave)
6836 seq_printf(s, fmt, i,
6837 is_eng_idle ? "Y" : "N",
6838 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts);
6839 else
6840 seq_printf(s, mme_slave_fmt, i,
6841 is_eng_idle ? "Y" : "N", "-",
6842 "-", mme_arch_sts);
6843 }
6844 }
6845
6846 if (s)
6847 seq_puts(s, "\n");
6848
6849 hdev->asic_funcs->enable_clock_gating(hdev);
6850
6851 mutex_unlock(&gaudi->clk_gate_mutex);
6852
6853 return is_idle;
6854}
6855
6856static void gaudi_hw_queues_lock(struct hl_device *hdev)
6857 __acquires(&gaudi->hw_queues_lock)
6858{
6859 struct gaudi_device *gaudi = hdev->asic_specific;
6860
6861 spin_lock(&gaudi->hw_queues_lock);
6862}
6863
6864static void gaudi_hw_queues_unlock(struct hl_device *hdev)
6865 __releases(&gaudi->hw_queues_lock)
6866{
6867 struct gaudi_device *gaudi = hdev->asic_specific;
6868
6869 spin_unlock(&gaudi->hw_queues_lock);
6870}
6871
6872static u32 gaudi_get_pci_id(struct hl_device *hdev)
6873{
6874 return hdev->pdev->device;
6875}
6876
6877static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data,
6878 size_t max_size)
6879{
6880 struct gaudi_device *gaudi = hdev->asic_specific;
6881
6882 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
6883 return 0;
6884
6885 return hl_fw_get_eeprom_data(hdev, data, max_size);
6886}
6887
6888/*
6889 * this function should be used only during initialization and/or after reset,
6890 * when there are no active users.
6891 */
6892static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
6893 u32 tpc_id)
6894{
6895 struct gaudi_device *gaudi = hdev->asic_specific;
6896 u64 kernel_timeout;
6897 u32 status, offset;
6898 int rc;
6899
6900 offset = tpc_id * (mmTPC1_CFG_STATUS - mmTPC0_CFG_STATUS);
6901
6902 if (hdev->pldm)
6903 kernel_timeout = GAUDI_PLDM_TPC_KERNEL_WAIT_USEC;
6904 else
6905 kernel_timeout = HL_DEVICE_TIMEOUT_USEC;
6906
6907 mutex_lock(&gaudi->clk_gate_mutex);
6908
6909 hdev->asic_funcs->disable_clock_gating(hdev);
6910
6911 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_LOW + offset,
6912 lower_32_bits(tpc_kernel));
6913 WREG32(mmTPC0_CFG_QM_KERNEL_BASE_ADDRESS_HIGH + offset,
6914 upper_32_bits(tpc_kernel));
6915
6916 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_LOW + offset,
6917 lower_32_bits(tpc_kernel));
6918 WREG32(mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH + offset,
6919 upper_32_bits(tpc_kernel));
6920 /* set a valid LUT pointer, content is of no significance */
6921 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_LO + offset,
6922 lower_32_bits(tpc_kernel));
6923 WREG32(mmTPC0_CFG_LUT_FUNC256_BASE_ADDR_HI + offset,
6924 upper_32_bits(tpc_kernel));
6925
6926 WREG32(mmTPC0_CFG_QM_SYNC_OBJECT_ADDR + offset,
6927 lower_32_bits(CFG_BASE +
6928 mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0));
6929
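	/*
	 * Invalidate the TPC instruction cache and prefetch the first 64KB
	 * of the kernel image so the engine can start fetching it.
	 */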
6930 WREG32(mmTPC0_CFG_TPC_CMD + offset,
6931 (1 << TPC0_CFG_TPC_CMD_ICACHE_INVALIDATE_SHIFT |
6932 1 << TPC0_CFG_TPC_CMD_ICACHE_PREFETCH_64KB_SHIFT));
 6933	/* wait a bit for the icache invalidation/prefetch to kick in */
6934 usleep_range(1000, 1500);
6935
 6936	/* wait until the icache prefetch has completed */
6937 rc = hl_poll_timeout(
6938 hdev,
6939 mmTPC0_CFG_STATUS + offset,
6940 status,
6941 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6942 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6943 1000,
6944 kernel_timeout);
6945
6946 if (rc) {
6947 dev_err(hdev->dev,
6948 "Timeout while waiting for TPC%d icache prefetch\n",
6949 tpc_id);
6950 hdev->asic_funcs->enable_clock_gating(hdev);
6951 mutex_unlock(&gaudi->clk_gate_mutex);
6952 return -EIO;
6953 }
6954
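	/* icache prefetch is done, start executing the kernel */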
6955 WREG32(mmTPC0_CFG_TPC_EXECUTE + offset,
6956 1 << TPC0_CFG_TPC_EXECUTE_V_SHIFT);
6957
6958 /* wait a bit for the engine to start executing */
6959 usleep_range(1000, 1500);
6960
6961 /* wait until engine has finished executing */
6962 rc = hl_poll_timeout(
6963 hdev,
6964 mmTPC0_CFG_STATUS + offset,
6965 status,
6966 (status & TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK) ==
6967 TPC0_CFG_STATUS_VECTOR_PIPE_EMPTY_MASK,
6968 1000,
6969 kernel_timeout);
 6970
	if (rc) {
		dev_err(hdev->dev,
			"Timeout while waiting for TPC%d vector pipe\n",
			tpc_id);
		hdev->asic_funcs->enable_clock_gating(hdev);
		mutex_unlock(&gaudi->clk_gate_mutex);
		return -EIO;
	}

6971 rc = hl_poll_timeout(
6972 hdev,
6973 mmTPC0_CFG_WQ_INFLIGHT_CNTR + offset,
6974 status,
6975 (status == 0),
6976 1000,
6977 kernel_timeout);
6978
6979 hdev->asic_funcs->enable_clock_gating(hdev);
6980 mutex_unlock(&gaudi->clk_gate_mutex);
6981
6982 if (rc) {
6983 dev_err(hdev->dev,
6984 "Timeout while waiting for TPC%d kernel to execute\n",
6985 tpc_id);
6986 return -EIO;
6987 }
6988
6989 return 0;
6990}
6991
6992static enum hl_device_hw_state gaudi_get_hw_state(struct hl_device *hdev)
6993{
6994 return RREG32(mmHW_STATE);
6995}
6996
6997static u32 gaudi_get_queue_id_for_cq(struct hl_device *hdev, u32 cq_idx)
6998{
6999 return gaudi_cq_assignment[cq_idx];
7000}
7001
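/*
 * Each external queue gets a private set of HL_RSVD_SOBS sync objects and
 * HL_RSVD_MONS monitors for the signal/wait submissions, so initialize
 * their ids and reference counts here.
 */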
7002static void gaudi_ext_queue_init(struct hl_device *hdev, u32 q_idx)
7003{
7004 struct gaudi_device *gaudi = hdev->asic_specific;
7005 struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
7006 struct hl_hw_sob *hw_sob;
7007 int sob, ext_idx = gaudi->ext_queue_idx++;
7008
7009 /*
7010 * The external queues might not sit sequentially, hence use the
7011 * real external queue index for the SOB/MON base id.
7012 */
7013 hw_queue->base_sob_id = ext_idx * HL_RSVD_SOBS;
7014 hw_queue->base_mon_id = ext_idx * HL_RSVD_MONS;
7015 hw_queue->next_sob_val = 1;
7016 hw_queue->curr_sob_offset = 0;
7017
7018 for (sob = 0 ; sob < HL_RSVD_SOBS ; sob++) {
7019 hw_sob = &hw_queue->hw_sob[sob];
7020 hw_sob->hdev = hdev;
7021 hw_sob->sob_id = hw_queue->base_sob_id + sob;
7022 hw_sob->q_idx = q_idx;
7023 kref_init(&hw_sob->kref);
7024 }
7025}
7026
7027static void gaudi_ext_queue_reset(struct hl_device *hdev, u32 q_idx)
7028{
7029 struct hl_hw_queue *hw_queue = &hdev->kernel_queues[q_idx];
7030
7031 /*
7032 * In case we got here due to a stuck CS, the refcnt might be bigger
7033 * than 1 and therefore we reset it.
7034 */
7035 kref_init(&hw_queue->hw_sob[hw_queue->curr_sob_offset].kref);
7036 hw_queue->curr_sob_offset = 0;
7037 hw_queue->next_sob_val = 1;
7038}
7039
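/*
 * CB sizes for the sync-stream packets: a signal CB holds a single MSG_SHORT
 * that increments the sync object, and a wait CB holds the four MSG_SHORT
 * packets that configure and arm a monitor plus a FENCE packet. Both also
 * reserve room for the two MSG_PROT packets that terminate the CB.
 */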
7040static u32 gaudi_get_signal_cb_size(struct hl_device *hdev)
7041{
7042 return sizeof(struct packet_msg_short) +
7043 sizeof(struct packet_msg_prot) * 2;
7044}
7045
7046static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
7047{
7048 return sizeof(struct packet_msg_short) * 4 +
7049 sizeof(struct packet_fence) +
7050 sizeof(struct packet_msg_prot) * 2;
7051}
7052
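/*
 * Build a signal CB: a single MSG_SHORT packet that atomically adds 1 to
 * the given sync object in the W_S sync manager.
 */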
7053static void gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id)
7054{
7055 struct hl_cb *cb = (struct hl_cb *) data;
7056 struct packet_msg_short *pkt;
7057 u32 value, ctl;
7058
7059 pkt = (struct packet_msg_short *) (uintptr_t) cb->kernel_address;
7060 memset(pkt, 0, sizeof(*pkt));
7061
7062 value = 1 << GAUDI_PKT_SHORT_VAL_SOB_SYNC_VAL_SHIFT; /* inc by 1 */
7063 value |= 1 << GAUDI_PKT_SHORT_VAL_SOB_MOD_SHIFT; /* add mode */
7064
7065 ctl = (sob_id * 4) << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT; /* SOB id */
7066 ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
7067 ctl |= 3 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S SOB base */
7068 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
7069 ctl |= 1 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
7070 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
7071 ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
7072
7073 pkt->value = cpu_to_le32(value);
7074 pkt->ctl = cpu_to_le32(ctl);
7075}
7076
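/*
 * Build a MSG_SHORT packet that writes @value into one of the W_S monitor
 * registers. @addr is the offset of the target register from the monitor
 * block base address.
 */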
7077static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
7078 u16 addr)
7079{
7080 u32 ctl, pkt_size = sizeof(*pkt);
7081
7082 memset(pkt, 0, pkt_size);
7083
7084 ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
7085 ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
7086 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
7087 ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
7088 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
7089 ctl |= 0 << GAUDI_PKT_SHORT_CTL_MB_SHIFT; /* only last pkt needs MB */
7090
7091 pkt->value = cpu_to_le32(value);
7092 pkt->ctl = cpu_to_le32(ctl);
7093
7094 return pkt_size;
7095}
7096
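/*
 * Build a MSG_SHORT packet that arms a monitor: the monitor watches the
 * group of 8 sync objects containing @sob_id, the mask narrows the
 * comparison to @sob_id itself, and the monitor fires once the sync object
 * value is greater than or equal to @sob_val.
 */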
7097static u32 gaudi_add_arm_monitor_pkt(struct packet_msg_short *pkt, u16 sob_id,
7098 u16 sob_val, u16 addr)
7099{
7100 u32 ctl, value, pkt_size = sizeof(*pkt);
7101 u8 mask = ~(1 << (sob_id & 0x7));
7102
7103 memset(pkt, 0, pkt_size);
7104
7105 value = (sob_id / 8) << GAUDI_PKT_SHORT_VAL_MON_SYNC_GID_SHIFT;
7106 value |= sob_val << GAUDI_PKT_SHORT_VAL_MON_SYNC_VAL_SHIFT;
7107 value |= 0 << GAUDI_PKT_SHORT_VAL_MON_MODE_SHIFT; /* GREATER_OR_EQUAL */
7108 value |= mask << GAUDI_PKT_SHORT_VAL_MON_MASK_SHIFT;
7109
7110 ctl = addr << GAUDI_PKT_SHORT_CTL_ADDR_SHIFT;
7111 ctl |= 0 << GAUDI_PKT_SHORT_CTL_OP_SHIFT; /* write the value */
7112 ctl |= 2 << GAUDI_PKT_SHORT_CTL_BASE_SHIFT; /* W_S MON base */
7113 ctl |= PACKET_MSG_SHORT << GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT;
7114 ctl |= 0 << GAUDI_PKT_SHORT_CTL_EB_SHIFT;
7115 ctl |= 1 << GAUDI_PKT_SHORT_CTL_RB_SHIFT;
7116 ctl |= 1 << GAUDI_PKT_SHORT_CTL_MB_SHIFT;
7117
7118 pkt->value = cpu_to_le32(value);
7119 pkt->ctl = cpu_to_le32(ctl);
7120
7121 return pkt_size;
7122}
7123
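/*
 * Build a FENCE packet that stalls the CP on fence counter 2 until it
 * reaches a target value of 1, and then decrements it back by 1.
 */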
7124static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
7125{
7126 u32 ctl, cfg, pkt_size = sizeof(*pkt);
7127
7128 memset(pkt, 0, pkt_size);
7129
7130 cfg = 1 << GAUDI_PKT_FENCE_CFG_DEC_VAL_SHIFT;
7131 cfg |= 1 << GAUDI_PKT_FENCE_CFG_TARGET_VAL_SHIFT;
7132 cfg |= 2 << GAUDI_PKT_FENCE_CFG_ID_SHIFT;
7133
7134 ctl = 0 << GAUDI_PKT_FENCE_CTL_PRED_SHIFT;
7135 ctl |= PACKET_FENCE << GAUDI_PKT_FENCE_CTL_OPCODE_SHIFT;
7136 ctl |= 0 << GAUDI_PKT_FENCE_CTL_EB_SHIFT;
7137 ctl |= 1 << GAUDI_PKT_FENCE_CTL_RB_SHIFT;
7138 ctl |= 1 << GAUDI_PKT_FENCE_CTL_MB_SHIFT;
7139
7140 pkt->cfg = cpu_to_le32(cfg);
7141 pkt->ctl = cpu_to_le32(ctl);
7142
7143 return pkt_size;
7144}
7145
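/*
 * Build a wait CB: program one of the W_S monitors to write 1 to the
 * CP_FENCE2_RDATA register of the given queue once the sync object reaches
 * @sob_val, then add a FENCE packet so the queue's CP stalls until that
 * payload arrives. Resulting layout:
 *
 *   MSG_SHORT - MON_PAY_ADDRL[mon_id] = lower 32 bits of the fence address
 *   MSG_SHORT - MON_PAY_ADDRH[mon_id] = upper 32 bits of the fence address
 *   MSG_SHORT - MON_PAY_DATA[mon_id]  = 1
 *   MSG_SHORT - MON_ARM[mon_id]       = sync group / mask / mode / value
 *   FENCE     - wait for fence counter 2 to reach 1
 *
 * The trailing MSG_PROT packets accounted for in gaudi_get_wait_cb_size()
 * are not generated here.
 */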
7146static void gaudi_gen_wait_cb(struct hl_device *hdev, void *data, u16 sob_id,
7147 u16 sob_val, u16 mon_id, u32 q_idx)
7148{
7149 struct hl_cb *cb = (struct hl_cb *) data;
7150 void *buf = (void *) (uintptr_t) cb->kernel_address;
7151 u64 monitor_base, fence_addr = 0;
7152 u32 size = 0;
7153 u16 msg_addr_offset;
7154
7155 switch (q_idx) {
7156 case GAUDI_QUEUE_ID_DMA_0_0:
7157 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_0;
7158 break;
7159 case GAUDI_QUEUE_ID_DMA_0_1:
7160 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_1;
7161 break;
7162 case GAUDI_QUEUE_ID_DMA_0_2:
7163 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_2;
7164 break;
7165 case GAUDI_QUEUE_ID_DMA_0_3:
7166 fence_addr = mmDMA0_QM_CP_FENCE2_RDATA_3;
7167 break;
7168 case GAUDI_QUEUE_ID_DMA_1_0:
7169 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_0;
7170 break;
7171 case GAUDI_QUEUE_ID_DMA_1_1:
7172 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_1;
7173 break;
7174 case GAUDI_QUEUE_ID_DMA_1_2:
7175 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_2;
7176 break;
7177 case GAUDI_QUEUE_ID_DMA_1_3:
7178 fence_addr = mmDMA1_QM_CP_FENCE2_RDATA_3;
7179 break;
7180 case GAUDI_QUEUE_ID_DMA_5_0:
7181 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_0;
7182 break;
7183 case GAUDI_QUEUE_ID_DMA_5_1:
7184 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_1;
7185 break;
7186 case GAUDI_QUEUE_ID_DMA_5_2:
7187 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_2;
7188 break;
7189 case GAUDI_QUEUE_ID_DMA_5_3:
7190 fence_addr = mmDMA5_QM_CP_FENCE2_RDATA_3;
7191 break;
7192 default:
 7193		/* we should never get here with an invalid queue index */
7194 dev_crit(hdev->dev, "wrong queue id %d for wait packet\n",
7195 q_idx);
7196 return;
7197 }
7198
7199 fence_addr += CFG_BASE;
7200
7201 /*
7202 * monitor_base should be the content of the base0 address registers,
7203 * so it will be added to the msg short offsets
7204 */
7205 monitor_base = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0;
7206
7207 /* First monitor config packet: low address of the sync */
7208 msg_addr_offset =
7209 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0 + mon_id * 4) -
7210 monitor_base;
7211
7212 size += gaudi_add_mon_msg_short(buf + size, (u32) fence_addr,
7213 msg_addr_offset);
7214
7215 /* Second monitor config packet: high address of the sync */
7216 msg_addr_offset =
7217 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0 + mon_id * 4) -
7218 monitor_base;
7219
7220 size += gaudi_add_mon_msg_short(buf + size, (u32) (fence_addr >> 32),
7221 msg_addr_offset);
7222
7223 /*
7224 * Third monitor config packet: the payload, i.e. what to write when the
7225 * sync triggers
7226 */
7227 msg_addr_offset =
7228 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_PAY_DATA_0 + mon_id * 4) -
7229 monitor_base;
7230
7231 size += gaudi_add_mon_msg_short(buf + size, 1, msg_addr_offset);
7232
7233 /* Fourth monitor config packet: bind the monitor to a sync object */
7234 msg_addr_offset =
7235 (mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0 + mon_id * 4) -
7236 monitor_base;
7237 size += gaudi_add_arm_monitor_pkt(buf + size, sob_id, sob_val,
7238 msg_addr_offset);
7239
7240 /* Fence packet */
7241 size += gaudi_add_fence_pkt(buf + size);
7242}
7243
7244static void gaudi_reset_sob(struct hl_device *hdev, void *data)
7245{
7246 struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
7247
7248 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
7249 hw_sob->sob_id);
7250
7251 WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
7252 0);
7253
7254 kref_init(&hw_sob->kref);
7255}
7256
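/*
 * The boot firmware reports, via a scratchpad register that survives reset,
 * whether the host is a POWER9 machine that can use full 64-bit DMA
 * addressing. Otherwise keep the default 48-bit DMA mask.
 */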
7257static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev)
7258{
7259 if (RREG32(mmPSOC_GLOBAL_CONF_NON_RST_FLOPS_0) ==
7260 HL_POWER9_HOST_MAGIC) {
7261 hdev->power9_64bit_dma_enable = 1;
7262 hdev->dma_mask = 64;
7263 } else {
7264 hdev->power9_64bit_dma_enable = 0;
7265 hdev->dma_mask = 48;
7266 }
7267}
7268
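/*
 * Compose the 64-bit device timestamp from the two halves of the PSOC
 * timestamp counter (upper word first, then lower word).
 */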
7269static u64 gaudi_get_device_time(struct hl_device *hdev)
7270{
7271 u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32;
7272
7273 return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
7274}
7275
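/* ASIC-specific function pointers exposed to the habanalabs common code */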
7276static const struct hl_asic_funcs gaudi_funcs = {
7277 .early_init = gaudi_early_init,
7278 .early_fini = gaudi_early_fini,
7279 .late_init = gaudi_late_init,
7280 .late_fini = gaudi_late_fini,
7281 .sw_init = gaudi_sw_init,
7282 .sw_fini = gaudi_sw_fini,
7283 .hw_init = gaudi_hw_init,
7284 .hw_fini = gaudi_hw_fini,
7285 .halt_engines = gaudi_halt_engines,
7286 .suspend = gaudi_suspend,
7287 .resume = gaudi_resume,
7288 .cb_mmap = gaudi_cb_mmap,
7289 .ring_doorbell = gaudi_ring_doorbell,
7290 .pqe_write = gaudi_pqe_write,
7291 .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
7292 .asic_dma_free_coherent = gaudi_dma_free_coherent,
7293 .get_int_queue_base = gaudi_get_int_queue_base,
7294 .test_queues = gaudi_test_queues,
7295 .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc,
7296 .asic_dma_pool_free = gaudi_dma_pool_free,
7297 .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc,
7298 .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free,
7299 .hl_dma_unmap_sg = gaudi_dma_unmap_sg,
7300 .cs_parser = gaudi_cs_parser,
7301 .asic_dma_map_sg = gaudi_dma_map_sg,
7302 .get_dma_desc_list_size = gaudi_get_dma_desc_list_size,
7303 .add_end_of_cb_packets = gaudi_add_end_of_cb_packets,
7304 .update_eq_ci = gaudi_update_eq_ci,
7305 .context_switch = gaudi_context_switch,
7306 .restore_phase_topology = gaudi_restore_phase_topology,
7307 .debugfs_read32 = gaudi_debugfs_read32,
7308 .debugfs_write32 = gaudi_debugfs_write32,
7309 .debugfs_read64 = gaudi_debugfs_read64,
7310 .debugfs_write64 = gaudi_debugfs_write64,
bcaf4152 7311 .add_device_attr = gaudi_add_device_attr,
ac0ae6a9 7312 .handle_eqe = gaudi_handle_eqe,
bcaf4152 7313 .set_pll_profile = gaudi_set_pll_profile,
ac0ae6a9
OG
7314 .get_events_stat = gaudi_get_events_stat,
7315 .read_pte = gaudi_read_pte,
7316 .write_pte = gaudi_write_pte,
7317 .mmu_invalidate_cache = gaudi_mmu_invalidate_cache,
7318 .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range,
7319 .send_heartbeat = gaudi_send_heartbeat,
7320 .enable_clock_gating = gaudi_enable_clock_gating,
7321 .disable_clock_gating = gaudi_disable_clock_gating,
7322 .debug_coresight = NULL,
7323 .is_device_idle = gaudi_is_device_idle,
7324 .soft_reset_late_init = gaudi_soft_reset_late_init,
7325 .hw_queues_lock = gaudi_hw_queues_lock,
7326 .hw_queues_unlock = gaudi_hw_queues_unlock,
7327 .get_pci_id = gaudi_get_pci_id,
7328 .get_eeprom_data = gaudi_get_eeprom_data,
7329 .send_cpu_message = gaudi_send_cpu_message,
7330 .get_hw_state = gaudi_get_hw_state,
7331 .pci_bars_map = gaudi_pci_bars_map,
7332 .set_dram_bar_base = gaudi_set_hbm_bar_base,
7333 .init_iatu = gaudi_init_iatu,
7334 .rreg = hl_rreg,
7335 .wreg = hl_wreg,
7336 .halt_coresight = NULL,
bcaf4152 7337 .get_clk_rate = gaudi_get_clk_rate,
ac0ae6a9
OG
7338 .get_queue_id_for_cq = gaudi_get_queue_id_for_cq,
7339 .read_device_fw_version = gaudi_read_device_fw_version,
7340 .load_firmware_to_device = gaudi_load_firmware_to_device,
7341 .load_boot_fit_to_device = gaudi_load_boot_fit_to_device,
7342 .ext_queue_init = gaudi_ext_queue_init,
7343 .ext_queue_reset = gaudi_ext_queue_reset,
7344 .get_signal_cb_size = gaudi_get_signal_cb_size,
7345 .get_wait_cb_size = gaudi_get_wait_cb_size,
7346 .gen_signal_cb = gaudi_gen_signal_cb,
7347 .gen_wait_cb = gaudi_gen_wait_cb,
7348 .reset_sob = gaudi_reset_sob,
7349 .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
7350 .get_device_time = gaudi_get_device_time
7351};
7352
7353/**
7354 * gaudi_set_asic_funcs - set GAUDI function pointers
7355 *
 7356 * @hdev: pointer to hl_device structure
7357 *
7358 */
7359void gaudi_set_asic_funcs(struct hl_device *hdev)
7360{
7361 hdev->asic_funcs = &gaudi_funcs;
7362}