]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/blame - drivers/misc/habanalabs/common/firmware_if.c
Merge branch 'akpm' (patches from Andrew)
[mirror_ubuntu-jammy-kernel.git] / drivers / misc / habanalabs / common / firmware_if.c
CommitLineData
3110c60f
TT
1// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * Copyright 2016-2019 HabanaLabs, Ltd.
5 * All Rights Reserved.
6 */
7
8#include "habanalabs.h"
7b16a155 9#include "../include/common/hl_boot_if.h"
3110c60f
TT
10
11#include <linux/firmware.h>
ebd8d122 12#include <linux/slab.h>
3110c60f 13
bce382a8 14#define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */
3110c60f 15/**
7e1c07dd 16 * hl_fw_load_fw_to_device() - Load F/W code to device's memory.
a0c11b3c 17 *
3110c60f 18 * @hdev: pointer to hl_device structure.
a0c11b3c
LJ
19 * @fw_name: the firmware image name
20 * @dst: IO memory mapped address space to copy firmware to
9bb86b63
OB
21 * @src_offset: offset in src FW to copy from
22 * @size: amount of bytes to copy (0 to copy the whole binary)
3110c60f
TT
23 *
24 * Copy fw code from firmware file to device memory.
25 *
26 * Return: 0 on success, non-zero for failure.
27 */
7e1c07dd 28int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
9bb86b63 29 void __iomem *dst, u32 src_offset, u32 size)
3110c60f
TT
30{
31 const struct firmware *fw;
9bb86b63 32 const void *fw_data;
75035fe2 33 size_t fw_size;
3110c60f
TT
34 int rc;
35
36 rc = request_firmware(&fw, fw_name, hdev->dev);
37 if (rc) {
cfc2f350 38 dev_err(hdev->dev, "Firmware file %s is not found!\n", fw_name);
3110c60f
TT
39 goto out;
40 }
41
42 fw_size = fw->size;
43 if ((fw_size % 4) != 0) {
cfc2f350 44 dev_err(hdev->dev, "Illegal %s firmware size %zu\n",
3110c60f
TT
45 fw_name, fw_size);
46 rc = -EINVAL;
47 goto out;
48 }
49
50 dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
51
bce382a8
OB
52 if (fw_size > FW_FILE_MAX_SIZE) {
53 dev_err(hdev->dev,
54 "FW file size %zu exceeds maximum of %u bytes\n",
55 fw_size, FW_FILE_MAX_SIZE);
56 rc = -EINVAL;
57 goto out;
58 }
59
9bb86b63
OB
60 if (size - src_offset > fw_size) {
61 dev_err(hdev->dev,
62 "size to copy(%u) and offset(%u) are invalid\n",
63 size, src_offset);
64 rc = -EINVAL;
65 goto out;
66 }
67
68 if (size)
69 fw_size = size;
70
71 fw_data = (const void *) fw->data;
3110c60f 72
9bb86b63 73 memcpy_toio(dst, fw_data + src_offset, fw_size);
3110c60f
TT
74
75out:
76 release_firmware(fw);
77 return rc;
78}
79
80int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
81{
2f55342c 82 struct cpucp_packet pkt = {};
3110c60f 83
2f55342c 84 pkt.ctl = cpu_to_le32(opcode << CPUCP_PKT_CTL_OPCODE_SHIFT);
3110c60f
TT
85
86 return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
788cacf3 87 sizeof(pkt), 0, NULL);
3110c60f
TT
88}
89
90int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
439bc47b 91 u16 len, u32 timeout, u64 *result)
3110c60f 92{
2f55342c 93 struct cpucp_packet *pkt;
3110c60f
TT
94 dma_addr_t pkt_dma_addr;
95 u32 tmp;
96 int rc = 0;
97
3110c60f
TT
98 pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
99 &pkt_dma_addr);
100 if (!pkt) {
101 dev_err(hdev->dev,
102 "Failed to allocate DMA memory for packet to CPU\n");
103 return -ENOMEM;
104 }
105
106 memcpy(pkt, msg, len);
107
108 mutex_lock(&hdev->send_cpu_message_lock);
109
110 if (hdev->disabled)
111 goto out;
112
113 if (hdev->device_cpu_disabled) {
114 rc = -EIO;
115 goto out;
116 }
117
118 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
119 if (rc) {
120 dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
121 goto out;
122 }
123
a08b51a9 124 rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
2f55342c 125 (tmp == CPUCP_PACKET_FENCE_VAL), 1000,
2aa4e410 126 timeout, true);
3110c60f
TT
127
128 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
129
130 if (rc == -ETIMEDOUT) {
a08b51a9 131 dev_err(hdev->dev, "Device CPU packet timeout (0x%x)\n", tmp);
3110c60f
TT
132 hdev->device_cpu_disabled = true;
133 goto out;
134 }
135
a08b51a9 136 tmp = le32_to_cpu(pkt->ctl);
3110c60f 137
2f55342c 138 rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
a08b51a9
OG
139 if (rc) {
140 dev_err(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
141 rc,
2f55342c
OG
142 (tmp & CPUCP_PKT_CTL_OPCODE_MASK)
143 >> CPUCP_PKT_CTL_OPCODE_SHIFT);
a08b51a9
OG
144 rc = -EIO;
145 } else if (result) {
439bc47b 146 *result = le64_to_cpu(pkt->result);
3110c60f
TT
147 }
148
149out:
150 mutex_unlock(&hdev->send_cpu_message_lock);
151
152 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
153
154 return rc;
155}
156
ebd8d122
OB
157int hl_fw_unmask_irq(struct hl_device *hdev, u16 event_type)
158{
2f55342c 159 struct cpucp_packet pkt;
439bc47b 160 u64 result;
ebd8d122
OB
161 int rc;
162
163 memset(&pkt, 0, sizeof(pkt));
164
2f55342c
OG
165 pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ <<
166 CPUCP_PKT_CTL_OPCODE_SHIFT);
ebd8d122
OB
167 pkt.value = cpu_to_le64(event_type);
168
169 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
788cacf3 170 0, &result);
ebd8d122
OB
171
172 if (rc)
173 dev_err(hdev->dev, "failed to unmask RAZWI IRQ %d", event_type);
174
175 return rc;
176}
177
178int hl_fw_unmask_irq_arr(struct hl_device *hdev, const u32 *irq_arr,
179 size_t irq_arr_size)
180{
2f55342c 181 struct cpucp_unmask_irq_arr_packet *pkt;
ebd8d122 182 size_t total_pkt_size;
439bc47b 183 u64 result;
ebd8d122
OB
184 int rc;
185
2f55342c 186 total_pkt_size = sizeof(struct cpucp_unmask_irq_arr_packet) +
ebd8d122
OB
187 irq_arr_size;
188
6138bbe9 189 /* data should be aligned to 8 bytes in order to CPU-CP to copy it */
ebd8d122
OB
190 total_pkt_size = (total_pkt_size + 0x7) & ~0x7;
191
192 /* total_pkt_size is casted to u16 later on */
193 if (total_pkt_size > USHRT_MAX) {
194 dev_err(hdev->dev, "too many elements in IRQ array\n");
195 return -EINVAL;
196 }
197
198 pkt = kzalloc(total_pkt_size, GFP_KERNEL);
199 if (!pkt)
200 return -ENOMEM;
201
202 pkt->length = cpu_to_le32(irq_arr_size / sizeof(irq_arr[0]));
203 memcpy(&pkt->irqs, irq_arr, irq_arr_size);
204
2f55342c
OG
205 pkt->cpucp_pkt.ctl = cpu_to_le32(CPUCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
206 CPUCP_PKT_CTL_OPCODE_SHIFT);
ebd8d122
OB
207
208 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
788cacf3 209 total_pkt_size, 0, &result);
ebd8d122
OB
210
211 if (rc)
212 dev_err(hdev->dev, "failed to unmask IRQ array\n");
213
214 kfree(pkt);
215
216 return rc;
217}
218
3110c60f
TT
219int hl_fw_test_cpu_queue(struct hl_device *hdev)
220{
2f55342c 221 struct cpucp_packet test_pkt = {};
439bc47b 222 u64 result;
3110c60f
TT
223 int rc;
224
2f55342c
OG
225 test_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
226 CPUCP_PKT_CTL_OPCODE_SHIFT);
227 test_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
3110c60f
TT
228
229 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
788cacf3 230 sizeof(test_pkt), 0, &result);
3110c60f
TT
231
232 if (!rc) {
2f55342c 233 if (result != CPUCP_PACKET_FENCE_VAL)
3110c60f 234 dev_err(hdev->dev,
439bc47b 235 "CPU queue test failed (%#08llx)\n", result);
3110c60f
TT
236 } else {
237 dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
238 }
239
240 return rc;
241}
242
243void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
244 dma_addr_t *dma_handle)
245{
246 u64 kernel_addr;
247
3110c60f
TT
248 kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
249
250 *dma_handle = hdev->cpu_accessible_dma_address +
251 (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
252
253 return (void *) (uintptr_t) kernel_addr;
254}
255
256void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
257 void *vaddr)
258{
3110c60f
TT
259 gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
260 size);
261}
262
263int hl_fw_send_heartbeat(struct hl_device *hdev)
264{
2f55342c 265 struct cpucp_packet hb_pkt = {};
439bc47b 266 u64 result;
3110c60f
TT
267 int rc;
268
2f55342c
OG
269 hb_pkt.ctl = cpu_to_le32(CPUCP_PACKET_TEST <<
270 CPUCP_PKT_CTL_OPCODE_SHIFT);
271 hb_pkt.value = cpu_to_le64(CPUCP_PACKET_FENCE_VAL);
3110c60f
TT
272
273 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
788cacf3 274 sizeof(hb_pkt), 0, &result);
3110c60f 275
2f55342c 276 if ((rc) || (result != CPUCP_PACKET_FENCE_VAL))
3110c60f
TT
277 rc = -EIO;
278
279 return rc;
280}
281
4147864e
AM
282int hl_fw_cpucp_info_get(struct hl_device *hdev,
283 u32 cpu_security_boot_status_reg)
3110c60f
TT
284{
285 struct asic_fixed_properties *prop = &hdev->asic_prop;
2f55342c
OG
286 struct cpucp_packet pkt = {};
287 void *cpucp_info_cpu_addr;
288 dma_addr_t cpucp_info_dma_addr;
439bc47b 289 u64 result;
3110c60f
TT
290 int rc;
291
2f55342c 292 cpucp_info_cpu_addr =
3110c60f 293 hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
2f55342c
OG
294 sizeof(struct cpucp_info),
295 &cpucp_info_dma_addr);
296 if (!cpucp_info_cpu_addr) {
3110c60f 297 dev_err(hdev->dev,
6138bbe9 298 "Failed to allocate DMA memory for CPU-CP info packet\n");
3110c60f
TT
299 return -ENOMEM;
300 }
301
2f55342c 302 memset(cpucp_info_cpu_addr, 0, sizeof(struct cpucp_info));
3110c60f 303
2f55342c
OG
304 pkt.ctl = cpu_to_le32(CPUCP_PACKET_INFO_GET <<
305 CPUCP_PKT_CTL_OPCODE_SHIFT);
306 pkt.addr = cpu_to_le64(cpucp_info_dma_addr);
307 pkt.data_max_size = cpu_to_le32(sizeof(struct cpucp_info));
3110c60f
TT
308
309 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
2f55342c 310 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
3110c60f
TT
311 if (rc) {
312 dev_err(hdev->dev,
6138bbe9 313 "Failed to handle CPU-CP info pkt, error %d\n", rc);
3110c60f
TT
314 goto out;
315 }
316
2f55342c
OG
317 memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
318 sizeof(prop->cpucp_info));
3110c60f 319
2f55342c 320 rc = hl_build_hwmon_channel_info(hdev, prop->cpucp_info.sensors);
3110c60f
TT
321 if (rc) {
322 dev_err(hdev->dev,
323 "Failed to build hwmon channel info, error %d\n", rc);
324 rc = -EFAULT;
325 goto out;
326 }
327
4147864e
AM
328 /* Read FW application security bits again */
329 if (hdev->asic_prop.fw_security_status_valid)
330 hdev->asic_prop.fw_app_security_map =
331 RREG32(cpu_security_boot_status_reg);
332
3110c60f
TT
333out:
334 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
2f55342c 335 sizeof(struct cpucp_info), cpucp_info_cpu_addr);
3110c60f
TT
336
337 return rc;
338}
339
340int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
341{
2f55342c 342 struct cpucp_packet pkt = {};
3110c60f
TT
343 void *eeprom_info_cpu_addr;
344 dma_addr_t eeprom_info_dma_addr;
439bc47b 345 u64 result;
3110c60f
TT
346 int rc;
347
348 eeprom_info_cpu_addr =
349 hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
350 max_size, &eeprom_info_dma_addr);
351 if (!eeprom_info_cpu_addr) {
352 dev_err(hdev->dev,
6138bbe9 353 "Failed to allocate DMA memory for CPU-CP EEPROM packet\n");
3110c60f
TT
354 return -ENOMEM;
355 }
356
357 memset(eeprom_info_cpu_addr, 0, max_size);
358
2f55342c
OG
359 pkt.ctl = cpu_to_le32(CPUCP_PACKET_EEPROM_DATA_GET <<
360 CPUCP_PKT_CTL_OPCODE_SHIFT);
94cb669c 361 pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
3110c60f
TT
362 pkt.data_max_size = cpu_to_le32(max_size);
363
364 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
2f55342c 365 HL_CPUCP_EEPROM_TIMEOUT_USEC, &result);
3110c60f
TT
366
367 if (rc) {
368 dev_err(hdev->dev,
6138bbe9
OG
369 "Failed to handle CPU-CP EEPROM packet, error %d\n",
370 rc);
3110c60f
TT
371 goto out;
372 }
373
374 /* result contains the actual size */
375 memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
376
377out:
378 hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
379 eeprom_info_cpu_addr);
380
381 return rc;
382}
7e1c07dd 383
2f55342c 384int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
0a068add
OB
385 struct hl_info_pci_counters *counters)
386{
2f55342c 387 struct cpucp_packet pkt = {};
439bc47b 388 u64 result;
0a068add
OB
389 int rc;
390
2f55342c
OG
391 pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
392 CPUCP_PKT_CTL_OPCODE_SHIFT);
0a068add
OB
393
394 /* Fetch PCI rx counter */
2f55342c 395 pkt.index = cpu_to_le32(cpucp_pcie_throughput_rx);
0a068add 396 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
2f55342c 397 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
0a068add
OB
398 if (rc) {
399 dev_err(hdev->dev,
6138bbe9 400 "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
0a068add
OB
401 return rc;
402 }
403 counters->rx_throughput = result;
404
9354f1b4
OB
405 memset(&pkt, 0, sizeof(pkt));
406 pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_THROUGHPUT_GET <<
407 CPUCP_PKT_CTL_OPCODE_SHIFT);
408
0a068add 409 /* Fetch PCI tx counter */
2f55342c 410 pkt.index = cpu_to_le32(cpucp_pcie_throughput_tx);
0a068add 411 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
2f55342c 412 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
0a068add
OB
413 if (rc) {
414 dev_err(hdev->dev,
6138bbe9 415 "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
0a068add
OB
416 return rc;
417 }
418 counters->tx_throughput = result;
419
420 /* Fetch PCI replay counter */
9354f1b4 421 memset(&pkt, 0, sizeof(pkt));
2f55342c
OG
422 pkt.ctl = cpu_to_le32(CPUCP_PACKET_PCIE_REPLAY_CNT_GET <<
423 CPUCP_PKT_CTL_OPCODE_SHIFT);
0a068add
OB
424
425 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
2f55342c 426 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
0a068add
OB
427 if (rc) {
428 dev_err(hdev->dev,
6138bbe9 429 "Failed to handle CPU-CP PCI info pkt, error %d\n", rc);
0a068add
OB
430 return rc;
431 }
432 counters->replay_cnt = (u32) result;
433
434 return rc;
435}
436
2f55342c 437int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
9f306491 438{
2f55342c 439 struct cpucp_packet pkt = {};
439bc47b 440 u64 result;
9f306491 441 int rc;
442
2f55342c
OG
443 pkt.ctl = cpu_to_le32(CPUCP_PACKET_TOTAL_ENERGY_GET <<
444 CPUCP_PKT_CTL_OPCODE_SHIFT);
9f306491 445
446 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
2f55342c 447 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
9f306491 448 if (rc) {
449 dev_err(hdev->dev,
2f55342c 450 "Failed to handle CpuCP total energy pkt, error %d\n",
9f306491 451 rc);
452 return rc;
453 }
454
455 *total_energy = result;
456
457 return rc;
458}
459
4147864e
AM
460int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
461 u16 *pll_freq_arr)
1cbca899
OB
462{
463 struct cpucp_packet pkt;
439bc47b 464 u64 result;
1cbca899
OB
465 int rc;
466
467 memset(&pkt, 0, sizeof(pkt));
468
4147864e 469 pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_INFO_GET <<
1cbca899 470 CPUCP_PKT_CTL_OPCODE_SHIFT);
4147864e 471 pkt.pll_type = __cpu_to_le16(pll_index);
1cbca899
OB
472
473 rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
474 HL_CPUCP_INFO_TIMEOUT_USEC, &result);
475 if (rc)
476 dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
477
4147864e
AM
478 pll_freq_arr[0] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT0_MASK, result);
479 pll_freq_arr[1] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT1_MASK, result);
480 pll_freq_arr[2] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT2_MASK, result);
481 pll_freq_arr[3] = FIELD_GET(CPUCP_PKT_RES_PLL_OUT3_MASK, result);
1cbca899
OB
482
483 return rc;
484}
485
323b7267
OB
486static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
487 u32 cpu_security_boot_status_reg)
7e1c07dd 488{
323b7267 489 u32 err_val, security_val;
7e1c07dd
OG
490
491 /* Some of the firmware status codes are deprecated in newer f/w
492 * versions. In those versions, the errors are reported
493 * in different registers. Therefore, we need to check those
494 * registers and print the exact errors. Moreover, there
495 * may be multiple errors, so we need to report on each error
496 * separately. Some of the error codes might indicate a state
497 * that is not an error per-se, but it is an error in production
498 * environment
499 */
500 err_val = RREG32(boot_err0_reg);
501 if (!(err_val & CPU_BOOT_ERR0_ENABLED))
502 return;
503
504 if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
505 dev_err(hdev->dev,
506 "Device boot error - DRAM initialization failed\n");
507 if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
508 dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
509 if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
510 dev_err(hdev->dev,
511 "Device boot error - Thermal Sensor initialization failed\n");
512 if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
513 dev_warn(hdev->dev,
514 "Device boot warning - Skipped DRAM initialization\n");
515 if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
516 dev_warn(hdev->dev,
517 "Device boot error - Skipped waiting for BMC\n");
518 if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
519 dev_err(hdev->dev,
520 "Device boot error - Serdes data from BMC not available\n");
521 if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
522 dev_err(hdev->dev,
523 "Device boot error - NIC F/W initialization failed\n");
b2d09622
GN
524 if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
525 dev_warn(hdev->dev,
526 "Device boot warning - security not ready\n");
527 if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
528 dev_err(hdev->dev, "Device boot error - security failure\n");
529 if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
530 dev_err(hdev->dev, "Device boot error - eFuse failure\n");
323b7267
OB
531
532 security_val = RREG32(cpu_security_boot_status_reg);
533 if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
3b82c34f 534 dev_dbg(hdev->dev, "Device security status %#x\n",
323b7267 535 security_val);
7e1c07dd
OG
536}
537
57799ce9 538static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
c8f9b49d 539{
57799ce9
OG
540 /* Some of the status codes below are deprecated in newer f/w
541 * versions but we keep them here for backward compatibility
542 */
c8f9b49d
CG
543 switch (status) {
544 case CPU_BOOT_STATUS_NA:
545 dev_err(hdev->dev,
546 "Device boot error - BTL did NOT run\n");
547 break;
548 case CPU_BOOT_STATUS_IN_WFE:
549 dev_err(hdev->dev,
550 "Device boot error - Stuck inside WFE loop\n");
551 break;
552 case CPU_BOOT_STATUS_IN_BTL:
553 dev_err(hdev->dev,
554 "Device boot error - Stuck in BTL\n");
555 break;
556 case CPU_BOOT_STATUS_IN_PREBOOT:
557 dev_err(hdev->dev,
558 "Device boot error - Stuck in Preboot\n");
559 break;
560 case CPU_BOOT_STATUS_IN_SPL:
561 dev_err(hdev->dev,
562 "Device boot error - Stuck in SPL\n");
563 break;
564 case CPU_BOOT_STATUS_IN_UBOOT:
565 dev_err(hdev->dev,
566 "Device boot error - Stuck in u-boot\n");
567 break;
568 case CPU_BOOT_STATUS_DRAM_INIT_FAIL:
569 dev_err(hdev->dev,
570 "Device boot error - DRAM initialization failed\n");
571 break;
572 case CPU_BOOT_STATUS_UBOOT_NOT_READY:
573 dev_err(hdev->dev,
574 "Device boot error - u-boot stopped by user\n");
575 break;
576 case CPU_BOOT_STATUS_TS_INIT_FAIL:
577 dev_err(hdev->dev,
578 "Device boot error - Thermal Sensor initialization failed\n");
579 break;
580 default:
581 dev_err(hdev->dev,
582 "Device boot error - Invalid status code %d\n",
583 status);
584 break;
585 }
586}
587
323b7267
OB
588int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
589 u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
590 u32 timeout)
57799ce9 591{
323b7267
OB
592 struct asic_fixed_properties *prop = &hdev->asic_prop;
593 u32 status, security_status;
57799ce9
OG
594 int rc;
595
596 if (!hdev->cpu_enable)
597 return 0;
598
599 /* Need to check two possible scenarios:
600 *
601 * CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT - for newer firmwares where
602 * the preboot is waiting for the boot fit
603 *
604 * All other status values - for older firmwares where the uboot was
605 * loaded from the FLASH
606 */
607 rc = hl_poll_timeout(
608 hdev,
609 cpu_boot_status_reg,
610 status,
611 (status == CPU_BOOT_STATUS_IN_UBOOT) ||
612 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
613 (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
614 (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
615 (status == CPU_BOOT_STATUS_SRAM_AVAIL) ||
616 (status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT),
617 10000,
618 timeout);
619
620 if (rc) {
621 dev_err(hdev->dev, "Failed to read preboot version\n");
622 detect_cpu_boot_status(hdev, status);
323b7267
OB
623 fw_read_errors(hdev, boot_err0_reg,
624 cpu_security_boot_status_reg);
57799ce9
OG
625 return -EIO;
626 }
627
eb10b897 628 rc = hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_PREBOOT);
629 if (rc)
630 return rc;
57799ce9 631
323b7267
OB
632 security_status = RREG32(cpu_security_boot_status_reg);
633
634 /* We read security status multiple times during boot:
9c9013cb
OB
635 * 1. preboot - a. Check whether the security status bits are valid
636 * b. Check whether fw security is enabled
6bbb77b9
OB
637 * c. Check whether hard reset is done by preboot
638 * 2. boot cpu - a. Fetch boot cpu security status
639 * b. Check whether hard reset is done by boot cpu
640 * 3. FW application - a. Fetch fw application security status
641 * b. Check whether hard reset is done by fw app
323b7267
OB
642 *
643 * Preboot:
644 * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED), if it is set
645 * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
646 */
647 if (security_status & CPU_BOOT_DEV_STS0_ENABLED) {
6bbb77b9 648 prop->fw_security_status_valid = 1;
9c9013cb 649
6bbb77b9
OB
650 if (security_status & CPU_BOOT_DEV_STS0_SECURITY_EN)
651 prop->fw_security_disabled = false;
652 else
9c9013cb
OB
653 prop->fw_security_disabled = true;
654
655 if (security_status & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
6bbb77b9 656 prop->hard_reset_done_by_fw = true;
323b7267 657 } else {
6bbb77b9 658 prop->fw_security_status_valid = 0;
323b7267
OB
659 prop->fw_security_disabled = true;
660 }
661
6bbb77b9
OB
662 dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
663 prop->hard_reset_done_by_fw ? "enabled" : "disabled");
9c9013cb 664
323b7267 665 dev_info(hdev->dev, "firmware-level security is %s\n",
6bbb77b9 666 prop->fw_security_disabled ? "disabled" : "enabled");
323b7267 667
57799ce9
OG
668 return 0;
669}
670
7e1c07dd 671int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
47f6b41c 672 u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
323b7267
OB
673 u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
674 bool skip_bmc, u32 cpu_timeout, u32 boot_fit_timeout)
7e1c07dd 675{
6bbb77b9 676 struct asic_fixed_properties *prop = &hdev->asic_prop;
7e1c07dd
OG
677 u32 status;
678 int rc;
679
596553db
OG
680 if (!(hdev->fw_loading & FW_TYPE_BOOT_CPU))
681 return 0;
682
7e1c07dd
OG
683 dev_info(hdev->dev, "Going to wait for device boot (up to %lds)\n",
684 cpu_timeout / USEC_PER_SEC);
685
47f6b41c
OB
686 /* Wait for boot FIT request */
687 rc = hl_poll_timeout(
688 hdev,
689 cpu_boot_status_reg,
690 status,
691 status == CPU_BOOT_STATUS_WAITING_FOR_BOOT_FIT,
692 10000,
693 boot_fit_timeout);
694
695 if (rc) {
696 dev_dbg(hdev->dev,
697 "No boot fit request received, resuming boot\n");
698 } else {
699 rc = hdev->asic_funcs->load_boot_fit_to_device(hdev);
700 if (rc)
701 goto out;
702
703 /* Clear device CPU message status */
704 WREG32(cpu_msg_status_reg, CPU_MSG_CLR);
705
706 /* Signal device CPU that boot loader is ready */
707 WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
708
709 /* Poll for CPU device ack */
710 rc = hl_poll_timeout(
711 hdev,
712 cpu_msg_status_reg,
713 status,
714 status == CPU_MSG_OK,
715 10000,
716 boot_fit_timeout);
717
718 if (rc) {
719 dev_err(hdev->dev,
720 "Timeout waiting for boot fit load ack\n");
721 goto out;
722 }
723
724 /* Clear message */
725 WREG32(msg_to_cpu_reg, KMD_MSG_NA);
726 }
727
7e1c07dd
OG
728 /* Make sure CPU boot-loader is running */
729 rc = hl_poll_timeout(
730 hdev,
731 cpu_boot_status_reg,
732 status,
733 (status == CPU_BOOT_STATUS_DRAM_RDY) ||
734 (status == CPU_BOOT_STATUS_NIC_FW_RDY) ||
735 (status == CPU_BOOT_STATUS_READY_TO_BOOT) ||
736 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
737 10000,
738 cpu_timeout);
739
596553db
OG
740 dev_dbg(hdev->dev, "uboot status = %d\n", status);
741
57799ce9 742 /* Read U-Boot version now in case we will later fail */
7e1c07dd 743 hdev->asic_funcs->read_device_fw_version(hdev, FW_COMP_UBOOT);
7e1c07dd 744
6bbb77b9
OB
745 /* Clear reset status since we need to read it again from boot CPU */
746 prop->hard_reset_done_by_fw = false;
747
323b7267 748 /* Read boot_cpu security bits */
6bbb77b9
OB
749 if (prop->fw_security_status_valid) {
750 prop->fw_boot_cpu_security_map =
323b7267
OB
751 RREG32(cpu_security_boot_status_reg);
752
6bbb77b9
OB
753 if (prop->fw_boot_cpu_security_map &
754 CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
755 prop->hard_reset_done_by_fw = true;
756 }
757
758 dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
759 prop->hard_reset_done_by_fw ? "enabled" : "disabled");
760
7e1c07dd 761 if (rc) {
57799ce9 762 detect_cpu_boot_status(hdev, status);
7e1c07dd
OG
763 rc = -EIO;
764 goto out;
765 }
766
596553db
OG
767 if (!(hdev->fw_loading & FW_TYPE_LINUX)) {
768 dev_info(hdev->dev, "Skip loading Linux F/W\n");
7e1c07dd
OG
769 goto out;
770 }
771
772 if (status == CPU_BOOT_STATUS_SRAM_AVAIL)
773 goto out;
774
775 dev_info(hdev->dev,
776 "Loading firmware to device, may take some time...\n");
777
778 rc = hdev->asic_funcs->load_firmware_to_device(hdev);
779 if (rc)
780 goto out;
781
782 if (skip_bmc) {
783 WREG32(msg_to_cpu_reg, KMD_MSG_SKIP_BMC);
784
785 rc = hl_poll_timeout(
786 hdev,
787 cpu_boot_status_reg,
788 status,
789 (status == CPU_BOOT_STATUS_BMC_WAITING_SKIPPED),
790 10000,
791 cpu_timeout);
792
793 if (rc) {
794 dev_err(hdev->dev,
795 "Failed to get ACK on skipping BMC, %d\n",
796 status);
797 WREG32(msg_to_cpu_reg, KMD_MSG_NA);
798 rc = -EIO;
799 goto out;
800 }
801 }
802
803 WREG32(msg_to_cpu_reg, KMD_MSG_FIT_RDY);
804
805 rc = hl_poll_timeout(
806 hdev,
807 cpu_boot_status_reg,
808 status,
809 (status == CPU_BOOT_STATUS_SRAM_AVAIL),
810 10000,
811 cpu_timeout);
812
47f6b41c
OB
813 /* Clear message */
814 WREG32(msg_to_cpu_reg, KMD_MSG_NA);
815
7e1c07dd
OG
816 if (rc) {
817 if (status == CPU_BOOT_STATUS_FIT_CORRUPTED)
818 dev_err(hdev->dev,
819 "Device reports FIT image is corrupted\n");
820 else
821 dev_err(hdev->dev,
917b79b0
OS
822 "Failed to load firmware to device, %d\n",
823 status);
7e1c07dd 824
7e1c07dd
OG
825 rc = -EIO;
826 goto out;
827 }
828
6bbb77b9
OB
829 /* Clear reset status since we need to read again from app */
830 prop->hard_reset_done_by_fw = false;
831
323b7267 832 /* Read FW application security bits */
6bbb77b9
OB
833 if (prop->fw_security_status_valid) {
834 prop->fw_app_security_map =
323b7267
OB
835 RREG32(cpu_security_boot_status_reg);
836
6bbb77b9
OB
837 if (prop->fw_app_security_map &
838 CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
839 prop->hard_reset_done_by_fw = true;
840 }
841
842 dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
843 prop->hard_reset_done_by_fw ? "enabled" : "disabled");
844
7e1c07dd
OG
845 dev_info(hdev->dev, "Successfully loaded firmware to device\n");
846
847out:
323b7267 848 fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
7e1c07dd
OG
849
850 return rc;
851}