1 // SPDX-License-Identifier: GPL-2.0+
2 /* Copyright (c) 2016-2017 Hisilicon Limited. */
/* Log messages for the IMP TCM ECC interrupt status, bits 0..15.
 * Every ITCM/DTCM memory owns a pair of bits: the even bit carries the
 * "1bit" ECC error and the odd bit that follows carries the "mbit" one.
 * hclge_process_common_error() decodes desc[0].data[0], masked with
 * HCLGE_IMP_TCM_ECC_INT_MASK, against this table through hclge_log_error(),
 * which stops at the first entry whose .msg is NULL.
 */
6 static const struct hclge_hw_error hclge_imp_tcm_ecc_int
[] = {
7 { .int_msk
= BIT(0), .msg
= "imp_itcm0_ecc_1bit_err" },
8 { .int_msk
= BIT(1), .msg
= "imp_itcm0_ecc_mbit_err" },
9 { .int_msk
= BIT(2), .msg
= "imp_itcm1_ecc_1bit_err" },
10 { .int_msk
= BIT(3), .msg
= "imp_itcm1_ecc_mbit_err" },
11 { .int_msk
= BIT(4), .msg
= "imp_itcm2_ecc_1bit_err" },
12 { .int_msk
= BIT(5), .msg
= "imp_itcm2_ecc_mbit_err" },
13 { .int_msk
= BIT(6), .msg
= "imp_itcm3_ecc_1bit_err" },
14 { .int_msk
= BIT(7), .msg
= "imp_itcm3_ecc_mbit_err" },
15 { .int_msk
= BIT(8), .msg
= "imp_dtcm0_mem0_ecc_1bit_err" },
16 { .int_msk
= BIT(9), .msg
= "imp_dtcm0_mem0_ecc_mbit_err" },
17 { .int_msk
= BIT(10), .msg
= "imp_dtcm0_mem1_ecc_1bit_err" },
18 { .int_msk
= BIT(11), .msg
= "imp_dtcm0_mem1_ecc_mbit_err" },
19 { .int_msk
= BIT(12), .msg
= "imp_dtcm1_mem0_ecc_1bit_err" },
20 { .int_msk
= BIT(13), .msg
= "imp_dtcm1_mem0_ecc_mbit_err" },
21 { .int_msk
= BIT(14), .msg
= "imp_dtcm1_mem1_ecc_1bit_err" },
22 { .int_msk
= BIT(15), .msg
= "imp_dtcm1_mem1_ecc_mbit_err" },
/* Log messages for the two IMP ITCM4 ECC status bits (bit 0: "1bit" error,
 * bit 1: "mbit" error).  hclge_process_common_error() decodes
 * desc[0].data[5], masked with HCLGE_IMP_ITCM4_ECC_INT_MASK, against this
 * table; hclge_log_error() stops at the first entry whose .msg is NULL.
 */
26 static const struct hclge_hw_error hclge_imp_itcm4_ecc_int
[] = {
27 { .int_msk
= BIT(0), .msg
= "imp_itcm4_ecc_1bit_err" },
28 { .int_msk
= BIT(1), .msg
= "imp_itcm4_ecc_mbit_err" },
/* Log messages for the CMDQ NIC memory ECC status, bits 0..15.
 * The rx/tx depth, tail, head and addr memories each own a bit pair: even
 * bit for the "1bit" error, odd bit for the "mbit" error.
 * hclge_process_common_error() decodes desc[0].data[1], masked with
 * HCLGE_CMDQ_ECC_INT_MASK, against this table; hclge_log_error() stops at
 * the first entry whose .msg is NULL.
 */
32 static const struct hclge_hw_error hclge_cmdq_nic_mem_ecc_int
[] = {
33 { .int_msk
= BIT(0), .msg
= "cmdq_nic_rx_depth_ecc_1bit_err" },
34 { .int_msk
= BIT(1), .msg
= "cmdq_nic_rx_depth_ecc_mbit_err" },
35 { .int_msk
= BIT(2), .msg
= "cmdq_nic_tx_depth_ecc_1bit_err" },
36 { .int_msk
= BIT(3), .msg
= "cmdq_nic_tx_depth_ecc_mbit_err" },
37 { .int_msk
= BIT(4), .msg
= "cmdq_nic_rx_tail_ecc_1bit_err" },
38 { .int_msk
= BIT(5), .msg
= "cmdq_nic_rx_tail_ecc_mbit_err" },
39 { .int_msk
= BIT(6), .msg
= "cmdq_nic_tx_tail_ecc_1bit_err" },
40 { .int_msk
= BIT(7), .msg
= "cmdq_nic_tx_tail_ecc_mbit_err" },
41 { .int_msk
= BIT(8), .msg
= "cmdq_nic_rx_head_ecc_1bit_err" },
42 { .int_msk
= BIT(9), .msg
= "cmdq_nic_rx_head_ecc_mbit_err" },
43 { .int_msk
= BIT(10), .msg
= "cmdq_nic_tx_head_ecc_1bit_err" },
44 { .int_msk
= BIT(11), .msg
= "cmdq_nic_tx_head_ecc_mbit_err" },
45 { .int_msk
= BIT(12), .msg
= "cmdq_nic_rx_addr_ecc_1bit_err" },
46 { .int_msk
= BIT(13), .msg
= "cmdq_nic_rx_addr_ecc_mbit_err" },
47 { .int_msk
= BIT(14), .msg
= "cmdq_nic_tx_addr_ecc_1bit_err" },
48 { .int_msk
= BIT(15), .msg
= "cmdq_nic_tx_addr_ecc_mbit_err" },
/* Log messages for the CMDQ ROCEE memory ECC status, bits 0..15, laid out
 * like the CMDQ NIC table (even bit "1bit" error, odd bit "mbit" error).
 * hclge_process_common_error() takes the status from desc[0].data[1]
 * shifted right by HCLGE_CMDQ_ROC_ECC_INT_SHIFT and masked with
 * HCLGE_CMDQ_ECC_INT_MASK; hclge_log_error() stops at the first entry whose
 * .msg is NULL.
 */
52 static const struct hclge_hw_error hclge_cmdq_rocee_mem_ecc_int
[] = {
53 { .int_msk
= BIT(0), .msg
= "cmdq_rocee_rx_depth_ecc_1bit_err" },
54 { .int_msk
= BIT(1), .msg
= "cmdq_rocee_rx_depth_ecc_mbit_err" },
55 { .int_msk
= BIT(2), .msg
= "cmdq_rocee_tx_depth_ecc_1bit_err" },
56 { .int_msk
= BIT(3), .msg
= "cmdq_rocee_tx_depth_ecc_mbit_err" },
57 { .int_msk
= BIT(4), .msg
= "cmdq_rocee_rx_tail_ecc_1bit_err" },
58 { .int_msk
= BIT(5), .msg
= "cmdq_rocee_rx_tail_ecc_mbit_err" },
59 { .int_msk
= BIT(6), .msg
= "cmdq_rocee_tx_tail_ecc_1bit_err" },
60 { .int_msk
= BIT(7), .msg
= "cmdq_rocee_tx_tail_ecc_mbit_err" },
61 { .int_msk
= BIT(8), .msg
= "cmdq_rocee_rx_head_ecc_1bit_err" },
62 { .int_msk
= BIT(9), .msg
= "cmdq_rocee_rx_head_ecc_mbit_err" },
63 { .int_msk
= BIT(10), .msg
= "cmdq_rocee_tx_head_ecc_1bit_err" },
64 { .int_msk
= BIT(11), .msg
= "cmdq_rocee_tx_head_ecc_mbit_err" },
65 { .int_msk
= BIT(12), .msg
= "cmdq_rocee_rx_addr_ecc_1bit_err" },
66 { .int_msk
= BIT(13), .msg
= "cmdq_rocee_rx_addr_ecc_mbit_err" },
67 { .int_msk
= BIT(14), .msg
= "cmdq_rocee_tx_addr_ecc_1bit_err" },
68 { .int_msk
= BIT(15), .msg
= "cmdq_rocee_tx_addr_ecc_mbit_err" },
/* Log messages for the TQP interrupt ECC status, bits 0..11.
 * Unlike the IMP/CMDQ tables the bits are grouped, not paired: bits 0..5
 * are the "1bit" errors and bits 6..11 the "mbit" errors of the same six
 * sources, in the same order.
 * hclge_process_common_error() takes the status from desc[0].data[3]
 * shifted right by HCLGE_TQP_ECC_INT_SHIFT and masked with
 * HCLGE_TQP_ECC_INT_MASK; hclge_log_error() stops at the first entry whose
 * .msg is NULL.
 */
72 static const struct hclge_hw_error hclge_tqp_int_ecc_int
[] = {
73 { .int_msk
= BIT(0), .msg
= "tqp_int_cfg_even_ecc_1bit_err" },
74 { .int_msk
= BIT(1), .msg
= "tqp_int_cfg_odd_ecc_1bit_err" },
75 { .int_msk
= BIT(2), .msg
= "tqp_int_ctrl_even_ecc_1bit_err" },
76 { .int_msk
= BIT(3), .msg
= "tqp_int_ctrl_odd_ecc_1bit_err" },
77 { .int_msk
= BIT(4), .msg
= "tx_que_scan_int_ecc_1bit_err" },
78 { .int_msk
= BIT(5), .msg
= "rx_que_scan_int_ecc_1bit_err" },
79 { .int_msk
= BIT(6), .msg
= "tqp_int_cfg_even_ecc_mbit_err" },
80 { .int_msk
= BIT(7), .msg
= "tqp_int_cfg_odd_ecc_mbit_err" },
81 { .int_msk
= BIT(8), .msg
= "tqp_int_ctrl_even_ecc_mbit_err" },
82 { .int_msk
= BIT(9), .msg
= "tqp_int_ctrl_odd_ecc_mbit_err" },
83 { .int_msk
= BIT(10), .msg
= "tx_que_scan_int_ecc_mbit_err" },
84 { .int_msk
= BIT(11), .msg
= "rx_que_scan_int_ecc_mbit_err" },
/* Log messages for the IGU common error status, bits 0..3.
 * Note the pair order is the reverse of the IMP/CMDQ tables: here the even
 * bit is the "mbit" error and the odd bit the "1bit" error.
 * hclge_process_igu_egu_error() decodes desc_rd.data[0], masked with
 * HCLGE_IGU_COM_INT_MASK, against this table; hclge_log_error() stops at
 * the first entry whose .msg is NULL.
 */
88 static const struct hclge_hw_error hclge_igu_com_err_int
[] = {
89 { .int_msk
= BIT(0), .msg
= "igu_rx_buf0_ecc_mbit_err" },
90 { .int_msk
= BIT(1), .msg
= "igu_rx_buf0_ecc_1bit_err" },
91 { .int_msk
= BIT(2), .msg
= "igu_rx_buf1_ecc_mbit_err" },
92 { .int_msk
= BIT(3), .msg
= "igu_rx_buf1_ecc_1bit_err" },
/* Log messages for the IGU-EGU TNL error status, bits 0..5 (buffer and
 * FIFO overflow/underflow conditions).
 * hclge_process_igu_egu_error() decodes desc_rd.data[0], masked with
 * HCLGE_IGU_EGU_TNL_INT_MASK, against this table; hclge_log_error() stops
 * at the first entry whose .msg is NULL.
 */
96 static const struct hclge_hw_error hclge_igu_egu_tnl_err_int
[] = {
97 { .int_msk
= BIT(0), .msg
= "rx_buf_overflow" },
98 { .int_msk
= BIT(1), .msg
= "rx_stp_fifo_overflow" },
99 { .int_msk
= BIT(2), .msg
= "rx_stp_fifo_underflow" },
100 { .int_msk
= BIT(3), .msg
= "tx_buf_overflow" },
101 { .int_msk
= BIT(4), .msg
= "tx_buf_underrun" },
102 { .int_msk
= BIT(5), .msg
= "rx_stp_buf_overflow" },
/* Log messages for the two NCSI error status bits (bit 0: tx ECC "1bit"
 * error, bit 1: tx ECC "mbit" error).  hclge_process_ncsi_error() passes
 * desc_rd.data[0] unmasked to hclge_log_error() with this table;
 * hclge_log_error() stops at the first entry whose .msg is NULL.
 */
106 static const struct hclge_hw_error hclge_ncsi_err_int
[] = {
107 { .int_msk
= BIT(0), .msg
= "ncsi_tx_ecc_1bit_err" },
108 { .int_msk
= BIT(1), .msg
= "ncsi_tx_ecc_mbit_err" },
/* hclge_log_error: report the errors flagged in a status word
 * @dev: device the warnings are logged against
 * @err_list: bit-to-message table, walked until an entry with a NULL .msg
 *
 * Each entry's int_msk is tested against err_sts; a matching entry is
 * reported with dev_warn().
 */
112 static void hclge_log_error(struct device
*dev
,
113 const struct hclge_hw_error
*err_list
,
116 const struct hclge_hw_error
*err
;
/* a NULL .msg marks the end of the table */
119 while (err_list
[i
].msg
) {
/* taken when none of this entry's bits is set in the status word */
121 if (!(err
->int_msk
& err_sts
)) {
125 dev_warn(dev
, "%s [error status=0x%x] found\n",
131 /* hclge_cmd_query_error: read the error information
132 * @hdev: pointer to struct hclge_dev
133 * @desc: descriptor for describing the command
134 * @cmd: command opcode
135 * @flag: flag for extended command structure
136 * @w_num: offset for setting the read interrupt type.
137 * @int_type: select which type of the interrupt for which the error
138 * info will be read (RAS-CE/RAS-NFE/RAS-FE etc).
140 * This function queries the error info from hw register/s using command
/* Prepares desc[0] (and desc[1]) for @cmd with hclge_cmd_setup_basic_desc(),
 * stores @int_type in desc[0].data[w_num] and issues the command with
 * hclge_cmd_send(); a failure is logged with dev_err().
 */
142 static int hclge_cmd_query_error(struct hclge_dev
*hdev
,
143 struct hclge_desc
*desc
, u32 cmd
,
145 enum hclge_err_int_type int_type
)
147 struct device
*dev
= &hdev
->pdev
->dev
;
/* the last argument is true here and false in the clear/enable paths */
151 hclge_cmd_setup_basic_desc(&desc
[0], cmd
, true);
/* callers in this file pass HCLGE_CMD_FLAG_NEXT (two descriptors) or 0 */
153 desc
[0].flag
|= cpu_to_le16(flag
);
154 hclge_cmd_setup_basic_desc(&desc
[1], cmd
, true);
/* w_num selects the data word that carries the interrupt type */
158 desc
[0].data
[w_num
] = cpu_to_le32(int_type
);
/* num is the number of descriptors handed to the command queue */
160 ret
= hclge_cmd_send(&hdev
->hw
, &desc
[0], num
);
162 dev_err(dev
, "query error cmd failed (%d)\n", ret
);
167 /* hclge_cmd_clear_error: clear the error status
168 * @hdev: pointer to struct hclge_dev
169 * @desc: descriptor for describing the command
170 * @desc_src: prefilled descriptor from the previous command for reusing
171 * @cmd: command opcode
172 * @flag: flag for extended command structure
174 * This function clears the error status in the hw register/s using command
/* Two ways of filling the descriptors are visible below: building them for
 * @cmd with hclge_cmd_setup_basic_desc() and copying the six data words of
 * @desc_src, or re-arming the caller's own descriptors with
 * hclge_cmd_reuse_desc().  The result is sent with hclge_cmd_send() and a
 * failure is logged with dev_err().
 * NOTE(review): hclge_process_common_error() passes cmd == 0 and
 * desc_src == NULL after pre-filling @desc itself, which presumably selects
 * the reuse path - confirm against the branch conditions.
 */
176 static int hclge_cmd_clear_error(struct hclge_dev
*hdev
,
177 struct hclge_desc
*desc
,
178 struct hclge_desc
*desc_src
,
181 struct device
*dev
= &hdev
->pdev
->dev
;
/* build fresh descriptors for cmd (last argument false, unlike the query) */
186 hclge_cmd_setup_basic_desc(&desc
[0], cmd
, false);
188 desc
[0].flag
|= cpu_to_le16(flag
);
189 hclge_cmd_setup_basic_desc(&desc
[1], cmd
, false);
/* replay the six data words of the source descriptor(s) */
193 for (i
= 0; i
< 6; i
++) {
194 desc
[0].data
[i
] = desc_src
[0].data
[i
];
196 desc
[1].data
[i
] = desc_src
[1].data
[i
];
/* alternative path: reuse the descriptors the caller already filled in */
200 hclge_cmd_reuse_desc(&desc
[0], false);
202 desc
[0].flag
|= cpu_to_le16(flag
);
203 hclge_cmd_reuse_desc(&desc
[1], false);
207 ret
= hclge_cmd_send(&hdev
->hw
, &desc
[0], num
);
209 dev_err(dev
, "clear error cmd failed (%d)\n", ret
);
/* hclge_enable_common_error: configure the COMMON block error interrupts
 * @hdev: pointer to struct hclge_dev
 * @en: selects between the enable and the disable settings
 *
 * Sends HCLGE_COMMON_ECC_INT_CFG as two chained descriptors: desc[0]
 * carries the enable values and desc[1] the matching *_EN_MASK values, both
 * in data words 0, 2, 3, 4 and 5.
 */
214 static int hclge_enable_common_error(struct hclge_dev
*hdev
, bool en
)
216 struct device
*dev
= &hdev
->pdev
->dev
;
217 struct hclge_desc desc
[2];
/* HCLGE_CMD_FLAG_NEXT on desc[0] chains desc[1] to the same command */
220 hclge_cmd_setup_basic_desc(&desc
[0], HCLGE_COMMON_ECC_INT_CFG
, false);
221 desc
[0].flag
|= cpu_to_le16(HCLGE_CMD_FLAG_NEXT
);
222 hclge_cmd_setup_basic_desc(&desc
[1], HCLGE_COMMON_ECC_INT_CFG
, false);
225 /* enable COMMON error interrupts */
226 desc
[0].data
[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN
);
/* NIC and ROCEE command queue enables share data word 2 */
227 desc
[0].data
[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN
|
228 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN
);
229 desc
[0].data
[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN
);
230 desc
[0].data
[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN
);
231 desc
[0].data
[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN
);
233 /* disable COMMON error interrupts */
/* desc[1] mirrors the word layout of desc[0] with the *_EN_MASK values */
240 desc
[1].data
[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_ERR_INT_EN_MASK
);
241 desc
[1].data
[2] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_ERR_INT_EN_MASK
|
242 HCLGE_CMDQ_ROCEE_ECC_ERR_INT_EN_MASK
);
243 desc
[1].data
[3] = cpu_to_le32(HCLGE_IMP_RD_POISON_ERR_INT_EN_MASK
);
244 desc
[1].data
[4] = cpu_to_le32(HCLGE_TQP_ECC_ERR_INT_EN_MASK
);
245 desc
[1].data
[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_ERR_INT_EN_MASK
);
/* both descriptors go out as one command */
247 ret
= hclge_cmd_send(&hdev
->hw
, &desc
[0], 2);
250 "failed(%d) to enable/disable COMMON err interrupts\n",
/* hclge_enable_ncsi_error: configure the NCSI error interrupt
 * @hdev: pointer to struct hclge_dev
 * @en: selects between the enable and the disable setting
 *
 * Checks the PCI revision against 0x21 first, then sends a single
 * HCLGE_NCSI_INT_EN descriptor; data[0] is where HCLGE_NCSI_ERR_INT_EN is
 * written.
 */
256 static int hclge_enable_ncsi_error(struct hclge_dev
*hdev
, bool en
)
258 struct device
*dev
= &hdev
->pdev
->dev
;
259 struct hclge_desc desc
;
/* NOTE(review): revisions below 0x21 are special-cased - presumably NCSI
 * error reporting is not available on them; confirm
 */
262 if (hdev
->pdev
->revision
< 0x21)
265 /* enable/disable NCSI error interrupts */
266 hclge_cmd_setup_basic_desc(&desc
, HCLGE_NCSI_INT_EN
, false);
268 desc
.data
[0] = cpu_to_le32(HCLGE_NCSI_ERR_INT_EN
);
272 ret
= hclge_cmd_send(&hdev
->hw
, &desc
, 1);
275 "failed(%d) to enable/disable NCSI error interrupts\n",
/* hclge_enable_igu_egu_error: configure the IGU/EGU error interrupts
 * @hdev: pointer to struct hclge_dev
 * @en: selects between the enable and the disable settings
 *
 * Issues two single-descriptor commands, HCLGE_IGU_COMMON_INT_EN and
 * HCLGE_IGU_EGU_TNL_INT_EN, each with the enable value in data[0] and the
 * mask in data[1], then applies the same @en to the NCSI interrupt through
 * hclge_enable_ncsi_error().
 */
281 static int hclge_enable_igu_egu_error(struct hclge_dev
*hdev
, bool en
)
283 struct device
*dev
= &hdev
->pdev
->dev
;
284 struct hclge_desc desc
;
287 /* enable/disable error interrupts */
288 hclge_cmd_setup_basic_desc(&desc
, HCLGE_IGU_COMMON_INT_EN
, false);
290 desc
.data
[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN
);
293 desc
.data
[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK
);
295 ret
= hclge_cmd_send(&hdev
->hw
, &desc
, 1);
298 "failed(%d) to enable/disable IGU common interrupts\n",
/* the same descriptor is rebuilt for the IGU-EGU TNL command */
303 hclge_cmd_setup_basic_desc(&desc
, HCLGE_IGU_EGU_TNL_INT_EN
, false);
305 desc
.data
[0] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN
);
308 desc
.data
[1] = cpu_to_le32(HCLGE_IGU_TNL_ERR_INT_EN_MASK
);
310 ret
= hclge_cmd_send(&hdev
->hw
, &desc
, 1);
313 "failed(%d) to enable/disable IGU-EGU TNL interrupts\n",
/* NCSI has no hw_blk entry of its own; it is configured from here */
318 ret
= hclge_enable_ncsi_error(hdev
, en
);
320 dev_err(dev
, "fail(%d) to en/disable err int\n", ret
);
/* hclge_process_common_error: log and clear the COMMON block errors
 * @hdev: pointer to struct hclge_dev
 * @type: interrupt type; not referenced by the statements visible below
 *
 * Queries HCLGE_COMMON_ECC_INT_CFG with two chained descriptors, decodes
 * data words 0, 1, 3 and 5 of desc[0] against the per-source message
 * tables, then writes the clear masks into desc[1] and sends the same
 * descriptors back through hclge_cmd_clear_error().
 */
325 static void hclge_process_common_error(struct hclge_dev
*hdev
,
326 enum hclge_err_int_type type
)
328 struct device
*dev
= &hdev
->pdev
->dev
;
329 struct hclge_desc desc
[2];
/* w_num and int_type are both passed as 0 for this query */
334 ret
= hclge_cmd_query_error(hdev
, &desc
[0],
335 HCLGE_COMMON_ECC_INT_CFG
,
336 HCLGE_CMD_FLAG_NEXT
, 0, 0);
339 "failed(=%d) to query COMMON error interrupt status\n",
/* word 0: IMP TCM ECC status */
345 err_sts
= (le32_to_cpu(desc
[0].data
[0])) & HCLGE_IMP_TCM_ECC_INT_MASK
;
346 hclge_log_error(dev
, &hclge_imp_tcm_ecc_int
[0], err_sts
);
/* word 1: CMDQ NIC status in the low field, ROCEE status in the shifted one.
 * NOTE(review): hclge_enable_common_error() programs the CMDQ enables in
 * word 2, so the status and config layouts presumably differ - confirm
 */
348 err_sts
= (le32_to_cpu(desc
[0].data
[1])) & HCLGE_CMDQ_ECC_INT_MASK
;
349 hclge_log_error(dev
, &hclge_cmdq_nic_mem_ecc_int
[0], err_sts
);
351 err_sts
= (le32_to_cpu(desc
[0].data
[1]) >> HCLGE_CMDQ_ROC_ECC_INT_SHIFT
)
352 & HCLGE_CMDQ_ECC_INT_MASK
;
353 hclge_log_error(dev
, &hclge_cmdq_rocee_mem_ecc_int
[0], err_sts
);
/* word 3: bit 0 is the IMP read-poison error, the TQP field sits above it */
355 if ((le32_to_cpu(desc
[0].data
[3])) & BIT(0))
356 dev_warn(dev
, "imp_rd_data_poison_err found\n");
358 err_sts
= (le32_to_cpu(desc
[0].data
[3]) >> HCLGE_TQP_ECC_INT_SHIFT
) &
359 HCLGE_TQP_ECC_INT_MASK
;
360 hclge_log_error(dev
, &hclge_tqp_int_ecc_int
[0], err_sts
);
/* word 5: IMP ITCM4 ECC status */
362 err_sts
= (le32_to_cpu(desc
[0].data
[5])) &
363 HCLGE_IMP_ITCM4_ECC_INT_MASK
;
364 hclge_log_error(dev
, &hclge_imp_itcm4_ecc_int
[0], err_sts
);
366 /* clear error interrupts */
/* the clear masks go into desc[1], using the word indices read above */
367 desc
[1].data
[0] = cpu_to_le32(HCLGE_IMP_TCM_ECC_CLR_MASK
);
368 desc
[1].data
[1] = cpu_to_le32(HCLGE_CMDQ_NIC_ECC_CLR_MASK
|
369 HCLGE_CMDQ_ROCEE_ECC_CLR_MASK
);
370 desc
[1].data
[3] = cpu_to_le32(HCLGE_TQP_IMP_ERR_CLR_MASK
);
371 desc
[1].data
[5] = cpu_to_le32(HCLGE_IMP_ITCM4_ECC_CLR_MASK
);
/* no source descriptor and no opcode: the queried descriptors are resent */
373 ret
= hclge_cmd_clear_error(hdev
, &desc
[0], NULL
, 0,
374 HCLGE_CMD_FLAG_NEXT
);
377 "failed(%d) to clear COMMON error interrupt status\n",
/* hclge_process_ncsi_error: log and clear the NCSI errors
 * @hdev: pointer to struct hclge_dev
 * @type: interrupt type; not referenced by the statements visible below
 *
 * Checks the PCI revision against 0x21 first.  Then queries
 * HCLGE_NCSI_INT_QUERY (interrupt type HCLGE_NCSI_ERR_INT_TYPE stored in
 * data word 1), logs the bits of data[0] against hclge_ncsi_err_int and
 * clears them by replaying the read descriptor with HCLGE_NCSI_INT_CLR.
 */
381 static void hclge_process_ncsi_error(struct hclge_dev
*hdev
,
382 enum hclge_err_int_type type
)
384 struct device
*dev
= &hdev
->pdev
->dev
;
385 struct hclge_desc desc_rd
;
386 struct hclge_desc desc_wr
;
/* NOTE(review): revisions below 0x21 are special-cased - presumably NCSI
 * error reporting is not available on them; confirm
 */
390 if (hdev
->pdev
->revision
< 0x21)
393 /* read NCSI error status */
394 ret
= hclge_cmd_query_error(hdev
, &desc_rd
, HCLGE_NCSI_INT_QUERY
,
395 0, 1, HCLGE_NCSI_ERR_INT_TYPE
);
398 "failed(=%d) to query NCSI error interrupt status\n",
/* the whole of data word 0 is decoded, no mask is applied */
404 err_sts
= le32_to_cpu(desc_rd
.data
[0]);
405 hclge_log_error(dev
, &hclge_ncsi_err_int
[0], err_sts
);
/* desc_rd is the source whose data words are copied into desc_wr */
408 ret
= hclge_cmd_clear_error(hdev
, &desc_wr
, &desc_rd
,
409 HCLGE_NCSI_INT_CLR
, 0);
411 dev_err(dev
, "failed(=%d) to clear NCSI interrupt status\n",
/* hclge_process_igu_egu_error: log and clear the IGU/EGU errors
 * @hdev: pointer to struct hclge_dev
 * @int_type: interrupt type to query
 *
 * Runs the same query / log / clear sequence twice with one descriptor
 * each: first for the IGU common status (HCLGE_IGU_COMMON_INT_QUERY /
 * HCLGE_IGU_COMMON_INT_CLR), then for the IGU-EGU TNL status
 * (HCLGE_IGU_EGU_TNL_INT_QUERY / HCLGE_IGU_EGU_TNL_INT_CLR).  Finishes by
 * calling hclge_process_ncsi_error() with HCLGE_ERR_INT_RAS_NFE.
 */
415 static void hclge_process_igu_egu_error(struct hclge_dev
*hdev
,
416 enum hclge_err_int_type int_type
)
418 struct device
*dev
= &hdev
->pdev
->dev
;
419 struct hclge_desc desc_rd
;
420 struct hclge_desc desc_wr
;
424 /* read IGU common err sts */
425 ret
= hclge_cmd_query_error(hdev
, &desc_rd
,
426 HCLGE_IGU_COMMON_INT_QUERY
,
429 dev_err(dev
, "failed(=%d) to query IGU common int status\n",
435 err_sts
= le32_to_cpu(desc_rd
.data
[0]) &
436 HCLGE_IGU_COM_INT_MASK
;
437 hclge_log_error(dev
, &hclge_igu_com_err_int
[0], err_sts
);
/* clear by replaying the read descriptor under the CLR opcode */
440 ret
= hclge_cmd_clear_error(hdev
, &desc_wr
, &desc_rd
,
441 HCLGE_IGU_COMMON_INT_CLR
, 0);
443 dev_err(dev
, "failed(=%d) to clear IGU common int status\n",
448 /* read IGU-EGU TNL err sts */
449 ret
= hclge_cmd_query_error(hdev
, &desc_rd
,
450 HCLGE_IGU_EGU_TNL_INT_QUERY
,
453 dev_err(dev
, "failed(=%d) to query IGU-EGU TNL int status\n",
459 err_sts
= le32_to_cpu(desc_rd
.data
[0]) &
460 HCLGE_IGU_EGU_TNL_INT_MASK
;
461 hclge_log_error(dev
, &hclge_igu_egu_tnl_err_int
[0], err_sts
);
464 ret
= hclge_cmd_clear_error(hdev
, &desc_wr
, &desc_rd
,
465 HCLGE_IGU_EGU_TNL_INT_CLR
, 0);
467 dev_err(dev
, "failed(=%d) to clear IGU-EGU TNL int status\n",
/* NCSI has no hw_blk entry of its own; it is processed from here */
472 hclge_process_ncsi_error(hdev
, HCLGE_ERR_INT_RAS_NFE
);
/* Hardware blocks whose errors this file handles.
 * .msk is tested by hclge_process_ras_hw_error() against the 8-bit
 * non-fatal field taken from the RAS status register; .enable_error and
 * .process_error are the per-block handlers.  The loops over this table run
 * until an entry with a NULL .name.
 */
475 static const struct hclge_hw_blk hw_blk
[] = {
476 { .msk
= BIT(0), .name
= "IGU_EGU",
477 .enable_error
= hclge_enable_igu_egu_error
,
478 .process_error
= hclge_process_igu_egu_error
, },
479 { .msk
= BIT(5), .name
= "COMMON",
480 .enable_error
= hclge_enable_common_error
,
481 .process_error
= hclge_process_common_error
, },
/* hclge_hw_error_set_state: apply one state to every block's error interrupts
 * @hdev: pointer to struct hclge_dev
 * @state: value handed to each block's enable_error() handler
 *
 * Walks hw_blk until an entry with a NULL name and calls enable_error() on
 * the entries that provide one; a failure is logged with dev_err().
 */
485 int hclge_hw_error_set_state(struct hclge_dev
*hdev
, bool state
)
487 struct device
*dev
= &hdev
->pdev
->dev
;
/* a NULL name marks the end of hw_blk */
491 while (hw_blk
[i
].name
) {
/* taken for a block that has no enable handler */
492 if (!hw_blk
[i
].enable_error
) {
496 ret
= hw_blk
[i
].enable_error(hdev
, state
);
498 dev_err(dev
, "fail(%d) to en/disable err int\n", ret
);
/* hclge_process_ras_hw_error: dispatch RAS errors to the per-block handlers
 * @ae_dev: ae device whose priv pointer is the struct hclge_dev
 *
 * Reads HCLGE_RAS_PF_OTHER_INT_STS_REG.  When a non-fatal error is flagged,
 * the 8-bit non-fatal field is matched against each hw_blk entry's msk and
 * the entry's process_error() handler, if set, is called with
 * HCLGE_ERR_INT_RAS_NFE.  The only return visible here is
 * PCI_ERS_RESULT_NEED_RESET.
 */
507 pci_ers_result_t
hclge_process_ras_hw_error(struct hnae3_ae_dev
*ae_dev
)
509 struct hclge_dev
*hdev
= ae_dev
->priv
;
510 struct device
*dev
= &hdev
->pdev
->dev
;
514 sts
= hclge_read_dev(&hdev
->hw
, HCLGE_RAS_PF_OTHER_INT_STS_REG
);
516 /* Processing Non-fatal errors */
517 if (sts
& HCLGE_RAS_REG_NFE_MASK
) {
/* keep only the 8-bit per-block field */
518 val
= (sts
>> HCLGE_RAS_REG_NFE_SHIFT
) & 0xFF;
/* a NULL name marks the end of hw_blk */
520 while (hw_blk
[i
].name
) {
/* taken when this block's bit is not set in the field */
521 if (!(hw_blk
[i
].msk
& val
)) {
525 dev_warn(dev
, "%s ras non-fatal error identified\n",
527 if (hw_blk
[i
].process_error
)
528 hw_blk
[i
].process_error(hdev
,
529 HCLGE_ERR_INT_RAS_NFE
);
534 return PCI_ERS_RESULT_NEED_RESET
;