]> git.proxmox.com Git - mirror_ubuntu-kernels.git/commitdiff
drm/amdgpu: correct query xgmi3x16 pcs error status
authorStanley.Yang <Stanley.Yang@amd.com>
Mon, 16 Jan 2023 07:42:36 +0000 (15:42 +0800)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 17 Jan 2023 21:11:52 +0000 (16:11 -0500)
There is xgmi3x16 pcs error status for aldebaran, driver should
check xgmi3x16 pcs error status field instead of gopx16 pcs error
status field.

Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
drivers/gpu/drm/amd/include/asic_reg/xgmi/xgmi_6_1_0_sh_mask.h [new file with mode: 0644]

index 922b0e19ae6660fac5a314c846fddb14b63bd704..4340d08f7607374f5c2a478c3f2d3ee87d3dcf70 100644 (file)
@@ -29,6 +29,7 @@
 #include "df/df_3_6_offset.h"
 #include "xgmi/xgmi_4_0_0_smn.h"
 #include "xgmi/xgmi_4_0_0_sh_mask.h"
+#include "xgmi/xgmi_6_1_0_sh_mask.h"
 #include "wafl/wafl2_4_0_0_smn.h"
 #include "wafl/wafl2_4_0_0_sh_mask.h"
 
@@ -180,6 +181,67 @@ static const struct amdgpu_pcs_ras_field wafl_pcs_ras_fields[] = {
         SOC15_REG_FIELD(PCS_GOPX1_0_PCS_GOPX1_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
 };
 
+static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = {
+       {"XGMI3X16 PCS DataLossErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataLossErr)},
+       {"XGMI3X16 PCS TrainingErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TrainingErr)},
+       {"XGMI3X16 PCS FlowCtrlAckErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FlowCtrlAckErr)},
+       {"XGMI3X16 PCS RxFifoUnderflowErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxFifoUnderflowErr)},
+       {"XGMI3X16 PCS RxFifoOverflowErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxFifoOverflowErr)},
+       {"XGMI3X16 PCS CRCErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, CRCErr)},
+       {"XGMI3X16 PCS BERExceededErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, BERExceededErr)},
+       {"XGMI3X16 PCS TxVcidDataErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TxVcidDataErr)},
+       {"XGMI3X16 PCS ReplayBufParityErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayBufParityErr)},
+       {"XGMI3X16 PCS DataParityErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataParityErr)},
+       {"XGMI3X16 PCS ReplayFifoOverflowErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayFifoOverflowErr)},
+       {"XGMI3X16 PCS ReplayFifoUnderflowErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayFifoUnderflowErr)},
+       {"XGMI3X16 PCS ElasticFifoOverflowErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ElasticFifoOverflowErr)},
+       {"XGMI3X16 PCS DeskewErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DeskewErr)},
+       {"XGMI3X16 PCS FlowCtrlCRCErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FlowCtrlCRCErr)},
+       {"XGMI3X16 PCS DataStartupLimitErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, DataStartupLimitErr)},
+       {"XGMI3X16 PCS FCInitTimeoutErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, FCInitTimeoutErr)},
+       {"XGMI3X16 PCS RecoveryTimeoutErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryTimeoutErr)},
+       {"XGMI3X16 PCS ReadySerialTimeoutErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReadySerialTimeoutErr)},
+       {"XGMI3X16 PCS ReadySerialAttemptErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReadySerialAttemptErr)},
+       {"XGMI3X16 PCS RecoveryAttemptErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryAttemptErr)},
+       {"XGMI3X16 PCS RecoveryRelockAttemptErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RecoveryRelockAttemptErr)},
+       {"XGMI3X16 PCS ReplayAttemptErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, ReplayAttemptErr)},
+       {"XGMI3X16 PCS SyncHdrErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, SyncHdrErr)},
+       {"XGMI3X16 PCS TxReplayTimeoutErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, TxReplayTimeoutErr)},
+       {"XGMI3X16 PCS RxReplayTimeoutErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxReplayTimeoutErr)},
+       {"XGMI3X16 PCS LinkSubTxTimeoutErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, LinkSubTxTimeoutErr)},
+       {"XGMI3X16 PCS LinkSubRxTimeoutErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, LinkSubRxTimeoutErr)},
+       {"XGMI3X16 PCS RxCMDPktErr",
+        SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)},
+};
+
 /**
  * DOC: AMDGPU XGMI Support
  *
@@ -835,13 +897,17 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
 {
        int i;
        int ue_cnt = 0;
-       int mask_bit_value = 0;
        const struct amdgpu_pcs_ras_field *pcs_ras_fields = NULL;
        uint32_t field_array_size = 0;
 
        if (is_xgmi_pcs) {
-               pcs_ras_fields = &xgmi_pcs_ras_fields[0];
-               field_array_size = ARRAY_SIZE(xgmi_pcs_ras_fields);
+               if (adev->ip_versions[XGMI_HWIP][0] == IP_VERSION(6, 1, 0)) {
+                       pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0];
+                       field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields);
+               } else {
+                       pcs_ras_fields = &xgmi_pcs_ras_fields[0];
+                       field_array_size = ARRAY_SIZE(xgmi_pcs_ras_fields);
+               }
        } else {
                pcs_ras_fields = &wafl_pcs_ras_fields[0];
                field_array_size = ARRAY_SIZE(wafl_pcs_ras_fields);
diff --git a/drivers/gpu/drm/amd/include/asic_reg/xgmi/xgmi_6_1_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/xgmi/xgmi_6_1_0_sh_mask.h
new file mode 100644 (file)
index 0000000..c6c0cf1
--- /dev/null
@@ -0,0 +1,87 @@
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _xgmi_6_1_0_SH_MASK_HEADER
+#define _xgmi_6_1_0_SH_MASK_HEADER
+
+//PCS_XGMI3X16_PCS_ERROR_STATUS
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__DataLossErr__SHIFT                                    0x0
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__TrainingErr__SHIFT                                    0x1
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__FlowCtrlAckErr__SHIFT                                 0x2
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RxFifoUnderflowErr__SHIFT                             0x3
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RxFifoOverflowErr__SHIFT                              0x4
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__CRCErr__SHIFT                                         0x5
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__BERExceededErr__SHIFT                                 0x6
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__TxVcidDataErr__SHIFT                                  0x7
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ReplayBufParityErr__SHIFT                             0x8
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__DataParityErr__SHIFT                                  0x9
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ReplayFifoOverflowErr__SHIFT                          0xa
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ReplayFifoUnderflowErr__SHIFT                         0xb
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ElasticFifoOverflowErr__SHIFT                         0xc
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__DeskewErr__SHIFT                                      0xd
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__FlowCtrlCRCErr__SHIFT                                 0xe
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__DataStartupLimitErr__SHIFT                            0xf
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__FCInitTimeoutErr__SHIFT                               0x10
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RecoveryTimeoutErr__SHIFT                             0x11
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ReadySerialTimeoutErr__SHIFT                          0x12
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ReadySerialAttemptErr__SHIFT                          0x13
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RecoveryAttemptErr__SHIFT                             0x14
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RecoveryRelockAttemptErr__SHIFT                       0x15
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ReplayAttemptErr__SHIFT                               0x16
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__SyncHdrErr__SHIFT                                     0x17
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__TxReplayTimeoutErr__SHIFT                             0x18
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RxReplayTimeoutErr__SHIFT                             0x19
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__LinkSubTxTimeoutErr__SHIFT                            0x1a
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__LinkSubRxTimeoutErr__SHIFT                            0x1b
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RxCMDPktErr__SHIFT                                    0x1c
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__DataLossErr_MASK                                      0x00000001L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__TrainingErr_MASK                                      0x00000002L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__FlowCtrlAckErr_MASK                                   0x00000004L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RxFifoUnderflowErr_MASK                               0x00000008L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RxFifoOverflowErr_MASK                                0x00000010L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__CRCErr_MASK                                           0x00000020L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__BERExceededErr_MASK                                   0x00000040L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__TxVcidDataErr_MASK                                    0x00000080L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ReplayBufParityErr_MASK                               0x00000100L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__DataParityErr_MASK                                    0x00000200L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ReplayFifoOverflowErr_MASK                            0x00000400L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ReplayFifoUnderflowErr_MASK                           0x00000800L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ElasticFifoOverflowErr_MASK                           0x00001000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__DeskewErr_MASK                                        0x00002000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__FlowCtrlCRCErr_MASK                                   0x00004000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__DataStartupLimitErr_MASK                              0x00008000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__FCInitTimeoutErr_MASK                                 0x00010000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RecoveryTimeoutErr_MASK                               0x00020000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ReadySerialTimeoutErr_MASK                            0x00040000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ReadySerialAttemptErr_MASK                            0x00080000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RecoveryAttemptErr_MASK                               0x00100000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RecoveryRelockAttemptErr_MASK                         0x00200000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__ReplayAttemptErr_MASK                                 0x00400000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__SyncHdrErr_MASK                                       0x00800000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__TxReplayTimeoutErr_MASK                               0x01000000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RxReplayTimeoutErr_MASK                               0x02000000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__LinkSubTxTimeoutErr_MASK                              0x04000000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__LinkSubRxTimeoutErr_MASK                              0x08000000L
+#define PCS_XGMI3X16_PCS_ERROR_STATUS__RxCMDPktErr_MASK                                      0x10000000L
+
+#endif