]>
Commit | Line | Data |
---|---|---|
9884c2b1 HZ |
1 | /* |
2 | * Copyright 2019 Advanced Micro Devices, Inc. | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice shall be included in | |
12 | * all copies or substantial portions of the Software. | |
13 | * | |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR | |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
20 | * OTHER DEALINGS IN THE SOFTWARE. | |
21 | * | |
22 | */ | |
23 | #include "umc_v6_1.h" | |
24 | #include "amdgpu_ras.h" | |
25 | #include "amdgpu.h" | |
26 | ||
27 | #include "rsmu/rsmu_0_0_2_offset.h" | |
28 | #include "rsmu/rsmu_0_0_2_sh_mask.h" | |
29 | #include "umc/umc_6_1_1_offset.h" | |
30 | #include "umc/umc_6_1_1_sh_mask.h" | |
fb71a336 | 31 | #include "umc/umc_6_1_2_offset.h" |
9884c2b1 | 32 | |
/* register aperture distance between two UMC instances */
#define UMC_6_INST_DIST	0x40000

/*
 * (addr / 256) * 8192, the higher 26 bits in ErrorAddr
 * is the index of 8KB block
 */
#define ADDR_OF_8KB_BLOCK(addr) (((addr) & ~0xffULL) << 5)
/* channel index is the index of 256B block */
#define ADDR_OF_256B_BLOCK(channel_index) ((channel_index) << 8)
/* offset in 256B block */
#define OFFSET_IN_256B_BLOCK(addr) ((addr) & 0xffULL)

/* iterate over every UMC instance / every channel of one UMC instance */
#define LOOP_UMC_INST(umc_inst) for ((umc_inst) = 0; (umc_inst) < adev->umc.umc_inst_num; (umc_inst)++)
#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
/* nested loop over all (umc instance, channel instance) pairs */
#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
48 | ||
/*
 * Physical channel index for each (umc instance, channel instance) pair;
 * looked up during error-address translation to build the 256B-block part
 * of the SoC physical address (see ADDR_OF_256B_BLOCK usage).
 */
const uint32_t
	umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM] = {
		{2, 18, 11, 27},	{4, 20, 13, 29},
		{1, 17, 8, 24},		{7, 23, 14, 30},
		{10, 26, 3, 19},	{12, 28, 5, 21},
		{9, 25, 0, 16},		{15, 31, 6, 22}
};
56 | ||
eee2eaba JC |
57 | static void umc_v6_1_enable_umc_index_mode(struct amdgpu_device *adev) |
58 | { | |
40e73314 GC |
59 | uint32_t rsmu_umc_addr, rsmu_umc_val; |
60 | ||
61 | rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0, | |
62 | mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); | |
63 | rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4); | |
64 | ||
65 | rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val, | |
66 | RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, | |
eee2eaba | 67 | RSMU_UMC_INDEX_MODE_EN, 1); |
40e73314 GC |
68 | |
69 | WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val); | |
eee2eaba JC |
70 | } |
71 | ||
0ee51f1d JC |
72 | static void umc_v6_1_disable_umc_index_mode(struct amdgpu_device *adev) |
73 | { | |
40e73314 GC |
74 | uint32_t rsmu_umc_addr, rsmu_umc_val; |
75 | ||
76 | rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0, | |
77 | mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); | |
78 | rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4); | |
79 | ||
80 | rsmu_umc_val = REG_SET_FIELD(rsmu_umc_val, | |
81 | RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, | |
0ee51f1d | 82 | RSMU_UMC_INDEX_MODE_EN, 0); |
40e73314 GC |
83 | |
84 | WREG32_PCIE(rsmu_umc_addr * 4, rsmu_umc_val); | |
0ee51f1d JC |
85 | } |
86 | ||
eee2eaba JC |
87 | static uint32_t umc_v6_1_get_umc_index_mode_state(struct amdgpu_device *adev) |
88 | { | |
40e73314 | 89 | uint32_t rsmu_umc_addr, rsmu_umc_val; |
eee2eaba | 90 | |
40e73314 | 91 | rsmu_umc_addr = SOC15_REG_OFFSET(RSMU, 0, |
eee2eaba | 92 | mmRSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU); |
40e73314 | 93 | rsmu_umc_val = RREG32_PCIE(rsmu_umc_addr * 4); |
eee2eaba | 94 | |
40e73314 | 95 | return REG_GET_FIELD(rsmu_umc_val, |
eee2eaba JC |
96 | RSMU_UMC_INDEX_REGISTER_NBIF_VG20_GPU, |
97 | RSMU_UMC_INDEX_MODE_EN); | |
98 | } | |
99 | ||
bd68fb94 JC |
100 | static inline uint32_t get_umc_6_reg_offset(struct amdgpu_device *adev, |
101 | uint32_t umc_inst, | |
102 | uint32_t ch_inst) | |
87d2b92f | 103 | { |
bd68fb94 | 104 | return adev->umc.channel_offs*ch_inst + UMC_6_INST_DIST*umc_inst; |
87d2b92f TZ |
105 | } |
106 | ||
fd90456c GC |
/*
 * Reset both chip-select ECC error counters of one UMC channel back to
 * UMC_V6_1_CE_CNT_INIT — the baseline that the query path subtracts from
 * when computing the correctable error count.
 *
 * @adev: amdgpu device handle
 * @umc_reg_offset: channel register offset (from get_umc_6_reg_offset)
 */
static void umc_v6_1_clear_error_count_per_channel(struct amdgpu_device *adev,
					uint32_t umc_reg_offset)
{
	uint32_t ecc_err_cnt_addr;
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;

	/* Arcturus carries the UMC 6.1.2 IP; everything else here is 6.1.1 */
	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0,
					mmUMCCH0_0_EccErrCntSel_ARCT);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0,
					mmUMCCH0_0_EccErrCnt_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0,
					mmUMCCH0_0_EccErrCntSel);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0,
					mmUMCCH0_0_EccErrCnt);
	}

	/* select the lower chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
					umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear lower chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_1_CE_CNT_INIT);

	/* select the higher chip */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr +
					umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel,
					UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4,
			ecc_err_cnt_sel);

	/* clear higher chip error count */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
			UMC_V6_1_CE_CNT_INIT);
}
157 | ||
158 | static void umc_v6_1_clear_error_count(struct amdgpu_device *adev) | |
159 | { | |
160 | uint32_t umc_inst = 0; | |
161 | uint32_t ch_inst = 0; | |
162 | uint32_t umc_reg_offset = 0; | |
163 | uint32_t rsmu_umc_index_state = | |
164 | umc_v6_1_get_umc_index_mode_state(adev); | |
165 | ||
166 | if (rsmu_umc_index_state) | |
167 | umc_v6_1_disable_umc_index_mode(adev); | |
168 | ||
169 | LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { | |
170 | umc_reg_offset = get_umc_6_reg_offset(adev, | |
171 | umc_inst, | |
172 | ch_inst); | |
173 | ||
174 | umc_v6_1_clear_error_count_per_channel(adev, | |
175 | umc_reg_offset); | |
176 | } | |
177 | ||
178 | if (rsmu_umc_index_state) | |
179 | umc_v6_1_enable_umc_index_mode(adev); | |
180 | } | |
181 | ||
9884c2b1 HZ |
/*
 * Accumulate correctable (CE) errors of one UMC channel into *error_count:
 * the DRAM ECC counters of both chip selects (each read is reduced by the
 * UMC_V6_1_CE_CNT_INIT baseline programmed by err_cnt_init/clear), plus
 * one for an SRAM correctable error reported in MCUMC_STATUS.
 *
 * @adev: amdgpu device handle
 * @umc_reg_offset: channel register offset (from get_umc_6_reg_offset)
 * @error_count: accumulator, incremented in place
 */
static void umc_v6_1_query_correctable_error_count(struct amdgpu_device *adev,
						   uint32_t umc_reg_offset,
						   unsigned long *error_count)
{
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
	uint32_t ecc_err_cnt, ecc_err_cnt_addr;
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
	}

	/* select the lower chip and check the error count */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	/* counter was preset to UMC_V6_1_CE_CNT_INIT, so subtract the baseline */
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_1_CE_CNT_INIT);

	/* select the higher chip and check the err counter */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);

	ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
	*error_count +=
		(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_EccErrCnt, EccErrCnt) -
		 UMC_V6_1_CE_CNT_INIT);

	/* check for SRAM correctable error
	   MCUMC_STATUS is a 64 bit register */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	/* NOTE(review): ErrorCodeExt == 6 presumably identifies the SRAM ECC
	 * error source - confirm against the UMC MCA spec */
	if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, ErrorCodeExt) == 6 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
		*error_count += 1;
}
238 | ||
/*
 * Accumulate uncorrectable (UE) errors of one UMC channel into
 * *error_count: a valid MCUMC_STATUS with any of Deferred/UECC/PCC/UC/TCC
 * set counts as one uncorrectable error.
 *
 * NOTE(review): "querry" typo in the name is kept intentionally; renaming
 * the static would require touching its caller as well.
 */
static void umc_v6_1_querry_uncorrectable_error_count(struct amdgpu_device *adev,
						      uint32_t umc_reg_offset,
						      unsigned long *error_count)
{
	uint64_t mc_umc_status;
	uint32_t mc_umc_status_addr;

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		mc_umc_status_addr =
			SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0);
	}

	/* check the MCUMC_STATUS */
	mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
	if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
	    (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
	    REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
		*error_count += 1;
}
266 | ||
/*
 * amdgpu_umc_funcs::query_ras_error_count callback: walk every
 * (UMC instance, channel) pair and accumulate CE and UE counts into the
 * ras_err_data supplied by the RAS core, then reset the hardware counters.
 *
 * Index mode is disabled for the duration (the computed per-channel
 * offsets are used for direct register access) and restored on exit.
 * On Arcturus, DF C-state is disallowed around the register accesses.
 *
 * @ras_error_status: actually a struct ras_err_data *, per the callback ABI
 */
static void umc_v6_1_query_ras_error_count(struct amdgpu_device *adev,
					   void *ras_error_status)
{
	struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;

	uint32_t umc_inst = 0;
	uint32_t ch_inst = 0;
	uint32_t umc_reg_offset = 0;

	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);

	if (rsmu_umc_index_state)
		umc_v6_1_disable_umc_index_mode(adev);

	if ((adev->asic_type == CHIP_ARCTURUS) &&
		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
		DRM_WARN("Fail to disable DF-Cstate.\n");

	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_6_reg_offset(adev,
						      umc_inst,
						      ch_inst);

		umc_v6_1_query_correctable_error_count(adev,
						       umc_reg_offset,
						       &(err_data->ce_count));
		umc_v6_1_querry_uncorrectable_error_count(adev,
							  umc_reg_offset,
							  &(err_data->ue_count));
	}

	if ((adev->asic_type == CHIP_ARCTURUS) &&
		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
		DRM_WARN("Fail to enable DF-Cstate\n");

	if (rsmu_umc_index_state)
		umc_v6_1_enable_umc_index_mode(adev);

	/* counters were consumed above; reset them to the baseline */
	umc_v6_1_clear_error_count(adev);
}
307 | ||
8c948103 | 308 | static void umc_v6_1_query_error_address(struct amdgpu_device *adev, |
2b671b60 | 309 | struct ras_err_data *err_data, |
bd68fb94 | 310 | uint32_t umc_reg_offset, |
c8aa6ae3 | 311 | uint32_t ch_inst, |
bd68fb94 | 312 | uint32_t umc_inst) |
8c948103 | 313 | { |
2b671b60 | 314 | uint32_t lsb, mc_umc_status_addr; |
5d4667ec | 315 | uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; |
87d2b92f | 316 | struct eeprom_table_record *err_rec; |
c8aa6ae3 | 317 | uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; |
2b671b60 | 318 | |
4cf781c2 JC |
319 | if (adev->asic_type == CHIP_ARCTURUS) { |
320 | /* UMC 6_1_2 registers */ | |
4cf781c2 JC |
321 | mc_umc_status_addr = |
322 | SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0_ARCT); | |
5d4667ec GC |
323 | mc_umc_addrt0 = |
324 | SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0_ARCT); | |
4cf781c2 JC |
325 | } else { |
326 | /* UMC 6_1_1 registers */ | |
4cf781c2 JC |
327 | mc_umc_status_addr = |
328 | SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_STATUST0); | |
5d4667ec GC |
329 | mc_umc_addrt0 = |
330 | SOC15_REG_OFFSET(UMC, 0, mmMCA_UMC_UMC0_MCUMC_ADDRT0); | |
4cf781c2 | 331 | } |
8c948103 | 332 | |
1a2172b5 JC |
333 | mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4); |
334 | ||
335 | if (mc_umc_status == 0) | |
336 | return; | |
337 | ||
2b671b60 TZ |
338 | if (!err_data->err_addr) { |
339 | /* clear umc status */ | |
955c7120 | 340 | WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); |
8c948103 | 341 | return; |
2b671b60 | 342 | } |
8c948103 | 343 | |
87d2b92f | 344 | err_rec = &err_data->err_addr[err_data->err_addr_cnt]; |
8c948103 TZ |
345 | |
346 | /* calculate error address if ue/ce error is detected */ | |
347 | if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && | |
348 | (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || | |
349 | REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) { | |
8c948103 | 350 | |
eee2eaba | 351 | err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4); |
8c948103 TZ |
352 | /* the lowest lsb bits should be ignored */ |
353 | lsb = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, LSB); | |
354 | err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr); | |
355 | err_addr &= ~((0x1ULL << lsb) - 1); | |
356 | ||
357 | /* translate umc channel address to soc pa, 3 parts are included */ | |
87d2b92f TZ |
358 | retired_page = ADDR_OF_8KB_BLOCK(err_addr) | |
359 | ADDR_OF_256B_BLOCK(channel_index) | | |
360 | OFFSET_IN_256B_BLOCK(err_addr); | |
361 | ||
362 | /* we only save ue error information currently, ce is skipped */ | |
363 | if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) | |
364 | == 1) { | |
365 | err_rec->address = err_addr; | |
366 | /* page frame address is saved */ | |
afa44809 | 367 | err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; |
87d2b92f TZ |
368 | err_rec->ts = (uint64_t)ktime_get_real_seconds(); |
369 | err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; | |
370 | err_rec->cu = 0; | |
371 | err_rec->mem_channel = channel_index; | |
bd68fb94 | 372 | err_rec->mcumc_id = umc_inst; |
87d2b92f TZ |
373 | |
374 | err_data->err_addr_cnt++; | |
375 | } | |
8c948103 | 376 | } |
2b671b60 TZ |
377 | |
378 | /* clear umc status */ | |
955c7120 | 379 | WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL); |
8c948103 TZ |
380 | } |
381 | ||
/*
 * amdgpu_umc_funcs::query_ras_error_address callback: walk every
 * (UMC instance, channel) pair and collect error-address / retired-page
 * records into the ras_err_data supplied by the RAS core.
 *
 * Index mode is disabled for the duration (the computed per-channel
 * offsets are used for direct register access) and restored on exit.
 * On Arcturus, DF C-state is disallowed around the register accesses.
 *
 * @ras_error_status: actually a struct ras_err_data *, per the callback ABI
 */
static void umc_v6_1_query_ras_error_address(struct amdgpu_device *adev,
					     void *ras_error_status)
{
	struct ras_err_data* err_data = (struct ras_err_data*)ras_error_status;

	uint32_t umc_inst = 0;
	uint32_t ch_inst = 0;
	uint32_t umc_reg_offset = 0;

	uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev);

	if (rsmu_umc_index_state)
		umc_v6_1_disable_umc_index_mode(adev);

	if ((adev->asic_type == CHIP_ARCTURUS) &&
		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
		DRM_WARN("Fail to disable DF-Cstate.\n");

	LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) {
		umc_reg_offset = get_umc_6_reg_offset(adev,
						      umc_inst,
						      ch_inst);

		umc_v6_1_query_error_address(adev,
					     err_data,
					     umc_reg_offset,
					     ch_inst,
					     umc_inst);
	}

	if ((adev->asic_type == CHIP_ARCTURUS) &&
		amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW))
		DRM_WARN("Fail to enable DF-Cstate\n");

	if (rsmu_umc_index_state)
		umc_v6_1_enable_umc_index_mode(adev);
}
419 | ||
/*
 * Initialise one channel's ECC error counters: route the CE error
 * interrupt to APIC and preset both chip-select counters to
 * UMC_V6_1_CE_CNT_INIT so that later queries can subtract that baseline.
 *
 * @adev: amdgpu device handle
 * @umc_reg_offset: channel register offset (from get_umc_6_reg_offset)
 */
static void umc_v6_1_err_cnt_init_per_channel(struct amdgpu_device *adev,
					      uint32_t umc_reg_offset)
{
	uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
	uint32_t ecc_err_cnt_addr;

	if (adev->asic_type == CHIP_ARCTURUS) {
		/* UMC 6_1_2 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel_ARCT);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt_ARCT);
	} else {
		/* UMC 6_1_1 registers */
		ecc_err_cnt_sel_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCntSel);
		ecc_err_cnt_addr =
			SOC15_REG_OFFSET(UMC, 0, mmUMCCH0_0_EccErrCnt);
	}

	/* select the lower chip and check the error count */
	ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 0);
	/* set ce error interrupt type to APIC based interrupt */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrInt, 0x1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
	/* set error count to initial value */
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);

	/* select the higher chip and check the err counter */
	ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_EccErrCntSel,
					EccErrCntCsSel, 1);
	WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
	WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V6_1_CE_CNT_INIT);
}
457 | ||
d99659a0 | 458 | static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) |
3aacf4ea | 459 | { |
bd68fb94 JC |
460 | uint32_t umc_inst = 0; |
461 | uint32_t ch_inst = 0; | |
462 | uint32_t umc_reg_offset = 0; | |
463 | ||
eee2eaba JC |
464 | uint32_t rsmu_umc_index_state = umc_v6_1_get_umc_index_mode_state(adev); |
465 | ||
466 | if (rsmu_umc_index_state) | |
467 | umc_v6_1_disable_umc_index_mode(adev); | |
0ee51f1d | 468 | |
c8aa6ae3 JC |
469 | LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) { |
470 | umc_reg_offset = get_umc_6_reg_offset(adev, | |
471 | umc_inst, | |
472 | ch_inst); | |
3aacf4ea | 473 | |
c8aa6ae3 | 474 | umc_v6_1_err_cnt_init_per_channel(adev, umc_reg_offset); |
bd68fb94 | 475 | } |
eee2eaba JC |
476 | |
477 | if (rsmu_umc_index_state) | |
478 | umc_v6_1_enable_umc_index_mode(adev); | |
3aacf4ea TZ |
479 | } |
480 | ||
/* UMC 6.1 RAS callback table registered with the amdgpu UMC layer */
const struct amdgpu_umc_funcs umc_v6_1_funcs = {
	.err_cnt_init = umc_v6_1_err_cnt_init,
	.ras_late_init = amdgpu_umc_ras_late_init,
	.query_ras_error_count = umc_v6_1_query_ras_error_count,
	.query_ras_error_address = umc_v6_1_query_ras_error_address,
};