]> git.proxmox.com Git - mirror_edk2.git/blame - QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/meminit_utils.c
QuarkSocPkg: Add new package for Quark SoC X1000
[mirror_edk2.git] / QuarkSocPkg / QuarkNorthCluster / MemoryInit / Pei / meminit_utils.c
CommitLineData
9b6bbcdb
MK
1/************************************************************************\r
2 *\r
3 * Copyright (c) 2013-2015 Intel Corporation.\r
4 *\r
5* This program and the accompanying materials\r
6* are licensed and made available under the terms and conditions of the BSD License\r
7* which accompanies this distribution. The full text of the license may be found at\r
8* http://opensource.org/licenses/bsd-license.php\r
9*\r
10* THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,\r
11* WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.\r
12 *\r
13 ***************************************************************************/\r
14\r
15#include "mrc.h"\r
16#include "memory_options.h"\r
17\r
18#include "meminit_utils.h"\r
19#include "hte.h"\r
20#include "io.h"\r
21\r
22void select_hte(\r
23 MRCParams_t *mrc_params);\r
24\r
25static uint8_t first_run = 0;\r
26\r
// Vref register codes indexed by Vref setting, ordered from lowest to highest.
// Settings 0-31 map to codes 0x3F down to 0x20; settings 32-63 map to codes 0x00 up to 0x1F.
const uint8_t vref_codes[64] = {
  0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38, // 00 - 07
  0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, // 08 - 15
  0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, // 16 - 23
  0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, // 24 - 31
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // 32 - 39
  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, // 40 - 47
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, // 48 - 55
  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F  // 56 - 63
};
34\r
35#ifdef EMU\r
36// Track current post code for debugging purpose\r
37uint32_t PostCode;\r
38#endif\r
39\r
40// set_rcvn:\r
41//\r
42// This function will program the RCVEN delays.\r
43// (currently doesn't comprehend rank)\r
44void set_rcvn(\r
45 uint8_t channel,\r
46 uint8_t rank,\r
47 uint8_t byte_lane,\r
48 uint32_t pi_count)\r
49{\r
50 uint32_t reg;\r
51 uint32_t msk;\r
52 uint32_t tempD;\r
53\r
54 ENTERFN();\r
55 DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);\r
56\r
57 // RDPTR (1/2 MCLK, 64 PIs)\r
58 // BL0 -> B01PTRCTL0[11:08] (0x0-0xF)\r
59 // BL1 -> B01PTRCTL0[23:20] (0x0-0xF)\r
60 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
61 msk = (byte_lane & BIT0) ? (BIT23 | BIT22 | BIT21 | BIT20) : (BIT11 | BIT10 | BIT9 | BIT8);\r
62 tempD = (byte_lane & BIT0) ? ((pi_count / HALF_CLK) << 20) : ((pi_count / HALF_CLK) << 8);\r
63 isbM32m(DDRPHY, reg, tempD, msk);\r
64\r
65 // Adjust PI_COUNT\r
66 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;\r
67\r
68 // PI (1/64 MCLK, 1 PIs)\r
69 // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)\r
70 // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)\r
71 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);\r
72 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
73 msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);\r
74 tempD = pi_count << 24;\r
75 isbM32m(DDRPHY, reg, tempD, msk);\r
76\r
77 // DEADBAND\r
78 // BL0/1 -> B01DBCTL1[08/11] (+1 select)\r
79 // BL0/1 -> B01DBCTL1[02/05] (enable)\r
80 reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
81 msk = 0x00;\r
82 tempD = 0x00;\r
83 // enable\r
84 msk |= (byte_lane & BIT0) ? (BIT5) : (BIT2);\r
85 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))\r
86 {\r
87 tempD |= msk;\r
88 }\r
89 // select\r
90 msk |= (byte_lane & BIT0) ? (BIT11) : (BIT8);\r
91 if (pi_count < EARLY_DB)\r
92 {\r
93 tempD |= msk;\r
94 }\r
95 isbM32m(DDRPHY, reg, tempD, msk);\r
96\r
97 // error check\r
98 if (pi_count > 0x3F)\r
99 {\r
100 training_message(channel, rank, byte_lane);\r
101 post_code(0xEE, 0xE0);\r
102 }\r
103\r
104 LEAVEFN();\r
105 return;\r
106}\r
107\r
108// get_rcvn:\r
109//\r
110// This function will return the current RCVEN delay on the given channel, rank, byte_lane as an absolute PI count.\r
111// (currently doesn't comprehend rank)\r
112uint32_t get_rcvn(\r
113 uint8_t channel,\r
114 uint8_t rank,\r
115 uint8_t byte_lane)\r
116{\r
117 uint32_t reg;\r
118 uint32_t tempD;\r
119 uint32_t pi_count;\r
120\r
121 ENTERFN();\r
122\r
123 // RDPTR (1/2 MCLK, 64 PIs)\r
124 // BL0 -> B01PTRCTL0[11:08] (0x0-0xF)\r
125 // BL1 -> B01PTRCTL0[23:20] (0x0-0xF)\r
126 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
127 tempD = isbR32m(DDRPHY, reg);\r
128 tempD >>= (byte_lane & BIT0) ? (20) : (8);\r
129 tempD &= 0xF;\r
130\r
131 // Adjust PI_COUNT\r
132 pi_count = tempD * HALF_CLK;\r
133\r
134 // PI (1/64 MCLK, 1 PIs)\r
135 // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)\r
136 // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)\r
137 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);\r
138 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
139 tempD = isbR32m(DDRPHY, reg);\r
140 tempD >>= 24;\r
141 tempD &= 0x3F;\r
142\r
143 // Adjust PI_COUNT\r
144 pi_count += tempD;\r
145\r
146 LEAVEFN();\r
147 return pi_count;\r
148}\r
149\r
150// set_rdqs:\r
151//\r
152// This function will program the RDQS delays based on an absolute amount of PIs.\r
153// (currently doesn't comprehend rank)\r
154void set_rdqs(\r
155 uint8_t channel,\r
156 uint8_t rank,\r
157 uint8_t byte_lane,\r
158 uint32_t pi_count)\r
159{\r
160 uint32_t reg;\r
161 uint32_t msk;\r
162 uint32_t tempD;\r
163\r
164 ENTERFN();\r
165 DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);\r
166\r
167 // PI (1/128 MCLK)\r
168 // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)\r
169 // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)\r
170 reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE);\r
171 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
172 msk = (BIT6 | BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);\r
173 tempD = pi_count << 0;\r
174 isbM32m(DDRPHY, reg, tempD, msk);\r
175\r
176 // error check (shouldn't go above 0x3F)\r
177 if (pi_count > 0x47)\r
178 {\r
179 training_message(channel, rank, byte_lane);\r
180 post_code(0xEE, 0xE1);\r
181 }\r
182\r
183 LEAVEFN();\r
184 return;\r
185}\r
186\r
187// get_rdqs:\r
188//\r
189// This function will return the current RDQS delay on the given channel, rank, byte_lane as an absolute PI count.\r
190// (currently doesn't comprehend rank)\r
191uint32_t get_rdqs(\r
192 uint8_t channel,\r
193 uint8_t rank,\r
194 uint8_t byte_lane)\r
195{\r
196 uint32_t reg;\r
197 uint32_t tempD;\r
198 uint32_t pi_count;\r
199\r
200 ENTERFN();\r
201\r
202 // PI (1/128 MCLK)\r
203 // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)\r
204 // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)\r
205 reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE);\r
206 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
207 tempD = isbR32m(DDRPHY, reg);\r
208\r
209 // Adjust PI_COUNT\r
210 pi_count = tempD & 0x7F;\r
211\r
212 LEAVEFN();\r
213 return pi_count;\r
214}\r
215\r
216// set_wdqs:\r
217//\r
218// This function will program the WDQS delays based on an absolute amount of PIs.\r
219// (currently doesn't comprehend rank)\r
220void set_wdqs(\r
221 uint8_t channel,\r
222 uint8_t rank,\r
223 uint8_t byte_lane,\r
224 uint32_t pi_count)\r
225{\r
226 uint32_t reg;\r
227 uint32_t msk;\r
228 uint32_t tempD;\r
229\r
230 ENTERFN();\r
231 DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);\r
232\r
233 // RDPTR (1/2 MCLK, 64 PIs)\r
234 // BL0 -> B01PTRCTL0[07:04] (0x0-0xF)\r
235 // BL1 -> B01PTRCTL0[19:16] (0x0-0xF)\r
236 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
237 msk = (byte_lane & BIT0) ? (BIT19 | BIT18 | BIT17 | BIT16) : (BIT7 | BIT6 | BIT5 | BIT4);\r
238 tempD = pi_count / HALF_CLK;\r
239 tempD <<= (byte_lane & BIT0) ? (16) : (4);\r
240 isbM32m(DDRPHY, reg, tempD, msk);\r
241\r
242 // Adjust PI_COUNT\r
243 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;\r
244\r
245 // PI (1/64 MCLK, 1 PIs)\r
246 // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)\r
247 // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)\r
248 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);\r
249 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
250 msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16);\r
251 tempD = pi_count << 16;\r
252 isbM32m(DDRPHY, reg, tempD, msk);\r
253\r
254 // DEADBAND\r
255 // BL0/1 -> B01DBCTL1[07/10] (+1 select)\r
256 // BL0/1 -> B01DBCTL1[01/04] (enable)\r
257 reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
258 msk = 0x00;\r
259 tempD = 0x00;\r
260 // enable\r
261 msk |= (byte_lane & BIT0) ? (BIT4) : (BIT1);\r
262 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))\r
263 {\r
264 tempD |= msk;\r
265 }\r
266 // select\r
267 msk |= (byte_lane & BIT0) ? (BIT10) : (BIT7);\r
268 if (pi_count < EARLY_DB)\r
269 {\r
270 tempD |= msk;\r
271 }\r
272 isbM32m(DDRPHY, reg, tempD, msk);\r
273\r
274 // error check\r
275 if (pi_count > 0x3F)\r
276 {\r
277 training_message(channel, rank, byte_lane);\r
278 post_code(0xEE, 0xE2);\r
279 }\r
280\r
281 LEAVEFN();\r
282 return;\r
283}\r
284\r
285// get_wdqs:\r
286//\r
287// This function will return the amount of WDQS delay on the given channel, rank, byte_lane as an absolute PI count.\r
288// (currently doesn't comprehend rank)\r
289uint32_t get_wdqs(\r
290 uint8_t channel,\r
291 uint8_t rank,\r
292 uint8_t byte_lane)\r
293{\r
294 uint32_t reg;\r
295 uint32_t tempD;\r
296 uint32_t pi_count;\r
297\r
298 ENTERFN();\r
299\r
300 // RDPTR (1/2 MCLK, 64 PIs)\r
301 // BL0 -> B01PTRCTL0[07:04] (0x0-0xF)\r
302 // BL1 -> B01PTRCTL0[19:16] (0x0-0xF)\r
303 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
304 tempD = isbR32m(DDRPHY, reg);\r
305 tempD >>= (byte_lane & BIT0) ? (16) : (4);\r
306 tempD &= 0xF;\r
307\r
308 // Adjust PI_COUNT\r
309 pi_count = (tempD * HALF_CLK);\r
310\r
311 // PI (1/64 MCLK, 1 PIs)\r
312 // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)\r
313 // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)\r
314 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);\r
315 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
316 tempD = isbR32m(DDRPHY, reg);\r
317 tempD >>= 16;\r
318 tempD &= 0x3F;\r
319\r
320 // Adjust PI_COUNT\r
321 pi_count += tempD;\r
322\r
323 LEAVEFN();\r
324 return pi_count;\r
325}\r
326\r
327// set_wdq:\r
328//\r
329// This function will program the WDQ delays based on an absolute number of PIs.\r
330// (currently doesn't comprehend rank)\r
331void set_wdq(\r
332 uint8_t channel,\r
333 uint8_t rank,\r
334 uint8_t byte_lane,\r
335 uint32_t pi_count)\r
336{\r
337 uint32_t reg;\r
338 uint32_t msk;\r
339 uint32_t tempD;\r
340\r
341 ENTERFN();\r
342 DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);\r
343\r
344 // RDPTR (1/2 MCLK, 64 PIs)\r
345 // BL0 -> B01PTRCTL0[03:00] (0x0-0xF)\r
346 // BL1 -> B01PTRCTL0[15:12] (0x0-0xF)\r
347 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
348 msk = (byte_lane & BIT0) ? (BIT15 | BIT14 | BIT13 | BIT12) : (BIT3 | BIT2 | BIT1 | BIT0);\r
349 tempD = pi_count / HALF_CLK;\r
350 tempD <<= (byte_lane & BIT0) ? (12) : (0);\r
351 isbM32m(DDRPHY, reg, tempD, msk);\r
352\r
353 // Adjust PI_COUNT\r
354 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;\r
355\r
356 // PI (1/64 MCLK, 1 PIs)\r
357 // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)\r
358 // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)\r
359 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);\r
360 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
361 msk = (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);\r
362 tempD = pi_count << 8;\r
363 isbM32m(DDRPHY, reg, tempD, msk);\r
364\r
365 // DEADBAND\r
366 // BL0/1 -> B01DBCTL1[06/09] (+1 select)\r
367 // BL0/1 -> B01DBCTL1[00/03] (enable)\r
368 reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
369 msk = 0x00;\r
370 tempD = 0x00;\r
371 // enable\r
372 msk |= (byte_lane & BIT0) ? (BIT3) : (BIT0);\r
373 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))\r
374 {\r
375 tempD |= msk;\r
376 }\r
377 // select\r
378 msk |= (byte_lane & BIT0) ? (BIT9) : (BIT6);\r
379 if (pi_count < EARLY_DB)\r
380 {\r
381 tempD |= msk;\r
382 }\r
383 isbM32m(DDRPHY, reg, tempD, msk);\r
384\r
385 // error check\r
386 if (pi_count > 0x3F)\r
387 {\r
388 training_message(channel, rank, byte_lane);\r
389 post_code(0xEE, 0xE3);\r
390 }\r
391\r
392 LEAVEFN();\r
393 return;\r
394}\r
395\r
396// get_wdq:\r
397//\r
398// This function will return the amount of WDQ delay on the given channel, rank, byte_lane as an absolute PI count.\r
399// (currently doesn't comprehend rank)\r
400uint32_t get_wdq(\r
401 uint8_t channel,\r
402 uint8_t rank,\r
403 uint8_t byte_lane)\r
404{\r
405 uint32_t reg;\r
406 uint32_t tempD;\r
407 uint32_t pi_count;\r
408\r
409 ENTERFN();\r
410\r
411 // RDPTR (1/2 MCLK, 64 PIs)\r
412 // BL0 -> B01PTRCTL0[03:00] (0x0-0xF)\r
413 // BL1 -> B01PTRCTL0[15:12] (0x0-0xF)\r
414 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
415 tempD = isbR32m(DDRPHY, reg);\r
416 tempD >>= (byte_lane & BIT0) ? (12) : (0);\r
417 tempD &= 0xF;\r
418\r
419 // Adjust PI_COUNT\r
420 pi_count = (tempD * HALF_CLK);\r
421\r
422 // PI (1/64 MCLK, 1 PIs)\r
423 // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)\r
424 // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)\r
425 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);\r
426 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
427 tempD = isbR32m(DDRPHY, reg);\r
428 tempD >>= 8;\r
429 tempD &= 0x3F;\r
430\r
431 // Adjust PI_COUNT\r
432 pi_count += tempD;\r
433\r
434 LEAVEFN();\r
435 return pi_count;\r
436}\r
437\r
438// set_wcmd:\r
439//\r
440// This function will program the WCMD delays based on an absolute number of PIs.\r
441void set_wcmd(\r
442 uint8_t channel,\r
443 uint32_t pi_count)\r
444{\r
445 uint32_t reg;\r
446 uint32_t msk;\r
447 uint32_t tempD;\r
448\r
449 ENTERFN();\r
450 // RDPTR (1/2 MCLK, 64 PIs)\r
451 // CMDPTRREG[11:08] (0x0-0xF)\r
452 reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);\r
453 msk = (BIT11 | BIT10 | BIT9 | BIT8);\r
454 tempD = pi_count / HALF_CLK;\r
455 tempD <<= 8;\r
456 isbM32m(DDRPHY, reg, tempD, msk);\r
457\r
458 // Adjust PI_COUNT\r
459 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;\r
460\r
461 // PI (1/64 MCLK, 1 PIs)\r
462 // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)\r
463 // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)\r
464 // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)\r
465 // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)\r
466 // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)\r
467 // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)\r
468 // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)\r
469 // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)\r
470 reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);\r
471\r
472 msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24) | (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16)\r
473 | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8) | (BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);\r
474\r
475 tempD = (pi_count << 24) | (pi_count << 16) | (pi_count << 8) | (pi_count << 0);\r
476\r
477 isbM32m(DDRPHY, reg, tempD, msk);\r
478 reg = CMDDLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); // PO\r
479 isbM32m(DDRPHY, reg, tempD, msk);\r
480\r
481 // DEADBAND\r
482 // CMDCFGREG0[17] (+1 select)\r
483 // CMDCFGREG0[16] (enable)\r
484 reg = CMDCFGREG0 + (channel * DDRIOCCC_CH_OFFSET);\r
485 msk = 0x00;\r
486 tempD = 0x00;\r
487 // enable\r
488 msk |= BIT16;\r
489 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))\r
490 {\r
491 tempD |= msk;\r
492 }\r
493 // select\r
494 msk |= BIT17;\r
495 if (pi_count < EARLY_DB)\r
496 {\r
497 tempD |= msk;\r
498 }\r
499 isbM32m(DDRPHY, reg, tempD, msk);\r
500\r
501 // error check\r
502 if (pi_count > 0x3F)\r
503 {\r
504 post_code(0xEE, 0xE4);\r
505 }\r
506\r
507 LEAVEFN();\r
508 return;\r
509}\r
510\r
511// get_wcmd:\r
512//\r
513// This function will return the amount of WCMD delay on the given channel as an absolute PI count.\r
514uint32_t get_wcmd(\r
515 uint8_t channel)\r
516{\r
517 uint32_t reg;\r
518 uint32_t tempD;\r
519 uint32_t pi_count;\r
520\r
521 ENTERFN();\r
522 // RDPTR (1/2 MCLK, 64 PIs)\r
523 // CMDPTRREG[11:08] (0x0-0xF)\r
524 reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);\r
525 tempD = isbR32m(DDRPHY, reg);\r
526 tempD >>= 8;\r
527 tempD &= 0xF;\r
528\r
529 // Adjust PI_COUNT\r
530 pi_count = tempD * HALF_CLK;\r
531\r
532 // PI (1/64 MCLK, 1 PIs)\r
533 // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)\r
534 // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)\r
535 // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)\r
536 // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)\r
537 // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)\r
538 // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)\r
539 // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)\r
540 // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)\r
541 reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);\r
542 tempD = isbR32m(DDRPHY, reg);\r
543 tempD >>= 16;\r
544 tempD &= 0x3F;\r
545\r
546 // Adjust PI_COUNT\r
547 pi_count += tempD;\r
548\r
549 LEAVEFN();\r
550 return pi_count;\r
551}\r
552\r
553// set_wclk:\r
554//\r
555// This function will program the WCLK delays based on an absolute number of PIs.\r
556void set_wclk(\r
557 uint8_t channel,\r
558 uint8_t rank,\r
559 uint32_t pi_count)\r
560{\r
561 uint32_t reg;\r
562 uint32_t msk;\r
563 uint32_t tempD;\r
564\r
565 ENTERFN();\r
566 // RDPTR (1/2 MCLK, 64 PIs)\r
567 // CCPTRREG[15:12] -> CLK1 (0x0-0xF)\r
568 // CCPTRREG[11:08] -> CLK0 (0x0-0xF)\r
569 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);\r
570 msk = (BIT15 | BIT14 | BIT13 | BIT12) | (BIT11 | BIT10 | BIT9 | BIT8);\r
571 tempD = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8);\r
572 isbM32m(DDRPHY, reg, tempD, msk);\r
573\r
574 // Adjust PI_COUNT\r
575 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;\r
576\r
577 // PI (1/64 MCLK, 1 PIs)\r
578 // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)\r
579 // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)\r
580 reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0);\r
581 reg += (channel * DDRIOCCC_CH_OFFSET);\r
582 msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16) | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);\r
583 tempD = (pi_count << 16) | (pi_count << 8);\r
584 isbM32m(DDRPHY, reg, tempD, msk);\r
585 reg = (rank) ? (ECCB1DLLPICODER1) : (ECCB1DLLPICODER1);\r
586 reg += (channel * DDRIOCCC_CH_OFFSET);\r
587 isbM32m(DDRPHY, reg, tempD, msk);\r
588 reg = (rank) ? (ECCB1DLLPICODER2) : (ECCB1DLLPICODER2);\r
589 reg += (channel * DDRIOCCC_CH_OFFSET);\r
590 isbM32m(DDRPHY, reg, tempD, msk);\r
591 reg = (rank) ? (ECCB1DLLPICODER3) : (ECCB1DLLPICODER3);\r
592 reg += (channel * DDRIOCCC_CH_OFFSET);\r
593 isbM32m(DDRPHY, reg, tempD, msk);\r
594\r
595 // DEADBAND\r
596 // CCCFGREG1[11:08] (+1 select)\r
597 // CCCFGREG1[03:00] (enable)\r
598 reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);\r
599 msk = 0x00;\r
600 tempD = 0x00;\r
601 // enable\r
602 msk |= (BIT3 | BIT2 | BIT1 | BIT0); // only ??? matters\r
603 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))\r
604 {\r
605 tempD |= msk;\r
606 }\r
607 // select\r
608 msk |= (BIT11 | BIT10 | BIT9 | BIT8); // only ??? matters\r
609 if (pi_count < EARLY_DB)\r
610 {\r
611 tempD |= msk;\r
612 }\r
613 isbM32m(DDRPHY, reg, tempD, msk);\r
614\r
615 // error check\r
616 if (pi_count > 0x3F)\r
617 {\r
618 post_code(0xEE, 0xE5);\r
619 }\r
620\r
621 LEAVEFN();\r
622 return;\r
623}\r
624\r
625// get_wclk:\r
626//\r
627// This function will return the amout of WCLK delay on the given channel, rank as an absolute PI count.\r
628uint32_t get_wclk(\r
629 uint8_t channel,\r
630 uint8_t rank)\r
631{\r
632 uint32_t reg;\r
633 uint32_t tempD;\r
634 uint32_t pi_count;\r
635\r
636 ENTERFN();\r
637 // RDPTR (1/2 MCLK, 64 PIs)\r
638 // CCPTRREG[15:12] -> CLK1 (0x0-0xF)\r
639 // CCPTRREG[11:08] -> CLK0 (0x0-0xF)\r
640 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);\r
641 tempD = isbR32m(DDRPHY, reg);\r
642 tempD >>= (rank) ? (12) : (8);\r
643 tempD &= 0xF;\r
644\r
645 // Adjust PI_COUNT\r
646 pi_count = tempD * HALF_CLK;\r
647\r
648 // PI (1/64 MCLK, 1 PIs)\r
649 // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)\r
650 // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)\r
651 reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0);\r
652 reg += (channel * DDRIOCCC_CH_OFFSET);\r
653 tempD = isbR32m(DDRPHY, reg);\r
654 tempD >>= (rank) ? (16) : (8);\r
655 tempD &= 0x3F;\r
656\r
657 pi_count += tempD;\r
658\r
659 LEAVEFN();\r
660 return pi_count;\r
661}\r
662\r
663// set_wctl:\r
664//\r
665// This function will program the WCTL delays based on an absolute number of PIs.\r
666// (currently doesn't comprehend rank)\r
667void set_wctl(\r
668 uint8_t channel,\r
669 uint8_t rank,\r
670 uint32_t pi_count)\r
671{\r
672 uint32_t reg;\r
673 uint32_t msk;\r
674 uint32_t tempD;\r
675\r
676 ENTERFN();\r
677\r
678 // RDPTR (1/2 MCLK, 64 PIs)\r
679 // CCPTRREG[31:28] (0x0-0xF)\r
680 // CCPTRREG[27:24] (0x0-0xF)\r
681 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);\r
682 msk = (BIT31 | BIT30 | BIT29 | BIT28) | (BIT27 | BIT26 | BIT25 | BIT24);\r
683 tempD = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24);\r
684 isbM32m(DDRPHY, reg, tempD, msk);\r
685\r
686 // Adjust PI_COUNT\r
687 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;\r
688\r
689 // PI (1/64 MCLK, 1 PIs)\r
690 // ECCB1DLLPICODER?[29:24] (0x00-0x3F)\r
691 // ECCB1DLLPICODER?[29:24] (0x00-0x3F)\r
692 reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);\r
693 msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);\r
694 tempD = (pi_count << 24);\r
695 isbM32m(DDRPHY, reg, tempD, msk);\r
696 reg = ECCB1DLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);\r
697 isbM32m(DDRPHY, reg, tempD, msk);\r
698 reg = ECCB1DLLPICODER2 + (channel * DDRIOCCC_CH_OFFSET);\r
699 isbM32m(DDRPHY, reg, tempD, msk);\r
700 reg = ECCB1DLLPICODER3 + (channel * DDRIOCCC_CH_OFFSET);\r
701 isbM32m(DDRPHY, reg, tempD, msk);\r
702\r
703 // DEADBAND\r
704 // CCCFGREG1[13:12] (+1 select)\r
705 // CCCFGREG1[05:04] (enable)\r
706 reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);\r
707 msk = 0x00;\r
708 tempD = 0x00;\r
709 // enable\r
710 msk |= (BIT5 | BIT4); // only ??? matters\r
711 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))\r
712 {\r
713 tempD |= msk;\r
714 }\r
715 // select\r
716 msk |= (BIT13 | BIT12); // only ??? matters\r
717 if (pi_count < EARLY_DB)\r
718 {\r
719 tempD |= msk;\r
720 }\r
721 isbM32m(DDRPHY, reg, tempD, msk);\r
722\r
723 // error check\r
724 if (pi_count > 0x3F)\r
725 {\r
726 post_code(0xEE, 0xE6);\r
727 }\r
728\r
729 LEAVEFN();\r
730 return;\r
731}\r
732\r
733// get_wctl:\r
734//\r
735// This function will return the amount of WCTL delay on the given channel, rank as an absolute PI count.\r
736// (currently doesn't comprehend rank)\r
737uint32_t get_wctl(\r
738 uint8_t channel,\r
739 uint8_t rank)\r
740{\r
741 uint32_t reg;\r
742 uint32_t tempD;\r
743 uint32_t pi_count;\r
744\r
745 ENTERFN();\r
746\r
747 // RDPTR (1/2 MCLK, 64 PIs)\r
748 // CCPTRREG[31:28] (0x0-0xF)\r
749 // CCPTRREG[27:24] (0x0-0xF)\r
750 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);\r
751 tempD = isbR32m(DDRPHY, reg);\r
752 tempD >>= 24;\r
753 tempD &= 0xF;\r
754\r
755 // Adjust PI_COUNT\r
756 pi_count = tempD * HALF_CLK;\r
757\r
758 // PI (1/64 MCLK, 1 PIs)\r
759 // ECCB1DLLPICODER?[29:24] (0x00-0x3F)\r
760 // ECCB1DLLPICODER?[29:24] (0x00-0x3F)\r
761 reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);\r
762 tempD = isbR32m(DDRPHY, reg);\r
763 tempD >>= 24;\r
764 tempD &= 0x3F;\r
765\r
766 // Adjust PI_COUNT\r
767 pi_count += tempD;\r
768\r
769 LEAVEFN();\r
770 return pi_count;\r
771}\r
772\r
773// set_vref:\r
774//\r
775// This function will program the internal Vref setting in a given byte lane in a given channel.\r
776void set_vref(\r
777 uint8_t channel,\r
778 uint8_t byte_lane,\r
779 uint32_t setting)\r
780{\r
781 uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);\r
782\r
783 ENTERFN();\r
784 DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n", channel, byte_lane, setting);\r
785\r
786 isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)),\r
787 (vref_codes[setting] << 2), (BIT7 | BIT6 | BIT5 | BIT4 | BIT3 | BIT2));\r
788 //isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)), (setting<<2), (BIT7|BIT6|BIT5|BIT4|BIT3|BIT2));\r
789 // need to wait ~300ns for Vref to settle (check that this is necessary)\r
790 delay_n(300);\r
791 // ??? may need to clear pointers ???\r
792 LEAVEFN();\r
793 return;\r
794}\r
795\r
796// get_vref:\r
797//\r
798// This function will return the internal Vref setting for the given channel, byte_lane;\r
799uint32_t get_vref(\r
800 uint8_t channel,\r
801 uint8_t byte_lane)\r
802{\r
803 uint8_t j;\r
804 uint32_t ret_val = sizeof(vref_codes) / 2;\r
805 uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);\r
806\r
807 uint32_t tempD;\r
808\r
809 ENTERFN();\r
810 tempD = isbR32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)));\r
811 tempD >>= 2;\r
812 tempD &= 0x3F;\r
813 for (j = 0; j < sizeof(vref_codes); j++)\r
814 {\r
815 if (vref_codes[j] == tempD)\r
816 {\r
817 ret_val = j;\r
818 break;\r
819 }\r
820 }\r
821 LEAVEFN();\r
822 return ret_val;\r
823}\r
824\r
825// clear_pointers:\r
826//\r
827// This function will be used to clear the pointers in a given byte lane in a given channel.\r
828void clear_pointers(\r
829 void)\r
830{\r
831 uint8_t channel_i;\r
832 uint8_t bl_i;\r
833\r
834 ENTERFN();\r
835 for (channel_i = 0; channel_i < NUM_CHANNELS; channel_i++)\r
836 {\r
837 for (bl_i = 0; bl_i < NUM_BYTE_LANES; bl_i++)\r
838 {\r
839 isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), ~(BIT8),\r
840 (BIT8));\r
841 //delay_m(1); // DEBUG\r
842 isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), (BIT8),\r
843 (BIT8));\r
844 }\r
845 }\r
846 LEAVEFN();\r
847 return;\r
848}\r
849\r
// enable_cache:
//
// Intentionally empty: cache control is not used in Quark MRC.
void enable_cache(void)
{
}
857\r
// disable_cache:
//
// Intentionally empty: cache control is not used in Quark MRC.
void disable_cache(void)
{
}
865\r
866// Send DRAM command, data should be formated\r
867// using DCMD_Xxxx macro or emrsXCommand structure.\r
868static void dram_init_command(\r
869 uint32_t data)\r
870{\r
871 Wr32(DCMD, 0, data);\r
872}\r
873\r
874// find_rising_edge:\r
875//\r
876// This function will find the rising edge transition on RCVN or WDQS.\r
877void find_rising_edge(\r
878 MRCParams_t *mrc_params,\r
879 uint32_t delay[],\r
880 uint8_t channel,\r
881 uint8_t rank,\r
882 bool rcvn)\r
883{\r
884\r
885#define SAMPLE_CNT 3 // number of sample points\r
886#define SAMPLE_DLY 26 // number of PIs to increment per sample\r
887#define FORWARD true // indicates to increase delays when looking for edge\r
888#define BACKWARD false // indicates to decrease delays when looking for edge\r
889\r
890 bool all_edges_found; // determines stop condition\r
891 bool direction[NUM_BYTE_LANES]; // direction indicator\r
892 uint8_t sample_i; // sample counter\r
893 uint8_t bl_i; // byte lane counter\r
894 uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor\r
895 uint32_t sample_result[SAMPLE_CNT]; // results of "sample_dqs()"\r
896 uint32_t tempD; // temporary DWORD\r
897 uint32_t transition_pattern;\r
898\r
899 ENTERFN();\r
900\r
901 // select hte and request initial configuration\r
902 select_hte(mrc_params);\r
903 first_run = 1;\r
904\r
905 // Take 3 sample points (T1,T2,T3) to obtain a transition pattern.\r
906 for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)\r
907 {\r
908 // program the desired delays for sample\r
909 for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)\r
910 {\r
911 // increase sample delay by 26 PI (0.2 CLK)\r
912 if (rcvn)\r
913 {\r
914 set_rcvn(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));\r
915 }\r
916 else\r
917 {\r
918 set_wdqs(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));\r
919 }\r
920 } // bl_i loop\r
921 // take samples (Tsample_i)\r
922 sample_result[sample_i] = sample_dqs(mrc_params, channel, rank, rcvn);\r
923\r
924 DPF(D_TRN, "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n",\r
925 (rcvn ? "RCVN" : "WDQS"), channel, rank,\r
926 sample_i, sample_i * SAMPLE_DLY, sample_result[sample_i]);\r
927\r
928 } // sample_i loop\r
929\r
930 // This pattern will help determine where we landed and ultimately how to place RCVEN/WDQS.\r
931 for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)\r
932 {\r
933 // build "transition_pattern" (MSB is 1st sample)\r
934 transition_pattern = 0x00;\r
935 for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)\r
936 {\r
937 transition_pattern |= ((sample_result[sample_i] & (1 << bl_i)) >> bl_i) << (SAMPLE_CNT - 1 - sample_i);\r
938 } // sample_i loop\r
939\r
940 DPF(D_TRN, "=== transition pattern %d\n", transition_pattern);\r
941\r
942 // set up to look for rising edge based on "transition_pattern"\r
943 switch (transition_pattern)\r
944 {\r
945 case 0x00: // sampled 0->0->0\r
946 // move forward from T3 looking for 0->1\r
947 delay[bl_i] += 2 * SAMPLE_DLY;\r
948 direction[bl_i] = FORWARD;\r
949 break;\r
950 case 0x01: // sampled 0->0->1\r
951 case 0x05: // sampled 1->0->1 (bad duty cycle) *HSD#237503*\r
952 // move forward from T2 looking for 0->1\r
953 delay[bl_i] += 1 * SAMPLE_DLY;\r
954 direction[bl_i] = FORWARD;\r
955 break;\r
956// HSD#237503\r
957// case 0x02: // sampled 0->1->0 (bad duty cycle)\r
958// training_message(channel, rank, bl_i);\r
959// post_code(0xEE, 0xE8);\r
960// break;\r
961 case 0x02: // sampled 0->1->0 (bad duty cycle) *HSD#237503*\r
962 case 0x03: // sampled 0->1->1\r
963 // move forward from T1 looking for 0->1\r
964 delay[bl_i] += 0 * SAMPLE_DLY;\r
965 direction[bl_i] = FORWARD;\r
966 break;\r
967 case 0x04: // sampled 1->0->0 (assumes BL8, HSD#234975)\r
968 // move forward from T3 looking for 0->1\r
969 delay[bl_i] += 2 * SAMPLE_DLY;\r
970 direction[bl_i] = FORWARD;\r
971 break;\r
972// HSD#237503\r
973// case 0x05: // sampled 1->0->1 (bad duty cycle)\r
974// training_message(channel, rank, bl_i);\r
975// post_code(0xEE, 0xE9);\r
976// break;\r
977 case 0x06: // sampled 1->1->0\r
978 case 0x07: // sampled 1->1->1\r
979 // move backward from T1 looking for 1->0\r
980 delay[bl_i] += 0 * SAMPLE_DLY;\r
981 direction[bl_i] = BACKWARD;\r
982 break;\r
983 default:\r
984 post_code(0xEE, 0xEE);\r
985 break;\r
986 } // transition_pattern switch\r
987 // program delays\r
988 if (rcvn)\r
989 {\r
990 set_rcvn(channel, rank, bl_i, delay[bl_i]);\r
991 }\r
992 else\r
993 {\r
994 set_wdqs(channel, rank, bl_i, delay[bl_i]);\r
995 }\r
996 } // bl_i loop\r
997\r
998 // Based on the observed transition pattern on the byte lane,\r
999 // begin looking for a rising edge with single PI granularity.\r
1000 do\r
1001 {\r
1002 all_edges_found = true; // assume all byte lanes passed\r
1003 tempD = sample_dqs(mrc_params, channel, rank, rcvn); // take a sample\r
1004 // check all each byte lane for proper edge\r
1005 for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)\r
1006 {\r
1007 if (tempD & (1 << bl_i))\r
1008 {\r
1009 // sampled "1"\r
1010 if (direction[bl_i] == BACKWARD)\r
1011 {\r
1012 // keep looking for edge on this byte lane\r
1013 all_edges_found = false;\r
1014 delay[bl_i] -= 1;\r
1015 if (rcvn)\r
1016 {\r
1017 set_rcvn(channel, rank, bl_i, delay[bl_i]);\r
1018 }\r
1019 else\r
1020 {\r
1021 set_wdqs(channel, rank, bl_i, delay[bl_i]);\r
1022 }\r
1023 }\r
1024 }\r
1025 else\r
1026 {\r
1027 // sampled "0"\r
1028 if (direction[bl_i] == FORWARD)\r
1029 {\r
1030 // keep looking for edge on this byte lane\r
1031 all_edges_found = false;\r
1032 delay[bl_i] += 1;\r
1033 if (rcvn)\r
1034 {\r
1035 set_rcvn(channel, rank, bl_i, delay[bl_i]);\r
1036 }\r
1037 else\r
1038 {\r
1039 set_wdqs(channel, rank, bl_i, delay[bl_i]);\r
1040 }\r
1041 }\r
1042 }\r
1043 } // bl_i loop\r
1044 } while (!all_edges_found);\r
1045\r
1046 // restore DDR idle state\r
1047 dram_init_command(DCMD_PREA(rank));\r
1048\r
1049 DPF(D_TRN, "Delay %03X %03X %03X %03X\n",\r
1050 delay[0], delay[1], delay[2], delay[3]);\r
1051\r
1052 LEAVEFN();\r
1053 return;\r
1054}\r
1055\r
1056// sample_dqs:\r
1057//\r
1058// This function will sample the DQTRAINSTS registers in the given channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'.\r
1059// It will return an encoded DWORD in which each bit corresponds to the sampled value on the byte lane.\r
1060uint32_t sample_dqs(\r
1061 MRCParams_t *mrc_params,\r
1062 uint8_t channel,\r
1063 uint8_t rank,\r
1064 bool rcvn)\r
1065{\r
1066 uint8_t j; // just a counter\r
1067 uint8_t bl_i; // which BL in the module (always 2 per module)\r
1068 uint8_t bl_grp; // which BL module\r
1069 uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor\r
1070 uint32_t msk[2]; // BLx in module\r
1071 uint32_t sampled_val[SAMPLE_SIZE]; // DQTRAINSTS register contents for each sample\r
1072 uint32_t num_0s; // tracks the number of '0' samples\r
1073 uint32_t num_1s; // tracks the number of '1' samples\r
1074 uint32_t ret_val = 0x00; // assume all '0' samples\r
1075 uint32_t address = get_addr(mrc_params, channel, rank);\r
1076\r
1077 // initialise "msk[]"\r
1078 msk[0] = (rcvn) ? (BIT1) : (BIT9); // BL0\r
1079 msk[1] = (rcvn) ? (BIT0) : (BIT8); // BL1\r
1080\r
1081\r
1082 // cycle through each byte lane group\r
1083 for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++)\r
1084 {\r
1085 // take SAMPLE_SIZE samples\r
1086 for (j = 0; j < SAMPLE_SIZE; j++)\r
1087 {\r
1088 HteMemOp(address, first_run, rcvn?0:1);\r
1089 first_run = 0;\r
1090\r
1091 // record the contents of the proper DQTRAINSTS register\r
1092 sampled_val[j] = isbR32m(DDRPHY, (DQTRAINSTS + (bl_grp * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)));\r
1093 }\r
1094 // look for a majority value ( (SAMPLE_SIZE/2)+1 ) on the byte lane\r
1095 // and set that value in the corresponding "ret_val" bit\r
1096 for (bl_i = 0; bl_i < 2; bl_i++)\r
1097 {\r
1098 num_0s = 0x00; // reset '0' tracker for byte lane\r
1099 num_1s = 0x00; // reset '1' tracker for byte lane\r
1100 for (j = 0; j < SAMPLE_SIZE; j++)\r
1101 {\r
1102 if (sampled_val[j] & msk[bl_i])\r
1103 {\r
1104 num_1s++;\r
1105 }\r
1106 else\r
1107 {\r
1108 num_0s++;\r
1109 }\r
1110 }\r
1111 if (num_1s > num_0s)\r
1112 {\r
1113 ret_val |= (1 << (bl_i + (bl_grp * 2)));\r
1114 }\r
1115 }\r
1116 }\r
1117\r
1118 // "ret_val.0" contains the status of BL0\r
1119 // "ret_val.1" contains the status of BL1\r
1120 // "ret_val.2" contains the status of BL2\r
1121 // etc.\r
1122 return ret_val;\r
1123}\r
1124\r
1125// get_addr:\r
1126//\r
1127// This function will return a 32 bit address in the desired channel and rank.\r
1128uint32_t get_addr(\r
1129 MRCParams_t *mrc_params,\r
1130 uint8_t channel,\r
1131 uint8_t rank)\r
1132{\r
1133 uint32_t offset = 0x02000000; // 32MB\r
1134\r
1135 // Begin product specific code\r
1136 if (channel > 0)\r
1137 {\r
1138 DPF(D_ERROR, "ILLEGAL CHANNEL\n");\r
1139 DEAD_LOOP();\r
1140 }\r
1141\r
1142 if (rank > 1)\r
1143 {\r
1144 DPF(D_ERROR, "ILLEGAL RANK\n");\r
1145 DEAD_LOOP();\r
1146 }\r
1147\r
1148 // use 256MB lowest density as per DRP == 0x0003\r
1149 offset += rank * (256 * 1024 * 1024);\r
1150\r
1151 return offset;\r
1152}\r
1153\r
1154// byte_lane_mask:\r
1155//\r
1156// This function will return a 32 bit mask that will be used to check for byte lane failures.\r
1157uint32_t byte_lane_mask(\r
1158 MRCParams_t *mrc_params)\r
1159{\r
1160 uint32_t j;\r
1161 uint32_t ret_val = 0x00;\r
1162\r
1163 // set "ret_val" based on NUM_BYTE_LANES such that you will check only BL0 in "result"\r
1164 // (each bit in "result" represents a byte lane)\r
1165 for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES)\r
1166 {\r
1167 ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES));\r
1168 }\r
1169\r
1170 // HSD#235037\r
1171 // need to adjust the mask for 16-bit mode\r
1172 if (mrc_params->channel_width == x16)\r
1173 {\r
1174 ret_val |= (ret_val << 2);\r
1175 }\r
1176\r
1177 return ret_val;\r
1178}\r
1179\r
1180\r
// read_tsc:
//
// Returns the current contents of the time stamp counter as a uint64_t.
// On SIM/GCC builds the RDTSC instruction is issued through inline assembly,
// otherwise the __rdtsc() intrinsic is used.
uint64_t read_tsc(
    void)
{
  uint64_t tsc; // EDX:EAX

#if defined (SIM) || defined (GCC)
  uint32_t tscH; // EDX
  uint32_t tscL; // EAX

  // The asm must be volatile: RDTSC has no inputs, so without the qualifier the
  // compiler may treat repeated reads as identical and merge them or hoist them
  // out of a polling loop (e.g. the busy-wait in delay_n).
  // __asm__ is used instead of asm so the code also builds in strict ISO C modes.
  __asm__ __volatile__("rdtsc" : "=a"(tscL), "=d"(tscH));
  tsc = ((uint64_t) tscH << 32) | tscL;
#else
  tsc = __rdtsc();
#endif

  return tsc;
}
1202\r
// get_tsc_freq:
//
// Returns the TSC frequency in MHz.
// The value is looked up in a four entry table; the fuse based selection is
// compiled out, so the first entry (533) is always returned.
uint32_t get_tsc_freq(
    void)
{
  static const uint32_t freq_mhz[] = { 533, 400, 200, 100 };
  uint32_t sel;

#if 0
  sel = (isbR32m(FUSE, 0) >> 12) & (BIT1|BIT0);
#else
  // todo!!! Fixed 533MHz for emulation or debugging
  sel = 0;
#endif

  return freq_mhz[sel];
}
1220\r
#ifndef SIM
// delay_n:
//
// This is a simple busy-wait delay function.
// It takes "nanoseconds" as a parameter and spins until the TSC has advanced
// by the equivalent number of ticks: ticks = TSC_MHz * nanoseconds / 1000.
void delay_n(
    uint32_t nanoseconds)
{
  uint32_t freq_mhz;
  uint32_t ticks;
  uint64_t final_tsc;

  freq_mhz = get_tsc_freq();

  // Convert in two parts (whole microseconds + sub-microsecond remainder).
  // A direct "freq_mhz * nanoseconds" is a 32 bit product that wraps once the
  // request exceeds roughly 8ms at 533MHz, which would cut the delay short.
  // The split yields exactly the same tick count and stays within 32 bit math.
  ticks = freq_mhz * (nanoseconds / 1000);
  ticks += (freq_mhz * (nanoseconds % 1000)) / 1000;

  final_tsc = read_tsc();
  final_tsc += ticks;

  while (read_tsc() < final_tsc)
    ;
  return;
}
#endif
1238\r
// delay_u:
//
// This is a simple delay function.
// It takes "microseconds" as a parameter and waits by calling delay_n(1000)
// once per requested microsecond.
void delay_u(
    uint32_t microseconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, so count the microseconds down one by one
  for (remaining = microseconds; remaining != 0; remaining--)
  {
    delay_n(1000);
  }
}
1253\r
// delay_m:
//
// This is a simple delay function.
// It takes "milliseconds" as a parameter and waits by calling delay_u(1000)
// once per requested millisecond.
void delay_m(
    uint32_t milliseconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, so count the milliseconds down one by one
  for (remaining = milliseconds; remaining != 0; remaining--)
  {
    delay_u(1000);
  }
}
1268\r
// delay_s:
//
// This is a simple delay function.
// It takes "seconds" as a parameter and waits by calling delay_m(1000)
// once per requested second.
void delay_s(
    uint32_t seconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, so count the seconds down one by one
  for (remaining = seconds; remaining != 0; remaining--)
  {
    delay_m(1000);
  }
}
1283\r
1284// post_code:\r
1285//\r
1286// This function will output the POST CODE to the four 7-Segment LED displays.\r
1287void post_code(\r
1288 uint8_t major,\r
1289 uint8_t minor)\r
1290{\r
1291#ifdef EMU\r
1292 // Update global variable for execution tracking in debug env\r
1293 PostCode = ((major << 8) | minor);\r
1294#endif\r
1295\r
1296 // send message to UART\r
1297 DPF(D_INFO, "POST: 0x%01X%02X\n", major, minor);\r
1298\r
1299 // error check:\r
1300 if (major == 0xEE)\r
1301 {\r
1302 // todo!!! Consider updating error status and exit MRC\r
1303#ifdef SIM\r
1304 // enable Ctrl-C handling\r
1305 for(;;) delay_n(100);\r
1306#else\r
1307 DEAD_LOOP();\r
1308#endif\r
1309 }\r
1310}\r
1311\r
1312void training_message(\r
1313 uint8_t channel,\r
1314 uint8_t rank,\r
1315 uint8_t byte_lane)\r
1316{\r
1317 // send message to UART\r
1318 DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane);\r
1319 return;\r
1320}\r
1321\r
1322void print_timings(\r
1323 MRCParams_t *mrc_params)\r
1324{\r
1325 uint8_t algo_i;\r
1326 uint8_t channel_i;\r
1327 uint8_t rank_i;\r
1328 uint8_t bl_i;\r
1329 uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1;\r
1330\r
1331 DPF(D_INFO, "\n---------------------------");\r
1332 DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3");\r
1333 DPF(D_INFO, "\n===========================");\r
1334 for (algo_i = 0; algo_i < eMAX_ALGOS; algo_i++)\r
1335 {\r
1336 for (channel_i = 0; channel_i < NUM_CHANNELS; channel_i++)\r
1337 {\r
1338 if (mrc_params->channel_enables & (1 << channel_i))\r
1339 {\r
1340 for (rank_i = 0; rank_i < NUM_RANKS; rank_i++)\r
1341 {\r
1342 if (mrc_params->rank_enables & (1 << rank_i))\r
1343 {\r
1344 switch (algo_i)\r
1345 {\r
1346 case eRCVN:\r
1347 DPF(D_INFO, "\nRCVN[%02d:%02d]", channel_i, rank_i);\r
1348 break;\r
1349 case eWDQS:\r
1350 DPF(D_INFO, "\nWDQS[%02d:%02d]", channel_i, rank_i);\r
1351 break;\r
1352 case eWDQx:\r
1353 DPF(D_INFO, "\nWDQx[%02d:%02d]", channel_i, rank_i);\r
1354 break;\r
1355 case eRDQS:\r
1356 DPF(D_INFO, "\nRDQS[%02d:%02d]", channel_i, rank_i);\r
1357 break;\r
1358 case eVREF:\r
1359 DPF(D_INFO, "\nVREF[%02d:%02d]", channel_i, rank_i);\r
1360 break;\r
1361 case eWCMD:\r
1362 DPF(D_INFO, "\nWCMD[%02d:%02d]", channel_i, rank_i);\r
1363 break;\r
1364 case eWCTL:\r
1365 DPF(D_INFO, "\nWCTL[%02d:%02d]", channel_i, rank_i);\r
1366 break;\r
1367 case eWCLK:\r
1368 DPF(D_INFO, "\nWCLK[%02d:%02d]", channel_i, rank_i);\r
1369 break;\r
1370 default:\r
1371 break;\r
1372 } // algo_i switch\r
1373 for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)\r
1374 {\r
1375 switch (algo_i)\r
1376 {\r
1377 case eRCVN:\r
1378 DPF(D_INFO, " %03d", get_rcvn(channel_i, rank_i, bl_i));\r
1379 break;\r
1380 case eWDQS:\r
1381 DPF(D_INFO, " %03d", get_wdqs(channel_i, rank_i, bl_i));\r
1382 break;\r
1383 case eWDQx:\r
1384 DPF(D_INFO, " %03d", get_wdq(channel_i, rank_i, bl_i));\r
1385 break;\r
1386 case eRDQS:\r
1387 DPF(D_INFO, " %03d", get_rdqs(channel_i, rank_i, bl_i));\r
1388 break;\r
1389 case eVREF:\r
1390 DPF(D_INFO, " %03d", get_vref(channel_i, bl_i));\r
1391 break;\r
1392 case eWCMD:\r
1393 DPF(D_INFO, " %03d", get_wcmd(channel_i));\r
1394 break;\r
1395 case eWCTL:\r
1396 DPF(D_INFO, " %03d", get_wctl(channel_i, rank_i));\r
1397 break;\r
1398 case eWCLK:\r
1399 DPF(D_INFO, " %03d", get_wclk(channel_i, rank_i));\r
1400 break;\r
1401 default:\r
1402 break;\r
1403 } // algo_i switch\r
1404 } // bl_i loop\r
1405 } // if rank_i enabled\r
1406 } // rank_i loop\r
1407 } // if channel_i enabled\r
1408 } // channel_i loop\r
1409 } // algo_i loop\r
1410 DPF(D_INFO, "\n---------------------------");\r
1411 DPF(D_INFO, "\n");\r
1412 return;\r
1413}\r
1414\r
// lfsr32:
//
// 32 bit LFSR with characteristic polynomial: X^32 + X^22 + X^2 + X^1
// Takes a pointer to the previous 32 bit value and advances it in place by
// 32 single-bit steps. The feedback is the inverted XOR of bits 0, 1, 2 and 22,
// so an all-ones value maps onto itself.
void lfsr32(
    uint32_t *lfsr_ptr)
{
  uint32_t state;
  uint32_t feedback;
  uint32_t steps_left;

  state = *lfsr_ptr;

  for (steps_left = 32; steps_left != 0; steps_left--)
  {
    // fold the four taps into bit 0, invert, and keep just that bit
    feedback = (state ^ (state >> 1) ^ (state >> 2) ^ (state >> 22) ^ 1) & 1;

    // shift right by one and feed the new bit in at the top
    state = (state >> 1) | (feedback << 31);
  }

  *lfsr_ptr = state;
}
1439\r
1440// The purpose of this function is to ensure the SEC comes out of reset\r
1441// and IA initiates the SEC enabling Memory Scrambling.\r
1442void enable_scrambling(\r
1443 MRCParams_t *mrc_params)\r
1444{\r
1445 uint32_t lfsr = 0;\r
1446 uint8_t i;\r
1447\r
1448 if (mrc_params->scrambling_enables == 0)\r
1449 return;\r
1450\r
1451 ENTERFN();\r
1452\r
1453 // 32 bit seed is always stored in BIOS NVM.\r
1454 lfsr = mrc_params->timings.scrambler_seed;\r
1455\r
1456 if (mrc_params->boot_mode == bmCold)\r
1457 {\r
1458 // factory value is 0 and in first boot, a clock based seed is loaded.\r
1459 if (lfsr == 0)\r
1460 {\r
1461 lfsr = read_tsc() & 0x0FFFFFFF; // get seed from system clock and make sure it is not all 1's\r
1462 }\r
1463 // need to replace scrambler\r
1464 // get next 32bit LFSR 16 times which is the last part of the previous scrambler vector.\r
1465 else\r
1466 {\r
1467 for (i = 0; i < 16; i++)\r
1468 {\r
1469 lfsr32(&lfsr);\r
1470 }\r
1471 }\r
1472 mrc_params->timings.scrambler_seed = lfsr; // save new seed.\r
1473 } // if (cold_boot)\r
1474\r
1475 // In warm boot or S3 exit, we have the previous seed.\r
1476 // In cold boot, we have the last 32bit LFSR which is the new seed.\r
1477 lfsr32(&lfsr); // shift to next value\r
1478 isbW32m(MCU, SCRMSEED, (lfsr & 0x0003FFFF));\r
1479 for (i = 0; i < 2; i++)\r
1480 {\r
1481 isbW32m(MCU, SCRMLO + i, (lfsr & 0xAAAAAAAA));\r
1482 }\r
1483\r
1484 LEAVEFN();\r
1485 return;\r
1486}\r
1487\r
1488// This function will store relevant timing data\r
1489// This data will be used on subsequent boots to speed up boot times\r
1490// and is required for Suspend To RAM capabilities.\r
1491void store_timings(\r
1492 MRCParams_t *mrc_params)\r
1493{\r
1494 uint8_t ch, rk, bl;\r
1495 MrcTimings_t *mt = &mrc_params->timings;\r
1496\r
1497 for (ch = 0; ch < NUM_CHANNELS; ch++)\r
1498 {\r
1499 for (rk = 0; rk < NUM_RANKS; rk++)\r
1500 {\r
1501 for (bl = 0; bl < NUM_BYTE_LANES; bl++)\r
1502 {\r
1503 mt->rcvn[ch][rk][bl] = get_rcvn(ch, rk, bl); // RCVN\r
1504 mt->rdqs[ch][rk][bl] = get_rdqs(ch, rk, bl); // RDQS\r
1505 mt->wdqs[ch][rk][bl] = get_wdqs(ch, rk, bl); // WDQS\r
1506 mt->wdq[ch][rk][bl] = get_wdq(ch, rk, bl); // WDQ\r
1507 if (rk == 0)\r
1508 {\r
1509 mt->vref[ch][bl] = get_vref(ch, bl); // VREF (RANK0 only)\r
1510 }\r
1511 }\r
1512 mt->wctl[ch][rk] = get_wctl(ch, rk); // WCTL\r
1513 }\r
1514 mt->wcmd[ch] = get_wcmd(ch); // WCMD\r
1515 }\r
1516\r
1517 // need to save for a case of changing frequency after warm reset\r
1518 mt->ddr_speed = mrc_params->ddr_speed;\r
1519\r
1520 return;\r
1521}\r
1522\r
1523// This function will retrieve relevant timing data\r
1524// This data will be used on subsequent boots to speed up boot times\r
1525// and is required for Suspend To RAM capabilities.\r
1526void restore_timings(\r
1527 MRCParams_t *mrc_params)\r
1528{\r
1529 uint8_t ch, rk, bl;\r
1530 const MrcTimings_t *mt = &mrc_params->timings;\r
1531\r
1532 for (ch = 0; ch < NUM_CHANNELS; ch++)\r
1533 {\r
1534 for (rk = 0; rk < NUM_RANKS; rk++)\r
1535 {\r
1536 for (bl = 0; bl < NUM_BYTE_LANES; bl++)\r
1537 {\r
1538 set_rcvn(ch, rk, bl, mt->rcvn[ch][rk][bl]); // RCVN\r
1539 set_rdqs(ch, rk, bl, mt->rdqs[ch][rk][bl]); // RDQS\r
1540 set_wdqs(ch, rk, bl, mt->wdqs[ch][rk][bl]); // WDQS\r
1541 set_wdq(ch, rk, bl, mt->wdq[ch][rk][bl]); // WDQ\r
1542 if (rk == 0)\r
1543 {\r
1544 set_vref(ch, bl, mt->vref[ch][bl]); // VREF (RANK0 only)\r
1545 }\r
1546 }\r
1547 set_wctl(ch, rk, mt->wctl[ch][rk]); // WCTL\r
1548 }\r
1549 set_wcmd(ch, mt->wcmd[ch]); // WCMD\r
1550 }\r
1551\r
1552 return;\r
1553}\r
1554\r
1555// Configure default settings normally set as part of read training\r
1556// Some defaults have to be set earlier as they may affect earlier\r
1557// training steps.\r
1558void default_timings(\r
1559 MRCParams_t *mrc_params)\r
1560{\r
1561 uint8_t ch, rk, bl;\r
1562\r
1563 for (ch = 0; ch < NUM_CHANNELS; ch++)\r
1564 {\r
1565 for (rk = 0; rk < NUM_RANKS; rk++)\r
1566 {\r
1567 for (bl = 0; bl < NUM_BYTE_LANES; bl++)\r
1568 {\r
1569 set_rdqs(ch, rk, bl, 24); // RDQS\r
1570 if (rk == 0)\r
1571 {\r
1572 set_vref(ch, bl, 32); // VREF (RANK0 only)\r
1573 }\r
1574 }\r
1575 }\r
1576 }\r
1577\r
1578 return;\r
1579}\r
1580\r