]> git.proxmox.com Git - mirror_edk2.git/blame - QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/meminit_utils.c
QuarkSocPkg: Replace BSD License with BSD+Patent License
[mirror_edk2.git] / QuarkSocPkg / QuarkNorthCluster / MemoryInit / Pei / meminit_utils.c
CommitLineData
9b6bbcdb
MK
1/************************************************************************\r
2 *\r
3 * Copyright (c) 2013-2015 Intel Corporation.\r
4 *\r
c9f231d0 5* SPDX-License-Identifier: BSD-2-Clause-Patent\r
9b6bbcdb
MK
6 *\r
7 ***************************************************************************/\r
8\r
9#include "mrc.h"\r
10#include "memory_options.h"\r
11\r
12#include "meminit_utils.h"\r
13#include "hte.h"\r
14#include "io.h"\r
15\r
16void select_hte(\r
17 MRCParams_t *mrc_params);\r
18\r
// Flag handed to HteMemOp() by sample_dqs() and cleared right after that call;
// find_rising_edge() sets it to 1 after calling select_hte().
static uint8_t first_run = 0;

// Vref register codes, indexed by Vref setting and ordered lowest to highest.
// set_vref() uses the setting as an index into this table; get_vref() searches
// the table to translate a register code back into a setting.
// Every 6-bit code (0x00-0x3F) appears exactly once.
const uint8_t vref_codes[64] =
{
  0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38, // 00 - 07
  0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, // 08 - 15
  0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, // 16 - 23
  0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, // 24 - 31
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // 32 - 39
  0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, // 40 - 47
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, // 48 - 55
  0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F  // 56 - 63
};

#ifdef EMU
// Track current post code for debugging purpose
uint32_t PostCode;
#endif
33\r
34// set_rcvn:\r
35//\r
36// This function will program the RCVEN delays.\r
37// (currently doesn't comprehend rank)\r
38void set_rcvn(\r
39 uint8_t channel,\r
40 uint8_t rank,\r
41 uint8_t byte_lane,\r
42 uint32_t pi_count)\r
43{\r
44 uint32_t reg;\r
45 uint32_t msk;\r
46 uint32_t tempD;\r
47\r
48 ENTERFN();\r
49 DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);\r
50\r
51 // RDPTR (1/2 MCLK, 64 PIs)\r
52 // BL0 -> B01PTRCTL0[11:08] (0x0-0xF)\r
53 // BL1 -> B01PTRCTL0[23:20] (0x0-0xF)\r
54 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
55 msk = (byte_lane & BIT0) ? (BIT23 | BIT22 | BIT21 | BIT20) : (BIT11 | BIT10 | BIT9 | BIT8);\r
56 tempD = (byte_lane & BIT0) ? ((pi_count / HALF_CLK) << 20) : ((pi_count / HALF_CLK) << 8);\r
57 isbM32m(DDRPHY, reg, tempD, msk);\r
58\r
59 // Adjust PI_COUNT\r
60 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;\r
61\r
62 // PI (1/64 MCLK, 1 PIs)\r
63 // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)\r
64 // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)\r
65 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);\r
66 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
67 msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);\r
68 tempD = pi_count << 24;\r
69 isbM32m(DDRPHY, reg, tempD, msk);\r
70\r
71 // DEADBAND\r
72 // BL0/1 -> B01DBCTL1[08/11] (+1 select)\r
73 // BL0/1 -> B01DBCTL1[02/05] (enable)\r
74 reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
75 msk = 0x00;\r
76 tempD = 0x00;\r
77 // enable\r
78 msk |= (byte_lane & BIT0) ? (BIT5) : (BIT2);\r
79 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))\r
80 {\r
81 tempD |= msk;\r
82 }\r
83 // select\r
84 msk |= (byte_lane & BIT0) ? (BIT11) : (BIT8);\r
85 if (pi_count < EARLY_DB)\r
86 {\r
87 tempD |= msk;\r
88 }\r
89 isbM32m(DDRPHY, reg, tempD, msk);\r
90\r
91 // error check\r
92 if (pi_count > 0x3F)\r
93 {\r
94 training_message(channel, rank, byte_lane);\r
95 post_code(0xEE, 0xE0);\r
96 }\r
97\r
98 LEAVEFN();\r
99 return;\r
100}\r
101\r
102// get_rcvn:\r
103//\r
104// This function will return the current RCVEN delay on the given channel, rank, byte_lane as an absolute PI count.\r
105// (currently doesn't comprehend rank)\r
106uint32_t get_rcvn(\r
107 uint8_t channel,\r
108 uint8_t rank,\r
109 uint8_t byte_lane)\r
110{\r
111 uint32_t reg;\r
112 uint32_t tempD;\r
113 uint32_t pi_count;\r
114\r
115 ENTERFN();\r
116\r
117 // RDPTR (1/2 MCLK, 64 PIs)\r
118 // BL0 -> B01PTRCTL0[11:08] (0x0-0xF)\r
119 // BL1 -> B01PTRCTL0[23:20] (0x0-0xF)\r
120 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
121 tempD = isbR32m(DDRPHY, reg);\r
122 tempD >>= (byte_lane & BIT0) ? (20) : (8);\r
123 tempD &= 0xF;\r
124\r
125 // Adjust PI_COUNT\r
126 pi_count = tempD * HALF_CLK;\r
127\r
128 // PI (1/64 MCLK, 1 PIs)\r
129 // BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F)\r
130 // BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F)\r
131 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);\r
132 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
133 tempD = isbR32m(DDRPHY, reg);\r
134 tempD >>= 24;\r
135 tempD &= 0x3F;\r
136\r
137 // Adjust PI_COUNT\r
138 pi_count += tempD;\r
139\r
140 LEAVEFN();\r
141 return pi_count;\r
142}\r
143\r
144// set_rdqs:\r
145//\r
146// This function will program the RDQS delays based on an absolute amount of PIs.\r
147// (currently doesn't comprehend rank)\r
148void set_rdqs(\r
149 uint8_t channel,\r
150 uint8_t rank,\r
151 uint8_t byte_lane,\r
152 uint32_t pi_count)\r
153{\r
154 uint32_t reg;\r
155 uint32_t msk;\r
156 uint32_t tempD;\r
157\r
158 ENTERFN();\r
159 DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);\r
160\r
161 // PI (1/128 MCLK)\r
162 // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)\r
163 // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)\r
164 reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE);\r
165 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
166 msk = (BIT6 | BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);\r
167 tempD = pi_count << 0;\r
168 isbM32m(DDRPHY, reg, tempD, msk);\r
169\r
170 // error check (shouldn't go above 0x3F)\r
171 if (pi_count > 0x47)\r
172 {\r
173 training_message(channel, rank, byte_lane);\r
174 post_code(0xEE, 0xE1);\r
175 }\r
176\r
177 LEAVEFN();\r
178 return;\r
179}\r
180\r
181// get_rdqs:\r
182//\r
183// This function will return the current RDQS delay on the given channel, rank, byte_lane as an absolute PI count.\r
184// (currently doesn't comprehend rank)\r
185uint32_t get_rdqs(\r
186 uint8_t channel,\r
187 uint8_t rank,\r
188 uint8_t byte_lane)\r
189{\r
190 uint32_t reg;\r
191 uint32_t tempD;\r
192 uint32_t pi_count;\r
193\r
194 ENTERFN();\r
195\r
196 // PI (1/128 MCLK)\r
197 // BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47)\r
198 // BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47)\r
199 reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE);\r
200 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
201 tempD = isbR32m(DDRPHY, reg);\r
202\r
203 // Adjust PI_COUNT\r
204 pi_count = tempD & 0x7F;\r
205\r
206 LEAVEFN();\r
207 return pi_count;\r
208}\r
209\r
210// set_wdqs:\r
211//\r
212// This function will program the WDQS delays based on an absolute amount of PIs.\r
213// (currently doesn't comprehend rank)\r
214void set_wdqs(\r
215 uint8_t channel,\r
216 uint8_t rank,\r
217 uint8_t byte_lane,\r
218 uint32_t pi_count)\r
219{\r
220 uint32_t reg;\r
221 uint32_t msk;\r
222 uint32_t tempD;\r
223\r
224 ENTERFN();\r
225 DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);\r
226\r
227 // RDPTR (1/2 MCLK, 64 PIs)\r
228 // BL0 -> B01PTRCTL0[07:04] (0x0-0xF)\r
229 // BL1 -> B01PTRCTL0[19:16] (0x0-0xF)\r
230 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
231 msk = (byte_lane & BIT0) ? (BIT19 | BIT18 | BIT17 | BIT16) : (BIT7 | BIT6 | BIT5 | BIT4);\r
232 tempD = pi_count / HALF_CLK;\r
233 tempD <<= (byte_lane & BIT0) ? (16) : (4);\r
234 isbM32m(DDRPHY, reg, tempD, msk);\r
235\r
236 // Adjust PI_COUNT\r
237 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;\r
238\r
239 // PI (1/64 MCLK, 1 PIs)\r
240 // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)\r
241 // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)\r
242 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);\r
243 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
244 msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16);\r
245 tempD = pi_count << 16;\r
246 isbM32m(DDRPHY, reg, tempD, msk);\r
247\r
248 // DEADBAND\r
249 // BL0/1 -> B01DBCTL1[07/10] (+1 select)\r
250 // BL0/1 -> B01DBCTL1[01/04] (enable)\r
251 reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
252 msk = 0x00;\r
253 tempD = 0x00;\r
254 // enable\r
255 msk |= (byte_lane & BIT0) ? (BIT4) : (BIT1);\r
256 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))\r
257 {\r
258 tempD |= msk;\r
259 }\r
260 // select\r
261 msk |= (byte_lane & BIT0) ? (BIT10) : (BIT7);\r
262 if (pi_count < EARLY_DB)\r
263 {\r
264 tempD |= msk;\r
265 }\r
266 isbM32m(DDRPHY, reg, tempD, msk);\r
267\r
268 // error check\r
269 if (pi_count > 0x3F)\r
270 {\r
271 training_message(channel, rank, byte_lane);\r
272 post_code(0xEE, 0xE2);\r
273 }\r
274\r
275 LEAVEFN();\r
276 return;\r
277}\r
278\r
279// get_wdqs:\r
280//\r
281// This function will return the amount of WDQS delay on the given channel, rank, byte_lane as an absolute PI count.\r
282// (currently doesn't comprehend rank)\r
283uint32_t get_wdqs(\r
284 uint8_t channel,\r
285 uint8_t rank,\r
286 uint8_t byte_lane)\r
287{\r
288 uint32_t reg;\r
289 uint32_t tempD;\r
290 uint32_t pi_count;\r
291\r
292 ENTERFN();\r
293\r
294 // RDPTR (1/2 MCLK, 64 PIs)\r
295 // BL0 -> B01PTRCTL0[07:04] (0x0-0xF)\r
296 // BL1 -> B01PTRCTL0[19:16] (0x0-0xF)\r
297 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
298 tempD = isbR32m(DDRPHY, reg);\r
299 tempD >>= (byte_lane & BIT0) ? (16) : (4);\r
300 tempD &= 0xF;\r
301\r
302 // Adjust PI_COUNT\r
303 pi_count = (tempD * HALF_CLK);\r
304\r
305 // PI (1/64 MCLK, 1 PIs)\r
306 // BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F)\r
307 // BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F)\r
308 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);\r
309 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
310 tempD = isbR32m(DDRPHY, reg);\r
311 tempD >>= 16;\r
312 tempD &= 0x3F;\r
313\r
314 // Adjust PI_COUNT\r
315 pi_count += tempD;\r
316\r
317 LEAVEFN();\r
318 return pi_count;\r
319}\r
320\r
321// set_wdq:\r
322//\r
323// This function will program the WDQ delays based on an absolute number of PIs.\r
324// (currently doesn't comprehend rank)\r
325void set_wdq(\r
326 uint8_t channel,\r
327 uint8_t rank,\r
328 uint8_t byte_lane,\r
329 uint32_t pi_count)\r
330{\r
331 uint32_t reg;\r
332 uint32_t msk;\r
333 uint32_t tempD;\r
334\r
335 ENTERFN();\r
336 DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n", channel, rank, byte_lane, pi_count);\r
337\r
338 // RDPTR (1/2 MCLK, 64 PIs)\r
339 // BL0 -> B01PTRCTL0[03:00] (0x0-0xF)\r
340 // BL1 -> B01PTRCTL0[15:12] (0x0-0xF)\r
341 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
342 msk = (byte_lane & BIT0) ? (BIT15 | BIT14 | BIT13 | BIT12) : (BIT3 | BIT2 | BIT1 | BIT0);\r
343 tempD = pi_count / HALF_CLK;\r
344 tempD <<= (byte_lane & BIT0) ? (12) : (0);\r
345 isbM32m(DDRPHY, reg, tempD, msk);\r
346\r
347 // Adjust PI_COUNT\r
348 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;\r
349\r
350 // PI (1/64 MCLK, 1 PIs)\r
351 // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)\r
352 // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)\r
353 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);\r
354 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
355 msk = (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);\r
356 tempD = pi_count << 8;\r
357 isbM32m(DDRPHY, reg, tempD, msk);\r
358\r
359 // DEADBAND\r
360 // BL0/1 -> B01DBCTL1[06/09] (+1 select)\r
361 // BL0/1 -> B01DBCTL1[00/03] (enable)\r
362 reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
363 msk = 0x00;\r
364 tempD = 0x00;\r
365 // enable\r
366 msk |= (byte_lane & BIT0) ? (BIT3) : (BIT0);\r
367 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))\r
368 {\r
369 tempD |= msk;\r
370 }\r
371 // select\r
372 msk |= (byte_lane & BIT0) ? (BIT9) : (BIT6);\r
373 if (pi_count < EARLY_DB)\r
374 {\r
375 tempD |= msk;\r
376 }\r
377 isbM32m(DDRPHY, reg, tempD, msk);\r
378\r
379 // error check\r
380 if (pi_count > 0x3F)\r
381 {\r
382 training_message(channel, rank, byte_lane);\r
383 post_code(0xEE, 0xE3);\r
384 }\r
385\r
386 LEAVEFN();\r
387 return;\r
388}\r
389\r
390// get_wdq:\r
391//\r
392// This function will return the amount of WDQ delay on the given channel, rank, byte_lane as an absolute PI count.\r
393// (currently doesn't comprehend rank)\r
394uint32_t get_wdq(\r
395 uint8_t channel,\r
396 uint8_t rank,\r
397 uint8_t byte_lane)\r
398{\r
399 uint32_t reg;\r
400 uint32_t tempD;\r
401 uint32_t pi_count;\r
402\r
403 ENTERFN();\r
404\r
405 // RDPTR (1/2 MCLK, 64 PIs)\r
406 // BL0 -> B01PTRCTL0[03:00] (0x0-0xF)\r
407 // BL1 -> B01PTRCTL0[15:12] (0x0-0xF)\r
408 reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET);\r
409 tempD = isbR32m(DDRPHY, reg);\r
410 tempD >>= (byte_lane & BIT0) ? (12) : (0);\r
411 tempD &= 0xF;\r
412\r
413 // Adjust PI_COUNT\r
414 pi_count = (tempD * HALF_CLK);\r
415\r
416 // PI (1/64 MCLK, 1 PIs)\r
417 // BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F)\r
418 // BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F)\r
419 reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0);\r
420 reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET));\r
421 tempD = isbR32m(DDRPHY, reg);\r
422 tempD >>= 8;\r
423 tempD &= 0x3F;\r
424\r
425 // Adjust PI_COUNT\r
426 pi_count += tempD;\r
427\r
428 LEAVEFN();\r
429 return pi_count;\r
430}\r
431\r
432// set_wcmd:\r
433//\r
434// This function will program the WCMD delays based on an absolute number of PIs.\r
435void set_wcmd(\r
436 uint8_t channel,\r
437 uint32_t pi_count)\r
438{\r
439 uint32_t reg;\r
440 uint32_t msk;\r
441 uint32_t tempD;\r
442\r
443 ENTERFN();\r
444 // RDPTR (1/2 MCLK, 64 PIs)\r
445 // CMDPTRREG[11:08] (0x0-0xF)\r
446 reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);\r
447 msk = (BIT11 | BIT10 | BIT9 | BIT8);\r
448 tempD = pi_count / HALF_CLK;\r
449 tempD <<= 8;\r
450 isbM32m(DDRPHY, reg, tempD, msk);\r
451\r
452 // Adjust PI_COUNT\r
453 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;\r
454\r
455 // PI (1/64 MCLK, 1 PIs)\r
456 // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)\r
457 // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)\r
458 // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)\r
459 // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)\r
460 // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)\r
461 // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)\r
462 // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)\r
463 // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)\r
464 reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);\r
465\r
466 msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24) | (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16)\r
467 | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8) | (BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0);\r
468\r
469 tempD = (pi_count << 24) | (pi_count << 16) | (pi_count << 8) | (pi_count << 0);\r
470\r
471 isbM32m(DDRPHY, reg, tempD, msk);\r
472 reg = CMDDLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); // PO\r
473 isbM32m(DDRPHY, reg, tempD, msk);\r
474\r
475 // DEADBAND\r
476 // CMDCFGREG0[17] (+1 select)\r
477 // CMDCFGREG0[16] (enable)\r
478 reg = CMDCFGREG0 + (channel * DDRIOCCC_CH_OFFSET);\r
479 msk = 0x00;\r
480 tempD = 0x00;\r
481 // enable\r
482 msk |= BIT16;\r
483 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))\r
484 {\r
485 tempD |= msk;\r
486 }\r
487 // select\r
488 msk |= BIT17;\r
489 if (pi_count < EARLY_DB)\r
490 {\r
491 tempD |= msk;\r
492 }\r
493 isbM32m(DDRPHY, reg, tempD, msk);\r
494\r
495 // error check\r
496 if (pi_count > 0x3F)\r
497 {\r
498 post_code(0xEE, 0xE4);\r
499 }\r
500\r
501 LEAVEFN();\r
502 return;\r
503}\r
504\r
505// get_wcmd:\r
506//\r
507// This function will return the amount of WCMD delay on the given channel as an absolute PI count.\r
508uint32_t get_wcmd(\r
509 uint8_t channel)\r
510{\r
511 uint32_t reg;\r
512 uint32_t tempD;\r
513 uint32_t pi_count;\r
514\r
515 ENTERFN();\r
516 // RDPTR (1/2 MCLK, 64 PIs)\r
517 // CMDPTRREG[11:08] (0x0-0xF)\r
518 reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET);\r
519 tempD = isbR32m(DDRPHY, reg);\r
520 tempD >>= 8;\r
521 tempD &= 0xF;\r
522\r
523 // Adjust PI_COUNT\r
524 pi_count = tempD * HALF_CLK;\r
525\r
526 // PI (1/64 MCLK, 1 PIs)\r
527 // CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused)\r
528 // CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused)\r
529 // CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused)\r
530 // CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused)\r
531 // CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused)\r
532 // CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F)\r
533 // CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused)\r
534 // CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused)\r
535 reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);\r
536 tempD = isbR32m(DDRPHY, reg);\r
537 tempD >>= 16;\r
538 tempD &= 0x3F;\r
539\r
540 // Adjust PI_COUNT\r
541 pi_count += tempD;\r
542\r
543 LEAVEFN();\r
544 return pi_count;\r
545}\r
546\r
547// set_wclk:\r
548//\r
549// This function will program the WCLK delays based on an absolute number of PIs.\r
550void set_wclk(\r
551 uint8_t channel,\r
552 uint8_t rank,\r
553 uint32_t pi_count)\r
554{\r
555 uint32_t reg;\r
556 uint32_t msk;\r
557 uint32_t tempD;\r
558\r
559 ENTERFN();\r
560 // RDPTR (1/2 MCLK, 64 PIs)\r
561 // CCPTRREG[15:12] -> CLK1 (0x0-0xF)\r
562 // CCPTRREG[11:08] -> CLK0 (0x0-0xF)\r
563 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);\r
564 msk = (BIT15 | BIT14 | BIT13 | BIT12) | (BIT11 | BIT10 | BIT9 | BIT8);\r
565 tempD = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8);\r
566 isbM32m(DDRPHY, reg, tempD, msk);\r
567\r
568 // Adjust PI_COUNT\r
569 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;\r
570\r
571 // PI (1/64 MCLK, 1 PIs)\r
572 // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)\r
573 // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)\r
574 reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0);\r
575 reg += (channel * DDRIOCCC_CH_OFFSET);\r
576 msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16) | (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8);\r
577 tempD = (pi_count << 16) | (pi_count << 8);\r
578 isbM32m(DDRPHY, reg, tempD, msk);\r
579 reg = (rank) ? (ECCB1DLLPICODER1) : (ECCB1DLLPICODER1);\r
580 reg += (channel * DDRIOCCC_CH_OFFSET);\r
581 isbM32m(DDRPHY, reg, tempD, msk);\r
582 reg = (rank) ? (ECCB1DLLPICODER2) : (ECCB1DLLPICODER2);\r
583 reg += (channel * DDRIOCCC_CH_OFFSET);\r
584 isbM32m(DDRPHY, reg, tempD, msk);\r
585 reg = (rank) ? (ECCB1DLLPICODER3) : (ECCB1DLLPICODER3);\r
586 reg += (channel * DDRIOCCC_CH_OFFSET);\r
587 isbM32m(DDRPHY, reg, tempD, msk);\r
588\r
589 // DEADBAND\r
590 // CCCFGREG1[11:08] (+1 select)\r
591 // CCCFGREG1[03:00] (enable)\r
592 reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);\r
593 msk = 0x00;\r
594 tempD = 0x00;\r
595 // enable\r
596 msk |= (BIT3 | BIT2 | BIT1 | BIT0); // only ??? matters\r
597 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))\r
598 {\r
599 tempD |= msk;\r
600 }\r
601 // select\r
602 msk |= (BIT11 | BIT10 | BIT9 | BIT8); // only ??? matters\r
603 if (pi_count < EARLY_DB)\r
604 {\r
605 tempD |= msk;\r
606 }\r
607 isbM32m(DDRPHY, reg, tempD, msk);\r
608\r
609 // error check\r
610 if (pi_count > 0x3F)\r
611 {\r
612 post_code(0xEE, 0xE5);\r
613 }\r
614\r
615 LEAVEFN();\r
616 return;\r
617}\r
618\r
619// get_wclk:\r
620//\r
621// This function will return the amout of WCLK delay on the given channel, rank as an absolute PI count.\r
622uint32_t get_wclk(\r
623 uint8_t channel,\r
624 uint8_t rank)\r
625{\r
626 uint32_t reg;\r
627 uint32_t tempD;\r
628 uint32_t pi_count;\r
629\r
630 ENTERFN();\r
631 // RDPTR (1/2 MCLK, 64 PIs)\r
632 // CCPTRREG[15:12] -> CLK1 (0x0-0xF)\r
633 // CCPTRREG[11:08] -> CLK0 (0x0-0xF)\r
634 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);\r
635 tempD = isbR32m(DDRPHY, reg);\r
636 tempD >>= (rank) ? (12) : (8);\r
637 tempD &= 0xF;\r
638\r
639 // Adjust PI_COUNT\r
640 pi_count = tempD * HALF_CLK;\r
641\r
642 // PI (1/64 MCLK, 1 PIs)\r
643 // ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F)\r
644 // ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F)\r
645 reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0);\r
646 reg += (channel * DDRIOCCC_CH_OFFSET);\r
647 tempD = isbR32m(DDRPHY, reg);\r
648 tempD >>= (rank) ? (16) : (8);\r
649 tempD &= 0x3F;\r
650\r
651 pi_count += tempD;\r
652\r
653 LEAVEFN();\r
654 return pi_count;\r
655}\r
656\r
657// set_wctl:\r
658//\r
659// This function will program the WCTL delays based on an absolute number of PIs.\r
660// (currently doesn't comprehend rank)\r
661void set_wctl(\r
662 uint8_t channel,\r
663 uint8_t rank,\r
664 uint32_t pi_count)\r
665{\r
666 uint32_t reg;\r
667 uint32_t msk;\r
668 uint32_t tempD;\r
669\r
670 ENTERFN();\r
671\r
672 // RDPTR (1/2 MCLK, 64 PIs)\r
673 // CCPTRREG[31:28] (0x0-0xF)\r
674 // CCPTRREG[27:24] (0x0-0xF)\r
675 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);\r
676 msk = (BIT31 | BIT30 | BIT29 | BIT28) | (BIT27 | BIT26 | BIT25 | BIT24);\r
677 tempD = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24);\r
678 isbM32m(DDRPHY, reg, tempD, msk);\r
679\r
680 // Adjust PI_COUNT\r
681 pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK;\r
682\r
683 // PI (1/64 MCLK, 1 PIs)\r
684 // ECCB1DLLPICODER?[29:24] (0x00-0x3F)\r
685 // ECCB1DLLPICODER?[29:24] (0x00-0x3F)\r
686 reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);\r
687 msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24);\r
688 tempD = (pi_count << 24);\r
689 isbM32m(DDRPHY, reg, tempD, msk);\r
690 reg = ECCB1DLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET);\r
691 isbM32m(DDRPHY, reg, tempD, msk);\r
692 reg = ECCB1DLLPICODER2 + (channel * DDRIOCCC_CH_OFFSET);\r
693 isbM32m(DDRPHY, reg, tempD, msk);\r
694 reg = ECCB1DLLPICODER3 + (channel * DDRIOCCC_CH_OFFSET);\r
695 isbM32m(DDRPHY, reg, tempD, msk);\r
696\r
697 // DEADBAND\r
698 // CCCFGREG1[13:12] (+1 select)\r
699 // CCCFGREG1[05:04] (enable)\r
700 reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET);\r
701 msk = 0x00;\r
702 tempD = 0x00;\r
703 // enable\r
704 msk |= (BIT5 | BIT4); // only ??? matters\r
705 if ((pi_count < EARLY_DB) || (pi_count > LATE_DB))\r
706 {\r
707 tempD |= msk;\r
708 }\r
709 // select\r
710 msk |= (BIT13 | BIT12); // only ??? matters\r
711 if (pi_count < EARLY_DB)\r
712 {\r
713 tempD |= msk;\r
714 }\r
715 isbM32m(DDRPHY, reg, tempD, msk);\r
716\r
717 // error check\r
718 if (pi_count > 0x3F)\r
719 {\r
720 post_code(0xEE, 0xE6);\r
721 }\r
722\r
723 LEAVEFN();\r
724 return;\r
725}\r
726\r
727// get_wctl:\r
728//\r
729// This function will return the amount of WCTL delay on the given channel, rank as an absolute PI count.\r
730// (currently doesn't comprehend rank)\r
731uint32_t get_wctl(\r
732 uint8_t channel,\r
733 uint8_t rank)\r
734{\r
735 uint32_t reg;\r
736 uint32_t tempD;\r
737 uint32_t pi_count;\r
738\r
739 ENTERFN();\r
740\r
741 // RDPTR (1/2 MCLK, 64 PIs)\r
742 // CCPTRREG[31:28] (0x0-0xF)\r
743 // CCPTRREG[27:24] (0x0-0xF)\r
744 reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET);\r
745 tempD = isbR32m(DDRPHY, reg);\r
746 tempD >>= 24;\r
747 tempD &= 0xF;\r
748\r
749 // Adjust PI_COUNT\r
750 pi_count = tempD * HALF_CLK;\r
751\r
752 // PI (1/64 MCLK, 1 PIs)\r
753 // ECCB1DLLPICODER?[29:24] (0x00-0x3F)\r
754 // ECCB1DLLPICODER?[29:24] (0x00-0x3F)\r
755 reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET);\r
756 tempD = isbR32m(DDRPHY, reg);\r
757 tempD >>= 24;\r
758 tempD &= 0x3F;\r
759\r
760 // Adjust PI_COUNT\r
761 pi_count += tempD;\r
762\r
763 LEAVEFN();\r
764 return pi_count;\r
765}\r
766\r
767// set_vref:\r
768//\r
769// This function will program the internal Vref setting in a given byte lane in a given channel.\r
770void set_vref(\r
771 uint8_t channel,\r
772 uint8_t byte_lane,\r
773 uint32_t setting)\r
774{\r
775 uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);\r
776\r
777 ENTERFN();\r
778 DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n", channel, byte_lane, setting);\r
779\r
780 isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)),\r
781 (vref_codes[setting] << 2), (BIT7 | BIT6 | BIT5 | BIT4 | BIT3 | BIT2));\r
782 //isbM32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)), (setting<<2), (BIT7|BIT6|BIT5|BIT4|BIT3|BIT2));\r
783 // need to wait ~300ns for Vref to settle (check that this is necessary)\r
784 delay_n(300);\r
785 // ??? may need to clear pointers ???\r
786 LEAVEFN();\r
787 return;\r
788}\r
789\r
790// get_vref:\r
791//\r
792// This function will return the internal Vref setting for the given channel, byte_lane;\r
793uint32_t get_vref(\r
794 uint8_t channel,\r
795 uint8_t byte_lane)\r
796{\r
797 uint8_t j;\r
798 uint32_t ret_val = sizeof(vref_codes) / 2;\r
799 uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL);\r
800\r
801 uint32_t tempD;\r
802\r
803 ENTERFN();\r
804 tempD = isbR32m(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)));\r
805 tempD >>= 2;\r
806 tempD &= 0x3F;\r
807 for (j = 0; j < sizeof(vref_codes); j++)\r
808 {\r
809 if (vref_codes[j] == tempD)\r
810 {\r
811 ret_val = j;\r
812 break;\r
813 }\r
814 }\r
815 LEAVEFN();\r
816 return ret_val;\r
817}\r
818\r
819// clear_pointers:\r
820//\r
821// This function will be used to clear the pointers in a given byte lane in a given channel.\r
822void clear_pointers(\r
823 void)\r
824{\r
825 uint8_t channel_i;\r
826 uint8_t bl_i;\r
827\r
828 ENTERFN();\r
829 for (channel_i = 0; channel_i < NUM_CHANNELS; channel_i++)\r
830 {\r
831 for (bl_i = 0; bl_i < NUM_BYTE_LANES; bl_i++)\r
832 {\r
833 isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), ~(BIT8),\r
834 (BIT8));\r
835 //delay_m(1); // DEBUG\r
836 isbM32m(DDRPHY, (B01PTRCTL1 + (channel_i * DDRIODQ_CH_OFFSET) + ((bl_i >> 1) * DDRIODQ_BL_OFFSET)), (BIT8),\r
837 (BIT8));\r
838 }\r
839 }\r
840 LEAVEFN();\r
841 return;\r
842}\r
843\r
// enable_cache:
//
// Intentionally empty: cache control is not used in Quark MRC.
void enable_cache(
  void)
{
}
851\r
// disable_cache:
//
// Intentionally empty: cache control is not used in Quark MRC.
void disable_cache(
  void)
{
}
859\r
860// Send DRAM command, data should be formated\r
861// using DCMD_Xxxx macro or emrsXCommand structure.\r
862static void dram_init_command(\r
863 uint32_t data)\r
864{\r
865 Wr32(DCMD, 0, data);\r
866}\r
867\r
868// find_rising_edge:\r
869//\r
870// This function will find the rising edge transition on RCVN or WDQS.\r
871void find_rising_edge(\r
872 MRCParams_t *mrc_params,\r
873 uint32_t delay[],\r
874 uint8_t channel,\r
875 uint8_t rank,\r
876 bool rcvn)\r
877{\r
878\r
879#define SAMPLE_CNT 3 // number of sample points\r
880#define SAMPLE_DLY 26 // number of PIs to increment per sample\r
881#define FORWARD true // indicates to increase delays when looking for edge\r
882#define BACKWARD false // indicates to decrease delays when looking for edge\r
883\r
884 bool all_edges_found; // determines stop condition\r
885 bool direction[NUM_BYTE_LANES]; // direction indicator\r
886 uint8_t sample_i; // sample counter\r
887 uint8_t bl_i; // byte lane counter\r
888 uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor\r
889 uint32_t sample_result[SAMPLE_CNT]; // results of "sample_dqs()"\r
890 uint32_t tempD; // temporary DWORD\r
891 uint32_t transition_pattern;\r
892\r
893 ENTERFN();\r
894\r
895 // select hte and request initial configuration\r
896 select_hte(mrc_params);\r
897 first_run = 1;\r
898\r
899 // Take 3 sample points (T1,T2,T3) to obtain a transition pattern.\r
900 for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)\r
901 {\r
902 // program the desired delays for sample\r
903 for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)\r
904 {\r
905 // increase sample delay by 26 PI (0.2 CLK)\r
906 if (rcvn)\r
907 {\r
908 set_rcvn(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));\r
909 }\r
910 else\r
911 {\r
912 set_wdqs(channel, rank, bl_i, delay[bl_i] + (sample_i * SAMPLE_DLY));\r
913 }\r
914 } // bl_i loop\r
915 // take samples (Tsample_i)\r
916 sample_result[sample_i] = sample_dqs(mrc_params, channel, rank, rcvn);\r
917\r
918 DPF(D_TRN, "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n",\r
919 (rcvn ? "RCVN" : "WDQS"), channel, rank,\r
920 sample_i, sample_i * SAMPLE_DLY, sample_result[sample_i]);\r
921\r
922 } // sample_i loop\r
923\r
924 // This pattern will help determine where we landed and ultimately how to place RCVEN/WDQS.\r
925 for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)\r
926 {\r
927 // build "transition_pattern" (MSB is 1st sample)\r
928 transition_pattern = 0x00;\r
929 for (sample_i = 0; sample_i < SAMPLE_CNT; sample_i++)\r
930 {\r
931 transition_pattern |= ((sample_result[sample_i] & (1 << bl_i)) >> bl_i) << (SAMPLE_CNT - 1 - sample_i);\r
932 } // sample_i loop\r
933\r
934 DPF(D_TRN, "=== transition pattern %d\n", transition_pattern);\r
935\r
936 // set up to look for rising edge based on "transition_pattern"\r
937 switch (transition_pattern)\r
938 {\r
939 case 0x00: // sampled 0->0->0\r
940 // move forward from T3 looking for 0->1\r
941 delay[bl_i] += 2 * SAMPLE_DLY;\r
942 direction[bl_i] = FORWARD;\r
943 break;\r
944 case 0x01: // sampled 0->0->1\r
945 case 0x05: // sampled 1->0->1 (bad duty cycle) *HSD#237503*\r
946 // move forward from T2 looking for 0->1\r
947 delay[bl_i] += 1 * SAMPLE_DLY;\r
948 direction[bl_i] = FORWARD;\r
949 break;\r
950// HSD#237503\r
951// case 0x02: // sampled 0->1->0 (bad duty cycle)\r
952// training_message(channel, rank, bl_i);\r
953// post_code(0xEE, 0xE8);\r
954// break;\r
955 case 0x02: // sampled 0->1->0 (bad duty cycle) *HSD#237503*\r
956 case 0x03: // sampled 0->1->1\r
957 // move forward from T1 looking for 0->1\r
958 delay[bl_i] += 0 * SAMPLE_DLY;\r
959 direction[bl_i] = FORWARD;\r
960 break;\r
961 case 0x04: // sampled 1->0->0 (assumes BL8, HSD#234975)\r
962 // move forward from T3 looking for 0->1\r
963 delay[bl_i] += 2 * SAMPLE_DLY;\r
964 direction[bl_i] = FORWARD;\r
965 break;\r
966// HSD#237503\r
967// case 0x05: // sampled 1->0->1 (bad duty cycle)\r
968// training_message(channel, rank, bl_i);\r
969// post_code(0xEE, 0xE9);\r
970// break;\r
971 case 0x06: // sampled 1->1->0\r
972 case 0x07: // sampled 1->1->1\r
973 // move backward from T1 looking for 1->0\r
974 delay[bl_i] += 0 * SAMPLE_DLY;\r
975 direction[bl_i] = BACKWARD;\r
976 break;\r
977 default:\r
978 post_code(0xEE, 0xEE);\r
979 break;\r
980 } // transition_pattern switch\r
981 // program delays\r
982 if (rcvn)\r
983 {\r
984 set_rcvn(channel, rank, bl_i, delay[bl_i]);\r
985 }\r
986 else\r
987 {\r
988 set_wdqs(channel, rank, bl_i, delay[bl_i]);\r
989 }\r
990 } // bl_i loop\r
991\r
992 // Based on the observed transition pattern on the byte lane,\r
993 // begin looking for a rising edge with single PI granularity.\r
994 do\r
995 {\r
996 all_edges_found = true; // assume all byte lanes passed\r
997 tempD = sample_dqs(mrc_params, channel, rank, rcvn); // take a sample\r
998 // check all each byte lane for proper edge\r
999 for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)\r
1000 {\r
1001 if (tempD & (1 << bl_i))\r
1002 {\r
1003 // sampled "1"\r
1004 if (direction[bl_i] == BACKWARD)\r
1005 {\r
1006 // keep looking for edge on this byte lane\r
1007 all_edges_found = false;\r
1008 delay[bl_i] -= 1;\r
1009 if (rcvn)\r
1010 {\r
1011 set_rcvn(channel, rank, bl_i, delay[bl_i]);\r
1012 }\r
1013 else\r
1014 {\r
1015 set_wdqs(channel, rank, bl_i, delay[bl_i]);\r
1016 }\r
1017 }\r
1018 }\r
1019 else\r
1020 {\r
1021 // sampled "0"\r
1022 if (direction[bl_i] == FORWARD)\r
1023 {\r
1024 // keep looking for edge on this byte lane\r
1025 all_edges_found = false;\r
1026 delay[bl_i] += 1;\r
1027 if (rcvn)\r
1028 {\r
1029 set_rcvn(channel, rank, bl_i, delay[bl_i]);\r
1030 }\r
1031 else\r
1032 {\r
1033 set_wdqs(channel, rank, bl_i, delay[bl_i]);\r
1034 }\r
1035 }\r
1036 }\r
1037 } // bl_i loop\r
1038 } while (!all_edges_found);\r
1039\r
1040 // restore DDR idle state\r
1041 dram_init_command(DCMD_PREA(rank));\r
1042\r
1043 DPF(D_TRN, "Delay %03X %03X %03X %03X\n",\r
1044 delay[0], delay[1], delay[2], delay[3]);\r
1045\r
1046 LEAVEFN();\r
1047 return;\r
1048}\r
1049\r
1050// sample_dqs:\r
1051//\r
1052// This function will sample the DQTRAINSTS registers in the given channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'.\r
1053// It will return an encoded DWORD in which each bit corresponds to the sampled value on the byte lane.\r
1054uint32_t sample_dqs(\r
1055 MRCParams_t *mrc_params,\r
1056 uint8_t channel,\r
1057 uint8_t rank,\r
1058 bool rcvn)\r
1059{\r
1060 uint8_t j; // just a counter\r
1061 uint8_t bl_i; // which BL in the module (always 2 per module)\r
1062 uint8_t bl_grp; // which BL module\r
1063 uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1; // byte lane divisor\r
1064 uint32_t msk[2]; // BLx in module\r
1065 uint32_t sampled_val[SAMPLE_SIZE]; // DQTRAINSTS register contents for each sample\r
1066 uint32_t num_0s; // tracks the number of '0' samples\r
1067 uint32_t num_1s; // tracks the number of '1' samples\r
1068 uint32_t ret_val = 0x00; // assume all '0' samples\r
1069 uint32_t address = get_addr(mrc_params, channel, rank);\r
1070\r
1071 // initialise "msk[]"\r
1072 msk[0] = (rcvn) ? (BIT1) : (BIT9); // BL0\r
1073 msk[1] = (rcvn) ? (BIT0) : (BIT8); // BL1\r
1074\r
1075\r
1076 // cycle through each byte lane group\r
1077 for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; bl_grp++)\r
1078 {\r
1079 // take SAMPLE_SIZE samples\r
1080 for (j = 0; j < SAMPLE_SIZE; j++)\r
1081 {\r
1082 HteMemOp(address, first_run, rcvn?0:1);\r
1083 first_run = 0;\r
1084\r
1085 // record the contents of the proper DQTRAINSTS register\r
1086 sampled_val[j] = isbR32m(DDRPHY, (DQTRAINSTS + (bl_grp * DDRIODQ_BL_OFFSET) + (channel * DDRIODQ_CH_OFFSET)));\r
1087 }\r
1088 // look for a majority value ( (SAMPLE_SIZE/2)+1 ) on the byte lane\r
1089 // and set that value in the corresponding "ret_val" bit\r
1090 for (bl_i = 0; bl_i < 2; bl_i++)\r
1091 {\r
1092 num_0s = 0x00; // reset '0' tracker for byte lane\r
1093 num_1s = 0x00; // reset '1' tracker for byte lane\r
1094 for (j = 0; j < SAMPLE_SIZE; j++)\r
1095 {\r
1096 if (sampled_val[j] & msk[bl_i])\r
1097 {\r
1098 num_1s++;\r
1099 }\r
1100 else\r
1101 {\r
1102 num_0s++;\r
1103 }\r
1104 }\r
1105 if (num_1s > num_0s)\r
1106 {\r
1107 ret_val |= (1 << (bl_i + (bl_grp * 2)));\r
1108 }\r
1109 }\r
1110 }\r
1111\r
1112 // "ret_val.0" contains the status of BL0\r
1113 // "ret_val.1" contains the status of BL1\r
1114 // "ret_val.2" contains the status of BL2\r
1115 // etc.\r
1116 return ret_val;\r
1117}\r
1118\r
1119// get_addr:\r
1120//\r
1121// This function will return a 32 bit address in the desired channel and rank.\r
1122uint32_t get_addr(\r
1123 MRCParams_t *mrc_params,\r
1124 uint8_t channel,\r
1125 uint8_t rank)\r
1126{\r
1127 uint32_t offset = 0x02000000; // 32MB\r
1128\r
1129 // Begin product specific code\r
1130 if (channel > 0)\r
1131 {\r
1132 DPF(D_ERROR, "ILLEGAL CHANNEL\n");\r
1133 DEAD_LOOP();\r
1134 }\r
1135\r
1136 if (rank > 1)\r
1137 {\r
1138 DPF(D_ERROR, "ILLEGAL RANK\n");\r
1139 DEAD_LOOP();\r
1140 }\r
1141\r
1142 // use 256MB lowest density as per DRP == 0x0003\r
1143 offset += rank * (256 * 1024 * 1024);\r
1144\r
1145 return offset;\r
1146}\r
1147\r
1148// byte_lane_mask:\r
1149//\r
1150// This function will return a 32 bit mask that will be used to check for byte lane failures.\r
1151uint32_t byte_lane_mask(\r
1152 MRCParams_t *mrc_params)\r
1153{\r
1154 uint32_t j;\r
1155 uint32_t ret_val = 0x00;\r
1156\r
1157 // set "ret_val" based on NUM_BYTE_LANES such that you will check only BL0 in "result"\r
1158 // (each bit in "result" represents a byte lane)\r
1159 for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES)\r
1160 {\r
1161 ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES));\r
1162 }\r
1163\r
1164 // HSD#235037\r
1165 // need to adjust the mask for 16-bit mode\r
1166 if (mrc_params->channel_width == x16)\r
1167 {\r
1168 ret_val |= (ret_val << 2);\r
1169 }\r
1170\r
1171 return ret_val;\r
1172}\r
1173\r
1174\r
// read_tsc:
//
// Returns the current contents of the time stamp counter as a uint64_t.
// When SIM or GCC is defined the RDTSC instruction is issued through inline
// assembly (EDX holds the upper, EAX the lower 32 bits); otherwise the
// compiler's __rdtsc() intrinsic is used.
uint64_t read_tsc(
  void)
{
  uint64_t tsc; // EDX:EAX

#if defined (SIM) || defined (GCC)
  uint32_t tscH; // EDX
  uint32_t tscL; // EAX

  // The statement must be "volatile": it has outputs but no inputs, so
  // without the qualifier the compiler is allowed to assume that repeated
  // executions yield the same value and may merge them or move them out of
  // a polling loop such as the one in delay_n().
  asm volatile ("rdtsc" : "=a"(tscL), "=d"(tscH));
  tsc = ((uint64_t) tscH << 32) | tscL;
#else
  tsc = __rdtsc();
#endif

  return tsc;
}
1196\r
// get_tsc_freq:
//
// Returns the TSC frequency in MHz.
// The frequency is looked up in a four entry table.  The fuse based lookup
// is currently compiled out, so entry 0 (533) is always returned.
uint32_t get_tsc_freq(
  void)
{
  static uint32_t mhz_table[] =
  { 533, 400, 200, 100 };
  uint32_t idx;

#if 1
  // todo!!! Fixed 533MHz for emulation or debugging
  idx = 0;
#else
  idx = (isbR32m(FUSE, 0) >> 12) & (BIT1|BIT0);
#endif

  return mhz_table[idx];
}
1214\r
#ifndef SIM
// delay_n:
//
// This is a simple busy-wait delay function.
// It takes "nanoseconds" as a parameter and spins on read_tsc() until the
// matching number of TSC ticks has elapsed.
//
// get_tsc_freq() reports MHz, i.e. TSC ticks per microsecond, so the tick
// count is (freq * nanoseconds) / 1000.  The product is not formed directly
// because it would wrap in 32 bits for larger requests (above roughly 8ms at
// 533MHz) and shorten the delay.  Instead the request is split into whole
// microseconds and a sub-microsecond remainder, which gives the same result
// using 32 bit multiply/divide only.  The sum fits in 32 bits for any
// frequency up to 1000MHz.
void delay_n(
  uint32_t nanoseconds)
{
  uint64_t final_tsc = read_tsc(); // start of the delay window
  uint32_t ticks_per_us = get_tsc_freq();
  uint32_t ticks;

  ticks = ticks_per_us * (nanoseconds / 1000);
  ticks += (ticks_per_us * (nanoseconds % 1000)) / 1000;

  final_tsc += ticks;

  while (read_tsc() < final_tsc)
    ;
  return;
}
#endif
1232\r
// delay_u:
//
// This is a simple delay function.
// It takes "microseconds" as a parameter and waits by calling delay_n(1000)
// once per requested microsecond.
void delay_u(
  uint32_t microseconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, just use loops
  for (remaining = microseconds; remaining != 0; remaining--)
  {
    delay_n(1000);
  }
}
1247\r
// delay_m:
//
// This is a simple delay function.
// It takes "milliseconds" as a parameter and waits by calling delay_u(1000)
// once per requested millisecond.
void delay_m(
  uint32_t milliseconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, just use loops
  for (remaining = milliseconds; remaining != 0; remaining--)
  {
    delay_u(1000);
  }
}
1262\r
// delay_s:
//
// This is a simple delay function.
// It takes "seconds" as a parameter and waits by calling delay_m(1000)
// once per requested second.
void delay_s(
  uint32_t seconds)
{
  uint32_t remaining;

  // 64 bit math is not an option, just use loops
  for (remaining = seconds; remaining != 0; remaining--)
  {
    delay_m(1000);
  }
}
1277\r
1278// post_code:\r
1279//\r
1280// This function will output the POST CODE to the four 7-Segment LED displays.\r
1281void post_code(\r
1282 uint8_t major,\r
1283 uint8_t minor)\r
1284{\r
1285#ifdef EMU\r
1286 // Update global variable for execution tracking in debug env\r
1287 PostCode = ((major << 8) | minor);\r
1288#endif\r
1289\r
1290 // send message to UART\r
1291 DPF(D_INFO, "POST: 0x%01X%02X\n", major, minor);\r
1292\r
1293 // error check:\r
1294 if (major == 0xEE)\r
1295 {\r
1296 // todo!!! Consider updating error status and exit MRC\r
1297#ifdef SIM\r
1298 // enable Ctrl-C handling\r
1299 for(;;) delay_n(100);\r
1300#else\r
1301 DEAD_LOOP();\r
1302#endif\r
1303 }\r
1304}\r
1305\r
1306void training_message(\r
1307 uint8_t channel,\r
1308 uint8_t rank,\r
1309 uint8_t byte_lane)\r
1310{\r
1311 // send message to UART\r
1312 DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane);\r
1313 return;\r
1314}\r
1315\r
1316void print_timings(\r
1317 MRCParams_t *mrc_params)\r
1318{\r
1319 uint8_t algo_i;\r
1320 uint8_t channel_i;\r
1321 uint8_t rank_i;\r
1322 uint8_t bl_i;\r
1323 uint8_t bl_divisor = (mrc_params->channel_width == x16) ? 2 : 1;\r
1324\r
1325 DPF(D_INFO, "\n---------------------------");\r
1326 DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3");\r
1327 DPF(D_INFO, "\n===========================");\r
1328 for (algo_i = 0; algo_i < eMAX_ALGOS; algo_i++)\r
1329 {\r
1330 for (channel_i = 0; channel_i < NUM_CHANNELS; channel_i++)\r
1331 {\r
1332 if (mrc_params->channel_enables & (1 << channel_i))\r
1333 {\r
1334 for (rank_i = 0; rank_i < NUM_RANKS; rank_i++)\r
1335 {\r
1336 if (mrc_params->rank_enables & (1 << rank_i))\r
1337 {\r
1338 switch (algo_i)\r
1339 {\r
1340 case eRCVN:\r
1341 DPF(D_INFO, "\nRCVN[%02d:%02d]", channel_i, rank_i);\r
1342 break;\r
1343 case eWDQS:\r
1344 DPF(D_INFO, "\nWDQS[%02d:%02d]", channel_i, rank_i);\r
1345 break;\r
1346 case eWDQx:\r
1347 DPF(D_INFO, "\nWDQx[%02d:%02d]", channel_i, rank_i);\r
1348 break;\r
1349 case eRDQS:\r
1350 DPF(D_INFO, "\nRDQS[%02d:%02d]", channel_i, rank_i);\r
1351 break;\r
1352 case eVREF:\r
1353 DPF(D_INFO, "\nVREF[%02d:%02d]", channel_i, rank_i);\r
1354 break;\r
1355 case eWCMD:\r
1356 DPF(D_INFO, "\nWCMD[%02d:%02d]", channel_i, rank_i);\r
1357 break;\r
1358 case eWCTL:\r
1359 DPF(D_INFO, "\nWCTL[%02d:%02d]", channel_i, rank_i);\r
1360 break;\r
1361 case eWCLK:\r
1362 DPF(D_INFO, "\nWCLK[%02d:%02d]", channel_i, rank_i);\r
1363 break;\r
1364 default:\r
1365 break;\r
1366 } // algo_i switch\r
1367 for (bl_i = 0; bl_i < (NUM_BYTE_LANES / bl_divisor); bl_i++)\r
1368 {\r
1369 switch (algo_i)\r
1370 {\r
1371 case eRCVN:\r
1372 DPF(D_INFO, " %03d", get_rcvn(channel_i, rank_i, bl_i));\r
1373 break;\r
1374 case eWDQS:\r
1375 DPF(D_INFO, " %03d", get_wdqs(channel_i, rank_i, bl_i));\r
1376 break;\r
1377 case eWDQx:\r
1378 DPF(D_INFO, " %03d", get_wdq(channel_i, rank_i, bl_i));\r
1379 break;\r
1380 case eRDQS:\r
1381 DPF(D_INFO, " %03d", get_rdqs(channel_i, rank_i, bl_i));\r
1382 break;\r
1383 case eVREF:\r
1384 DPF(D_INFO, " %03d", get_vref(channel_i, bl_i));\r
1385 break;\r
1386 case eWCMD:\r
1387 DPF(D_INFO, " %03d", get_wcmd(channel_i));\r
1388 break;\r
1389 case eWCTL:\r
1390 DPF(D_INFO, " %03d", get_wctl(channel_i, rank_i));\r
1391 break;\r
1392 case eWCLK:\r
1393 DPF(D_INFO, " %03d", get_wclk(channel_i, rank_i));\r
1394 break;\r
1395 default:\r
1396 break;\r
1397 } // algo_i switch\r
1398 } // bl_i loop\r
1399 } // if rank_i enabled\r
1400 } // rank_i loop\r
1401 } // if channel_i enabled\r
1402 } // channel_i loop\r
1403 } // algo_i loop\r
1404 DPF(D_INFO, "\n---------------------------");\r
1405 DPF(D_INFO, "\n");\r
1406 return;\r
1407}\r
1408\r
// 32 bit LFSR with characteristic polynomial: X^32 + X^22 +X^2 + X^1
// The function takes pointer to previous 32 bit value and modifies it to next value.
//
// One call clocks the register 32 times.  On every clock the inverted XOR of
// bits 0, 1, 2 and 22 is shifted in at bit 31 while the register moves one
// position to the right.  Because the feedback is inverted, an all 1's value
// maps onto itself, while an all 0's value is a valid starting point.
void lfsr32(
  uint32_t *lfsr_ptr)
{
  uint32_t state = *lfsr_ptr;
  uint32_t feedback;
  uint32_t clocks = 32;

  while (clocks-- != 0)
  {
    // XNOR of the tap bits, reduced to a single bit
    feedback = (1u ^ state ^ (state >> 1) ^ (state >> 2) ^ (state >> 22)) & 1u;

    state = (state >> 1) | (feedback << 31);
  }

  *lfsr_ptr = state;
}
1433\r
1434// The purpose of this function is to ensure the SEC comes out of reset\r
1435// and IA initiates the SEC enabling Memory Scrambling.\r
1436void enable_scrambling(\r
1437 MRCParams_t *mrc_params)\r
1438{\r
1439 uint32_t lfsr = 0;\r
1440 uint8_t i;\r
1441\r
1442 if (mrc_params->scrambling_enables == 0)\r
1443 return;\r
1444\r
1445 ENTERFN();\r
1446\r
1447 // 32 bit seed is always stored in BIOS NVM.\r
1448 lfsr = mrc_params->timings.scrambler_seed;\r
1449\r
1450 if (mrc_params->boot_mode == bmCold)\r
1451 {\r
1452 // factory value is 0 and in first boot, a clock based seed is loaded.\r
1453 if (lfsr == 0)\r
1454 {\r
1455 lfsr = read_tsc() & 0x0FFFFFFF; // get seed from system clock and make sure it is not all 1's\r
1456 }\r
1457 // need to replace scrambler\r
1458 // get next 32bit LFSR 16 times which is the last part of the previous scrambler vector.\r
1459 else\r
1460 {\r
1461 for (i = 0; i < 16; i++)\r
1462 {\r
1463 lfsr32(&lfsr);\r
1464 }\r
1465 }\r
1466 mrc_params->timings.scrambler_seed = lfsr; // save new seed.\r
1467 } // if (cold_boot)\r
1468\r
1469 // In warm boot or S3 exit, we have the previous seed.\r
1470 // In cold boot, we have the last 32bit LFSR which is the new seed.\r
1471 lfsr32(&lfsr); // shift to next value\r
1472 isbW32m(MCU, SCRMSEED, (lfsr & 0x0003FFFF));\r
1473 for (i = 0; i < 2; i++)\r
1474 {\r
1475 isbW32m(MCU, SCRMLO + i, (lfsr & 0xAAAAAAAA));\r
1476 }\r
1477\r
1478 LEAVEFN();\r
1479 return;\r
1480}\r
1481\r
1482// This function will store relevant timing data\r
1483// This data will be used on subsequent boots to speed up boot times\r
1484// and is required for Suspend To RAM capabilities.\r
1485void store_timings(\r
1486 MRCParams_t *mrc_params)\r
1487{\r
1488 uint8_t ch, rk, bl;\r
1489 MrcTimings_t *mt = &mrc_params->timings;\r
1490\r
1491 for (ch = 0; ch < NUM_CHANNELS; ch++)\r
1492 {\r
1493 for (rk = 0; rk < NUM_RANKS; rk++)\r
1494 {\r
1495 for (bl = 0; bl < NUM_BYTE_LANES; bl++)\r
1496 {\r
1497 mt->rcvn[ch][rk][bl] = get_rcvn(ch, rk, bl); // RCVN\r
1498 mt->rdqs[ch][rk][bl] = get_rdqs(ch, rk, bl); // RDQS\r
1499 mt->wdqs[ch][rk][bl] = get_wdqs(ch, rk, bl); // WDQS\r
1500 mt->wdq[ch][rk][bl] = get_wdq(ch, rk, bl); // WDQ\r
1501 if (rk == 0)\r
1502 {\r
1503 mt->vref[ch][bl] = get_vref(ch, bl); // VREF (RANK0 only)\r
1504 }\r
1505 }\r
1506 mt->wctl[ch][rk] = get_wctl(ch, rk); // WCTL\r
1507 }\r
1508 mt->wcmd[ch] = get_wcmd(ch); // WCMD\r
1509 }\r
1510\r
1511 // need to save for a case of changing frequency after warm reset\r
1512 mt->ddr_speed = mrc_params->ddr_speed;\r
1513\r
1514 return;\r
1515}\r
1516\r
1517// This function will retrieve relevant timing data\r
1518// This data will be used on subsequent boots to speed up boot times\r
1519// and is required for Suspend To RAM capabilities.\r
1520void restore_timings(\r
1521 MRCParams_t *mrc_params)\r
1522{\r
1523 uint8_t ch, rk, bl;\r
1524 const MrcTimings_t *mt = &mrc_params->timings;\r
1525\r
1526 for (ch = 0; ch < NUM_CHANNELS; ch++)\r
1527 {\r
1528 for (rk = 0; rk < NUM_RANKS; rk++)\r
1529 {\r
1530 for (bl = 0; bl < NUM_BYTE_LANES; bl++)\r
1531 {\r
1532 set_rcvn(ch, rk, bl, mt->rcvn[ch][rk][bl]); // RCVN\r
1533 set_rdqs(ch, rk, bl, mt->rdqs[ch][rk][bl]); // RDQS\r
1534 set_wdqs(ch, rk, bl, mt->wdqs[ch][rk][bl]); // WDQS\r
1535 set_wdq(ch, rk, bl, mt->wdq[ch][rk][bl]); // WDQ\r
1536 if (rk == 0)\r
1537 {\r
1538 set_vref(ch, bl, mt->vref[ch][bl]); // VREF (RANK0 only)\r
1539 }\r
1540 }\r
1541 set_wctl(ch, rk, mt->wctl[ch][rk]); // WCTL\r
1542 }\r
1543 set_wcmd(ch, mt->wcmd[ch]); // WCMD\r
1544 }\r
1545\r
1546 return;\r
1547}\r
1548\r
1549// Configure default settings normally set as part of read training\r
1550// Some defaults have to be set earlier as they may affect earlier\r
1551// training steps.\r
1552void default_timings(\r
1553 MRCParams_t *mrc_params)\r
1554{\r
1555 uint8_t ch, rk, bl;\r
1556\r
1557 for (ch = 0; ch < NUM_CHANNELS; ch++)\r
1558 {\r
1559 for (rk = 0; rk < NUM_RANKS; rk++)\r
1560 {\r
1561 for (bl = 0; bl < NUM_BYTE_LANES; bl++)\r
1562 {\r
1563 set_rdqs(ch, rk, bl, 24); // RDQS\r
1564 if (rk == 0)\r
1565 {\r
1566 set_vref(ch, bl, 32); // VREF (RANK0 only)\r
1567 }\r
1568 }\r
1569 }\r
1570 }\r
1571\r
1572 return;\r
1573}\r
1574\r