/*
 * linux/arch/arm26/boot/compressed/head.S
 *
 * Copyright (C) 1996-2002 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/config.h>
#include <linux/linkage.h>

/*
 * Debugging stuff
 *
 * Note that these macros must not contain any code which is not
 * 100% relocatable.  Any attempt to do so will result in a crash.
 * Please select one of the following when turning on debugging.
 */

		.macro	kputc,val
		mov	r0, \val
		bl	putc
		.endm

		.macro	kphex,val,len
		mov	r0, \val
		mov	r1, #\len
		bl	phex
		.endm

		.macro	debug_reloc_start
		.endm

		.macro	debug_reloc_end
		.endm

		.section ".start", #alloc, #execinstr
/*
 * sort out different calling conventions
 */
		.align
start:
		.type	start,#function
		.rept	8
		mov	r0, r0
		.endr

		b	1f
		.word	0x016f2818		@ Magic numbers to help the loader
		.word	start			@ absolute load/run zImage address
		.word	_edata			@ zImage end address
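/*
 * With the eight NOPs and the branch above, these three words sit at
 * fixed offsets 0x24, 0x28 and 0x2c from the start of the image,
 * which is where boot loaders expect to find them.
 */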
1:		mov	r7, r1			@ save architecture ID
		mov	r8, #0			@ r8 = 0 (r0 itself is not saved)
		teqp	pc, #0x0c000003		@ turn off interrupts

		.text
		adr	r0, LC0
		ldmia	r0, {r1, r2, r3, r4, r5, r6, ip, sp}
		subs	r0, r0, r1		@ calculate the delta offset

		teq	r0, #0			@ if delta is zero, we're
		beq	not_relocated		@ running at the address we
						@ were linked at.

		add	r2, r2, r0		@ different address, so we
		add	r3, r3, r0		@ need to fix up various
		add	r5, r5, r0		@ pointers.
		add	r6, r6, r0
		add	ip, ip, r0
		add	sp, sp, r0

1:		ldr	r1, [r6, #0]		@ relocate entries in the GOT
		add	r1, r1, r0		@ table.  This fixes up the
		str	r1, [r6], #4		@ C references.
		cmp	r6, ip
		blo	1b

not_relocated:	mov	r0, #0
1:		str	r0, [r2], #4		@ clear bss
		str	r0, [r2], #4
		str	r0, [r2], #4
		str	r0, [r2], #4
		cmp	r2, r3
		blo	1b

		bl	cache_on

		mov	r1, sp			@ malloc space above stack
		add	r2, sp, #0x10000	@ 64k max

/*
 * Check to see if we will overwrite ourselves.
 *   r4 = final kernel address
 *   r5 = start of this image
 *   r2 = end of malloc space (and therefore this image)
 * We basically want:
 *   r4 >= r2 -> OK
 *   r4 + image length <= r5 -> OK
 */
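/*
 * Illustrative (made-up) numbers: with r4 = 0x00008000,
 * r5 = 0x00100000 and r2 = 0x00208000, r4 < r2 and
 * r4 + 4MB = 0x00408000 > r5, so neither test passes and the
 * decompressed kernel must be moved out of the way first.
 */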
		cmp	r4, r2
		bhs	wont_overwrite
		add	r0, r4, #4096*1024	@ 4MB largest kernel size
		cmp	r0, r5
		bls	wont_overwrite

		mov	r5, r2			@ decompress after malloc space
		mov	r0, r5
		mov	r3, r7
		bl	decompress_kernel

		add	r0, r0, #127
		bic	r0, r0, #127		@ align the kernel length
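/*
 * The 128-byte rounding above matches the relocation loop at
 * reloc_start, which moves 32 words (128 bytes) per pass.
 */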
/*
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		add	r1, r5, r0		@ end of decompressed kernel
		adr	r2, reloc_start
		ldr	r3, LC1
		add	r3, r2, r3
1:		ldmia	r2!, {r8 - r13}		@ copy relocation code
		stmia	r1!, {r8 - r13}
		ldmia	r2!, {r8 - r13}
		stmia	r1!, {r8 - r13}
		cmp	r2, r3
		blo	1b

		bl	cache_clean_flush
		add	pc, r5, r0		@ call relocation code

/*
 * We're not in danger of overwriting ourselves.  Do this the simple way.
 *
 * r4 = kernel execution address
 * r7 = architecture ID
 */
wont_overwrite:	mov	r0, r4
		mov	r3, r7
		bl	decompress_kernel
		b	call_kernel

		.type	LC0, #object
LC0:		.word	LC0			@ r1
		.word	__bss_start		@ r2
		.word	_end			@ r3
		.word	_load_addr		@ r4
		.word	_start			@ r5
		.word	_got_start		@ r6
		.word	_got_end		@ ip
		.word	user_stack+4096		@ sp
LC1:		.word	reloc_end - reloc_start
		.size	LC0, . - LC0

/*
 * Turn on the cache.  We need to setup some page tables so that we
 * can have both the I and D caches on.
 *
 * We place the page tables 16k down from the kernel execution address,
 * and we hope that nothing else is using it.  If we're using it, we
 * will go pop!
 *
 * On entry,
 *  r4 = kernel execution address
 *  r6 = processor ID
 *  r7 = architecture number
 *  r8 = run-time address of "start"
 * On exit,
 *  r1, r2, r3, r8, r9, r12 corrupted
 * This routine must preserve:
 *  r4, r5, r6, r7
 */
		.align	5
cache_on:	mov	r3, #8			@ cache_on function
		b	call_cache_fn

__setup_mmu:	sub	r3, r4, #16384		@ Page directory size
		bic	r3, r3, #0xff		@ Align the pointer
		bic	r3, r3, #0x3f00
/*
 * Initialise the page tables, turning on the cacheable and bufferable
 * bits for the RAM area only.
 */
		mov	r0, r3
		mov	r8, r0, lsr #18
		mov	r8, r8, lsl #18		@ start of RAM
		add	r9, r8, #0x10000000	@ a reasonable RAM size
		mov	r1, #0x12
		orr	r1, r1, #3 << 10
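/*
 * r1 now holds a 1MB section descriptor: bits [1:0] = 10 (section),
 * bit 4 set, and AP = 11 at bits [11:10] for full access.  The 0x0c
 * OR'd in below adds the C and B (cacheable/bufferable) bits.
 */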
		add	r2, r3, #16384
1:		cmp	r1, r8			@ if virt >= start of RAM
		orrhs	r1, r1, #0x0c		@ set cacheable, bufferable
		cmp	r1, r9			@ if virt >= end of RAM
		bichs	r1, r1, #0x0c		@ clear cacheable, bufferable
		str	r1, [r0], #4		@ 1:1 mapping
		add	r1, r1, #1048576
		teq	r0, r2
		bne	1b
/*
 * If we are ever running from Flash, we surely want the cache to be
 * enabled for our own execution as well.  We map 2MB of it, so there
 * is no overlap problem even for a compressed kernel of up to 1MB.
 * If we are executing from RAM, this merely duplicates the mapping
 * set up above.
 */
		mov	r1, #0x1e
		orr	r1, r1, #3 << 10
		mov	r2, pc, lsr #20
		orr	r1, r1, r2, lsl #20
		add	r0, r3, r2, lsl #2
		str	r1, [r0], #4
		add	r1, r1, #1048576
		str	r1, [r0]
		mov	pc, lr

__armv4_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c10, 4	@ drain write buffer
		mcr	p15, 0, r0, c8, c7, 0	@ flush I,D TLBs
		mrc	p15, 0, r0, c1, c0, 0	@ read control reg
		orr	r0, r0, #0x1000		@ I-cache enable
		orr	r0, r0, #0x0030
		b	__common_cache_on

__arm6_cache_on:
		mov	r12, lr
		bl	__setup_mmu
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	r0, #0x30
__common_cache_on:
#ifndef DEBUG
		orr	r0, r0, #0x000d		@ Write buffer, mmu
#endif
		mov	r1, #-1
		mcr	p15, 0, r3, c2, c0, 0	@ load page table pointer
		mcr	p15, 0, r1, c3, c0, 0	@ load domain access control
		mcr	p15, 0, r0, c1, c0, 0	@ load control register
		mov	pc, r12

/*
 * All code following this line is relocatable.  It is relocated by
 * the above code to the end of the decompressed kernel image and
 * executed there.  During this time, we have no stacks.
 *
 * r0     = decompressed kernel length
 * r1-r3  = unused
 * r4     = kernel execution address
 * r5     = decompressed kernel start
 * r6     = processor ID
 * r7     = architecture ID
 * r8-r14 = unused
 */
		.align	5
reloc_start:	add	r8, r5, r0
		debug_reloc_start
		mov	r1, r4
1:
		.rept	4
		ldmia	r5!, {r0, r2, r3, r9 - r13}	@ relocate kernel
		stmia	r1!, {r0, r2, r3, r9 - r13}
		.endr

		cmp	r5, r8
		blo	1b
		debug_reloc_end

call_kernel:	bl	cache_clean_flush
		bl	cache_off
		mov	r0, #0
		mov	r1, r7			@ restore architecture number
		mov	pc, r4			@ call kernel

/*
 * Here follow the relocatable cache support functions for the
 * various processors.  This is a generic hook for locating an
 * entry and jumping to an instruction at the specified offset
 * from the start of the block.  Please note this is all position
 * independent code.
 *
 *  r1  = corrupted
 *  r2  = corrupted
 *  r3  = block offset
 *  r6  = corrupted
 *  r12 = corrupted
 */

call_cache_fn:	adr	r12, proc_types
		mrc	p15, 0, r6, c0, c0	@ get processor ID
1:		ldr	r1, [r12, #0]		@ get value
		ldr	r2, [r12, #4]		@ get mask
		eor	r1, r1, r6		@ (real ^ match)
		tst	r1, r2			@       & mask
		addeq	pc, r12, r3		@ call cache function
		add	r12, r12, #4*5
		b	1b
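/*
 * Each proc_types entry is five words (20 bytes): ID, mask and three
 * method slots, so r3 = 8, 12 or 16 selects the 'on', 'off' or
 * 'flush' instruction respectively.
 */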

/*
 * Table for cache operations.  This is basically:
 *  - CPU ID match
 *  - CPU ID mask
 *  - 'cache on' method instruction
 *  - 'cache off' method instruction
 *  - 'cache flush' method instruction
 *
 * We match an entry using: ((real_id ^ match) & mask) == 0
 *
 * Writethrough caches generally only need 'on' and 'off'
 * methods.  Writeback caches _must_ have the flush method
 * defined.
 */
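/*
 * For example, an SA-110 reporting an ID such as 0x4401a102 matches
 * the entry below with value 0x4401a100 and mask 0xffffffe0, since
 * (0x4401a102 ^ 0x4401a100) & 0xffffffe0 == 0.
 */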
		.type	proc_types,#object
proc_types:
		.word	0x41560600		@ ARM6/610
		.word	0xffffffe0
		b	__arm6_cache_off	@ works, but slow
		b	__arm6_cache_off
		mov	pc, lr
@		b	__arm6_cache_on		@ untested
@		b	__arm6_cache_off
@		b	__armv3_cache_flush

		.word	0x41007000		@ ARM7/710
		.word	0xfff8fe00
		b	__arm7_cache_off
		b	__arm7_cache_off
		mov	pc, lr

		.word	0x41807200		@ ARM720T (writethrough)
		.word	0xffffff00
		b	__armv4_cache_on
		b	__armv4_cache_off
		mov	pc, lr

		.word	0x41129200		@ ARM920T
		.word	0xff00fff0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x4401a100		@ sa110 / sa1100
		.word	0xffffffe0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x6901b110		@ sa1110
		.word	0xfffffff0
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0x69050000		@ xscale
		.word	0xffff0000
		b	__armv4_cache_on
		b	__armv4_cache_off
		b	__armv4_cache_flush

		.word	0			@ unrecognised type
		.word	0
		mov	pc, lr
		mov	pc, lr
		mov	pc, lr

		.size	proc_types, . - proc_types

/*
 * Turn off the Cache and MMU.  ARMv3 does not support
 * reading the control register, but ARMv4 does.
 *
 * On entry,  r6 = processor ID
 * On exit,   r0, r1, r2, r3, r12 corrupted
 * This routine must preserve: r4, r6, r7
 */
		.align	5
cache_off:	mov	r3, #12			@ cache_off function
		b	call_cache_fn

__armv4_cache_off:
		mrc	p15, 0, r0, c1, c0
		bic	r0, r0, #0x000d
		mcr	p15, 0, r0, c1, c0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c7	@ invalidate whole cache v4
		mcr	p15, 0, r0, c8, c7	@ invalidate whole TLB v4
		mov	pc, lr

__arm6_cache_off:
		mov	r0, #0x00000030		@ ARM6 control reg.
		b	__armv3_cache_off

__arm7_cache_off:
		mov	r0, #0x00000070		@ ARM7 control reg.
		b	__armv3_cache_off

__armv3_cache_off:
		mcr	p15, 0, r0, c1, c0, 0	@ turn MMU and cache off
		mov	r0, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mcr	p15, 0, r0, c5, c0, 0	@ invalidate whole TLB v3
		mov	pc, lr

/*
 * Clean and flush the cache to maintain consistency.
 *
 * On entry,
 *  r6 = processor ID
 * On exit,
 *  r1, r2, r3, r12 corrupted
 * This routine must preserve:
 *  r0, r4, r5, r6, r7
 */
		.align	5
cache_clean_flush:
		mov	r3, #16
		b	call_cache_fn

__armv4_cache_flush:
		bic	r1, pc, #31
		add	r2, r1, #65536		@ 2x the largest dcache size
1:		ldr	r12, [r1], #32		@ s/w flush D cache
		teq	r1, r2
		bne	1b
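/*
 * Reading a 64k window one cache line at a time forces any dirty
 * lines in a read-allocate D-cache of up to 32k to be evicted and
 * written back; ARMv4 has no generic clean-all operation.
 */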

		mcr	p15, 0, r1, c7, c7, 0	@ flush I cache
		mcr	p15, 0, r1, c7, c10, 4	@ drain WB
		mov	pc, lr

__armv3_cache_flush:
		mov	r1, #0
		mcr	p15, 0, r0, c7, c0, 0	@ invalidate whole cache v3
		mov	pc, lr

/*
 * Various debugging routines for printing hex characters and
 * memory, which again must be relocatable.
 */
#ifdef DEBUG
		.type	phexbuf,#object
phexbuf:	.space	12
		.size	phexbuf, . - phexbuf

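/*
 * phex prints the value in r0 as r1 hex digits: the digits are built
 * up backwards in phexbuf (low nibble first, with 7 added for A-F),
 * and the NUL-terminated buffer is then handed to puts.
 */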
phex:		adr	r3, phexbuf
		mov	r2, #0
		strb	r2, [r3, r1]
1:		subs	r1, r1, #1
		movmi	r0, r3
		bmi	puts
		and	r2, r0, #15
		mov	r0, r0, lsr #4
		cmp	r2, #10
		addge	r2, r2, #7
		add	r2, r2, #'0'
		strb	r2, [r3, r1]
		b	1b

puts:		loadsp	r3
1:		ldrb	r2, [r0], #1
		teq	r2, #0
		moveq	pc, lr
2:		writeb	r2
		mov	r1, #0x00020000
3:		subs	r1, r1, #1
		bne	3b
		teq	r2, #'\n'
		moveq	r2, #'\r'
		beq	2b
		teq	r0, #0
		bne	1b
		mov	pc, lr
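/*
 * putc prints the single character in r0 by reusing the body of puts
 * above: r0 is zeroed first, so the "teq r0, #0" test ends the loop
 * after one character.  Note that the loadsp and writeb macros used
 * here are assumed to be provided elsewhere; they are not defined in
 * this file.
 */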
putc:
		mov	r2, r0
		mov	r0, #0
		loadsp	r3
		b	2b
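/*
 * memdump prints 64 words starting at the address in r0, eight words
 * per line, each line prefixed with its address.
 */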

memdump:	mov	r12, r0
		mov	r10, lr
		mov	r11, #0
2:		mov	r0, r11, lsl #2
		add	r0, r0, r12
		mov	r1, #8
		bl	phex
		mov	r0, #':'
		bl	putc
1:		mov	r0, #' '
		bl	putc
		ldr	r0, [r12, r11, lsl #2]
		mov	r1, #8
		bl	phex
		and	r0, r11, #7
		teq	r0, #3
		moveq	r0, #' '
		bleq	putc
		and	r0, r11, #7
		add	r11, r11, #1
		teq	r0, #7
		bne	1b
		mov	r0, #'\n'
		bl	putc
		cmp	r11, #64
		blt	2b
		mov	pc, r10
#endif

reloc_end:

		.align
		.section ".stack", "aw"
user_stack:	.space	4096