]>
Commit | Line | Data |
---|---|---|
f938d2c8 RR |
1 | /*P:900 This is the Switcher: code which sits at 0xFFC00000 to do the low-level |
2 | * Guest<->Host switch. It is as simple as it can be made, but it's naturally | |
3 | * very specific to x86. | |
4 | * | |
5 | * You have now completed Preparation. If this has whetted your appetite; if you | |
6 | * are feeling invigorated and refreshed then the next, more challenging stage | |
7 | * can be found in "make Guest". :*/ | |
d7e28ffe | 8 | |
f8f0fdcd RR |
9 | /*S:100 |
10 | * Welcome to the Switcher itself! | |
11 | * | |
12 | * This file contains the low-level code which changes the CPU to run the Guest | |
13 | * code, and returns to the Host when something happens. Understand this, and | |
14 | * you understand the heart of our journey. | |
15 | * | |
16 | * Because this is in assembler rather than C, our tale switches from prose to | |
17 | * verse. First I tried limericks: | |
18 | * | |
19 | * There once was an eax reg, | |
20 | * To which our pointer was fed, | |
21 | * It needed an add, | |
22 | * Which asm-offsets.h had | |
23 | * But this limerick is hurting my head. | |
24 | * | |
25 | * Next I tried haikus, but fitting the required reference to the seasons in | |
26 | * every stanza was quickly becoming tiresome: | |
27 | * | |
28 | * The %eax reg | |
29 | * Holds "struct lguest_pages" now: | |
30 | * Cherry blossoms fall. | |
31 | * | |
32 | * Then I started with Heroic Verse, but the rhyming requirement leeched away | |
33 | * the content density and led to some uniquely awful oblique rhymes: | |
34 | * | |
35 | * These constants are coming from struct offsets | |
36 | * For use within the asm switcher text. | |
37 | * | |
38 | * Finally, I settled for something between heroic hexameter, and normal prose | |
39 | * with inappropriate linebreaks. Anyway, it ain't no Shakespeare. | |
40 | */ | |
41 | ||
42 | // Not all kernel headers work from assembler | |
43 | // But these ones are needed: the ENTRY() define | |
44 | // And constants extracted from struct offsets | |
45 | // To avoid magic numbers and breakage: | |
46 | // Should they change the compiler can't save us | |
47 | // Down here in the depths of assembler code. | |
d7e28ffe RR |
48 | #include <linux/linkage.h> |
49 | #include <asm/asm-offsets.h> | |
0d027c01 | 50 | #include <asm/page.h> |
d7e28ffe RR |
51 | #include "lg.h" |
52 | ||
f8f0fdcd RR |
53 | // We mark the start of the code to copy |
54 | // It's placed in .text tho it's never run here | |
55 | // You'll see the trick macro at the end | |
56 | // Which interleaves data and text to effect. | |
d7e28ffe RR |
57 | .text |
58 | ENTRY(start_switcher_text) /* marks where the code to copy begins; end_switcher_text marks where it stops */ | |
59 | ||
f8f0fdcd RR |
60 | // When we reach switch_to_guest we have just left |
61 | // The safe and comforting shores of C code | |
62 | // %eax has the "struct lguest_pages" to use | |
63 | // Where we save state and still see it from the Guest | |
64 | // And %ebx holds the Guest shadow pagetable: | |
65 | // Once set we have truly left Host behind. | |
d7e28ffe | 66 | ENTRY(switch_to_guest) /* in: %eax = "struct lguest_pages", %ebx = Guest shadow pagetable */ |
f8f0fdcd RR |
67 | // We told gcc all its regs could fade, |
68 | // Clobbered by our journey into the Guest | |
69 | // We could have saved them, if we tried | |
70 | // But time is our master and cycles count. | |
71 | ||
72 | // Segment registers must be saved for the Host | |
73 | // We push them on the Host stack for later | |
d7e28ffe RR |
74 | pushl %es /* these four are popped in reverse order at the end of SWITCH_TO_HOST */ |
75 | pushl %ds | |
76 | pushl %gs | |
77 | pushl %fs | |
f8f0fdcd RR |
78 | // But the compiler is fickle, and heeds |
79 | // No warning of %ebp clobbers | |
80 | // When frame pointers are used. That register | |
81 | // Must be saved and restored or chaos strikes. | |
d7e28ffe | 82 | pushl %ebp /* pushed last, so SWITCH_TO_HOST pops it first */ |
f8f0fdcd RR |
83 | // The Host's stack is done, now save it away |
84 | // In our "struct lguest_pages" at offset | |
85 | // Distilled into asm-offsets.h | |
d7e28ffe | 86 | movl %esp, LGUEST_PAGES_host_sp(%eax) /* SWITCH_TO_HOST reloads %esp from this slot */ |
f8f0fdcd RR |
87 | |
88 | // All saved and there's now five steps before us: | |
89 | // Stack, GDT, IDT, TSS | |
90 | // And last of all the page tables are flipped. | |
91 | ||
92 | // Yet beware that our stack pointer must be | |
93 | // Always valid lest an NMI hits | |
94 | // %edx does the duty here as we juggle | |
95 | // %eax is lguest_pages: our stack lies within. | |
d7e28ffe RR |
96 | movl %eax, %edx |
97 | addl $LGUEST_PAGES_regs, %edx | |
98 | movl %edx, %esp /* %esp = %eax + LGUEST_PAGES_regs, written in a single move */ | |
f8f0fdcd RR |
99 | |
100 | // The Guest's GDT we so carefully | |
101 | // Placed in the "struct lguest_pages" before | |
d7e28ffe | 102 | lgdt LGUEST_PAGES_guest_gdt_desc(%eax) |
f8f0fdcd RR |
103 | |
104 | // The Guest's IDT we did partially | |
105 | // Move to the "struct lguest_pages" as well. | |
d7e28ffe | 106 | lidt LGUEST_PAGES_guest_idt_desc(%eax) |
f8f0fdcd RR |
107 | |
108 | // The TSS entry which controls traps | |
109 | // Must be loaded up with "ltr" now: | |
110 | // For after we switch over our page tables | |
111 | // It (as the rest) will be writable no more. | |
112 | // (The GDT entry TSS needs | |
113 | // Changes type when we load it: damn Intel!) | |
d7e28ffe RR |
114 | movl $(GDT_ENTRY_TSS*8), %edx /* GDT index scaled by 8, the same scaling used for the byte offset below */ |
115 | ltr %dx | |
f8f0fdcd RR |
116 | |
117 | // Look back now, before we take this last step! | |
118 | // The Host's TSS entry was also marked used; | |
119 | // Let's clear it again, ere we return. | |
120 | // The GDT descriptor of the Host | |
121 | // Points to the table after two "size" bytes | |
d7e28ffe | 122 | movl (LGUEST_PAGES_host_gdt_desc+2)(%eax), %edx /* +2 skips the size bytes: %edx = Host GDT address */ |
f8f0fdcd | 123 | // Clear the type field of "used" (byte 5, bit 2) |
d7e28ffe | 124 | andb $0xFD, (GDT_ENTRY_TSS*8 + 5)(%edx) /* 0xFD == ~0x02: only that one flag is cleared */ |
f8f0fdcd RR |
125 | |
126 | // Once our page table's switched, the Guest is live! | |
127 | // The Host fades as we run this final step. | |
128 | // Our "struct lguest_pages" is now read-only. | |
d7e28ffe | 129 | movl %ebx, %cr3 |
f8f0fdcd RR |
130 | |
131 | // The page table change did one tricky thing: | |
132 | // The Guest's register page has been mapped | |
133 | // Writable onto our %esp (stack) -- | |
134 | // We can simply pop off all Guest regs. | |
d7e28ffe RR |
135 | popl %ebx /* pop order is the exact reverse of the pushes in SWITCH_TO_HOST */ |
136 | popl %ecx | |
137 | popl %edx | |
138 | popl %esi | |
139 | popl %edi | |
140 | popl %ebp | |
141 | popl %gs | |
142 | popl %eax | |
143 | popl %fs | |
144 | popl %ds | |
145 | popl %es | |
f8f0fdcd RR |
146 | |
147 | // Near the base of the stack lurk two strange fields | |
148 | // Which we fill as we exit the Guest | |
149 | // These are the trap number and its error | |
150 | // We can simply step past them on our way. | |
d7e28ffe | 151 | addl $8, %esp /* 2 x 4 bytes: trap number + error code */ |
f8f0fdcd RR |
152 | |
153 | // The last five stack slots hold return address | |
154 | // And everything needed to change privilege | |
155 | // Into the Guest privilege level of 1, | |
156 | // And the stack where the Guest had last left it. | |
157 | // Interrupts are turned back on: we are Guest. | |
d7e28ffe RR |
158 | iret |
159 | ||
f8f0fdcd RR |
160 | // There are two paths where we switch to the Host |
161 | // So we put the routine in a macro. | |
162 | // We are on our way home, back to the Host | |
163 | // Interrupted out of the Guest, we come here. | |
d7e28ffe | 164 | #define SWITCH_TO_HOST /* leaves %eax = "struct lguest_pages", %ebx = trap number; uses %edx as scratch */ \ |
f8f0fdcd RR |
165 | /* We save the Guest state: all registers first \ |
166 | * Laid out just as "struct lguest_regs" defines */ \ | |
d7e28ffe RR |
167 | pushl %es; \ |
168 | pushl %ds; \ | |
169 | pushl %fs; \ | |
170 | pushl %eax; \ | |
171 | pushl %gs; \ | |
172 | pushl %ebp; \ | |
173 | pushl %edi; \ | |
174 | pushl %esi; \ | |
175 | pushl %edx; \ | |
176 | pushl %ecx; \ | |
177 | pushl %ebx; \ | |
f8f0fdcd RR |
178 | /* Our stack and our code are using segments \ |
179 | * Set in the TSS and IDT \ | |
180 | * Yet if we were to touch data we'd use \ | |
181 | * Whatever data segment the Guest had. \ | |
182 | * Load the lguest ds segment for now. */ \ | |
d7e28ffe RR |
183 | movl $(LGUEST_DS), %eax; \ |
184 | movl %eax, %ds; \ | |
f8f0fdcd | 185 | /* So where are we? Which CPU, which struct? \ |
0d027c01 RR |
186 | * The stack is our clue: our TSS starts \ |
187 | * It at the end of "struct lguest_pages". \ | |
188 | * Or we may have stumbled while restoring \ | |
189 | * Our Guest segment regs while in switch_to_guest, \ | |
190 | * The fault pushed atop that part-unwound stack. \ | |
191 | * If we round the stack down to the page start \ | |
192 | * We're at the start of "struct lguest_pages". */ \ | |
d7e28ffe | 193 | movl %esp, %eax; \ |
0d027c01 | 194 | andl $(~((1 << PAGE_SHIFT) - 1)), %eax; /* explicit parens: as and C rank << and - differently */ \ |
f8f0fdcd RR |
195 | /* Save our trap number: the switch will obscure it \ |
196 | * (The Guest regs are not mapped here in the Host) \ | |
197 | * %ebx holds it safe for deliver_to_host */ \ | |
d7e28ffe | 198 | movl LGUEST_PAGES_regs_trapnum(%eax), %ebx; \ |
f8f0fdcd RR |
199 | /* The Host GDT, IDT and stack! \ |
200 | * All these lie safely hidden from the Guest: \ | |
201 | * We must return to the Host page tables \ | |
202 | * (Hence that was saved in struct lguest_pages) */ \ | |
d7e28ffe RR |
203 | movl LGUEST_PAGES_host_cr3(%eax), %edx; \ |
204 | movl %edx, %cr3; \ | |
f8f0fdcd RR |
205 | /* As before, when we looked back at the Host \ |
206 | * As we left and marked TSS unused \ | |
207 | * So must we now for the Guest left behind. */ \ | |
d7e28ffe | 208 | andb $0xFD, (LGUEST_PAGES_guest_gdt+GDT_ENTRY_TSS*8+5)(%eax); \ |
f8f0fdcd | 209 | /* Switch to Host's GDT, IDT. */ \ |
d7e28ffe RR |
210 | lgdt LGUEST_PAGES_host_gdt_desc(%eax); \ |
211 | lidt LGUEST_PAGES_host_idt_desc(%eax); \ | |
f8f0fdcd | 212 | /* Restore the Host's stack where its saved regs lie */ \ |
d7e28ffe | 213 | movl LGUEST_PAGES_host_sp(%eax), %esp; \ |
f8f0fdcd | 214 | /* Last the TSS: our Host is complete */ \ |
d7e28ffe RR |
215 | movl $(GDT_ENTRY_TSS*8), %edx; \ |
216 | ltr %dx; \ | |
f8f0fdcd | 217 | /* Restore now the regs saved right at the first. */ \ |
d7e28ffe RR |
218 | popl %ebp; \ |
219 | popl %fs; \ | |
220 | popl %gs; \ | |
221 | popl %ds; \ | |
222 | popl %es | |
223 | ||
f8f0fdcd RR |
224 | // Here's where we come when the Guest has just trapped: |
225 | // (Which trap we'll see has been pushed on the stack). | |
226 | // We need only switch back, and the Host will decode | |
227 | // Why we came home, and what needs to be done. | |
d7e28ffe RR |
228 | return_to_host: /* jump target of the IRQ_STUB entries for traps 0-1, 3-31 and 128 */ |
229 | SWITCH_TO_HOST | |
230 | iret /* runs on the Host stack restored by SWITCH_TO_HOST */ | |
231 | ||
f8f0fdcd RR |
232 | // An interrupt, with some cause external |
233 | // Has ajerked us rudely from the Guest's code | |
234 | // Again we must return home to the Host | |
d7e28ffe RR |
235 | deliver_to_host: /* jump target of the IRQ_STUB entries for vectors 32-127 and 129-255 */ |
236 | SWITCH_TO_HOST /* leaves %eax = "struct lguest_pages", %ebx = trap number */ | |
f8f0fdcd RR |
237 | // But now we must go home via that place |
238 | // Where that interrupt was supposed to go | |
239 | // Had we not been ensconced, running the Guest. | |
240 | // Here we see the cleverness of our stack: | |
241 | // The Host stack is formed like an interrupt | |
242 | // With EIP, CS and EFLAGS layered. | |
243 | // Interrupt handlers end with "iret" | |
244 | // And that will take us home at long long last. | |
245 | ||
246 | // But first we must find the handler to call! | |
247 | // The IDT descriptor for the Host | |
248 | // Has two bytes for size, and four for address: | |
249 | // %edx will hold it for us for now. | |
d7e28ffe | 250 | movl (LGUEST_PAGES_host_idt_desc+2)(%eax), %edx /* +2 skips the size bytes: %edx = Host IDT address */ |
f8f0fdcd RR |
251 | // We now know the table address we need, |
252 | // And saved the trap's number inside %ebx. | |
253 | // Yet the pointer to the handler is smeared | |
254 | // Across the bits of the table entry. | |
255 | // What oracle can tell us how to extract | |
256 | // From such a convoluted encoding? | |
257 | // I consulted gcc, and it gave | |
258 | // These instructions, which I gladly credit: | |
d7e28ffe RR |
259 | leal (%edx,%ebx,8), %eax /* %eax = table address + trap number * 8 */ |
260 | movzwl (%eax),%edx /* %edx = entry bytes 0-1, zero-extended */ | |
261 | movl 4(%eax), %eax /* %eax = entry bytes 4-7 */ | |
262 | xorw %ax, %ax /* keep only the upper 16 bits of that word */ | |
263 | orl %eax, %edx /* %edx = upper half | lower half */ | |
f8f0fdcd RR |
264 | // Now the address of the handler's in %edx |
265 | // We call it now: its "iret" takes us home. | |
d7e28ffe RR |
266 | jmp *%edx |
267 | ||
f8f0fdcd RR |
268 | // Every interrupt can come to us here |
269 | // But we must truly tell each apart. | |
270 | // They number two hundred and fifty six | |
271 | // And each must land in a different spot, | |
272 | // Push its number on stack, and join the stream. | |
273 | ||
274 | // And worse, a mere seven of the traps stand apart | |
275 | // And push on their stack an addition: | |
276 | // An error number, thirty two bits long | |
277 | // So we punish the other two fifty | |
278 | // And make them push a zero so they match. | |
279 | ||
280 | // Yet two fifty six entries is long | |
281 | // And all will look most the same as the last | |
282 | // So we create a macro which can make | |
283 | // As many entries as we need to fill. | |
284 | ||
285 | // Note the change to .data then .text: | |
286 | // We plant the address of each entry | |
287 | // Into a (data) table for the Host | |
288 | // To know where each Guest interrupt should go. | |
d7e28ffe RR |
289 | .macro IRQ_STUB N TARGET /* N = trap number to push, TARGET = label to jump to */ |
290 | .data; .long 1f; .text; 1: /* store this stub's address in .data, then resume emitting code at 1: */ | |
f8f0fdcd RR |
291 | // Trap eight, ten through fourteen and seventeen |
292 | // Supply an error number. Else zero. | |
d7e28ffe RR |
293 | .if (\N <> 8) && (\N < 10 || \N > 14) && (\N <> 17) |
294 | pushl $0 /* placeholder so every stub leaves two words on the stack */ | |
295 | .endif | |
296 | pushl $\N | |
297 | jmp \TARGET | |
298 | ALIGN | |
299 | .endm | |
300 | ||
f8f0fdcd RR |
301 | // This macro creates numerous entries |
302 | // Using GAS macros which out-power C's. | |
d7e28ffe RR |
303 | .macro IRQ_STUBS FIRST LAST TARGET /* one IRQ_STUB per number in FIRST..LAST, all jumping to TARGET */ |
304 | irq=\FIRST /* assembler symbol used as the loop counter */ | |
305 | .rept \LAST-\FIRST+1 /* +1 because the range is inclusive */ | |
306 | IRQ_STUB irq \TARGET | |
307 | irq=irq+1 | |
308 | .endr | |
309 | .endm | |
310 | ||
f8f0fdcd RR |
311 | // Here's the marker for our pointer table |
312 | // Laid in the data section just before | |
313 | // Each macro places the address of code | |
314 | // Forming an array: each one points to text | |
315 | // Which handles interrupt in its turn. | |
d7e28ffe RR |
316 | .data |
317 | .global default_idt_entries | |
318 | default_idt_entries: /* each IRQ_STUB below appends one .long here, in order 0..255 */ | |
319 | .text | |
f8f0fdcd RR |
320 | // The first two traps go straight back to the Host |
321 | IRQ_STUBS 0 1 return_to_host | |
322 | // We'll say nothing, yet, about NMI | |
323 | IRQ_STUB 2 handle_nmi | |
324 | // Other traps also return to the Host | |
325 | IRQ_STUBS 3 31 return_to_host | |
326 | // All interrupts go via their handlers | |
327 | IRQ_STUBS 32 127 deliver_to_host | |
328 | // 'Cept system calls coming from userspace | |
329 | // Are to go to the Guest, never the Host. | |
330 | IRQ_STUB 128 return_to_host /* return_to_host does not jump through the Host IDT, unlike deliver_to_host */ | |
331 | IRQ_STUBS 129 255 deliver_to_host | |
332 | ||
333 | // The NMI, what a fabulous beast | |
334 | // Which swoops in and stops us no matter that | |
335 | // We're suspended between heaven and hell, | |
336 | // (Or more likely between the Host and Guest) | |
337 | // When in it comes! We are dazed and confused | |
338 | // So we do the simplest thing which one can. | |
339 | // Though we've pushed the trap number and zero | |
340 | // We discard them, return, and hope we live. | |
d7e28ffe RR |
341 | handle_nmi: /* jump target of IRQ_STUB 2 */ |
342 | addl $8, %esp /* drop the zero and the trap number that the stub pushed */ | |
343 | iret | |
344 | ||
f8f0fdcd RR |
345 | // We are done; all that's left is Mastery |
346 | // And "make Mastery" is a journey long | |
347 | // Designed to make your fingers itch to code. | |
348 | ||
349 | // Here ends the text, the file and poem. | |
d7e28ffe | 350 | ENTRY(end_switcher_text) /* end marker paired with start_switcher_text */ |