1/*
2 * Architecture-specific unaligned trap handling.
3 *
4 * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 * David Mosberger-Tang <davidm@hpl.hp.com>
7 *
8 * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix
9 * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
10 * stacked register returns an undefined value; it does NOT trigger a
11 * "rsvd register fault").
12 * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops.
13 * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
14 * 2001/01/17 Add support for emulation of unaligned kernel accesses.
15 */
16#include <linux/jiffies.h>
17#include <linux/kernel.h>
18#include <linux/sched.h>
19#include <linux/tty.h>
20#include <linux/ratelimit.h>
21#include <linux/uaccess.h>
22
23#include <asm/intrinsics.h>
24#include <asm/processor.h>
25#include <asm/rse.h>
26#include <asm/exception.h>
27#include <asm/unaligned.h>
28
29extern int die_if_kernel(char *str, struct pt_regs *regs, long err);
30
31#undef DEBUG_UNALIGNED_TRAP
32
33#ifdef DEBUG_UNALIGNED_TRAP
34# define DPRINT(a...) do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0)
35# define DDUMP(str,vp,len) dump(str, vp, len)
36
37static void
38dump (const char *str, void *vp, size_t len)
39{
40 unsigned char *cp = vp;
41 int i;
42
43 printk("%s", str);
44 for (i = 0; i < len; ++i)
45 printk (" %02x", *cp++);
46 printk("\n");
47}
48#else
49# define DPRINT(a...)
50# define DDUMP(str,vp,len)
51#endif
52
53#define IA64_FIRST_STACKED_GR 32
54#define IA64_FIRST_ROTATING_FR 32
55#define SIGN_EXT9 0xffffffffffffff00ul
56
57/*
58 * sysctl settable hook which tells the kernel whether to honor the
59 * IA64_THREAD_UAC_NOPRINT prctl. Because this is user settable, we want
60 * to allow the super user to enable/disable this for security reasons
61 * (i.e. don't allow attacker to fill up logs with unaligned accesses).
62 */
63int no_unaligned_warning;
64int unaligned_dump_stack;
65
66/*
67 * For M-unit:
68 *
69 * opcode | m | x6 |
70 * --------|------|---------|
71 * [40-37] | [36] | [35:30] |
72 * --------|------|---------|
73 * 4 | 1 | 6 | = 11 bits
74 * --------------------------
75 * However bits [31:30] are not directly useful to distinguish between
76 * load/store so we can use [35:32] instead, which gives the following
77 * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
78 * checking the m-bit until later in the load/store emulation.
79 */
80#define IA64_OPCODE_MASK 0x1ef
81#define IA64_OPCODE_SHIFT 32
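
/*
 * Illustrative helper (not referenced below): the 9-bit opcode used to
 * index the tables that follow is carved out of a 41-bit instruction
 * slot exactly like this; ia64_handle_unaligned() open-codes the same
 * shift-and-mask when it decodes the faulting bundle.
 */
static inline unsigned long
unaligned_opcode (unsigned long slot)
{
	return (slot >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
}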
82
83/*
84 * Table C-28 Integer Load/Store
85 *
86 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
87 *
88 * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
 89 * the address (bits [8:3]), so we must fail.
90 */
91#define LD_OP 0x080
92#define LDS_OP 0x081
93#define LDA_OP 0x082
94#define LDSA_OP 0x083
95#define LDBIAS_OP 0x084
96#define LDACQ_OP 0x085
97/* 0x086, 0x087 are not relevant */
98#define LDCCLR_OP 0x088
99#define LDCNC_OP 0x089
100#define LDCCLRACQ_OP 0x08a
101#define ST_OP 0x08c
102#define STREL_OP 0x08d
103/* 0x08e,0x8f are not relevant */
104
105/*
106 * Table C-29 Integer Load +Reg
107 *
108 * we use the ld->m (bit [36:36]) field to determine whether or not we have
109 * a load/store of this form.
110 */
111
112/*
113 * Table C-30 Integer Load/Store +Imm
114 *
115 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
116 *
 117 * ld8.fill, st8.fill must be aligned because the NaT bits are based on
118 * the address, so we must fail and the program must be fixed.
119 */
120#define LD_IMM_OP 0x0a0
121#define LDS_IMM_OP 0x0a1
122#define LDA_IMM_OP 0x0a2
123#define LDSA_IMM_OP 0x0a3
124#define LDBIAS_IMM_OP 0x0a4
125#define LDACQ_IMM_OP 0x0a5
126/* 0x0a6, 0xa7 are not relevant */
127#define LDCCLR_IMM_OP 0x0a8
128#define LDCNC_IMM_OP 0x0a9
129#define LDCCLRACQ_IMM_OP 0x0aa
130#define ST_IMM_OP 0x0ac
131#define STREL_IMM_OP 0x0ad
132/* 0x0ae,0xaf are not relevant */
133
134/*
135 * Table C-32 Floating-point Load/Store
136 */
137#define LDF_OP 0x0c0
138#define LDFS_OP 0x0c1
139#define LDFA_OP 0x0c2
140#define LDFSA_OP 0x0c3
141/* 0x0c6 is irrelevant */
142#define LDFCCLR_OP 0x0c8
143#define LDFCNC_OP 0x0c9
144/* 0x0cb is irrelevant */
145#define STF_OP 0x0cc
146
147/*
148 * Table C-33 Floating-point Load +Reg
149 *
150 * we use the ld->m (bit [36:36]) field to determine whether or not we have
151 * a load/store of this form.
152 */
153
154/*
155 * Table C-34 Floating-point Load/Store +Imm
156 */
157#define LDF_IMM_OP 0x0e0
158#define LDFS_IMM_OP 0x0e1
159#define LDFA_IMM_OP 0x0e2
160#define LDFSA_IMM_OP 0x0e3
161/* 0x0e6 is irrelevant */
162#define LDFCCLR_IMM_OP 0x0e8
163#define LDFCNC_IMM_OP 0x0e9
164#define STF_IMM_OP 0x0ec
165
166typedef struct {
167 unsigned long qp:6; /* [0:5] */
168 unsigned long r1:7; /* [6:12] */
169 unsigned long imm:7; /* [13:19] */
170 unsigned long r3:7; /* [20:26] */
171 unsigned long x:1; /* [27:27] */
172 unsigned long hint:2; /* [28:29] */
173 unsigned long x6_sz:2; /* [30:31] */
174 unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */
175 unsigned long m:1; /* [36:36] */
176 unsigned long op:4; /* [37:40] */
177 unsigned long pad:23; /* [41:63] */
178} load_store_t;
179
180
181typedef enum {
182 UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */
183 UPD_REG /* ldXZ r1=[r3],r2 */
184} update_t;
185
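
/*
 * Illustrative sketch (hypothetical helper, not referenced below): how the
 * 9-bit update immediate of ldXZ r1=[r3],imm(9) is reassembled from the
 * instruction fields and sign-extended.  emulate_load_updates() open-codes
 * exactly these steps; the store forms use ld.r1 in place of ld.imm for the
 * low 7 bits.
 */
static inline unsigned long
decode_ld_imm9 (load_store_t ld)
{
	unsigned long imm = ld.x << 7 | ld.imm;

	if (ld.m)
		imm |= SIGN_EXT9;
	return imm;
}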
186/*
187 * We use tables to keep track of the offsets of registers in the saved state.
188 * This way we save having big switch/case statements.
189 *
190 * We use bit 0 to indicate switch_stack or pt_regs.
191 * The offset is simply shifted by 1 bit.
192 * A 2-byte value should be enough to hold any kind of offset
193 *
194 * In case the calling convention changes (and thus pt_regs/switch_stack)
195 * simply use RSW instead of RPT or vice-versa.
196 */
197
198#define RPO(x) ((size_t) &((struct pt_regs *)0)->x)
199#define RSO(x) ((size_t) &((struct switch_stack *)0)->x)
200
201#define RPT(x) (RPO(x) << 1)
202#define RSW(x) (1| RSO(x)<<1)
203
204#define GR_OFFS(x) (gr_info[x]>>1)
205#define GR_IN_SW(x) (gr_info[x] & 0x1)
206
207#define FR_OFFS(x) (fr_info[x]>>1)
208#define FR_IN_SW(x) (fr_info[x] & 0x1)
209
210static u16 gr_info[32]={
211 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
212
213 RPT(r1), RPT(r2), RPT(r3),
214
215 RSW(r4), RSW(r5), RSW(r6), RSW(r7),
216
217 RPT(r8), RPT(r9), RPT(r10), RPT(r11),
218 RPT(r12), RPT(r13), RPT(r14), RPT(r15),
219
220 RPT(r16), RPT(r17), RPT(r18), RPT(r19),
221 RPT(r20), RPT(r21), RPT(r22), RPT(r23),
222 RPT(r24), RPT(r25), RPT(r26), RPT(r27),
223 RPT(r28), RPT(r29), RPT(r30), RPT(r31)
224};
225
226static u16 fr_info[32]={
227 0, /* constant : WE SHOULD NEVER GET THIS */
228 0, /* constant : WE SHOULD NEVER GET THIS */
229
230 RSW(f2), RSW(f3), RSW(f4), RSW(f5),
231
232 RPT(f6), RPT(f7), RPT(f8), RPT(f9),
233 RPT(f10), RPT(f11),
234
235 RSW(f12), RSW(f13), RSW(f14),
236 RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
237 RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
238 RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
239 RSW(f30), RSW(f31)
240};
241
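/*
 * Illustrative sketch (hypothetical helper, not referenced below): how the
 * gr_info[]/fr_info[] encoding above is consumed.  Bit 0 selects pt_regs
 * vs. switch_stack as the base, the remaining bits are the byte offset
 * within that structure; setreg()/getreg() below open-code the same
 * computation.
 */
static inline unsigned long
scratch_gr_addr (struct pt_regs *regs, struct switch_stack *sw, unsigned long regnum)
{
	unsigned long base = GR_IN_SW(regnum) ? (unsigned long) sw
					      : (unsigned long) regs;

	return base + GR_OFFS(regnum);
}
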
242/* Invalidate ALAT entry for integer register REGNO. */
243static void
244invala_gr (int regno)
245{
246# define F(reg) case reg: ia64_invala_gr(reg); break
247
248 switch (regno) {
249 F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
250 F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
251 F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
252 F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
253 F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
254 F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
255 F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
256 F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
257 F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
258 F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
259 F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
260 F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
261 F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
262 F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
263 F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
264 F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
265 }
266# undef F
267}
268
269/* Invalidate ALAT entry for floating-point register REGNO. */
270static void
271invala_fr (int regno)
272{
273# define F(reg) case reg: ia64_invala_fr(reg); break
274
275 switch (regno) {
276 F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
277 F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
278 F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
279 F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
280 F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
281 F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
282 F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
283 F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
284 F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
285 F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
286 F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
287 F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
288 F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
289 F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
290 F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
291 F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
292 }
293# undef F
294}
295
296static inline unsigned long
297rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
298{
299 reg += rrb;
300 if (reg >= sor)
301 reg -= sor;
302 return reg;
303}
304
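/*
 * Illustrative sketch (hypothetical helper, not referenced below): the
 * fields of cr.ifs that set_rse_reg()/get_rse_reg() decode by hand.
 * sof is the size of the current register frame, sor the size of its
 * rotating portion (in multiples of 8 registers) and rrb_gr the GR
 * rotating register base consumed by rotate_reg() above.
 */
static inline void
decode_cr_ifs (unsigned long cr_ifs, long *sof, long *sor, long *rrb_gr)
{
	*sof = cr_ifs & 0x7f;
	*sor = 8 * ((cr_ifs >> 14) & 0xf);
	*rrb_gr = (cr_ifs >> 18) & 0x7f;
}
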
305static void
306set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
307{
308 struct switch_stack *sw = (struct switch_stack *) regs - 1;
309 unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
310 unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
311 unsigned long rnats, nat_mask;
312 unsigned long on_kbs;
313 long sof = (regs->cr_ifs) & 0x7f;
314 long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
315 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
316 long ridx = r1 - 32;
317
318 if (ridx >= sof) {
319 /* this should never happen, as the "rsvd register fault" has higher priority */
320 DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
321 return;
322 }
323
324 if (ridx < sor)
325 ridx = rotate_reg(sor, rrb_gr, ridx);
326
327 DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
328 r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
329
330 on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
331 addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
332 if (addr >= kbs) {
333 /* the register is on the kernel backing store: easy... */
334 rnat_addr = ia64_rse_rnat_addr(addr);
335 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
336 rnat_addr = &sw->ar_rnat;
337 nat_mask = 1UL << ia64_rse_slot_num(addr);
338
339 *addr = val;
340 if (nat)
341 *rnat_addr |= nat_mask;
342 else
343 *rnat_addr &= ~nat_mask;
344 return;
345 }
346
347 if (!user_stack(current, regs)) {
348 DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
349 return;
350 }
351
352 bspstore = (unsigned long *)regs->ar_bspstore;
353 ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
354 bsp = ia64_rse_skip_regs(ubs_end, -sof);
355 addr = ia64_rse_skip_regs(bsp, ridx);
356
357 DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
358
359 ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
360
361 rnat_addr = ia64_rse_rnat_addr(addr);
362
363 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
364 DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
365 (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
366
367 nat_mask = 1UL << ia64_rse_slot_num(addr);
368 if (nat)
369 rnats |= nat_mask;
370 else
371 rnats &= ~nat_mask;
372 ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
373
374 DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
375}
376
377
378static void
379get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
380{
381 struct switch_stack *sw = (struct switch_stack *) regs - 1;
382 unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
383 unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
384 unsigned long rnats, nat_mask;
385 unsigned long on_kbs;
386 long sof = (regs->cr_ifs) & 0x7f;
387 long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
388 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
389 long ridx = r1 - 32;
390
391 if (ridx >= sof) {
392 /* read of out-of-frame register returns an undefined value; 0 in our case. */
393 DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
394 goto fail;
395 }
396
397 if (ridx < sor)
398 ridx = rotate_reg(sor, rrb_gr, ridx);
399
400 DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
401 r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
402
403 on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
404 addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
405 if (addr >= kbs) {
406 /* the register is on the kernel backing store: easy... */
407 *val = *addr;
408 if (nat) {
409 rnat_addr = ia64_rse_rnat_addr(addr);
410 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
411 rnat_addr = &sw->ar_rnat;
412 nat_mask = 1UL << ia64_rse_slot_num(addr);
413 *nat = (*rnat_addr & nat_mask) != 0;
414 }
415 return;
416 }
417
418 if (!user_stack(current, regs)) {
419 DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
420 goto fail;
421 }
422
423 bspstore = (unsigned long *)regs->ar_bspstore;
424 ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
425 bsp = ia64_rse_skip_regs(ubs_end, -sof);
426 addr = ia64_rse_skip_regs(bsp, ridx);
427
428 DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
429
430 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
431
432 if (nat) {
433 rnat_addr = ia64_rse_rnat_addr(addr);
434 nat_mask = 1UL << ia64_rse_slot_num(addr);
435
 436 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
 437
 438 DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
439 *nat = (rnats & nat_mask) != 0;
440 }
441 return;
442
443 fail:
444 *val = 0;
445 if (nat)
446 *nat = 0;
447 return;
448}
449
450
451static void
452setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
453{
454 struct switch_stack *sw = (struct switch_stack *) regs - 1;
455 unsigned long addr;
456 unsigned long bitmask;
457 unsigned long *unat;
458
459 /*
460 * First takes care of stacked registers
461 */
462 if (regnum >= IA64_FIRST_STACKED_GR) {
463 set_rse_reg(regs, regnum, val, nat);
464 return;
465 }
466
467 /*
468 * Using r0 as a target raises a General Exception fault which has higher priority
469 * than the Unaligned Reference fault.
470 */
471
472 /*
473 * Now look at registers in [0-31] range and init correct UNAT
474 */
475 if (GR_IN_SW(regnum)) {
476 addr = (unsigned long)sw;
477 unat = &sw->ar_unat;
478 } else {
479 addr = (unsigned long)regs;
480 unat = &sw->caller_unat;
481 }
482 DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
483 addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
484 /*
485 * add offset from base of struct
486 * and do it !
487 */
488 addr += GR_OFFS(regnum);
489
490 *(unsigned long *)addr = val;
491
492 /*
493 * We need to clear the corresponding UNAT bit to fully emulate the load
 494 * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
495 */
496 bitmask = 1UL << (addr >> 3 & 0x3f);
497 DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
498 if (nat) {
499 *unat |= bitmask;
500 } else {
501 *unat &= ~bitmask;
502 }
503 DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
504}
505
506/*
507 * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
 508 * range from 32-127; the result is in the range from 0-95).
509 */
510static inline unsigned long
511fph_index (struct pt_regs *regs, long regnum)
512{
513 unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
514 return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
515}
516
517static void
518setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
519{
520 struct switch_stack *sw = (struct switch_stack *)regs - 1;
521 unsigned long addr;
522
523 /*
524 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
525 * Fault. Thus, when we get here, we know the partition is enabled.
526 * To update f32-f127, there are three choices:
527 *
528 * (1) save f32-f127 to thread.fph and update the values there
529 * (2) use a gigantic switch statement to directly access the registers
530 * (3) generate code on the fly to update the desired register
531 *
532 * For now, we are using approach (1).
533 */
534 if (regnum >= IA64_FIRST_ROTATING_FR) {
535 ia64_sync_fph(current);
536 current->thread.fph[fph_index(regs, regnum)] = *fpval;
537 } else {
538 /*
539 * pt_regs or switch_stack ?
540 */
541 if (FR_IN_SW(regnum)) {
542 addr = (unsigned long)sw;
543 } else {
544 addr = (unsigned long)regs;
545 }
546
547 DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
548
549 addr += FR_OFFS(regnum);
550 *(struct ia64_fpreg *)addr = *fpval;
551
552 /*
553 * mark the low partition as being used now
554 *
555 * It is highly unlikely that this bit is not already set, but
556 * let's do it for safety.
557 */
558 regs->cr_ipsr |= IA64_PSR_MFL;
559 }
560}
561
562/*
 563 * These two inline functions generate the spilled versions of the constant floating point
564 * registers which can be used with stfX
565 */
566static inline void
567float_spill_f0 (struct ia64_fpreg *final)
568{
569 ia64_stf_spill(final, 0);
570}
571
572static inline void
573float_spill_f1 (struct ia64_fpreg *final)
574{
575 ia64_stf_spill(final, 1);
576}
577
578static void
579getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
580{
581 struct switch_stack *sw = (struct switch_stack *) regs - 1;
582 unsigned long addr;
583
584 /*
585 * From EAS-2.5: FPDisableFault has higher priority than
586 * Unaligned Fault. Thus, when we get here, we know the partition is
587 * enabled.
588 *
589 * When regnum > 31, the register is still live and we need to force a save
590 * to current->thread.fph to get access to it. See discussion in setfpreg()
591 * for reasons and other ways of doing this.
592 */
593 if (regnum >= IA64_FIRST_ROTATING_FR) {
594 ia64_flush_fph(current);
595 *fpval = current->thread.fph[fph_index(regs, regnum)];
596 } else {
597 /*
598 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
599 * not saved, we must generate their spilled form on the fly
600 */
601 switch(regnum) {
602 case 0:
603 float_spill_f0(fpval);
604 break;
605 case 1:
606 float_spill_f1(fpval);
607 break;
608 default:
609 /*
610 * pt_regs or switch_stack ?
611 */
612 addr = FR_IN_SW(regnum) ? (unsigned long)sw
613 : (unsigned long)regs;
614
615 DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
616 FR_IN_SW(regnum), addr, FR_OFFS(regnum));
617
618 addr += FR_OFFS(regnum);
619 *fpval = *(struct ia64_fpreg *)addr;
620 }
621 }
622}
623
624
625static void
626getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
627{
628 struct switch_stack *sw = (struct switch_stack *) regs - 1;
629 unsigned long addr, *unat;
630
631 if (regnum >= IA64_FIRST_STACKED_GR) {
632 get_rse_reg(regs, regnum, val, nat);
633 return;
634 }
635
636 /*
 637 * take care of r0 (read-only, always evaluates to 0)
638 */
639 if (regnum == 0) {
640 *val = 0;
641 if (nat)
642 *nat = 0;
643 return;
644 }
645
646 /*
647 * Now look at registers in [0-31] range and init correct UNAT
648 */
649 if (GR_IN_SW(regnum)) {
650 addr = (unsigned long)sw;
651 unat = &sw->ar_unat;
652 } else {
653 addr = (unsigned long)regs;
654 unat = &sw->caller_unat;
655 }
656
657 DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
658
659 addr += GR_OFFS(regnum);
660
661 *val = *(unsigned long *)addr;
662
663 /*
664 * do it only when requested
665 */
666 if (nat)
667 *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
668}
669
670static void
671emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
672{
673 /*
674 * IMPORTANT:
675 * Given the way we handle unaligned speculative loads, we should
676 * not get to this point in the code but we keep this sanity check,
677 * just in case.
678 */
679 if (ld.x6_op == 1 || ld.x6_op == 3) {
 680 printk(KERN_ERR "%s: register update on speculative load, error\n", __func__);
 681 if (die_if_kernel("unaligned reference on speculative load with register update\n",
 682 regs, 30))
 683 return;
 684 }
685
686
687 /*
688 * at this point, we know that the base register to update is valid i.e.,
689 * it's not r0
690 */
691 if (type == UPD_IMMEDIATE) {
692 unsigned long imm;
693
694 /*
695 * Load +Imm: ldXZ r1=[r3],imm(9)
696 *
697 *
698 * form imm9: [13:19] contain the first 7 bits
699 */
700 imm = ld.x << 7 | ld.imm;
701
702 /*
703 * sign extend (1+8bits) if m set
704 */
705 if (ld.m) imm |= SIGN_EXT9;
706
707 /*
708 * ifa == r3 and we know that the NaT bit on r3 was clear so
709 * we can directly use ifa.
710 */
711 ifa += imm;
712
713 setreg(ld.r3, ifa, 0, regs);
714
715 DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
716
717 } else if (ld.m) {
718 unsigned long r2;
719 int nat_r2;
720
721 /*
722 * Load +Reg Opcode: ldXZ r1=[r3],r2
723 *
 724 * Note that we update r3 even in the case of ldfX.a
725 * (where the load does not happen)
726 *
727 * The way the load algorithm works, we know that r3 does not
728 * have its NaT bit set (would have gotten NaT consumption
729 * before getting the unaligned fault). So we can use ifa
730 * which equals r3 at this point.
731 *
732 * IMPORTANT:
733 * The above statement holds ONLY because we know that we
734 * never reach this code when trying to do a ldX.s.
 735 * If we ever make it here on an ldfX.s, that assumption no longer holds.
 736 */
737 getreg(ld.imm, &r2, &nat_r2, regs);
738
739 ifa += r2;
740
741 /*
742 * propagate Nat r2 -> r3
743 */
744 setreg(ld.r3, ifa, nat_r2, regs);
745
746 DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
747 }
748}
749
750
751static int
752emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
753{
754 unsigned int len = 1 << ld.x6_sz;
755 unsigned long val = 0;
756
757 /*
758 * r0, as target, doesn't need to be checked because Illegal Instruction
759 * faults have higher priority than unaligned faults.
760 *
761 * r0 cannot be found as the base as it would never generate an
762 * unaligned reference.
763 */
764
765 /*
 766 * For ldX.a we emulate the load and also invalidate the ALAT entry.
767 * See comment below for explanation on how we handle ldX.a
768 */
769
770 if (len != 2 && len != 4 && len != 8) {
771 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
772 return -1;
773 }
774 /* this assumes little-endian byte-order: */
775 if (copy_from_user(&val, (void __user *) ifa, len))
776 return -1;
777 setreg(ld.r1, val, 0, regs);
778
779 /*
780 * check for updates on any kind of loads
781 */
782 if (ld.op == 0x5 || ld.m)
783 emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
784
785 /*
786 * handling of various loads (based on EAS2.4):
787 *
788 * ldX.acq (ordered load):
789 * - acquire semantics would have been used, so force fence instead.
790 *
791 * ldX.c.clr (check load and clear):
792 * - if we get to this handler, it's because the entry was not in the ALAT.
793 * Therefore the operation reverts to a normal load
794 *
795 * ldX.c.nc (check load no clear):
796 * - same as previous one
797 *
798 * ldX.c.clr.acq (ordered check load and clear):
799 * - same as above for c.clr part. The load needs to have acquire semantics. So
800 * we use the fence semantics which is stronger and thus ensures correctness.
801 *
802 * ldX.a (advanced load):
803 * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
804 * address doesn't match requested size alignment. This means that we would
805 * possibly need more than one load to get the result.
806 *
807 * The load part can be handled just like a normal load, however the difficult
 808 * part is to get the right thing into the ALAT. The critical pieces of information
 809 * are the base address of the load and its size. To do that, a ld.a must be executed;
810 * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
811 * if we use the same target register, we will be okay for the check.a instruction.
812 * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
 813 * which would overlap within [r3,r3+X] (the size of the load was stored in the
 814 * ALAT). If such an entry is found, it is invalidated. But this is not good
815 * enough, take the following example:
816 * r3=3
817 * ld4.a r1=[r3]
818 *
819 * Could be emulated by doing:
820 * ld1.a r1=[r3],1
821 * store to temporary;
822 * ld1.a r1=[r3],1
823 * store & shift to temporary;
824 * ld1.a r1=[r3],1
825 * store & shift to temporary;
826 * ld1.a r1=[r3]
827 * store & shift to temporary;
828 * r1=temporary
829 *
 830 * So in this case, you would get the right value in r1 but the wrong info in
831 * the ALAT. Notice that you could do it in reverse to finish with address 3
832 * but you would still get the size wrong. To get the size right, one needs to
 833 * execute exactly the same kind of load. You could do it from an aligned
834 * temporary location, but you would get the address wrong.
835 *
836 * So no matter what, it is not possible to emulate an advanced load
 837 * correctly. But is that really critical?
838 *
839 * We will always convert ld.a into a normal load with ALAT invalidated. This
 840 * will enable the compiler to do optimizations where certain code paths after ld.a
 841 * are not required to have ld.c/chk.a, e.g., code paths with no intervening stores.
842 *
843 * If there is a store after the advanced load, one must either do a ld.c.* or
844 * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
845 * entry found in ALAT), and that's perfectly ok because:
846 *
847 * - ld.c.*, if the entry is not present a normal load is executed
848 * - chk.a.*, if the entry is not present, execution jumps to recovery code
849 *
850 * In either case, the load can be potentially retried in another form.
851 *
852 * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
853 * up a stale entry later). The register base update MUST also be performed.
854 */
855
856 /*
857 * when the load has the .acq completer then
858 * use ordering fence.
859 */
860 if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
861 mb();
862
863 /*
864 * invalidate ALAT entry in case of advanced load
865 */
866 if (ld.x6_op == 0x2)
867 invala_gr(ld.r1);
868
869 return 0;
870}
871
872static int
873emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
874{
875 unsigned long r2;
876 unsigned int len = 1 << ld.x6_sz;
877
878 /*
879 * if we get to this handler, Nat bits on both r3 and r2 have already
 880 * been checked, so we don't need to do it again.
881 *
882 * extract the value to be stored
883 */
884 getreg(ld.imm, &r2, NULL, regs);
885
886 /*
887 * we rely on the macros in unaligned.h for now i.e.,
888 * we let the compiler figure out how to read memory gracefully.
889 *
 890 * We need this switch/case because of the way the inline function
 891 * works. The code is optimized by the compiler and looks like
892 * a single switch/case.
893 */
894 DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
895
896 if (len != 2 && len != 4 && len != 8) {
897 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
898 return -1;
899 }
900
901 /* this assumes little-endian byte-order: */
902 if (copy_to_user((void __user *) ifa, &r2, len))
903 return -1;
904
905 /*
906 * stX [r3]=r2,imm(9)
907 *
908 * NOTE:
909 * ld.r3 can never be r0, because r0 would not generate an
910 * unaligned access.
911 */
912 if (ld.op == 0x5) {
913 unsigned long imm;
914
915 /*
 916 * form imm9: [6:12] contain the first 7 bits
917 */
918 imm = ld.x << 7 | ld.r1;
919 /*
920 * sign extend (8bits) if m set
921 */
922 if (ld.m) imm |= SIGN_EXT9;
923 /*
924 * ifa == r3 (NaT is necessarily cleared)
925 */
926 ifa += imm;
927
928 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
929
930 setreg(ld.r3, ifa, 0, regs);
931 }
932 /*
933 * we don't have alat_invalidate_multiple() so we need
934 * to do the complete flush :-<<
935 */
936 ia64_invala();
937
938 /*
939 * stX.rel: use fence instead of release
940 */
941 if (ld.x6_op == 0xd)
942 mb();
943
944 return 0;
945}
946
947/*
948 * floating point operations sizes in bytes
949 */
950static const unsigned char float_fsz[4]={
951 10, /* extended precision (e) */
952 8, /* integer (8) */
953 4, /* single precision (s) */
954 8 /* double precision (d) */
955};
956
957static inline void
958mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
959{
960 ia64_ldfe(6, init);
961 ia64_stop();
962 ia64_stf_spill(final, 6);
963}
964
965static inline void
966mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
967{
968 ia64_ldf8(6, init);
969 ia64_stop();
970 ia64_stf_spill(final, 6);
971}
972
973static inline void
974mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
975{
976 ia64_ldfs(6, init);
977 ia64_stop();
978 ia64_stf_spill(final, 6);
979}
980
981static inline void
982mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
983{
984 ia64_ldfd(6, init);
985 ia64_stop();
986 ia64_stf_spill(final, 6);
987}
988
989static inline void
990float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
991{
992 ia64_ldf_fill(6, init);
993 ia64_stop();
994 ia64_stfe(final, 6);
995}
996
997static inline void
998float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
999{
1000 ia64_ldf_fill(6, init);
1001 ia64_stop();
1002 ia64_stf8(final, 6);
1003}
1004
1005static inline void
1006float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
1007{
1008 ia64_ldf_fill(6, init);
1009 ia64_stop();
1010 ia64_stfs(final, 6);
1011}
1012
1013static inline void
1014float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
1015{
1016 ia64_ldf_fill(6, init);
1017 ia64_stop();
1018 ia64_stfd(final, 6);
1019}
1020
1021static int
1022emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1023{
1024 struct ia64_fpreg fpr_init[2];
1025 struct ia64_fpreg fpr_final[2];
1026 unsigned long len = float_fsz[ld.x6_sz];
1027
1028 /*
1029 * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
1030 * higher priority than unaligned faults.
1031 *
1032 * r0 cannot be found as the base as it would never generate an unaligned
1033 * reference.
1034 */
1035
1036 /*
1037 * make sure we get clean buffers
1038 */
1039 memset(&fpr_init, 0, sizeof(fpr_init));
1040 memset(&fpr_final, 0, sizeof(fpr_final));
1041
1042 /*
1043 * ldfpX.a: we don't try to emulate anything but we must
1044 * invalidate the ALAT entry and execute updates, if any.
1045 */
1046 if (ld.x6_op != 0x2) {
1047 /*
1048 * This assumes little-endian byte-order. Note that there is no "ldfpe"
1049 * instruction:
1050 */
1051 if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
1052 || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
1053 return -1;
1054
1055 DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
 1056 DDUMP("fpr_init =", &fpr_init, 2*len);
1057 /*
1058 * XXX fixme
1059 * Could optimize inlines by using ldfpX & 2 spills
1060 */
1061 switch( ld.x6_sz ) {
1062 case 0:
1063 mem2float_extended(&fpr_init[0], &fpr_final[0]);
1064 mem2float_extended(&fpr_init[1], &fpr_final[1]);
1065 break;
1066 case 1:
1067 mem2float_integer(&fpr_init[0], &fpr_final[0]);
1068 mem2float_integer(&fpr_init[1], &fpr_final[1]);
1069 break;
1070 case 2:
1071 mem2float_single(&fpr_init[0], &fpr_final[0]);
1072 mem2float_single(&fpr_init[1], &fpr_final[1]);
1073 break;
1074 case 3:
1075 mem2float_double(&fpr_init[0], &fpr_final[0]);
1076 mem2float_double(&fpr_init[1], &fpr_final[1]);
1077 break;
1078 }
1079 DDUMP("fpr_final =", &fpr_final, 2*len);
1080 /*
1081 * XXX fixme
1082 *
1083 * A possible optimization would be to drop fpr_final and directly
1084 * use the storage from the saved context i.e., the actual final
1085 * destination (pt_regs, switch_stack or thread structure).
1086 */
1087 setfpreg(ld.r1, &fpr_final[0], regs);
1088 setfpreg(ld.imm, &fpr_final[1], regs);
1089 }
1090
1091 /*
1092 * Check for updates: only immediate updates are available for this
1093 * instruction.
1094 */
1095 if (ld.m) {
1096 /*
1097 * the immediate is implicit given the ldsz of the operation:
1098 * single: 8 (2x4) and for all others it's 16 (2x8)
1099 */
1100 ifa += len<<1;
1101
1102 /*
1103 * IMPORTANT:
1104 * the fact that we force the NaT of r3 to zero is ONLY valid
1105 * as long as we don't come here with a ldfpX.s.
1106 * For this reason we keep this sanity check
1107 */
1108 if (ld.x6_op == 1 || ld.x6_op == 3)
1109 printk(KERN_ERR "%s: register update on speculative load pair, error\n",
 1110 __func__);
1111
1112 setreg(ld.r3, ifa, 0, regs);
1113 }
1114
1115 /*
1116 * Invalidate ALAT entries, if any, for both registers.
1117 */
1118 if (ld.x6_op == 0x2) {
1119 invala_fr(ld.r1);
1120 invala_fr(ld.imm);
1121 }
1122 return 0;
1123}
1124
1125
1126static int
1127emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1128{
1129 struct ia64_fpreg fpr_init;
1130 struct ia64_fpreg fpr_final;
1131 unsigned long len = float_fsz[ld.x6_sz];
1132
1133 /*
1134 * fr0 & fr1 don't need to be checked because Illegal Instruction
1135 * faults have higher priority than unaligned faults.
1136 *
1137 * r0 cannot be found as the base as it would never generate an
1138 * unaligned reference.
1139 */
1140
1141 /*
1142 * make sure we get clean buffers
1143 */
1144 memset(&fpr_init,0, sizeof(fpr_init));
1145 memset(&fpr_final,0, sizeof(fpr_final));
1146
1147 /*
 1148 * For ldfX.a we don't try to emulate anything, but we must
1149 * invalidate the ALAT entry.
1150 * See comments in ldX for descriptions on how the various loads are handled.
1151 */
1152 if (ld.x6_op != 0x2) {
1153 if (copy_from_user(&fpr_init, (void __user *) ifa, len))
1154 return -1;
1155
1156 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1157 DDUMP("fpr_init =", &fpr_init, len);
1158 /*
1159 * we only do something for x6_op={0,8,9}
1160 */
1161 switch( ld.x6_sz ) {
1162 case 0:
1163 mem2float_extended(&fpr_init, &fpr_final);
1164 break;
1165 case 1:
1166 mem2float_integer(&fpr_init, &fpr_final);
1167 break;
1168 case 2:
1169 mem2float_single(&fpr_init, &fpr_final);
1170 break;
1171 case 3:
1172 mem2float_double(&fpr_init, &fpr_final);
1173 break;
1174 }
1175 DDUMP("fpr_final =", &fpr_final, len);
1176 /*
1177 * XXX fixme
1178 *
1179 * A possible optimization would be to drop fpr_final and directly
1180 * use the storage from the saved context i.e., the actual final
1181 * destination (pt_regs, switch_stack or thread structure).
1182 */
1183 setfpreg(ld.r1, &fpr_final, regs);
1184 }
1185
1186 /*
1187 * check for updates on any loads
1188 */
1189 if (ld.op == 0x7 || ld.m)
1190 emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1191
1192 /*
1193 * invalidate ALAT entry in case of advanced floating point loads
1194 */
1195 if (ld.x6_op == 0x2)
1196 invala_fr(ld.r1);
1197
1198 return 0;
1199}
1200
1201
1202static int
1203emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1204{
1205 struct ia64_fpreg fpr_init;
1206 struct ia64_fpreg fpr_final;
1207 unsigned long len = float_fsz[ld.x6_sz];
1208
1209 /*
1210 * make sure we get clean buffers
1211 */
1212 memset(&fpr_init,0, sizeof(fpr_init));
1213 memset(&fpr_final,0, sizeof(fpr_final));
1214
1215 /*
1216 * if we get to this handler, Nat bits on both r3 and r2 have already
 1217 * been checked, so we don't need to do it again.
1218 *
1219 * extract the value to be stored
1220 */
1221 getfpreg(ld.imm, &fpr_init, regs);
1222 /*
1223 * during this step, we extract the spilled registers from the saved
1224 * context i.e., we refill. Then we store (no spill) to temporary
1225 * aligned location
1226 */
1227 switch( ld.x6_sz ) {
1228 case 0:
1229 float2mem_extended(&fpr_init, &fpr_final);
1230 break;
1231 case 1:
1232 float2mem_integer(&fpr_init, &fpr_final);
1233 break;
1234 case 2:
1235 float2mem_single(&fpr_init, &fpr_final);
1236 break;
1237 case 3:
1238 float2mem_double(&fpr_init, &fpr_final);
1239 break;
1240 }
1241 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1242 DDUMP("fpr_init =", &fpr_init, len);
1243 DDUMP("fpr_final =", &fpr_final, len);
1244
1245 if (copy_to_user((void __user *) ifa, &fpr_final, len))
1246 return -1;
1247
1248 /*
1249 * stfX [r3]=r2,imm(9)
1250 *
1251 * NOTE:
1252 * ld.r3 can never be r0, because r0 would not generate an
1253 * unaligned access.
1254 */
1255 if (ld.op == 0x7) {
1256 unsigned long imm;
1257
1258 /*
1259 * form imm9: [12:6] contain first 7bits
1260 */
1261 imm = ld.x << 7 | ld.r1;
1262 /*
1263 * sign extend (8bits) if m set
1264 */
1265 if (ld.m)
1266 imm |= SIGN_EXT9;
1267 /*
1268 * ifa == r3 (NaT is necessarily cleared)
1269 */
1270 ifa += imm;
1271
1272 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1273
1274 setreg(ld.r3, ifa, 0, regs);
1275 }
1276 /*
1277 * we don't have alat_invalidate_multiple() so we need
1278 * to do the complete flush :-<<
1279 */
1280 ia64_invala();
1281
1282 return 0;
1283}
1284
1285/*
1286 * Make sure we log the unaligned access, so that user/sysadmin can notice it and
1287 * eventually fix the program. However, we don't want to do that for every access so we
 1288 * pace it with jiffies.
 1289 */
 1290static DEFINE_RATELIMIT_STATE(logging_rate_limit, 5 * HZ, 5);
1291
1292void
1293ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1294{
1295 struct ia64_psr *ipsr = ia64_psr(regs);
1296 mm_segment_t old_fs = get_fs();
1297 unsigned long bundle[2];
1298 unsigned long opcode;
1299 struct siginfo si;
1300 const struct exception_table_entry *eh = NULL;
1301 union {
1302 unsigned long l;
1303 load_store_t insn;
1304 } u;
1305 int ret = -1;
1306
1307 if (ia64_psr(regs)->be) {
1308 /* we don't support big-endian accesses */
 1309 if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0))
 1310 return;
1311 goto force_sigbus;
1312 }
1313
1314 /*
1315 * Treat kernel accesses for which there is an exception handler entry the same as
1316 * user-level unaligned accesses. Otherwise, a clever program could trick this
 1317 * handler into reading arbitrary kernel addresses...
1318 */
1319 if (!user_mode(regs))
1320 eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
1321 if (user_mode(regs) || eh) {
1322 if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1323 goto force_sigbus;
1324
 1325 if (!no_unaligned_warning &&
 1326 !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
 1327 __ratelimit(&logging_rate_limit))
1328 {
1329 char buf[200]; /* comm[] is at most 16 bytes... */
1330 size_t len;
1331
1332 len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
 1333 "ip=0x%016lx\n\r", current->comm,
 1334 task_pid_nr(current),
1335 ifa, regs->cr_iip + ipsr->ri);
1336 /*
1337 * Don't call tty_write_message() if we're in the kernel; we might
1338 * be holding locks...
1339 */
1340 if (user_mode(regs)) {
1341 struct tty_struct *tty = get_current_tty();
1342 tty_write_message(tty, buf);
1343 tty_kref_put(tty);
1344 }
 1345 buf[len-1] = '\0'; /* drop '\r' */
1346 /* watch for command names containing %s */
1347 printk(KERN_WARNING "%s", buf);
1348 } else {
1349 if (no_unaligned_warning) {
1350 printk_once(KERN_WARNING "%s(%d) encountered an "
1351 "unaligned exception which required\n"
1352 "kernel assistance, which degrades "
1353 "the performance of the application.\n"
1354 "Unaligned exception warnings have "
1355 "been disabled by the system "
1356 "administrator\n"
1357 "echo 0 > /proc/sys/kernel/ignore-"
1358 "unaligned-usertrap to re-enable\n",
 1359 current->comm, task_pid_nr(current));
 1360 }
1361 }
1362 } else {
 1363 if (__ratelimit(&logging_rate_limit)) {
1364 printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1365 ifa, regs->cr_iip + ipsr->ri);
1366 if (unaligned_dump_stack)
1367 dump_stack();
1368 }
1369 set_fs(KERNEL_DS);
1370 }
1371
1372 DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1373 regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1374
1375 if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
1376 goto failure;
1377
1378 /*
1379 * extract the instruction from the bundle given the slot number
1380 */
1381 switch (ipsr->ri) {
 1382 default:
1383 case 0: u.l = (bundle[0] >> 5); break;
1384 case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1385 case 2: u.l = (bundle[1] >> 23); break;
1386 }
1387 opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
1388
1389 DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1390 "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1391 u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1392
1393 /*
1394 * IMPORTANT:
1395 * Notice that the switch statement DOES not cover all possible instructions
 1396 * that DO generate unaligned references. This is done on purpose because for some
 1397 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
 1398 * is WRONG to try and emulate. Here is a list of instructions we don't emulate, i.e.,
1399 * the program will get a signal and die:
1400 *
1401 * load/store:
1402 * - ldX.spill
1403 * - stX.spill
1404 * Reason: RNATs are based on addresses
1405 * - ld16
1406 * - st16
1407 * Reason: ld16 and st16 are supposed to occur in a single
1408 * memory op
1409 *
1410 * synchronization:
1411 * - cmpxchg
1412 * - fetchadd
1413 * - xchg
1414 * Reason: ATOMIC operations cannot be emulated properly using multiple
1415 * instructions.
1416 *
1417 * speculative loads:
1418 * - ldX.sZ
1419 * Reason: side effects, code must be ready to deal with failure so simpler
1420 * to let the load fail.
1421 * ---------------------------------------------------------------------------------
1422 * XXX fixme
1423 *
1424 * I would like to get rid of this switch case and do something
1425 * more elegant.
1426 */
1427 switch (opcode) {
1428 case LDS_OP:
1429 case LDSA_OP:
1430 if (u.insn.x)
1431 /* oops, really a semaphore op (cmpxchg, etc) */
1432 goto failure;
1433 /* no break */
1434 case LDS_IMM_OP:
1435 case LDSA_IMM_OP:
1436 case LDFS_OP:
1437 case LDFSA_OP:
1438 case LDFS_IMM_OP:
1439 /*
1440 * The instruction will be retried with deferred exceptions turned on, and
 1441 * we should get the NaT bit installed
1442 *
1443 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1444 * are actually executed even though the operation failed. So we don't
1445 * need to take care of this.
1446 */
1447 DPRINT("forcing PSR_ED\n");
1448 regs->cr_ipsr |= IA64_PSR_ED;
1449 goto done;
1450
1451 case LD_OP:
1452 case LDA_OP:
1453 case LDBIAS_OP:
1454 case LDACQ_OP:
1455 case LDCCLR_OP:
1456 case LDCNC_OP:
1457 case LDCCLRACQ_OP:
1458 if (u.insn.x)
1459 /* oops, really a semaphore op (cmpxchg, etc) */
1460 goto failure;
1461 /* no break */
1462 case LD_IMM_OP:
1463 case LDA_IMM_OP:
1464 case LDBIAS_IMM_OP:
1465 case LDACQ_IMM_OP:
1466 case LDCCLR_IMM_OP:
1467 case LDCNC_IMM_OP:
1468 case LDCCLRACQ_IMM_OP:
1469 ret = emulate_load_int(ifa, u.insn, regs);
1470 break;
1471
1472 case ST_OP:
1473 case STREL_OP:
1474 if (u.insn.x)
1475 /* oops, really a semaphore op (cmpxchg, etc) */
1476 goto failure;
1477 /* no break */
1478 case ST_IMM_OP:
1479 case STREL_IMM_OP:
1480 ret = emulate_store_int(ifa, u.insn, regs);
1481 break;
1482
1483 case LDF_OP:
1484 case LDFA_OP:
1485 case LDFCCLR_OP:
1486 case LDFCNC_OP:
1487 if (u.insn.x)
1488 ret = emulate_load_floatpair(ifa, u.insn, regs);
1489 else
1490 ret = emulate_load_float(ifa, u.insn, regs);
1491 break;
1492
1493 case LDF_IMM_OP:
1494 case LDFA_IMM_OP:
1495 case LDFCCLR_IMM_OP:
1496 case LDFCNC_IMM_OP:
1497 ret = emulate_load_float(ifa, u.insn, regs);
1498 break;
1499
1500 case STF_OP:
1501 case STF_IMM_OP:
1502 ret = emulate_store_float(ifa, u.insn, regs);
1503 break;
1504
1505 default:
1506 goto failure;
1507 }
1508 DPRINT("ret=%d\n", ret);
1509 if (ret)
1510 goto failure;
1511
1512 if (ipsr->ri == 2)
1513 /*
1514 * given today's architecture this case is not likely to happen because a
1515 * memory access instruction (M) can never be in the last slot of a
1516 * bundle. But let's keep it for now.
1517 */
1518 regs->cr_iip += 16;
1519 ipsr->ri = (ipsr->ri + 1) & 0x3;
1520
1521 DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1522 done:
1523 set_fs(old_fs); /* restore original address limit */
1524 return;
1525
1526 failure:
1527 /* something went wrong... */
1528 if (!user_mode(regs)) {
1529 if (eh) {
1530 ia64_handle_exception(regs, eh);
1531 goto done;
1532 }
 1533 if (die_if_kernel("error during unaligned kernel access\n", regs, ret))
 1534 return;
1535 /* NOT_REACHED */
1536 }
1537 force_sigbus:
1538 si.si_signo = SIGBUS;
1539 si.si_errno = 0;
1540 si.si_code = BUS_ADRALN;
1541 si.si_addr = (void __user *) ifa;
1542 si.si_flags = 0;
1543 si.si_isr = 0;
1544 si.si_imm = 0;
1545 force_sig_info(SIGBUS, &si, current);
1546 goto done;
1547}