1 /*
2 * Architecture-specific unaligned trap handling.
3 *
4 * Copyright (C) 1999-2002 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 * David Mosberger-Tang <davidm@hpl.hp.com>
7 *
8 * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix
9 * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
10 * stacked register returns an undefined value; it does NOT trigger a
11 * "rsvd register fault").
12 * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops.
13 * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
 * 2001/01/17 Add support for emulation of unaligned kernel accesses.
15 */
16 #include <linux/kernel.h>
17 #include <linux/sched.h>
18 #include <linux/smp_lock.h>
19
20 #include <asm/uaccess.h>
21 #include <asm/rse.h>
22 #include <asm/processor.h>
23 #include <asm/unaligned.h>
24
25 extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
26
27 #undef DEBUG_UNALIGNED_TRAP
28
29 #ifdef DEBUG_UNALIGNED_TRAP
30 # define DPRINT(a...) do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
31 # define DDUMP(str,vp,len) dump(str, vp, len)
32
33 static void
dump (const char *str, void *vp, size_t len)
35 {
36 unsigned char *cp = vp;
37 int i;
38
39 printk("%s", str);
40 for (i = 0; i < len; ++i)
41 printk (" %02x", *cp++);
42 printk("\n");
43 }
44 #else
45 # define DPRINT(a...)
46 # define DDUMP(str,vp,len)
47 #endif
48
49 #define IA64_FIRST_STACKED_GR 32
50 #define IA64_FIRST_ROTATING_FR 32
51 #define SIGN_EXT9 0xffffffffffffff00ul
52
53 /*
54 * For M-unit:
55 *
56 * opcode | m | x6 |
57 * --------|------|---------|
58 * [40-37] | [36] | [35:30] |
59 * --------|------|---------|
60 * 4 | 1 | 6 | = 11 bits
61 * --------------------------
62 * However bits [31:30] are not directly useful to distinguish between
63 * load/store so we can use [35:32] instead, which gives the following
64 * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
65 * checking the m-bit until later in the load/store emulation.
66 */
67 #define IA64_OPCODE_MASK 0x1ef
68 #define IA64_OPCODE_SHIFT 32
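
/*
 * A minimal sketch (not part of the build) of how the 9-bit opcode field
 * described above is pulled out of a 41-bit instruction slot; it mirrors
 * what ia64_handle_unaligned() does further down.  The helper name is
 * made up for illustration only.
 */
#if 0
static unsigned long
example_extract_opcode (unsigned long slot)
{
	/* keep bits [40:32] of the instruction, i.e. opcode, m and x6{5:2} */
	return (slot >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
}
#endif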
69
70 /*
71 * Table C-28 Integer Load/Store
72 *
73 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
74 *
75 * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
 * the address (bits [8:3]), so we must fail.
77 */
78 #define LD_OP 0x080
79 #define LDS_OP 0x081
80 #define LDA_OP 0x082
81 #define LDSA_OP 0x083
82 #define LDBIAS_OP 0x084
83 #define LDACQ_OP 0x085
84 /* 0x086, 0x087 are not relevant */
85 #define LDCCLR_OP 0x088
86 #define LDCNC_OP 0x089
87 #define LDCCLRACQ_OP 0x08a
88 #define ST_OP 0x08c
89 #define STREL_OP 0x08d
90 /* 0x08e,0x8f are not relevant */
91
92 /*
93 * Table C-29 Integer Load +Reg
94 *
95 * we use the ld->m (bit [36:36]) field to determine whether or not we have
96 * a load/store of this form.
97 */
98
99 /*
100 * Table C-30 Integer Load/Store +Imm
101 *
102 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
103 *
 * ld8.fill, st8.fill must be aligned because the NaT bits are based on
105 * the address, so we must fail and the program must be fixed.
106 */
107 #define LD_IMM_OP 0x0a0
108 #define LDS_IMM_OP 0x0a1
109 #define LDA_IMM_OP 0x0a2
110 #define LDSA_IMM_OP 0x0a3
111 #define LDBIAS_IMM_OP 0x0a4
112 #define LDACQ_IMM_OP 0x0a5
113 /* 0x0a6, 0xa7 are not relevant */
114 #define LDCCLR_IMM_OP 0x0a8
115 #define LDCNC_IMM_OP 0x0a9
116 #define LDCCLRACQ_IMM_OP 0x0aa
117 #define ST_IMM_OP 0x0ac
118 #define STREL_IMM_OP 0x0ad
119 /* 0x0ae,0xaf are not relevant */
120
121 /*
122 * Table C-32 Floating-point Load/Store
123 */
124 #define LDF_OP 0x0c0
125 #define LDFS_OP 0x0c1
126 #define LDFA_OP 0x0c2
127 #define LDFSA_OP 0x0c3
128 /* 0x0c6 is irrelevant */
129 #define LDFCCLR_OP 0x0c8
130 #define LDFCNC_OP 0x0c9
131 /* 0x0cb is irrelevant */
132 #define STF_OP 0x0cc
133
134 /*
135 * Table C-33 Floating-point Load +Reg
136 *
137 * we use the ld->m (bit [36:36]) field to determine whether or not we have
138 * a load/store of this form.
139 */
140
141 /*
142 * Table C-34 Floating-point Load/Store +Imm
143 */
144 #define LDF_IMM_OP 0x0e0
145 #define LDFS_IMM_OP 0x0e1
146 #define LDFA_IMM_OP 0x0e2
147 #define LDFSA_IMM_OP 0x0e3
148 /* 0x0e6 is irrelevant */
149 #define LDFCCLR_IMM_OP 0x0e8
150 #define LDFCNC_IMM_OP 0x0e9
151 #define STF_IMM_OP 0x0ec
152
153 typedef struct {
154 unsigned long qp:6; /* [0:5] */
155 unsigned long r1:7; /* [6:12] */
156 unsigned long imm:7; /* [13:19] */
157 unsigned long r3:7; /* [20:26] */
158 unsigned long x:1; /* [27:27] */
159 unsigned long hint:2; /* [28:29] */
160 unsigned long x6_sz:2; /* [30:31] */
161 unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */
162 unsigned long m:1; /* [36:36] */
163 unsigned long op:4; /* [37:40] */
164 unsigned long pad:23; /* [41:63] */
165 } load_store_t;
166
167
168 typedef enum {
169 UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */
170 UPD_REG /* ldXZ r1=[r3],r2 */
171 } update_t;
172
173 /*
174 * We use tables to keep track of the offsets of registers in the saved state.
175 * This way we save having big switch/case statements.
176 *
177 * We use bit 0 to indicate switch_stack or pt_regs.
178 * The offset is simply shifted by 1 bit.
179 * A 2-byte value should be enough to hold any kind of offset
180 *
181 * In case the calling convention changes (and thus pt_regs/switch_stack)
182 * simply use RSW instead of RPT or vice-versa.
183 */
184
185 #define RPO(x) ((size_t) &((struct pt_regs *)0)->x)
186 #define RSO(x) ((size_t) &((struct switch_stack *)0)->x)
187
188 #define RPT(x) (RPO(x) << 1)
189 #define RSW(x) (1| RSO(x)<<1)
190
191 #define GR_OFFS(x) (gr_info[x]>>1)
192 #define GR_IN_SW(x) (gr_info[x] & 0x1)
193
194 #define FR_OFFS(x) (fr_info[x]>>1)
195 #define FR_IN_SW(x) (fr_info[x] & 0x1)
196
197 static u16 gr_info[32]={
198 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
199
200 RPT(r1), RPT(r2), RPT(r3),
201
202 RSW(r4), RSW(r5), RSW(r6), RSW(r7),
203
204 RPT(r8), RPT(r9), RPT(r10), RPT(r11),
205 RPT(r12), RPT(r13), RPT(r14), RPT(r15),
206
207 RPT(r16), RPT(r17), RPT(r18), RPT(r19),
208 RPT(r20), RPT(r21), RPT(r22), RPT(r23),
209 RPT(r24), RPT(r25), RPT(r26), RPT(r27),
210 RPT(r28), RPT(r29), RPT(r30), RPT(r31)
211 };
212
213 static u16 fr_info[32]={
214 0, /* constant : WE SHOULD NEVER GET THIS */
215 0, /* constant : WE SHOULD NEVER GET THIS */
216
217 RSW(f2), RSW(f3), RSW(f4), RSW(f5),
218
219 RPT(f6), RPT(f7), RPT(f8), RPT(f9),
220 RPT(f10), RPT(f11),
221
222 RSW(f12), RSW(f13), RSW(f14),
223 RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
224 RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
225 RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
226 RSW(f30), RSW(f31)
227 };
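
/*
 * A minimal sketch (not part of the build) of how the encoded gr_info[]
 * entries above are meant to be used: bit 0 selects pt_regs vs.
 * switch_stack, the remaining bits are the byte offset within that
 * structure.  This mirrors what setreg()/getreg() do below for the
 * static registers r1-r31; the helper name is made up for illustration.
 */
#if 0
static unsigned long *
example_gr_addr (int regnum, struct pt_regs *regs, struct switch_stack *sw)
{
	unsigned long base = GR_IN_SW(regnum) ? (unsigned long) sw
					      : (unsigned long) regs;
	return (unsigned long *) (base + GR_OFFS(regnum));
}
#endif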
228
229 /* Invalidate ALAT entry for integer register REGNO. */
230 static void
invala_gr (int regno)
232 {
233 # define F(reg) case reg: __asm__ __volatile__ ("invala.e r%0" :: "i"(reg)); break
234
235 switch (regno) {
236 F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
237 F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
238 F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
239 F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
240 F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
241 F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
242 F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
243 F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
244 F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
245 F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
246 F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
247 F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
248 F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
249 F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
250 F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
251 F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
252 }
253 # undef F
254 }
255
256 /* Invalidate ALAT entry for floating-point register REGNO. */
257 static void
invala_fr (int regno)
259 {
260 # define F(reg) case reg: __asm__ __volatile__ ("invala.e f%0" :: "i"(reg)); break
261
262 switch (regno) {
263 F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
264 F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
265 F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
266 F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
267 F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
268 F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
269 F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
270 F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
271 F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
272 F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
273 F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
274 F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
275 F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
276 F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
277 F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
278 F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
279 }
280 # undef F
281 }
282
283 static inline unsigned long
rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
285 {
286 reg += rrb;
287 if (reg >= sor)
288 reg -= sor;
289 return reg;
290 }
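
/*
 * Worked example (not part of the build) of rotate_reg(): with a
 * rotating region of sor=16 registers and rrb=3, index 2 maps to
 * physical slot 5 and index 14 wraps around to slot 1.  The checks
 * below are illustrative only.
 */
#if 0
static void
example_rotate_reg (void)
{
	BUG_ON(rotate_reg(16, 3, 2) != 5);	/* 2 + 3 < 16 */
	BUG_ON(rotate_reg(16, 3, 14) != 1);	/* 14 + 3 - 16 == 1 */
}
#endif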
291
292 static void
set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
294 {
295 struct switch_stack *sw = (struct switch_stack *) regs - 1;
296 unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
297 unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
298 unsigned long rnats, nat_mask;
299 unsigned long on_kbs;
300 long sof = (regs->cr_ifs) & 0x7f;
301 long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
302 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
303 long ridx = r1 - 32;
304
305 if (ridx >= sof) {
306 /* this should never happen, as the "rsvd register fault" has higher priority */
307 DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
308 return;
309 }
310
311 if (ridx < sor)
312 ridx = rotate_reg(sor, rrb_gr, ridx);
313
314 DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
315 r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
316
317 on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
318 addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
319 if (addr >= kbs) {
320 /* the register is on the kernel backing store: easy... */
321 rnat_addr = ia64_rse_rnat_addr(addr);
322 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
323 rnat_addr = &sw->ar_rnat;
324 nat_mask = 1UL << ia64_rse_slot_num(addr);
325
326 *addr = val;
327 if (nat)
328 *rnat_addr |= nat_mask;
329 else
330 *rnat_addr &= ~nat_mask;
331 return;
332 }
333
334 /*
335 * Avoid using user_mode() here: with "epc", we cannot use the privilege level to
336 * infer whether the interrupt task was running on the kernel backing store.
337 */
338 if (regs->r12 >= TASK_SIZE) {
339 DPRINT("ignoring kernel write to r%lu; register isn't on the RBS!", r1);
340 return;
341 }
342
343 bspstore = (unsigned long *)regs->ar_bspstore;
344 ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
345 bsp = ia64_rse_skip_regs(ubs_end, -sof);
346 addr = ia64_rse_skip_regs(bsp, ridx);
347
348 DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
349
350 ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
351
352 rnat_addr = ia64_rse_rnat_addr(addr);
353
354 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
355 DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
356 (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
357
358 nat_mask = 1UL << ia64_rse_slot_num(addr);
359 if (nat)
360 rnats |= nat_mask;
361 else
362 rnats &= ~nat_mask;
363 ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
364
365 DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
366 }
367
368
369 static void
get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
371 {
372 struct switch_stack *sw = (struct switch_stack *) regs - 1;
373 unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
374 unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
375 unsigned long rnats, nat_mask;
376 unsigned long on_kbs;
377 long sof = (regs->cr_ifs) & 0x7f;
378 long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
379 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
380 long ridx = r1 - 32;
381
382 if (ridx >= sof) {
383 /* read of out-of-frame register returns an undefined value; 0 in our case. */
384 DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
385 goto fail;
386 }
387
388 if (ridx < sor)
389 ridx = rotate_reg(sor, rrb_gr, ridx);
390
391 DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
392 r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
393
394 on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
395 addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
396 if (addr >= kbs) {
397 /* the register is on the kernel backing store: easy... */
398 *val = *addr;
399 if (nat) {
400 rnat_addr = ia64_rse_rnat_addr(addr);
401 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
402 rnat_addr = &sw->ar_rnat;
403 nat_mask = 1UL << ia64_rse_slot_num(addr);
404 *nat = (*rnat_addr & nat_mask) != 0;
405 }
406 return;
407 }
408
409 /*
410 * Avoid using user_mode() here: with "epc", we cannot use the privilege level to
411 * infer whether the interrupt task was running on the kernel backing store.
412 */
413 if (regs->r12 >= TASK_SIZE) {
414 DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
415 goto fail;
416 }
417
418 bspstore = (unsigned long *)regs->ar_bspstore;
419 ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
420 bsp = ia64_rse_skip_regs(ubs_end, -sof);
421 addr = ia64_rse_skip_regs(bsp, ridx);
422
423 DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
424
425 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
426
427 if (nat) {
428 rnat_addr = ia64_rse_rnat_addr(addr);
429 nat_mask = 1UL << ia64_rse_slot_num(addr);
430
		ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);

		DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
434 *nat = (rnats & nat_mask) != 0;
435 }
436 return;
437
438 fail:
439 *val = 0;
440 if (nat)
441 *nat = 0;
442 return;
443 }
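
/*
 * Illustrative sketch (not part of the build) of why set_rse_reg() and
 * get_rse_reg() cannot use plain pointer arithmetic on the backing
 * store: every 64th slot (the one whose address has bits {8:3} all set)
 * holds an RNAT collection rather than a stacked register, which is what
 * the ia64_rse_*() helpers from <asm/rse.h> account for.
 */
#if 0
static int
example_is_rnat_slot (unsigned long *slot)
{
	/* e.g. 0x...1f8 is an RNAT collection, 0x...1f0 is a register slot */
	return (((unsigned long) slot >> 3) & 0x3f) == 0x3f;
}
#endif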
444
445
446 static void
setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
448 {
449 struct switch_stack *sw = (struct switch_stack *) regs - 1;
450 unsigned long addr;
451 unsigned long bitmask;
452 unsigned long *unat;
453
454 /*
455 * First takes care of stacked registers
456 */
457 if (regnum >= IA64_FIRST_STACKED_GR) {
458 set_rse_reg(regs, regnum, val, nat);
459 return;
460 }
461
462 /*
463 * Using r0 as a target raises a General Exception fault which has higher priority
464 * than the Unaligned Reference fault.
465 */
466
467 /*
468 * Now look at registers in [0-31] range and init correct UNAT
469 */
470 if (GR_IN_SW(regnum)) {
471 addr = (unsigned long)sw;
472 unat = &sw->ar_unat;
473 } else {
474 addr = (unsigned long)regs;
475 unat = &sw->caller_unat;
476 }
477 DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
478 addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
479 /*
480 * add offset from base of struct
481 * and do it !
482 */
483 addr += GR_OFFS(regnum);
484
485 *(unsigned long *)addr = val;
486
487 /*
488 * We need to clear the corresponding UNAT bit to fully emulate the load
 * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
490 */
491 bitmask = 1UL << (addr >> 3 & 0x3f);
492 DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
493 if (nat) {
494 *unat |= bitmask;
495 } else {
496 *unat &= ~bitmask;
497 }
498 DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
499 }
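
/*
 * Worked example (not part of the build) of the UNAT bit-position rule
 * used in setreg(): the NaT bit of a register spilled at address 'addr'
 * lives at bit addr{8:3} of the matching UNAT register, so a spill at
 * 0x...1f8 uses bit 0x3f and a spill at 0x...200 uses bit 0.
 */
#if 0
static unsigned long
example_unat_mask (unsigned long spill_addr)
{
	return 1UL << ((spill_addr >> 3) & 0x3f);
}
#endif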
500
501 /*
502 * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
 * range from 32-127; the result is in the range from 0-95).
504 */
505 static inline unsigned long
fph_index (struct pt_regs *regs, long regnum)
507 {
508 unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
509 return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
510 }
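
/*
 * Illustrative example (not part of the build) of fph_index(): there are
 * 96 rotating FP registers (f32-f127), so with rrb.fr=1 the architectural
 * f32 lands in thread.fph[1] and f127 wraps around to thread.fph[0].
 */
#if 0
static void
example_fph_rotation (struct pt_regs *regs)
{
	/* assumes the rrb.fr field of regs->cr_ifs is 1 */
	BUG_ON(fph_index(regs, 32) != 1);
	BUG_ON(fph_index(regs, 127) != 0);
}
#endif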
511
512 static void
setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
514 {
515 struct switch_stack *sw = (struct switch_stack *)regs - 1;
516 unsigned long addr;
517
518 /*
519 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
520 * Fault. Thus, when we get here, we know the partition is enabled.
521 * To update f32-f127, there are three choices:
522 *
523 * (1) save f32-f127 to thread.fph and update the values there
524 * (2) use a gigantic switch statement to directly access the registers
525 * (3) generate code on the fly to update the desired register
526 *
527 * For now, we are using approach (1).
528 */
529 if (regnum >= IA64_FIRST_ROTATING_FR) {
530 ia64_sync_fph(current);
531 current->thread.fph[fph_index(regs, regnum)] = *fpval;
532 } else {
533 /*
534 * pt_regs or switch_stack ?
535 */
536 if (FR_IN_SW(regnum)) {
537 addr = (unsigned long)sw;
538 } else {
539 addr = (unsigned long)regs;
540 }
541
542 DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
543
544 addr += FR_OFFS(regnum);
545 *(struct ia64_fpreg *)addr = *fpval;
546
547 /*
548 * mark the low partition as being used now
549 *
550 * It is highly unlikely that this bit is not already set, but
551 * let's do it for safety.
552 */
553 regs->cr_ipsr |= IA64_PSR_MFL;
554 }
555 }
556
557 /*
 * These two inline functions generate the spilled versions of the constant floating-point
559 * registers which can be used with stfX
560 */
561 static inline void
float_spill_f0 (struct ia64_fpreg *final)
563 {
564 __asm__ __volatile__ ("stf.spill [%0]=f0" :: "r"(final) : "memory");
565 }
566
567 static inline void
float_spill_f1 (struct ia64_fpreg *final)
569 {
570 __asm__ __volatile__ ("stf.spill [%0]=f1" :: "r"(final) : "memory");
571 }
572
573 static void
getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
575 {
576 struct switch_stack *sw = (struct switch_stack *) regs - 1;
577 unsigned long addr;
578
579 /*
580 * From EAS-2.5: FPDisableFault has higher priority than
581 * Unaligned Fault. Thus, when we get here, we know the partition is
582 * enabled.
583 *
584 * When regnum > 31, the register is still live and we need to force a save
585 * to current->thread.fph to get access to it. See discussion in setfpreg()
586 * for reasons and other ways of doing this.
587 */
588 if (regnum >= IA64_FIRST_ROTATING_FR) {
589 ia64_flush_fph(current);
590 *fpval = current->thread.fph[fph_index(regs, regnum)];
591 } else {
592 /*
593 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
594 * not saved, we must generate their spilled form on the fly
595 */
596 switch(regnum) {
597 case 0:
598 float_spill_f0(fpval);
599 break;
600 case 1:
601 float_spill_f1(fpval);
602 break;
603 default:
604 /*
605 * pt_regs or switch_stack ?
606 */
607 addr = FR_IN_SW(regnum) ? (unsigned long)sw
608 : (unsigned long)regs;
609
610 DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
611 FR_IN_SW(regnum), addr, FR_OFFS(regnum));
612
613 addr += FR_OFFS(regnum);
614 *fpval = *(struct ia64_fpreg *)addr;
615 }
616 }
617 }
618
619
620 static void
getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
622 {
623 struct switch_stack *sw = (struct switch_stack *) regs - 1;
624 unsigned long addr, *unat;
625
626 if (regnum >= IA64_FIRST_STACKED_GR) {
627 get_rse_reg(regs, regnum, val, nat);
628 return;
629 }
630
631 /*
 * take care of r0 (read-only, always evaluates to 0)
633 */
634 if (regnum == 0) {
635 *val = 0;
636 if (nat)
637 *nat = 0;
638 return;
639 }
640
641 /*
642 * Now look at registers in [0-31] range and init correct UNAT
643 */
644 if (GR_IN_SW(regnum)) {
645 addr = (unsigned long)sw;
646 unat = &sw->ar_unat;
647 } else {
648 addr = (unsigned long)regs;
649 unat = &sw->caller_unat;
650 }
651
652 DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
653
654 addr += GR_OFFS(regnum);
655
656 *val = *(unsigned long *)addr;
657
658 /*
659 * do it only when requested
660 */
661 if (nat)
662 *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
663 }
664
665 static void
emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
667 {
668 /*
669 * IMPORTANT:
670 * Given the way we handle unaligned speculative loads, we should
671 * not get to this point in the code but we keep this sanity check,
672 * just in case.
673 */
674 if (ld.x6_op == 1 || ld.x6_op == 3) {
675 printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
676 die_if_kernel("unaligned reference on speculative load with register update\n",
677 regs, 30);
678 }
679
680
681 /*
682 * at this point, we know that the base register to update is valid i.e.,
683 * it's not r0
684 */
685 if (type == UPD_IMMEDIATE) {
686 unsigned long imm;
687
688 /*
689 * Load +Imm: ldXZ r1=[r3],imm(9)
690 *
691 *
692 * form imm9: [13:19] contain the first 7 bits
693 */
694 imm = ld.x << 7 | ld.imm;
695
696 /*
697 * sign extend (1+8bits) if m set
698 */
699 if (ld.m) imm |= SIGN_EXT9;
700
701 /*
702 * ifa == r3 and we know that the NaT bit on r3 was clear so
703 * we can directly use ifa.
704 */
705 ifa += imm;
706
707 setreg(ld.r3, ifa, 0, regs);
708
709 DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
710
711 } else if (ld.m) {
712 unsigned long r2;
713 int nat_r2;
714
715 /*
716 * Load +Reg Opcode: ldXZ r1=[r3],r2
717 *
 * Note that we update r3 even in the case of ldfX.a
719 * (where the load does not happen)
720 *
721 * The way the load algorithm works, we know that r3 does not
722 * have its NaT bit set (would have gotten NaT consumption
723 * before getting the unaligned fault). So we can use ifa
724 * which equals r3 at this point.
725 *
726 * IMPORTANT:
727 * The above statement holds ONLY because we know that we
728 * never reach this code when trying to do a ldX.s.
729 * If we ever make it to here on an ldfX.s then
730 */
731 getreg(ld.imm, &r2, &nat_r2, regs);
732
733 ifa += r2;
734
735 /*
736 * propagate Nat r2 -> r3
737 */
738 setreg(ld.r3, ifa, nat_r2, regs);
739
740 DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
741 }
742 }
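
/*
 * Worked example (not part of the build) of the imm9 construction used
 * in the immediate-update case above: bits {6:0} come from the imm
 * field, bit {7} from the x bit, and the sign is taken from the m bit
 * via SIGN_EXT9.  For instance x=1, imm=0x7f, m=1 yields -1.
 */
#if 0
static long
example_ld_imm9 (load_store_t ld)
{
	unsigned long imm = ld.x << 7 | ld.imm;

	if (ld.m)
		imm |= SIGN_EXT9;
	return (long) imm;
}
#endif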
743
744
745 static int
emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
747 {
748 unsigned int len = 1 << ld.x6_sz;
749
750 /*
751 * r0, as target, doesn't need to be checked because Illegal Instruction
752 * faults have higher priority than unaligned faults.
753 *
754 * r0 cannot be found as the base as it would never generate an
755 * unaligned reference.
756 */
757
758 /*
759 * ldX.a we don't try to emulate anything but we must invalidate the ALAT entry.
760 * See comment below for explanation on how we handle ldX.a
761 */
762 if (ld.x6_op != 0x2) {
763 unsigned long val = 0;
764
765 if (len != 2 && len != 4 && len != 8) {
766 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
767 return -1;
768 }
769 /* this assumes little-endian byte-order: */
770 if (copy_from_user(&val, (void *) ifa, len))
771 return -1;
772 setreg(ld.r1, val, 0, regs);
773 }
774
775 /*
776 * check for updates on any kind of loads
777 */
778 if (ld.op == 0x5 || ld.m)
779 emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
780
781 /*
782 * handling of various loads (based on EAS2.4):
783 *
784 * ldX.acq (ordered load):
785 * - acquire semantics would have been used, so force fence instead.
786 *
787 * ldX.c.clr (check load and clear):
788 * - if we get to this handler, it's because the entry was not in the ALAT.
789 * Therefore the operation reverts to a normal load
790 *
791 * ldX.c.nc (check load no clear):
792 * - same as previous one
793 *
794 * ldX.c.clr.acq (ordered check load and clear):
795 * - same as above for c.clr part. The load needs to have acquire semantics. So
796 * we use the fence semantics which is stronger and thus ensures correctness.
797 *
798 * ldX.a (advanced load):
799 * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
 * address doesn't match the requested size alignment. This means that we would
801 * possibly need more than one load to get the result.
802 *
803 * The load part can be handled just like a normal load, however the difficult
804 * part is to get the right thing into the ALAT. The critical piece of information
 * is the base address of the load & its size. To do that, a ld.a must be executed,
806 * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
807 * if we use the same target register, we will be okay for the check.a instruction.
808 * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
 * which would overlap within [r3,r3+X] (the size of the load was stored in the
810 * ALAT). If such an entry is found the entry is invalidated. But this is not good
811 * enough, take the following example:
812 * r3=3
813 * ld4.a r1=[r3]
814 *
815 * Could be emulated by doing:
816 * ld1.a r1=[r3],1
817 * store to temporary;
818 * ld1.a r1=[r3],1
819 * store & shift to temporary;
820 * ld1.a r1=[r3],1
821 * store & shift to temporary;
822 * ld1.a r1=[r3]
823 * store & shift to temporary;
824 * r1=temporary
825 *
 * So in this case, you would get the right value in r1 but the wrong info in
827 * the ALAT. Notice that you could do it in reverse to finish with address 3
828 * but you would still get the size wrong. To get the size right, one needs to
 * execute exactly the same kind of load. You could do it from an aligned
830 * temporary location, but you would get the address wrong.
831 *
832 * So no matter what, it is not possible to emulate an advanced load
833 * correctly. But is that really critical ?
834 *
835 *
 * Now one has to look at how ld.a is used: one must either do a ld.c.* or
 * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
838 * entry found in ALAT), and that's perfectly ok because:
839 *
840 * - ld.c.*, if the entry is not present a normal load is executed
841 * - chk.a.*, if the entry is not present, execution jumps to recovery code
842 *
843 * In either case, the load can be potentially retried in another form.
844 *
845 * So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT
 * must be invalidated for the register (so that chk.a.*/ld.c.* don't pick up
 * a stale entry later). The register base update MUST also be performed.
848 *
849 * Now what is the content of the register and its NaT bit in the case we don't
 * do the load? EAS2.4 says (in case an actual load is needed):
851 *
852 * - r1 = [r3], Nat = 0 if succeeds
853 * - r1 = 0 Nat = 0 if trying to access non-speculative memory
854 *
855 * For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
856 * retry and thus eventually reload the register thereby changing Nat and
857 * register content.
858 */
859
860 /*
861 * when the load has the .acq completer then
862 * use ordering fence.
863 */
864 if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
865 mb();
866
867 /*
868 * invalidate ALAT entry in case of advanced load
869 */
870 if (ld.x6_op == 0x2)
871 invala_gr(ld.r1);
872
873 return 0;
874 }
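
/*
 * Sketch (not part of the build) of why the zero-initialized 8-byte
 * 'val' used above assumes a little-endian byte order: copying 'len'
 * bytes into the start of 'val' fills its low-order bytes, so a 2-byte
 * access to the bytes 0x34 0x12 yields the zero-extended value 0x1234.
 * On a big-endian layout the same copy would land in the high-order
 * bytes and the result would be wrong.
 */
#if 0
static unsigned long
example_le_assemble (const unsigned char *src, unsigned int len)
{
	unsigned long val = 0;

	memcpy(&val, src, len);		/* stand-in for copy_from_user() */
	return val;
}
#endif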
875
876 static int
emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
878 {
879 unsigned long r2;
880 unsigned int len = 1 << ld.x6_sz;
881
882 /*
883 * if we get to this handler, Nat bits on both r3 and r2 have already
 * been checked, so we don't need to check them again.
885 *
886 * extract the value to be stored
887 */
888 getreg(ld.imm, &r2, 0, regs);
889
890 /*
891 * we rely on the macros in unaligned.h for now i.e.,
892 * we let the compiler figure out how to read memory gracefully.
893 *
 * We need this switch/case because of the way the inline function
895 * works. The code is optimized by the compiler and looks like
896 * a single switch/case.
897 */
898 DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
899
900 if (len != 2 && len != 4 && len != 8) {
901 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
902 return -1;
903 }
904
905 /* this assumes little-endian byte-order: */
906 if (copy_to_user((void *) ifa, &r2, len))
907 return -1;
908
909 /*
910 * stX [r3]=r2,imm(9)
911 *
912 * NOTE:
913 * ld.r3 can never be r0, because r0 would not generate an
914 * unaligned access.
915 */
916 if (ld.op == 0x5) {
917 unsigned long imm;
918
919 /*
920 * form imm9: [12:6] contain first 7bits
921 */
922 imm = ld.x << 7 | ld.r1;
923 /*
924 * sign extend (8bits) if m set
925 */
926 if (ld.m) imm |= SIGN_EXT9;
927 /*
928 * ifa == r3 (NaT is necessarily cleared)
929 */
930 ifa += imm;
931
932 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
933
934 setreg(ld.r3, ifa, 0, regs);
935 }
936 /*
937 * we don't have alat_invalidate_multiple() so we need
938 * to do the complete flush :-<<
939 */
940 ia64_invala();
941
942 /*
943 * stX.rel: use fence instead of release
944 */
945 if (ld.x6_op == 0xd)
946 mb();
947
948 return 0;
949 }
950
951 /*
 * floating-point operand sizes in bytes
953 */
954 static const unsigned char float_fsz[4]={
955 10, /* extended precision (e) */
956 8, /* integer (8) */
957 4, /* single precision (s) */
958 8 /* double precision (d) */
959 };
960
961 static inline void
mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
963 {
964 __asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6"
965 :: "r"(init), "r"(final) : "f6","memory");
966 }
967
968 static inline void
mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
970 {
971 __asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6"
972 :: "r"(init), "r"(final) : "f6","memory");
973 }
974
975 static inline void
mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
977 {
978 __asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6"
979 :: "r"(init), "r"(final) : "f6","memory");
980 }
981
982 static inline void
mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
984 {
985 __asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6"
986 :: "r"(init), "r"(final) : "f6","memory");
987 }
988
989 static inline void
float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
991 {
992 __asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6"
993 :: "r"(init), "r"(final) : "f6","memory");
994 }
995
996 static inline void
float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
998 {
999 __asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6"
1000 :: "r"(init), "r"(final) : "f6","memory");
1001 }
1002
1003 static inline void
float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
1005 {
1006 __asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6"
1007 :: "r"(init), "r"(final) : "f6","memory");
1008 }
1009
1010 static inline void
float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
1012 {
1013 __asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6"
1014 :: "r"(init), "r"(final) : "f6","memory");
1015 }
1016
1017 static int
emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1019 {
1020 struct ia64_fpreg fpr_init[2];
1021 struct ia64_fpreg fpr_final[2];
1022 unsigned long len = float_fsz[ld.x6_sz];
1023
1024 /*
1025 * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
1026 * higher priority than unaligned faults.
1027 *
1028 * r0 cannot be found as the base as it would never generate an unaligned
1029 * reference.
1030 */
1031
1032 /*
1033 * make sure we get clean buffers
1034 */
1035 memset(&fpr_init, 0, sizeof(fpr_init));
1036 memset(&fpr_final, 0, sizeof(fpr_final));
1037
1038 /*
1039 * ldfpX.a: we don't try to emulate anything but we must
1040 * invalidate the ALAT entry and execute updates, if any.
1041 */
1042 if (ld.x6_op != 0x2) {
1043 /*
1044 * This assumes little-endian byte-order. Note that there is no "ldfpe"
1045 * instruction:
1046 */
1047 if (copy_from_user(&fpr_init[0], (void *) ifa, len)
1048 || copy_from_user(&fpr_init[1], (void *) (ifa + len), len))
1049 return -1;
1050
1051 DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
		DDUMP("fpr_init =", &fpr_init, 2*len);
1053 /*
1054 * XXX fixme
1055 * Could optimize inlines by using ldfpX & 2 spills
1056 */
1057 switch( ld.x6_sz ) {
1058 case 0:
1059 mem2float_extended(&fpr_init[0], &fpr_final[0]);
1060 mem2float_extended(&fpr_init[1], &fpr_final[1]);
1061 break;
1062 case 1:
1063 mem2float_integer(&fpr_init[0], &fpr_final[0]);
1064 mem2float_integer(&fpr_init[1], &fpr_final[1]);
1065 break;
1066 case 2:
1067 mem2float_single(&fpr_init[0], &fpr_final[0]);
1068 mem2float_single(&fpr_init[1], &fpr_final[1]);
1069 break;
1070 case 3:
1071 mem2float_double(&fpr_init[0], &fpr_final[0]);
1072 mem2float_double(&fpr_init[1], &fpr_final[1]);
1073 break;
1074 }
1075 DDUMP("fpr_final =", &fpr_final, 2*len);
1076 /*
1077 * XXX fixme
1078 *
1079 * A possible optimization would be to drop fpr_final and directly
1080 * use the storage from the saved context i.e., the actual final
1081 * destination (pt_regs, switch_stack or thread structure).
1082 */
1083 setfpreg(ld.r1, &fpr_final[0], regs);
1084 setfpreg(ld.imm, &fpr_final[1], regs);
1085 }
1086
1087 /*
1088 * Check for updates: only immediate updates are available for this
1089 * instruction.
1090 */
1091 if (ld.m) {
1092 /*
1093 * the immediate is implicit given the ldsz of the operation:
1094 * single: 8 (2x4) and for all others it's 16 (2x8)
1095 */
1096 ifa += len<<1;
1097
1098 /*
1099 * IMPORTANT:
1100 * the fact that we force the NaT of r3 to zero is ONLY valid
1101 * as long as we don't come here with a ldfpX.s.
1102 * For this reason we keep this sanity check
1103 */
1104 if (ld.x6_op == 1 || ld.x6_op == 3)
1105 printk(KERN_ERR "%s: register update on speculative load pair, error\n",
1106 __FUNCTION__);
1107
1108 setreg(ld.r3, ifa, 0, regs);
1109 }
1110
1111 /*
1112 * Invalidate ALAT entries, if any, for both registers.
1113 */
1114 if (ld.x6_op == 0x2) {
1115 invala_fr(ld.r1);
1116 invala_fr(ld.imm);
1117 }
1118 return 0;
1119 }
1120
1121
1122 static int
emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1124 {
1125 struct ia64_fpreg fpr_init;
1126 struct ia64_fpreg fpr_final;
1127 unsigned long len = float_fsz[ld.x6_sz];
1128
1129 /*
1130 * fr0 & fr1 don't need to be checked because Illegal Instruction
1131 * faults have higher priority than unaligned faults.
1132 *
1133 * r0 cannot be found as the base as it would never generate an
1134 * unaligned reference.
1135 */
1136
1137 /*
1138 * make sure we get clean buffers
1139 */
1140 memset(&fpr_init,0, sizeof(fpr_init));
1141 memset(&fpr_final,0, sizeof(fpr_final));
1142
1143 /*
1144 * ldfX.a we don't try to emulate anything but we must
1145 * invalidate the ALAT entry.
1146 * See comments in ldX for descriptions on how the various loads are handled.
1147 */
1148 if (ld.x6_op != 0x2) {
1149 if (copy_from_user(&fpr_init, (void *) ifa, len))
1150 return -1;
1151
1152 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1153 DDUMP("fpr_init =", &fpr_init, len);
1154 /*
1155 * we only do something for x6_op={0,8,9}
1156 */
1157 switch( ld.x6_sz ) {
1158 case 0:
1159 mem2float_extended(&fpr_init, &fpr_final);
1160 break;
1161 case 1:
1162 mem2float_integer(&fpr_init, &fpr_final);
1163 break;
1164 case 2:
1165 mem2float_single(&fpr_init, &fpr_final);
1166 break;
1167 case 3:
1168 mem2float_double(&fpr_init, &fpr_final);
1169 break;
1170 }
1171 DDUMP("fpr_final =", &fpr_final, len);
1172 /*
1173 * XXX fixme
1174 *
1175 * A possible optimization would be to drop fpr_final and directly
1176 * use the storage from the saved context i.e., the actual final
1177 * destination (pt_regs, switch_stack or thread structure).
1178 */
1179 setfpreg(ld.r1, &fpr_final, regs);
1180 }
1181
1182 /*
1183 * check for updates on any loads
1184 */
1185 if (ld.op == 0x7 || ld.m)
1186 emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1187
1188 /*
1189 * invalidate ALAT entry in case of advanced floating point loads
1190 */
1191 if (ld.x6_op == 0x2)
1192 invala_fr(ld.r1);
1193
1194 return 0;
1195 }
1196
1197
1198 static int
emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1200 {
1201 struct ia64_fpreg fpr_init;
1202 struct ia64_fpreg fpr_final;
1203 unsigned long len = float_fsz[ld.x6_sz];
1204
1205 /*
1206 * make sure we get clean buffers
1207 */
1208 memset(&fpr_init,0, sizeof(fpr_init));
1209 memset(&fpr_final,0, sizeof(fpr_final));
1210
1211 /*
1212 * if we get to this handler, Nat bits on both r3 and r2 have already
 * been checked, so we don't need to check them again.
1214 *
1215 * extract the value to be stored
1216 */
1217 getfpreg(ld.imm, &fpr_init, regs);
1218 /*
1219 * during this step, we extract the spilled registers from the saved
1220 * context i.e., we refill. Then we store (no spill) to temporary
1221 * aligned location
1222 */
1223 switch( ld.x6_sz ) {
1224 case 0:
1225 float2mem_extended(&fpr_init, &fpr_final);
1226 break;
1227 case 1:
1228 float2mem_integer(&fpr_init, &fpr_final);
1229 break;
1230 case 2:
1231 float2mem_single(&fpr_init, &fpr_final);
1232 break;
1233 case 3:
1234 float2mem_double(&fpr_init, &fpr_final);
1235 break;
1236 }
1237 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1238 DDUMP("fpr_init =", &fpr_init, len);
1239 DDUMP("fpr_final =", &fpr_final, len);
1240
1241 if (copy_to_user((void *) ifa, &fpr_final, len))
1242 return -1;
1243
1244 /*
1245 * stfX [r3]=r2,imm(9)
1246 *
1247 * NOTE:
1248 * ld.r3 can never be r0, because r0 would not generate an
1249 * unaligned access.
1250 */
1251 if (ld.op == 0x7) {
1252 unsigned long imm;
1253
1254 /*
1255 * form imm9: [12:6] contain first 7bits
1256 */
1257 imm = ld.x << 7 | ld.r1;
1258 /*
1259 * sign extend (8bits) if m set
1260 */
1261 if (ld.m)
1262 imm |= SIGN_EXT9;
1263 /*
1264 * ifa == r3 (NaT is necessarily cleared)
1265 */
1266 ifa += imm;
1267
1268 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1269
1270 setreg(ld.r3, ifa, 0, regs);
1271 }
1272 /*
1273 * we don't have alat_invalidate_multiple() so we need
1274 * to do the complete flush :-<<
1275 */
1276 ia64_invala();
1277
1278 return 0;
1279 }
1280
1281 /*
1282 * Make sure we log the unaligned access, so that user/sysadmin can notice it and
1283 * eventually fix the program. However, we don't want to do that for every access so we
1284 * pace it with jiffies. This isn't really MP-safe, but it doesn't really have to be
1285 * either...
1286 */
1287 static int
within_logging_rate_limit (void)
1289 {
1290 static unsigned long count, last_time;
1291
1292 if (jiffies - last_time > 5*HZ)
1293 count = 0;
1294 if (++count < 5) {
1295 last_time = jiffies;
1296 return 1;
1297 }
1298 return 0;
1299
1300 }
1301
1302 void
ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1304 {
1305 struct exception_fixup fix = { 0 };
1306 struct ia64_psr *ipsr = ia64_psr(regs);
1307 mm_segment_t old_fs = get_fs();
1308 unsigned long bundle[2];
1309 unsigned long opcode;
1310 struct siginfo si;
1311 union {
1312 unsigned long l;
1313 load_store_t insn;
1314 } u;
1315 int ret = -1;
1316
1317 if (ia64_psr(regs)->be) {
1318 /* we don't support big-endian accesses */
1319 die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
1320 goto force_sigbus;
1321 }
1322
1323 /*
1324 * Treat kernel accesses for which there is an exception handler entry the same as
1325 * user-level unaligned accesses. Otherwise, a clever program could trick this
 * handler into reading arbitrary kernel addresses...
1327 */
1328 if (!user_mode(regs)) {
1329 fix = SEARCH_EXCEPTION_TABLE(regs);
1330 }
1331 if (user_mode(regs) || fix.cont) {
1332 if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1333 goto force_sigbus;
1334
1335 if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
1336 && within_logging_rate_limit())
1337 {
1338 char buf[200]; /* comm[] is at most 16 bytes... */
1339 size_t len;
1340
1341 len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1342 "ip=0x%016lx\n\r", current->comm, current->pid,
1343 ifa, regs->cr_iip + ipsr->ri);
1344 /*
1345 * Don't call tty_write_message() if we're in the kernel; we might
1346 * be holding locks...
1347 */
1348 if (user_mode(regs))
1349 tty_write_message(current->tty, buf);
1350 buf[len-1] = '\0'; /* drop '\r' */
1351 printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */
1352 }
1353 } else {
1354 if (within_logging_rate_limit())
1355 printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1356 ifa, regs->cr_iip + ipsr->ri);
1357 set_fs(KERNEL_DS);
1358 }
1359
1360 DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1361 regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1362
1363 if (__copy_from_user(bundle, (void *) regs->cr_iip, 16))
1364 goto failure;
1365
1366 /*
1367 * extract the instruction from the bundle given the slot number
1368 */
1369 switch (ipsr->ri) {
1370 case 0: u.l = (bundle[0] >> 5); break;
1371 case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1372 case 2: u.l = (bundle[1] >> 23); break;
1373 }
1374 opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
1375
1376 DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1377 "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1378 u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1379
1380 /*
1381 * IMPORTANT:
1382 * Notice that the switch statement DOES not cover all possible instructions
 * that DO generate unaligned references. This is done on purpose because for some
 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
 * is WRONG to try and emulate. Here is a list of instructions we don't emulate i.e.,
1386 * the program will get a signal and die:
1387 *
1388 * load/store:
1389 * - ldX.spill
1390 * - stX.spill
1391 * Reason: RNATs are based on addresses
1392 *
1393 * synchronization:
1394 * - cmpxchg
1395 * - fetchadd
1396 * - xchg
1397 * Reason: ATOMIC operations cannot be emulated properly using multiple
1398 * instructions.
1399 *
1400 * speculative loads:
1401 * - ldX.sZ
1402 * Reason: side effects, code must be ready to deal with failure so simpler
1403 * to let the load fail.
1404 * ---------------------------------------------------------------------------------
1405 * XXX fixme
1406 *
1407 * I would like to get rid of this switch case and do something
1408 * more elegant.
1409 */
1410 switch (opcode) {
1411 case LDS_OP:
1412 case LDSA_OP:
1413 case LDS_IMM_OP:
1414 case LDSA_IMM_OP:
1415 case LDFS_OP:
1416 case LDFSA_OP:
1417 case LDFS_IMM_OP:
1418 /*
1419 * The instruction will be retried with deferred exceptions turned on, and
 * we should get the NaT bit installed
1421 *
1422 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1423 * are actually executed even though the operation failed. So we don't
1424 * need to take care of this.
1425 */
1426 DPRINT("forcing PSR_ED\n");
1427 regs->cr_ipsr |= IA64_PSR_ED;
1428 goto done;
1429
1430 case LD_OP:
1431 case LDA_OP:
1432 case LDBIAS_OP:
1433 case LDACQ_OP:
1434 case LDCCLR_OP:
1435 case LDCNC_OP:
1436 case LDCCLRACQ_OP:
1437 case LD_IMM_OP:
1438 case LDA_IMM_OP:
1439 case LDBIAS_IMM_OP:
1440 case LDACQ_IMM_OP:
1441 case LDCCLR_IMM_OP:
1442 case LDCNC_IMM_OP:
1443 case LDCCLRACQ_IMM_OP:
1444 ret = emulate_load_int(ifa, u.insn, regs);
1445 break;
1446
1447 case ST_OP:
1448 case STREL_OP:
1449 case ST_IMM_OP:
1450 case STREL_IMM_OP:
1451 ret = emulate_store_int(ifa, u.insn, regs);
1452 break;
1453
1454 case LDF_OP:
1455 case LDFA_OP:
1456 case LDFCCLR_OP:
1457 case LDFCNC_OP:
1458 case LDF_IMM_OP:
1459 case LDFA_IMM_OP:
1460 case LDFCCLR_IMM_OP:
1461 case LDFCNC_IMM_OP:
1462 if (u.insn.x)
1463 ret = emulate_load_floatpair(ifa, u.insn, regs);
1464 else
1465 ret = emulate_load_float(ifa, u.insn, regs);
1466 break;
1467
1468 case STF_OP:
1469 case STF_IMM_OP:
1470 ret = emulate_store_float(ifa, u.insn, regs);
1471 break;
1472
1473 default:
1474 goto failure;
1475 }
1476 DPRINT("ret=%d\n", ret);
1477 if (ret)
1478 goto failure;
1479
1480 if (ipsr->ri == 2)
1481 /*
1482 * given today's architecture this case is not likely to happen because a
1483 * memory access instruction (M) can never be in the last slot of a
1484 * bundle. But let's keep it for now.
1485 */
1486 regs->cr_iip += 16;
1487 ipsr->ri = (ipsr->ri + 1) & 0x3;
1488
1489 DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1490 done:
1491 set_fs(old_fs); /* restore original address limit */
1492 return;
1493
1494 failure:
1495 /* something went wrong... */
1496 if (!user_mode(regs)) {
1497 if (fix.cont) {
1498 handle_exception(regs, fix);
1499 goto done;
1500 }
1501 die_if_kernel("error during unaligned kernel access\n", regs, ret);
1502 /* NOT_REACHED */
1503 }
1504 force_sigbus:
1505 si.si_signo = SIGBUS;
1506 si.si_errno = 0;
1507 si.si_code = BUS_ADRALN;
1508 si.si_addr = (void *) ifa;
1509 si.si_flags = 0;
1510 si.si_isr = 0;
1511 si.si_imm = 0;
1512 force_sig_info(SIGBUS, &si, current);
1513 goto done;
1514 }
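
/*
 * User-level sketch (not built with the kernel) of how a program can
 * influence the policy implemented above.  This assumes the generic
 * prctl() unaligned-access controls (PR_SET_UNALIGN with
 * PR_UNALIGN_NOPRINT or PR_UNALIGN_SIGBUS), which map onto the
 * IA64_THREAD_UAC_* flags tested in ia64_handle_unaligned().
 */
#if 0
#include <stdio.h>
#include <sys/prctl.h>

int
main (void)
{
	/* ask for SIGBUS instead of silent emulation of unaligned accesses */
	if (prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS, 0, 0, 0) < 0)
		perror("prctl(PR_SET_UNALIGN)");
	return 0;
}
#endif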
1515