1 /*
2  * Architecture-specific unaligned trap handling.
3  *
4  * Copyright (C) 1999-2002 Hewlett-Packard Co
5  *	Stephane Eranian <eranian@hpl.hp.com>
6  *	David Mosberger-Tang <davidm@hpl.hp.com>
7  *
8  * 2002/12/09   Fix rotating register handling (off-by-1 error, missing fr-rotation).  Fix
9  *		get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
10  *		stacked register returns an undefined value; it does NOT trigger a
11  *		"rsvd register fault").
12  * 2001/10/11	Fix unaligned access to rotating registers in s/w pipelined loops.
13  * 2001/08/13	Correct size of extended floats (float_fsz) from 16 to 10 bytes.
14  * 2001/01/17	Add support for emulating unaligned kernel accesses.
15  */
16 #include <linux/kernel.h>
17 #include <linux/sched.h>
18 #include <linux/smp_lock.h>
19 
20 #include <asm/uaccess.h>
21 #include <asm/rse.h>
22 #include <asm/processor.h>
23 #include <asm/unaligned.h>
24 
25 extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
26 
27 #undef DEBUG_UNALIGNED_TRAP
28 
29 #ifdef DEBUG_UNALIGNED_TRAP
30 # define DPRINT(a...)	do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
31 # define DDUMP(str,vp,len)	dump(str, vp, len)
32 
33 static void
34 dump (const char *str, void *vp, size_t len)
35 {
36 	unsigned char *cp = vp;
37 	int i;
38 
39 	printk("%s", str);
40 	for (i = 0; i < len; ++i)
41 		printk (" %02x", *cp++);
42 	printk("\n");
43 }
44 #else
45 # define DPRINT(a...)
46 # define DDUMP(str,vp,len)
47 #endif
48 
49 #define IA64_FIRST_STACKED_GR	32
50 #define IA64_FIRST_ROTATING_FR	32
51 #define SIGN_EXT9		0xffffffffffffff00ul
52 
53 /*
54  * For M-unit:
55  *
56  *  opcode |   m  |   x6    |
57  * --------|------|---------|
58  * [40-37] | [36] | [35:30] |
59  * --------|------|---------|
60  *     4   |   1  |    6    | = 11 bits
61  * --------------------------
62  * However bits [31:30] are not directly useful to distinguish between
63  * load/store so we can use [35:32] instead, which gives the following
64  * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
65  * checking the m-bit until later in the load/store emulation.
66  */
67 #define IA64_OPCODE_MASK	0x1ef
68 #define IA64_OPCODE_SHIFT	32
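/*
 * Worked example (illustrative, see ia64_handle_unaligned() below): the
 * 9-bit opcode is extracted from a 41-bit instruction slot with
 *
 *	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
 *
 * For an "ld8 r1=[r3]" slot, op=4 ([40:37]) and x6_op=0 ([35:32]), so the
 * result equals LD_OP (0x080).  Note that 0x1ef has bit 4 clear, i.e. the
 * m bit (instruction bit 36) is masked out here, and x6_sz ([31:30]) is not
 * part of the mask at all, so ld1/ld2/ld4/ld8 all yield the same LD_OP value.
 */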
69 
70 /*
71  * Table C-28 Integer Load/Store
72  *
73  * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
74  *
75  * ld8.fill, st8.fill  MUST be aligned because the RNATs are based on
76  * the address (bits [8:3]), so we must fail.
77  */
78 #define LD_OP            0x080
79 #define LDS_OP           0x081
80 #define LDA_OP           0x082
81 #define LDSA_OP          0x083
82 #define LDBIAS_OP        0x084
83 #define LDACQ_OP         0x085
84 /* 0x086, 0x087 are not relevant */
85 #define LDCCLR_OP        0x088
86 #define LDCNC_OP         0x089
87 #define LDCCLRACQ_OP     0x08a
88 #define ST_OP            0x08c
89 #define STREL_OP         0x08d
90 /* 0x08e,0x8f are not relevant */
91 
92 /*
93  * Table C-29 Integer Load +Reg
94  *
95  * we use the ld->m (bit [36:36]) field to determine whether or not we have
96  * a load/store of this form.
97  */
98 
99 /*
100  * Table C-30 Integer Load/Store +Imm
101  *
102  * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
103  *
104  * ld8.fill, st8.fill  must be aligned because the NaT bits are based on
105  * the address, so we must fail and the program must be fixed.
106  */
107 #define LD_IMM_OP            0x0a0
108 #define LDS_IMM_OP           0x0a1
109 #define LDA_IMM_OP           0x0a2
110 #define LDSA_IMM_OP          0x0a3
111 #define LDBIAS_IMM_OP        0x0a4
112 #define LDACQ_IMM_OP         0x0a5
113 /* 0x0a6, 0xa7 are not relevant */
114 #define LDCCLR_IMM_OP        0x0a8
115 #define LDCNC_IMM_OP         0x0a9
116 #define LDCCLRACQ_IMM_OP     0x0aa
117 #define ST_IMM_OP            0x0ac
118 #define STREL_IMM_OP         0x0ad
119 /* 0x0ae,0xaf are not relevant */
120 
121 /*
122  * Table C-32 Floating-point Load/Store
123  */
124 #define LDF_OP           0x0c0
125 #define LDFS_OP          0x0c1
126 #define LDFA_OP          0x0c2
127 #define LDFSA_OP         0x0c3
128 /* 0x0c6 is irrelevant */
129 #define LDFCCLR_OP       0x0c8
130 #define LDFCNC_OP        0x0c9
131 /* 0x0cb is irrelevant  */
132 #define STF_OP           0x0cc
133 
134 /*
135  * Table C-33 Floating-point Load +Reg
136  *
137  * we use the ld->m (bit [36:36]) field to determine whether or not we have
138  * a load/store of this form.
139  */
140 
141 /*
142  * Table C-34 Floating-point Load/Store +Imm
143  */
144 #define LDF_IMM_OP       0x0e0
145 #define LDFS_IMM_OP      0x0e1
146 #define LDFA_IMM_OP      0x0e2
147 #define LDFSA_IMM_OP     0x0e3
148 /* 0x0e6 is irrelevant */
149 #define LDFCCLR_IMM_OP   0x0e8
150 #define LDFCNC_IMM_OP    0x0e9
151 #define STF_IMM_OP       0x0ec
152 
153 typedef struct {
154 	unsigned long	 qp:6;	/* [0:5]   */
155 	unsigned long    r1:7;	/* [6:12]  */
156 	unsigned long   imm:7;	/* [13:19] */
157 	unsigned long    r3:7;	/* [20:26] */
158 	unsigned long     x:1;  /* [27:27] */
159 	unsigned long  hint:2;	/* [28:29] */
160 	unsigned long x6_sz:2;	/* [30:31] */
161 	unsigned long x6_op:4;	/* [32:35], x6 = x6_sz|x6_op */
162 	unsigned long     m:1;	/* [36:36] */
163 	unsigned long    op:4;	/* [37:40] */
164 	unsigned long   pad:23; /* [41:63] */
165 } load_store_t;
166 
167 
168 typedef enum {
169 	UPD_IMMEDIATE,	/* ldXZ r1=[r3],imm(9) */
170 	UPD_REG		/* ldXZ r1=[r3],r2     */
171 } update_t;
172 
173 /*
174  * We use tables to keep track of the offsets of registers in the saved state.
175  * This way we avoid big switch/case statements.
176  *
177  * We use bit 0 to indicate switch_stack or pt_regs.
178  * The offset is simply shifted by 1 bit.
179  * A 2-byte value should be enough to hold any kind of offset
180  *
181  * In case the calling convention changes (and thus pt_regs/switch_stack)
182  * simply use RSW instead of RPT or vice-versa.
183  */
184 
185 #define RPO(x)	((size_t) &((struct pt_regs *)0)->x)
186 #define RSO(x)	((size_t) &((struct switch_stack *)0)->x)
187 
188 #define RPT(x)		(RPO(x) << 1)
189 #define RSW(x)		(1| RSO(x)<<1)
190 
191 #define GR_OFFS(x)	(gr_info[x]>>1)
192 #define GR_IN_SW(x)	(gr_info[x] & 0x1)
193 
194 #define FR_OFFS(x)	(fr_info[x]>>1)
195 #define FR_IN_SW(x)	(fr_info[x] & 0x1)
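/*
 * Worked example (illustrative): gr_info[8] below is RPT(r8), i.e. the byte
 * offset of r8 within struct pt_regs shifted left by one with bit 0 clear,
 * so GR_IN_SW(8) is 0 and GR_OFFS(8) is that offset.  gr_info[4] is RSW(r4),
 * so GR_IN_SW(4) is 1 and GR_OFFS(4) is the offset of r4 within struct
 * switch_stack (r4-r7 are callee-saved and hence live there).
 */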
196 
197 static u16 gr_info[32]={
198 	0,			/* r0 is read-only : WE SHOULD NEVER GET THIS */
199 
200 	RPT(r1), RPT(r2), RPT(r3),
201 
202 	RSW(r4), RSW(r5), RSW(r6), RSW(r7),
203 
204 	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
205 	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
206 
207 	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
208 	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
209 	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
210 	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
211 };
212 
213 static u16 fr_info[32]={
214 	0,			/* constant : WE SHOULD NEVER GET THIS */
215 	0,			/* constant : WE SHOULD NEVER GET THIS */
216 
217 	RSW(f2), RSW(f3), RSW(f4), RSW(f5),
218 
219 	RPT(f6), RPT(f7), RPT(f8), RPT(f9),
220 	RPT(f10), RPT(f11),
221 
222 	RSW(f12), RSW(f13), RSW(f14),
223 	RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
224 	RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
225 	RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
226 	RSW(f30), RSW(f31)
227 };
228 
229 /* Invalidate ALAT entry for integer register REGNO.  */
230 static void
231 invala_gr (int regno)
232 {
233 #	define F(reg)	case reg: __asm__ __volatile__ ("invala.e r%0" :: "i"(reg)); break
234 
235 	switch (regno) {
236 		F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
237 		F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
238 		F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
239 		F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
240 		F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
241 		F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
242 		F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
243 		F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
244 		F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
245 		F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
246 		F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
247 		F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
248 		F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
249 		F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
250 		F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
251 		F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
252 	}
253 #	undef F
254 }
255 
256 /* Invalidate ALAT entry for floating-point register REGNO.  */
257 static void
258 invala_fr (int regno)
259 {
260 #	define F(reg)	case reg: __asm__ __volatile__ ("invala.e f%0" :: "i"(reg)); break
261 
262 	switch (regno) {
263 		F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
264 		F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
265 		F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
266 		F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
267 		F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
268 		F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
269 		F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
270 		F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
271 		F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
272 		F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
273 		F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
274 		F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
275 		F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
276 		F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
277 		F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
278 		F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
279 	}
280 #	undef F
281 }
282 
283 static inline unsigned long
284 rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
285 {
286 	reg += rrb;
287 	if (reg >= sor)
288 		reg -= sor;
289 	return reg;
290 }
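/*
 * Worked example (illustrative): with a rotating region of sor=16 registers
 * and rrb=2, frame-relative index 15 maps to 15 + 2 = 17, which wraps to
 * physical index 1, while index 3 simply becomes 5.
 */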
291 
292 static void
293 set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
294 {
295 	struct switch_stack *sw = (struct switch_stack *) regs - 1;
296 	unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
297 	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
298 	unsigned long rnats, nat_mask;
299 	unsigned long on_kbs;
300 	long sof = (regs->cr_ifs) & 0x7f;
301 	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
302 	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
303 	long ridx = r1 - 32;
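	/*
	 * The extractions above follow the CFM layout saved in cr.ifs:
	 * sof is ifs{6:0}, sol is ifs{13:7}, sor is ifs{17:14} (in units of
	 * 8 registers, hence the multiplication by 8), and rrb.gr is
	 * ifs{24:18}.
	 */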
304 
305 	if (ridx >= sof) {
306 		/* this should never happen, as the "rsvd register fault" has higher priority */
307 		DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
308 		return;
309 	}
310 
311 	if (ridx < sor)
312 		ridx = rotate_reg(sor, rrb_gr, ridx);
313 
314 	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
315 	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
316 
317 	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
318 	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
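	/*
	 * The interrupted frame occupies the sof register slots just below
	 * sw->ar_bspstore (plus any intervening RNAT collection slots, which
	 * ia64_rse_skip_regs() accounts for), so stepping back by sof and
	 * forward by ridx yields the save location of the register.
	 */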
319 	if (addr >= kbs) {
320 		/* the register is on the kernel backing store: easy... */
321 		rnat_addr = ia64_rse_rnat_addr(addr);
322 		if ((unsigned long) rnat_addr >= sw->ar_bspstore)
323 			rnat_addr = &sw->ar_rnat;
324 		nat_mask = 1UL << ia64_rse_slot_num(addr);
325 
326 		*addr = val;
327 		if (nat)
328 			*rnat_addr |=  nat_mask;
329 		else
330 			*rnat_addr &= ~nat_mask;
331 		return;
332 	}
333 
334 	/*
335 	 * Avoid using user_mode() here: with "epc", we cannot use the privilege level to
336 	 * infer whether the interrupt task was running on the kernel backing store.
337 	 */
338 	if (regs->r12 >= TASK_SIZE) {
339 		DPRINT("ignoring kernel write to r%lu; register isn't on the RBS!", r1);
340 		return;
341 	}
342 
343 	bspstore = (unsigned long *)regs->ar_bspstore;
344 	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
345 	bsp     = ia64_rse_skip_regs(ubs_end, -sof);
346 	addr    = ia64_rse_skip_regs(bsp, ridx);
347 
348 	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
349 
350 	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
351 
352 	rnat_addr = ia64_rse_rnat_addr(addr);
353 
354 	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
355 	DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
356 	       (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
357 
358 	nat_mask = 1UL << ia64_rse_slot_num(addr);
359 	if (nat)
360 		rnats |=  nat_mask;
361 	else
362 		rnats &= ~nat_mask;
363 	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
364 
365 	DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
366 }
367 
368 
369 static void
370 get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
371 {
372 	struct switch_stack *sw = (struct switch_stack *) regs - 1;
373 	unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
374 	unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
375 	unsigned long rnats, nat_mask;
376 	unsigned long on_kbs;
377 	long sof = (regs->cr_ifs) & 0x7f;
378 	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
379 	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
380 	long ridx = r1 - 32;
381 
382 	if (ridx >= sof) {
383 		/* read of out-of-frame register returns an undefined value; 0 in our case.  */
384 		DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
385 		goto fail;
386 	}
387 
388 	if (ridx < sor)
389 		ridx = rotate_reg(sor, rrb_gr, ridx);
390 
391 	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
392 	       r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
393 
394 	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
395 	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
396 	if (addr >= kbs) {
397 		/* the register is on the kernel backing store: easy... */
398 		*val = *addr;
399 		if (nat) {
400 			rnat_addr = ia64_rse_rnat_addr(addr);
401 			if ((unsigned long) rnat_addr >= sw->ar_bspstore)
402 				rnat_addr = &sw->ar_rnat;
403 			nat_mask = 1UL << ia64_rse_slot_num(addr);
404 			*nat = (*rnat_addr & nat_mask) != 0;
405 		}
406 		return;
407 	}
408 
409 	/*
410 	 * Avoid using user_mode() here: with "epc", we cannot use the privilege level to
411 	 * infer whether the interrupt task was running on the kernel backing store.
412 	 */
413 	if (regs->r12 >= TASK_SIZE) {
414 		DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
415 		goto fail;
416 	}
417 
418 	bspstore = (unsigned long *)regs->ar_bspstore;
419 	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
420 	bsp     = ia64_rse_skip_regs(ubs_end, -sof);
421 	addr    = ia64_rse_skip_regs(bsp, ridx);
422 
423 	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
424 
425 	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
426 
427 	if (nat) {
428 		rnat_addr = ia64_rse_rnat_addr(addr);
429 		nat_mask = 1UL << ia64_rse_slot_num(addr);
430 
431 		DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
432 
433 		ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
434 		*nat = (rnats & nat_mask) != 0;
435 	}
436 	return;
437 
438   fail:
439 	*val = 0;
440 	if (nat)
441 		*nat = 0;
442 	return;
443 }
444 
445 
446 static void
447 setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
448 {
449 	struct switch_stack *sw = (struct switch_stack *) regs - 1;
450 	unsigned long addr;
451 	unsigned long bitmask;
452 	unsigned long *unat;
453 
454 	/*
455 	 * First, take care of stacked registers.
456 	 */
457 	if (regnum >= IA64_FIRST_STACKED_GR) {
458 		set_rse_reg(regs, regnum, val, nat);
459 		return;
460 	}
461 
462 	/*
463 	 * Using r0 as a target raises a General Exception fault which has higher priority
464 	 * than the Unaligned Reference fault.
465 	 */
466 
467 	/*
468 	 * Now look at registers in [0-31] range and init correct UNAT
469 	 */
470 	if (GR_IN_SW(regnum)) {
471 		addr = (unsigned long)sw;
472 		unat = &sw->ar_unat;
473 	} else {
474 		addr = (unsigned long)regs;
475 		unat = &sw->caller_unat;
476 	}
477 	DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
478 	       addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
479 	/*
480 	 * add offset from base of struct
481 	 * and do it !
482 	 */
483 	addr += GR_OFFS(regnum);
484 
485 	*(unsigned long *)addr = val;
486 
487 	/*
488 	 * We need to clear the corresponding UNAT bit to fully emulate the load
489 	 * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
490 	 */
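	/*
	 * Illustration: if the saved register image sits at an address ending
	 * in ...0x58, then (addr >> 3) & 0x3f is 0x0b, i.e. bit 11 of the
	 * UNAT collection tracks this register's NaT, just as an st8.spill to
	 * that address would have recorded it.
	 */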
491 	bitmask   = 1UL << (addr >> 3 & 0x3f);
492 	DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
493 	if (nat) {
494 		*unat |= bitmask;
495 	} else {
496 		*unat &= ~bitmask;
497 	}
498 	DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
499 }
500 
501 /*
502  * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
503  * range from 32-127; the result is in the range from 0-95).
504  */
505 static inline unsigned long
506 fph_index (struct pt_regs *regs, long regnum)
507 {
508 	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
509 	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
510 }
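/*
 * Example (illustrative): with rrb.fr = 5, a reference to f34 yields
 * rotate_reg(96, 5, 2) = 7, i.e. the value lives in the slot of physical
 * register f39, which is current->thread.fph[7].
 */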
511 
512 static void
513 setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
514 {
515 	struct switch_stack *sw = (struct switch_stack *)regs - 1;
516 	unsigned long addr;
517 
518 	/*
519 	 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
520 	 * Fault. Thus, when we get here, we know the partition is enabled.
521 	 * To update f32-f127, there are three choices:
522 	 *
523 	 *	(1) save f32-f127 to thread.fph and update the values there
524 	 *	(2) use a gigantic switch statement to directly access the registers
525 	 *	(3) generate code on the fly to update the desired register
526 	 *
527 	 * For now, we are using approach (1).
528 	 */
529 	if (regnum >= IA64_FIRST_ROTATING_FR) {
530 		ia64_sync_fph(current);
531 		current->thread.fph[fph_index(regs, regnum)] = *fpval;
532 	} else {
533 		/*
534 		 * pt_regs or switch_stack ?
535 		 */
536 		if (FR_IN_SW(regnum)) {
537 			addr = (unsigned long)sw;
538 		} else {
539 			addr = (unsigned long)regs;
540 		}
541 
542 		DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
543 
544 		addr += FR_OFFS(regnum);
545 		*(struct ia64_fpreg *)addr = *fpval;
546 
547 		/*
548 		 * mark the low partition as being used now
549 		 *
550 		 * It is highly unlikely that this bit is not already set, but
551 		 * let's do it for safety.
552 		 */
553 		regs->cr_ipsr |= IA64_PSR_MFL;
554 	}
555 }
556 
557 /*
558  * These two inline functions generate the spilled versions of the constant floating-point
559  * registers, which can then be used with stfX.
560  */
561 static inline void
562 float_spill_f0 (struct ia64_fpreg *final)
563 {
564 	__asm__ __volatile__ ("stf.spill [%0]=f0" :: "r"(final) : "memory");
565 }
566 
567 static inline void
568 float_spill_f1 (struct ia64_fpreg *final)
569 {
570 	__asm__ __volatile__ ("stf.spill [%0]=f1" :: "r"(final) : "memory");
571 }
572 
573 static void
574 getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
575 {
576 	struct switch_stack *sw = (struct switch_stack *) regs - 1;
577 	unsigned long addr;
578 
579 	/*
580 	 * From EAS-2.5: FPDisableFault has higher priority than
581 	 * Unaligned Fault. Thus, when we get here, we know the partition is
582 	 * enabled.
583 	 *
584 	 * When regnum > 31, the register is still live and we need to force a save
585 	 * to current->thread.fph to get access to it.  See discussion in setfpreg()
586 	 * for reasons and other ways of doing this.
587 	 */
588 	if (regnum >= IA64_FIRST_ROTATING_FR) {
589 		ia64_flush_fph(current);
590 		*fpval = current->thread.fph[fph_index(regs, regnum)];
591 	} else {
592 		/*
593 		 * f0 = 0.0, f1= 1.0. Those registers are constant and are thus
594 		 * not saved, we must generate their spilled form on the fly
595 		 */
596 		switch(regnum) {
597 		case 0:
598 			float_spill_f0(fpval);
599 			break;
600 		case 1:
601 			float_spill_f1(fpval);
602 			break;
603 		default:
604 			/*
605 			 * pt_regs or switch_stack ?
606 			 */
607 			addr =  FR_IN_SW(regnum) ? (unsigned long)sw
608 						 : (unsigned long)regs;
609 
610 			DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
611 			       FR_IN_SW(regnum), addr, FR_OFFS(regnum));
612 
613 			addr  += FR_OFFS(regnum);
614 			*fpval = *(struct ia64_fpreg *)addr;
615 		}
616 	}
617 }
618 
619 
620 static void
621 getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
622 {
623 	struct switch_stack *sw = (struct switch_stack *) regs - 1;
624 	unsigned long addr, *unat;
625 
626 	if (regnum >= IA64_FIRST_STACKED_GR) {
627 		get_rse_reg(regs, regnum, val, nat);
628 		return;
629 	}
630 
631 	/*
632 	 * take care of r0 (read-only, always evaluates to 0)
633 	 */
634 	if (regnum == 0) {
635 		*val = 0;
636 		if (nat)
637 			*nat = 0;
638 		return;
639 	}
640 
641 	/*
642 	 * Now look at registers in [0-31] range and init correct UNAT
643 	 */
644 	if (GR_IN_SW(regnum)) {
645 		addr = (unsigned long)sw;
646 		unat = &sw->ar_unat;
647 	} else {
648 		addr = (unsigned long)regs;
649 		unat = &sw->caller_unat;
650 	}
651 
652 	DPRINT("addr_base=%lx offset=0x%x\n", addr,  GR_OFFS(regnum));
653 
654 	addr += GR_OFFS(regnum);
655 
656 	*val  = *(unsigned long *)addr;
657 
658 	/*
659 	 * do it only when requested
660 	 */
661 	if (nat)
662 		*nat  = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
663 }
664 
665 static void
666 emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
667 {
668 	/*
669 	 * IMPORTANT:
670 	 * Given the way we handle unaligned speculative loads, we should
671 	 * not get to this point in the code but we keep this sanity check,
672 	 * just in case.
673 	 */
674 	if (ld.x6_op == 1 || ld.x6_op == 3) {
675 		printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
676 		die_if_kernel("unaligned reference on speculative load with register update\n",
677 			      regs, 30);
678 	}
679 
680 
681 	/*
682 	 * at this point, we know that the base register to update is valid i.e.,
683 	 * it's not r0
684 	 */
685 	if (type == UPD_IMMEDIATE) {
686 		unsigned long imm;
687 
688 		/*
689 		 * Load +Imm: ldXZ r1=[r3],imm(9)
690 		 *
691 		 *
692 		 * form imm9: [13:19] contain the first 7 bits
693 		 */
694 		imm = ld.x << 7 | ld.imm;
695 
696 		/*
697 		 * sign extend (1+8bits) if m set
698 		 */
699 		if (ld.m) imm |= SIGN_EXT9;
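		/*
		 * Worked example (illustrative): for "ld8 r1=[r3],-8" the
		 * immediate encodes as m=1 (sign), x=1, imm7=0x78, so
		 * imm = 1 << 7 | 0x78 = 0xf8, and OR-ing in SIGN_EXT9 gives
		 * 0xfffffffffffffff8, i.e. -8.
		 */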
700 
701 		/*
702 		 * ifa == r3 and we know that the NaT bit on r3 was clear so
703 		 * we can directly use ifa.
704 		 */
705 		ifa += imm;
706 
707 		setreg(ld.r3, ifa, 0, regs);
708 
709 		DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
710 
711 	} else if (ld.m) {
712 		unsigned long r2;
713 		int nat_r2;
714 
715 		/*
716 		 * Load +Reg Opcode: ldXZ r1=[r3],r2
717 		 *
718 		 * Note: that we update r3 even in the case of ldfX.a
719 		 * (where the load does not happen)
720 		 *
721 		 * The way the load algorithm works, we know that r3 does not
722 		 * have its NaT bit set (would have gotten NaT consumption
723 		 * before getting the unaligned fault). So we can use ifa
724 		 * which equals r3 at this point.
725 		 *
726 		 * IMPORTANT:
727 		 * The above statement holds ONLY because we know that we
728 		 * never reach this code when trying to do a ldX.s.
729 		 * If we ever make it to here on an ldfX.s, then the assumption above no longer holds.
730 		 */
731 		getreg(ld.imm, &r2, &nat_r2, regs);
732 
733 		ifa += r2;
734 
735 		/*
736 		 * propagate Nat r2 -> r3
737 		 */
738 		setreg(ld.r3, ifa, nat_r2, regs);
739 
740 		DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
741 	}
742 }
743 
744 
745 static int
746 emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
747 {
748 	unsigned int len = 1 << ld.x6_sz;
749 
750 	/*
751 	 * r0, as target, doesn't need to be checked because Illegal Instruction
752 	 * faults have higher priority than unaligned faults.
753 	 *
754 	 * r0 cannot be found as the base as it would never generate an
755 	 * unaligned reference.
756 	 */
757 
758 	/*
759 	 * ldX.a we don't try to emulate anything but we must invalidate the ALAT entry.
760 	 * See comment below for explanation on how we handle ldX.a
761 	 */
762 	if (ld.x6_op != 0x2) {
763 		unsigned long val = 0;
764 
765 		if (len != 2 && len != 4 && len != 8) {
766 			DPRINT("unknown size: x6=%d\n", ld.x6_sz);
767 			return -1;
768 		}
769 		/* this assumes little-endian byte-order: */
770 		if (copy_from_user(&val, (void *) ifa, len))
771 		    return -1;
772 		setreg(ld.r1, val, 0, regs);
773 	}
774 
775 	/*
776 	 * check for updates on any kind of loads
777 	 */
778 	if (ld.op == 0x5 || ld.m)
779 		emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
780 
781 	/*
782 	 * handling of various loads (based on EAS2.4):
783 	 *
784 	 * ldX.acq (ordered load):
785 	 *	- acquire semantics would have been used, so force fence instead.
786 	 *
787 	 * ldX.c.clr (check load and clear):
788 	 *	- if we get to this handler, it's because the entry was not in the ALAT.
789 	 *	  Therefore the operation reverts to a normal load
790 	 *
791 	 * ldX.c.nc (check load no clear):
792 	 *	- same as previous one
793 	 *
794 	 * ldX.c.clr.acq (ordered check load and clear):
795 	 *	- same as above for c.clr part. The load needs to have acquire semantics. So
796 	 *	  we use the fence semantics which is stronger and thus ensures correctness.
797 	 *
798 	 * ldX.a (advanced load):
799 	 *	- suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
800 	 *	  address doesn't match requested size alignment. This means that we would
801 	 *	  possibly need more than one load to get the result.
802 	 *
803 	 *	  The load part can be handled just like a normal load, however the difficult
804 	 *	  part is to get the right thing into the ALAT. The critical piece of information
805 	 *	  is the base address of the load & its size. To do that, a ld.a must be executed,
806 	 *	  clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
807 	 *	  if we use the same target register, we will be okay for the check.a instruction.
808 	 *	  If we look at the store, basically a stX [r3]=r1 checks the ALAT  for any entry
809 	 *	  which would overlap within [r3,r3+X] (the size of the load was stored in the
810 	 *	  ALAT). If such an entry is found the entry is invalidated. But this is not good
811 	 *	  enough, take the following example:
812 	 *		r3=3
813 	 *		ld4.a r1=[r3]
814 	 *
815 	 *	  Could be emulated by doing:
816 	 *		ld1.a r1=[r3],1
817 	 *		store to temporary;
818 	 *		ld1.a r1=[r3],1
819 	 *		store & shift to temporary;
820 	 *		ld1.a r1=[r3],1
821 	 *		store & shift to temporary;
822 	 *		ld1.a r1=[r3]
823 	 *		store & shift to temporary;
824 	 *		r1=temporary
825 	 *
826 	 *	  So in this case, you would get the right value in r1 but the wrong info in
827 	 *	  the ALAT.  Notice that you could do it in reverse to finish with address 3
828 	 *	  but you would still get the size wrong.  To get the size right, one needs to
829 	 *	  execute exactly the same kind of load. You could do it from an aligned
830 	 *	  temporary location, but you would get the address wrong.
831 	 *
832 	 *	  So no matter what, it is not possible to emulate an advanced load
833 	 *	  correctly. But is that really critical ?
834 	 *
835 	 *
836 	 *	  Now one has to look at how ld.a is used: one must either do a ld.c.* or
837 	 *	  chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
838 	 *	  entry found in ALAT), and that's perfectly ok because:
839 	 *
840 	 *		- ld.c.*, if the entry is not present, a normal load is executed
841 	 *		- chk.a.*, if the entry is not present, execution jumps to recovery code
842 	 *
843 	 *	  In either case, the load can be potentially retried in another form.
844 	 *
845 	 *	  So it's okay NOT to do any actual load on an unaligned ld.a. However the ALAT
846 	 *	  must be invalidated for the register (so that chk.a.* and ld.c.* don't pick up
847 	 *	  a stale entry later). The register base update MUST also be performed.
848 	 *
849 	 *	  Now what is the content of the register and its NaT bit in the case we don't
850 	 *	  do the load?  EAS2.4 says (in case an actual load is needed):
851 	 *
852 	 *		- r1 = [r3], Nat = 0 if succeeds
853 	 *		- r1 = 0 Nat = 0 if trying to access non-speculative memory
854 	 *
855 	 *	  For us, there is nothing to do, because both ld.c.* and chk.a.* are going to
856 	 *	  retry and thus eventually reload the register thereby changing Nat and
857 	 *	  register content.
858 	 */
859 
860 	/*
861 	 * when the load has the .acq completer then
862 	 * use ordering fence.
863 	 */
864 	if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
865 		mb();
866 
867 	/*
868 	 * invalidate ALAT entry in case of advanced load
869 	 */
870 	if (ld.x6_op == 0x2)
871 		invala_gr(ld.r1);
872 
873 	return 0;
874 }
875 
876 static int
877 emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
878 {
879 	unsigned long r2;
880 	unsigned int len = 1 << ld.x6_sz;
881 
882 	/*
883 	 * if we get to this handler, Nat bits on both r3 and r2 have already
884 	 * been checked. so we don't need to do it
885 	 *
886 	 * extract the value to be stored
887 	 */
888 	getreg(ld.imm, &r2, 0, regs);
889 
890 	/*
891 	 * we rely on the macros in unaligned.h for now i.e.,
892 	 * we let the compiler figure out how to read memory gracefully.
893 	 *
894 	 * We need this switch/case because the way the inline function
895 	 * works. The code is optimized by the compiler and looks like
896 	 * a single switch/case.
897 	 */
898 	DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
899 
900 	if (len != 2 && len != 4 && len != 8) {
901 		DPRINT("unknown size: x6=%d\n", ld.x6_sz);
902 		return -1;
903 	}
904 
905 	/* this assumes little-endian byte-order: */
906 	if (copy_to_user((void *) ifa, &r2, len))
907 		return -1;
908 
909 	/*
910 	 * stX [r3]=r2,imm(9)
911 	 *
912 	 * NOTE:
913 	 * ld.r3 can never be r0, because r0 would not generate an
914 	 * unaligned access.
915 	 */
916 	if (ld.op == 0x5) {
917 		unsigned long imm;
918 
919 		/*
920 		 * form imm9: [12:6] contain the first 7 bits
921 		 */
922 		imm = ld.x << 7 | ld.r1;
923 		/*
924 		 * sign extend (8bits) if m set
925 		 */
926 		if (ld.m) imm |= SIGN_EXT9;
927 		/*
928 		 * ifa == r3 (NaT is necessarily cleared)
929 		 */
930 		ifa += imm;
931 
932 		DPRINT("imm=%lx r3=%lx\n", imm, ifa);
933 
934 		setreg(ld.r3, ifa, 0, regs);
935 	}
936 	/*
937 	 * we don't have alat_invalidate_multiple() so we need
938 	 * to do the complete flush :-<<
939 	 */
940 	ia64_invala();
941 
942 	/*
943 	 * stX.rel: use fence instead of release
944 	 */
945 	if (ld.x6_op == 0xd)
946 		mb();
947 
948 	return 0;
949 }
950 
951 /*
952  * floating point operations sizes in bytes
953  */
954 static const unsigned char float_fsz[4]={
955 	10, /* extended precision (e) */
956 	8,  /* integer (8)            */
957 	4,  /* single precision (s)   */
958 	8   /* double precision (d)   */
959 };
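/*
 * The table is indexed by x6_sz (instruction bits [31:30]): e.g. ldfe has
 * x6_sz=0 and transfers 10 bytes, ldfd has x6_sz=3 and transfers 8 bytes.
 * The helpers below bounce the value through f6: the mem2float_* routines
 * load a memory-format value from the aligned temporary and spill f6 into
 * the canonical ia64_fpreg layout, while the float2mem_* routines fill f6
 * from a spilled image and store it back in memory format.
 */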
960 
961 static inline void
962 mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
963 {
964 	__asm__ __volatile__ ("ldfe f6=[%0];; stf.spill [%1]=f6"
965 			      :: "r"(init), "r"(final) : "f6","memory");
966 }
967 
968 static inline void
969 mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
970 {
971 	__asm__ __volatile__ ("ldf8 f6=[%0];; stf.spill [%1]=f6"
972 			      :: "r"(init), "r"(final) : "f6","memory");
973 }
974 
975 static inline void
976 mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
977 {
978 	__asm__ __volatile__ ("ldfs f6=[%0];; stf.spill [%1]=f6"
979 			      :: "r"(init), "r"(final) : "f6","memory");
980 }
981 
982 static inline void
983 mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
984 {
985 	__asm__ __volatile__ ("ldfd f6=[%0];; stf.spill [%1]=f6"
986 			      :: "r"(init), "r"(final) : "f6","memory");
987 }
988 
989 static inline void
990 float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
991 {
992 	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfe [%1]=f6"
993 			      :: "r"(init), "r"(final) : "f6","memory");
994 }
995 
996 static inline void
997 float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
998 {
999 	__asm__ __volatile__ ("ldf.fill f6=[%0];; stf8 [%1]=f6"
1000 			      :: "r"(init), "r"(final) : "f6","memory");
1001 }
1002 
1003 static inline void
1004 float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
1005 {
1006 	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfs [%1]=f6"
1007 			      :: "r"(init), "r"(final) : "f6","memory");
1008 }
1009 
1010 static inline void
1011 float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
1012 {
1013 	__asm__ __volatile__ ("ldf.fill f6=[%0];; stfd [%1]=f6"
1014 			      :: "r"(init), "r"(final) : "f6","memory");
1015 }
1016 
1017 static int
1018 emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1019 {
1020 	struct ia64_fpreg fpr_init[2];
1021 	struct ia64_fpreg fpr_final[2];
1022 	unsigned long len = float_fsz[ld.x6_sz];
1023 
1024 	/*
1025 	 * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
1026 	 * higher priority than unaligned faults.
1027 	 *
1028 	 * r0 cannot be found as the base as it would never generate an unaligned
1029 	 * reference.
1030 	 */
1031 
1032 	/*
1033 	 * make sure we get clean buffers
1034 	 */
1035 	memset(&fpr_init, 0, sizeof(fpr_init));
1036 	memset(&fpr_final, 0, sizeof(fpr_final));
1037 
1038 	/*
1039 	 * ldfpX.a: we don't try to emulate anything but we must
1040 	 * invalidate the ALAT entry and execute updates, if any.
1041 	 */
1042 	if (ld.x6_op != 0x2) {
1043 		/*
1044 		 * This assumes little-endian byte-order.  Note that there is no "ldfpe"
1045 		 * instruction:
1046 		 */
1047 		if (copy_from_user(&fpr_init[0], (void *) ifa, len)
1048 		    || copy_from_user(&fpr_init[1], (void *) (ifa + len), len))
1049 			return -1;
1050 
1051 		DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
1052 		DDUMP("frp_init =", &fpr_init, 2*len);
1053 		/*
1054 		 * XXX fixme
1055 		 * Could optimize inlines by using ldfpX & 2 spills
1056 		 */
1057 		switch( ld.x6_sz ) {
1058 			case 0:
1059 				mem2float_extended(&fpr_init[0], &fpr_final[0]);
1060 				mem2float_extended(&fpr_init[1], &fpr_final[1]);
1061 				break;
1062 			case 1:
1063 				mem2float_integer(&fpr_init[0], &fpr_final[0]);
1064 				mem2float_integer(&fpr_init[1], &fpr_final[1]);
1065 				break;
1066 			case 2:
1067 				mem2float_single(&fpr_init[0], &fpr_final[0]);
1068 				mem2float_single(&fpr_init[1], &fpr_final[1]);
1069 				break;
1070 			case 3:
1071 				mem2float_double(&fpr_init[0], &fpr_final[0]);
1072 				mem2float_double(&fpr_init[1], &fpr_final[1]);
1073 				break;
1074 		}
1075 		DDUMP("fpr_final =", &fpr_final, 2*len);
1076 		/*
1077 		 * XXX fixme
1078 		 *
1079 		 * A possible optimization would be to drop fpr_final and directly
1080 		 * use the storage from the saved context i.e., the actual final
1081 		 * destination (pt_regs, switch_stack or thread structure).
1082 		 */
1083 		setfpreg(ld.r1, &fpr_final[0], regs);
1084 		setfpreg(ld.imm, &fpr_final[1], regs);
1085 	}
1086 
1087 	/*
1088 	 * Check for updates: only immediate updates are available for this
1089 	 * instruction.
1090 	 */
1091 	if (ld.m) {
1092 		/*
1093 		 * the immediate is implicit given the ldsz of the operation:
1094 		 * single: 8 (2x4), and for all others it's 16 (2x8)
1095 		 */
1096 		ifa += len<<1;
1097 
1098 		/*
1099 		 * IMPORTANT:
1100 		 * the fact that we force the NaT of r3 to zero is ONLY valid
1101 		 * as long as we don't come here with a ldfpX.s.
1102 		 * For this reason we keep this sanity check
1103 		 */
1104 		if (ld.x6_op == 1 || ld.x6_op == 3)
1105 			printk(KERN_ERR "%s: register update on speculative load pair, error\n",
1106 			       __FUNCTION__);
1107 
1108 		setreg(ld.r3, ifa, 0, regs);
1109 	}
1110 
1111 	/*
1112 	 * Invalidate ALAT entries, if any, for both registers.
1113 	 */
1114 	if (ld.x6_op == 0x2) {
1115 		invala_fr(ld.r1);
1116 		invala_fr(ld.imm);
1117 	}
1118 	return 0;
1119 }
1120 
1121 
1122 static int
1123 emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1124 {
1125 	struct ia64_fpreg fpr_init;
1126 	struct ia64_fpreg fpr_final;
1127 	unsigned long len = float_fsz[ld.x6_sz];
1128 
1129 	/*
1130 	 * fr0 & fr1 don't need to be checked because Illegal Instruction
1131 	 * faults have higher priority than unaligned faults.
1132 	 *
1133 	 * r0 cannot be found as the base as it would never generate an
1134 	 * unaligned reference.
1135 	 */
1136 
1137 	/*
1138 	 * make sure we get clean buffers
1139 	 */
1140 	memset(&fpr_init,0, sizeof(fpr_init));
1141 	memset(&fpr_final,0, sizeof(fpr_final));
1142 
1143 	/*
1144 	 * ldfX.a we don't try to emulate anything but we must
1145 	 * invalidate the ALAT entry.
1146 	 * See comments in ldX for descriptions on how the various loads are handled.
1147 	 */
1148 	if (ld.x6_op != 0x2) {
1149 		if (copy_from_user(&fpr_init, (void *) ifa, len))
1150 			return -1;
1151 
1152 		DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1153 		DDUMP("fpr_init =", &fpr_init, len);
1154 		/*
1155 		 * we only do something for x6_op={0,8,9}
1156 		 */
1157 		switch( ld.x6_sz ) {
1158 			case 0:
1159 				mem2float_extended(&fpr_init, &fpr_final);
1160 				break;
1161 			case 1:
1162 				mem2float_integer(&fpr_init, &fpr_final);
1163 				break;
1164 			case 2:
1165 				mem2float_single(&fpr_init, &fpr_final);
1166 				break;
1167 			case 3:
1168 				mem2float_double(&fpr_init, &fpr_final);
1169 				break;
1170 		}
1171 		DDUMP("fpr_final =", &fpr_final, len);
1172 		/*
1173 		 * XXX fixme
1174 		 *
1175 		 * A possible optimization would be to drop fpr_final and directly
1176 		 * use the storage from the saved context i.e., the actual final
1177 		 * destination (pt_regs, switch_stack or thread structure).
1178 		 */
1179 		setfpreg(ld.r1, &fpr_final, regs);
1180 	}
1181 
1182 	/*
1183 	 * check for updates on any loads
1184 	 */
1185 	if (ld.op == 0x7 || ld.m)
1186 		emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1187 
1188 	/*
1189 	 * invalidate ALAT entry in case of advanced floating point loads
1190 	 */
1191 	if (ld.x6_op == 0x2)
1192 		invala_fr(ld.r1);
1193 
1194 	return 0;
1195 }
1196 
1197 
1198 static int
1199 emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1200 {
1201 	struct ia64_fpreg fpr_init;
1202 	struct ia64_fpreg fpr_final;
1203 	unsigned long len = float_fsz[ld.x6_sz];
1204 
1205 	/*
1206 	 * make sure we get clean buffers
1207 	 */
1208 	memset(&fpr_init,0, sizeof(fpr_init));
1209 	memset(&fpr_final,0, sizeof(fpr_final));
1210 
1211 	/*
1212 	 * if we get to this handler, Nat bits on both r3 and r2 have already
1213 	 * been checked. so we don't need to do it
1214 	 *
1215 	 * extract the value to be stored
1216 	 */
1217 	getfpreg(ld.imm, &fpr_init, regs);
1218 	/*
1219 	 * during this step, we extract the spilled registers from the saved
1220 	 * context i.e., we refill. Then we store (no spill) to temporary
1221 	 * context, i.e., we refill. Then we store (no spill) to a temporary
1222 	 */
1223 	switch( ld.x6_sz ) {
1224 		case 0:
1225 			float2mem_extended(&fpr_init, &fpr_final);
1226 			break;
1227 		case 1:
1228 			float2mem_integer(&fpr_init, &fpr_final);
1229 			break;
1230 		case 2:
1231 			float2mem_single(&fpr_init, &fpr_final);
1232 			break;
1233 		case 3:
1234 			float2mem_double(&fpr_init, &fpr_final);
1235 			break;
1236 	}
1237 	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1238 	DDUMP("fpr_init =", &fpr_init, len);
1239 	DDUMP("fpr_final =", &fpr_final, len);
1240 
1241 	if (copy_to_user((void *) ifa, &fpr_final, len))
1242 		return -1;
1243 
1244 	/*
1245 	 * stfX [r3]=r2,imm(9)
1246 	 *
1247 	 * NOTE:
1248 	 * ld.r3 can never be r0, because r0 would not generate an
1249 	 * unaligned access.
1250 	 */
1251 	if (ld.op == 0x7) {
1252 		unsigned long imm;
1253 
1254 		/*
1255 		 * form imm9: [12:6] contain the first 7 bits
1256 		 */
1257 		imm = ld.x << 7 | ld.r1;
1258 		/*
1259 		 * sign extend (8bits) if m set
1260 		 */
1261 		if (ld.m)
1262 			imm |= SIGN_EXT9;
1263 		/*
1264 		 * ifa == r3 (NaT is necessarily cleared)
1265 		 */
1266 		ifa += imm;
1267 
1268 		DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1269 
1270 		setreg(ld.r3, ifa, 0, regs);
1271 	}
1272 	/*
1273 	 * we don't have alat_invalidate_multiple() so we need
1274 	 * to do the complete flush :-<<
1275 	 */
1276 	ia64_invala();
1277 
1278 	return 0;
1279 }
1280 
1281 /*
1282  * Make sure we log the unaligned access, so that user/sysadmin can notice it and
1283  * eventually fix the program.  However, we don't want to do that for every access so we
1284  * pace it with jiffies.  This isn't really MP-safe, but it doesn't really have to be
1285  * either...
1286  */
1287 static int
1288 within_logging_rate_limit (void)
1289 {
1290 	static unsigned long count, last_time;
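	/*
	 * Roughly: allow a burst of a few messages, then stay quiet until
	 * about 5 seconds have passed since the last one was printed.  The
	 * unlocked static counters make this racy on MP, which is tolerated
	 * (see the comment above).
	 */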
1291 
1292 	if (jiffies - last_time > 5*HZ)
1293 		count = 0;
1294 	if (++count < 5) {
1295 		last_time = jiffies;
1296 		return 1;
1297 	}
1298 	return 0;
1299 
1300 }
1301 
1302 void
1303 ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1304 {
1305 	struct exception_fixup fix = { 0 };
1306 	struct ia64_psr *ipsr = ia64_psr(regs);
1307 	mm_segment_t old_fs = get_fs();
1308 	unsigned long bundle[2];
1309 	unsigned long opcode;
1310 	struct siginfo si;
1311 	union {
1312 		unsigned long l;
1313 		load_store_t insn;
1314 	} u;
1315 	int ret = -1;
1316 
1317 	if (ia64_psr(regs)->be) {
1318 		/* we don't support big-endian accesses */
1319 		die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
1320 		goto force_sigbus;
1321 	}
1322 
1323 	/*
1324 	 * Treat kernel accesses for which there is an exception handler entry the same as
1325 	 * user-level unaligned accesses.  Otherwise, a clever program could trick this
1326 	 * handler into reading arbitrary kernel addresses...
1327 	 */
1328 	if (!user_mode(regs)) {
1329 		fix = SEARCH_EXCEPTION_TABLE(regs);
1330 	}
1331 	if (user_mode(regs) || fix.cont) {
1332 		if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1333 			goto force_sigbus;
1334 
1335 		if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
1336 		    && within_logging_rate_limit())
1337 		{
1338 			char buf[200];	/* comm[] is at most 16 bytes... */
1339 			size_t len;
1340 
1341 			len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1342 				      "ip=0x%016lx\n\r", current->comm, current->pid,
1343 				      ifa, regs->cr_iip + ipsr->ri);
1344 			/*
1345 			 * Don't call tty_write_message() if we're in the kernel; we might
1346 			 * be holding locks...
1347 			 */
1348 			if (user_mode(regs))
1349 				tty_write_message(current->tty, buf);
1350 			buf[len-1] = '\0';	/* drop '\r' */
1351 			printk(KERN_WARNING "%s", buf);	/* watch for command names containing %s */
1352 		}
1353 	} else {
1354 		if (within_logging_rate_limit())
1355 			printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1356 			       ifa, regs->cr_iip + ipsr->ri);
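		/*
		 * Widen the address limit so that the copy_{from,to}_user()
		 * calls used by the emulation accept kernel addresses; the
		 * original limit is restored at the "done" label.
		 */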
1357 		set_fs(KERNEL_DS);
1358 	}
1359 
1360 	DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1361 	       regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1362 
1363 	if (__copy_from_user(bundle, (void *) regs->cr_iip, 16))
1364 		goto failure;
1365 
1366 	/*
1367 	 * extract the instruction from the bundle given the slot number
1368 	 */
1369 	switch (ipsr->ri) {
1370 	      case 0: u.l = (bundle[0] >>  5); break;
1371 	      case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1372 	      case 2: u.l = (bundle[1] >> 23); break;
1373 	}
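	/*
	 * An IA-64 bundle is 128 bits: a 5-bit template followed by three
	 * 41-bit instruction slots.  Slot 0 starts at bit 5, slot 2 ends at
	 * bit 127, and slot 1 straddles the two 64-bit words, hence the
	 * (bundle[0] >> 46) | (bundle[1] << 18) combination above.
	 */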
1374 	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
1375 
1376 	DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1377 	       "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1378 	       u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1379 
1380 	/*
1381 	 * IMPORTANT:
1382 	 * Notice that the switch statement DOES not cover all possible instructions
1383 	 * that DO generate unaligned references. This is done on purpose because for some
1384 	 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
1385 	 * is WRONG to try and emulate. Here is a list of instructions we don't emulate, i.e.,
1386 	 * the program will get a signal and die:
1387 	 *
1388 	 *	load/store:
1389 	 *		- ldX.spill
1390 	 *		- stX.spill
1391 	 *	Reason: RNATs are based on addresses
1392 	 *
1393 	 *	synchronization:
1394 	 *		- cmpxchg
1395 	 *		- fetchadd
1396 	 *		- xchg
1397 	 *	Reason: ATOMIC operations cannot be emulated properly using multiple
1398 	 *	        instructions.
1399 	 *
1400 	 *	speculative loads:
1401 	 *		- ldX.sZ
1402 	 *	Reason: side effects, code must be ready to deal with failure so simpler
1403 	 *		to let the load fail.
1404 	 * ---------------------------------------------------------------------------------
1405 	 * XXX fixme
1406 	 *
1407 	 * I would like to get rid of this switch case and do something
1408 	 * more elegant.
1409 	 */
1410 	switch (opcode) {
1411 	      case LDS_OP:
1412 	      case LDSA_OP:
1413 	      case LDS_IMM_OP:
1414 	      case LDSA_IMM_OP:
1415 	      case LDFS_OP:
1416 	      case LDFSA_OP:
1417 	      case LDFS_IMM_OP:
1418 		/*
1419 		 * The instruction will be retried with deferred exceptions turned on, and
1420 		 * we should get the NaT bit installed.
1421 		 *
1422 		 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1423 		 * are actually executed even though the operation failed. So we don't
1424 		 * need to take care of this.
1425 		 */
1426 		DPRINT("forcing PSR_ED\n");
1427 		regs->cr_ipsr |= IA64_PSR_ED;
1428 		goto done;
1429 
1430 	      case LD_OP:
1431 	      case LDA_OP:
1432 	      case LDBIAS_OP:
1433 	      case LDACQ_OP:
1434 	      case LDCCLR_OP:
1435 	      case LDCNC_OP:
1436 	      case LDCCLRACQ_OP:
1437 	      case LD_IMM_OP:
1438 	      case LDA_IMM_OP:
1439 	      case LDBIAS_IMM_OP:
1440 	      case LDACQ_IMM_OP:
1441 	      case LDCCLR_IMM_OP:
1442 	      case LDCNC_IMM_OP:
1443 	      case LDCCLRACQ_IMM_OP:
1444 		ret = emulate_load_int(ifa, u.insn, regs);
1445 		break;
1446 
1447 	      case ST_OP:
1448 	      case STREL_OP:
1449 	      case ST_IMM_OP:
1450 	      case STREL_IMM_OP:
1451 		ret = emulate_store_int(ifa, u.insn, regs);
1452 		break;
1453 
1454 	      case LDF_OP:
1455 	      case LDFA_OP:
1456 	      case LDFCCLR_OP:
1457 	      case LDFCNC_OP:
1458 	      case LDF_IMM_OP:
1459 	      case LDFA_IMM_OP:
1460 	      case LDFCCLR_IMM_OP:
1461 	      case LDFCNC_IMM_OP:
1462 		if (u.insn.x)
1463 			ret = emulate_load_floatpair(ifa, u.insn, regs);
1464 		else
1465 			ret = emulate_load_float(ifa, u.insn, regs);
1466 		break;
1467 
1468 	      case STF_OP:
1469 	      case STF_IMM_OP:
1470 		ret = emulate_store_float(ifa, u.insn, regs);
1471 		break;
1472 
1473 	      default:
1474 		goto failure;
1475 	}
1476 	DPRINT("ret=%d\n", ret);
1477 	if (ret)
1478 		goto failure;
1479 
1480 	if (ipsr->ri == 2)
1481 		/*
1482 		 * given today's architecture this case is not likely to happen because a
1483 		 * memory access instruction (M) can never be in the last slot of a
1484 		 * bundle. But let's keep it for now.
1485 		 */
1486 		regs->cr_iip += 16;
1487 	ipsr->ri = (ipsr->ri + 1) & 0x3;
1488 
1489 	DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1490   done:
1491 	set_fs(old_fs);		/* restore original address limit */
1492 	return;
1493 
1494   failure:
1495 	/* something went wrong... */
1496 	if (!user_mode(regs)) {
1497 		if (fix.cont) {
1498 			handle_exception(regs, fix);
1499 			goto done;
1500 		}
1501 		die_if_kernel("error during unaligned kernel access\n", regs, ret);
1502 		/* NOT_REACHED */
1503 	}
1504   force_sigbus:
1505 	si.si_signo = SIGBUS;
1506 	si.si_errno = 0;
1507 	si.si_code = BUS_ADRALN;
1508 	si.si_addr = (void *) ifa;
1509 	si.si_flags = 0;
1510 	si.si_isr = 0;
1511 	si.si_imm = 0;
1512 	force_sig_info(SIGBUS, &si, current);
1513 	goto done;
1514 }
1515