1/* Save all processor states
2 *
3 * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
4 * Copyright (c) 2007 Anthony Xu   <anthony.xu@intel.com>
5 */
6
7#include <asm/asmmacro.h>
8#include "asm-offsets.h"
9
10
11#define CTX(name)    VMM_CTX_##name##_OFFSET
12
	/*
	 * SAVE_BRANCH_REGS: store branch registers b0-b5 into the context.
	 *
	 *	r32:		context_t base address
	 *
	 * r2 walks the slots starting at CTX(B0) and r3 those starting at
	 * CTX(B1); every store but the last pair post-increments its pointer
	 * by 16, so r2 writes b0/b2/b4 and r3 writes b1/b3/b5.
	 * b6 and b7 are not saved by this macro.
	 * Scratch registers: r2, r3, r16, r17.
	 */
#define	SAVE_BRANCH_REGS			\
	add	r2 = CTX(B0),r32;		\
	add	r3 = CTX(B1),r32;		\
	mov	r16 = b0;			\
	mov	r17 = b1;			\
	;;					\
	st8	[r2]=r16,16;			\
	st8	[r3]=r17,16;			\
	;;					\
	mov	r16 = b2;			\
	mov	r17 = b3;			\
	;;					\
	st8	[r2]=r16,16;			\
	st8	[r3]=r17,16;			\
	;;					\
	mov	r16 = b4;			\
	mov	r17 = b5;			\
	;;					\
	st8	[r2]=r16;   			\
	st8	[r3]=r17;   			\
	;;
37
	/*
	 * RESTORE_BRANCH_REGS: reload branch registers b0-b5 from the context.
	 *
	 *	r33:		context_t base address
	 *
	 * Mirror image of SAVE_BRANCH_REGS: r2 reads the slots starting at
	 * CTX(B0) (b0/b2/b4) and r3 those starting at CTX(B1) (b1/b3/b5),
	 * each pointer stepping by 16 between loads.
	 * b6 and b7 are not touched.
	 * Scratch registers: r2, r3, r16, r17.
	 */
#define	RESTORE_BRANCH_REGS			\
	add	r2 = CTX(B0),r33;		\
	add	r3 = CTX(B1),r33;		\
	;;					\
	ld8	r16=[r2],16;			\
	ld8	r17=[r3],16;			\
	;;					\
	mov	b0 = r16;			\
	mov	b1 = r17;			\
	;;					\
	ld8	r16=[r2],16;			\
	ld8	r17=[r3],16;			\
	;;					\
	mov	b2 = r16;			\
	mov	b3 = r17;			\
	;;					\
	ld8	r16=[r2];   			\
	ld8	r17=[r3];   			\
	;;					\
	mov	b4=r16;				\
	mov	b5=r17;				\
	;;
63
64
	/*
	 * SAVE_GENERAL_REGS: spill r4-r7, r12 and r13 into the context.
	 *
	 *	r32: context_t base address
	 *	bsw == 1
	 *
	 * st8.spill is used so that each register's NaT bit is captured in
	 * ar.unat along with the value.  r2 starts at CTX(R4) and r3 at
	 * CTX(R5); after the r6/r7 pair both pointers are advanced by 48
	 * bytes so that the last pair lands in the r12/r13 slots.
	 * The .mem.offset directives annotate the relative offset of each
	 * spill for the assembler.
	 * Scratch registers: r2, r3.  ar.unat is modified by the spills.
	 */
#define	SAVE_GENERAL_REGS			\
	add	r2=CTX(R4),r32;			\
	add	r3=CTX(R5),r32;			\
	;;					\
.mem.offset 0,0;        			\
	st8.spill	[r2]=r4,16;		\
.mem.offset 8,0;        			\
	st8.spill	[r3]=r5,16;		\
	;;					\
.mem.offset 0,0;        			\
	st8.spill	[r2]=r6,48;		\
.mem.offset 8,0;        			\
	st8.spill	[r3]=r7,48;		\
	;;                          		\
.mem.offset 0,0;        			\
    st8.spill    [r2]=r12;			\
.mem.offset 8,0;				\
    st8.spill    [r3]=r13;			\
    ;;
89
	/*
	 * RESTORE_GENERAL_REGS: fill r4-r7, r12 and r13 from the context.
	 *
	 *	r33: context_t base address
	 *	bsw == 1
	 *
	 * ld8.fill takes each register's NaT bit from ar.unat, so ar.unat
	 * must already hold the value that was current when the registers
	 * were spilled.  Pointer stepping mirrors SAVE_GENERAL_REGS
	 * (16 between r4/r6 and r5/r7, then 48 to reach r12/r13).
	 * Scratch registers: r2, r3.
	 */
#define	RESTORE_GENERAL_REGS			\
	add	r2=CTX(R4),r33;			\
	add	r3=CTX(R5),r33;			\
	;;					\
	ld8.fill	r4=[r2],16;		\
	ld8.fill	r5=[r3],16;		\
	;;					\
	ld8.fill	r6=[r2],48;		\
	ld8.fill	r7=[r3],48;		\
	;;					\
	ld8.fill    r12=[r2];			\
	ld8.fill    r13 =[r3];			\
	;;
107
108
109
110
	/*
	 * SAVE_KERNEL_REGS: store kernel registers ar.k0-ar.k7 into the
	 * context.
	 *
	 *	r32:		context_t base address
	 *
	 * r2 starts at CTX(KR0) and handles the even-numbered registers,
	 * r3 starts at CTX(KR1) and handles the odd-numbered ones; both
	 * pointers step by 16 between stores.
	 * Scratch registers: r2, r3, r16, r17.
	 */
#define	SAVE_KERNEL_REGS			\
	add	r2 = CTX(KR0),r32;		\
	add	r3 = CTX(KR1),r32;		\
	mov	r16 = ar.k0;			\
	mov	r17 = ar.k1;			\
	;;		        		\
	st8	[r2] = r16,16;			\
	st8	[r3] = r17,16;			\
	;;		        		\
	mov	r16 = ar.k2;			\
	mov	r17 = ar.k3;			\
	;;		        		\
	st8	[r2] = r16,16;			\
	st8	[r3] = r17,16;			\
	;;					\
	mov	r16 = ar.k4;			\
	mov	r17 = ar.k5;			\
	;;				    	\
	st8	[r2] = r16,16;			\
	st8	[r3] = r17,16;			\
	;;					\
	mov	r16 = ar.k6;			\
	mov	r17 = ar.k7;			\
	;;		    			\
	st8	[r2] = r16;     		\
	st8	[r3] = r17;			\
	;;
141
142
143
	/*
	 * RESTORE_KERNEL_REGS: reload kernel registers ar.k0-ar.k7 from the
	 * context.
	 *
	 *	r33:		context_t base address
	 *
	 * r2 reads the even-numbered slots starting at CTX(KR0), r3 the
	 * odd-numbered ones starting at CTX(KR1), stepping by 16.  The final
	 * pair of loads also post-increments, which leaves r2/r3 pointing
	 * past the KR6/KR7 slots; the pointers are not used afterwards.
	 * Scratch registers: r2, r3, r16, r17.
	 */
#define	RESTORE_KERNEL_REGS			\
	add	r2 = CTX(KR0),r33;		\
	add	r3 = CTX(KR1),r33;		\
	;;		    			\
	ld8	r16=[r2],16;     		\
	ld8	r17=[r3],16;			\
	;;					\
	mov	ar.k0=r16;  			\
	mov	ar.k1=r17;	    		\
	;;		        		\
	ld8	r16=[r2],16;			\
	ld8	r17=[r3],16;			\
	;;		        		\
	mov	ar.k2=r16;   			\
	mov	ar.k3=r17;	    		\
	;;		        		\
	ld8	r16=[r2],16;			\
	ld8	r17=[r3],16;			\
	;;					\
	mov	ar.k4=r16;			\
	mov	ar.k5=r17;	    		\
	;;				    	\
	ld8	r16=[r2],16;			\
	ld8	r17=[r3],16;			\
	;;					\
	mov	ar.k6=r16;  			\
	mov	ar.k7=r17;	    		\
	;;
175
176
177
	/*
	 * SAVE_APP_REGS: store application registers into the context.
	 *
	 *	r32:		context_t base address
	 *
	 * Saved, in this order: ar.bspstore, ar.rnat, ar.fcr, ar.eflag,
	 * ar.cflg, ar.fsr, ar.fir, ar.fdr, ar.unat, ar.fpsr, ar.pfs, ar.lc.
	 *
	 * A single pointer (r2) walks the context: every store
	 * post-increments r2 by the distance between the current field and
	 * the next one (CTX(next) - CTX(current)), so the fields need not be
	 * adjacent.  Each instruction group stores the value read in the
	 * previous group and reads the next register.
	 * Scratch registers: r2, r16.
	 */
#define	SAVE_APP_REGS				\
	add  r2 = CTX(BSPSTORE),r32;		\
	mov  r16 = ar.bspstore;			\
	;;					\
	st8  [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
	mov  r16 = ar.rnat;			\
	;;					\
	st8  [r2] = r16,CTX(FCR)-CTX(RNAT);	\
	mov  r16 = ar.fcr;			\
	;;					\
	st8  [r2] = r16,CTX(EFLAG)-CTX(FCR);	\
	mov  r16 = ar.eflag;			\
	;;					\
	st8  [r2] = r16,CTX(CFLG)-CTX(EFLAG);	\
	mov  r16 = ar.cflg;			\
	;;					\
	st8  [r2] = r16,CTX(FSR)-CTX(CFLG);	\
	mov  r16 = ar.fsr;			\
	;;					\
	st8  [r2] = r16,CTX(FIR)-CTX(FSR);	\
	mov  r16 = ar.fir;			\
	;;					\
	st8  [r2] = r16,CTX(FDR)-CTX(FIR);	\
	mov  r16 = ar.fdr;			\
	;;					\
	st8  [r2] = r16,CTX(UNAT)-CTX(FDR);	\
	mov  r16 = ar.unat;			\
	;;					\
	st8  [r2] = r16,CTX(FPSR)-CTX(UNAT);	\
	mov  r16 = ar.fpsr;			\
	;;					\
	st8  [r2] = r16,CTX(PFS)-CTX(FPSR);	\
	mov  r16 = ar.pfs;			\
	;;					\
	st8  [r2] = r16,CTX(LC)-CTX(PFS);	\
	mov  r16 = ar.lc;			\
	;;					\
	st8  [r2] = r16;			\
	;;
220
	/*
	 * RESTORE_APP_REGS: reload application registers from the context.
	 *
	 *	r33:		context_t base address
	 *
	 * Restored, in this order: ar.bspstore, ar.rnat, ar.fcr, ar.eflag,
	 * ar.cflg, ar.fsr, ar.fir, ar.fdr, ar.unat, ar.fpsr, ar.pfs, ar.lc.
	 *
	 * r2 walks the context exactly as in SAVE_APP_REGS, using
	 * CTX(next) - CTX(current) as the post-increment.  Each instruction
	 * group writes the register loaded in the previous group and loads
	 * the value for the next one.
	 * Scratch registers: r2, r16.
	 */
#define	RESTORE_APP_REGS			\
	add  r2=CTX(BSPSTORE),r33;		\
	;;					\
	ld8  r16=[r2],CTX(RNAT)-CTX(BSPSTORE);	\
	;;					\
	mov  ar.bspstore=r16;			\
	ld8  r16=[r2],CTX(FCR)-CTX(RNAT);	\
	;;					\
	mov  ar.rnat=r16;			\
	ld8  r16=[r2],CTX(EFLAG)-CTX(FCR);	\
	;;					\
	mov  ar.fcr=r16;			\
	ld8  r16=[r2],CTX(CFLG)-CTX(EFLAG);	\
	;;					\
	mov  ar.eflag=r16;			\
	ld8  r16=[r2],CTX(FSR)-CTX(CFLG);	\
	;;					\
	mov  ar.cflg=r16;			\
	ld8  r16=[r2],CTX(FIR)-CTX(FSR);	\
	;;					\
	mov  ar.fsr=r16;			\
	ld8  r16=[r2],CTX(FDR)-CTX(FIR);	\
	;;					\
	mov  ar.fir=r16;			\
	ld8  r16=[r2],CTX(UNAT)-CTX(FDR);	\
	;;					\
	mov  ar.fdr=r16;			\
	ld8  r16=[r2],CTX(FPSR)-CTX(UNAT);	\
	;;					\
	mov  ar.unat=r16;			\
	ld8  r16=[r2],CTX(PFS)-CTX(FPSR);	\
	;;					\
	mov  ar.fpsr=r16;			\
	ld8  r16=[r2],CTX(LC)-CTX(PFS);		\
	;;					\
	mov  ar.pfs=r16;			\
	ld8  r16=[r2];				\
	;;					\
	mov  ar.lc=r16;				\
	;;
264
	/*
	 * SAVE_CTL_REGS: store control registers cr.dcr, cr.iva and cr.pta
	 * into the context.
	 *
	 *	r32:		context_t base address
	 *
	 * r2 walks from CTX(DCR) to CTX(IVA) to CTX(PTA) using the offset
	 * differences as post-increments.
	 * Scratch registers: r2, r16.
	 */
#define	SAVE_CTL_REGS				\
	add	r2 = CTX(DCR),r32;		\
	mov	r16 = cr.dcr;			\
	;;					\
	st8	[r2] = r16,CTX(IVA)-CTX(DCR);	\
	;;                          		\
	mov	r16 = cr.iva;			\
	;;					\
	st8	[r2] = r16,CTX(PTA)-CTX(IVA);	\
	;;					\
	mov r16 = cr.pta;			\
	;;					\
	st8 [r2] = r16 ;			\
	;;
282
	/*
	 * RESTORE_CTL_REGS: reload control registers cr.dcr, cr.iva and
	 * cr.pta from the context.
	 *
	 *	r33:		context_t base address
	 *
	 * r2 walks from CTX(DCR) to CTX(IVA) to CTX(PTA).  Every control
	 * register write is followed by dv_serialize_data, which is not
	 * defined in this file (presumably provided by <asm/asmmacro.h>).
	 * Scratch registers: r2, r16.
	 */
#define	RESTORE_CTL_REGS				\
	add	r2 = CTX(DCR),r33;	        	\
	;;						\
	ld8	r16 = [r2],CTX(IVA)-CTX(DCR);		\
	;;                      			\
	mov	cr.dcr = r16;				\
	dv_serialize_data;				\
	;;						\
	ld8	r16 = [r2],CTX(PTA)-CTX(IVA);		\
	;;						\
	mov	cr.iva = r16;				\
	dv_serialize_data;				\
	;;						\
	ld8 r16 = [r2];					\
	;;						\
	mov cr.pta = r16;				\
	dv_serialize_data;				\
	;;
304
305
	/*
	 * SAVE_REGION_REGS: store region registers rr0-rr5 and rr7 into the
	 * context.
	 *
	 *	r32:		context_t base address
	 *
	 * A region register is selected by bits 63:61 of the index operand;
	 * "dep.z r18=N,61,3" builds that index for region N (rr0 uses r0).
	 * Slots are 8 bytes apart starting at CTX(RR0).  rr6 is not saved:
	 * after the rr5 store the pointer is advanced by 16 so that the rr6
	 * slot is skipped and rr7 lands in its own slot.
	 * Scratch registers: r2, r16, r17, r18.
	 */
#define	SAVE_REGION_REGS			\
	add	r2=CTX(RR0),r32;		\
	mov	r16=rr[r0];			\
	dep.z	r18=1,61,3;			\
	;;					\
	st8	[r2]=r16,8;			\
	mov	r17=rr[r18];			\
	dep.z	r18=2,61,3;			\
	;;					\
	st8	[r2]=r17,8;			\
	mov	r16=rr[r18];			\
	dep.z	r18=3,61,3;			\
	;;					\
	st8	[r2]=r16,8;			\
	mov	r17=rr[r18];			\
	dep.z	r18=4,61,3;			\
	;;					\
	st8	[r2]=r17,8;			\
	mov	r16=rr[r18];			\
	dep.z	r18=5,61,3;			\
	;;					\
	st8	[r2]=r16,8;			\
	mov	r17=rr[r18];			\
	dep.z	r18=7,61,3;			\
	;;					\
	st8	[r2]=r17,16;			\
	mov	r16=rr[r18];			\
	;;					\
	st8	[r2]=r16,8;			\
	;;
339
	/*
	 * RESTORE_REGION_REGS: reload region registers rr0-rr5 and rr7 from
	 * the context.
	 *
	 *	r33:context_t base address
	 *
	 * All seven values are first loaded into r20-r25 and r27 (the rr6
	 * slot is skipped by the 16-byte post-increment after rr5), then
	 * written to the region registers one per instruction group, with
	 * the region index built in bits 63:61 of r18.  A single srlz.i at
	 * the end serializes the region register updates.
	 * Scratch registers: r2, r18, r20-r25, r27.
	 */
#define	RESTORE_REGION_REGS	\
	add	r2=CTX(RR0),r33;\
	mov r18=r0;		\
	;;			\
	ld8	r20=[r2],8;	\
	;;	/* rr0 */	\
	ld8	r21=[r2],8;	\
	;;	/* rr1 */	\
	ld8	r22=[r2],8;	\
	;;	/* rr2 */	\
	ld8	r23=[r2],8;	\
	;;	/* rr3 */	\
	ld8	r24=[r2],8;	\
	;;	/* rr4 */	\
	ld8	r25=[r2],16;	\
	;;	/* rr5 */	\
	ld8	r27=[r2];	\
	;;	/* rr7 */	\
	mov rr[r18]=r20;	\
	dep.z	r18=1,61,3;	\
	;;  /* rr1 */		\
	mov rr[r18]=r21;	\
	dep.z	r18=2,61,3;	\
	;;  /* rr2 */		\
	mov rr[r18]=r22;	\
	dep.z	r18=3,61,3;	\
	;;  /* rr3 */		\
	mov rr[r18]=r23;	\
	dep.z	r18=4,61,3;	\
	;;  /* rr4 */		\
	mov rr[r18]=r24;	\
	dep.z	r18=5,61,3;	\
	;;  /* rr5 */		\
	mov rr[r18]=r25;	\
	dep.z	r18=7,61,3;	\
	;;  /* rr7 */		\
	mov rr[r18]=r27;	\
	;;			\
	srlz.i;			\
	;;
383
384
385
386	/*
387	 *	r32:	context_t base address
388	 *	r36~r39:scratch registers
389	 */
390#define	SAVE_DEBUG_REGS				\
391	add	r2=CTX(IBR0),r32;		\
392	add	r3=CTX(DBR0),r32;		\
393	mov	r16=ibr[r0];			\
394	mov	r17=dbr[r0];			\
395	;;					\
396	st8	[r2]=r16,8; 			\
397	st8	[r3]=r17,8;	    		\
398	add	r18=1,r0;		    	\
399	;;					\
400	mov	r16=ibr[r18];			\
401	mov	r17=dbr[r18];			\
402	;;					\
403	st8	[r2]=r16,8;		    	\
404	st8	[r3]=r17,8;			\
405	add	r18=2,r0;			\
406	;;					\
407	mov	r16=ibr[r18];			\
408	mov	r17=dbr[r18];			\
409	;;					\
410	st8	[r2]=r16,8;		    	\
411	st8	[r3]=r17,8;			\
412	add	r18=2,r0;			\
413	;;					\
414	mov	r16=ibr[r18];			\
415	mov	r17=dbr[r18];			\
416	;;					\
417	st8	[r2]=r16,8;		    	\
418	st8	[r3]=r17,8;			\
419	add	r18=3,r0;			\
420	;;					\
421	mov	r16=ibr[r18];			\
422	mov	r17=dbr[r18];			\
423	;;					\
424	st8	[r2]=r16,8;		    	\
425	st8	[r3]=r17,8;			\
426	add	r18=4,r0;			\
427	;;					\
428	mov	r16=ibr[r18];			\
429	mov	r17=dbr[r18];			\
430	;;					\
431	st8	[r2]=r16,8;		    	\
432	st8	[r3]=r17,8;			\
433	add	r18=5,r0;			\
434	;;					\
435	mov	r16=ibr[r18];			\
436	mov	r17=dbr[r18];			\
437	;;					\
438	st8	[r2]=r16,8;		    	\
439	st8	[r3]=r17,8;			\
440	add	r18=6,r0;			\
441	;;					\
442	mov	r16=ibr[r18];			\
443	mov	r17=dbr[r18];			\
444	;;					\
445	st8	[r2]=r16,8;		    	\
446	st8	[r3]=r17,8;			\
447	add	r18=7,r0;			\
448	;;					\
449	mov	r16=ibr[r18];			\
450	mov	r17=dbr[r18];			\
451	;;					\
452	st8	[r2]=r16,8;		    	\
453	st8	[r3]=r17,8;			\
454	;;
455
456
/*
 * RESTORE_DEBUG_REGS: reload ibr[0..7] and dbr[0..7] from the context.
 *
 *      r33:    points to the context_t structure
 *
 * ar.lc is loaded with 7, so the br.cloop loop body runs eight times.
 * On each pass r2/r3 read the next 8-byte slot after CTX(IBR0)/CTX(DBR0),
 * r17 is the register index, and srlz.i serializes the register writes
 * before the index is advanced.
 * Clobbers: ar.lc, r2, r3, r16, r17, r18, r19.
 */
#define RESTORE_DEBUG_REGS			\
	add	r2=CTX(IBR0),r33;		\
	add	r3=CTX(DBR0),r33;		\
	mov r16=7;    				\
	mov r17=r0;				\
	;;                    			\
	mov ar.lc = r16;			\
	;; 					\
1:						\
	ld8 r18=[r2],8;		    		\
	ld8 r19=[r3],8;				\
	;;					\
	mov ibr[r17]=r18;			\
	mov dbr[r17]=r19;			\
	;;   					\
	srlz.i;					\
	;; 					\
	add r17=1,r17;				\
	br.cloop.sptk 1b;			\
	;;
481
482
	/*
	 * SAVE_FPU_LOW: spill floating-point registers f2-f31 into the
	 * context.
	 *
	 *	r32:		context_t base address
	 *
	 * r2 starts at CTX(F2) and handles the even-numbered registers,
	 * r3 starts at CTX(F3) and handles the odd-numbered ones; each
	 * pointer advances by 32 bytes per store.  stf.spill writes the
	 * register in its 16-byte spill format; the .nta completer is a
	 * non-temporal locality hint.
	 * Scratch registers: r2, r3.
	 */
#define	SAVE_FPU_LOW				\
	add	r2=CTX(F2),r32;			\
	add	r3=CTX(F3),r32;			\
	;;					\
	stf.spill.nta	[r2]=f2,32;		\
	stf.spill.nta	[r3]=f3,32;		\
	;;					\
	stf.spill.nta	[r2]=f4,32;		\
	stf.spill.nta	[r3]=f5,32;		\
	;;					\
	stf.spill.nta	[r2]=f6,32;		\
	stf.spill.nta	[r3]=f7,32;		\
	;;					\
	stf.spill.nta	[r2]=f8,32;		\
	stf.spill.nta	[r3]=f9,32;		\
	;;					\
	stf.spill.nta	[r2]=f10,32;		\
	stf.spill.nta	[r3]=f11,32;		\
	;;					\
	stf.spill.nta	[r2]=f12,32;		\
	stf.spill.nta	[r3]=f13,32;		\
	;;					\
	stf.spill.nta	[r2]=f14,32;		\
	stf.spill.nta	[r3]=f15,32;		\
	;;					\
	stf.spill.nta	[r2]=f16,32;		\
	stf.spill.nta	[r3]=f17,32;		\
	;;					\
	stf.spill.nta	[r2]=f18,32;		\
	stf.spill.nta	[r3]=f19,32;		\
	;;					\
	stf.spill.nta	[r2]=f20,32;		\
	stf.spill.nta	[r3]=f21,32;		\
	;;					\
	stf.spill.nta	[r2]=f22,32;		\
	stf.spill.nta	[r3]=f23,32;		\
	;;					\
	stf.spill.nta	[r2]=f24,32;		\
	stf.spill.nta	[r3]=f25,32;		\
	;;					\
	stf.spill.nta	[r2]=f26,32;		\
	stf.spill.nta	[r3]=f27,32;		\
	;;					\
	stf.spill.nta	[r2]=f28,32;		\
	stf.spill.nta	[r3]=f29,32;		\
	;;					\
	stf.spill.nta	[r2]=f30;		\
	stf.spill.nta	[r3]=f31;		\
	;;
535
	/*
	 * SAVE_FPU_HIGH: spill floating-point registers f32-f127 into the
	 * context.
	 *
	 *	r32:		context_t base address
	 *
	 * Same scheme as SAVE_FPU_LOW: r2 starts at CTX(F32) and stores the
	 * even-numbered registers, r3 starts at CTX(F33) and stores the
	 * odd-numbered ones, each pointer advancing by 32 bytes per store.
	 * Scratch registers: r2, r3.
	 */
#define	SAVE_FPU_HIGH				\
	add	r2=CTX(F32),r32;		\
	add	r3=CTX(F33),r32;		\
	;;					\
	stf.spill.nta	[r2]=f32,32;		\
	stf.spill.nta	[r3]=f33,32;		\
	;;					\
	stf.spill.nta	[r2]=f34,32;		\
	stf.spill.nta	[r3]=f35,32;		\
	;;					\
	stf.spill.nta	[r2]=f36,32;		\
	stf.spill.nta	[r3]=f37,32;		\
	;;					\
	stf.spill.nta	[r2]=f38,32;		\
	stf.spill.nta	[r3]=f39,32;		\
	;;					\
	stf.spill.nta	[r2]=f40,32;		\
	stf.spill.nta	[r3]=f41,32;		\
	;;					\
	stf.spill.nta	[r2]=f42,32;		\
	stf.spill.nta	[r3]=f43,32;		\
	;;					\
	stf.spill.nta	[r2]=f44,32;		\
	stf.spill.nta	[r3]=f45,32;		\
	;;					\
	stf.spill.nta	[r2]=f46,32;		\
	stf.spill.nta	[r3]=f47,32;		\
	;;					\
	stf.spill.nta	[r2]=f48,32;		\
	stf.spill.nta	[r3]=f49,32;		\
	;;					\
	stf.spill.nta	[r2]=f50,32;		\
	stf.spill.nta	[r3]=f51,32;		\
	;;					\
	stf.spill.nta	[r2]=f52,32;		\
	stf.spill.nta	[r3]=f53,32;		\
	;;					\
	stf.spill.nta	[r2]=f54,32;		\
	stf.spill.nta	[r3]=f55,32;		\
	;;					\
	stf.spill.nta	[r2]=f56,32;		\
	stf.spill.nta	[r3]=f57,32;		\
	;;					\
	stf.spill.nta	[r2]=f58,32;		\
	stf.spill.nta	[r3]=f59,32;		\
	;;					\
	stf.spill.nta	[r2]=f60,32;		\
	stf.spill.nta	[r3]=f61,32;		\
	;;					\
	stf.spill.nta	[r2]=f62,32;		\
	stf.spill.nta	[r3]=f63,32;		\
	;;					\
	stf.spill.nta	[r2]=f64,32;		\
	stf.spill.nta	[r3]=f65,32;		\
	;;					\
	stf.spill.nta	[r2]=f66,32;		\
	stf.spill.nta	[r3]=f67,32;		\
	;;					\
	stf.spill.nta	[r2]=f68,32;		\
	stf.spill.nta	[r3]=f69,32;		\
	;;					\
	stf.spill.nta	[r2]=f70,32;		\
	stf.spill.nta	[r3]=f71,32;		\
	;;					\
	stf.spill.nta	[r2]=f72,32;		\
	stf.spill.nta	[r3]=f73,32;		\
	;;					\
	stf.spill.nta	[r2]=f74,32;		\
	stf.spill.nta	[r3]=f75,32;		\
	;;					\
	stf.spill.nta	[r2]=f76,32;		\
	stf.spill.nta	[r3]=f77,32;		\
	;;					\
	stf.spill.nta	[r2]=f78,32;		\
	stf.spill.nta	[r3]=f79,32;		\
	;;					\
	stf.spill.nta	[r2]=f80,32;		\
	stf.spill.nta	[r3]=f81,32;		\
	;;					\
	stf.spill.nta	[r2]=f82,32;		\
	stf.spill.nta	[r3]=f83,32;		\
	;;					\
	stf.spill.nta	[r2]=f84,32;		\
	stf.spill.nta	[r3]=f85,32;		\
	;;					\
	stf.spill.nta	[r2]=f86,32;		\
	stf.spill.nta	[r3]=f87,32;		\
	;;					\
	stf.spill.nta	[r2]=f88,32;		\
	stf.spill.nta	[r3]=f89,32;		\
	;;					\
	stf.spill.nta	[r2]=f90,32;		\
	stf.spill.nta	[r3]=f91,32;		\
	;;					\
	stf.spill.nta	[r2]=f92,32;		\
	stf.spill.nta	[r3]=f93,32;		\
	;;					\
	stf.spill.nta	[r2]=f94,32;		\
	stf.spill.nta	[r3]=f95,32;		\
	;;					\
	stf.spill.nta	[r2]=f96,32;		\
	stf.spill.nta	[r3]=f97,32;		\
	;;					\
	stf.spill.nta	[r2]=f98,32;		\
	stf.spill.nta	[r3]=f99,32;		\
	;;					\
	stf.spill.nta	[r2]=f100,32;		\
	stf.spill.nta	[r3]=f101,32;		\
	;;					\
	stf.spill.nta	[r2]=f102,32;		\
	stf.spill.nta	[r3]=f103,32;		\
	;;					\
	stf.spill.nta	[r2]=f104,32;		\
	stf.spill.nta	[r3]=f105,32;		\
	;;					\
	stf.spill.nta	[r2]=f106,32;		\
	stf.spill.nta	[r3]=f107,32;		\
	;;					\
	stf.spill.nta	[r2]=f108,32;		\
	stf.spill.nta	[r3]=f109,32;		\
	;;					\
	stf.spill.nta	[r2]=f110,32;		\
	stf.spill.nta	[r3]=f111,32;		\
	;;					\
	stf.spill.nta	[r2]=f112,32;		\
	stf.spill.nta	[r3]=f113,32;		\
	;;					\
	stf.spill.nta	[r2]=f114,32;		\
	stf.spill.nta	[r3]=f115,32;		\
	;;					\
	stf.spill.nta	[r2]=f116,32;		\
	stf.spill.nta	[r3]=f117,32;		\
	;;					\
	stf.spill.nta	[r2]=f118,32;		\
	stf.spill.nta	[r3]=f119,32;		\
	;;					\
	stf.spill.nta	[r2]=f120,32;		\
	stf.spill.nta	[r3]=f121,32;		\
	;;					\
	stf.spill.nta	[r2]=f122,32;		\
	stf.spill.nta	[r3]=f123,32;		\
	;;					\
	stf.spill.nta	[r2]=f124,32;		\
	stf.spill.nta	[r3]=f125,32;		\
	;;					\
	stf.spill.nta	[r2]=f126;		\
	stf.spill.nta	[r3]=f127;		\
	;;
687
     /*
      * RESTORE_FPU_LOW: fill floating-point registers f2-f31 from the
      * context.
      *
      *      r33:    points to the context_t structure
      *
      * r2 starts at CTX(F2) and loads the even-numbered registers, r3
      * starts at CTX(F3) and loads the odd-numbered ones; each pointer
      * advances by 32 bytes per load (including after the last pair,
      * whose updated pointers are not used).
      * Scratch registers: r2, r3.
      */
#define	RESTORE_FPU_LOW				\
    add     r2 = CTX(F2), r33;			\
    add     r3 = CTX(F3), r33;			\
    ;;						\
    ldf.fill.nta f2 = [r2], 32;			\
    ldf.fill.nta f3 = [r3], 32;			\
    ;;						\
    ldf.fill.nta f4 = [r2], 32;			\
    ldf.fill.nta f5 = [r3], 32;			\
    ;;						\
    ldf.fill.nta f6 = [r2], 32;			\
    ldf.fill.nta f7 = [r3], 32;			\
    ;;						\
    ldf.fill.nta f8 = [r2], 32;			\
    ldf.fill.nta f9 = [r3], 32;			\
    ;;						\
    ldf.fill.nta f10 = [r2], 32;		\
    ldf.fill.nta f11 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f12 = [r2], 32;		\
    ldf.fill.nta f13 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f14 = [r2], 32;		\
    ldf.fill.nta f15 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f16 = [r2], 32;		\
    ldf.fill.nta f17 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f18 = [r2], 32;		\
    ldf.fill.nta f19 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f20 = [r2], 32;		\
    ldf.fill.nta f21 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f22 = [r2], 32;		\
    ldf.fill.nta f23 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f24 = [r2], 32;		\
    ldf.fill.nta f25 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f26 = [r2], 32;		\
    ldf.fill.nta f27 = [r3], 32;		\
	;;					\
    ldf.fill.nta f28 = [r2], 32;		\
    ldf.fill.nta f29 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f30 = [r2], 32;		\
    ldf.fill.nta f31 = [r3], 32;		\
    ;;
740
741
742
    /*
     * RESTORE_FPU_HIGH: fill floating-point registers f32-f127 from the
     * context.
     *
     *      r33:    points to the context_t structure
     *
     * Same scheme as RESTORE_FPU_LOW: r2 starts at CTX(F32) and loads
     * the even-numbered registers, r3 starts at CTX(F33) and loads the
     * odd-numbered ones, each pointer advancing by 32 bytes per load.
     * Scratch registers: r2, r3.
     */
#define	RESTORE_FPU_HIGH			\
    add     r2 = CTX(F32), r33;			\
    add     r3 = CTX(F33), r33;			\
    ;;						\
    ldf.fill.nta f32 = [r2], 32;		\
    ldf.fill.nta f33 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f34 = [r2], 32;		\
    ldf.fill.nta f35 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f36 = [r2], 32;		\
    ldf.fill.nta f37 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f38 = [r2], 32;		\
    ldf.fill.nta f39 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f40 = [r2], 32;		\
    ldf.fill.nta f41 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f42 = [r2], 32;		\
    ldf.fill.nta f43 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f44 = [r2], 32;		\
    ldf.fill.nta f45 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f46 = [r2], 32;		\
    ldf.fill.nta f47 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f48 = [r2], 32;		\
    ldf.fill.nta f49 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f50 = [r2], 32;		\
    ldf.fill.nta f51 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f52 = [r2], 32;		\
    ldf.fill.nta f53 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f54 = [r2], 32;		\
    ldf.fill.nta f55 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f56 = [r2], 32;		\
    ldf.fill.nta f57 = [r3], 32;   		\
    ;;						\
    ldf.fill.nta f58 = [r2], 32;		\
    ldf.fill.nta f59 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f60 = [r2], 32;		\
    ldf.fill.nta f61 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f62 = [r2], 32;		\
    ldf.fill.nta f63 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f64 = [r2], 32;		\
    ldf.fill.nta f65 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f66 = [r2], 32;		\
    ldf.fill.nta f67 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f68 = [r2], 32;		\
    ldf.fill.nta f69 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f70 = [r2], 32;		\
    ldf.fill.nta f71 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f72 = [r2], 32;		\
    ldf.fill.nta f73 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f74 = [r2], 32;		\
    ldf.fill.nta f75 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f76 = [r2], 32;		\
    ldf.fill.nta f77 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f78 = [r2], 32;		\
    ldf.fill.nta f79 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f80 = [r2], 32;		\
    ldf.fill.nta f81 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f82 = [r2], 32;		\
    ldf.fill.nta f83 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f84 = [r2], 32;		\
    ldf.fill.nta f85 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f86 = [r2], 32;		\
    ldf.fill.nta f87 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f88 = [r2], 32;		\
    ldf.fill.nta f89 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f90 = [r2], 32;		\
    ldf.fill.nta f91 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f92 = [r2], 32;		\
    ldf.fill.nta f93 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f94 = [r2], 32;		\
    ldf.fill.nta f95 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f96 = [r2], 32;		\
    ldf.fill.nta f97 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f98 = [r2], 32;		\
    ldf.fill.nta f99 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f100 = [r2], 32;		\
    ldf.fill.nta f101 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f102 = [r2], 32;		\
    ldf.fill.nta f103 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f104 = [r2], 32;		\
    ldf.fill.nta f105 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f106 = [r2], 32;		\
    ldf.fill.nta f107 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f108 = [r2], 32;		\
    ldf.fill.nta f109 = [r3], 32;   		\
    ;;						\
    ldf.fill.nta f110 = [r2], 32;		\
    ldf.fill.nta f111 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f112 = [r2], 32;		\
    ldf.fill.nta f113 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f114 = [r2], 32;		\
    ldf.fill.nta f115 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f116 = [r2], 32;		\
    ldf.fill.nta f117 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f118 = [r2], 32;		\
    ldf.fill.nta f119 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f120 = [r2], 32;		\
    ldf.fill.nta f121 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f122 = [r2], 32;		\
    ldf.fill.nta f123 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f124 = [r2], 32;		\
    ldf.fill.nta f125 = [r3], 32;		\
    ;;						\
    ldf.fill.nta f126 = [r2], 32;		\
    ldf.fill.nta f127 = [r3], 32;		\
    ;;
894
	/*
	 * SAVE_PTK_REGS: store protection key registers pkr[0..7] into the
	 * context.
	 *
	 *	r32:		context_t base address
	 *
	 * ar.lc is loaded with 7, so the br.cloop loop body runs eight
	 * times; r17 is the pkr index and r2 walks the 8-byte slots starting
	 * at CTX(PKR0).
	 * Clobbers: ar.lc, r2, r16, r17, r18.
	 */
#define	SAVE_PTK_REGS				\
    add r2=CTX(PKR0), r32;			\
    mov r16=7;    				\
    ;;                         			\
    mov ar.lc=r16;  				\
    mov r17=r0;					\
    ;;						\
1:						\
    mov r18=pkr[r17];				\
    ;;                     			\
    srlz.i;					\
    ;; 						\
    st8 [r2]=r18, 8;				\
    ;;    					\
    add r17 =1,r17;				\
    ;;                     			\
    br.cloop.sptk 1b;				\
    ;;
916
/*
 * RESTORE_PTK_REGS: reload protection key registers pkr[0..7] from the
 * context.
 *
 *      r33:    points to the context_t structure
 *
 * ar.lc is loaded with 7, so the br.cloop loop body runs eight times;
 * r17 is the pkr index and r2 walks the 8-byte slots starting at
 * CTX(PKR0).  srlz.i serializes each pkr write before the next pass.
 * Clobbers: ar.lc, r2, r16, r17, r18.
 */
#define RESTORE_PTK_REGS	    		\
    add r2=CTX(PKR0), r33;			\
    mov r16=7;    				\
    ;;                         			\
    mov ar.lc=r16;  				\
    mov r17=r0;					\
    ;;						\
1: 						\
    ld8 r18=[r2], 8;				\
    ;;						\
    mov pkr[r17]=r18;				\
    ;;    					\
    srlz.i;					\
    ;; 						\
    add r17 =1,r17;				\
    ;;                     			\
    br.cloop.sptk 1b;				\
    ;;
939
940
/*
 * void vmm_trampoline( context_t * from,
 *			context_t * to)
 *
 * 	from:	r32
 *	to:	r33
 *  note: interrupts must be disabled before calling this function.
 *
 * Saves the current processor state into *from and then loads the state
 * held in *to, finally returning through the b0 value restored from *to.
 *
 * Save phase (into r32):
 *   - psr, pr and ar.unat go to three consecutive 8-byte slots starting
 *     at CTX(PSR); ar.rsc goes to CTX(RSC).
 *   - ar.rsc is cleared and the register stack is flushed to memory with
 *     flushrs before anything else is saved.
 *   - general, kernel, application, branch, control and region registers
 *     are saved by the SAVE_* macros; psr.dfl / psr.dfh are cleared
 *     before the low / high floating-point partitions are spilled so
 *     that those registers can be accessed; protection keys come last.
 *   - the debug register save/restore is currently disabled.
 *
 * Restore phase (from r33) runs the RESTORE_* macros in the reverse
 * order, then reloads psr.l, pr, ar.unat and ar.rsc.
 *
 * The macros use r2, r3, r16-r27 and ar.lc as scratch.
 */
GLOBAL_ENTRY(vmm_trampoline)
    mov r16 = psr
    adds r2 = CTX(PSR), r32
    ;;
    st8 [r2] = r16, 8       // psr
    mov r17 = pr
    ;;
    st8 [r2] = r17, 8       // pr
    mov r18 = ar.unat
    ;;
    st8 [r2] = r18          // unat
    mov r17 = ar.rsc
    ;;
    adds r2 = CTX(RSC),r32
    ;;
    st8 [r2]= r17
    mov ar.rsc =0
    ;;                      // flushrs must be the first instruction of its group
    flushrs
    ;;
    SAVE_GENERAL_REGS
    ;;
    SAVE_KERNEL_REGS
    ;;
    SAVE_APP_REGS
    ;;
    SAVE_BRANCH_REGS
    ;;
    SAVE_CTL_REGS
    ;;
    SAVE_REGION_REGS
    ;;
    //SAVE_DEBUG_REGS
    ;;
    rsm  psr.dfl            // enable access to the low FP partition
    ;;
    srlz.d
    ;;
    SAVE_FPU_LOW
    ;;
    rsm  psr.dfh            // enable access to the high FP partition
    ;;
    srlz.d
    ;;
    SAVE_FPU_HIGH
    ;;
    SAVE_PTK_REGS
    ;;
    RESTORE_PTK_REGS
    ;;
    RESTORE_FPU_HIGH
    ;;
    RESTORE_FPU_LOW
    ;;
    //RESTORE_DEBUG_REGS
    ;;
    RESTORE_REGION_REGS
    ;;
    RESTORE_CTL_REGS
    ;;
    RESTORE_BRANCH_REGS
    ;;
    RESTORE_APP_REGS
    ;;
    RESTORE_KERNEL_REGS
    ;;
    RESTORE_GENERAL_REGS
    ;;
    adds r2=CTX(PSR), r33
    ;;
    ld8 r16=[r2], 8       // psr
    ;;
    mov psr.l=r16
    ;;
    srlz.d
    ;;
    ld8 r16=[r2], 8       // pr
    ;;
    mov pr =r16,-1
    ld8 r16=[r2]       // unat
    ;;
    mov ar.unat=r16
    ;;
    adds r2=CTX(RSC),r33
    ;;
    ld8 r16 =[r2]
    ;;
    mov ar.rsc = r16
    ;;
    br.ret.sptk.few b0    // b0 was reloaded from *to by RESTORE_BRANCH_REGS
END(vmm_trampoline)
1039