/* $Id: VIScsum.S,v 1.6 2000/02/20 23:21:39 davem Exp $
 * VIScsum.S: High bandwidth IP checksumming utilizing the UltraSparc
 *            Visual Instruction Set.
 *
 * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 * Copyright (C) 2000 David S. Miller (davem@redhat.com)
 *
 * Based on older sparc32/sparc64 checksum.S, which is:
 *
 *      Copyright(C) 1995 Linus Torvalds
 *      Copyright(C) 1995 Miguel de Icaza
 *      Copyright(C) 1996, 1997 David S. Miller
 *    derived from:
 *	  Linux/Alpha checksum c-code
 *        Linux/ix86 inline checksum assembly
 *        RFC1071 Computing the Internet Checksum (esp. Jacobsons m68k code)
 *	  David Mosberger-Tang for optimized reference c-code
 *	  BSD4.4 portable checksum routine
 */

/* STACKOFF is the offset from %sp of the 8-byte scratch slot that
 * END_THE_TRICK uses to move a 64-bit value from a VIS register to an
 * integer register (std to the slot, ldx back from it).
 * NOTE(review): 2175 == 2047 + 128, presumably the V9 stack bias plus the
 * register-window save area; confirm against the ABI.
 */
#ifdef __sparc_v9__
#define STACKOFF	2175
#else
#define STACKOFF	64
#endif

/* Outside the kernel the two constants normally supplied by the kernel
 * headers are defined locally.  ASI_BLK_P is written to %asi before the
 * ldda block loads in csum_partial.
 * NOTE(review): FRPS_FEF is not referenced anywhere in this file and its
 * spelling looks like a transposition of FPRS_FEF; left untouched in case
 * something outside this file depends on the name.
 */
#ifdef __KERNEL__
#include <asm/head.h>
#include <asm/asi.h>
#include <asm/visasm.h>
#include <asm/asm_offsets.h>
#else
#define ASI_BLK_P	0xf0
#define FRPS_FEF	0x04
#endif

/* Dobrou noc, SunSoft engineers. Spete sladce.
 * (Czech: "Good night, SunSoft engineers. Sleep sweetly.")
 * This has a couple of tricks in it, and those
 * tricks are UltraLinux trade secrets :))
 */

/*
 * START_THE_TRICK(fz, f0, f2, f4, f6, f8, f10)
 *
 * Opens the carry-counting pipeline for one 64-byte block.
 *   fz       - FP register that the caller has cleared with fzero.
 *   f0..f10  - first six double registers of the block just loaded.
 *
 * Each fcmpgt32 compares fz against a data register and writes its result
 * to an integer register (%g1, %g2, %g3, %g5, %g7, %o3).  Per the VIS
 * definition that result is a 2-bit mask, one bit per 32-bit lane, so
 * "inc; srl 1" turns it into the number of lanes that compared true
 * (0,1,2,3 -> 0,1,1,2).  Those counts are accumulated in %o2.
 *
 * On exit the counts from %g1/%g2/%g3 are already in %o2; %g5 has been
 * incremented but not yet shifted, and %g7/%o3 still hold raw masks.
 * DO_THE_TRICK and END_THE_TRICK begin by finishing exactly those three,
 * so this macro must be followed by one of them.
 *
 * Clobbers: %g1 %g2 %g3 %g5 %g7 %o3.  Accumulates into %o2.
 * The per-line comments appear to record the intended UltraSPARC issue
 * slot (FPM/FPA/IEU0/IEU1) and instruction-group boundaries.
 */
#define START_THE_TRICK(fz,f0,f2,f4,f6,f8,f10)						\
	fcmpgt32	%fz, %f0, %g1		/*  FPM		Group	*/;		\
	fcmpgt32	%fz, %f2, %g2		/*  FPM		Group	*/;		\
	fcmpgt32	%fz, %f4, %g3		/*  FPM		Group	*/;		\
	inc		%g1			/*  IEU0	Group	*/;		\
	fcmpgt32	%fz, %f6, %g5		/*  FPM			*/;		\
	srl		%g1, 1, %g1		/*  IEU0	Group	*/;		\
	fcmpgt32	%fz, %f8, %g7		/*  FPM			*/;		\
	inc		%g2			/*  IEU0	Group	*/;		\
	fcmpgt32	%fz, %f10, %o3		/*  FPM			*/;		\
	srl		%g2, 1, %g2		/*  IEU0	Group	*/;		\
	inc		%g3			/*  IEU1		*/;		\
	srl		%g3, 1, %g3		/*  IEU0	Group	*/;		\
	add		%o2, %g1, %o2		/*  IEU1		*/;		\
	add		%o2, %g2, %o2		/*  IEU0	Group	*/;		\
	inc		%g5			/*  IEU1		*/;		\
	add		%o2, %g3, %o2		/*  IEU0	Group	*/;

/*
 * DO_THE_TRICK(O12, O14, f0..f14, F0..F14)
 *
 * One steady-state pipeline stage: adds the eight double registers
 * f0..f14 lane-wise (fpadd32, two 32-bit lanes each) into F0..F14 and
 * counts, in %o2, the lanes selected by the accompanying fcmpgt32
 * compares.
 *
 *   O12, O14 - first operands of the two compares that the previous stage
 *              left undone (compared against f12 and f14 here).  At the
 *              call sites they are either freshly zeroed registers or the
 *              previous stage's source registers for the f12/f14 slots.
 *   f0..f14  - source block (already holds the running lane sums).
 *   F0..F14  - destination block; F[i] = F[i] + f[i].
 *
 * Pipeline contract:
 *   on entry  - %g5 is incremented but not shifted, %g7 and %o3 are raw
 *               fcmpgt32 masks (the state left by START_THE_TRICK or by a
 *               previous DO_THE_TRICK); they are finished and added first.
 *   in body   - "fcmpgt32 f[i], F[i]" is issued after each add for
 *               i = 0..10; the compares for slots 12 and 14 are deferred
 *               to the next stage via its O12/O14 arguments.
 *   on exit   - same pending state as on entry (%g5 incremented, %g7/%o3
 *               raw), so stages can be chained back to back.
 *
 * A 2-bit mask m is converted to a lane count with "inc; srl 1".
 *
 * Clobbers: %g1 %g2 %g3 %g5 %g7 %o3 %o4 %o5.  Accumulates into %o2.
 */
#define DO_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,F0,F2,F4,F6,F8,F10,F12,F14)	\
	srl		%g5, 1, %g5		/*  IEU0	Group	*/;		\
	fpadd32		%F0, %f0, %F0		/*  FPA			*/;		\
	fcmpgt32	%O12, %f12, %o4		/*  FPM			*/;		\
	inc		%g7			/*  IEU0	Group	*/;		\
	fpadd32		%F2, %f2, %F2		/*  FPA			*/;		\
	fcmpgt32	%O14, %f14, %o5		/*  FPM			*/;		\
	add		%o2, %g5, %o2		/*  IEU1	Group	*/;		\
	fpadd32		%F4, %f4, %F4		/*  FPA			*/;		\
	fcmpgt32	%f0, %F0, %g1		/*  FPM			*/;		\
	srl		%g7, 1, %g7		/*  IEU0	Group	*/;		\
	fpadd32		%F6, %f6, %F6		/*  FPA			*/;		\
	fcmpgt32	%f2, %F2, %g2		/*  FPM			*/;		\
	add		%o2, %g7, %o2		/*  IEU0	Group	*/;		\
	fpadd32		%F8, %f8, %F8		/*  FPA			*/;		\
	fcmpgt32	%f4, %F4, %g3		/*  FPM			*/;		\
	inc		%o3			/*  IEU0	Group	*/;		\
	fpadd32		%F10, %f10, %F10	/*  FPA			*/;		\
	fcmpgt32	%f6, %F6, %g5		/*  FPM			*/;		\
	srl		%o3, 1, %o3		/*  IEU0	Group	*/;		\
	fpadd32		%F12, %f12, %F12	/*  FPA			*/;		\
	fcmpgt32	%f8, %F8, %g7		/*  FPM			*/;		\
	add		%o2, %o3, %o2		/*  IEU0	Group	*/;		\
	fpadd32		%F14, %f14, %F14	/*  FPA			*/;		\
	fcmpgt32	%f10, %F10, %o3		/*  FPM			*/;		\
	inc		%o4			/*  IEU0	Group	*/;		\
	inc		%o5			/*  IEU1		*/;		\
	srl		%o4, 1, %o4		/*  IEU0	Group	*/;		\
	inc		%g1			/*  IEU1		*/;		\
	srl		%o5, 1, %o5		/*  IEU0	Group	*/;		\
	add		%o2, %o4, %o2		/*  IEU1		*/;		\
	srl		%g1, 1, %g1		/*  IEU0	Group	*/;		\
	add		%o2, %o5, %o2		/*  IEU1		*/;		\
	inc		%g2			/*  IEU0	Group	*/;		\
	add		%o2, %g1, %o2		/*  IEU1		*/;		\
	srl		%g2, 1, %g2		/*  IEU0	Group	*/;		\
	inc		%g3			/*  IEU1		*/;		\
	srl		%g3, 1, %g3		/*  IEU0	Group	*/;		\
	add		%o2, %g2, %o2		/*  IEU1		*/;		\
	inc		%g5			/*  IEU0	Group	*/;		\
	add		%o2, %g3, %o2		/*  IEU0		*/;

/*
 * END_THE_TRICK(O12, O14, f0..f14, S0, S1, S2, S3, T0, T1, U0, fz)
 *
 * Drains the pipeline and reduces the eight lane-sum registers f0..f14
 * to a single value that is folded into the integer accumulator %o2.
 *
 *   O12, O14    - deferred compare operands from the previous stage
 *                 (compared against f12 / f14, as in DO_THE_TRICK).
 *   f0..f14     - block holding the final lane sums.
 *   S0..S3      - scratch: pairwise sums f2+f0, f6+f4, f10+f8, f14+f12.
 *   T0, T1      - scratch: S0+S1 and S2+S3.
 *   U0          - scratch: T0+T1, the fully reduced 2x32-bit value.
 *   fz          - scratch FP register; zeroed here with fzero.
 *
 * Steps, in the order the results are consumed:
 *   1. Finish the three counts left pending in %g5, %g7, %o3.
 *   2. Tree-add with fpadd32; after each add an fcmpgt32 of one input
 *      against the sum yields a lane mask whose count is ADDED to %o2.
 *   3. Compare fz (zero) against f2, f6, f10, f14, S1, S3, T1 and U0; the
 *      counts of these masks are SUBTRACTED from %o2.
 *      NOTE(review): this looks like the correction for fcmpgt32 being a
 *      signed compare while carries are an unsigned notion; confirm
 *      before changing any add/sub in this macro.
 *   4. Store U0 to the scratch slot at [%sp + STACKOFF], reload it as a
 *      64-bit integer into %o5 and add it to %o2; a carry out of bit 63
 *      (xcc) is wrapped around by adding 1.
 *
 * A 2-bit mask m is converted to a lane count with "inc; srl 1".
 * Uses numeric local label 33, so "33f" resolves to the end of the
 * expansion.
 *
 * Clobbers: %g1 %g2 %g3 %g5 %g7 %o3 %o4 %o5, S0..S3, T0, T1, U0, fz, the
 * integer condition codes and 8 bytes at [%sp + STACKOFF].
 * Result: %o2.
 */
#define END_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,S0,S1,S2,S3,T0,T1,U0,fz)	\
	srl		%g5, 1, %g5		/*  IEU0	Group	*/;		\
	fpadd32		%f2, %f0, %S0		/*  FPA			*/;		\
	fcmpgt32	%O12, %f12, %o4		/*  FPM			*/;		\
	inc		%g7			/*  IEU0	Group	*/;		\
	fpadd32		%f6, %f4, %S1		/*  FPA			*/;		\
	fcmpgt32	%O14, %f14, %o5		/*  FPM			*/;		\
	srl		%g7, 1, %g7		/*  IEU0	Group	*/;		\
	fpadd32		%f10, %f8, %S2		/*  FPA			*/;		\
	fcmpgt32	%f0, %S0, %g1		/*  FPM			*/;		\
	inc		%o3			/*  IEU0	Group	*/;		\
	fpadd32		%f14, %f12, %S3		/*  FPA			*/;		\
	fcmpgt32	%f4, %S1, %g2		/*  FPM			*/;		\
	add		%o2, %g5, %o2		/*  IEU0	Group	*/;		\
	fpadd32		%S0, %S1, %T0		/*  FPA			*/;		\
	fcmpgt32	%f8, %S2, %g3		/*  FPM			*/;		\
	add		%o2, %g7, %o2		/*  IEU0	Group	*/;		\
	fzero		%fz			/*  FPA			*/;		\
	fcmpgt32	%f12, %S3, %g5		/*  FPM			*/;		\
	srl		%o3, 1, %o3		/*  IEU0	Group	*/;		\
	fpadd32		%S2, %S3, %T1		/*  FPA			*/;		\
	fcmpgt32	%S0, %T0, %g7		/*  FPM			*/;		\
	add		%o2, %o3, %o2		/*  IEU0	Group	*/;		\
	fpadd32		%T0, %T1, %U0		/*  FPA			*/;		\
	fcmpgt32	%S2, %T1, %o3		/*  FPM			*/;		\
	inc		%o4			/*  IEU0	Group	*/;		\
	inc		%o5			/*  IEU1		*/;		\
	srl		%o4, 1, %o4		/*  IEU0	Group	*/;		\
	inc		%g1			/*  IEU1		*/;		\
	add		%o2, %o4, %o2		/*  IEU0	Group	*/;		\
	fcmpgt32	%fz, %f2, %o4		/*  FPM			*/;		\
	srl		%o5, 1, %o5		/*  IEU0	Group	*/;		\
	inc		%g2			/*  IEU1		*/;		\
	add		%o2, %o5, %o2		/*  IEU0	Group	*/;		\
	fcmpgt32	%fz, %f6, %o5		/*  FPM			*/;		\
	srl		%g1, 1, %g1		/*  IEU0	Group	*/;		\
	inc		%g3			/*  IEU1		*/;		\
	add		%o2, %g1, %o2		/*  IEU0	Group	*/;		\
	fcmpgt32	%fz, %f10, %g1		/*  FPM			*/;		\
	srl		%g2, 1, %g2		/*  IEU0	Group	*/;		\
	inc		%g5			/*  IEU1		*/;		\
	add		%o2, %g2, %o2		/*  IEU0	Group	*/;		\
	fcmpgt32	%fz, %f14, %g2		/*  FPM			*/;		\
	srl		%g3, 1, %g3		/*  IEU0	Group	*/;		\
	inc		%g7			/*  IEU1		*/;		\
	add		%o2, %g3, %o2		/*  IEU0	Group	*/;		\
	fcmpgt32	%fz, %S1, %g3		/*  FPM			*/;		\
	srl		%g5, 1, %g5		/*  IEU0	Group	*/;		\
	inc		%o3			/*  IEU1		*/;		\
	add		%o2, %g5, %o2		/*  IEU0	Group	*/;		\
	fcmpgt32	%fz, %S3, %g5		/*  FPM			*/;		\
	srl		%g7, 1, %g7		/*  IEU0	Group	*/;		\
	inc		%o4			/*  IEU1		*/;		\
	add		%o2, %g7, %o2		/*  IEU0	Group	*/;		\
	fcmpgt32	%fz, %T1, %g7		/*  FPM			*/;		\
	srl		%o3, 1, %o3		/*  IEU0	Group	*/;		\
	inc		%o5			/*  IEU1		*/;		\
	add		%o2, %o3, %o2		/*  IEU0	Group	*/;		\
	fcmpgt32	%T0, %U0, %o3		/*  FPM			*/;		\
	srl		%o4, 1, %o4		/*  IEU0	Group	*/;		\
	inc		%g1			/*  IEU1		*/;		\
	sub		%o2, %o4, %o2		/*  IEU0	Group	*/;		\
	fcmpgt32	%fz, %U0, %o4		/*  FPM			*/;		\
	srl		%o5, 1, %o5		/*  IEU0	Group	*/;		\
	inc		%g2			/*  IEU1		*/;		\
	srl		%g1, 1, %g1		/*  IEU0	Group	*/;		\
	sub		%o2, %o5, %o2		/*  IEU1		*/;		\
	std		%U0, [%sp + STACKOFF]	/*  Store		*/;		\
	srl		%g2, 1, %g2		/*  IEU0	Group	*/;		\
	sub		%o2, %g1, %o2		/*  IEU1		*/;		\
	inc		%g3			/*  IEU0	Group	*/;		\
	sub		%o2, %g2, %o2		/*  IEU1		*/;		\
	srl		%g3, 1, %g3		/*  IEU0	Group	*/;		\
	inc		%g5			/*  IEU1		*/;		\
	srl		%g5, 1, %g5		/*  IEU0	Group	*/;		\
	sub		%o2, %g3, %o2		/*  IEU1		*/;		\
	ldx		[%sp + STACKOFF], %o5	/*  Load	Group	*/;		\
	inc		%g7			/*  IEU0		*/;		\
	sub		%o2, %g5, %o2		/*  IEU1		*/;		\
	srl		%g7, 1, %g7		/*  IEU0	Group	*/;		\
	inc		%o3			/*  IEU1		*/;		\
	srl		%o3, 1, %o3		/*  IEU0	Group	*/;		\
	sub		%o2, %g7, %o2		/*  IEU1		*/;		\
	inc		%o4			/*  IEU0	Group	*/;		\
	add		%o2, %o3, %o2		/*  IEU1		*/;		\
	srl		%o4, 1, %o4		/*  IEU0	Group	*/;		\
	sub		%o2, %o4, %o2		/*  IEU0	Group	*/;		\
	addcc		%o2, %o5, %o2		/*  IEU1	Group	*/;		\
	bcs,a,pn	%xcc, 33f		/*  CTI			*/;		\
	 add		%o2, 1, %o2		/*  IEU0		*/;		\
33:						/*  That's it		*/;

/*
 * CSUM_LASTCHUNK(offset)
 *
 * Adds the two 64-bit words at [%o0 - offset - 0x10] and
 * [%o0 - offset - 0x08] into %o2.  After each add, a carry out of bit 63
 * (xcc) is wrapped around by the annulled delay-slot "add 1", which only
 * executes when the branch is taken.
 *
 * The expansion is exactly 8 instructions (32 bytes of code) per 16 bytes
 * of data.  csum_partial jumps into the middle of a run of these using a
 * byte offset of 2 * (len & 0xf0), so the instruction count of this macro
 * must not change.
 *
 * Uses numeric local labels 31 and 32.
 * Clobbers: %g2, %g3, integer condition codes.  Accumulates into %o2.
 */
#define CSUM_LASTCHUNK(offset)								\
        ldx             [%o0 - offset - 0x10], %g2;					\
        ldx             [%o0 - offset - 0x08], %g3;					\
        addcc           %g2, %o2, %o2;							\
        bcs,a,pn        %xcc, 31f;							\
         add            %o2, 1, %o2;							\
31:     addcc           %g3, %o2, %o2;							\
        bcs,a,pn        %xcc, 32f;							\
         add            %o2, 1, %o2;							\
32:

205	.text
206	.globl		csum_partial
207	.align		32
208csum_partial:
209	andcc		%o0, 7, %g0		/*  IEU1	Group		*/
210	be,pt		%icc, 4f		/*  CTI				*/
211	 andcc		%o0, 0x38, %g3		/*  IEU1			*/
212	mov		1, %g5			/*  IEU0	Group		*/
213	cmp		%o1, 6			/*  IEU1			*/
214	bl,pn		%icc, 21f		/*  CTI				*/
215	 andcc		%o0, 1, %g0		/*  IEU1	Group		*/
216	bne,pn		%icc, csump_really_slow /*  CTI				*/
217	 andcc		%o0, 2, %g0		/*  IEU1	Group		*/
218	be,pt		%icc, 1f		/*  CTI				*/
219	 and		%o0, 4, %g7		/*  IEU0			*/
220	lduh		[%o0], %g2		/*  Load			*/
221	sub		%o1, 2, %o1		/*  IEU0	Group		*/
222	add		%o0, 2, %o0		/*  IEU1			*/
223	andcc		%o0, 4, %g7		/*  IEU1	Group		*/
224	sll		%g5, 16, %g5		/*  IEU0			*/
225	sll		%g2, 16, %g2		/*  IEU0	Group 		*/
226	addcc		%g2, %o2, %o2		/*  IEU1	Group (regdep)	*/
227	bcs,a,pn	%icc, 1f		/*  CTI				*/
228	 add		%o2, %g5, %o2		/*  IEU0			*/
2291:	ld		[%o0], %g2		/*  Load			*/
230	brz,a,pn	%g7, 4f			/*  CTI+IEU1	Group		*/
231	 and		%o0, 0x38, %g3		/*  IEU0			*/
232	add		%o0, 4, %o0		/*  IEU0	Group		*/
233	sub		%o1, 4, %o1		/*  IEU1			*/
234	addcc		%g2, %o2, %o2		/*  IEU1	Group		*/
235	bcs,a,pn	%icc, 1f		/*  CTI				*/
236	 add		%o2, 1, %o2		/*  IEU0			*/
2371:	and		%o0, 0x38, %g3		/*  IEU1	Group		*/
2384:	srl		%o2, 0, %o2		/*  IEU0	Group		*/
239	mov		0x40, %g1		/*  IEU1			*/
240	brz,pn		%g3, 3f			/*  CTI+IEU1	Group		*/
241	 sub		%g1, %g3, %g1		/*  IEU0			*/
242	cmp		%o1, 56			/*  IEU1	Group		*/
243	blu,pn		%icc, 20f		/*  CTI				*/
244	 andcc		%o0, 8, %g0		/*  IEU1	Group		*/
245	be,pn		%icc, 1f		/*  CTI				*/
246	 ldx		[%o0], %g2		/*  Load			*/
247	add		%o0, 8, %o0		/*  IEU0	Group		*/
248	sub		%o1, 8, %o1		/*  IEU1			*/
249	addcc		%g2, %o2, %o2		/*  IEU1	Group		*/
250	bcs,a,pn	%xcc, 1f		/*  CTI				*/
251	 add		%o2, 1, %o2		/*  IEU0			*/
2521:	andcc		%g1, 0x10, %g0		/*  IEU1	Group		*/
253	be,pn		%icc, 2f		/*  CTI				*/
254	 and		%g1, 0x20, %g1		/*  IEU0			*/
255	ldx		[%o0], %g2		/*  Load			*/
256	ldx		[%o0+8], %g3		/*  Load	Group		*/
257	add		%o0, 16, %o0		/*  IEU0			*/
258	sub		%o1, 16, %o1		/*  IEU1			*/
259	addcc		%g2, %o2, %o2		/*  IEU1	Group		*/
260	bcs,a,pn	%xcc, 1f		/*  CTI				*/
261	 add		%o2, 1, %o2		/*  IEU0			*/
2621:	addcc		%g3, %o2, %o2		/*  IEU1	Group		*/
263	bcs,a,pn	%xcc, 2f		/*  CTI				*/
264	 add		%o2, 1, %o2		/*  IEU0			*/
2652:	brz,pn		%g1, 3f			/*  CTI+IEU1	Group		*/
266	 ldx		[%o0], %g2		/*  Load			*/
267	ldx		[%o0+8], %g3		/*  Load	Group		*/
268	ldx		[%o0+16], %g5		/*  Load	Group		*/
269	ldx		[%o0+24], %g7		/*  Load	Group		*/
270	add		%o0, 32, %o0		/*  IEU0			*/
271	sub		%o1, 32, %o1		/*  IEU1			*/
272	addcc		%g2, %o2, %o2		/*  IEU1	Group		*/
273	bcs,a,pn	%xcc, 1f		/*  CTI				*/
274	 add		%o2, 1, %o2		/*  IEU0			*/
2751:	addcc		%g3, %o2, %o2		/*  IEU1	Group		*/
276	bcs,a,pn	%xcc, 1f		/*  CTI				*/
277	 add		%o2, 1, %o2		/*  IEU0			*/
2781:	addcc		%g5, %o2, %o2		/*  IEU1	Group		*/
279	bcs,a,pn	%xcc, 1f		/*  CTI				*/
280	 add		%o2, 1, %o2		/*  IEU0			*/
2811:	addcc		%g7, %o2, %o2		/*  IEU1	Group		*/
282	bcs,a,pn	%xcc, 3f		/*  CTI				*/
283	 add		%o2, 1, %o2		/*  IEU0			*/
2843:	cmp		%o1, 0xc0		/*  IEU1	Group		*/
285	blu,pn		%icc, 20f		/*  CTI				*/
286	 sllx		%o2, 32, %g5		/*  IEU0			*/
287#ifdef __KERNEL__
288	VISEntry
289#endif
290	addcc		%o2, %g5, %o2		/*  IEU1	Group		*/
291	sub		%o1, 0xc0, %o1		/*  IEU0			*/
292	wr		%g0, ASI_BLK_P, %asi	/*  LSU		Group		*/
293	membar		#StoreLoad		/*  LSU		Group		*/
294	srlx		%o2, 32, %o2		/*  IEU0	Group		*/
295	bcs,a,pn	%xcc, 1f		/*  CTI				*/
296	 add		%o2, 1, %o2		/*  IEU1			*/
2971:	andcc		%o1, 0x80, %g0		/*  IEU1	Group		*/
298	bne,pn		%icc, 7f		/*  CTI				*/
299	 andcc		%o1, 0x40, %g0		/*  IEU1	Group		*/
300	be,pn		%icc, 6f		/*  CTI				*/
301	 fzero		%f12			/*  FPA				*/
302	fzero		%f14			/*  FPA		Group		*/
303	ldda		[%o0 + 0x000] %asi, %f16
304	ldda		[%o0 + 0x040] %asi, %f32
305	ldda		[%o0 + 0x080] %asi, %f48
306	START_THE_TRICK(f12,f16,f18,f20,f22,f24,f26)
307	ba,a,pt		%xcc, 3f
3086:	sub		%o0, 0x40, %o0		/*  IEU0	Group		*/
309	fzero		%f28			/*  FPA				*/
310	fzero		%f30			/*  FPA		Group		*/
311	ldda		[%o0 + 0x040] %asi, %f32
312	ldda		[%o0 + 0x080] %asi, %f48
313	ldda		[%o0 + 0x0c0] %asi, %f0
314	START_THE_TRICK(f28,f32,f34,f36,f38,f40,f42)
315	ba,a,pt		%xcc, 4f
3167:	bne,pt		%icc, 8f		/*  CTI				*/
317	 fzero		%f44			/*  FPA				*/
318	add		%o0, 0x40, %o0		/*  IEU0	Group		*/
319	fzero		%f60			/*  FPA				*/
320	fzero		%f62			/*  FPA		Group		*/
321	ldda		[%o0 - 0x040] %asi, %f0
322	ldda		[%o0 + 0x000] %asi, %f16
323	ldda		[%o0 + 0x040] %asi, %f32
324	START_THE_TRICK(f60,f0,f2,f4,f6,f8,f10)
325	ba,a,pt		%xcc, 2f
3268:	add		%o0, 0x80, %o0		/*  IEU0	Group		*/
327	fzero		%f46			/*  FPA				*/
328	ldda		[%o0 - 0x080] %asi, %f48
329	ldda		[%o0 - 0x040] %asi, %f0
330	ldda		[%o0 + 0x000] %asi, %f16
331	START_THE_TRICK(f44,f48,f50,f52,f54,f56,f58)
3321:	DO_THE_TRICK(f44,f46,f48,f50,f52,f54,f56,f58,f60,f62,f0,f2,f4,f6,f8,f10,f12,f14)
333	ldda		[%o0 + 0x040] %asi, %f32
3342:	DO_THE_TRICK(f60,f62,f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30)
335	ldda		[%o0 + 0x080] %asi, %f48
3363:	DO_THE_TRICK(f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46)
337	ldda		[%o0 + 0x0c0] %asi, %f0
3384:	DO_THE_TRICK(f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,f48,f50,f52,f54,f56,f58,f60,f62)
339	add		%o0, 0x100, %o0		/*  IEU0	Group		*/
340	subcc		%o1, 0x100, %o1		/*  IEU1			*/
341	bgeu,a,pt	%icc, 1b		/*  CTI				*/
342	 ldda		[%o0 + 0x000] %asi, %f16
343	membar		#Sync			/*  LSU		Group		*/
344	DO_THE_TRICK(f44,f46,f48,f50,f52,f54,f56,f58,f60,f62,f0,f2,f4,f6,f8,f10,f12,f14)
345	END_THE_TRICK(f60,f62,f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30)
346#ifdef __KERNEL__
347	ldub		[%g6 + AOFF_task_thread + AOFF_thread_current_ds], %g7
348#endif
349	and		%o1, 0x3f, %o1		/*  IEU0	Group		*/
350#ifdef __KERNEL__
351	VISExit
352	wr		%g7, %g0, %asi
353#endif
35420:	andcc		%o1, 0xf0, %g1		/*  IEU1	Group		*/
355	be,pn		%icc, 23f		/*  CTI				*/
356	 and		%o1, 0xf, %o3		/*  IEU0			*/
357#ifdef __KERNEL__
35822:	sll		%g1, 1, %o4		/*  IEU0	Group		*/
359	sethi		%hi(23f), %g7		/*  IEU1			*/
360	sub		%g7, %o4, %g7		/*  IEU0	Group		*/
361	jmpl		%g7 + %lo(23f), %g0	/*  CTI		Group brk forced*/
362	 add		%o0, %g1, %o0		/*  IEU0			*/
363#else
36422:	rd		%pc, %g7		/*  LSU		Group+4bubbles	*/
365	sll		%g1, 1, %o4		/*  IEU0	Group		*/
366	sub		%g7, %o4, %g7		/*  IEU0	Group (regdep)	*/
367	jmpl		%g7 + (23f - 22b), %g0	/*  CTI		Group brk forced*/
368	 add		%o0, %g1, %o0		/*  IEU0			*/
369#endif
370	CSUM_LASTCHUNK(0xe0)
371	CSUM_LASTCHUNK(0xd0)
372	CSUM_LASTCHUNK(0xc0)
373	CSUM_LASTCHUNK(0xb0)
374	CSUM_LASTCHUNK(0xa0)
375	CSUM_LASTCHUNK(0x90)
376	CSUM_LASTCHUNK(0x80)
377	CSUM_LASTCHUNK(0x70)
378	CSUM_LASTCHUNK(0x60)
379	CSUM_LASTCHUNK(0x50)
380	CSUM_LASTCHUNK(0x40)
381	CSUM_LASTCHUNK(0x30)
382	CSUM_LASTCHUNK(0x20)
383	CSUM_LASTCHUNK(0x10)
384	CSUM_LASTCHUNK(0x00)
38523:	brnz,pn		%o3, 26f		/*  CTI+IEU1	Group		*/
38624:	 sllx		%o2, 32, %g1		/*  IEU0			*/
38725:	addcc		%o2, %g1, %o0		/*  IEU1	Group		*/
388	srlx		%o0, 32, %o0		/*  IEU0	Group (regdep)	*/
389	bcs,a,pn	%xcc, 1f		/*  CTI				*/
390	 add		%o0, 1, %o0		/*  IEU1			*/
3911:	retl					/*  CTI		Group brk forced*/
392	 srl		%o0, 0, %o0		/*  IEU0			*/
39326:	andcc		%o1, 8, %g0		/*  IEU1	Group		*/
394	be,pn		%icc, 1f		/*  CTI				*/
395	 ldx		[%o0], %g3		/*  Load			*/
396	add		%o0, 8, %o0		/*  IEU0	Group		*/
397	addcc		%g3, %o2, %o2		/*  IEU1	Group		*/
398	bcs,a,pn	%xcc, 1f		/*  CTI				*/
399	 add		%o2, 1, %o2		/*  IEU0			*/
4001:	andcc		%o1, 4, %g0		/*  IEU1	Group		*/
401	be,a,pn		%icc, 1f		/*  CTI				*/
402	 clr		%g2			/*  IEU0			*/
403	ld		[%o0], %g2		/*  Load			*/
404	add		%o0, 4, %o0		/*  IEU0	Group		*/
405	sllx		%g2, 32, %g2		/*  IEU0	Group		*/
4061:	andcc		%o1, 2, %g0		/*  IEU1			*/
407	be,a,pn		%icc, 1f		/*  CTI				*/
408	 clr		%o4			/*  IEU0	Group		*/
409	lduh		[%o0], %o4		/*  Load			*/
410	add		%o0, 2, %o0		/*  IEU1			*/
411	sll		%o4, 16, %o4		/*  IEU0	Group		*/
4121:	andcc		%o1, 1, %g0		/*  IEU1			*/
413	be,a,pn		%icc, 1f		/*  CTI				*/
414	 clr		%o5			/*  IEU0	Group		*/
415	ldub		[%o0], %o5		/*  Load			*/
416	sll		%o5, 8, %o5		/*  IEU0	Group		*/
4171:	or		%g2, %o4, %o4		/*  IEU1			*/
418	or		%o5, %o4, %o4		/*  IEU0	Group (regdep)	*/
419	addcc		%o4, %o2, %o2		/*  IEU1	Group (regdep)	*/
420	bcs,a,pn	%xcc, 1f		/*  CTI				*/
421	 add		%o2, 1, %o2		/*  IEU0			*/
4221:	ba,pt		%xcc, 25b		/*  CTI		Group		*/
423	 sllx		%o2, 32, %g1		/*  IEU0			*/
42421:	srl		%o2, 0, %o2		/*  IEU0	Group		*/
425	cmp		%o1, 0			/*  IEU1			*/
426	be,pn		%icc, 24b		/*  CTI				*/
427	 andcc		%o1, 4, %g0		/*  IEU1	Group		*/
428	be,a,pn		%icc, 1f		/*  CTI				*/
429	 clr		%g2			/*  IEU0			*/
430	lduh		[%o0], %g3		/*  Load			*/
431	lduh		[%o0+2], %g2		/*  Load	Group		*/
432	add		%o0, 4, %o0		/*  IEU0	Group		*/
433	sllx		%g3, 48, %g3		/*  IEU0	Group		*/
434	sllx		%g2, 32, %g2		/*  IEU0	Group		*/
435	or		%g3, %g2, %g2		/*  IEU0	Group		*/
4361:	andcc		%o1, 2, %g0		/*  IEU1			*/
437	be,a,pn		%icc, 1f		/*  CTI				*/
438	 clr		%o4			/*  IEU0	Group		*/
439	lduh		[%o0], %o4		/*  Load			*/
440	add		%o0, 2, %o0		/*  IEU1			*/
441	sll		%o4, 16, %o4		/*  IEU0	Group		*/
4421:	andcc		%o1, 1, %g0		/*  IEU1			*/
443	be,a,pn		%icc, 1f		/*  CTI				*/
444	 clr		%o5			/*  IEU0	Group		*/
445	ldub		[%o0], %o5		/*  Load			*/
446	sll		%o5, 8, %o5		/*  IEU0	Group		*/
4471:	or		%g2, %o4, %o4		/*  IEU1			*/
448	or		%o5, %o4, %o4		/*  IEU0	Group (regdep)	*/
449	addcc		%o4, %o2, %o2		/*  IEU1	Group (regdep)	*/
450	bcs,a,pn	%xcc, 1f		/*  CTI				*/
451	 add		%o2, 1, %o2		/*  IEU0			*/
4521:	ba,pt		%xcc, 25b		/*  CTI		Group		*/
453	 sllx		%o2, 32, %g1		/*  IEU0			*/
454
455	/* When buff is byte aligned and len is large, we backoff to
456	 * this really slow handling.  The issue is that we cannot do
457	 * the VIS stuff when buff is byte aligned as unaligned.c will
458	 * not fix it up.
459	 */
460csump_really_slow:
461	mov	%o0, %o3
462	mov	%o1, %o4
463	cmp	%o1, 0
464	ble,pn	%icc, 9f
465	 mov	0, %o0
466	andcc	%o3, 1, %o5
467	be,pt	%icc, 1f
468	 sra	%o4, 1, %g3
469	add	%o1, -1, %o4
470	ldub	[%o3], %o0
471	add	%o3, 1, %o3
472	sra	%o4, 1, %g3
4731:
474	cmp	%g3, 0
475	be,pt	%icc, 3f
476	 and	%o4, 1, %g2
477	and	%o3, 2, %g2
478	brz,a,pt %g2, 1f
479	 sra	%g3, 1, %g3
480	add	%g3, -1, %g3
481	add	%o4, -2, %o4
482	lduh	[%o3], %g2
483	add	%o3, 2, %o3
484	add	%o0, %g2, %o0
485	sra	%g3, 1, %g3
4861:
487	cmp	%g3, 0
488	be,pt	%icc, 2f
489	 and	%o4, 2, %g2
4901:
491	ld	[%o3], %g2
492	addcc	%o0, %g2, %o0
493	addx	%o0, %g0, %o0
494	addcc	%g3, -1, %g3
495	bne,pt	%icc, 1b
496	 add	%o3, 4, %o3
497	srl	%o0, 16, %o1
498	sethi	%hi(64512), %g2
499	or	%g2, 1023, %g2
500	and	%o0, %g2, %g3
501	add	%g3, %o1, %g3
502	srl	%g3, 16, %o0
503	and	%g3, %g2, %g2
504	add	%g2, %o0, %g3
505	sll	%g3, 16, %g3
506	srl	%g3, 16, %o0
507	and	%o4, 2, %g2
5082:
509	cmp	%g2, 0
510	be,pt	%icc, 3f
511	 and	%o4, 1, %g2
512	lduh	[%o3], %g2
513	add	%o3, 2, %o3
514	add	%o0, %g2, %o0
515	and	%o4, 1, %g2
5163:
517	cmp	%g2, 0
518	be,pt	%icc, 1f
519	 srl	%o0, 16, %o1
520	ldub	[%o3], %g2
521	sll	%g2, 8, %g2
522	add	%o0, %g2, %o0
523	srl	%o0, 16, %o1
5241:
525	sethi	%hi(64512), %g2
526	or	%g2, 1023, %g2
527	cmp	%o5, 0
528	and	%o0, %g2, %g3
529	add	%g3, %o1, %g3
530	srl	%g3, 16, %o0
531	and	%g3, %g2, %g2
532	add	%g2, %o0, %g3
533	sll	%g3, 16, %g3
534	srl	%g3, 16, %o0
535	srl	%g3, 24, %g3
536	and	%o0, 255, %g2
537	sll	%g2, 8, %g2
538	bne,pt	%icc, 1f
539	 or	%g3, %g2, %g2
5409:
541	mov	%o0, %g2
5421:
543	addcc	%g2, %o2, %g2
544	addx	%g2, %g0, %g2
545	retl
546	 srl	%g2, 0, %o0
547