/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Authors:	Jorge Cwik, <jorge@laser.satlink.net>
 *		Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *		Tom May, <ftom@netcom.com>
 *		Pentium Pro/II routines:
 *		Alexander Kjeldaas <astor@guardian.no>
 *		Finn Arne Gangstad <finnag@guardian.no>
 *		Lots of code moved from tcp.c and ip.c; see those files
 *		for more names.
 *
 * Changes:	Ingo Molnar, converted csum_partial_copy() to 2.1 exception
 *			     handling.
 *		Andi Kleen, add zeroing on error,
 *			    converted to pure assembler.
 *		Hirokazu Takata, Hiroyuki Kondo, rewrite for the m32r
 *		architecture.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
unsigned int csum_partial(const unsigned char *buff, int len, unsigned int sum)
 */
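
/*
 * For reference, a rough C model of what csum_partial() computes (a
 * simplified sketch, not the kernel's generic implementation: it
 * assumes a 2-byte-aligned buffer and little-endian byte order, and
 * folds eagerly where the assembly below uses add-with-carry):
 *
 *	unsigned int csum_partial_ref(const unsigned char *buff,
 *				      int len, unsigned int sum)
 *	{
 *		unsigned long long acc = sum;
 *
 *		for (; len > 1; len -= 2, buff += 2)
 *			acc += *(const unsigned short *)buff;
 *		if (len)			// trailing odd byte
 *			acc += *buff;
 *		while (acc >> 16)		// fold with end-around carry
 *			acc = (acc & 0xffff) + (acc >> 16);
 *		return (unsigned int)acc;
 *	}
 */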

#ifdef CONFIG_ISA_DUAL_ISSUE

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; Function args
	;  r0: unsigned char *buff
	;  r1: int len
	;  r2: unsigned int sum

	push	r2		    ||	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0		    ||	addi	r0, #1
	; clear c-bit || Alignment uses up one byte.
	cmp	r0, r0		    ||	addi	r1, #-1
	ldi	r3, #0		    ||	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	; clear c-bit || Alignment uses up two bytes.
	cmp	r0, r0		    ||	addi	r1, #-2
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	bra	4f		    ||	addi	r1, #2
	.fillinsn			; len(r1) was < 2.  Deal with it.
1:
	; 2-byte aligned
	lduh	r4, @r0		    ||	ldi	r3, #0
	addx	r2, r4		    ||	addi	r0, #2
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5		; r6 = len / 32
	beqz	r6, 2f
	.fillinsn

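	; Main loop: 32 bytes per iteration.  Each load is paired (||)
	; with the carry-propagating addx of a previously loaded word,
	; so the sum accumulates while the next words are fetched.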
1:	ld	r3, @r0+
	ld	r4, @r0+					; +4
	ld	r5, @r0+					; +8
	ld	r3, @r0+	    ||	addx	r2, r3		; +12
	ld	r4, @r0+	    ||	addx	r2, r4		; +16
	ld	r5, @r0+	    ||	addx	r2, r5		; +20
	ld	r3, @r0+	    ||	addx	r2, r3		; +24
	ld	r4, @r0+	    ||	addx	r2, r4		; +28
	addx	r2, r5		    ||	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b

	addx	r2, r6			; r6=0, pick up the last carry
	cmp	r0, r0			; This clears c-bit
	.fillinsn
2:	and3	r6, r1, #0x1c		; bytes left below 32, whole words
	beqz	r6, 4f
	srli	r6, #2			; convert to word count
	.fillinsn

3:	ld	r4, @r0+	    ||	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b

	addx	r2, r6			; r6=0, pick up the last carry
	cmp	r0, r0			; This clears c-bit
	.fillinsn
4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2, goto 5f (single byte)
	lduh	r4, @r0		    ||	addi	r0, #2
	addi	r1, #-2		    ||	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0		    ||	ldi	r1, #0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	addx	r2, r1			; r1=0, pick up the last carry
	.fillinsn
7:
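	; Fold the 32-bit sum in r2 down to 16 bits; in C, roughly:
	;   r0 = (r2 & 0xffff) + (r2 >> 16);
	;   if (r0 >> 16)
	;           r0 = (r0 + 1) & 0xffff;	/* end-around carry */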
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
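	; If buff was odd-aligned (r7 != 0), every byte was accumulated
	; in the opposite half of its 16-bit word, so swap the two bytes
	; of the folded result.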
	beqz	r7, 1f			; no swap needed for even alignment
	and3	r2, r0, #0xff
	srl3	r0, r0, #8
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2		    ||	cmp	r0, r0
	addx	r0, r2		    ||	ldi	r2, #0
	addx	r0, r2			; add the original sum argument back
	jmp	r14

#else /* not CONFIG_ISA_DUAL_ISSUE */

	/*
	 * Experiments with Ethernet and SLIP connections show that buff
	 * is aligned on either a 2-byte or 4-byte boundary.  We get at
	 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
	 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
	 * alignment for the unrolled loop.
	 */

	.text
ENTRY(csum_partial)
	; Function args
	;  r0: unsigned char *buff
	;  r1: int len
	;  r2: unsigned int sum

	push	r2
	ldi	r2, #0
	and3	r7, r0, #1		; Check alignment.
	beqz	r7, 1f			; Jump if alignment is ok.
	; 1-byte misaligned
	ldub	r4, @r0
	addi	r0, #1
	addi	r1, #-1			; Alignment uses up one byte.
	cmp	r0, r0			; clear c-bit
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
1:
	and3	r4, r0, #2		; Check alignment.
	beqz	r4, 2f			; Jump if alignment is ok.
	addi	r1, #-2			; Alignment uses up two bytes.
	cmp	r0, r0			; clear c-bit
	bgtz	r1, 1f			; Jump if we had at least two bytes.
	addi	r1, #2			; len(r1) was < 2.  Deal with it.
	bra	4f
	.fillinsn
1:
	; 2-byte aligned
	lduh	r4, @r0
	addi	r0, #2
	ldi	r3, #0
	addx	r2, r4
	addx	r2, r3
	.fillinsn
2:
	; 4-byte aligned
	cmp	r0, r0			; clear c-bit
	srl3	r6, r1, #5		; r6 = len / 32
	beqz	r6, 2f
	.fillinsn

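	; Main loop: 32 bytes per iteration; the same unrolled structure
	; as the dual-issue version above, issued sequentially.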
1:	ld	r3, @r0+
	ld	r4, @r0+		; +4
	ld	r5, @r0+		; +8
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +12
	ld	r4, @r0+		; +16
	ld	r5, @r0+		; +20
	addx	r2, r3
	addx	r2, r4
	addx	r2, r5
	ld	r3, @r0+		; +24
	ld	r4, @r0+		; +28
	addi	r6, #-1
	addx	r2, r3
	addx	r2, r4
	bnez	r6, 1b
	addx	r2, r6			; r6=0, pick up the last carry
	cmp	r0, r0			; This clears c-bit
	.fillinsn

2:	and3	r6, r1, #0x1c		; bytes left below 32, whole words
	beqz	r6, 4f
	srli	r6, #2			; convert to word count
	.fillinsn

3:	ld	r4, @r0+
	addi	r6, #-1
	addx	r2, r4
	bnez	r6, 3b
	addx	r2, r6			; r6=0, pick up the last carry
	cmp	r0, r0			; This clears c-bit
	.fillinsn

4:	and3	r1, r1, #3
	beqz	r1, 7f			; if len == 0 goto end
	and3	r6, r1, #2
	beqz	r6, 5f			; if len < 2, goto 5f (single byte)

	lduh	r4, @r0
	addi	r0, #2
	addi	r1, #-2
	slli	r4, #16
	addx	r2, r4
	beqz	r1, 6f
	.fillinsn
5:	ldub	r4, @r0
#ifndef __LITTLE_ENDIAN__
	slli	r4, #8
#endif
	addx	r2, r4
	.fillinsn
6:	ldi	r5, #0
	addx	r2, r5			; r5=0, pick up the last carry
	.fillinsn
7:
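	; Fold the 32-bit sum in r2 down to 16 bits, exactly as in the
	; dual-issue version above.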
	and3	r0, r2, #0xffff
	srli	r2, #16
	add	r0, r2
	srl3	r2, r0, #16
	beqz	r2, 1f
	addi	r0, #1
	and3	r0, r0, #0xffff
	.fillinsn
1:
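	; If buff was odd-aligned (r7 != 0), swap the two bytes of the
	; folded result back into place.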
	beqz	r7, 1f			; no swap needed for even alignment
	mv	r2, r0
	srl3	r0, r2, #8
	and3	r2, r2, #0xff
	slli	r2, #8
	or	r0, r2
	.fillinsn
1:
	pop	r2
	cmp	r0, r0			; clear c-bit
	addx	r0, r2			; add the original sum argument back
	ldi	r2, #0
	addx	r0, r2			; pick up the last carry
	jmp	r14

#endif /* not CONFIG_ISA_DUAL_ISSUE */

/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
				  int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 */

/*
 * Copy while checksumming, otherwise like csum_partial.
 *
 * The macros SRC and DST specify the type of access for the instruction,
 * so we can call a custom exception handler for each access type.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 *	  DST definitions? It's damn hard to trigger all cases.  I hope I got
 *	  them all but there's no guarantee.
 */
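
/*
 * In C, the intended behaviour is roughly the sketch below (an
 * illustration only: memcpy() stands in for the real copy loop, which
 * would use exception-table fixups for user-space accesses and report
 * a fault by writing -EFAULT through *src_err_ptr or *dst_err_ptr,
 * hence the <asm/errno.h> include above):
 *
 *	unsigned int csum_partial_copy_generic(const char *src, char *dst,
 *			int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 *	{
 *		memcpy(dst, src, len);
 *		return csum_partial((const unsigned char *)dst, len, sum);
 *	}
 */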

ENTRY(csum_partial_copy_generic)
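	; FIXME: not implemented yet -- this stub simply returns without
	; copying or checksumming anything.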
	nop
	nop
	nop
	nop
	jmp	r14
	nop
	nop
	nop

	.end