/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum)
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 */
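/*
 * The load1b/load2b/load1l/load2l/load4l accessors, together with
 * FN_ENTRY, save_regs and load_regs, are expected to be defined by the
 * file that includes this one, so the same body can be reused for the
 * different csum_partial_copy_xxx variants.
 */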

src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

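		/* Branched to from .less8 when no bytes remain: return the sum. */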
.zero:		mov	r0, sum
		load_regs	ea

		/*
		 * Align an unaligned destination pointer.  We know that
		 * we have >= 8 bytes here, so we don't need to check
		 * the length.  Note that the source pointer hasn't been
		 * aligned yet.
		 */
.dst_unaligned:	tst	dst, #1
		beq	.dst_16bit

		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, lsl #8	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2
		moveq	pc, lr			@ dst is now 32bit aligned

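		/*
		 * dst is 16-bit but not 32-bit aligned: copy and
		 * checksum two more bytes, after which dst is word
		 * aligned.
		 */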
.dst_16bit:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8
		strb	r8, [dst], #1
		adcs	sum, sum, ip, lsl #8
		strb	ip, [dst], #1
		mov	pc, lr			@ dst is now 32bit aligned

		/*
		 * Handle 0 to 7 bytes, with any alignment of source and
		 * destination pointers.  Note that when we get here, C = 0
		 */
.less8:		teq	len, #0			@ check for zero count
		beq	.zero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.less8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, lsl #8	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6
		beq	.less8_byteonly

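		/*
		 * Copy and checksum a pair of bytes at a time while at
		 * least two bytes remain; a final odd byte is handled
		 * below.
		 */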
1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8
		strb	r8, [dst], #1
		adcs	sum, sum, ip, lsl #8
		strb	ip, [dst], #1
.less8_aligned:	tst	len, #6
		bne	1b
.less8_byteonly:
		tst	len, #1
		beq	.done
		load1b	r8
		adcs	sum, sum, r8		@ update checksum
		strb	r8, [dst], #1
		b	.done

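		/*
		 * Main entry point: save the working registers and set
		 * up the frame pointer, hand copies of fewer than 8
		 * bytes to .less8, otherwise clear C, word-align the
		 * destination and pick the aligned or misaligned
		 * source path.
		 */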
FN_ENTRY
		mov	ip, sp
		save_regs
		sub	fp, ip, #4

		cmp	len, #8			@ Ensure that we have at least
		blo	.less8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.dst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.src_not_aligned

		/* Routine for src & dst aligned */

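		/*
		 * Both pointers are word aligned.  Copy and checksum
		 * 16 bytes per iteration, then mop up a remaining
		 * 8- and/or 4-byte block, and finally 0-3 tail bytes.
		 */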
		bics	ip, len, #15
		beq	2f

1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b

2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

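		/*
		 * 0 to 3 bytes remain.  Load one more word and store
		 * only the bytes that are actually needed, adding just
		 * those bytes into the checksum.
		 */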
4:		ands	len, len, #3
		beq	.done
		load1l	r4
		tst	len, #2
		beq	.exit
		adcs	sum, sum, r4, lsl #16
		strb	r4, [dst], #1
		mov	r4, r4, lsr #8
		strb	r4, [dst], #1
		mov	r4, r4, lsr #8
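		/*
		 * Store and checksum a final odd byte if there is one;
		 * every path arrives here with that byte (when present)
		 * in the low byte of r4.
		 */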
.exit:		tst	len, #1
		strneb	r4, [dst], #1
		andne	r4, r4, #255
		adcnes	sum, sum, r4

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 */
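		/*
		 * (Rotating the 32-bit sum left by 8 bits is equivalent
		 * to byte-swapping the folded 16-bit checksum, undoing
		 * the odd starting offset at which the bytes were
		 * accumulated above.)
		 */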
.done:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	sum, r0, lsl #8
		orrne	r0, sum, r0, lsr #24
		load_regs	ea

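		/*
		 * The source is not word aligned.  Round src down to a
		 * word boundary, preload one word, and build each
		 * destination word from the tail of the previous load
		 * and the head of the next, dispatching on whether src
		 * was 1, 2 or 3 bytes past alignment.
		 */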
.src_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3
		bic	src, src, #3
		load1l	r4
		cmp	ip, #2
		beq	.src2_aligned
		bhi	.src3_aligned
		mov	r4, r4, lsr #8		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lsl #24
		mov	r5, r5, lsr #8
		orr	r5, r5, r6, lsl #24
		mov	r6, r6, lsr #8
		orr	r6, r6, r7, lsl #24
		mov	r7, r7, lsr #8
		orr	r7, r7, r8, lsl #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lsr #8
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lsl #24
		mov	r5, r5, lsr #8
		orr	r5, r5, r6, lsl #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lsr #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lsl #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lsr #8
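		/*
		 * 0-3 tail bytes: r4 already holds the next source
		 * bytes in its low end.
		 */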
4:		ands	len, len, #3
		beq	.done
		tst	len, #2
		beq	.exit
		adcs	sum, sum, r4, lsl #16
		strb	r4, [dst], #1
		mov	r4, r4, lsr #8
		strb	r4, [dst], #1
		mov	r4, r4, lsr #8
		b	.exit

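		/*
		 * src was 2 bytes past a word boundary: each output
		 * word joins halfwords from consecutive loads.
		 */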
.src2_aligned:	mov	r4, r4, lsr #16
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lsl #16
		mov	r5, r5, lsr #16
		orr	r5, r5, r6, lsl #16
		mov	r6, r6, lsr #16
		orr	r6, r6, r7, lsl #16
		mov	r7, r7, lsr #16
		orr	r7, r7, r8, lsl #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lsr #16
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lsl #16
		mov	r5, r5, lsr #16
		orr	r5, r5, r6, lsl #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lsr #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lsl #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lsr #16
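		/*
		 * 0-3 tail bytes: r4 holds the next two source bytes;
		 * a third, if needed, is fetched with load1b below.
		 */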
4:		ands	len, len, #3
		beq	.done
		tst	len, #2
		beq	.exit
		adcs	sum, sum, r4, lsl #16
		strb	r4, [dst], #1
		mov	r4, r4, lsr #8
		strb	r4, [dst], #1
		tst	len, #1
		beq	.done
		load1b	r4
		b	.exit

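		/*
		 * src was 3 bytes past a word boundary: one byte is
		 * carried over from each load into the next output
		 * word.
		 */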
.src3_aligned:	mov	r4, r4, lsr #24
		adds	sum, sum, #0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lsl #8
		mov	r5, r5, lsr #24
		orr	r5, r5, r6, lsl #8
		mov	r6, r6, lsr #24
		orr	r6, r6, r7, lsl #8
		mov	r7, r7, lsr #24
		orr	r7, r7, r8, lsl #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lsr #24
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lsl #8
		mov	r5, r5, lsr #24
		orr	r5, r5, r6, lsl #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lsr #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lsl #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lsr #24
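		/*
		 * 0-3 tail bytes: r4 carries only one byte here, so any
		 * further bytes come from one more word load.
		 */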
4:		ands	len, len, #3
		beq	.done
		tst	len, #2
		beq	.exit
		adcs	sum, sum, r4, lsl #16
		strb	r4, [dst], #1
		load1l	r4
		strb	r4, [dst], #1
		adcs	sum, sum, r4, lsl #24
		mov	r4, r4, lsr #8
		b	.exit
