1 /*
2  * include/asm-generic/xor.h
3  *
4  * Generic optimized RAID-5 checksumming functions.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2, or (at your option)
9  * any later version.
10  *
11  * You should have received a copy of the GNU General Public License
12  * (for example /usr/src/linux/COPYING); if not, write to the Free
13  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14  */
15 
/*
 * XOR the buffer at p2 into the buffer at p1, eight words per iteration.
 *
 * bytes: length of each buffer in bytes.  Only whole lines of
 *        8 * sizeof(long) bytes are processed; a trailing partial line is
 *        left untouched, and no memory is accessed at all when bytes is
 *        smaller than one line.
 * p1:    destination, also the first source operand.
 * p2:    second source operand (only read).
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	unsigned long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Pre-tested loop: a post-tested do/while would still run one full
	 * line when lines == 0 and overrun both buffers.
	 */
	for (; lines > 0; lines--) {
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	}
}
34 
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, eight words per
 * iteration.
 *
 * bytes:  length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial line is
 *         left untouched, and no memory is accessed at all when bytes is
 *         smaller than one line.
 * p1:     destination, also the first source operand.
 * p2, p3: further source operands (only read).
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	unsigned long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Pre-tested loop: a post-tested do/while would still run one full
	 * line when lines == 0 and overrun the buffers.
	 */
	for (; lines > 0; lines--) {
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	}
}
55 
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, eight words per
 * iteration.
 *
 * bytes:   length of each buffer in bytes.  Only whole lines of
 *          8 * sizeof(long) bytes are processed; a trailing partial line is
 *          left untouched, and no memory is accessed at all when bytes is
 *          smaller than one line.
 * p1:      destination, also the first source operand.
 * p2..p4:  further source operands (only read).
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	unsigned long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Pre-tested loop: a post-tested do/while would still run one full
	 * line when lines == 0 and overrun the buffers.
	 */
	for (; lines > 0; lines--) {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	}
}
77 
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, eight words
 * per iteration.
 *
 * bytes:   length of each buffer in bytes.  Only whole lines of
 *          8 * sizeof(long) bytes are processed; a trailing partial line is
 *          left untouched, and no memory is accessed at all when bytes is
 *          smaller than one line.
 * p1:      destination, also the first source operand.
 * p2..p5:  further source operands (only read).
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	unsigned long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Pre-tested loop: a post-tested do/while would still run one full
	 * line when lines == 0 and overrun the buffers.
	 */
	for (; lines > 0; lines--) {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	}
}
100 
/*
 * XOR the buffer at p2 into the buffer at p1, one line of eight words at a
 * time.  Each line is staged in local temporaries: all loads from p1 come
 * first, then the XORs with p2, then all stores back to p1.
 *
 * bytes: length of each buffer in bytes.  Only whole lines of
 *        8 * sizeof(long) bytes are processed; a trailing partial line is
 *        left untouched, and no memory is accessed at all when bytes is
 *        smaller than one line.
 * p1:    destination, also the first source operand.
 * p2:    second source operand (only read).
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	unsigned long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Pre-tested loop: a post-tested do/while would still run one full
	 * line when lines == 0 and overrun both buffers.
	 */
	for (; lines > 0; lines--) {
		/* Same type as the data, so no unsigned->signed conversion. */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	}
}
136 
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, one line of eight
 * words at a time.  Each line is staged in local temporaries: all loads from
 * p1 come first, then the XORs with p2 and p3, then all stores back to p1.
 *
 * bytes:  length of each buffer in bytes.  Only whole lines of
 *         8 * sizeof(long) bytes are processed; a trailing partial line is
 *         left untouched, and no memory is accessed at all when bytes is
 *         smaller than one line.
 * p1:     destination, also the first source operand.
 * p2, p3: further source operands (only read).
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	unsigned long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Pre-tested loop: a post-tested do/while would still run one full
	 * line when lines == 0 and overrun the buffers.
	 */
	for (; lines > 0; lines--) {
		/* Same type as the data, so no unsigned->signed conversion. */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	}
}
182 
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, one line of eight
 * words at a time.  Each line is staged in local temporaries: all loads from
 * p1 come first, then the XORs with p2, p3 and p4, then all stores back to
 * p1.
 *
 * bytes:   length of each buffer in bytes.  Only whole lines of
 *          8 * sizeof(long) bytes are processed; a trailing partial line is
 *          left untouched, and no memory is accessed at all when bytes is
 *          smaller than one line.
 * p1:      destination, also the first source operand.
 * p2..p4:  further source operands (only read).
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	unsigned long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Pre-tested loop: a post-tested do/while would still run one full
	 * line when lines == 0 and overrun the buffers.
	 */
	for (; lines > 0; lines--) {
		/* Same type as the data, so no unsigned->signed conversion. */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	}
}
237 
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, one line of
 * eight words at a time.  Each line is staged in local temporaries: all loads
 * from p1 come first, then the XORs with p2 through p5, then all stores back
 * to p1.
 *
 * bytes:   length of each buffer in bytes.  Only whole lines of
 *          8 * sizeof(long) bytes are processed; a trailing partial line is
 *          left untouched, and no memory is accessed at all when bytes is
 *          smaller than one line.
 * p1:      destination, also the first source operand.
 * p2..p5:  further source operands (only read).
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	unsigned long lines = bytes / (sizeof (long)) / 8;

	/*
	 * Pre-tested loop: a post-tested do/while would still run one full
	 * line when lines == 0 and overrun the buffers.
	 */
	for (; lines > 0; lines--) {
		/* Same type as the data, so no unsigned->signed conversion. */
		unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		d0 = p1[0];	/* Pull the stuff into registers	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	}
}
301 
302 static struct xor_block_template xor_block_8regs = {
303 	name: "8regs",
304 	do_2: xor_8regs_2,
305 	do_3: xor_8regs_3,
306 	do_4: xor_8regs_4,
307 	do_5: xor_8regs_5,
308 };
309 
310 static struct xor_block_template xor_block_32regs = {
311 	name: "32regs",
312 	do_2: xor_32regs_2,
313 	do_3: xor_32regs_3,
314 	do_4: xor_32regs_4,
315 	do_5: xor_32regs_5,
316 };
317 
/*
 * Pass each generic template to xor_speed(): 8regs first, then 32regs.
 * The do { } while (0) wrapper makes the expansion a single statement.
 */
#define XOR_TRY_TEMPLATES				\
do {							\
	xor_speed(&xor_block_8regs);			\
	xor_speed(&xor_block_32regs);			\
} while (0)
323