1 /*
2  * include/asm-generic/xor.h
3  *
4  * Generic optimized RAID-5 checksumming functions.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2, or (at your option)
9  * any later version.
10  *
11  * You should have received a copy of the GNU General Public License
12  * (for example /usr/src/linux/COPYING); if not, write to the Free
13  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14  */
15 
16 #include <asm/processor.h>
17 
/*
 * XOR the buffer at p2 into the buffer at p1, one line of eight words
 * per loop pass.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
36 
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, one line of eight
 * words per loop pass.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
57 
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, one line of
 * eight words per loop pass.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
79 
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, one line
 * of eight words per loop pass.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;

	do {
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
102 
/*
 * XOR the buffer at p2 into the buffer at p1.  Each loop pass loads one
 * line of eight words from p1 into temporaries, folds in p2, and then
 * stores the line back, keeping loads and stores grouped.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;

	do {
		/* Unsigned, matching the data, so no sign conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;
		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
}
138 
/*
 * XOR the buffers at p2 and p3 into the buffer at p1.  Each loop pass
 * loads one line of eight words from p1 into temporaries, folds in the
 * sources one after another, and then stores the line back.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;

	do {
		/* Unsigned, matching the data, so no sign conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;
		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
}
184 
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1.  Each loop
 * pass loads one line of eight words from p1 into temporaries, folds in
 * the sources one after another, and then stores the line back.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;

	do {
		/* Unsigned, matching the data, so no sign conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;
		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
}
239 
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1.  Each
 * loop pass loads one line of eight words from p1 into temporaries,
 * folds in the sources one after another, and then stores the line back.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The do/while below always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;

	do {
		/* Unsigned, matching the data, so no sign conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;
		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
}
303 
/*
 * XOR the buffer at p2 into the buffer at p1, one line of eight words
 * per pass, calling prefetchw()/prefetch() on the following line first.
 *
 * prefetchw() and prefetch() are not defined in this file; presumably
 * they are cache prefetch hints for write and read access -- confirm
 * against <asm/processor.h>.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The loop body always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;
	lines--;	/* the final line is reached through once_more */

	prefetchw(p1);
	prefetch(p2);

	do {
		/* Hint the line after the one about to be processed. */
		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		p1[0] ^= p2[0];
		p1[1] ^= p2[1];
		p1[2] ^= p2[2];
		p1[3] ^= p2[3];
		p1[4] ^= p2[4];
		p1[5] ^= p2[5];
		p1[6] ^= p2[6];
		p1[7] ^= p2[7];
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	/*
	 * With two or more lines the loop exits with lines == 0 and the
	 * final line still pending: re-enter below the prefetches so no
	 * hint is issued past the final line.  The next decrement makes
	 * lines negative, so this jump is taken at most once.  A one-line
	 * buffer never gets here with lines == 0; its single pass has
	 * already issued the in-loop hints for the line past the buffer.
	 */
	if (lines == 0)
		goto once_more;
}
329 
/*
 * XOR the buffers at p2 and p3 into the buffer at p1, one line of eight
 * words per pass, calling prefetchw()/prefetch() on the following line
 * first.
 *
 * prefetchw() and prefetch() are not defined in this file; presumably
 * they are cache prefetch hints for write and read access -- confirm
 * against <asm/processor.h>.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The loop body always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;
	lines--;	/* the final line is reached through once_more */

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		/* Hint the line after the one about to be processed. */
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0];
		p1[1] ^= p2[1] ^ p3[1];
		p1[2] ^= p2[2] ^ p3[2];
		p1[3] ^= p2[3] ^ p3[3];
		p1[4] ^= p2[4] ^ p3[4];
		p1[5] ^= p2[5] ^ p3[5];
		p1[6] ^= p2[6] ^ p3[6];
		p1[7] ^= p2[7] ^ p3[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	/*
	 * With two or more lines the loop exits with lines == 0 and the
	 * final line still pending: re-enter below the prefetches so no
	 * hint is issued past the final line.  The next decrement makes
	 * lines negative, so this jump is taken at most once.  A one-line
	 * buffer never gets here with lines == 0; its single pass has
	 * already issued the in-loop hints for the line past the buffer.
	 */
	if (lines == 0)
		goto once_more;
}
359 
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1, one line of
 * eight words per pass, calling prefetchw()/prefetch() on the following
 * line first.
 *
 * prefetchw() and prefetch() are not defined in this file; presumably
 * they are cache prefetch hints for write and read access -- confirm
 * against <asm/processor.h>.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The loop body always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;
	lines--;	/* the final line is reached through once_more */

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		/* Hint the line after the one about to be processed. */
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	/*
	 * With two or more lines the loop exits with lines == 0 and the
	 * final line still pending: re-enter below the prefetches so no
	 * hint is issued past the final line.  The next decrement makes
	 * lines negative, so this jump is taken at most once.  A one-line
	 * buffer never gets here with lines == 0; its single pass has
	 * already issued the in-loop hints for the line past the buffer.
	 */
	if (lines == 0)
		goto once_more;
}
393 
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1, one line
 * of eight words per pass, calling prefetchw()/prefetch() on the
 * following line first.
 *
 * prefetchw() and prefetch() are not defined in this file; presumably
 * they are cache prefetch hints for write and read access -- confirm
 * against <asm/processor.h>.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The loop body always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;
	lines--;	/* the final line is reached through once_more */

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		/* Hint the line after the one about to be processed. */
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		p1[0] ^= p2[0] ^ p3[0] ^ p4[0] ^ p5[0];
		p1[1] ^= p2[1] ^ p3[1] ^ p4[1] ^ p5[1];
		p1[2] ^= p2[2] ^ p3[2] ^ p4[2] ^ p5[2];
		p1[3] ^= p2[3] ^ p3[3] ^ p4[3] ^ p5[3];
		p1[4] ^= p2[4] ^ p3[4] ^ p4[4] ^ p5[4];
		p1[5] ^= p2[5] ^ p3[5] ^ p4[5] ^ p5[5];
		p1[6] ^= p2[6] ^ p3[6] ^ p4[6] ^ p5[6];
		p1[7] ^= p2[7] ^ p3[7] ^ p4[7] ^ p5[7];
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	/*
	 * With two or more lines the loop exits with lines == 0 and the
	 * final line still pending: re-enter below the prefetches so no
	 * hint is issued past the final line.  The next decrement makes
	 * lines negative, so this jump is taken at most once.  A one-line
	 * buffer never gets here with lines == 0; its single pass has
	 * already issued the in-loop hints for the line past the buffer.
	 */
	if (lines == 0)
		goto once_more;
}
430 
/*
 * XOR the buffer at p2 into the buffer at p1.  Each pass calls
 * prefetchw()/prefetch() on the following line, loads one line of eight
 * words from p1 into temporaries, folds in p2, and stores the line back.
 *
 * prefetchw() and prefetch() are not defined in this file; presumably
 * they are cache prefetch hints for write and read access -- confirm
 * against <asm/processor.h>.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The loop body always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;
	lines--;	/* the final line is reached through once_more */

	prefetchw(p1);
	prefetch(p2);

	do {
		/* Unsigned, matching the data, so no sign conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		/* Hint the line after the one about to be processed. */
		prefetchw(p1+8);
		prefetch(p2+8);
 once_more:
		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
	} while (--lines > 0);
	/*
	 * With two or more lines the loop exits with lines == 0 and the
	 * final line still pending: re-enter below the prefetches so no
	 * hint is issued past the final line.  The next decrement makes
	 * lines negative, so this jump is taken at most once.  A one-line
	 * buffer never gets here with lines == 0; its single pass has
	 * already issued the in-loop hints for the line past the buffer.
	 */
	if (lines == 0)
		goto once_more;
}
475 
/*
 * XOR the buffers at p2 and p3 into the buffer at p1.  Each pass calls
 * prefetchw()/prefetch() on the following line, loads one line of eight
 * words from p1 into temporaries, folds in the sources one after
 * another, and stores the line back.
 *
 * prefetchw() and prefetch() are not defined in this file; presumably
 * they are cache prefetch hints for write and read access -- confirm
 * against <asm/processor.h>.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The loop body always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;
	lines--;	/* the final line is reached through once_more */

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);

	do {
		/* Unsigned, matching the data, so no sign conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		/* Hint the line after the one about to be processed. */
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
 once_more:
		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
	} while (--lines > 0);
	/*
	 * With two or more lines the loop exits with lines == 0 and the
	 * final line still pending: re-enter below the prefetches so no
	 * hint is issued past the final line.  The next decrement makes
	 * lines negative, so this jump is taken at most once.  A one-line
	 * buffer never gets here with lines == 0; its single pass has
	 * already issued the in-loop hints for the line past the buffer.
	 */
	if (lines == 0)
		goto once_more;
}
532 
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1.  Each pass
 * calls prefetchw()/prefetch() on the following line, loads one line of
 * eight words from p1 into temporaries, folds in the sources one after
 * another, and stores the line back.
 *
 * prefetchw() and prefetch() are not defined in this file; presumably
 * they are cache prefetch hints for write and read access -- confirm
 * against <asm/processor.h>.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The loop body always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;
	lines--;	/* the final line is reached through once_more */

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);

	do {
		/* Unsigned, matching the data, so no sign conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		/* Hint the line after the one about to be processed. */
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
 once_more:
		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
	} while (--lines > 0);
	/*
	 * With two or more lines the loop exits with lines == 0 and the
	 * final line still pending: re-enter below the prefetches so no
	 * hint is issued past the final line.  The next decrement makes
	 * lines negative, so this jump is taken at most once.  A one-line
	 * buffer never gets here with lines == 0; its single pass has
	 * already issued the in-loop hints for the line past the buffer.
	 */
	if (lines == 0)
		goto once_more;
}
600 
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1.  Each
 * pass calls prefetchw()/prefetch() on the following line, loads one
 * line of eight words from p1 into temporaries, folds in the sources
 * one after another, and stores the line back.
 *
 * prefetchw() and prefetch() are not defined in this file; presumably
 * they are cache prefetch hints for write and read access -- confirm
 * against <asm/processor.h>.
 *
 * bytes is the buffer length.  Only whole lines of 8 * sizeof(long)
 * bytes are processed; a remainder is ignored, and a length shorter
 * than one line leaves p1 untouched.
 */
static void
xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
	    unsigned long *p3, unsigned long *p4, unsigned long *p5)
{
	long lines = bytes / (sizeof (long)) / 8;

	/* The loop body always runs once, so bail out on "no lines". */
	if (lines <= 0)
		return;
	lines--;	/* the final line is reached through once_more */

	prefetchw(p1);
	prefetch(p2);
	prefetch(p3);
	prefetch(p4);
	prefetch(p5);

	do {
		/* Unsigned, matching the data, so no sign conversion occurs. */
		register unsigned long d0, d1, d2, d3, d4, d5, d6, d7;

		/* Hint the line after the one about to be processed. */
		prefetchw(p1+8);
		prefetch(p2+8);
		prefetch(p3+8);
		prefetch(p4+8);
		prefetch(p5+8);
 once_more:
		d0 = p1[0];	/* Load the destination line ...	*/
		d1 = p1[1];	/*  ... in bursts, if possible.		*/
		d2 = p1[2];
		d3 = p1[3];
		d4 = p1[4];
		d5 = p1[5];
		d6 = p1[6];
		d7 = p1[7];
		d0 ^= p2[0];
		d1 ^= p2[1];
		d2 ^= p2[2];
		d3 ^= p2[3];
		d4 ^= p2[4];
		d5 ^= p2[5];
		d6 ^= p2[6];
		d7 ^= p2[7];
		d0 ^= p3[0];
		d1 ^= p3[1];
		d2 ^= p3[2];
		d3 ^= p3[3];
		d4 ^= p3[4];
		d5 ^= p3[5];
		d6 ^= p3[6];
		d7 ^= p3[7];
		d0 ^= p4[0];
		d1 ^= p4[1];
		d2 ^= p4[2];
		d3 ^= p4[3];
		d4 ^= p4[4];
		d5 ^= p4[5];
		d6 ^= p4[6];
		d7 ^= p4[7];
		d0 ^= p5[0];
		d1 ^= p5[1];
		d2 ^= p5[2];
		d3 ^= p5[3];
		d4 ^= p5[4];
		d5 ^= p5[5];
		d6 ^= p5[6];
		d7 ^= p5[7];
		p1[0] = d0;	/* Store the result (in bursts)		*/
		p1[1] = d1;
		p1[2] = d2;
		p1[3] = d3;
		p1[4] = d4;
		p1[5] = d5;
		p1[6] = d6;
		p1[7] = d7;
		p1 += 8;
		p2 += 8;
		p3 += 8;
		p4 += 8;
		p5 += 8;
	} while (--lines > 0);
	/*
	 * With two or more lines the loop exits with lines == 0 and the
	 * final line still pending: re-enter below the prefetches so no
	 * hint is issued past the final line.  The next decrement makes
	 * lines negative, so this jump is taken at most once.  A one-line
	 * buffer never gets here with lines == 0; its single pass has
	 * already issued the in-loop hints for the line past the buffer.
	 */
	if (lines == 0)
		goto once_more;
}
679 
680 static struct xor_block_template xor_block_8regs = {
681 	.name = "8regs",
682 	.do_2 = xor_8regs_2,
683 	.do_3 = xor_8regs_3,
684 	.do_4 = xor_8regs_4,
685 	.do_5 = xor_8regs_5,
686 };
687 
688 static struct xor_block_template xor_block_32regs = {
689 	.name = "32regs",
690 	.do_2 = xor_32regs_2,
691 	.do_3 = xor_32regs_3,
692 	.do_4 = xor_32regs_4,
693 	.do_5 = xor_32regs_5,
694 };
695 
696 static struct xor_block_template xor_block_8regs_p = {
697 	.name = "8regs_prefetch",
698 	.do_2 = xor_8regs_p_2,
699 	.do_3 = xor_8regs_p_3,
700 	.do_4 = xor_8regs_p_4,
701 	.do_5 = xor_8regs_p_5,
702 };
703 
704 static struct xor_block_template xor_block_32regs_p = {
705 	.name = "32regs_prefetch",
706 	.do_2 = xor_32regs_p_2,
707 	.do_3 = xor_32regs_p_3,
708 	.do_4 = xor_32regs_p_4,
709 	.do_5 = xor_32regs_p_5,
710 };
711 
/*
 * Hand each of the four generic templates to xor_speed(), in the order
 * listed.  xor_speed() is defined outside this file; presumably it
 * benchmarks the template it is given -- confirm against its definition.
 */
#define XOR_TRY_TEMPLATES					\
	do {							\
		xor_speed(&xor_block_8regs);			\
		xor_speed(&xor_block_8regs_p);			\
		xor_speed(&xor_block_32regs);			\
		xor_speed(&xor_block_32regs_p);			\
	} while (0)
719