/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.tmpl>

	.text

/*
 * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
 * len is in words and is always >= 5.
 *
 * In practice len == 5, but this is not guaranteed.  So this code does not
 * attempt to use doubleword instructions.
 *
 * Returns the folded, complemented 16-bit checksum in the low
 * half of r3.
 */
_GLOBAL(ip_fast_csum)
	lwz	r0,0(r3)	/* first word */
	lwzu	r5,4(r3)	/* second word; r3 now points at it */
	addic.	r4,r4,-2	/* two words consumed */
	addc	r0,r0,r5	/* start the carry chain */
	mtctr	r4		/* remaining words -> CTR */
	blelr-			/* nothing left (len <= 2 words): return */
1:	lwzu	r4,4(r3)	/* sum remaining words, propagating carry */
	adde	r0,r0,r4
	bdnz	1b
	addze	r0,r0		/* add in final carry */
	rldicl	r4,r0,32,0	/* fold two 32-bit halves together */
	add	r0,r0,r4
	srdi	r0,r0,32
	rlwinm	r3,r0,16,0,31	/* fold two halves together */
	add	r3,r0,r3
	not	r3,r3		/* 1's complement */
	srwi	r3,r3,16	/* result in low 16 bits */
	blr

/*
 * Compute checksum of TCP or UDP pseudo-header:
 *   csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
 * No real gain trying to do this specially for 64 bit, but
 * the 32 bit addition may spill into the upper bits of
 * the doubleword so we still must fold it down from 64.
 *
 * Returns the folded, complemented 16-bit checksum in the low
 * half of r3.
 */
_GLOBAL(csum_tcpudp_magic)
	rlwimi	r5,r6,16,0,15	/* put proto in upper half of len */
	addc	r0,r3,r4	/* add 4 32-bit words together */
	adde	r0,r0,r5
	adde	r0,r0,r7
	rldicl	r4,r0,32,0	/* fold 64 bit value */
	add	r0,r4,r0
	srdi	r0,r0,32
	rlwinm	r3,r0,16,0,31	/* fold two halves together */
	add	r3,r0,r3
	not	r3,r3		/* 1's complement */
	srwi	r3,r3,16	/* result in low 16 bits */
	blr

/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * This code assumes at least halfword alignment, though the length
 * can be any number of bytes.  The sum is accumulated in r5.
 *
 * csum_partial(r3=buff, r4=len, r5=sum)
 *
 * Returns the 32-bit (folded from 64) unfinished checksum in r3.
 * Note: the final odd byte, if any, is placed in the UPPER byte
 * of a halfword, consistent with big-endian 1's complement folding.
 */
_GLOBAL(csum_partial)
	subi	r3,r3,8		/* we'll offset by 8 for the loads */
	srdi.	r6,r4,3		/* divide by 8 for doubleword count */
	addic	r5,r5,0		/* clear carry */
	beq	3f		/* if we're doing < 8 bytes */
	andi.	r0,r3,2		/* aligned on a word boundary already? */
	beq+	1f
	lhz	r6,8(r3)	/* do 2 bytes to get aligned */
	addi	r3,r3,2
	subi	r4,r4,2
	addc	r5,r5,r6
	srdi.	r6,r4,3		/* recompute number of doublewords */
	beq	3f		/* any left? */
1:	mtctr	r6
2:	ldu	r6,8(r3)	/* main sum loop */
	adde	r5,r5,r6
	bdnz	2b
	andi.	r4,r4,7		/* compute bytes left to sum after doublewords */
3:	cmpi	0,r4,4		/* is at least a full word left? */
	blt	4f
	lwz	r6,8(r3)	/* sum this word */
	addi	r3,r3,4
	subi	r4,r4,4
	adde	r5,r5,r6
4:	cmpi	0,r4,2		/* is at least a halfword left? */
	blt+	5f
	lhz	r6,8(r3)	/* sum this halfword */
	addi	r3,r3,2
	subi	r4,r4,2
	adde	r5,r5,r6
5:	cmpi	0,r4,1		/* is at least a byte left? */
	bne+	6f
	lbz	r6,8(r3)	/* sum this byte */
	slwi	r6,r6,8		/* this byte is assumed to be the upper byte of a halfword */
	adde	r5,r5,r6
6:	addze	r5,r5		/* add in final carry */
	rldicl	r4,r5,32,0	/* fold two 32-bit halves together */
	add	r3,r4,r5
	srdi	r3,r3,32
	blr

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively, and (for an error on
 * src) zeroes the rest of dst.
 *
 * This code needs to be reworked to take advantage of 64 bit sum+copy.
 * However, due to tokenring halfword alignment problems this will be very
 * tricky.  For now we'll leave it until we instrument it somehow.
 *
 * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err)
 *
 * The numbered labels 81-84 (loads from src) and 91-97 (stores to dst)
 * are the faulting instructions registered in __ex_table below; the
 * fixup handlers they map to follow this function.
 */
_GLOBAL(csum_partial_copy_generic)
	addic	r0,r6,0		/* sum -> r0, clear carry */
	subi	r3,r3,4		/* offset by 4 for the update-form loads */
	subi	r4,r4,4		/* and stores */
	srwi.	r6,r5,2		/* word count */
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r9,r4,2		/* Align dst to longword boundary */
	beq+	1f
81:	lhz	r6,4(r3)	/* do 2 bytes to get aligned */
	addi	r3,r3,2
	subi	r5,r5,2
91:	sth	r6,4(r4)
	addi	r4,r4,2
	addc	r0,r0,r6
	srwi.	r6,r5,2		/* # words to do */
	beq	3f
1:	mtctr	r6
82:	lwzu	r6,4(r3)	/* the bdnz has zero overhead, so it should */
92:	stwu	r6,4(r4)	/* be unnecessary to unroll this loop */
	adde	r0,r0,r6
	bdnz	82b
	andi.	r5,r5,3		/* bytes left after the word loop */
3:	cmpi	0,r5,2		/* at least a halfword left? */
	blt+	4f
83:	lhz	r6,4(r3)
	addi	r3,r3,2
	subi	r5,r5,2
93:	sth	r6,4(r4)
	addi	r4,r4,2
	adde	r0,r0,r6
4:	cmpi	0,r5,1		/* exactly one byte left? */
	bne+	5f
84:	lbz	r6,4(r3)
94:	stb	r6,4(r4)
	slwi	r6,r6,8		/* Upper byte of word */
	adde	r0,r0,r6
5:	addze	r3,r0		/* add in final carry (unlikely with 64-bit regs) */
	rldicl	r4,r3,32,0	/* fold 64 bit value */
	add	r3,r4,r3
	srdi	r3,r3,32
	blr

/* These shouldn't go in the fixup section, since that would
   cause the ex_table addresses to get out of order. */

/*
 * Fault on the halfword src load at 81: -- store a zero halfword
 * instead, then fall into the word/byte zeroing below to clear
 * the rest of dst before reporting the error.
 */
	.globl src_error_1
src_error_1:
	li	r6,0
	subi	r5,r5,2
95:	sth	r6,4(r4)
	addi	r4,r4,2
	srwi.	r6,r5,2
	beq	3f
	mtctr	r6
/* Fault in the main word copy loop at 82: -- zero remaining words of dst */
	.globl src_error_2
src_error_2:
	li	r6,0
96:	stwu	r6,4(r4)
	bdnz	96b
3:	andi.	r5,r5,3
	beq	src_error
/* Fault on a trailing src load (83:/84:) -- zero remaining bytes of dst */
	.globl src_error_3
src_error_3:
	li	r6,0
	mtctr	r5
	addi	r4,r4,3
97:	stbu	r6,1(r4)
	bdnz	97b
/* Report -EFAULT via *src_err (r7), if the caller supplied one */
	.globl src_error
src_error:
	cmpi	0,r7,0
	beq	1f
	li	r6,-EFAULT
	stw	r6,0(r7)
1:	addze	r3,r0		/* return the sum accumulated so far */
	blr

/* Fault on a dst store -- report -EFAULT via *dst_err (r8), if non-NULL */
	.globl dst_error
dst_error:
	cmpi	0,r8,0
	beq	1f
	li	r6,-EFAULT
	stw	r6,0(r8)
1:	addze	r3,r0		/* return the sum accumulated so far */
	blr

/* Exception table: (faulting address, fixup handler) pairs */
.section __ex_table,"a"
	.align	3		/* 8-byte (.llong) entries */
	.llong	81b,src_error_1
	.llong	91b,dst_error
	.llong	82b,src_error_2
	.llong	92b,dst_error
	.llong	83b,src_error_3
	.llong	93b,dst_error
	.llong	84b,src_error_3
	.llong	94b,dst_error
	.llong	95b,dst_error
	.llong	96b,dst_error
	.llong	97b,dst_error