/*
 *  linux/arch/arm/lib/csumpartialcopygeneric.S
 *
 *  Copyright (C) 1995-2001 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/*
 * unsigned int
 * csum_partial_copy_xxx(const char *src, char *dst, int len, int sum, )
 *  r0 = src, r1 = dst, r2 = len, r3 = sum
 *  Returns : r0 = checksum
 *
 * Note that 'tst' and 'teq' preserve the carry flag.
 *
 * Copies len bytes from src to dst and, in the same pass, adds the
 * copied data into sum.  The additions form one long add-with-carry
 * chain: every 'adcs' feeds its carry-out to the next one, and .done
 * folds the last carry back in with a final 'adc'.  Any instruction
 * placed between two 'adcs' must therefore leave C untouched.  Besides
 * 'tst'/'teq', the code relies on this for 'sub'/'mov'/'orr' without
 * the S suffix, and for 'ands'/'bics' with the small immediates used
 * here (for an unrotated immediate the shifter carry-out is the
 * incoming C).
 *
 * Layout: the entry point is at FN_ENTRY, roughly a third of the way
 * down.  The fragments placed before it (.zero, .dst_unaligned, .less8)
 * are only reached by branches from the entry code.
 *
 * Registers: r4-r8 and ip are used as scratch; lr is used by the
 * 'blne .dst_unaligned' call; fp is set up in the prologue.
 *
 * This file uses but does not define FN_ENTRY, save_regs, load_regs,
 * load1b, load2b, load1l, load2l and load4l.  From the way they are
 * used, loadNb/loadNl presumably load N bytes / N 32-bit words from
 * [src] into the named registers and advance src, without altering C.
 * NOTE(review): confirm against the macro definitions in whatever
 * file supplies them.
 *
 * The shift directions used to splice misaligned source words (lsr to
 * drop leading bytes, lsl to append following ones) correspond to
 * little-endian byte order.
 */

src	.req	r0
dst	.req	r1
len	.req	r2
sum	.req	r3

		/*
		 * Zero-length request: hand back the incoming sum unchanged.
		 * load_regs is presumably the matching epilogue for save_regs
		 * and returns to the caller (nothing here is written to run
		 * after it) -- confirm against the macro.
		 */
.zero:		mov	r0, sum
		load_regs	ea

		/*
		 * Align an unaligned destination pointer.  We know that
		 * we have >= 8 bytes here, so we don't need to check
		 * the length.  Note that the source pointer hasn't been
		 * aligned yet.
		 *
		 * Called with 'blne' from the entry code and returns through
		 * lr.  Copies 1, 2 or 3 bytes so that dst becomes a multiple
		 * of 4, and leaves the running carry in C for the caller.
		 */
.dst_unaligned:	tst	dst, #1
		beq	.dst_16bit		@ dst is even, so dst & 3 == 2

		load1b	ip
		sub	len, len, #1		@ no S suffix: C is kept
		adcs	sum, sum, ip, lsl #8	@ update checksum
		strb	ip, [dst], #1
		tst	dst, #2
		moveq	pc, lr			@ dst is now 32bit aligned

		/* Copy two bytes: first one in bits 0-7, second in bits 8-15. */
.dst_16bit:	load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8
		strb	r8, [dst], #1
		adcs	sum, sum, ip, lsl #8
		strb	ip, [dst], #1
		mov	pc, lr			@ dst is now 32bit aligned

		/*
		 * Handle 0 to 7 bytes, with any alignment of source and
		 * destination pointers.  Note that when we get here, C = 0
		 * (we arrive via 'blo' after 'cmp len, #8', and 'blo' is only
		 * taken with C clear).
		 */
.less8:		teq	len, #0			@ check for zero count
		beq	.zero

		/* we must have at least one byte. */
		tst	dst, #1			@ dst 16-bit aligned
		beq	.less8_aligned

		/* Align dst */
		load1b	ip
		sub	len, len, #1
		adcs	sum, sum, ip, lsl #8	@ update checksum
		strb	ip, [dst], #1
		tst	len, #6			@ at least two bytes left?
		beq	.less8_byteonly

		/*
		 * Two bytes per iteration.  len < 8 here, so bits 1-2 of len
		 * are non-zero exactly while len >= 2.
		 */
1:		load2b	r8, ip
		sub	len, len, #2
		adcs	sum, sum, r8
		strb	r8, [dst], #1
		adcs	sum, sum, ip, lsl #8
		strb	ip, [dst], #1
.less8_aligned:	tst	len, #6
		bne	1b
.less8_byteonly:
		tst	len, #1			@ one odd byte left over?
		beq	.done
		load1b	r8
		adcs	sum, sum, r8		@ update checksum
		strb	r8, [dst], #1
		b	.done

		/*
		 * Entry point.  FN_ENTRY presumably emits the function label
		 * for the particular variant being built -- confirm against
		 * the includer.  The three instructions after it keep the
		 * entry sp in ip and set fp = entry sp - 4 around the
		 * register save done by save_regs.
		 */
FN_ENTRY
		mov	ip, sp
		save_regs
		sub	fp, ip, #4

		cmp	len, #8			@ Ensure that we have at least
		blo	.less8			@ 8 bytes to copy.

		adds	sum, sum, #0		@ C = 0
		tst	dst, #3			@ Test destination alignment
		blne	.dst_unaligned		@ align destination, return here

		/*
		 * Ok, the dst pointer is now 32bit aligned, and we know
		 * that we must have more than 4 bytes to copy.  Note
		 * that C contains the carry from the dst alignment above.
		 */

		tst	src, #3			@ Test source alignment
		bne	.src_not_aligned

		/* Routine for src & dst aligned */

		bics	ip, len, #15		@ ip = bytes in whole 16-byte chunks
		beq	2f

		/* Main loop: four words per iteration. */
1:		load4l	r4, r5, r6, r7
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		sub	ip, ip, #16		@ no S suffix: C is kept
		teq	ip, #0			@ loop test that keeps C
		bne	1b

		/* Remaining whole words: bit 3 of len = two words, bit 2 = one. */
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r4, r5
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		tst	ip, #4
		beq	4f

3:		load1l	r4
		str	r4, [dst], #4
		adcs	sum, sum, r4

		/*
		 * Trailing 0-3 bytes.  A whole word is loaded even though
		 * only the low 1-3 bytes of it are stored and summed.
		 */
4:		ands	len, len, #3
		beq	.done
		load1l	r4
		tst	len, #2
		beq	.exit
		adcs	sum, sum, r4, lsl #16	@ add low two bytes (in bits 16-31)
		strb	r4, [dst], #1
		mov	r4, r4, lsr #8
		strb	r4, [dst], #1
		mov	r4, r4, lsr #8		@ next byte, if any, now in bits 0-7

		/*
		 * Shared tail, also used by the misaligned-source paths:
		 * if len is odd, store the low byte of r4 and add it in.
		 */
.exit:		tst	len, #1
		strneb	r4, [dst], #1
		andne	r4, r4, #255
		adcnes	sum, sum, r4

		/*
		 * If the dst pointer was not 16-bit aligned, we
		 * need to rotate the checksum here to get around
		 * the inefficient byte manipulations in the
		 * architecture independent code.
		 *
		 * Common exit: fold the last carry into the result, then
		 * reload the value at [sp] (the original dst according to the
		 * comment below; presumably stored there by save_regs --
		 * confirm against the macro).  If it is odd, the movne/orrne
		 * pair rotates r0 left by 8 bits.  'sum' is free to be
		 * reused as scratch here because the result is already in r0.
		 */
.done:		adc	r0, sum, #0
		ldr	sum, [sp, #0]		@ dst
		tst	sum, #1
		movne	sum, r0, lsl #8
		orrne	r0, sum, r0, lsr #24
		load_regs	ea

		/*
		 * Source is not word aligned (dst already is).  Round src
		 * down to a word boundary and load the first word into r4;
		 * then, in each variant below, r4 always carries the source
		 * bytes that have been loaded but not yet stored, and every
		 * output word is built as (leftover in r4) | (next word
		 * shifted up past it).
		 *
		 * The pending carry is folded into sum first because the
		 * 'cmp' below overwrites C.
		 */
.src_not_aligned:
		adc	sum, sum, #0		@ include C from dst alignment
		and	ip, src, #3		@ ip = source misalignment, 1..3
		bic	src, src, #3
		load1l	r4
		cmp	ip, #2
		beq	.src2_aligned
		bhi	.src3_aligned

		/*
		 * Misalignment 1: three leftover bytes in r4.  Falling
		 * through means ip < 2, so the cmp left C clear.
		 */
		mov	r4, r4, lsr #8		@ C = 0
		bics	ip, len, #15		@ ip = bytes in whole 16-byte chunks
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lsl #24
		mov	r5, r5, lsr #8
		orr	r5, r5, r6, lsl #24
		mov	r6, r6, lsr #8
		orr	r6, r6, r7, lsl #24
		mov	r7, r7, lsr #8
		orr	r7, r7, r8, lsl #24
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lsr #8		@ leftover for the next round
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12		@ remaining whole words
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lsl #24
		mov	r5, r5, lsr #8
		orr	r5, r5, r6, lsl #24
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lsr #8
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lsl #24
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lsr #8
		/*
		 * Trailing 0-3 bytes: the three leftover bytes in r4 cover
		 * every case, so no further load is needed.
		 */
4:		ands	len, len, #3
		beq	.done
		tst	len, #2
		beq	.exit
		adcs	sum, sum, r4, lsl #16
		strb	r4, [dst], #1
		mov	r4, r4, lsr #8
		strb	r4, [dst], #1
		mov	r4, r4, lsr #8
		b	.exit

		/*
		 * Misalignment 2: two leftover bytes in r4.  The cmp above
		 * left C set on this path, so clear it before the chain.
		 */
.src2_aligned:	mov	r4, r4, lsr #16
		adds	sum, sum, #0		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lsl #16
		mov	r5, r5, lsr #16
		orr	r5, r5, r6, lsl #16
		mov	r6, r6, lsr #16
		orr	r6, r6, r7, lsl #16
		mov	r7, r7, lsr #16
		orr	r7, r7, r8, lsl #16
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lsr #16
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lsl #16
		mov	r5, r5, lsr #16
		orr	r5, r5, r6, lsl #16
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lsr #16
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lsl #16
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lsr #16
		/*
		 * Trailing 0-3 bytes: r4 holds only two of them, so a third
		 * byte has to be fetched separately before going to .exit.
		 */
4:		ands	len, len, #3
		beq	.done
		tst	len, #2
		beq	.exit
		adcs	sum, sum, r4, lsl #16
		strb	r4, [dst], #1
		mov	r4, r4, lsr #8
		strb	r4, [dst], #1
		tst	len, #1
		beq	.done
		load1b	r4
		b	.exit

		/*
		 * Misalignment 3: one leftover byte in r4.  As for
		 * .src2_aligned, C is set on arrival and is cleared first.
		 */
.src3_aligned:	mov	r4, r4, lsr #24
		adds	sum, sum, #0		@ C = 0
		bics	ip, len, #15
		beq	2f
1:		load4l	r5, r6, r7, r8
		orr	r4, r4, r5, lsl #8
		mov	r5, r5, lsr #24
		orr	r5, r5, r6, lsl #8
		mov	r6, r6, lsr #24
		orr	r6, r6, r7, lsl #8
		mov	r7, r7, lsr #24
		orr	r7, r7, r8, lsl #8
		stmia	dst!, {r4, r5, r6, r7}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		adcs	sum, sum, r6
		adcs	sum, sum, r7
		mov	r4, r8, lsr #24
		sub	ip, ip, #16
		teq	ip, #0
		bne	1b
2:		ands	ip, len, #12
		beq	4f
		tst	ip, #8
		beq	3f
		load2l	r5, r6
		orr	r4, r4, r5, lsl #8
		mov	r5, r5, lsr #24
		orr	r5, r5, r6, lsl #8
		stmia	dst!, {r4, r5}
		adcs	sum, sum, r4
		adcs	sum, sum, r5
		mov	r4, r6, lsr #24
		tst	ip, #4
		beq	4f
3:		load1l	r5
		orr	r4, r4, r5, lsl #8
		str	r4, [dst], #4
		adcs	sum, sum, r4
		mov	r4, r5, lsr #24
		/*
		 * Trailing 0-3 bytes: r4 holds only the first one.  When two
		 * or more are needed, that byte is added via bits 16-23 and
		 * stored, then a fresh word is loaded: its low byte is stored
		 * and added via bits 24-31, and the shift leaves its next
		 * byte in bits 0-7 for .exit.
		 */
4:		ands	len, len, #3
		beq	.done
		tst	len, #2
		beq	.exit
		adcs	sum, sum, r4, lsl #16
		strb	r4, [dst], #1
		load1l	r4
		strb	r4, [dst], #1
		adcs	sum, sum, r4, lsl #24
		mov	r4, r4, lsr #8
		b	.exit