/* $Id: VIScsum.S,v 1.6 2000/02/20 23:21:39 davem Exp $
 * VIScsum.S: High bandwidth IP checksumming utilizing the UltraSparc
 *            Visual Instruction Set.
 *
 * Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
 * Copyright (C) 2000 David S. Miller (davem@redhat.com)
 *
 * Based on older sparc32/sparc64 checksum.S, which is:
 *
 *      Copyright(C) 1995 Linus Torvalds
 *      Copyright(C) 1995 Miguel de Icaza
 *      Copyright(C) 1996, 1997 David S. Miller
 *      derived from:
 *        Linux/Alpha checksum c-code
 *        Linux/ix86 inline checksum assembly
 *        RFC1071 Computing the Internet Checksum (esp. Jacobsons m68k code)
 *        David Mosberger-Tang for optimized reference c-code
 *        BSD4.4 portable checksum routine
 */

/* Syntax/target: GNU as for SPARC, run through the C preprocessor.
 * `;` separates statements inside the macros below, `N:` / `Nf` / `Nb`
 * are local numeric labels, and the instruction following every
 * branch/jmpl/retl is its delay slot (indented by one extra column).
 */

/* Offset from %sp of the 8-byte scratch slot END_THE_TRICK uses to move
 * a double FP register into an integer register.
 * NOTE(review): 2175 looks like the V9 stack bias (2047) plus 128 --
 * confirm against the ABI.
 */
#ifdef __sparc_v9__
#define STACKOFF 2175
#else
#define STACKOFF 64
#endif

#ifdef __KERNEL__
#include <asm/head.h>
#include <asm/asi.h>
#include <asm/visasm.h>
#include <asm/asm_offsets.h>
#else
#define ASI_BLK_P 0xf0
/* NOTE(review): name looks like a transposition of FPRS_FEF; it is not
 * referenced anywhere in this file.
 */
#define FRPS_FEF 0x04
#endif

/* Good night, SunSoft engineers. Sleep sweetly.
 * This has a couple of tricks in and those
 * tricks are UltraLinux trade secrets :))
 */

/* Conventions shared by START_THE_TRICK / DO_THE_TRICK / END_THE_TRICK:
 *
 *  - %o2 is the integer accumulator that collects carry counts.
 *  - fcmpgt32 writes a 2-bit mask (one bit per 32-bit lane) to an
 *    integer register.  The recurring pair `inc r; srl r, 1, r`
 *    computes (mask + 1) >> 1, i.e. 0,1,1,2 for masks 0,1,2,3 -- the
 *    number of lanes that compared true -- which is then added to (or,
 *    near the end of END_THE_TRICK, subtracted from) %o2.
 *  - The macros are software pipelined: each one finishes the
 *    inc/srl/add sequence for compare results left pending in %g5, %g7
 *    and %o3 by the previous macro, and leaves its own last results
 *    pending in the same registers.
 *  - Clobbers: %g1, %g2, %g3, %g5, %g7, %o3, %o4, %o5 (subsets per
 *    macro, see below).
 */

/* START_THE_TRICK(fz, f0..f10)
 *   fz      double register the caller has zeroed with fzero.
 *   f0..f10 first six double registers of the first loaded 64-byte block.
 * Compares fz against each of the six registers.  The counts for
 * f0/f2/f4 are added to %o2; %g5 is left incremented but not yet
 * shifted, and %g7 / %o3 are left as raw masks for the next macro.
 */
#define START_THE_TRICK(fz,f0,f2,f4,f6,f8,f10)                                  \
        fcmpgt32        %fz, %f0, %g1           /* FPM  Group           */;     \
        fcmpgt32        %fz, %f2, %g2           /* FPM  Group           */;     \
        fcmpgt32        %fz, %f4, %g3           /* FPM  Group           */;     \
        inc             %g1                     /* IEU0 Group           */;     \
        fcmpgt32        %fz, %f6, %g5           /* FPM                  */;     \
        srl             %g1, 1, %g1             /* IEU0 Group           */;     \
        fcmpgt32        %fz, %f8, %g7           /* FPM                  */;     \
        inc             %g2                     /* IEU0 Group           */;     \
        fcmpgt32        %fz, %f10, %o3          /* FPM                  */;     \
        srl             %g2, 1, %g2             /* IEU0 Group           */;     \
        inc             %g3                     /* IEU1                 */;     \
        srl             %g3, 1, %g3             /* IEU0 Group           */;     \
        add             %o2, %g1, %o2           /* IEU1                 */;     \
        add             %o2, %g2, %o2           /* IEU0 Group           */;     \
        inc             %g5                     /* IEU1                 */;     \
        add             %o2, %g3, %o2           /* IEU0 Group           */;

/* DO_THE_TRICK(O12, O14, f0..f14, F0..F14)
 *   f0..f14  eight double registers holding the sums so far.
 *   F0..F14  eight double registers holding a freshly loaded block; each
 *            gets Fn = Fn + fn (fpadd32, per 32-bit lane) and so becomes
 *            the new set of sums.
 *   O12,O14  registers compared against f12/f14.  In every invocation in
 *            this file they are the registers that were the f12/f14
 *            operands of the preceding step (or the zeroed registers on
 *            the first step), i.e. the compare for lanes 12/14 is done
 *            one step late.
 * For n = 0..10 the compare `fn > Fn` (old sum > new sum) is issued
 * here; the results for 0/2/4 are counted into %o2 before the macro
 * ends, the ones for 6/8/10 stay pending in %g5 (already incremented),
 * %g7 and %o3.
 */
#define DO_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,F0,F2,F4,F6,F8,F10,F12,F14) \
        srl             %g5, 1, %g5             /* IEU0 Group           */;     \
        fpadd32         %F0, %f0, %F0           /* FPA                  */;     \
        fcmpgt32        %O12, %f12, %o4         /* FPM                  */;     \
        inc             %g7                     /* IEU0 Group           */;     \
        fpadd32         %F2, %f2, %F2           /* FPA                  */;     \
        fcmpgt32        %O14, %f14, %o5         /* FPM                  */;     \
        add             %o2, %g5, %o2           /* IEU1 Group           */;     \
        fpadd32         %F4, %f4, %F4           /* FPA                  */;     \
        fcmpgt32        %f0, %F0, %g1           /* FPM                  */;     \
        srl             %g7, 1, %g7             /* IEU0 Group           */;     \
        fpadd32         %F6, %f6, %F6           /* FPA                  */;     \
        fcmpgt32        %f2, %F2, %g2           /* FPM                  */;     \
        add             %o2, %g7, %o2           /* IEU0 Group           */;     \
        fpadd32         %F8, %f8, %F8           /* FPA                  */;     \
        fcmpgt32        %f4, %F4, %g3           /* FPM                  */;     \
        inc             %o3                     /* IEU0 Group           */;     \
        fpadd32         %F10, %f10, %F10        /* FPA                  */;     \
        fcmpgt32        %f6, %F6, %g5           /* FPM                  */;     \
        srl             %o3, 1, %o3             /* IEU0 Group           */;     \
        fpadd32         %F12, %f12, %F12        /* FPA                  */;     \
        fcmpgt32        %f8, %F8, %g7           /* FPM                  */;     \
        add             %o2, %o3, %o2           /* IEU0 Group           */;     \
        fpadd32         %F14, %f14, %F14        /* FPA                  */;     \
        fcmpgt32        %f10, %F10, %o3         /* FPM                  */;     \
        inc             %o4                     /* IEU0 Group           */;     \
        inc             %o5                     /* IEU1                 */;     \
        srl             %o4, 1, %o4             /* IEU0 Group           */;     \
        inc             %g1                     /* IEU1                 */;     \
        srl             %o5, 1, %o5             /* IEU0 Group           */;     \
        add             %o2, %o4, %o2           /* IEU1                 */;     \
        srl             %g1, 1, %g1             /* IEU0 Group           */;     \
        add             %o2, %o5, %o2           /* IEU1                 */;     \
        inc             %g2                     /* IEU0 Group           */;     \
        add             %o2, %g1, %o2           /* IEU1                 */;     \
        srl             %g2, 1, %g2             /* IEU0 Group           */;     \
        inc             %g3                     /* IEU1                 */;     \
        srl             %g3, 1, %g3             /* IEU0 Group           */;     \
        add             %o2, %g2, %o2           /* IEU1                 */;     \
        inc             %g5                     /* IEU0 Group           */;     \
        add             %o2, %g3, %o2           /* IEU0                 */;

/* END_THE_TRICK(O12, O14, f0..f14, S0..S3, T0, T1, U0, fz)
 *   f0..f14  the eight final sum registers.
 *   O12,O14  as in DO_THE_TRICK (deferred compare against f12/f14).
 *   S0..S3, T0, T1, U0, fz   scratch double registers; fz is zeroed here.
 * Drains the compare results pending in %g5/%g7/%o3, then reduces the
 * eight sums pairwise with fpadd32:
 *      S0 = f2+f0   S1 = f6+f4   S2 = f10+f8   S3 = f14+f12
 *      T0 = S0+S1   T1 = S2+S3   U0 = T0+T1
 * The count of every `operand > result` compare in that tree is added to
 * %o2.  The counts of `fz > x` for x = f2, f6, f10, f14, S1, S3, T1, U0
 * are subtracted from %o2.  Finally U0 is bounced through
 * [%sp + STACKOFF] into %o5 as one 64-bit value and added to %o2 with an
 * end-around carry (%xcc carry set => +1).
 * Uses local label 33; result is left in %o2.
 */
#define END_THE_TRICK(O12,O14,f0,f2,f4,f6,f8,f10,f12,f14,S0,S1,S2,S3,T0,T1,U0,fz) \
        srl             %g5, 1, %g5             /* IEU0 Group           */;     \
        fpadd32         %f2, %f0, %S0           /* FPA                  */;     \
        fcmpgt32        %O12, %f12, %o4         /* FPM                  */;     \
        inc             %g7                     /* IEU0 Group           */;     \
        fpadd32         %f6, %f4, %S1           /* FPA                  */;     \
        fcmpgt32        %O14, %f14, %o5         /* FPM                  */;     \
        srl             %g7, 1, %g7             /* IEU0 Group           */;     \
        fpadd32         %f10, %f8, %S2          /* FPA                  */;     \
        fcmpgt32        %f0, %S0, %g1           /* FPM                  */;     \
        inc             %o3                     /* IEU0 Group           */;     \
        fpadd32         %f14, %f12, %S3         /* FPA                  */;     \
        fcmpgt32        %f4, %S1, %g2           /* FPM                  */;     \
        add             %o2, %g5, %o2           /* IEU0 Group           */;     \
        fpadd32         %S0, %S1, %T0           /* FPA                  */;     \
        fcmpgt32        %f8, %S2, %g3           /* FPM                  */;     \
        add             %o2, %g7, %o2           /* IEU0 Group           */;     \
        fzero           %fz                     /* FPA                  */;     \
        fcmpgt32        %f12, %S3, %g5          /* FPM                  */;     \
        srl             %o3, 1, %o3             /* IEU0 Group           */;     \
        fpadd32         %S2, %S3, %T1           /* FPA                  */;     \
        fcmpgt32        %S0, %T0, %g7           /* FPM                  */;     \
        add             %o2, %o3, %o2           /* IEU0 Group           */;     \
        fpadd32         %T0, %T1, %U0           /* FPA                  */;     \
        fcmpgt32        %S2, %T1, %o3           /* FPM                  */;     \
        inc             %o4                     /* IEU0 Group           */;     \
        inc             %o5                     /* IEU1                 */;     \
        srl             %o4, 1, %o4             /* IEU0 Group           */;     \
        inc             %g1                     /* IEU1                 */;     \
        add             %o2, %o4, %o2           /* IEU0 Group           */;     \
        fcmpgt32        %fz, %f2, %o4           /* FPM                  */;     \
        srl             %o5, 1, %o5             /* IEU0 Group           */;     \
        inc             %g2                     /* IEU1                 */;     \
        add             %o2, %o5, %o2           /* IEU0 Group           */;     \
        fcmpgt32        %fz, %f6, %o5           /* FPM                  */;     \
        srl             %g1, 1, %g1             /* IEU0 Group           */;     \
        inc             %g3                     /* IEU1                 */;     \
        add             %o2, %g1, %o2           /* IEU0 Group           */;     \
        fcmpgt32        %fz, %f10, %g1          /* FPM                  */;     \
        srl             %g2, 1, %g2             /* IEU0 Group           */;     \
        inc             %g5                     /* IEU1                 */;     \
        add             %o2, %g2, %o2           /* IEU0 Group           */;     \
        fcmpgt32        %fz, %f14, %g2          /* FPM                  */;     \
        srl             %g3, 1, %g3             /* IEU0 Group           */;     \
        inc             %g7                     /* IEU1                 */;     \
        add             %o2, %g3, %o2           /* IEU0 Group           */;     \
        fcmpgt32        %fz, %S1, %g3           /* FPM                  */;     \
        srl             %g5, 1, %g5             /* IEU0 Group           */;     \
        inc             %o3                     /* IEU1                 */;     \
        add             %o2, %g5, %o2           /* IEU0 Group           */;     \
        fcmpgt32        %fz, %S3, %g5           /* FPM                  */;     \
        srl             %g7, 1, %g7             /* IEU0 Group           */;     \
        inc             %o4                     /* IEU1                 */;     \
        add             %o2, %g7, %o2           /* IEU0 Group           */;     \
        fcmpgt32        %fz, %T1, %g7           /* FPM                  */;     \
        srl             %o3, 1, %o3             /* IEU0 Group           */;     \
        inc             %o5                     /* IEU1                 */;     \
        add             %o2, %o3, %o2           /* IEU0 Group           */;     \
        fcmpgt32        %T0, %U0, %o3           /* FPM                  */;     \
        srl             %o4, 1, %o4             /* IEU0 Group           */;     \
        inc             %g1                     /* IEU1                 */;     \
        sub             %o2, %o4, %o2           /* IEU0 Group           */;     \
        fcmpgt32        %fz, %U0, %o4           /* FPM                  */;     \
        srl             %o5, 1, %o5             /* IEU0 Group           */;     \
        inc             %g2                     /* IEU1                 */;     \
        srl             %g1, 1, %g1             /* IEU0 Group           */;     \
        sub             %o2, %o5, %o2           /* IEU1                 */;     \
        std             %U0, [%sp + STACKOFF]   /* Store                */;     \
        srl             %g2, 1, %g2             /* IEU0 Group           */;     \
        sub             %o2, %g1, %o2           /* IEU1                 */;     \
        inc             %g3                     /* IEU0 Group           */;     \
        sub             %o2, %g2, %o2           /* IEU1                 */;     \
        srl             %g3, 1, %g3             /* IEU0 Group           */;     \
        inc             %g5                     /* IEU1                 */;     \
        srl             %g5, 1, %g5             /* IEU0 Group           */;     \
        sub             %o2, %g3, %o2           /* IEU1                 */;     \
        ldx             [%sp + STACKOFF], %o5   /* Load Group           */;     \
        inc             %g7                     /* IEU0                 */;     \
        sub             %o2, %g5, %o2           /* IEU1                 */;     \
        srl             %g7, 1, %g7             /* IEU0 Group           */;     \
        inc             %o3                     /* IEU1                 */;     \
        srl             %o3, 1, %o3             /* IEU0 Group           */;     \
        sub             %o2, %g7, %o2           /* IEU1                 */;     \
        inc             %o4                     /* IEU0 Group           */;     \
        add             %o2, %o3, %o2           /* IEU1                 */;     \
        srl             %o4, 1, %o4             /* IEU0 Group           */;     \
        sub             %o2, %o4, %o2           /* IEU0 Group           */;     \
        addcc           %o2, %o5, %o2           /* IEU1 Group           */;     \
        bcs,a,pn        %xcc, 33f               /* CTI                  */;     \
         add            %o2, 1, %o2             /* IEU0                 */;     \
33:                                             /* That's it            */;

/* CSUM_LASTCHUNK(offset)
 * Adds the two 64-bit words at [%o0 - offset - 0x10] and
 * [%o0 - offset - 0x08] into %o2, adding 1 back whenever the 64-bit add
 * carries out.  Clobbers %g2, %g3; uses local labels 31 and 32.
 * Expands to exactly 8 instructions (32 bytes of code per 16 bytes of
 * data); the computed jump at label 22 in csum_partial relies on that
 * size.
 */
#define CSUM_LASTCHUNK(offset)                                                  \
        ldx             [%o0 - offset - 0x10], %g2;                             \
        ldx             [%o0 - offset - 0x08], %g3;                             \
        addcc           %g2, %o2, %o2;                                          \
        bcs,a,pn        %xcc, 31f;                                              \
         add            %o2, 1, %o2;                                            \
31:     addcc           %g3, %o2, %o2;                                          \
        bcs,a,pn        %xcc, 32f;                                              \
         add            %o2, 1, %o2;                                            \
32:

/*----------------------------------------------------------------------
 * csum_partial
 *
 * In:    %o0 = address of the buffer
 *        %o1 = length in bytes
 *        %o2 = sum to accumulate into
 * Out:   %o0 = resulting sum, zero-extended from 32 bits
 * Leaf routine (returns with retl, no register window of its own).
 * Clobbers: %g1 %g2 %g3 %g5 %g7 %o1-%o5, condition codes; on the VIS
 *        path also %asi, the FP registers and 8 bytes at
 *        [%sp + STACKOFF].
 * NOTE(review): presumably the C prototype is
 *        unsigned int csum_partial(const unsigned char *buff, int len,
 *                                  unsigned int sum);
 *        confirm against the header that declares it.
 *
 * Control-flow map (numeric labels are reused; these are the landmarks):
 *   entry  address not 8-byte aligned:
 *            len < 6       -> 21  (short path)
 *            odd address   -> csump_really_slow
 *            otherwise consume a halfword and/or a word to reach 8-byte
 *            alignment
 *   4:     consume 8/16/32 bytes as needed to reach 64-byte alignment
 *          (skipped -> 20 when fewer than 56 bytes remain)
 *   3:     len < 0xc0 -> 20, else the VIS block-load loop
 *   20:    computed jump into the CSUM_LASTCHUNK table for the
 *          (len & 0xf0) bytes, 16 at a time
 *   23:    no (len & 0xf) remainder -> fold and return, else -> 26
 *   26:    remaining 8 / 4 / 2 / 1 bytes
 *   21:    short path for len < 6 on a buffer that is not 8-byte aligned
 *----------------------------------------------------------------------
 */
        .text
        .globl          csum_partial
        .align          32
csum_partial:
        andcc           %o0, 7, %g0                     /* IEU1 Group                   */
        be,pt           %icc, 4f                        /* CTI                          */
         andcc          %o0, 0x38, %g3                  /* IEU1                         */
        mov             1, %g5                          /* IEU0 Group                   */
        cmp             %o1, 6                          /* IEU1                         */
        bl,pn           %icc, 21f                       /* CTI                          */
         andcc          %o0, 1, %g0                     /* IEU1 Group                   */
        bne,pn          %icc, csump_really_slow         /* CTI                          */
         andcc          %o0, 2, %g0                     /* IEU1 Group                   */
        be,pt           %icc, 1f                        /* CTI                          */
         and            %o0, 4, %g7                     /* IEU0                         */

        /* Address bit 1 set: take one halfword, placed in bits 31:16.
         * On a 32-bit carry, %g5 (1 << 16 by then) is added back.
         */
        lduh            [%o0], %g2                      /* Load                         */
        sub             %o1, 2, %o1                     /* IEU0 Group                   */
        add             %o0, 2, %o0                     /* IEU1                         */
        andcc           %o0, 4, %g7                     /* IEU1 Group                   */
        sll             %g5, 16, %g5                    /* IEU0                         */
        sll             %g2, 16, %g2                    /* IEU0 Group                   */
        addcc           %g2, %o2, %o2                   /* IEU1 Group (regdep)          */
        bcs,a,pn        %icc, 1f                        /* CTI                          */
         add            %o2, %g5, %o2                   /* IEU0                         */

        /* %g7 = address & 4: if set, take one 32-bit word. */
1:      ld              [%o0], %g2                      /* Load                         */
        brz,a,pn        %g7, 4f                         /* CTI+IEU1 Group               */
         and            %o0, 0x38, %g3                  /* IEU0                         */
        add             %o0, 4, %o0                     /* IEU0 Group                   */
        sub             %o1, 4, %o1                     /* IEU1                         */
        addcc           %g2, %o2, %o2                   /* IEU1 Group                   */
        bcs,a,pn        %icc, 1f                        /* CTI                          */
         add            %o2, 1, %o2                     /* IEU0                         */
1:      and             %o0, 0x38, %g3                  /* IEU1 Group                   */

        /* 8-byte aligned from here on.  %o2 is truncated to 32 bits and
         * then used as a 64-bit accumulator (%xcc carries).
         * %g3 = address & 0x38, %g1 = 0x40 - %g3 = bytes up to the next
         * 64-byte boundary.
         */
4:      srl             %o2, 0, %o2                     /* IEU0 Group                   */
        mov             0x40, %g1                       /* IEU1                         */
        brz,pn          %g3, 3f                         /* CTI+IEU1 Group               */
         sub            %g1, %g3, %g1                   /* IEU0                         */
        cmp             %o1, 56                         /* IEU1 Group                   */
        blu,pn          %icc, 20f                       /* CTI                          */
         andcc          %o0, 8, %g0                     /* IEU1 Group                   */
        be,pn           %icc, 1f                        /* CTI                          */
         ldx            [%o0], %g2                      /* Load                         */
        add             %o0, 8, %o0                     /* IEU0 Group                   */
        sub             %o1, 8, %o1                     /* IEU1                         */
        addcc           %g2, %o2, %o2                   /* IEU1 Group                   */
        bcs,a,pn        %xcc, 1f                        /* CTI                          */
         add            %o2, 1, %o2                     /* IEU0                         */
1:      andcc           %g1, 0x10, %g0                  /* IEU1 Group                   */
        be,pn           %icc, 2f                        /* CTI                          */
         and            %g1, 0x20, %g1                  /* IEU0                         */
        ldx             [%o0], %g2                      /* Load                         */
        ldx             [%o0+8], %g3                    /* Load Group                   */
        add             %o0, 16, %o0                    /* IEU0                         */
        sub             %o1, 16, %o1                    /* IEU1                         */
        addcc           %g2, %o2, %o2                   /* IEU1 Group                   */
        bcs,a,pn        %xcc, 1f                        /* CTI                          */
         add            %o2, 1, %o2                     /* IEU0                         */
1:      addcc           %g3, %o2, %o2                   /* IEU1 Group                   */
        bcs,a,pn        %xcc, 2f                        /* CTI                          */
         add            %o2, 1, %o2                     /* IEU0                         */
2:      brz,pn          %g1, 3f                         /* CTI+IEU1 Group               */
         ldx            [%o0], %g2                      /* Load                         */
        ldx             [%o0+8], %g3                    /* Load Group                   */
        ldx             [%o0+16], %g5                   /* Load Group                   */
        ldx             [%o0+24], %g7                   /* Load Group                   */
        add             %o0, 32, %o0                    /* IEU0                         */
        sub             %o1, 32, %o1                    /* IEU1                         */
        addcc           %g2, %o2, %o2                   /* IEU1 Group                   */
        bcs,a,pn        %xcc, 1f                        /* CTI                          */
         add            %o2, 1, %o2                     /* IEU0                         */
1:      addcc           %g3, %o2, %o2                   /* IEU1 Group                   */
        bcs,a,pn        %xcc, 1f                        /* CTI                          */
         add            %o2, 1, %o2                     /* IEU0                         */
1:      addcc           %g5, %o2, %o2                   /* IEU1 Group                   */
        bcs,a,pn        %xcc, 1f                        /* CTI                          */
         add            %o2, 1, %o2                     /* IEU0                         */
1:      addcc           %g7, %o2, %o2                   /* IEU1 Group                   */
        bcs,a,pn        %xcc, 3f                        /* CTI                          */
         add            %o2, 1, %o2                     /* IEU0                         */

        /* Fewer than 0xc0 bytes left: no VIS, go to the integer tail.
         * Otherwise fold %o2 to 32 bits (high half + low half, carry
         * added back), select ASI_BLK_P for the block loads and take
         * 0xc0 off the length up front.
         */
3:      cmp             %o1, 0xc0                       /* IEU1 Group                   */
        blu,pn          %icc, 20f                       /* CTI                          */
         sllx           %o2, 32, %g5                    /* IEU0                         */
#ifdef __KERNEL__
        VISEntry
#endif
        addcc           %o2, %g5, %o2                   /* IEU1 Group                   */
        sub             %o1, 0xc0, %o1                  /* IEU0                         */
        wr              %g0, ASI_BLK_P, %asi            /* LSU  Group                   */
        membar          #StoreLoad                      /* LSU  Group                   */
        srlx            %o2, 32, %o2                    /* IEU0 Group                   */
        bcs,a,pn        %xcc, 1f                        /* CTI                          */
         add            %o2, 1, %o2                     /* IEU1                         */

        /* Pick the entry point into the 4-way unrolled loop below from
         * bits 0x80 and 0x40 of the remaining length:
         *      0x80 clear, 0x40 set   -> enter at 3
         *      0x80 clear, 0x40 clear -> 6: -> enter at 4
         *      0x80 set,   0x40 clear -> 7: -> enter at 2
         *      0x80 set,   0x40 set   -> 8: -> fall into 1
         * Each path zeroes the register pair that serves as the "fz"/
         * O12/O14 operands of its first step, preloads three 64-byte
         * blocks and biases %o0 to match the loop's load offsets.
         */
1:      andcc           %o1, 0x80, %g0                  /* IEU1 Group                   */
        bne,pn          %icc, 7f                        /* CTI                          */
         andcc          %o1, 0x40, %g0                  /* IEU1 Group                   */
        be,pn           %icc, 6f                        /* CTI                          */
         fzero          %f12                            /* FPA                          */
        fzero           %f14                            /* FPA  Group                   */
        ldda            [%o0 + 0x000] %asi, %f16
        ldda            [%o0 + 0x040] %asi, %f32
        ldda            [%o0 + 0x080] %asi, %f48
        START_THE_TRICK(f12,f16,f18,f20,f22,f24,f26)
        ba,a,pt         %xcc, 3f
6:      sub             %o0, 0x40, %o0                  /* IEU0 Group                   */
        fzero           %f28                            /* FPA                          */
        fzero           %f30                            /* FPA  Group                   */
        ldda            [%o0 + 0x040] %asi, %f32
        ldda            [%o0 + 0x080] %asi, %f48
        ldda            [%o0 + 0x0c0] %asi, %f0
        START_THE_TRICK(f28,f32,f34,f36,f38,f40,f42)
        ba,a,pt         %xcc, 4f
7:      bne,pt          %icc, 8f                        /* CTI                          */
         fzero          %f44                            /* FPA                          */
        add             %o0, 0x40, %o0                  /* IEU0 Group                   */
        fzero           %f60                            /* FPA                          */
        fzero           %f62                            /* FPA  Group                   */
        ldda            [%o0 - 0x040] %asi, %f0
        ldda            [%o0 + 0x000] %asi, %f16
        ldda            [%o0 + 0x040] %asi, %f32
        START_THE_TRICK(f60,f0,f2,f4,f6,f8,f10)
        ba,a,pt         %xcc, 2f
8:      add             %o0, 0x80, %o0                  /* IEU0 Group                   */
        fzero           %f46                            /* FPA                          */
        ldda            [%o0 - 0x080] %asi, %f48
        ldda            [%o0 - 0x040] %asi, %f0
        ldda            [%o0 + 0x000] %asi, %f16
        START_THE_TRICK(f44,f48,f50,f52,f54,f56,f58)

        /* Main loop: 0x100 bytes per iteration, four DO_THE_TRICK steps
         * rotating through the register groups f0-f14, f16-f30, f32-f46
         * and f48-f62; each step is followed by the block load that
         * refills the group whose sums were just folded onward.
         */
1:      DO_THE_TRICK(f44,f46,f48,f50,f52,f54,f56,f58,f60,f62,f0,f2,f4,f6,f8,f10,f12,f14)
        ldda            [%o0 + 0x040] %asi, %f32
2:      DO_THE_TRICK(f60,f62,f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30)
        ldda            [%o0 + 0x080] %asi, %f48
3:      DO_THE_TRICK(f12,f14,f16,f18,f20,f22,f24,f26,f28,f30,f32,f34,f36,f38,f40,f42,f44,f46)
        ldda            [%o0 + 0x0c0] %asi, %f0
4:      DO_THE_TRICK(f28,f30,f32,f34,f36,f38,f40,f42,f44,f46,f48,f50,f52,f54,f56,f58,f60,f62)
        add             %o0, 0x100, %o0                 /* IEU0 Group                   */
        subcc           %o1, 0x100, %o1                 /* IEU1                         */
        bgeu,a,pt       %icc, 1b                        /* CTI                          */
         ldda           [%o0 + 0x000] %asi, %f16

        /* Loop exit: one more step for the last loaded block, then
         * reduce the eight sums into %o2.
         */
        membar          #Sync                           /* LSU  Group                   */
        DO_THE_TRICK(f44,f46,f48,f50,f52,f54,f56,f58,f60,f62,f0,f2,f4,f6,f8,f10,f12,f14)
        END_THE_TRICK(f60,f62,f0,f2,f4,f6,f8,f10,f12,f14,f16,f18,f20,f22,f24,f26,f28,f30)
#ifdef __KERNEL__
        /* Value to put back into %asi after the block loads. */
        ldub            [%g6 + AOFF_task_thread + AOFF_thread_current_ds], %g7
#endif
        and             %o1, 0x3f, %o1                  /* IEU0 Group                   */
#ifdef __KERNEL__
        VISExit
        wr              %g7, %g0, %asi
#endif

        /* Integer tail.  %g1 = len & 0xf0 (bytes handled 16 at a time),
         * %o3 = len & 0xf.  %o0 is advanced past those %g1 bytes and
         * control jumps 2 * %g1 bytes of code before label 23, i.e. into
         * the CSUM_LASTCHUNK table so that exactly %g1 / 16 chunks run.
         */
20:     andcc           %o1, 0xf0, %g1                  /* IEU1 Group                   */
        be,pn           %icc, 23f                       /* CTI                          */
         and            %o1, 0xf, %o3                   /* IEU0                         */
#ifdef __KERNEL__
22:     sll             %g1, 1, %o4                     /* IEU0 Group                   */
        sethi           %hi(23f), %g7                   /* IEU1                         */
        sub             %g7, %o4, %g7                   /* IEU0 Group                   */
        jmpl            %g7 + %lo(23f), %g0             /* CTI  Group brk forced        */
         add            %o0, %g1, %o0                   /* IEU0                         */
#else
22:     rd              %pc, %g7                        /* LSU  Group+4bubbles          */
        sll             %g1, 1, %o4                     /* IEU0 Group                   */
        sub             %g7, %o4, %g7                   /* IEU0 Group (regdep)          */
        jmpl            %g7 + (23f - 22b), %g0          /* CTI  Group brk forced        */
         add            %o0, %g1, %o0                   /* IEU0                         */
#endif
        CSUM_LASTCHUNK(0xe0)
        CSUM_LASTCHUNK(0xd0)
        CSUM_LASTCHUNK(0xc0)
        CSUM_LASTCHUNK(0xb0)
        CSUM_LASTCHUNK(0xa0)
        CSUM_LASTCHUNK(0x90)
        CSUM_LASTCHUNK(0x80)
        CSUM_LASTCHUNK(0x70)
        CSUM_LASTCHUNK(0x60)
        CSUM_LASTCHUNK(0x50)
        CSUM_LASTCHUNK(0x40)
        CSUM_LASTCHUNK(0x30)
        CSUM_LASTCHUNK(0x20)
        CSUM_LASTCHUNK(0x10)
        CSUM_LASTCHUNK(0x00)

        /* 24/25: common exit.  Fold the 64-bit %o2 to 32 bits (high
         * half + low half, carry added back) and return it in %o0.
         * Label 24 sits on the delay-slot instruction so that the short
         * path can branch straight to the fold.
         */
23:     brnz,pn         %o3, 26f                        /* CTI+IEU1 Group               */
24:      sllx           %o2, 32, %g1                    /* IEU0                         */
25:     addcc           %o2, %g1, %o0                   /* IEU1 Group                   */
        srlx            %o0, 32, %o0                    /* IEU0 Group (regdep)          */
        bcs,a,pn        %xcc, 1f                        /* CTI                          */
         add            %o0, 1, %o0                     /* IEU1                         */
1:      retl                                            /* CTI  Group brk forced        */
         srl            %o0, 0, %o0                     /* IEU0                         */

        /* Last 1..15 bytes: an 8-byte word is added directly; a 4-byte
         * word (shifted to bits 63:32), a halfword (bits 31:16) and a
         * byte (bits 15:8) are OR-ed together and added in one go.
         */
26:     andcc           %o1, 8, %g0                     /* IEU1 Group                   */
        be,pn           %icc, 1f                        /* CTI                          */
         ldx            [%o0], %g3                      /* Load                         */
        add             %o0, 8, %o0                     /* IEU0 Group                   */
        addcc           %g3, %o2, %o2                   /* IEU1 Group                   */
        bcs,a,pn        %xcc, 1f                        /* CTI                          */
         add            %o2, 1, %o2                     /* IEU0                         */
1:      andcc           %o1, 4, %g0                     /* IEU1 Group                   */
        be,a,pn         %icc, 1f                        /* CTI                          */
         clr            %g2                             /* IEU0                         */
        ld              [%o0], %g2                      /* Load                         */
        add             %o0, 4, %o0                     /* IEU0 Group                   */
        sllx            %g2, 32, %g2                    /* IEU0 Group                   */
1:      andcc           %o1, 2, %g0                     /* IEU1                         */
        be,a,pn         %icc, 1f                        /* CTI                          */
         clr            %o4                             /* IEU0 Group                   */
        lduh            [%o0], %o4                      /* Load                         */
        add             %o0, 2, %o0                     /* IEU1                         */
        sll             %o4, 16, %o4                    /* IEU0 Group                   */
1:      andcc           %o1, 1, %g0                     /* IEU1                         */
        be,a,pn         %icc, 1f                        /* CTI                          */
         clr            %o5                             /* IEU0 Group                   */
        ldub            [%o0], %o5                      /* Load                         */
        sll             %o5, 8, %o5                     /* IEU0 Group                   */
1:      or              %g2, %o4, %o4                   /* IEU1                         */
        or              %o5, %o4, %o4                   /* IEU0 Group (regdep)          */
        addcc           %o4, %o2, %o2                   /* IEU1 Group (regdep)          */
        bcs,a,pn        %xcc, 1f                        /* CTI                          */
         add            %o2, 1, %o2                     /* IEU0                         */
1:      ba,pt           %xcc, 25b                       /* CTI  Group                   */
         sllx           %o2, 32, %g1                    /* IEU0                         */

        /* Short path: len < 6 on a buffer that is not 8-byte aligned.
         * Same scheme as at 26, except the 4-byte piece is fetched as
         * two halfwords.
         * NOTE(review): nothing on this path tests address bit 0 before
         * the lduh loads -- confirm that odd addresses are acceptable
         * here.
         */
21:     srl             %o2, 0, %o2                     /* IEU0 Group                   */
        cmp             %o1, 0                          /* IEU1                         */
        be,pn           %icc, 24b                       /* CTI                          */
         andcc          %o1, 4, %g0                     /* IEU1 Group                   */
        be,a,pn         %icc, 1f                        /* CTI                          */
         clr            %g2                             /* IEU0                         */
        lduh            [%o0], %g3                      /* Load                         */
        lduh            [%o0+2], %g2                    /* Load Group                   */
        add             %o0, 4, %o0                     /* IEU0 Group                   */
        sllx            %g3, 48, %g3                    /* IEU0 Group                   */
        sllx            %g2, 32, %g2                    /* IEU0 Group                   */
        or              %g3, %g2, %g2                   /* IEU0 Group                   */
1:      andcc           %o1, 2, %g0                     /* IEU1                         */
        be,a,pn         %icc, 1f                        /* CTI                          */
         clr            %o4                             /* IEU0 Group                   */
        lduh            [%o0], %o4                      /* Load                         */
        add             %o0, 2, %o0                     /* IEU1                         */
        sll             %o4, 16, %o4                    /* IEU0 Group                   */
1:      andcc           %o1, 1, %g0                     /* IEU1                         */
        be,a,pn         %icc, 1f                        /* CTI                          */
         clr            %o5                             /* IEU0 Group                   */
        ldub            [%o0], %o5                      /* Load                         */
        sll             %o5, 8, %o5                     /* IEU0 Group                   */
1:      or              %g2, %o4, %o4                   /* IEU1                         */
        or              %o5, %o4, %o4                   /* IEU0 Group (regdep)          */
        addcc           %o4, %o2, %o2                   /* IEU1 Group (regdep)          */
        bcs,a,pn        %xcc, 1f                        /* CTI                          */
         add            %o2, 1, %o2                     /* IEU0                         */
1:      ba,pt           %xcc, 25b                       /* CTI  Group                   */
         sllx           %o2, 32, %g1                    /* IEU0                         */

        /* When buff is byte aligned and len is large, we backoff to
         * this really slow handling.  The issue is that we cannot do
         * the VIS stuff when buff is byte aligned as unaligned.c will
         * not fix it up.
         *
         * Register roles below:
         *      %o3 = current address        %o4 = bytes remaining
         *      %o5 = 1 if the buffer started on an odd address
         *      %o0 = running sum            %g3 = halfword, then word count
         *      %o2 = caller's sum (untouched until the final add)
         */
csump_really_slow:
        mov             %o0, %o3
        mov             %o1, %o4
        cmp             %o1, 0
        ble,pn          %icc, 9f                /* len <= 0: result is 0 + %o2 */
         mov            0, %o0
        andcc           %o3, 1, %o5
        be,pt           %icc, 1f
         sra            %o4, 1, %g3
        /* Odd start: take the leading byte as-is. */
        add             %o1, -1, %o4
        ldub            [%o3], %o0
        add             %o3, 1, %o3
        sra             %o4, 1, %g3
1:
        cmp             %g3, 0
        be,pt           %icc, 3f                /* no full halfword left */
         and            %o4, 1, %g2
        and             %o3, 2, %g2
        brz,a,pt        %g2, 1f                 /* already 4-byte aligned */
         sra            %g3, 1, %g3
        /* Take one halfword to reach 4-byte alignment. */
        add             %g3, -1, %g3
        add             %o4, -2, %o4
        lduh            [%o3], %g2
        add             %o3, 2, %o3
        add             %o0, %g2, %o0
        sra             %g3, 1, %g3
1:
        cmp             %g3, 0
        be,pt           %icc, 2f                /* no full word left */
         and            %o4, 2, %g2
        /* Word loop: 32-bit add with the carry added back each time. */
1:
        ld              [%o3], %g2
        addcc           %o0, %g2, %o0
        addx            %o0, %g0, %o0
        addcc           %g3, -1, %g3
        bne,pt          %icc, 1b
         add            %o3, 4, %o3
        /* Fold to 16 bits: %g2 = 0xffff (64512 | 1023). */
        srl             %o0, 16, %o1
        sethi           %hi(64512), %g2
        or              %g2, 1023, %g2
        and             %o0, %g2, %g3
        add             %g3, %o1, %g3
        srl             %g3, 16, %o0
        and             %g3, %g2, %g2
        add             %g2, %o0, %g3
        sll             %g3, 16, %g3
        srl             %g3, 16, %o0
        and             %o4, 2, %g2
2:
        cmp             %g2, 0
        be,pt           %icc, 3f                /* no trailing halfword */
         and            %o4, 1, %g2
        lduh            [%o3], %g2
        add             %o3, 2, %o3
        add             %o0, %g2, %o0
        and             %o4, 1, %g2
3:
        cmp             %g2, 0
        be,pt           %icc, 1f                /* no trailing byte */
         srl            %o0, 16, %o1
        /* Trailing byte goes into bits 15:8. */
        ldub            [%o3], %g2
        sll             %g2, 8, %g2
        add             %o0, %g2, %o0
        srl             %o0, 16, %o1
1:
        /* Fold to 16 bits again; %g2 then holds the same value with its
         * two bytes swapped, which is what is used when %o5 != 0.
         */
        sethi           %hi(64512), %g2
        or              %g2, 1023, %g2
        cmp             %o5, 0
        and             %o0, %g2, %g3
        add             %g3, %o1, %g3
        srl             %g3, 16, %o0
        and             %g3, %g2, %g2
        add             %g2, %o0, %g3
        sll             %g3, 16, %g3
        srl             %g3, 16, %o0
        srl             %g3, 24, %g3
        and             %o0, 255, %g2
        sll             %g2, 8, %g2
        bne,pt          %icc, 1f                /* odd start: keep swapped value */
         or             %g3, %g2, %g2
9:
        mov             %o0, %g2
1:
        /* Add the caller's sum with the carry added back; return in %o0. */
        addcc           %g2, %o2, %g2
        addx            %g2, %g0, %g2
        retl
         srl            %g2, 0, %o0