/* Copy SIZE bytes from SRC to DEST.
   For UltraSPARC.
   Copyright (C) 1996-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* Implementation overview (documentation only; code unchanged):

   Entry points visible in this file:
     memcpy         - saves the return value (dst) in %g4, then dispatches
		      on the byte count in %o2 (see label 210/218).
     __mempcpy      - sets %g4 = dst + n so the shared return sequences
		      ("retl; mov %g4, %o0") yield dst + n, then joins
		      memcpy at 210.
     __memcpy_large - VIS block-copy path; memcpy branches here (200b)
		      when n >= 64*6.

   Register roles used throughout:
     %o0 = dst cursor, %o1 = src cursor, %o2 = bytes remaining,
     %g4 = value returned by every "retl; mov %g4, %o0" tail.
     %g1, %g2, %g3, %g5, %g6, %o5 are scratch (hence the .register
     directives below marking %g2/%g3/%g6 as scratch for the assembler).

   The trailing "IEU0 / IEU1 / Load / Store / Group" comments on most
   instructions are the original author's UltraSPARC pipeline-slot
   annotations; they document intended instruction grouping, not semantics.
   A leading space before an instruction marks a branch delay slot.

   CAUTION - this file is layout sensitive.  The computed dispatches at
   labels 203, 79 and 83 ("rd %pc" followed by "jmpl" with an offset built
   from label arithmetic such as %lo(300f - 203b) or %lo(80f - 79b)), the
   ".align 512" loop-variant tables strided by "sll %g2, 9", and branch
   targets like "300b+4", "82f + 4", "2f+4" all depend on the exact size,
   count and order of the emitted instructions.  Do not insert, remove or
   reorder instructions without re-deriving every such offset.  */

#include <sysdep.h>
#include <asm/asi.h>
#ifndef XCC
#define USE_BPR
	.register	%g2, #scratch
	.register	%g3, #scratch
	.register	%g6, #scratch
#define XCC xcc
#endif
#define FPRS_FEF 4

/* Produce 64 aligned output bytes in %f48-%f62 from a 72-byte window of
   nine input doubles (f1..f9), byte-rotating each adjacent pair with VIS
   faligndata (rotation amount set earlier by alignaddr).  */
#define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9)		\
	faligndata		%f1, %f2, %f48;			\
	faligndata		%f2, %f3, %f50;			\
	faligndata		%f3, %f4, %f52;			\
	faligndata		%f4, %f5, %f54;			\
	faligndata		%f5, %f6, %f56;			\
	faligndata		%f6, %f7, %f58;			\
	faligndata		%f7, %f8, %f60;			\
	faligndata		%f8, %f9, %f62;

/* Move one 64-byte chunk with block load/store in the current %asi
   (ASI_BLK_P here); when len reaches zero, branch to jmptgt with the
   final stda issuing from the delay slot.  */
#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt)	\
	ldda		[%src] %asi, %fdest;			\
	add		%src, 0x40, %src;			\
	add		%dest, 0x40, %dest;			\
	subcc		%len, 0x40, %len;			\
	be,pn		%xcc, jmptgt;				\
	 stda		%fsrc, [%dest - 0x40] %asi;

/* The three chunk variants only differ in which register bank
   (%f0 / %f16 / %f32) receives the next block load.  */
#define LOOP_CHUNK1(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f0,  f48, len, branch_dest)
#define LOOP_CHUNK2(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest)
#define LOOP_CHUNK3(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)

/* Flush the staged 64 output bytes; callers append "membar #Sync"
   on the same line to drain the block store.  */
#define STORE_SYNC(dest, fsrc)					\
	stda		%fsrc, [%dest] %asi;			\
	add		%dest, 0x40, %dest;

/* As STORE_SYNC, then leave the loop-variant table for 'target'
   (a 4xx finish chunk); the caller-appended membar #Sync sits in
   the ba,pt delay slot.  */
#define STORE_JUMP(dest, fsrc, target)				\
	stda		%fsrc, [%dest] %asi;			\
	add		%dest, 0x40, %dest;			\
	ba,pt		%xcc, target;

/* 15 nops of padding so that each 3xx loop variant occupies exactly the
   fixed 512-byte stride assumed by the computed jump at 203
   ("sll %g2, 9" selects variant %g2).  Do not change the nop count.  */
#define VISLOOP_PAD nop; nop; nop; nop; \
		    nop; nop; nop; nop; \
		    nop; nop; nop; nop; \
		    nop; nop; nop;

/* Drain one more aligned double from the f-register pipeline while at
   least 8 bytes remain in 'left'; otherwise fall out to 205f.  */
#define FINISH_VISCHUNK(dest, f0, f1, left)			\
	subcc		%left, 8, %left;			\
	bl,pn		%xcc, 205f;				\
	 faligndata	%f0, %f1, %f48;				\
	std		%f48, [%dest];				\
	add		%dest, 8, %dest;

/* Odd-position variant: no full double is ready; reposition f0 into f1
   and continue the drain at 204f (or leave via 205f when done).  */
#define UNEVEN_VISCHUNK(dest, f0, f1, left)			\
	subcc		%left, 8, %left;			\
	bl,pn		%xcc, 205f;				\
	 fsrc2		%f0, %f1;				\
	ba,a,pt		%xcc, 204f;

	/* Macros for non-VIS memcpy code.  */

/* Copy 32 bytes: 8-byte loads, 4-byte stores (dst is only word aligned
   on this path).  Big-endian: the low word of each ldx holds source
   bytes 4..7, so it is stored at offset+4 and the srlx'd high word at
   offset+0.  */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3)		\
	ldx		[%src + offset + 0x00], %t0;		\
	ldx		[%src + offset + 0x08], %t1;		\
	ldx		[%src + offset + 0x10], %t2;		\
	ldx		[%src + offset + 0x18], %t3;		\
	stw		%t0, [%dst + offset + 0x04];		\
	srlx		%t0, 32, %t0;				\
	stw		%t0, [%dst + offset + 0x00];		\
	stw		%t1, [%dst + offset + 0x0c];		\
	srlx		%t1, 32, %t1;				\
	stw		%t1, [%dst + offset + 0x08];		\
	stw		%t2, [%dst + offset + 0x14];		\
	srlx		%t2, 32, %t2;				\
	stw		%t2, [%dst + offset + 0x10];		\
	stw		%t3, [%dst + offset + 0x1c];		\
	srlx		%t3, 32, %t3;				\
	stw		%t3, [%dst + offset + 0x18];

/* Copy 64 bytes with 8-byte loads and stores (both sides doubleword
   aligned on this path).  */
#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3)	\
	ldx		[%src + offset + 0x00], %t0;		\
	ldx		[%src + offset + 0x08], %t1;		\
	ldx		[%src + offset + 0x10], %t2;		\
	ldx		[%src + offset + 0x18], %t3;		\
	stx		%t0, [%dst + offset + 0x00];		\
	stx		%t1, [%dst + offset + 0x08];		\
	stx		%t2, [%dst + offset + 0x10];		\
	stx		%t3, [%dst + offset + 0x18];		\
	ldx		[%src + offset + 0x20], %t0;		\
	ldx		[%src + offset + 0x28], %t1;		\
	ldx		[%src + offset + 0x30], %t2;		\
	ldx		[%src + offset + 0x38], %t3;		\
	stx		%t0, [%dst + offset + 0x20];		\
	stx		%t1, [%dst + offset + 0x28];		\
	stx		%t2, [%dst + offset + 0x30];		\
	stx		%t3, [%dst + offset + 0x38];

/* Copy 16 bytes downward from -offset with word stores (see 79:, which
   indexes these chunks with a computed backward jump; each expansion
   must stay exactly eight instructions).  */
#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3)	\
	ldx		[%src - offset - 0x10], %t0;		\
	ldx		[%src - offset - 0x08], %t1;		\
	stw		%t0, [%dst - offset - 0x0c];		\
	srlx		%t0, 32, %t2;				\
	stw		%t2, [%dst - offset - 0x10];		\
	stw		%t1, [%dst - offset - 0x04];		\
	srlx		%t1, 32, %t3;				\
	stw		%t3, [%dst - offset - 0x08];

/* Copy 16 bytes downward from -offset with doubleword stores (indexed by
   the computed jump at 83:; each expansion must stay four instructions).  */
#define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1)		\
	ldx		[%src - offset - 0x10], %t0;		\
	ldx		[%src - offset - 0x08], %t1;		\
	stx		%t0, [%dst - offset - 0x10];		\
	stx		%t1, [%dst - offset - 0x08];

	.text
	.align		32

/* __memcpy_large: VIS block-copy path, entered from memcpy (branch to
   200b) when n >= 64*6.  On arrival the caller has just executed
   "andcc %o0, 7, %g2" in the branch delay slot, so the first be,pt
   tests dst-alignment and %g2 = dst & 7.  */
ENTRY(__memcpy_large)
	/* 200-201: align dst to 8 bytes by copying %g2 = 8 - (dst & 7)
	   head bytes one or two at a time.  */
200:	be,pt		%xcc, 201f			/* CTI			*/
	 andcc		%o0, 0x38, %g5			/* IEU1		Group	*/
	mov		8, %g1				/* IEU0			*/
	sub		%g1, %g2, %g2			/* IEU0		Group	*/
	andcc		%o0, 1, %g0			/* IEU1			*/
	be,pt		%icc, 2f			/* CTI			*/
	 sub		%o2, %g2, %o2			/* IEU0		Group	*/
1:	ldub		[%o1], %o5			/* Load		Group	*/
	add		%o1, 1, %o1			/* IEU0			*/
	add		%o0, 1, %o0			/* IEU1			*/
	subcc		%g2, 1, %g2			/* IEU1		Group	*/
	be,pn		%xcc, 3f			/* CTI			*/
	 stb		%o5, [%o0 - 1]			/* Store		*/
2:	ldub		[%o1], %o5			/* Load		Group	*/
	add		%o0, 2, %o0			/* IEU0			*/
	ldub		[%o1 + 1], %g3			/* Load		Group	*/
	subcc		%g2, 2, %g2			/* IEU1		Group	*/
	stb		%o5, [%o0 - 2]			/* Store		*/
	add		%o1, 2, %o1			/* IEU0			*/
	bne,pt		%xcc, 2b			/* CTI		Group	*/
	 stb		%g3, [%o0 - 1]			/* Store		*/
3:	andcc		%o0, 0x38, %g5			/* IEU1		Group	*/
	/* 201: dst is 8-aligned; copy %g5 = 64 - (dst & 0x38) doubles with
	   faligndata until dst is 64-byte aligned for the block stores.  */
201:	be,pt		%icc, 202f			/* CTI			*/
	 mov		64, %g1				/* IEU0			*/
	fsrc2		%f0, %f2			/* FPU			*/
	sub		%g1, %g5, %g5			/* IEU0		Group	*/
	alignaddr	%o1, %g0, %g1			/* GRU		Group	*/
	ldd		[%g1], %f4			/* Load		Group	*/
	sub		%o2, %g5, %o2			/* IEU0			*/
1:	ldd		[%g1 + 0x8], %f6		/* Load		Group	*/
	add		%g1, 0x8, %g1			/* IEU0		Group	*/
	subcc		%g5, 8, %g5			/* IEU1			*/
	faligndata	%f4, %f6, %f0			/* GRU		Group	*/
	std		%f0, [%o0]			/* Store		*/
	add		%o1, 8, %o1			/* IEU0		Group	*/
	be,pn		%xcc, 202f			/* CTI			*/
	 add		%o0, 8, %o0			/* IEU1			*/
	ldd		[%g1 + 0x8], %f4		/* Load		Group	*/
	add		%g1, 8, %g1			/* IEU0			*/
	subcc		%g5, 8, %g5			/* IEU1			*/
	faligndata	%f6, %f4, %f0			/* GRU		Group	*/
	std		%f0, [%o0]			/* Store		*/
	add		%o1, 8, %o1			/* IEU0			*/
	bne,pt		%xcc, 1b			/* CTI		Group	*/
	 add		%o0, 8, %o0			/* IEU0			*/
	/* 202: set up the block-copy main loop.
	   %g6 = bytes to copy in 64-byte blocks, %g3 = doubleword tail for
	   204:, %o2 = final byte tail for 206:, %g1 = saved src cursor for
	   the tails, %g2 = (src >> 3) & 7 selects which of the eight loop
	   variants below matches the source alignment.  Three 64-byte block
	   loads prime banks %f0/%f16/%f32 before entering the loop.  */
202:	membar	#LoadStore | #StoreStore | #StoreLoad	/* LSU		Group	*/
	wr		%g0, ASI_BLK_P, %asi		/* LSU		Group	*/
	subcc		%o2, 0x40, %g6			/* IEU1		Group	*/
	mov		%o1, %g1			/* IEU0			*/
	andncc		%g6, (0x40 - 1), %g6		/* IEU1		Group	*/
	srl		%g1, 3, %g2			/* IEU0			*/
	sub		%o2, %g6, %g3			/* IEU0		Group	*/
	andn		%o1, (0x40 - 1), %o1		/* IEU1			*/
	and		%g2, 7, %g2			/* IEU0		Group	*/
	andncc		%g3, 0x7, %g3			/* IEU1			*/
	fsrc2		%f0, %f2			/* FPU			*/
	sub		%g3, 0x10, %g3			/* IEU0		Group	*/
	sub		%o2, %g6, %o2			/* IEU1			*/
	alignaddr	%g1, %g0, %g0			/* GRU		Group	*/
	add		%g1, %g6, %g1			/* IEU0		Group	*/
	subcc		%o2, %g3, %o2			/* IEU1			*/
	ldda		[%o1 + 0x00] %asi, %f0		/* LSU		Group	*/
	add		%g1, %g3, %g1			/* IEU0			*/
	ldda		[%o1 + 0x40] %asi, %f16		/* LSU		Group	*/
	sub		%g6, 0x80, %g6			/* IEU0			*/
	ldda		[%o1 + 0x80] %asi, %f32		/* LSU		Group	*/
							/* Clk1	Group 8-(	*/
							/* Clk2	Group 8-(	*/
							/* Clk3	Group 8-(	*/
							/* Clk4	Group 8-(	*/
	/* 203: computed dispatch into the 512-byte-strided loop-variant
	   table: target = &300f + %g2 * 512.  Each variant 3x0 below must
	   therefore be exactly 512 bytes long (hence VISLOOP_PAD).  */
203:	rd		%pc, %g5			/* PDU	Group 8-(	*/
	addcc		%g5, %lo(300f - 203b), %g5	/* IEU1	Group		*/
	sll		%g2, 9, %g2			/* IEU0			*/
	jmpl		%g5 + %g2, %g0			/* CTI	Group brk forced*/
	 addcc		%o1, 0xc0, %o1			/* IEU1	Group		*/

	.align		512		/* OK, here comes the fun part... */
	/* Eight loop variants, one per source-alignment octant.  Each
	   rotates through the three f-register banks; "300b+4" style
	   targets re-enter the variant just past its first instruction.
	   The 301/302/303-style exits flush the pipeline (STORE_SYNC +
	   membar #Sync) and jump to the matching 4xx drain chunk.  */
300:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)	LOOP_CHUNK1(o1, o0, g6, 301f)
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)	LOOP_CHUNK2(o1, o0, g6, 302f)
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)	LOOP_CHUNK3(o1, o0, g6, 303f)
	b,pt		%xcc, 300b+4; faligndata %f0, %f2, %f48
301:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)	STORE_JUMP(o0, f48, 400f) membar #Sync
302:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)	STORE_JUMP(o0, f48, 416f) membar #Sync
303:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)	STORE_JUMP(o0, f48, 432f) membar #Sync
	VISLOOP_PAD
310:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)	LOOP_CHUNK1(o1, o0, g6, 311f)
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)	LOOP_CHUNK2(o1, o0, g6, 312f)
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)	LOOP_CHUNK3(o1, o0, g6, 313f)
	b,pt		%xcc, 310b+4; faligndata %f2, %f4, %f48
311:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)	STORE_JUMP(o0, f48, 402f) membar #Sync
312:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)	STORE_JUMP(o0, f48, 418f) membar #Sync
313:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)	STORE_JUMP(o0, f48, 434f) membar #Sync
	VISLOOP_PAD
320:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)	LOOP_CHUNK1(o1, o0, g6, 321f)
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)	LOOP_CHUNK2(o1, o0, g6, 322f)
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)	LOOP_CHUNK3(o1, o0, g6, 323f)
	b,pt		%xcc, 320b+4; faligndata %f4, %f6, %f48
321:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)	STORE_JUMP(o0, f48, 404f) membar #Sync
322:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)	STORE_JUMP(o0, f48, 420f) membar #Sync
323:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)	STORE_JUMP(o0, f48, 436f) membar #Sync
	VISLOOP_PAD
330:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)	LOOP_CHUNK1(o1, o0, g6, 331f)
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)	LOOP_CHUNK2(o1, o0, g6, 332f)
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)	LOOP_CHUNK3(o1, o0, g6, 333f)
	b,pt		%xcc, 330b+4; faligndata %f6, %f8, %f48
331:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)	STORE_JUMP(o0, f48, 406f) membar #Sync
332:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)	STORE_JUMP(o0, f48, 422f) membar #Sync
333:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)	STORE_JUMP(o0, f48, 438f) membar #Sync
	VISLOOP_PAD
340:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)	LOOP_CHUNK1(o1, o0, g6, 341f)
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)	LOOP_CHUNK2(o1, o0, g6, 342f)
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)	LOOP_CHUNK3(o1, o0, g6, 343f)
	b,pt		%xcc, 340b+4; faligndata %f8, %f10, %f48
341:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)	STORE_JUMP(o0, f48, 408f) membar #Sync
342:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)	STORE_JUMP(o0, f48, 424f) membar #Sync
343:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)	STORE_JUMP(o0, f48, 440f) membar #Sync
	VISLOOP_PAD
350:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)	LOOP_CHUNK1(o1, o0, g6, 351f)
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)	LOOP_CHUNK2(o1, o0, g6, 352f)
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)	LOOP_CHUNK3(o1, o0, g6, 353f)
	b,pt		%xcc, 350b+4; faligndata %f10, %f12, %f48
351:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)	STORE_JUMP(o0, f48, 410f) membar #Sync
352:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)	STORE_JUMP(o0, f48, 426f) membar #Sync
353:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)	STORE_JUMP(o0, f48, 442f) membar #Sync
	VISLOOP_PAD
360:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)	LOOP_CHUNK1(o1, o0, g6, 361f)
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)	LOOP_CHUNK2(o1, o0, g6, 362f)
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)	LOOP_CHUNK3(o1, o0, g6, 363f)
	b,pt		%xcc, 360b+4; faligndata %f12, %f14, %f48
361:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)	STORE_JUMP(o0, f48, 412f) membar #Sync
362:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)	STORE_JUMP(o0, f48, 428f) membar #Sync
363:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)	STORE_JUMP(o0, f48, 444f) membar #Sync
	VISLOOP_PAD
370:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)	LOOP_CHUNK1(o1, o0, g6, 371f)
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)	LOOP_CHUNK2(o1, o0, g6, 372f)
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)	LOOP_CHUNK3(o1, o0, g6, 373f)
	b,pt		%xcc, 370b+4; faligndata %f14, %f16, %f48
371:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)	STORE_JUMP(o0, f48, 414f) membar #Sync
372:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)	STORE_JUMP(o0, f48, 430f) membar #Sync
373:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)	STORE_SYNC(o0, f48) membar #Sync
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)	STORE_JUMP(o0, f48, 446f) membar #Sync
	VISLOOP_PAD
	/* 4xx: per-exit drain chunks — flush the doubles still live in the
	   f-registers (%g3 doubleword bytes remain), then fall into 204/205.  */
400:	FINISH_VISCHUNK(o0, f0,  f2,  g3)
402:	FINISH_VISCHUNK(o0, f2,  f4,  g3)
404:	FINISH_VISCHUNK(o0, f4,  f6,  g3)
406:	FINISH_VISCHUNK(o0, f6,  f8,  g3)
408:	FINISH_VISCHUNK(o0, f8,  f10, g3)
410:	FINISH_VISCHUNK(o0, f10, f12, g3)
412:	FINISH_VISCHUNK(o0, f12, f14, g3)
414:	UNEVEN_VISCHUNK(o0, f14, f0,  g3)
416:	FINISH_VISCHUNK(o0, f16, f18, g3)
418:	FINISH_VISCHUNK(o0, f18, f20, g3)
420:	FINISH_VISCHUNK(o0, f20, f22, g3)
422:	FINISH_VISCHUNK(o0, f22, f24, g3)
424:	FINISH_VISCHUNK(o0, f24, f26, g3)
426:	FINISH_VISCHUNK(o0, f26, f28, g3)
428:	FINISH_VISCHUNK(o0, f28, f30, g3)
430:	UNEVEN_VISCHUNK(o0, f30, f0,  g3)
432:	FINISH_VISCHUNK(o0, f32, f34, g3)
434:	FINISH_VISCHUNK(o0, f34, f36, g3)
436:	FINISH_VISCHUNK(o0, f36, f38, g3)
438:	FINISH_VISCHUNK(o0, f38, f40, g3)
440:	FINISH_VISCHUNK(o0, f40, f42, g3)
442:	FINISH_VISCHUNK(o0, f42, f44, g3)
444:	FINISH_VISCHUNK(o0, f44, f46, g3)
446:	UNEVEN_VISCHUNK(o0, f46, f0,  g3)
	/* 204: copy the remaining %g3 doubleword bytes with ldd/faligndata
	   from the (unaligned) source, two doubles per iteration.  */
204:	ldd		[%o1], %f2			/* Load		Group	*/
	add		%o1, 8, %o1			/* IEU0			*/
	subcc		%g3, 8, %g3			/* IEU1			*/
	faligndata	%f0, %f2, %f8			/* GRU		Group	*/
	std		%f8, [%o0]			/* Store		*/
	bl,pn		%xcc, 205f			/* CTI			*/
	 add		%o0, 8, %o0			/* IEU0		Group	*/
	ldd		[%o1], %f0			/* Load		Group	*/
	add		%o1, 8, %o1			/* IEU0			*/
	subcc		%g3, 8, %g3			/* IEU1			*/
	faligndata	%f2, %f0, %f8			/* GRU		Group	*/
	std		%f8, [%o0]			/* Store		*/
	bge,pt		%xcc, 204b			/* CTI			*/
	 add		%o0, 8, %o0			/* IEU0		Group	*/
	/* 205-207: final %o2 tail bytes (src cursor restored from %g1),
	   then sync stores, reset %fprs to FPRS_FEF, and return %g4.  */
205:	brz,pt		%o2, 207f			/* CTI		Group	*/
	 mov		%g1, %o1			/* IEU0			*/
206:	ldub		[%o1], %g5			/* LOAD			*/
	add		%o1, 1, %o1			/* IEU0			*/
	add		%o0, 1, %o0			/* IEU1			*/
	subcc		%o2, 1, %o2			/* IEU1			*/
	bne,pt		%xcc, 206b			/* CTI			*/
	 stb		%g5, [%o0 - 1]			/* Store	Group	*/
207:	membar		#StoreLoad | #StoreStore	/* LSU		Group	*/
	wr		%g0, FPRS_FEF, %fprs
	retl
	 mov		%g4, %o0

	/* 208: tiny copies (memcpy branches here when n <= 15): copy one
	   byte if n is odd, then byte pairs until done.  */
208:	andcc		%o2, 1, %g0			/* IEU1		Group	*/
	be,pt		%icc, 2f+4			/* CTI			*/
1:	 ldub		[%o1], %g5			/* LOAD		Group	*/
	add		%o1, 1, %o1			/* IEU0			*/
	add		%o0, 1, %o0			/* IEU1			*/
	subcc		%o2, 1, %o2			/* IEU1		Group	*/
	be,pn		%xcc, 209f			/* CTI			*/
	 stb		%g5, [%o0 - 1]			/* Store		*/
2:	ldub		[%o1], %g5			/* LOAD		Group	*/
	add		%o0, 2, %o0			/* IEU0			*/
	ldub		[%o1 + 1], %o5			/* LOAD		Group	*/
	add		%o1, 2, %o1			/* IEU0			*/
	subcc		%o2, 2, %o2			/* IEU1		Group	*/
	stb		%g5, [%o0 - 2]			/* Store		*/
	bne,pt		%xcc, 2b			/* CTI			*/
	 stb		%o5, [%o0 - 1]			/* Store		*/
209:	retl
	 mov		%g4, %o0
END(__memcpy_large)

/* __mempcpy(dst, src, n): identical to memcpy except the return value is
   dst + n; achieved by seeding %g4 before joining memcpy at 210.  */
ENTRY(__mempcpy)
	ba,pt		%xcc, 210f
	 add		%o0, %o2, %g4
END(__mempcpy)

	.align		32
/* memcpy(dst=%o0, src=%o1, n=%o2): dispatch by size and alignment.
     n == 0        -> return via 209b
     n <= 15       -> byte loop at 208b
     n >= 64*6     -> VIS block path at 200b (__memcpy_large)
     otherwise     -> word/doubleword paths below; 212f handles src/dst
		      mutually misaligned mod 4 via faligndata.  */
ENTRY(memcpy)
	mov		%o0, %g4			/* IEU0		Group	*/
210:
#ifndef USE_BPR
	/* Without 64-bit branch-on-register, clear the high half of n
	   (32-bit size_t in this configuration).  */
	srl		%o2, 0, %o2			/* IEU1			*/
#endif
	brz,pn		%o2, 209b			/* CTI		Group	*/
218:	 cmp		%o2, 15				/* IEU1			*/
	bleu,pn		%xcc, 208b			/* CTI		Group	*/
	 cmp		%o2, (64 * 6)			/* IEU1			*/
	bgeu,pn		%xcc, 200b			/* CTI		Group	*/
	 andcc		%o0, 7, %g2			/* IEU1			*/
	sub		%o0, %o1, %g5			/* IEU0			*/
	andcc		%g5, 3, %o5			/* IEU1		Group	*/
	bne,pn		%xcc, 212f			/* CTI			*/
	 andcc		%o1, 3, %g0			/* IEU1		Group	*/
	be,a,pt		%xcc, 216f			/* CTI			*/
	 andcc		%o1, 4, %g0			/* IEU1		Group	*/
	/* Align src (and, since dst-src is a multiple of 4, dst) to a
	   4-byte boundary: optional byte then optional halfword.  */
	andcc		%o1, 1, %g0			/* IEU1		Group	*/
	be,pn		%xcc, 4f			/* CTI			*/
	 andcc		%o1, 2, %g0			/* IEU1		Group	*/
	ldub		[%o1], %g2			/* Load		Group	*/
	add		%o1, 1, %o1			/* IEU0			*/
	add		%o0, 1, %o0			/* IEU1			*/
	sub		%o2, 1, %o2			/* IEU0		Group	*/
	bne,pn		%xcc, 5f			/* CTI		Group	*/
	 stb		%g2, [%o0 - 1]			/* Store		*/
4:	lduh		[%o1], %g2			/* Load		Group	*/
	add		%o1, 2, %o1			/* IEU0			*/
	add		%o0, 2, %o0			/* IEU1			*/
	sub		%o2, 2, %o2			/* IEU0			*/
	sth		%g2, [%o0 - 2]			/* Store Group + bubble	*/
5:	andcc		%o1, 4, %g0			/* IEU1			*/
216:	be,a,pn		%xcc, 2f			/* CTI			*/
	 andcc		%o2, -128, %g6			/* IEU1		Group	*/
	lduw		[%o1], %g5			/* Load		Group	*/
	add		%o1, 4, %o1			/* IEU0			*/
	add		%o0, 4, %o0			/* IEU1			*/
	sub		%o2, 4, %o2			/* IEU0		Group	*/
	stw		%g5, [%o0 - 4]			/* Store		*/
	andcc		%o2, -128, %g6			/* IEU1		Group	*/
	/* %g6 = bytes to move in 128-byte chunks.  If dst is 8-byte aligned
	   take the stx path at 82f (skipping its first instruction via
	   "82f + 4", whose slot is filled by the delay-slot load here).  */
2:	be,pn		%xcc, 215f			/* CTI			*/
	 andcc		%o0, 4, %g0			/* IEU1		Group	*/
	be,pn		%xcc, 82f + 4			/* CTI		Group	*/
5:	MOVE_BIGCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
	MOVE_BIGCHUNK(o1, o0, 0x20, g1, g3, g5, o5)
	MOVE_BIGCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
	MOVE_BIGCHUNK(o1, o0, 0x60, g1, g3, g5, o5)
35:	subcc		%g6, 128, %g6			/* IEU1		Group	*/
	add		%o1, 128, %o1			/* IEU0			*/
	bne,pt		%xcc, 5b			/* CTI			*/
	 add		%o0, 128, %o0			/* IEU0		Group	*/
215:	andcc		%o2, 0x70, %g6			/* IEU1		Group	*/
41:	be,pn		%xcc, 80f			/* CTI			*/
	 andcc		%o2, 8, %g0			/* IEU1		Group	*/
							/* Clk1 8-(		*/
							/* Clk2 8-(		*/
							/* Clk3 8-(		*/
							/* Clk4 8-(		*/
	/* 79: computed backward jump into the MOVE_LASTCHUNK ladder:
	   %g6 = n & 0x70 remaining; each 16-byte chunk is 8 instructions
	   (32 bytes = %g6 * 2, hence "sll %g6, 1"), so the jump lands
	   exactly %g6/16 chunks before 80f.  */
79:	rd		%pc, %o5			/* PDU		Group	*/
	sll		%g6, 1, %g5			/* IEU0		Group	*/
	add		%o1, %g6, %o1			/* IEU1			*/
	sub		%o5, %g5, %o5			/* IEU0		Group	*/
	jmpl		%o5 + %lo(80f - 79b), %g0	/* CTI	Group brk forced*/
	 add		%o0, %g6, %o0			/* IEU0		Group	*/
36:	MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g5, o5)
	MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g5, o5)
	MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g5, o5)
	MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g5, o5)
	MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g5, o5)
	MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g5, o5)
	MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g5, o5)
	/* 80-211: final 8/4/2/1-byte tails keyed off the low bits of n.  */
80:	be,pt		%xcc, 81f			/* CTI			*/
	 andcc		%o2, 4, %g0			/* IEU1			*/
	ldx		[%o1], %g2			/* Load		Group	*/
	add		%o0, 8, %o0			/* IEU0			*/
	stw		%g2, [%o0 - 0x4]		/* Store	Group	*/
	add		%o1, 8, %o1			/* IEU1			*/
	srlx		%g2, 32, %g2			/* IEU0		Group	*/
	stw		%g2, [%o0 - 0x8]		/* Store		*/
81:	be,pt		%xcc, 1f			/* CTI			*/
	 andcc		%o2, 2, %g0			/* IEU1		Group	*/
	lduw		[%o1], %g2			/* Load		Group	*/
	add		%o1, 4, %o1			/* IEU0			*/
	stw		%g2, [%o0]			/* Store	Group	*/
	add		%o0, 4, %o0			/* IEU0			*/
1:	be,pt		%xcc, 1f			/* CTI			*/
	 andcc		%o2, 1, %g0			/* IEU1		Group	*/
	lduh		[%o1], %g2			/* Load		Group	*/
	add		%o1, 2, %o1			/* IEU0			*/
	sth		%g2, [%o0]			/* Store	Group	*/
	add		%o0, 2, %o0			/* IEU0			*/
1:	be,pt		%xcc, 211f			/* CTI			*/
	 nop						/* IEU1			*/
	ldub		[%o1], %g2			/* Load		Group	*/
	stb		%g2, [%o0]			/* Store Group + bubble	*/
211:	retl
	 mov		%g4, %o0

	/* 82: 128-byte loop for 8-byte-aligned dst (stx stores); mirrors
	   the 5b/MOVE_BIGCHUNK loop above.  */
82:	MOVE_BIGALIGNCHUNK(o1, o0, 0x00, g1, g3, g5, o5)
	MOVE_BIGALIGNCHUNK(o1, o0, 0x40, g1, g3, g5, o5)
37:	subcc		%g6, 128, %g6			/* IEU1		Group	*/
	add		%o1, 128, %o1			/* IEU0			*/
	bne,pt		%xcc, 82b			/* CTI			*/
	 add		%o0, 128, %o0			/* IEU0		Group	*/
	andcc		%o2, 0x70, %g6			/* IEU1			*/
	be,pn		%xcc, 84f			/* CTI			*/
	 andcc		%o2, 8, %g0			/* IEU1		Group	*/
							/* Clk1 8-(		*/
							/* Clk2 8-(		*/
							/* Clk3 8-(		*/
							/* Clk4 8-(		*/
	/* 83: computed jump into MOVE_LASTALIGNCHUNK; each chunk is 4
	   instructions (16 bytes), so the raw %g6 is the byte offset.  */
83:	rd		%pc, %o5			/* PDU		Group	*/
	add		%o1, %g6, %o1			/* IEU0		Group	*/
	sub		%o5, %g6, %o5			/* IEU1			*/
	jmpl		%o5 + %lo(84f - 83b), %g0	/* CTI	Group brk forced*/
	 add		%o0, %g6, %o0			/* IEU0		Group	*/
38:	MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3)
	MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3)
	/* 84-85: aligned 8/4/2/1-byte tails.  */
84:	be,pt		%xcc, 85f			/* CTI		Group	*/
	 andcc		%o2, 4, %g0			/* IEU1			*/
	ldx		[%o1], %g2			/* Load		Group	*/
	add		%o0, 8, %o0			/* IEU0			*/
	add		%o1, 8, %o1			/* IEU0		Group	*/
	stx		%g2, [%o0 - 0x8]		/* Store		*/
85:	be,pt		%xcc, 1f			/* CTI			*/
	 andcc		%o2, 2, %g0			/* IEU1		Group	*/
	lduw		[%o1], %g2			/* Load		Group	*/
	add		%o0, 4, %o0			/* IEU0			*/
	add		%o1, 4, %o1			/* IEU0		Group	*/
	stw		%g2, [%o0 - 0x4]		/* Store		*/
1:	be,pt		%xcc, 1f			/* CTI			*/
	 andcc		%o2, 1, %g0			/* IEU1		Group	*/
	lduh		[%o1], %g2			/* Load		Group	*/
	add		%o0, 2, %o0			/* IEU0			*/
	add		%o1, 2, %o1			/* IEU0		Group	*/
	sth		%g2, [%o0 - 0x2]		/* Store		*/
1:	be,pt		%xcc, 1f			/* CTI			*/
	 nop						/* IEU0		Group	*/
	ldub		[%o1], %g2			/* Load		Group	*/
	stb		%g2, [%o0]			/* Store Group + bubble	*/
1:	retl
	 mov		%g4, %o0

	/* 212: dst-src not congruent mod 4 (medium n).  Byte-copy until
	   dst is 8-aligned (%g2 = dst & 7 from the dispatch above), then
	   use alignaddr/faligndata doubles as in 201:, byte tail last.  */
212:	brz,pt		%g2, 2f				/* CTI		Group	*/
	 mov		8, %g1				/* IEU0			*/
	sub		%g1, %g2, %g2			/* IEU0		Group	*/
	sub		%o2, %g2, %o2			/* IEU0		Group	*/
1:	ldub		[%o1], %g5			/* Load		Group	*/
	add		%o1, 1, %o1			/* IEU0			*/
	add		%o0, 1, %o0			/* IEU1			*/
	subcc		%g2, 1, %g2			/* IEU1		Group	*/
	bne,pt		%xcc, 1b			/* CTI			*/
	 stb		%g5, [%o0 - 1]			/* Store		*/
2:	andn		%o2, 7, %g5			/* IEU0		Group	*/
	and		%o2, 7, %o2			/* IEU1			*/
	fsrc2		%f0, %f2			/* FPU			*/
	alignaddr	%o1, %g0, %g1			/* GRU		Group	*/
	ldd		[%g1], %f4			/* Load		Group	*/
1:	ldd		[%g1 + 0x8], %f6		/* Load		Group	*/
	add		%g1, 0x8, %g1			/* IEU0		Group	*/
	subcc		%g5, 8, %g5			/* IEU1			*/
	faligndata	%f4, %f6, %f0			/* GRU		Group	*/
	std		%f0, [%o0]			/* Store		*/
	add		%o1, 8, %o1			/* IEU0		Group	*/
	be,pn		%xcc, 213f			/* CTI			*/
	 add		%o0, 8, %o0			/* IEU1			*/
	ldd		[%g1 + 0x8], %f4		/* Load		Group	*/
	add		%g1, 8, %g1			/* IEU0			*/
	subcc		%g5, 8, %g5			/* IEU1			*/
	faligndata	%f6, %f4, %f0			/* GRU		Group	*/
	std		%f0, [%o0]			/* Store		*/
	add		%o1, 8, %o1			/* IEU0			*/
	bne,pn		%xcc, 1b			/* CTI		Group	*/
	 add		%o0, 8, %o0			/* IEU0			*/
	/* 213-214: byte tail (continues in the shared 206b loop), then
	   reset %fprs (FPRS_FEF keeps the FPU enabled) and return %g4.  */
213:	brz,pn		%o2, 214f			/* CTI		Group	*/
	 nop						/* IEU0			*/
	ldub		[%o1], %g5			/* LOAD			*/
	add		%o1, 1, %o1			/* IEU0			*/
	add		%o0, 1, %o0			/* IEU1			*/
	subcc		%o2, 1, %o2			/* IEU1			*/
	bne,pt		%xcc, 206b			/* CTI			*/
	 stb		%g5, [%o0 - 1]			/* Store	Group	*/
214:	wr		%g0, FPRS_FEF, %fprs
	retl
	 mov		%g4, %o0
END(memcpy)

libc_hidden_builtin_def (memcpy)

libc_hidden_def (__mempcpy)
weak_alias (__mempcpy, mempcpy)
libc_hidden_builtin_def (mempcpy)