/* $Id: udiv.S,v 1.4 1996/09/30 02:22:38 davem Exp $
 * udiv.S:      This routine was taken from glibc-1.09 and is covered
 *              by the GNU Library General Public License Version 2.
 */


/* This file is generated from divrem.m4; DO NOT EDIT! */
/*
 * Division and remainder, from Appendix E of the Sparc Version 8
 * Architecture Manual, with fixes from Gordon Irlam.
 */

/*
 * Input: dividend and divisor in %o0 and %o1 respectively.
 *
 * m4 parameters:
 *  .udiv	name of function to generate
 *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
 *  false	false=true => signed; false=false => unsigned
 *
 * Algorithm parameters:
 *  N		how many bits per iteration we try to get (4)
 *  WORDSIZE	total number of bits (32)
 *
 * Derived constants:
 *  TOPBITS	number of bits in the top decade of a number
 *
 * Important variables:
 *  Q		the partial quotient under development (initially 0)
 *  R		the remainder so far, initially the dividend
 *  ITER	number of main division loop iterations required;
 *		equal to ceil(log2(quotient) / N).  Note that this
 *		is the log base (2^N) of the quotient.
 *  V		the current comparand, initially divisor*2^(ITER*N-1)
 *
 * Cost:
 *  Current estimate for non-large dividend is
 *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
 *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
 *  different path, as the upper bits of the quotient must be developed
 *  one bit at a time.
 */

/*
 * .udiv -- unsigned 32-bit division, %o0 = %o0 / %o1.
 *
 * In:       %o0 = dividend, %o1 = divisor
 * Out:      %o0 = quotient; 0 when the dividend is smaller than the
 *           divisor, and 0 when the divisor is zero and the ST_DIV0
 *           trap returns
 * Leaf routine: returns with retl, no register window is allocated.
 *
 * Register roles as used by the code below:
 *  %o2  Q     partial quotient
 *  %o3  R     running remainder (may go negative between steps)
 *  %o4  ITER  count of N-bit passes through Ldivloop
 *  %o5  V     comparand (scaled divisor)
 *  %g1        the constant 1 << (32 - 4 - 1)
 *  %g7        count of single-bit steps (large dividend path only)
 *
 * Modifies: %o0, %o2, %o3, %o4, %o5, %g1, %g7 and the integer
 *           condition codes.
 *
 * ST_DIV0 is not defined in this file; it has to be supplied by an
 * include file or by the build.
 *
 * Layout convention: an instruction indented by one extra space sits in
 * the delay slot of the branch directly above it.
 */

	.globl .udiv
.udiv:

	! Ready to divide.  Compute size of quotient; scale comparand.
	orcc	%o1, %g0, %o5		! V = divisor, and test it for zero
	bne	1f
	 mov	%o0, %o3		! R = dividend (runs on both paths)

	! Divide by zero trap.  If it returns, return 0 (about as
	! wrong as possible, but that is what SunOS does...).
	ta	ST_DIV0
	retl
	 clr	%o0

1:
	cmp	%o3, %o5		! if %o1 exceeds %o0, done
	blu	Lgot_result		! (and algorithm fails otherwise)
	 clr	%o2			! Q = 0

	! The low ten bits of the constant are zero, so sethi alone
	! loads it exactly.
	sethi	%hi(1 << (32 - 4 - 1)), %g1

	cmp	%o3, %g1
	blu	Lnot_really_big
	 clr	%o4			! ITER = 0

	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
	! as our usual N-at-a-shot divide step will cause overflow and havoc.
	! The number of bits in the result here is N*ITER+SC, where SC <= N.
	! Compute ITER in an unorthodox manner: know we need to shift V into
	! the top decade: so do not even bother to compare to R.
1:
	cmp	%o5, %g1
	bgeu	3f
	 mov	1, %g7			! %g7 = 1, rewritten on every pass

	sll	%o5, 4, %o5		! V <<= N

	b	1b
	 add	%o4, 1, %o4		! ITER++

	! Now compute %g7.
2:
	addcc	%o5, %o5, %o5		! V <<= 1, carry set on overflow
	bcc	Lnot_too_big
	 add	%g7, 1, %g7

	! We get here if the %o1 overflowed while shifting.
	! This means that %o3 has the high-order bit set.
	! Restore %o5 and subtract from %o3.
	sll	%g1, 4, %g1		! high order bit
	srl	%o5, 1, %o5		! rest of %o5
	add	%o5, %g1, %o5

	b	Ldo_single_div
	 sub	%g7, 1, %g7		! take back the increment made above

Lnot_too_big:
3:
	cmp	%o5, %o3
	blu	2b			! keep doubling while V < R
	 nop

	be	Ldo_single_div		! V == R
	 nop
	/* NB: these are commented out in the V8-Sparc manual as well */
	/* (I do not understand this) */
	! %o5 > %o3: went too far: back up 1 step
	!	srl	%o5, 1, %o5
	!	dec	%g7
	! do single-bit divide steps
	!
	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
	! first divide step without thinking.  BUT, the others are conditional,
	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
	! order bit set in the first step, just falling into the regular
	! division loop will mess up the first time around.
	! So we unroll slightly...
Ldo_single_div:
	subcc	%g7, 1, %g7
	bl	Lend_regular_divide	! no single-bit steps to do
	 nop

	sub	%o3, %o5, %o3		! first step: R -= V unconditionally
	mov	1, %o2			! Q = 1

	b	Lend_single_divloop
	 nop
Lsingle_divloop:
	! The bl below tests the condition codes set by the tst %o3 in the
	! delay slot of the bge at the bottom of the loop; sll leaves
	! them untouched.
	sll	%o2, 1, %o2		! Q <<= 1
	bl	1f
	 srl	%o5, 1, %o5		! V >>= 1 (runs on both paths)
	! %o3 >= 0
	sub	%o3, %o5, %o3
	b	2f
	 add	%o2, 1, %o2
1:	! %o3 < 0
	add	%o3, %o5, %o3
	sub	%o2, 1, %o2
2:
Lend_single_divloop:
	subcc	%g7, 1, %g7
	bge	Lsingle_divloop
	 tst	%o3			! sign of R, for the next bl

	! Annulled branch always: there is no delay slot instruction here.
	b,a	Lend_regular_divide

Lnot_really_big:
1:
	sll	%o5, 4, %o5		! V <<= N

	cmp	%o5, %o3
	bleu	1b			! loop while V <= R
	 addcc	%o4, 1, %o4		! ITER++ (runs on every pass)

	! This be tests the condition codes left by the addcc above, so it
	! is taken only when %o4 has become zero.
	be	Lgot_result
	 sub	%o4, 1, %o4		! take back the final increment

	tst	%o3			! set up for initial iteration
Ldivloop:
	! One pass develops N = 4 quotient bits by non-restoring steps.
	! Each level halves V, then subtracts V from a non-negative R or
	! adds V to a negative R.  The leaf reached adds the matching odd
	! digit, between -15 and +15, to Q after Q has been shifted left
	! by N.
	sll	%o2, 4, %o2
	! depth 1, accumulated bits 0
	bl	L.1.16
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 2, accumulated bits 1
	bl	L.2.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits 3
	bl	L.3.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 7
	bl	L.4.23
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2+1), %o2

L.4.23:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (7*2-1), %o2

L.3.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 5
	bl	L.4.21
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2+1), %o2

L.4.21:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (5*2-1), %o2

L.2.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits 1
	bl	L.3.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits 3
	bl	L.4.19
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2+1), %o2

L.4.19:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (3*2-1), %o2

L.3.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits 1
	bl	L.4.17
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2+1), %o2

L.4.17:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (1*2-1), %o2

L.1.16:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 2, accumulated bits -1
	bl	L.2.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 3, accumulated bits -1
	bl	L.3.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -1
	bl	L.4.15
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2+1), %o2

L.4.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-1*2-1), %o2

L.3.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -3
	bl	L.4.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2+1), %o2

L.4.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-3*2-1), %o2

L.2.15:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 3, accumulated bits -3
	bl	L.3.13
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	! depth 4, accumulated bits -5
	bl	L.4.11
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2+1), %o2

L.4.11:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-5*2-1), %o2

L.3.13:
	! remainder is negative
	addcc	%o3,%o5,%o3
	! depth 4, accumulated bits -7
	bl	L.4.9
	 srl	%o5,1,%o5
	! remainder is positive
	subcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2+1), %o2

L.4.9:
	! remainder is negative
	addcc	%o3,%o5,%o3
	b	9f
	 add	%o2, (-7*2-1), %o2

9:
Lend_regular_divide:
	subcc	%o4, 1, %o4		! ITER--
	bge	Ldivloop
	 tst	%o3			! sign of R: read by the first bl in
					! Ldivloop and by the bl,a below

	! Branch to the very next instruction with the annul bit set: the
	! delay slot runs only when the branch is taken, i.e. only when the
	! final remainder is negative.
	bl,a	Lgot_result
	! non-restoring fixup here (one instruction only!)
	 sub	%o2, 1, %o2

Lgot_result:

	retl
	 mov	%o2, %o0		! return Q


/*
 * .udiv_patch -- the same divide done with the udiv instruction.
 *
 * In:  %o0 = dividend, %o1 = divisor
 * Out: %o0 = result of udiv %o0, %o1
 * Writes zero to %y before dividing.
 *
 * NOTE(review): the name suggests this sequence is copied over the start
 * of .udiv on CPUs that implement udiv; the code doing so is not in this
 * file, so confirm before changing the length or order of this sequence.
 * NOTE(review): the two nops and the retl presumably cover the write
 * delay of wr %y before udiv reads it, and the trailing nop is
 * presumably padding; confirm against the V8 manual and the patch code.
 */
	.globl .udiv_patch
.udiv_patch:
	wr	%g0, 0x0, %y		! %y = 0
	nop
	nop
	retl
	 udiv	%o0, %o1, %o0
	nop