/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@fluxnic.net>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division, four quotient bits per loop pass.
 *
 * In:   dividend, divisor - unsigned 32-bit operands.  Every caller in this
 *       file has already dealt with divisor == 0, divisor == 1,
 *       dividend <= divisor and power-of-two divisors before expanding
 *       this macro.
 * Out:  result - the quotient.
 * Clobbers: dividend, divisor, curbit and the condition flags.  Callers in
 *       this file only consume result.
 *
 * curbit is the quotient bit that corresponds to the current position of
 * the (shifted) divisor.  The macro uses the numeric local label 1.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ Line the divisor up with the dividend in one step: shift both the
	@ divisor and curbit left by clz(divisor) - clz(dividend).
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop: four compare/subtract steps per pass, at
	@ divisor, divisor >> 1, divisor >> 2 and divisor >> 3.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute the bit position of a power-of-two divisor, i.e. the right-shift
 * count that divides by it.
 *
 * In:   divisor - callers in this file only expand this macro after
 *       establishing that divisor & (divisor - 1) == 0 and divisor != 0.
 * Out:  order - index of the set bit in divisor.
 * Clobbers: the condition flags and, on the pre-ARMv5 path only, divisor.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	clz	\order, \divisor
	rsb	\order, \order, #31		@ order = 31 - clz(divisor)

#else

	@ Binary search for the set bit: 16, 8, then 4 bits at a time.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ divisor is now 1, 2, 4 or 8: add 3 for 8, otherwise divisor >> 1
	@ (which yields 0, 1 or 2).
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned remainder by shift-and-subtract.
 *
 * In:   dividend, divisor - unsigned 32-bit operands.  Every caller in this
 *       file has already dealt with divisor == 0, divisor == 1,
 *       dividend <= divisor and power-of-two divisors before expanding
 *       this macro.
 * Out:  dividend - the remainder.
 * Clobbers: divisor, order, the condition flags and, on the ARMv5+ path
 *       only, spare.
 *
 * order is the number of bit positions the divisor was shifted left.
 * The macro uses the numeric local labels 1 to 5.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ order = clz(divisor) - clz(dividend); line the divisor up with
	@ the dividend in one step.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	@ Go round again only while the dividend is still >= 1 and order
	@ stays >= 0 after dropping another 4 positions.
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	@ Finished if (order & 3) == 0 or the dividend is already zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
	@ order == -3: one step (4), order == -2: two steps (3),
	@ otherwise all three.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3 / __aeabi_uidiv: unsigned 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = r0 / r1
 * Clobbers: r1, r2, r3, flags
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__udivsi3)
ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ r2 = divisor - 1
	moveq	pc, lr				@ divisor == 1: r0 is the answer
	bcc	Ldiv0				@ borrow: divisor was 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor
	tst	r1, r2
	beq	12f				@ divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	mov	pc, lr

	@ Flags still come from cmp r0, r1: equal gives 1, lower gives 0.
11:	moveq	r0, #1
	movne	r0, #0
	mov	pc, lr

12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
ENDPROC(__aeabi_uidiv)

/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = r0 % r1
 * Clobbers: r1, r2, r3, flags
 *
 * A zero divisor branches to Ldiv0.
 */
ENTRY(__umodsi3)
UNWIND(.fnstart)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	movls	pc, lr

	ARM_MOD_BODY r0, r1, r2, r3

	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__umodsi3)

/*
 * __divsi3 / __aeabi_idiv: signed 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = r0 / r1
 * Clobbers: r1, r2, r3, ip, flags
 *
 * ip holds dividend ^ divisor, whose sign bit is the sign of the quotient.
 * The magnitudes are divided as unsigned values and the sign is applied at
 * the end.  A zero divisor branches to Ldiv0.
 */
ENTRY(__divsi3)
ENTRY(__aeabi_idiv)
UNWIND(.fnstart)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ Divisor was 1 or -1: ip ^ r0 has the sign of the original divisor,
	@ so negate the dividend only when the divisor was -1.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ Flags still come from cmp r3, r1: lower gives 0, equal gives
	@ +1 or -1 according to the sign held in ip.
11:	movlo	r0, #0
	moveq	r0, ip, asr #31
	orreq	r0, r0, #1
	mov	pc, lr

12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__divsi3)
ENDPROC(__aeabi_idiv)

/*
 * __modsi3: signed 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = r0 % r1, carrying the sign of the dividend
 * Clobbers: r1, r2, r3, ip, flags
 *
 * ip keeps the original dividend so that its sign can be applied to the
 * unsigned remainder.  A zero divisor branches to Ldiv0.
 */
ENTRY(__modsi3)
UNWIND(.fnstart)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2
	bls	10f

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0
	rsbmi	r0, r0, #0
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__modsi3)

#ifdef CONFIG_AEABI

/*
 * __aeabi_uidivmod: unsigned 32-bit division with remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient, r1 = remainder
 *
 * The operands are saved across the call to __aeabi_uidiv and the remainder
 * is rebuilt as dividend - quotient * divisor.
 */
ENTRY(__aeabi_uidivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr} )

	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_uidiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)

/*
 * __aeabi_idivmod: signed 32-bit division with remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient, r1 = remainder
 *
 * The operands are saved across the call to __aeabi_idiv and the remainder
 * is rebuilt as dividend - quotient * divisor.
 */
ENTRY(__aeabi_idivmod)
UNWIND(.fnstart)
UNWIND(.save {r0, r1, ip, lr} )
	stmfd	sp!, {r0, r1, ip, lr}
	bl	__aeabi_idiv
	ldmfd	sp!, {r1, r2, ip, lr}		@ r1 = dividend, r2 = divisor
	mul	r3, r0, r2
	sub	r1, r1, r3
	mov	pc, lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

#endif

/*
 * Ldiv0: common division-by-zero path for the routines above.
 *
 * Saves lr, calls __div0 (defined outside this file) and then returns 0 in
 * r0 to the caller of the division routine.  Eight bytes of stack are
 * reserved although only lr is stored; the .pad directive accounts for the
 * unused word.
 */
Ldiv0:
UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
	str	lr, [sp, #-8]!
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8
UNWIND(.fnend)
ENDPROC(Ldiv0)