 # Alpha EV5 __mpn_rshift -- shift a multi-limb number right by cnt bits.

 # Copyright (C) 1994-2022 Free Software Foundation, Inc.

 # This file is part of the GNU MP Library.

 # The GNU MP Library is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # the Free Software Foundation; either version 2.1 of the License, or (at your
 # option) any later version.

 # The GNU MP Library is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 # License for more details.

 # You should have received a copy of the GNU Lesser General Public License
 # along with the GNU MP Library.  If not, see <https://www.gnu.org/licenses/>.


 # INPUT PARAMETERS
 # res_ptr      r16
 # s1_ptr       r17
 # size         r18
 # cnt          r19

 # This code runs at 3.25 cycles/limb on the EV5.

 # OPERATION
 # Limbs are 64 bits wide (ldq/stq with an 8-byte stride) and are processed
 # from the lowest address upward.  With s[] the source limbs at s1_ptr and
 # r[] the result limbs at res_ptr:
 #     r[i]      = (s[i] >> cnt) | (s[i+1] << (64 - cnt))    for i < size-1
 #     r[size-1] =  s[size-1] >> cnt
 #     $0        =  s[0] << (64 - cnt)                       (function result)
 # The function result therefore holds the bits shifted out of s[0] in its
 # most significant cnt bits, with zeros below them.
 #
 # NOTE(review): s[0] is loaded unconditionally and size-1 is used as the
 # number of remaining limbs, so the code appears to require size >= 1.
 # It also appears to require cnt in 1..63: with cnt = 0 both shift counts
 # would be 0 and the OR would merge neighbouring limbs.  Confirm against
 # the callers.
 #
 # REGISTER USE
 # $0       function result
 # $1-$4    source limbs currently in flight
 # $5-$8    left-shifted (sll) parts, then the completed result limbs
 # $21-$24  right-shifted (srl) parts; each is OR-ed with the sll part of
 #          the following source limb to form one result limb
 # $16,$17  res_ptr and s1_ptr, advanced as limbs are produced and consumed
 # $18      number of source limbs not yet loaded by the unrolled code
 # $20      -cnt; the shift instructions use only the low 6 bits of the
 #          count, so this acts as 64 - cnt
 # $28      iteration count of the first loop (this is the assembler
 #          temporary $at, hence .set noat below)
 #
 # STRUCTURE
 # 1. .Loop0 handles (size-1) mod 4 limbs one at a time so that the number
 #    of limbs left is a multiple of 4.
 # 2. The remaining limbs go through a 4-way unrolled, software-pipelined
 #    loop (.Loop) with explicit warm-up and cool-down code.  The cool-down
 #    entry point depends on how many groups of four were left:
 #    none -> .Lend, one -> .Lend1, two or more -> .Lend2.

        .set    noreorder
        .set    noat
.text
        .align  3
        .globl  __mpn_rshift
        .ent    __mpn_rshift
__mpn_rshift:
        .frame  $30,0,$26,0             # frame size 0, return address in $26

        ldq     $4,0($17)               # load first limb
        subq    $31,$19,$20             # $20 = 0 - cnt, the left-shift count
        subq    $18,1,$18               # $18 = size-1 = limbs after the first
        and     $18,4-1,$28             # number of limbs in first loop
        sll     $4,$20,$0               # compute function result

        beq     $28,.L0                 # first loop has nothing to do
        subq    $18,$28,$18             # what is left is a multiple of 4

 # First loop: one result limb per iteration.  On entry to each iteration
 # $4 holds the current source limb and 8($17) addresses the next one.
        .align  3
.Loop0: ldq     $3,8($17)               # next source limb
        addq    $16,8,$16
        srl     $4,$19,$5               # low bits, from the current limb
        addq    $17,8,$17
        subq    $28,1,$28
        sll     $3,$20,$6               # high bits, from the next limb
        or      $3,$3,$4                # register copy: next limb becomes current
        or      $5,$6,$8
        stq     $8,-8($16)              # offset -8 because $16 already advanced
        bne     $28,.Loop0

 # From here on $24 carries the right-shifted current limb, which still
 # lacks its high bits from the next source limb (if there is one).
.L0:    srl     $4,$19,$24
        beq     $18,.Lend               # no limbs left: $24 is the top limb
 # warm up phase 1
 # Load the next four source limbs.
        ldq     $1,8($17)
        subq    $18,4,$18
        ldq     $2,16($17)
        ldq     $3,24($17)
        ldq     $4,32($17)
        beq     $18,.Lend1              # exactly four limbs were left
 # warm up phase 2
 # Start shifting the four loaded limbs while loading four more.  On exit:
 # $7,$8 are complete result limbs, $5,$6 are sll parts still waiting for
 # $22,$23, $24 is the srl carry, and $1-$4 hold the next four source limbs.
        sll     $1,$20,$7
        srl     $1,$19,$21
        sll     $2,$20,$8
        ldq     $1,40($17)
        srl     $2,$19,$22
        ldq     $2,48($17)
        sll     $3,$20,$5
        or      $7,$24,$7
        srl     $3,$19,$23
        or      $8,$21,$8
        sll     $4,$20,$6
        ldq     $3,56($17)
        srl     $4,$19,$24
        ldq     $4,64($17)
        subq    $18,4,$18
        beq     $18,.Lend2              # exactly eight limbs were left
        .align  4
 # main loop
 # Each iteration stores four result limbs, shifts the four source limbs
 # already held in $1-$4 and loads the four after them, re-establishing the
 # state described at the end of warm up phase 2.  Both pointers advance by
 # 32 bytes per iteration; the load offsets 72..96 are relative to $17
 # before that advance.
 # NOTE(review): the blank-line groups of four instructions presumably
 # mirror the intended EV5 issue grouping, and each unop appears to stand
 # in for the commented-out ldq to $31 beside it -- confirm before
 # reordering anything.
.Loop:  stq     $7,0($16)
        or      $5,$22,$5
        stq     $8,8($16)
        or      $6,$23,$6

        sll     $1,$20,$7
        subq    $18,4,$18
        srl     $1,$19,$21
        unop    # ldq   $31,-96($17)

        sll     $2,$20,$8
        ldq     $1,72($17)
        srl     $2,$19,$22
        ldq     $2,80($17)

        stq     $5,16($16)
        or      $7,$24,$7               # uses $24 from the previous group of four
        stq     $6,24($16)
        or      $8,$21,$8

        sll     $3,$20,$5
        unop    # ldq   $31,-96($17)
        srl     $3,$19,$23
        addq    $16,32,$16

        sll     $4,$20,$6
        ldq     $3,88($17)
        srl     $4,$19,$24
        ldq     $4,96($17)

        addq    $17,32,$17
        bne     $18,.Loop
 # cool down phase 2/1
 # Drain the pipeline without further loads: finish and store the four
 # pending result limbs and shift the last four source limbs in $1-$4.
.Lend2: stq     $7,0($16)
        or      $5,$22,$5
        stq     $8,8($16)
        or      $6,$23,$6
        sll     $1,$20,$7
        srl     $1,$19,$21
        sll     $2,$20,$8
        srl     $2,$19,$22
        stq     $5,16($16)
        or      $7,$24,$7
        stq     $6,24($16)
        or      $8,$21,$8
        sll     $3,$20,$5
        srl     $3,$19,$23
        sll     $4,$20,$6
        srl     $4,$19,$24
 # cool down phase 2/2
 # Store the last four combined result limbs.
        stq     $7,32($16)
        or      $5,$22,$5
        stq     $8,40($16)
        or      $6,$23,$6
        stq     $5,48($16)
        stq     $6,56($16)
 # cool down phase 2/3
 # The top result limb is the last source limb shifted right, with no
 # high bits to OR in.
        stq     $24,64($16)
        ret     $31,($26),1

 # cool down phase 1/1
 # Reached when exactly four limbs were left after the first loop: they
 # are already in $1-$4 and $24 holds the srl carry from the limb before.
.Lend1: sll     $1,$20,$7
        srl     $1,$19,$21
        sll     $2,$20,$8
        srl     $2,$19,$22
        sll     $3,$20,$5
        or      $7,$24,$7
        srl     $3,$19,$23
        or      $8,$21,$8
        sll     $4,$20,$6
        srl     $4,$19,$24
 # cool down phase 1/2
 # Store the four combined result limbs, then the top limb.
        stq     $7,0($16)
        or      $5,$22,$5
        stq     $8,8($16)
        or      $6,$23,$6
        stq     $5,16($16)
        stq     $6,24($16)
        stq     $24,32($16)
        ret     $31,($26),1

 # Reached when no limbs were left after the first loop: only the top
 # result limb remains to be stored.
.Lend:  stq     $24,0($16)
        ret     $31,($26),1
        .end    __mpn_rshift