1 /* mpn_mul_n -- Multiply two natural numbers of length n.
2
3 Copyright (C) 1991-2022 Free Software Foundation, Inc.
4
5 This file is part of the GNU MP Library.
6
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or (at your
10 option) any later version.
11
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
15 License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with the GNU MP Library; see the file COPYING.LIB. If not, see
19 <https://www.gnu.org/licenses/>. */
20
21 #include <gmp.h>
22 #include "gmp-impl.h"
23
24 /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
25 both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are
26 always stored. Return the most significant limb.
27
28 Argument constraints:
29 1. PRODP != UP and PRODP != VP, i.e. the destination
30 must be distinct from the multiplier and the multiplicand. */
31
32 /* If KARATSUBA_THRESHOLD is not already defined, define it to a
33 value which is good on most machines. */
34 #ifndef KARATSUBA_THRESHOLD
35 #define KARATSUBA_THRESHOLD 32
36 #endif
37
38 /* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */
39 #if KARATSUBA_THRESHOLD < 2
40 #undef KARATSUBA_THRESHOLD
41 #define KARATSUBA_THRESHOLD 2
42 #endif
43
44 /* Handle simple cases with traditional multiplication.
45
46 This is the most critical code of multiplication. All multiplies rely
47 on this, both small and huge. Small ones arrive here immediately. Huge
48 ones arrive here as this is the base case for Karatsuba's recursive
49 algorithm below. */
50
51 void
impn_mul_n_basecase(mp_ptr prodp,mp_srcptr up,mp_srcptr vp,mp_size_t size)52 impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
53 {
54 mp_size_t i;
55 mp_limb_t cy_limb;
56 mp_limb_t v_limb;
57
58 /* Multiply by the first limb in V separately, as the result can be
59 stored (not added) to PROD. We also avoid a loop for zeroing. */
60 v_limb = vp[0];
61 if (v_limb <= 1)
62 {
63 if (v_limb == 1)
64 MPN_COPY (prodp, up, size);
65 else
66 MPN_ZERO (prodp, size);
67 cy_limb = 0;
68 }
69 else
70 cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
71
72 prodp[size] = cy_limb;
73 prodp++;
74
75 /* For each iteration in the outer loop, multiply one limb from
76 U with one limb from V, and add it to PROD. */
77 for (i = 1; i < size; i++)
78 {
79 v_limb = vp[i];
80 if (v_limb <= 1)
81 {
82 cy_limb = 0;
83 if (v_limb == 1)
84 cy_limb = mpn_add_n (prodp, prodp, up, size);
85 }
86 else
87 cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
88
89 prodp[size] = cy_limb;
90 prodp++;
91 }
92 }
93
/* Karatsuba multiplication of {up, size} by {vp, size}, storing 2*size
   limbs at PRODP.  TSPACE must provide 2*size limbs of scratch space.
   Recursion goes through MPN_MUL_N_RECURSE, which drops back to
   impn_mul_n_basecase below KARATSUBA_THRESHOLD.  PRODP must be distinct
   from UP and VP.  */
void
impn_mul_n (mp_ptr prodp,
	     mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
{
  if ((size & 1) != 0)
    {
      /* The size is odd, the code below doesn't handle that.
	 Multiply the least significant (size - 1) limbs with a recursive
	 call, and handle the most significant limb of S1 and S2
	 separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
	 code below behave as if the size were even, and let it check for
	 odd size in the end.  I.e., in essence move this code to the end.
	 Doing so would save us a recursive call, and potentially make the
	 stack grow a lot less.  */

      mp_size_t esize = size - 1;	/* even size */
      mp_limb_t cy_limb;

      MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
      /* Add the two cross rows involving the top limbs.  The first pass
	 adds up[0..esize-1] * vp[esize] at offset esize and parks its
	 carry in prodp[2*esize]; the second pass spans SIZE limbs so it
	 re-absorbs that parked carry while adding vp[0..size-1] *
	 up[esize].  */
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);

      prodp[esize + size] = cy_limb;
    }
  else
    {
      /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.

	 Split U in two pieces, U1 and U0, such that
	 U = U0 + U1*(B**n),
	 and V in V1 and V0, such that
	 V = V0 + V1*(B**n).

	 UV is then computed recursively using the identity

		2n   n          n                     n
	 UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
			1 1        1  0   0  1              0 0

	 Where B = 2**BITS_PER_MP_LIMB.  */

      mp_size_t hsize = size >> 1;
      mp_limb_t cy;
      int negflg;

      /*** Product H.	 ________________  ________________
			|_____U1 x V1____||____U0 x V0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
	 as new TSPACE.  */
      MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);

      /*** Product M.	 ________________
			|_(U1-U0)(V0-V1)_|  */
      /* Compute |U1-U0| and |V0-V1| into the low part of PROD (free at
	 this point), tracking the sign of the product in NEGFLG.  */
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
	{
	  mpn_sub_n (prodp, up + hsize, up, hsize);
	  negflg = 0;
	}
      else
	{
	  mpn_sub_n (prodp, up, up + hsize, hsize);
	  negflg = 1;
	}
      if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
	{
	  mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
	  negflg ^= 1;
	}
      else
	{
	  mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
	  /* No change of NEGFLG.  */
	}
      /* Read temporary operands from low part of PROD.
	 Put result in low part of TSPACE using upper part of TSPACE
	 as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);

      /*** Add/copy product H.  H's low half is folded into the middle of
	 PROD; its high half is kept (and self-added per the identity).  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Add product M (if NEGFLG M is a negative number).  CY may
	 legitimately go "negative" here; it is corrected when L is
	 added below (unsigned wraparound cancels out).  */
      if (negflg)
	cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
      else
	cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L.	 ________________  ________________
			|________________||____U0 x V0_____|  */
      /* Operands are the low halves of U and V.  Put result in low part
	 of TSPACE using upper part of TSPACE as new TSPACE.  */
      MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      /* First add: L shifted up by hsize limbs.  */
      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      /* Second add: L at offset 0.  Its low half lands in still-unwritten
	 space (plain copy); its high half is added in.  */
      MPN_COPY (prodp, tspace, hsize);
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
	mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
203
204 void
impn_sqr_n_basecase(mp_ptr prodp,mp_srcptr up,mp_size_t size)205 impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
206 {
207 mp_size_t i;
208 mp_limb_t cy_limb;
209 mp_limb_t v_limb;
210
211 /* Multiply by the first limb in V separately, as the result can be
212 stored (not added) to PROD. We also avoid a loop for zeroing. */
213 v_limb = up[0];
214 if (v_limb <= 1)
215 {
216 if (v_limb == 1)
217 MPN_COPY (prodp, up, size);
218 else
219 MPN_ZERO (prodp, size);
220 cy_limb = 0;
221 }
222 else
223 cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
224
225 prodp[size] = cy_limb;
226 prodp++;
227
228 /* For each iteration in the outer loop, multiply one limb from
229 U with one limb from V, and add it to PROD. */
230 for (i = 1; i < size; i++)
231 {
232 v_limb = up[i];
233 if (v_limb <= 1)
234 {
235 cy_limb = 0;
236 if (v_limb == 1)
237 cy_limb = mpn_add_n (prodp, prodp, up, size);
238 }
239 else
240 cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
241
242 prodp[size] = cy_limb;
243 prodp++;
244 }
245 }
246
/* Karatsuba squaring of {up, size}, storing 2*size limbs at PRODP.
   TSPACE must provide 2*size limbs of scratch space.  Recursion goes
   through MPN_SQR_N_RECURSE, which drops back to impn_sqr_n_basecase
   below KARATSUBA_THRESHOLD.  PRODP must be distinct from UP.  Same
   scheme as impn_mul_n, simplified because the middle product
   (U1-U0)(U0-U1) = -(U1-U0)^2 is never positive.  */
void
impn_sqr_n (mp_ptr prodp,
	     mp_srcptr up, mp_size_t size, mp_ptr tspace)
{
  if ((size & 1) != 0)
    {
      /* The size is odd, the code below doesn't handle that.
	 Square the least significant (size - 1) limbs with a recursive
	 call, and handle the most significant limb separately.  */
      /* A slightly faster way to do this would be to make the Karatsuba
	 code below behave as if the size were even, and let it check for
	 odd size in the end.  I.e., in essence move this code to the end.
	 Doing so would save us a recursive call, and potentially make the
	 stack grow a lot less.  */

      mp_size_t esize = size - 1;	/* even size */
      mp_limb_t cy_limb;

      MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
      /* Add the two cross rows involving the top limb.  The first pass
	 adds up[0..esize-1] * up[esize] at offset esize and parks its
	 carry in prodp[2*esize]; the second pass spans SIZE limbs so it
	 re-absorbs that parked carry while also adding the
	 up[esize]^2 term.  */
      cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
      prodp[esize + esize] = cy_limb;
      cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);

      prodp[esize + size] = cy_limb;
    }
  else
    {
      mp_size_t hsize = size >> 1;
      mp_limb_t cy;

      /*** Product H.	 ________________  ________________
			|_____U1 x U1____||____U0 x U0_____|  */
      /* Put result in upper part of PROD and pass low part of TSPACE
	 as new TSPACE.  */
      MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);

      /*** Product M.	 ________________
			|_(U1-U0)(U0-U1)_|  */
      /* Compute |U1-U0| into the low part of PROD; which order we
	 subtract in doesn't matter since the value gets squared.  */
      if (mpn_cmp (up + hsize, up, hsize) >= 0)
	{
	  mpn_sub_n (prodp, up + hsize, up, hsize);
	}
      else
	{
	  mpn_sub_n (prodp, up, up + hsize, hsize);
	}

      /* Read temporary operands from low part of PROD.
	 Put result in low part of TSPACE using upper part of TSPACE
	 as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);

      /*** Add/copy product H.  H's low half is folded into the middle of
	 PROD; its high half is kept (and self-added per the identity).  */
      MPN_COPY (prodp + hsize, prodp + size, hsize);
      cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);

      /*** Subtract product M unconditionally: M = -(U1-U0)^2 is never
	 positive, so unlike impn_mul_n no sign flag is needed.  CY may
	 legitimately go "negative" here; it is corrected when L is added
	 below (unsigned wraparound cancels out).  */
      cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);

      /*** Product L.	 ________________  ________________
			|________________||____U0 x U0_____|  */
      /* Operand is the low half of U.  Put result in low part of TSPACE
	 using upper part of TSPACE as new TSPACE.  */
      MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);

      /*** Add/copy Product L (twice).  */

      /* First add: L shifted up by hsize limbs.  */
      cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
      if (cy)
	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);

      /* Second add: L at offset 0.  Its low half lands in still-unwritten
	 space (plain copy); its high half is added in.  */
      MPN_COPY (prodp, tspace, hsize);
      cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
      if (cy)
	mpn_add_1 (prodp + size, prodp + size, size, 1);
    }
}
326
327 /* This should be made into an inline function in gmp.h. */
328 void
mpn_mul_n(mp_ptr prodp,mp_srcptr up,mp_srcptr vp,mp_size_t size)329 mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
330 {
331 TMP_DECL (marker);
332 TMP_MARK (marker);
333 if (up == vp)
334 {
335 if (size < KARATSUBA_THRESHOLD)
336 {
337 impn_sqr_n_basecase (prodp, up, size);
338 }
339 else
340 {
341 mp_ptr tspace;
342 tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
343 impn_sqr_n (prodp, up, size, tspace);
344 }
345 }
346 else
347 {
348 if (size < KARATSUBA_THRESHOLD)
349 {
350 impn_mul_n_basecase (prodp, up, vp, size);
351 }
352 else
353 {
354 mp_ptr tspace;
355 tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
356 impn_mul_n (prodp, up, vp, size, tspace);
357 }
358 }
359 TMP_FREE (marker);
360 }
361