1 /* mpn_mul_n -- Multiply two natural numbers of length n.
2 
3 Copyright (C) 1991-2022 Free Software Foundation, Inc.
4 
5 This file is part of the GNU MP Library.
6 
7 The GNU MP Library is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or (at your
10 option) any later version.
11 
12 The GNU MP Library is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15 License for more details.
16 
17 You should have received a copy of the GNU Lesser General Public License
18 along with the GNU MP Library; see the file COPYING.LIB.  If not, see
19 <https://www.gnu.org/licenses/>.  */
20 
21 #include <gmp.h>
22 #include "gmp-impl.h"
23 
24 /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
25    both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
26    always stored.  Return the most significant limb.
27 
28    Argument constraints:
29    1. PRODP != UP and PRODP != VP, i.e. the destination
30       must be distinct from the multiplier and the multiplicand.  */
31 
32 /* If KARATSUBA_THRESHOLD is not already defined, define it to a
33    value which is good on most machines.  */
34 #ifndef KARATSUBA_THRESHOLD
35 #define KARATSUBA_THRESHOLD 32
36 #endif
37 
38 /* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
39 #if KARATSUBA_THRESHOLD < 2
40 #undef KARATSUBA_THRESHOLD
41 #define KARATSUBA_THRESHOLD 2
42 #endif
43 
44 /* Handle simple cases with traditional multiplication.
45 
46    This is the most critical code of multiplication.  All multiplies rely
47    on this, both small and huge.  Small ones arrive here immediately.  Huge
48    ones arrive here as this is the base case for Karatsuba's recursive
49    algorithm below.  */
50 
51 void
impn_mul_n_basecase(mp_ptr prodp,mp_srcptr up,mp_srcptr vp,mp_size_t size)52 impn_mul_n_basecase (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
53 {
54   mp_size_t i;
55   mp_limb_t cy_limb;
56   mp_limb_t v_limb;
57 
58   /* Multiply by the first limb in V separately, as the result can be
59      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
60   v_limb = vp[0];
61   if (v_limb <= 1)
62     {
63       if (v_limb == 1)
64 	MPN_COPY (prodp, up, size);
65       else
66 	MPN_ZERO (prodp, size);
67       cy_limb = 0;
68     }
69   else
70     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
71 
72   prodp[size] = cy_limb;
73   prodp++;
74 
75   /* For each iteration in the outer loop, multiply one limb from
76      U with one limb from V, and add it to PROD.  */
77   for (i = 1; i < size; i++)
78     {
79       v_limb = vp[i];
80       if (v_limb <= 1)
81 	{
82 	  cy_limb = 0;
83 	  if (v_limb == 1)
84 	    cy_limb = mpn_add_n (prodp, prodp, up, size);
85 	}
86       else
87 	cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
88 
89       prodp[size] = cy_limb;
90       prodp++;
91     }
92 }
93 
94 void
impn_mul_n(mp_ptr prodp,mp_srcptr up,mp_srcptr vp,mp_size_t size,mp_ptr tspace)95 impn_mul_n (mp_ptr prodp,
96 	     mp_srcptr up, mp_srcptr vp, mp_size_t size, mp_ptr tspace)
97 {
98   if ((size & 1) != 0)
99     {
100       /* The size is odd, the code code below doesn't handle that.
101 	 Multiply the least significant (size - 1) limbs with a recursive
102 	 call, and handle the most significant limb of S1 and S2
103 	 separately.  */
104       /* A slightly faster way to do this would be to make the Karatsuba
105 	 code below behave as if the size were even, and let it check for
106 	 odd size in the end.  I.e., in essence move this code to the end.
107 	 Doing so would save us a recursive call, and potentially make the
108 	 stack grow a lot less.  */
109 
110       mp_size_t esize = size - 1;	/* even size */
111       mp_limb_t cy_limb;
112 
113       MPN_MUL_N_RECURSE (prodp, up, vp, esize, tspace);
114       cy_limb = mpn_addmul_1 (prodp + esize, up, esize, vp[esize]);
115       prodp[esize + esize] = cy_limb;
116       cy_limb = mpn_addmul_1 (prodp + esize, vp, size, up[esize]);
117 
118       prodp[esize + size] = cy_limb;
119     }
120   else
121     {
122       /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
123 
124 	 Split U in two pieces, U1 and U0, such that
125 	 U = U0 + U1*(B**n),
126 	 and V in V1 and V0, such that
127 	 V = V0 + V1*(B**n).
128 
129 	 UV is then computed recursively using the identity
130 
131 		2n   n          n                     n
132 	 UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
133 			1 1        1  0   0  1              0 0
134 
135 	 Where B = 2**BITS_PER_MP_LIMB.  */
136 
137       mp_size_t hsize = size >> 1;
138       mp_limb_t cy;
139       int negflg;
140 
141       /*** Product H.	 ________________  ________________
142 			|_____U1 x V1____||____U0 x V0_____|  */
143       /* Put result in upper part of PROD and pass low part of TSPACE
144 	 as new TSPACE.  */
145       MPN_MUL_N_RECURSE (prodp + size, up + hsize, vp + hsize, hsize, tspace);
146 
147       /*** Product M.	 ________________
148 			|_(U1-U0)(V0-V1)_|  */
149       if (mpn_cmp (up + hsize, up, hsize) >= 0)
150 	{
151 	  mpn_sub_n (prodp, up + hsize, up, hsize);
152 	  negflg = 0;
153 	}
154       else
155 	{
156 	  mpn_sub_n (prodp, up, up + hsize, hsize);
157 	  negflg = 1;
158 	}
159       if (mpn_cmp (vp + hsize, vp, hsize) >= 0)
160 	{
161 	  mpn_sub_n (prodp + hsize, vp + hsize, vp, hsize);
162 	  negflg ^= 1;
163 	}
164       else
165 	{
166 	  mpn_sub_n (prodp + hsize, vp, vp + hsize, hsize);
167 	  /* No change of NEGFLG.  */
168 	}
169       /* Read temporary operands from low part of PROD.
170 	 Put result in low part of TSPACE using upper part of TSPACE
171 	 as new TSPACE.  */
172       MPN_MUL_N_RECURSE (tspace, prodp, prodp + hsize, hsize, tspace + size);
173 
174       /*** Add/copy product H.  */
175       MPN_COPY (prodp + hsize, prodp + size, hsize);
176       cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
177 
178       /*** Add product M (if NEGFLG M is a negative number).  */
179       if (negflg)
180 	cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
181       else
182 	cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
183 
184       /*** Product L.	 ________________  ________________
185 			|________________||____U0 x V0_____|  */
186       /* Read temporary operands from low part of PROD.
187 	 Put result in low part of TSPACE using upper part of TSPACE
188 	 as new TSPACE.  */
189       MPN_MUL_N_RECURSE (tspace, up, vp, hsize, tspace + size);
190 
191       /*** Add/copy Product L (twice).  */
192 
193       cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
194       if (cy)
195 	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
196 
197       MPN_COPY (prodp, tspace, hsize);
198       cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
199       if (cy)
200 	mpn_add_1 (prodp + size, prodp + size, size, 1);
201     }
202 }
203 
204 void
impn_sqr_n_basecase(mp_ptr prodp,mp_srcptr up,mp_size_t size)205 impn_sqr_n_basecase (mp_ptr prodp, mp_srcptr up, mp_size_t size)
206 {
207   mp_size_t i;
208   mp_limb_t cy_limb;
209   mp_limb_t v_limb;
210 
211   /* Multiply by the first limb in V separately, as the result can be
212      stored (not added) to PROD.  We also avoid a loop for zeroing.  */
213   v_limb = up[0];
214   if (v_limb <= 1)
215     {
216       if (v_limb == 1)
217 	MPN_COPY (prodp, up, size);
218       else
219 	MPN_ZERO (prodp, size);
220       cy_limb = 0;
221     }
222   else
223     cy_limb = mpn_mul_1 (prodp, up, size, v_limb);
224 
225   prodp[size] = cy_limb;
226   prodp++;
227 
228   /* For each iteration in the outer loop, multiply one limb from
229      U with one limb from V, and add it to PROD.  */
230   for (i = 1; i < size; i++)
231     {
232       v_limb = up[i];
233       if (v_limb <= 1)
234 	{
235 	  cy_limb = 0;
236 	  if (v_limb == 1)
237 	    cy_limb = mpn_add_n (prodp, prodp, up, size);
238 	}
239       else
240 	cy_limb = mpn_addmul_1 (prodp, up, size, v_limb);
241 
242       prodp[size] = cy_limb;
243       prodp++;
244     }
245 }
246 
247 void
impn_sqr_n(mp_ptr prodp,mp_srcptr up,mp_size_t size,mp_ptr tspace)248 impn_sqr_n (mp_ptr prodp,
249 	     mp_srcptr up, mp_size_t size, mp_ptr tspace)
250 {
251   if ((size & 1) != 0)
252     {
253       /* The size is odd, the code code below doesn't handle that.
254 	 Multiply the least significant (size - 1) limbs with a recursive
255 	 call, and handle the most significant limb of S1 and S2
256 	 separately.  */
257       /* A slightly faster way to do this would be to make the Karatsuba
258 	 code below behave as if the size were even, and let it check for
259 	 odd size in the end.  I.e., in essence move this code to the end.
260 	 Doing so would save us a recursive call, and potentially make the
261 	 stack grow a lot less.  */
262 
263       mp_size_t esize = size - 1;	/* even size */
264       mp_limb_t cy_limb;
265 
266       MPN_SQR_N_RECURSE (prodp, up, esize, tspace);
267       cy_limb = mpn_addmul_1 (prodp + esize, up, esize, up[esize]);
268       prodp[esize + esize] = cy_limb;
269       cy_limb = mpn_addmul_1 (prodp + esize, up, size, up[esize]);
270 
271       prodp[esize + size] = cy_limb;
272     }
273   else
274     {
275       mp_size_t hsize = size >> 1;
276       mp_limb_t cy;
277 
278       /*** Product H.	 ________________  ________________
279 			|_____U1 x U1____||____U0 x U0_____|  */
280       /* Put result in upper part of PROD and pass low part of TSPACE
281 	 as new TSPACE.  */
282       MPN_SQR_N_RECURSE (prodp + size, up + hsize, hsize, tspace);
283 
284       /*** Product M.	 ________________
285 			|_(U1-U0)(U0-U1)_|  */
286       if (mpn_cmp (up + hsize, up, hsize) >= 0)
287 	{
288 	  mpn_sub_n (prodp, up + hsize, up, hsize);
289 	}
290       else
291 	{
292 	  mpn_sub_n (prodp, up, up + hsize, hsize);
293 	}
294 
295       /* Read temporary operands from low part of PROD.
296 	 Put result in low part of TSPACE using upper part of TSPACE
297 	 as new TSPACE.  */
298       MPN_SQR_N_RECURSE (tspace, prodp, hsize, tspace + size);
299 
300       /*** Add/copy product H.  */
301       MPN_COPY (prodp + hsize, prodp + size, hsize);
302       cy = mpn_add_n (prodp + size, prodp + size, prodp + size + hsize, hsize);
303 
304       /*** Add product M (if NEGFLG M is a negative number).  */
305       cy -= mpn_sub_n (prodp + hsize, prodp + hsize, tspace, size);
306 
307       /*** Product L.	 ________________  ________________
308 			|________________||____U0 x U0_____|  */
309       /* Read temporary operands from low part of PROD.
310 	 Put result in low part of TSPACE using upper part of TSPACE
311 	 as new TSPACE.  */
312       MPN_SQR_N_RECURSE (tspace, up, hsize, tspace + size);
313 
314       /*** Add/copy Product L (twice).  */
315 
316       cy += mpn_add_n (prodp + hsize, prodp + hsize, tspace, size);
317       if (cy)
318 	mpn_add_1 (prodp + hsize + size, prodp + hsize + size, hsize, cy);
319 
320       MPN_COPY (prodp, tspace, hsize);
321       cy = mpn_add_n (prodp + hsize, prodp + hsize, tspace + hsize, hsize);
322       if (cy)
323 	mpn_add_1 (prodp + size, prodp + size, size, 1);
324     }
325 }
326 
327 /* This should be made into an inline function in gmp.h.  */
328 void
mpn_mul_n(mp_ptr prodp,mp_srcptr up,mp_srcptr vp,mp_size_t size)329 mpn_mul_n (mp_ptr prodp, mp_srcptr up, mp_srcptr vp, mp_size_t size)
330 {
331   TMP_DECL (marker);
332   TMP_MARK (marker);
333   if (up == vp)
334     {
335       if (size < KARATSUBA_THRESHOLD)
336 	{
337 	  impn_sqr_n_basecase (prodp, up, size);
338 	}
339       else
340 	{
341 	  mp_ptr tspace;
342 	  tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
343 	  impn_sqr_n (prodp, up, size, tspace);
344 	}
345     }
346   else
347     {
348       if (size < KARATSUBA_THRESHOLD)
349 	{
350 	  impn_mul_n_basecase (prodp, up, vp, size);
351 	}
352       else
353 	{
354 	  mp_ptr tspace;
355 	  tspace = (mp_ptr) TMP_ALLOC (2 * size * BYTES_PER_MP_LIMB);
356 	  impn_mul_n (prodp, up, vp, size, tspace);
357 	}
358     }
359   TMP_FREE (marker);
360 }
361