1 /*
2  * Copyright (C) 2017 Denys Vlasenko <vda.linux@googlemail.com>
3  *
4  * Licensed under GPLv2, see file LICENSE in this source tree.
5  */
6 //config:config FACTOR
7 //config:	bool "factor (2.7 kb)"
8 //config:	default y
9 //config:	help
10 //config:	factor factorizes integers
11 
12 //applet:IF_FACTOR(APPLET(factor, BB_DIR_USR_BIN, BB_SUID_DROP))
13 
14 //kbuild:lib-$(CONFIG_FACTOR) += factor.o
15 
16 //usage:#define factor_trivial_usage
17 //usage:       "[NUMBER]..."
18 //usage:#define factor_full_usage "\n\n"
19 //usage:       "Print prime factors"
20 
21 #include "libbb.h"
22 #include "common_bufsiz.h"
23 
24 #if 0
25 # define dbg(...) bb_error_msg(__VA_ARGS__)
26 #else
27 # define dbg(...) ((void)0)
28 #endif
29 
30 typedef unsigned long long wide_t;
31 
32 #if ULLONG_MAX == (UINT_MAX * UINT_MAX + 2 * UINT_MAX)
33 /* "unsigned" is half as wide as ullong */
34 typedef unsigned half_t;
35 #define HALF_MAX UINT_MAX
36 #define HALF_FMT ""
37 #elif ULLONG_MAX == (ULONG_MAX * ULONG_MAX + 2 * ULONG_MAX)
38 /* long is half as wide as ullong */
39 typedef unsigned long half_t;
40 #define HALF_MAX ULONG_MAX
41 #define HALF_FMT "l"
42 #else
43 #error Cant find an integer type which is half as wide as ullong
44 #endif
45 
46 /* The trial divisor increment wheel.  Use it to skip over divisors that
47  * are composites of 2, 3, 5, 7, or 11.
48  * Larger wheels improve sieving only slightly, but quickly grow in size
49  * (adding just one prime, 13, results in 5766 element sieve).
50  */
51 #define R(a,b,c,d,e,f,g,h,i,j,A,B,C,D,E,F,G,H,I,J) \
52 	(((uint64_t)(a<<0) | (b<<3) | (c<<6) | (d<<9) | (e<<12) | (f<<15) | (g<<18) | (h<<21) | (i<<24) | (j<<27)) << 1) | \
53 	(((uint64_t)(A<<0) | (B<<3) | (C<<6) | (D<<9) | (E<<12) | (F<<15) | (G<<18) | (H<<21) | (I<<24) | (J<<27)) << 31)
54 #define P(a,b,c,d,e,f,g,h,i,j,A,B,C,D,E,F,G,H,I,J) \
55 	R(	(a/2),(b/2),(c/2),(d/2),(e/2),(f/2),(g/2),(h/2),(i/2),(j/2), \
56 		(A/2),(B/2),(C/2),(D/2),(E/2),(F/2),(G/2),(H/2),(I/2),(J/2)  )
57 static const uint64_t packed_wheel[] = {
58 	/*1, 2, 2, 4, 2,*/
59 	P( 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 4, 2, 6, 4, 6, 8, 4), //01
60 	P( 2, 4, 2, 4,14, 4, 6, 2,10, 2, 6, 6, 4, 2, 4, 6, 2,10, 2, 4), //02
61 	P( 2,12,10, 2, 4, 2, 4, 6, 2, 6, 4, 6, 6, 6, 2, 6, 4, 2, 6, 4), //03
62 	P( 6, 8, 4, 2, 4, 6, 8, 6,10, 2, 4, 6, 2, 6, 6, 4, 2, 4, 6, 2), //04
63 	P( 6, 4, 2, 6,10, 2,10, 2, 4, 2, 4, 6, 8, 4, 2, 4,12, 2, 6, 4), //05
64 	P( 2, 6, 4, 6,12, 2, 4, 2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6,10, 2), //06
65 	P( 4, 6, 2, 6, 4, 2, 4, 2,10, 2,10, 2, 4, 6, 6, 2, 6, 6, 4, 6), //07
66 	P( 6, 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 6, 4, 8, 6, 4, 6, 2, 4, 6), //08
67 	P( 8, 6, 4, 2,10, 2, 6, 4, 2, 4, 2,10, 2,10, 2, 4, 2, 4, 8, 6), //09
68 	P( 4, 2, 4, 6, 6, 2, 6, 4, 8, 4, 6, 8, 4, 2, 4, 2, 4, 8, 6, 4), //10
69 	P( 6, 6, 6, 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4, 2,10, 2,10, 2), //11
70 	P( 6, 4, 6, 2, 6, 4, 2, 4, 6, 6, 8, 4, 2, 6,10, 8, 4, 2, 4, 2), //12
71 	P( 4, 8,10, 6, 2, 4, 8, 6, 6, 4, 2, 4, 6, 2, 6, 4, 6, 2,10, 2), //13
72 	P(10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6, 6, 2, 6, 6, 6, 4, 6, 8), //14
73 	P( 4, 2, 4, 2, 4, 8, 6, 4, 8, 4, 6, 2, 6, 6, 4, 2, 4, 6, 8, 4), //15
74 	P( 2, 4, 2,10, 2,10, 2, 4, 2, 4, 6, 2,10, 2, 4, 6, 8, 6, 4, 2), //16
75 	P( 6, 4, 6, 8, 4, 6, 2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6, 6, 4, 6), //17
76 	P( 6, 2, 6, 6, 4, 2,10, 2,10, 2, 4, 2, 4, 6, 2, 6, 4, 2,10, 6), //18
77 	P( 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 4, 2,12, 6, 4, 6, 2, 4, 6, 2), //19
78 	P(12, 4, 2, 4, 8, 6, 4, 2, 4, 2,10, 2,10, 6, 2, 4, 6, 2, 6, 4), //20
79 	P( 2, 4, 6, 6, 2, 6, 4, 2,10, 6, 8, 6, 4, 2, 4, 8, 6, 4, 6, 2), //21
80 	P( 4, 6, 2, 6, 6, 6, 4, 6, 2, 6, 4, 2, 4, 2,10,12, 2, 4, 2,10), //22
81 	P( 2, 6, 4, 2, 4, 6, 6, 2,10, 2, 6, 4,14, 4, 2, 4, 2, 4, 8, 6), //23
82 	P( 4, 6, 2, 4, 6, 2, 6, 6, 4, 2, 4, 6, 2, 6, 4, 2, 4,12, 2,12), //24
83 };
84 #undef P
85 #undef R
86 #define WHEEL_START 5
87 #define WHEEL_SIZE (5 + 24 * 20)
88 #define square_count (((uint8_t*)&bb_common_bufsiz1)[0])
89 #define wheel_tab    (((uint8_t*)&bb_common_bufsiz1) + 1)
90 /*
91  * Why, you ask?
92  * plain byte array:
93  *	function                old     new   delta
94  *	wheel_tab                 -     485    +485
95  * 3-bit-packed insanity:
96  *	packed_wheel              -     192    +192
97  *	factor_main             108     171     +63
98  */
unpack_wheel(void)99 static void unpack_wheel(void)
100 {
101 	int i;
102 	uint8_t *p;
103 
104 	setup_common_bufsiz();
105 	wheel_tab[0] = 1;
106 	wheel_tab[1] = 2;
107 	wheel_tab[2] = 2;
108 	wheel_tab[3] = 4;
109 	wheel_tab[4] = 2;
110 	p = &wheel_tab[5];
111 	for (i = 0; i < ARRAY_SIZE(packed_wheel); i++) {
112 		uint64_t v = packed_wheel[i];
113 		while ((v & 0xe) != 0) {
114 			*p = v & 0xe;
115 			//printf("%2u,", *p);
116 			p++;
117 			v >>= 3;
118 		}
119 		//printf("\n");
120 	}
121 }
122 
123 /* Prevent inlining, factorize() needs all help it can get with reducing register pressure */
print_w(wide_t n)124 static NOINLINE void print_w(wide_t n)
125 {
126 	unsigned rep = square_count;
127 	do
128 		printf(" %llu", n);
129 	while (--rep != 0);
130 }
print_h(half_t n)131 static NOINLINE void print_h(half_t n)
132 {
133 	print_w(n);
134 }
135 
136 static void factorize(wide_t N);
137 
isqrt_odd(wide_t N)138 static half_t isqrt_odd(wide_t N)
139 {
140 	half_t s = isqrt(N);
141 	/* s^2 is <= N, (s+1)^2 > N */
142 
143 	/* If s^2 in fact is EQUAL to N, it's very lucky.
144 	 * Examples:
145 	 * factor 18446743988964486098 = 2 * 3037000493 * 3037000493
146 	 * factor 18446743902517389507 = 3 * 2479700513 * 2479700513
147 	 */
148 	if ((wide_t)s * s == N) {
149 		/* factorize sqrt(N), printing each factor twice */
150 		square_count *= 2;
151 		factorize(s);
152 		/* Let caller know we recursed */
153 		return 0;
154 	}
155 
156 	/* Subtract 1 from even s, odd s won't change: */
157 	/* (doesnt work for zero, but we know that s != 0 here) */
158 	s = (s - 1) | 1;
159 	return s;
160 }
161 
factorize(wide_t N)162 static NOINLINE void factorize(wide_t N)
163 {
164 	unsigned w;
165 	half_t factor;
166 	half_t max_factor;
167 
168 	if (N < 4)
169 		goto end;
170 
171 	/* The code needs to be optimized for the case where
172 	 * there are large prime factors. For example,
173 	 * this is not hard:
174 	 * 8262075252869367027 = 3 7 17 23 47 101 113 127 131 137 823
175 	 *  (the largest divisor to test for largest factor 823
176 	 *  is only ~sqrt(823) = 28, the entire factorization needs
177 	 *  only ~33 trial divisions)
178 	 * but this is:
179 	 * 18446744073709551601 = 53 348051774975651917
180 	 * the last factor requires testing up to
181 	 * 589959129 - about 100 million iterations.
182 	 * The slowest case (largest prime) for N < 2^64 is
183 	 * factor 18446744073709551557 (0xffffffffffffffc5).
184 	 */
185 	max_factor = isqrt_odd(N);
186 	if (!max_factor)
187 		return; /* square was detected and recursively factored */
188 	factor = 2;
189 	w = 0;
190 	for (;;) {
191 		half_t fw;
192 
193 		/* The division is the most costly part of the loop.
194 		 * On 64bit CPUs, takes at best 12 cycles, often ~20.
195 		 */
196 		while ((N % factor) == 0) { /* not likely */
197 			N = N / factor;
198 			print_h(factor);
199 			max_factor = isqrt_odd(N);
200 			if (!max_factor)
201 				return; /* square was detected */
202 		}
203 		if (factor >= max_factor)
204 			break;
205 		fw = factor + wheel_tab[w];
206 		if (fw < factor)
207 			break; /* overflow */
208 		factor = fw;
209 		w++;
210 		if (w < WHEEL_SIZE)
211 			continue;
212 		w = WHEEL_START;
213 	}
214  end:
215 	if (N > 1)
216 		print_w(N);
217 	bb_putchar('\n');
218 }
219 
factorize_numstr(const char * numstr)220 static void factorize_numstr(const char *numstr)
221 {
222 	wide_t N;
223 
224 	/* Leading + is ok (coreutils compat) */
225 	if (*numstr == '+')
226 		numstr++;
227 	N = bb_strtoull(numstr, NULL, 10);
228 	if (errno)
229 		bb_show_usage();
230 	printf("%llu:", N);
231 	square_count = 1;
232 	factorize(N);
233 }
234 
235 int factor_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
factor_main(int argc UNUSED_PARAM,char ** argv)236 int factor_main(int argc UNUSED_PARAM, char **argv)
237 {
238 	unpack_wheel();
239 
240 	//// coreutils has undocumented option ---debug (three dashes)
241 	//getopt32(argv, "");
242 	//argv += optind;
243 	argv++;
244 
245 	if (!*argv) {
246 		/* Read from stdin, several numbers per line are accepted */
247 		for (;;) {
248 			char *numstr, *line;
249 			line = xmalloc_fgetline(stdin);
250 			if (!line)
251 				return EXIT_SUCCESS;
252 			numstr = line;
253 			for (;;) {
254 				char *end;
255 				numstr = skip_whitespace(numstr);
256 				if (!numstr[0])
257 					break;
258 				end = skip_non_whitespace(numstr);
259 				if (*end != '\0')
260 					*end++ = '\0';
261 				factorize_numstr(numstr);
262 				numstr = end;
263 			}
264 			free(line);
265 		}
266 	}
267 
268 	do {
269 		/* Leading spaces are ok (coreutils compat) */
270 		factorize_numstr(skip_whitespace(*argv));
271 	} while (*++argv);
272 
273 	return EXIT_SUCCESS;
274 }
275