1 /*
2  * Copyright (c) 2010 Broadcom Corporation
3  *
4  * Permission to use, copy, modify, and/or distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
11  * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
13  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
14  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 
17 #include <linux/types.h>
18 #include "qmath.h"
19 
20 /*
21 Description: This function saturate input 32 bit number into a 16 bit number.
22 If input number is greater than 0x7fff then output is saturated to 0x7fff.
23 else if input number is less than 0xffff8000 then output is saturated to 0xffff8000
24 else output is same as input.
25 */
qm_sat32(s32 op)26 s16 qm_sat32(s32 op)
27 {
28 	s16 result;
29 	if (op > (s32) 0x7fff) {
30 		result = 0x7fff;
31 	} else if (op < (s32) 0xffff8000) {
32 		result = (s16) (0x8000);
33 	} else {
34 		result = (s16) op;
35 	}
36 	return result;
37 }
38 
39 /*
40 Description: This function multiply two input 16 bit numbers and return the 32 bit result.
41 This multiplication is similar to compiler multiplication. This operation is defined if
42 16 bit multiplication on the processor platform is cheaper than 32 bit multiplication (as
43 the most of qmath functions can be replaced with processor intrinsic instructions).
44 */
qm_mul321616(s16 op1,s16 op2)45 s32 qm_mul321616(s16 op1, s16 op2)
46 {
47 	return (s32) (op1) * (s32) (op2);
48 }
49 
50 /*
51 Description: This function make 16 bit multiplication and return the result in 16 bits.
52 To fit the result into 16 bits the 32 bit multiplication result is right
53 shifted by 16 bits.
54 */
qm_mul16(s16 op1,s16 op2)55 s16 qm_mul16(s16 op1, s16 op2)
56 {
57 	s32 result;
58 	result = ((s32) (op1) * (s32) (op2));
59 	return (s16) (result >> 16);
60 }
61 
62 /*
63 Description: This function multiply two 16 bit numbers and return the result in 32 bits.
64 This function remove the extra sign bit created by the multiplication by leftshifting the
65 32 bit multiplication result by 1 bit before returning the result. So the output is
66 twice that of compiler multiplication. (i.e. qm_muls321616(2,3)=12).
67 When both input 16 bit numbers are 0x8000, then the result is saturated to 0x7fffffff.
68 */
qm_muls321616(s16 op1,s16 op2)69 s32 qm_muls321616(s16 op1, s16 op2)
70 {
71 	s32 result;
72 	if (op1 == (s16) (0x8000) && op2 == (s16) (0x8000)) {
73 		result = 0x7fffffff;
74 	} else {
75 		result = ((s32) (op1) * (s32) (op2));
76 		result = result << 1;
77 	}
78 	return result;
79 }
80 
81 /*
82 Description: This function make 16 bit unsigned multiplication. To fit the output into
83 16 bits the 32 bit multiplication result is right shifted by 16 bits.
84 */
qm_mulu16(u16 op1,u16 op2)85 u16 qm_mulu16(u16 op1, u16 op2)
86 {
87 	return (u16) (((u32) op1 * (u32) op2) >> 16);
88 }
89 
90 /*
91 Description: This function make 16 bit multiplication and return the result in 16 bits.
92 To fit the multiplication result into 16 bits the multiplication result is right shifted by
93 15 bits. Right shifting 15 bits instead of 16 bits is done to remove the extra sign bit formed
94 due to the multiplication.
95 When both the 16bit inputs are 0x8000 then the output is saturated to 0x7fffffff.
96 */
qm_muls16(s16 op1,s16 op2)97 s16 qm_muls16(s16 op1, s16 op2)
98 {
99 	s32 result;
100 	if (op1 == (s16) 0x8000 && op2 == (s16) 0x8000) {
101 		result = 0x7fffffff;
102 	} else {
103 		result = ((s32) (op1) * (s32) (op2));
104 	}
105 	return (s16) (result >> 15);
106 }
107 
108 /*
109 Description: This function add two 32 bit numbers and return the 32bit result.
110 If the result overflow 32 bits, the output will be saturated to 32bits.
111 */
qm_add32(s32 op1,s32 op2)112 s32 qm_add32(s32 op1, s32 op2)
113 {
114 	s32 result;
115 	result = op1 + op2;
116 	if (op1 < 0 && op2 < 0 && result > 0) {
117 		result = 0x80000000;
118 	} else if (op1 > 0 && op2 > 0 && result < 0) {
119 		result = 0x7fffffff;
120 	}
121 	return result;
122 }
123 
124 /*
125 Description: This function add two 16 bit numbers and return the 16bit result.
126 If the result overflow 16 bits, the output will be saturated to 16bits.
127 */
qm_add16(s16 op1,s16 op2)128 s16 qm_add16(s16 op1, s16 op2)
129 {
130 	s16 result;
131 	s32 temp = (s32) op1 + (s32) op2;
132 	if (temp > (s32) 0x7fff) {
133 		result = (s16) 0x7fff;
134 	} else if (temp < (s32) 0xffff8000) {
135 		result = (s16) 0xffff8000;
136 	} else {
137 		result = (s16) temp;
138 	}
139 	return result;
140 }
141 
142 /*
143 Description: This function make 16 bit subtraction and return the 16bit result.
144 If the result overflow 16 bits, the output will be saturated to 16bits.
145 */
qm_sub16(s16 op1,s16 op2)146 s16 qm_sub16(s16 op1, s16 op2)
147 {
148 	s16 result;
149 	s32 temp = (s32) op1 - (s32) op2;
150 	if (temp > (s32) 0x7fff) {
151 		result = (s16) 0x7fff;
152 	} else if (temp < (s32) 0xffff8000) {
153 		result = (s16) 0xffff8000;
154 	} else {
155 		result = (s16) temp;
156 	}
157 	return result;
158 }
159 
160 /*
161 Description: This function make 32 bit subtraction and return the 32bit result.
162 If the result overflow 32 bits, the output will be saturated to 32bits.
163 */
qm_sub32(s32 op1,s32 op2)164 s32 qm_sub32(s32 op1, s32 op2)
165 {
166 	s32 result;
167 	result = op1 - op2;
168 	if (op1 >= 0 && op2 < 0 && result < 0) {
169 		result = 0x7fffffff;
170 	} else if (op1 < 0 && op2 > 0 && result > 0) {
171 		result = 0x80000000;
172 	}
173 	return result;
174 }
175 
176 /*
177 Description: This function multiply input 16 bit numbers and accumulate the result
178 into the input 32 bit number and return the 32 bit accumulated result.
179 If the accumulation result in overflow, then the output will be saturated.
180 */
qm_mac321616(s32 acc,s16 op1,s16 op2)181 s32 qm_mac321616(s32 acc, s16 op1, s16 op2)
182 {
183 	s32 result;
184 	result = qm_add32(acc, qm_mul321616(op1, op2));
185 	return result;
186 }
187 
188 /*
189 Description: This function make a 32 bit saturated left shift when the specified shift
190 is +ve. This function will make a 32 bit right shift when the specified shift is -ve.
191 This function return the result after shifting operation.
192 */
qm_shl32(s32 op,int shift)193 s32 qm_shl32(s32 op, int shift)
194 {
195 	int i;
196 	s32 result;
197 	result = op;
198 	if (shift > 31)
199 		shift = 31;
200 	else if (shift < -31)
201 		shift = -31;
202 	if (shift >= 0) {
203 		for (i = 0; i < shift; i++) {
204 			result = qm_add32(result, result);
205 		}
206 	} else {
207 		result = result >> (-shift);
208 	}
209 	return result;
210 }
211 
212 /*
213 Description: This function make a 32 bit right shift when shift is +ve.
214 This function make a 32 bit saturated left shift when shift is -ve. This function
215 return the result of the shift operation.
216 */
qm_shr32(s32 op,int shift)217 s32 qm_shr32(s32 op, int shift)
218 {
219 	return qm_shl32(op, -shift);
220 }
221 
222 /*
223 Description: This function make a 16 bit saturated left shift when the specified shift
224 is +ve. This function will make a 16 bit right shift when the specified shift is -ve.
225 This function return the result after shifting operation.
226 */
qm_shl16(s16 op,int shift)227 s16 qm_shl16(s16 op, int shift)
228 {
229 	int i;
230 	s16 result;
231 	result = op;
232 	if (shift > 15)
233 		shift = 15;
234 	else if (shift < -15)
235 		shift = -15;
236 	if (shift > 0) {
237 		for (i = 0; i < shift; i++) {
238 			result = qm_add16(result, result);
239 		}
240 	} else {
241 		result = result >> (-shift);
242 	}
243 	return result;
244 }
245 
246 /*
247 Description: This function make a 16 bit right shift when shift is +ve.
248 This function make a 16 bit saturated left shift when shift is -ve. This function
249 return the result of the shift operation.
250 */
qm_shr16(s16 op,int shift)251 s16 qm_shr16(s16 op, int shift)
252 {
253 	return qm_shl16(op, -shift);
254 }
255 
256 /*
257 Description: This function return the number of redundant sign bits in a 16 bit number.
258 Example: qm_norm16(0x0080) = 7.
259 */
qm_norm16(s16 op)260 s16 qm_norm16(s16 op)
261 {
262 	u16 u16extraSignBits;
263 	if (op == 0) {
264 		return 15;
265 	} else {
266 		u16extraSignBits = 0;
267 		while ((op >> 15) == (op >> 14)) {
268 			u16extraSignBits++;
269 			op = op << 1;
270 		}
271 	}
272 	return u16extraSignBits;
273 }
274 
275 /*
276 Description: This function return the number of redundant sign bits in a 32 bit number.
277 Example: qm_norm32(0x00000080) = 23
278 */
qm_norm32(s32 op)279 s16 qm_norm32(s32 op)
280 {
281 	u16 u16extraSignBits;
282 	if (op == 0) {
283 		return 31;
284 	} else {
285 		u16extraSignBits = 0;
286 		while ((op >> 31) == (op >> 30)) {
287 			u16extraSignBits++;
288 			op = op << 1;
289 		}
290 	}
291 	return u16extraSignBits;
292 }
293 
294 /*
295 Description: This function divide two 16 bit unsigned numbers.
296 The numerator should be less than denominator. So the quotient is always less than 1.
297 This function return the quotient in q.15 format.
298 */
qm_div_s(s16 num,s16 denom)299 s16 qm_div_s(s16 num, s16 denom)
300 {
301 	s16 var_out;
302 	s16 iteration;
303 	s32 L_num;
304 	s32 L_denom;
305 	L_num = (num) << 15;
306 	L_denom = (denom) << 15;
307 	for (iteration = 0; iteration < 15; iteration++) {
308 		L_num <<= 1;
309 		if (L_num >= L_denom) {
310 			L_num = qm_sub32(L_num, L_denom);
311 			L_num = qm_add32(L_num, 1);
312 		}
313 	}
314 	var_out = (s16) (L_num & 0x7fff);
315 	return var_out;
316 }
317 
318 /*
319 Description: This function compute the absolute value of a 16 bit number.
320 */
qm_abs16(s16 op)321 s16 qm_abs16(s16 op)
322 {
323 	if (op < 0) {
324 		if (op == (s16) 0xffff8000) {
325 			return 0x7fff;
326 		} else {
327 			return -op;
328 		}
329 	} else {
330 		return op;
331 	}
332 }
333 
334 /*
335 Description: This function divide two 16 bit numbers.
336 The quotient is returned through return value.
337 The qformat of the quotient is returned through the pointer (qQuotient) passed
338 to this function. The qformat of quotient is adjusted appropriately such that
339 the quotient occupies all 16 bits.
340 */
qm_div16(s16 num,s16 denom,s16 * qQuotient)341 s16 qm_div16(s16 num, s16 denom, s16 *qQuotient)
342 {
343 	s16 sign;
344 	s16 nNum, nDenom;
345 	sign = num ^ denom;
346 	num = qm_abs16(num);
347 	denom = qm_abs16(denom);
348 	nNum = qm_norm16(num);
349 	nDenom = qm_norm16(denom);
350 	num = qm_shl16(num, nNum - 1);
351 	denom = qm_shl16(denom, nDenom);
352 	*qQuotient = nNum - 1 - nDenom + 15;
353 	if (sign >= 0) {
354 		return qm_div_s(num, denom);
355 	} else {
356 		return -qm_div_s(num, denom);
357 	}
358 }
359 
360 /*
361 Description: This function compute absolute value of a 32 bit number.
362 */
qm_abs32(s32 op)363 s32 qm_abs32(s32 op)
364 {
365 	if (op < 0) {
366 		if (op == (s32) 0x80000000) {
367 			return 0x7fffffff;
368 		} else {
369 			return -op;
370 		}
371 	} else {
372 		return op;
373 	}
374 }
375 
376 /*
377 Description: This function divide two 32 bit numbers. The division is performed
378 by considering only important 16 bits in 32 bit numbers.
379 The quotient is returned through return value.
380 The qformat of the quotient is returned through the pointer (qquotient) passed
381 to this function. The qformat of quotient is adjusted appropriately such that
382 the quotient occupies all 16 bits.
383 */
qm_div163232(s32 num,s32 denom,s16 * qquotient)384 s16 qm_div163232(s32 num, s32 denom, s16 *qquotient)
385 {
386 	s32 sign;
387 	s16 nNum, nDenom;
388 	sign = num ^ denom;
389 	num = qm_abs32(num);
390 	denom = qm_abs32(denom);
391 	nNum = qm_norm32(num);
392 	nDenom = qm_norm32(denom);
393 	num = qm_shl32(num, nNum - 1);
394 	denom = qm_shl32(denom, nDenom);
395 	*qquotient = nNum - 1 - nDenom + 15;
396 	if (sign >= 0) {
397 		return qm_div_s((s16) (num >> 16), (s16) (denom >> 16));
398 	} else {
399 		return -qm_div_s((s16) (num >> 16), (s16) (denom >> 16));
400 	}
401 }
402 
403 /*
404 Description: This function multiply a 32 bit number with a 16 bit number.
405 The multiplicaton result is right shifted by 16 bits to fit the result
406 into 32 bit output.
407 */
qm_mul323216(s32 op1,s16 op2)408 s32 qm_mul323216(s32 op1, s16 op2)
409 {
410 	s16 hi;
411 	u16 lo;
412 	s32 result;
413 	hi = op1 >> 16;
414 	lo = (s16) (op1 & 0xffff);
415 	result = qm_mul321616(hi, op2);
416 	result = result + (qm_mulsu321616(op2, lo) >> 16);
417 	return result;
418 }
419 
420 /*
421 Description: This function multiply signed 16 bit number with unsigned 16 bit number and return
422 the result in 32 bits.
423 */
qm_mulsu321616(s16 op1,u16 op2)424 s32 qm_mulsu321616(s16 op1, u16 op2)
425 {
426 	return (s32) (op1) * op2;
427 }
428 
429 /*
430 Description: This function multiply 32 bit number with 16 bit number. The multiplication result is
431 right shifted by 15 bits to fit the result into 32 bits. Right shifting by only 15 bits instead of
432 16 bits is done to remove the extra sign bit formed by multiplication from the return value.
433 When the input numbers are 0x80000000, 0x8000 the return value is saturated to 0x7fffffff.
434 */
qm_muls323216(s32 op1,s16 op2)435 s32 qm_muls323216(s32 op1, s16 op2)
436 {
437 	s16 hi;
438 	u16 lo;
439 	s32 result;
440 	hi = op1 >> 16;
441 	lo = (s16) (op1 & 0xffff);
442 	result = qm_muls321616(hi, op2);
443 	result = qm_add32(result, (qm_mulsu321616(op2, lo) >> 15));
444 	return result;
445 }
446 
447 /*
448 Description: This function multiply two 32 bit numbers. The multiplication result is right
449 shifted by 32 bits to fit the multiplication result into 32 bits. The right shifted
450 multiplication result is returned as output.
451 */
qm_mul32(s32 a,s32 b)452 s32 qm_mul32(s32 a, s32 b)
453 {
454 	s16 hi1, hi2;
455 	u16 lo1, lo2;
456 	s32 result;
457 	hi1 = a >> 16;
458 	hi2 = b >> 16;
459 	lo1 = (u16) (a & 0xffff);
460 	lo2 = (u16) (b & 0xffff);
461 	result = qm_mul321616(hi1, hi2);
462 	result = result + (qm_mulsu321616(hi1, lo2) >> 16);
463 	result = result + (qm_mulsu321616(hi2, lo1) >> 16);
464 	return result;
465 }
466 
467 /*
468 Description: This function multiply two 32 bit numbers. The multiplication result is
469 right shifted by 31 bits to fit the multiplication result into 32 bits. The right
470 shifted multiplication result is returned as output. Right shifting by only 31 bits
471 instead of 32 bits is done to remove the extra sign bit formed by multiplication.
472 When the input numbers are 0x80000000, 0x80000000 the return value is saturated to
473 0x7fffffff.
474 */
qm_muls32(s32 a,s32 b)475 s32 qm_muls32(s32 a, s32 b)
476 {
477 	s16 hi1, hi2;
478 	u16 lo1, lo2;
479 	s32 result;
480 	hi1 = a >> 16;
481 	hi2 = b >> 16;
482 	lo1 = (u16) (a & 0xffff);
483 	lo2 = (u16) (b & 0xffff);
484 	result = qm_muls321616(hi1, hi2);
485 	result = qm_add32(result, (qm_mulsu321616(hi1, lo2) >> 15));
486 	result = qm_add32(result, (qm_mulsu321616(hi2, lo1) >> 15));
487 	result = qm_add32(result, (qm_mulu16(lo1, lo2) >> 15));
488 	return result;
489 }
490 
491 /* This table is log2(1+(i/32)) where i=[0:1:31], in q.15 format */
492 static const s16 log_table[] = {
493 	0,
494 	1455,
495 	2866,
496 	4236,
497 	5568,
498 	6863,
499 	8124,
500 	9352,
501 	10549,
502 	11716,
503 	12855,
504 	13968,
505 	15055,
506 	16117,
507 	17156,
508 	18173,
509 	19168,
510 	20143,
511 	21098,
512 	22034,
513 	22952,
514 	23852,
515 	24736,
516 	25604,
517 	26455,
518 	27292,
519 	28114,
520 	28922,
521 	29717,
522 	30498,
523 	31267,
524 	32024
525 };
526 
527 #define LOG_TABLE_SIZE 32	/* log_table size */
528 #define LOG2_LOG_TABLE_SIZE 5	/* log2(log_table size) */
529 #define Q_LOG_TABLE 15		/* qformat of log_table */
530 #define LOG10_2		19728	/* log10(2) in q.16 */
531 
532 /*
533 Description:
534 This routine takes the input number N and its q format qN and compute
535 the log10(N). This routine first normalizes the input no N.	Then N is in mag*(2^x) format.
536 mag is any number in the range 2^30-(2^31 - 1). Then log2(mag * 2^x) = log2(mag) + x is computed.
537 From that log10(mag * 2^x) = log2(mag * 2^x) * log10(2) is computed.
538 This routine looks the log2 value in the table considering LOG2_LOG_TABLE_SIZE+1 MSBs.
539 As the MSB is always 1, only next LOG2_OF_LOG_TABLE_SIZE MSBs are used for table lookup.
540 Next 16 MSBs are used for interpolation.
541 Inputs:
542 N - number to which log10 has to be found.
543 qN - q format of N
544 log10N - address where log10(N) will be written.
545 qLog10N - address where log10N qformat will be written.
546 Note/Problem:
547 For accurate results input should be in normalized or near normalized form.
548 */
qm_log10(s32 N,s16 qN,s16 * log10N,s16 * qLog10N)549 void qm_log10(s32 N, s16 qN, s16 *log10N, s16 *qLog10N)
550 {
551 	s16 s16norm, s16tableIndex, s16errorApproximation;
552 	u16 u16offset;
553 	s32 s32log;
554 
555 	/* Logerithm of negative values is undefined.
556 	 * assert N is greater than 0.
557 	 */
558 	/* ASSERT(N > 0); */
559 
560 	/* normalize the N. */
561 	s16norm = qm_norm32(N);
562 	N = N << s16norm;
563 
564 	/* The qformat of N after normalization.
565 	 * -30 is added to treat the no as between 1.0 to 2.0
566 	 * i.e. after adding the -30 to the qformat the decimal point will be
567 	 * just rigtht of the MSB. (i.e. after sign bit and 1st MSB). i.e.
568 	 * at the right side of 30th bit.
569 	 */
570 	qN = qN + s16norm - 30;
571 
572 	/* take the table index as the LOG2_OF_LOG_TABLE_SIZE bits right of the MSB */
573 	s16tableIndex = (s16) (N >> (32 - (2 + LOG2_LOG_TABLE_SIZE)));
574 
575 	/* remove the MSB. the MSB is always 1 after normalization. */
576 	s16tableIndex =
577 	    s16tableIndex & (s16) ((1 << LOG2_LOG_TABLE_SIZE) - 1);
578 
579 	/* remove the (1+LOG2_OF_LOG_TABLE_SIZE) MSBs in the N. */
580 	N = N & ((1 << (32 - (2 + LOG2_LOG_TABLE_SIZE))) - 1);
581 
582 	/* take the offset as the 16 MSBS after table index.
583 	 */
584 	u16offset = (u16) (N >> (32 - (2 + LOG2_LOG_TABLE_SIZE + 16)));
585 
586 	/* look the log value in the table. */
587 	s32log = log_table[s16tableIndex];	/* q.15 format */
588 
589 	/* interpolate using the offset. */
590 	s16errorApproximation = (s16) qm_mulu16(u16offset, (u16) (log_table[s16tableIndex + 1] - log_table[s16tableIndex]));	/* q.15 */
591 
592 	s32log = qm_add16((s16) s32log, s16errorApproximation);	/* q.15 format */
593 
594 	/* adjust for the qformat of the N as
595 	 * log2(mag * 2^x) = log2(mag) + x
596 	 */
597 	s32log = qm_add32(s32log, ((s32) -qN) << 15);	/* q.15 format */
598 
599 	/* normalize the result. */
600 	s16norm = qm_norm32(s32log);
601 
602 	/* bring all the important bits into lower 16 bits */
603 	s32log = qm_shl32(s32log, s16norm - 16);	/* q.15+s16norm-16 format */
604 
605 	/* compute the log10(N) by multiplying log2(N) with log10(2).
606 	 * as log10(mag * 2^x) = log2(mag * 2^x) * log10(2)
607 	 * log10N in q.15+s16norm-16+1 (LOG10_2 is in q.16)
608 	 */
609 	*log10N = qm_muls16((s16) s32log, (s16) LOG10_2);
610 
611 	/* write the q format of the result. */
612 	*qLog10N = 15 + s16norm - 16 + 1;
613 
614 	return;
615 }
616 
617 /*
618 Description:
619 This routine compute 1/N.
620 This routine reformates the given no N as N * 2^qN where N is in between 0.5 and 1.0
621 in q.15 format in 16 bits. So the problem now boils down to finding the inverse of a
622 q.15 no in 16 bits which is in the range of 0.5 to 1.0. The output is always between
623 2.0 to 1. So the output is 2.0 to 1.0 in q.30 format. Once the final output format is found
624 by taking the qN into account. Inverse is found with newton rapson method. Initially
625 inverse (x) is guessed as 1/0.75 (with appropriate sign). The new guess is calculated
626 using the formula x' = 2*x - N*x*x. After 4 or 5 iterations the inverse is very close to
627 inverse of N.
628 Inputs:
629 N - number to which 1/N has to be found.
630 qn - q format of N.
631 sqrtN - address where 1/N has to be written.
632 qsqrtN - address where q format of 1/N has to be written.
633 */
634 #define qx 29
qm_1byN(s32 N,s16 qN,s32 * result,s16 * qResult)635 void qm_1byN(s32 N, s16 qN, s32 *result, s16 *qResult)
636 {
637 	s16 normN;
638 	s32 s32firstTerm, s32secondTerm, x;
639 	int i;
640 
641 	normN = qm_norm32(N);
642 
643 	/* limit N to least significant 16 bits. 15th bit is the sign bit. */
644 	N = qm_shl32(N, normN - 16);
645 	qN = qN + normN - 16 - 15;
646 	/* -15 is added to treat N as 16 bit q.15 number in the range from 0.5 to 1 */
647 
648 	/* Take the initial guess as 1/0.75 in qx format with appropriate sign. */
649 	if (N >= 0) {
650 		x = (s32) ((1 / 0.75) * (1 << qx));
651 		/* input no is in the range 0.5 to 1. So 1/0.75 is taken as initial guess. */
652 	} else {
653 		x = (s32) ((1 / -0.75) * (1 << qx));
654 		/* input no is in the range -0.5 to -1. So 1/-0.75 is taken as initial guess. */
655 	}
656 
657 	/* iterate the equation x = 2*x - N*x*x for 4 times. */
658 	for (i = 0; i < 4; i++) {
659 		s32firstTerm = qm_shl32(x, 1);	/* s32firstTerm = 2*x in q.29 */
660 		s32secondTerm =
661 		    qm_muls321616((s16) (s32firstTerm >> 16),
662 				  (s16) (s32firstTerm >> 16));
663 		/* s32secondTerm = x*x in q.(29+1-16)*2+1 */
664 		s32secondTerm =
665 		    qm_muls321616((s16) (s32secondTerm >> 16), (s16) N);
666 		/* s32secondTerm = N*x*x in q.((29+1-16)*2+1)-16+15+1 i.e. in q.29 */
667 		x = qm_sub32(s32firstTerm, s32secondTerm);
668 		/* can be added directly as both are in q.29 */
669 	}
670 
671 	/* Bring the x to q.30 format. */
672 	*result = qm_shl32(x, 1);
673 	/* giving the output in q.30 format for q.15 input in 16 bits. */
674 
675 	/* compute the final q format of the result. */
676 	*qResult = -qN + 30;	/* adjusting the q format of actual output */
677 
678 	return;
679 }
680 
681 #undef qx
682