/******************************************************************************
*
* (c) Copyright 2009, Freescale & STMicroelectronics
*
***************************************************************************//*!
*
* @file     GFLIB_Sqrt.c
*
* @author   r29302
*
* @version  1.0.19.0
*
* @date     Apr-26-2010
*
* @brief    Function performs calculation of square root.
*
*
*******************************************************************************
*
* Function implemented as ANSIC ISO/IEC 9899:1990, C90.
*
******************************************************************************/
/*!
@if GFLIB_GROUP
    @addtogroup GFLIB_GROUP
@else
    @defgroup GFLIB_GROUP   GFLIB
@endif
*/

#ifdef __cplusplus
extern "C" {
#endif

/******************************************************************************
* Includes
******************************************************************************/
#include "SWLIBS_Typedefs.h"

#include "GFLIB_Sqrt.h"

/******************************************************************************
* External declarations
******************************************************************************/

/******************************************************************************
* Defines and macros            (scope: module-local)
******************************************************************************/

/******************************************************************************
* Typedefs and structures       (scope: module-local)
******************************************************************************/

/******************************************************************************
* Global variable definitions   (scope: module-exported)
******************************************************************************/

/******************************************************************************
* Global variable definitions   (scope: module-local)
******************************************************************************/

/******************************************************************************
* Function prototypes           (scope: module-local)
******************************************************************************/

/******************************************************************************
* Function implementations      (scope: module-local)
******************************************************************************/

/******************************************************************************
* Function implementations      (scope: module-exported)
******************************************************************************/

/**************************************************************************//*!
@brief      Square root function.

@param[in]  s32In   The input value.

@return     The function returns the square root of the input value.  The
            return value is within the \f$[0, 1)\f$ fraction range.

@details    The #GFLIB_Sqrt function computes the square root of the
            input value.
            
            The computation is done by iteratively testing each bit,
            starting from the most significant one. In total 15 iterations
            are made, each performing the following steps:

            -# Add to the tentative square root value a single testing bit.
            -# Square the tentative square root value and test whether
               it is greater or lower than the input value.
            -# If greater, then clear the bit in the tentative square
               root value.
            -# Shift the single testing bit right

            \par
            Fig.\ref fig1_GFLIB_Sqrt depicts the floating point \e sqrt(x)
            function generated in Matlab, the value of the \e sqrt
            function calculated by #GFLIB_Sqrt, and their difference.  The
            calculation accuracy as a function of the input value can be
            observed from this figure.  The computed value is either equal
            to the true square root value or 1LSB less than it (on the upper
            16 bits of the 32-bit result; 1LSB \f$\equiv 2^{-15}\f$).  In
            order to obtain the value with 0.5LSB (16-bit) accuracy,
            additional iterations are required.

            \anchor fig1_GFLIB_Sqrt
            \image latex GFLIB_Sqrt_Figure1.eps "real sqrt(x) vs. GFLIB_Sqrt(s32In)" width=14cm
            \image html GFLIB_Sqrt_Figure1.jpg "real sqrt(x) vs. GFLIB_Sqrt(s32In)"

@note       The input value is limited to the range \f$[0, 1)\f$; if it is
            not within this range, the computed value is undefined.

@par Reentrancy:
            The function is reentrant.

@par Code Example:
\code
#include "gflib.h"

tFrac32 s32In;
tFrac32 s32Out;

void main(void)
{
    // s32In = 0.5 = 0x40000000
    s32In  = FRAC32(0.5);

    // s32Out should be 0x5a820000
    s32Out = GFLIB_Sqrt(s32In);

    return;
}
\endcode

@par Performance:
            \anchor tab1_GFLIB_Sqrt
            <table border="1" CELLPADDING="5" align = "center">
            <caption>#GFLIB_Sqrt function performance</caption>
            <tr>
              <th>Code size [bytes] GHS/CW</th> <td>242/238</td>
            </tr>
            <tr>
              <th>Data size [bytes] GHS/CW</th> <td>0/0</td>
            </tr>
            <tr>
              <th>Execution clock cycles max [clk] GHS/CW</th> <td>99/99</td>
            </tr>
            <tr>
              <th>Execution clock cycles min [clk] GHS/CW</th> <td>91/90</td>
            </tr>
            </table>

@internal   The function was updated with this algorithm for the following
            reasons:
            -# division operation in MPC5604 is very inefficient and
               may last over 30 cycles
            -# long division deteriorates performance of algorithm
               involving division

******************************************************************************/
tFrac32 GFLIB_SqrtANSIC(tFrac32 s32In)
{
    tU32 u32Target;     /* Input value converted to unsigned for comparing */
    tU32 u32Root;       /* Root bits accepted so far (bits 14..0) */
    tU32 u32Mask;       /* Bit under test, moves from bit 14 down to bit 0 */
    tU32 u32Trial;      /* Root candidate with the tested bit set */
    tU32 u32TrialSqr;   /* Square of u32Trial shifted left by one */

    u32Target = (tU32) s32In;
    u32Root = 0;

    /* NOTE(review): the code size and cycle counts quoted in the function
       header may not apply to this loop form - re-measure if they matter. */

    /* 15 passes, one per result bit, most significant bit first.  The loop
       ends when the mask has been shifted out past bit 0. */
    for (u32Mask = 0x00004000; u32Mask != 0; u32Mask = u32Mask >> 1)
    {
        /* Tentatively set the tested bit on top of the accepted bits. */
        u32Trial = u32Root + u32Mask;

        /* The square is shifted left by one before being compared with the
           input value. */
        u32TrialSqr = (u32Trial * u32Trial) << 1;

        /* Accept the bit unless the shifted square exceeds the input. */
        if (u32TrialSqr <= u32Target)
        {
            u32Root = u32Trial;
        }
    }

    /* Move the accepted root bits into the upper 16 bits of the result. */
    return (tFrac32) (u32Root << 16);
}


#ifdef __cplusplus
}
#endif

/* End of file */
