/******************************************************************************
*
* (c) Copyright 2009, Freescale & STMicroelectronics
*
***************************************************************************//*!
*
* @file     GDFLIB_FilterIIR2.c
*
* @author   Roman Filka
*
* @version  1.0.12.0
*
* @date     Apr-26-2010
*
* @brief    Direct form I second order IIR filter.
*
*******************************************************************************
*
* Function implemented as ANSIC ISO/IEC 9899:1990, C90.
*
******************************************************************************/
/*!
@if GDFLIB_GROUP
    @addtogroup GDFLIB_GROUP
@else
    @defgroup GDFLIB_GROUP   GDFLIB
@endif
*/

#ifdef __cplusplus
extern "C" {
#endif

/******************************************************************************
| Includes
-----------------------------------------------------------------------------*/
#include "SWLIBS_Typedefs.h"
#include "SWLIBS_Inlines.h"
#include "SWLIBS_Defines.h"

#include "GDFLIB_FilterIIR2.h"

/******************************************************************************
| External declarations
-----------------------------------------------------------------------------*/

/******************************************************************************
| Defines and macros            (scope: module-local)
-----------------------------------------------------------------------------*/

/******************************************************************************
| Typedefs and structures       (scope: module-local)
-----------------------------------------------------------------------------*/

/******************************************************************************
| Global variable definitions   (scope: module-exported)
-----------------------------------------------------------------------------*/

/******************************************************************************
| Global variable definitions   (scope: module-local)
-----------------------------------------------------------------------------*/

/******************************************************************************
| Function prototypes           (scope: module-local)
-----------------------------------------------------------------------------*/

/******************************************************************************
| Function implementations      (scope: module-local)
-----------------------------------------------------------------------------*/

/******************************************************************************
| Function implementations      (scope: module-exported)
-----------------------------------------------------------------------------*/

/**************************************************************************//*!
@ingroup    GDFLIB_GROUP
@brief      This function clears internal filter buffers used in function
            #GDFLIB_FilterIIR2.

@param[in,out]  *pParam     Pointer to filter structure with filter buffer and
                            filter parameters

@return     void

@details    This function clears internal buffers of second order IIR filter. It
            shall be called after filter parameter initialization and whenever
            the filter initialization is required.

@note       This function shall not be called together with #GDFLIB_FilterIIR2
            unless periodical clearing of filter buffers is required.

@par Reentrancy:
            The function is reentrant.

@par Code Example:
\code
#include "gdflib.h"

tFrac32 s32Input;
tFrac32 s32Output;

GDFLIB_FILTER_IIR2_T trMyIIR2 = GDFLIB_FILTER_IIR2_DEFAULT;

void main(void)
{
    // input value = 0.25
    s32Input  = FRAC32(0.25);

    // filter coefficients (BPF 400-625Hz, Ts=100e-6)
    trMyIIR2.trFiltCoeff.s32B0           = FRAC32(0.066122101544579/8);
    trMyIIR2.trFiltCoeff.s32B1           = FRAC32(0.0);
    trMyIIR2.trFiltCoeff.s32B2           = FRAC32(-0.066122101544579/8);
    trMyIIR2.trFiltCoeff.s32A1           = FRAC32(-1.776189018043779/8);
    trMyIIR2.trFiltCoeff.s32A2           = FRAC32(0.867755796910841/8);
    GDFLIB_FilterIIR2Init(&trMyIIR2);
}
\endcode

@par Performance:
            \anchor tab1_GDFLIB_FilterIIR2Init
            <table border="1" CELLPADDING="5" align = "center">
            <caption>#GDFLIB_FilterIIR2Init function performance</caption>
            <tr>
              <th>Code size [bytes] GHS/CW</th> <td>8/12</td>
            </tr>
            <tr>
              <th>Data size [bytes] GHS/CW</th> <td>0/0</td>
            </tr>
            <tr>
              <th>Execution clock cycles max [clk] GHS/CW</th> <td>13/13</td>
            </tr>
            <tr>
              <th>Execution clock cycles min [clk] GHS/CW</th> <td>12/11</td>
            </tr>
            </table>
******************************************************************************/
void GDFLIB_FilterIIR2InitANSIC(GDFLIB_FILTER_IIR2_T *pParam)
{
    /*
     * Clear every element of both delay lines, element [0] included.
     *
     * The C variants of GDFLIB_FilterIIR2ANSIC keep x(k-1)/x(k-2) and
     * y(k-1)/y(k-2) in elements [1] and [2]. The assembler variants address
     * their states at fixed offsets 0x14/0x18 and 0x20/0x24 from pParam,
     * i.e. right behind the five 32-bit coefficients.
     * NOTE(review): those offsets presumably correspond to elements [0] and
     * [1] of the buffers - confirm against the GDFLIB_FILTER_IIR2_T layout.
     * Clearing element [0] as well makes the initialization valid for either
     * variant and does not affect the C variants, which never read it.
     */
    pParam->s32FiltBufferX[0]   = 0;
    pParam->s32FiltBufferX[1]   = 0;
    pParam->s32FiltBufferX[2]   = 0;

    pParam->s32FiltBufferY[0]   = 0;
    pParam->s32FiltBufferY[1]   = 0;
    pParam->s32FiltBufferY[2]   = 0;
}

/**************************************************************************//*!
@ingroup    GDFLIB_GROUP

@brief      This function implements Direct Form I second order IIR filter.

@param[in,out]  *pParam     Pointer to filter structure with filter buffer and
                            filter parameters
@param[in]      s32In       Value of input signal to be filtered in step (k).
                            The value is 32 bit number in 1.31 fractional format

@return     The function returns 32-bit value in fractional format 1.31,
            representing the filtered value of the input signal in step (k).

@details    The #GDFLIB_FilterIIR2ANSIC function, denoting ANSI-C compatible
            implementation, can be called via function alias #GDFLIB_FilterIIR2.

            \par
            This function calculates the second order infinite impulse response
            (IIR) filter. The IIR filters are also called recursive filters
            because both the input and the previously calculated output
            values are used for calculation of the filter equation in each step.
            This form of feedback enables transfer of the energy from the output
            to the input, which theoretically leads to an infinitely long
            impulse response (IIR).

            A general form of the IIR filter expressed as a transfer function
            in the Z-domain is described as follows:
            \anchor eq1_GDFLIB_FilterIIR2
            \f[
                H(z)=\frac{Y(z)}{X(z)}=\frac{b_0+b_1z^{-1}+b_2z^{-2}+ \cdots +b_{N}z^{-N}}{1+a_1z^{-1}+a_2z^{-2}+ \cdots +a_{N}z^{-N}}
            \f]

            where \f$ N \f$ denotes the filter order. The second order IIR filter
            in the Z-domain is therefore given from eq. \ref eq1_GDFLIB_FilterIIR2
            as:
            \anchor eq2_GDFLIB_FilterIIR2
            \f[
                H(z)=\frac{Y(z)}{X(z)}=\frac{b_0+b_1z^{-1}+b_2z^{-2}}{1+a_1z^{-1}+a_2z^{-2}}
            \f]

            In order to implement the second order IIR filter on a microcontroller,
            the discrete time domain representation of the filter,
            described by eq. \ref eq2_GDFLIB_FilterIIR2, must be transformed into
            a time difference equation as follows:
            \anchor eq3_GDFLIB_FilterIIR2
            \f[
                y(k)= b_0 x(k) + b_1 x(k-1) + b_2 x(k-2) - a_1 y(k-1) - a_2 y(k-2)
            \f]

            Equation \ref eq3_GDFLIB_FilterIIR2 represents a Direct Form I
            implementation of a second order IIR filter. It is well known that
            Direct Form I (DF-I) and Direct Form II (DF-II) implementations of
            an IIR filter are generally sensitive to parameter quantization if
            finite precision arithmetic is considered. This, however, can be
            neglected when the filter transfer function is broken up into low
            order sections, i.e. first or second order.
            The main difference between DF-I and DF-II implementations of an IIR
            filter is in the number of delay buffers and in the number of guard
            bits required to handle potential overflow.
            The DF-II implementation requires fewer delay buffers than DF-I,
            hence less data memory is utilized. On the other hand,
            since the poles come first in the DF-II realization,
            the signal entering the state delay-line typically requires a larger
            dynamic range than the output signal \f$y(k)\f$. Therefore overflow
            can occur at the delay-line input of DF-II implementation,
            unlike in the DF-I implementation.

            \anchor fig1_GDFLIB_FilterIIR2
            \image latex GDFLIB_FilterIIR2_Figure1.eps "Direct Form 1 second order IIR filter" width=10cm
            \image html GDFLIB_FilterIIR2_Figure1.jpg "Direct Form 1 second order IIR filter"

            \par
            The coefficients of the filter depicted in Fig. \ref fig1_GDFLIB_FilterIIR2
            can be designed to meet requirements for the second order Band Pass (BPF)
            or Band Stop filter (BSF). Filter coefficients can be calculated
            using various tools, for example Matlab \e butter function. In order
            to avoid overflow during the calculation of the #GDFLIB_FilterIIR2
            function, filter coefficients must be divided by eight.
            The coefficient quantization error due to
            finite precision arithmetic can be neglected in case of a second order
            filter. Therefore calculation of coefficients can be done using
            Matlab as follows:

\code
freq_bot    = 400;
freq_top    = 625;
T_sampling  = 100e-6;

[b,a]= butter(1,[freq_bot freq_top]*T_sampling *2, 'bandpass');
sys =tf(b,a,T_sampling);
bode(sys,[freq_bot:1:freq_top]*2*pi)

s32B0 = b(1)/8;
s32B1 = b(2)/8;
s32B2 = b(3)/8;
s32A1 = a(2)/8;
s32A2 = a(3)/8;
disp (' Coefficients for GDFLIB_FilterIIR2 function :');
disp ([ 's32B0 = FRAC32(' num2str( s32B0 ) ')']);
disp ([ 's32B1 = FRAC32(' num2str( s32B1 ) ')']);
disp ([ 's32B2 = FRAC32(' num2str( s32B2 ) ')']);
disp ([ 's32A1 = FRAC32(' num2str( s32A1 ) ')']);
disp ([ 's32A2 = FRAC32(' num2str( s32A2 ) ')']);
\endcode

@note       Filter delay line includes four delay buffers which should be reset
            after filter initialization. This can be
            done by assigning filter instance a #GDFLIB_FILTER_IIR2_DEFAULT
            macro during instance declaration or by calling #GDFLIB_FilterIIR2Init
            function.

@warning    Because of fixed point implementation and to avoid overflow
            during the calculation of the #GDFLIB_FilterIIR2 function,
            filter coefficients must be divided by eight. Function output
            is internally multiplied by eight to correct the coefficient scaling.

@par Reentrancy:
            The function is reentrant.

@par Code Example:
\code
#include "gdflib.h"

tFrac32 s32Input;
tFrac32 s32Output;

GDFLIB_FILTER_IIR2_T trMyIIR2 = GDFLIB_FILTER_IIR2_DEFAULT;

void main(void)
{
    // input value = 0.25
    s32Input  = FRAC32(0.25);

    // filter coefficients (BPF 400-625Hz, Ts=100e-6)
    trMyIIR2.trFiltCoeff.s32B0           = FRAC32(0.066122101544579/8);
    trMyIIR2.trFiltCoeff.s32B1           = FRAC32(0.0);
    trMyIIR2.trFiltCoeff.s32B2           = FRAC32(-0.066122101544579/8);
    trMyIIR2.trFiltCoeff.s32A1           = FRAC32(-1.776189018043779/8);
    trMyIIR2.trFiltCoeff.s32A2           = FRAC32(0.867755796910841/8);
    GDFLIB_FilterIIR2Init(&trMyIIR2);

    // output should be 0x021DAC18
    s32Output = GDFLIB_FilterIIR2(s32Input,&trMyIIR2);
}
\endcode

@par Performance:
            \anchor tab1_GDFLIB_FilterIIR2
            <table border="1" CELLPADDING="5" align = "center">
            <caption>#GDFLIB_FilterIIR2 function performance</caption>
            <tr>
              <th>Code size [bytes] GHS/CW</th> <td>310/168</td>
            </tr>
            <tr>
              <th>Data size [bytes] GHS/CW</th> <td>0/0</td>
            </tr>
            <tr>
              <th>Execution clock cycles max [clk] GHS/CW</th> <td>163/113</td>
            </tr>
            <tr>
              <th>Execution clock cycles min [clk] GHS/CW</th> <td>126/86</td>
            </tr>
            </table>

******************************************************************************/
tFrac32 GDFLIB_FilterIIR2ANSIC(tFrac32 s32In, GDFLIB_FILTER_IIR2_T *pParam)
{
    /*
     * Exactly one of the four variants below is compiled, selected by the
     * USE_FRAC32_ARITHMETIC and USE_ASM macros. All of them evaluate
     *
     *   y(k) = 8 * (b0*x(k) + b1*x(k-1) + b2*x(k-2) - a1*y(k-1) - a2*y(k-2))
     *
     * with the coefficients stored pre-divided by eight, and saturate the
     * final multiplication by eight.
     *
     * NOTE(review): the assembler variants keep their states at offsets
     * 0x14/0x18 (x) and 0x20/0x24 (y) from pParam, while the C variants use
     * elements [1] and [2] of s32FiltBufferX/s32FiltBufferY. Presumably the
     * assembler offsets are elements [0] and [1] - confirm against the
     * structure layout and against the buffer initialization.
     *
     * NOTE(review): the assembler variants contain no C return statement;
     * the result is left in r3, presumably the ABI return register - confirm
     * for the compiler in use.
     */
#if defined (USE_FRAC32_ARITHMETIC) && defined(USE_ASM)
    /*
     * Implemented equation:
     * y(k) = b0*x(k) + b1*x(k-1) + b2*x(k-2) - a1*y(k-1) - a2*y(k-2)
     *
     * r3 <- x(k)
     * 0x0(r4) <- b0>>3     parameter is scaled down by eight to format Q4.28
     * 0x4(r4) <- b1>>3     parameter is scaled down by eight to format Q4.28
     * 0x8(r4) <- b2>>3     parameter is scaled down by eight to format Q4.28
     * 0xC(r4) <- a1>>3     parameter is scaled down by eight to format Q4.28
     * 0x10(r4) <- a2>>3    parameter is scaled down by eight to format Q4.28
     * 0x14(r4) <- x(k-1)
     * 0x18(r4) <- x(k-2)
     * 0x1C(r4) <- x(k-3)
     * 0x20(r4) <- y(k-1)
     * 0x24(r4) <- y(k-2)
     * 0x28(r4) <- y(k-3)
     */

    /*--------------
    (b2>>3)*x(k-2)
    --------------*/
    asm("e_lwz      r5,0x8(r4)");   // r5 = b2>>3
    asm("e_lwz      r6,0x18(r4)");  // r6 = x(k-2)

    asm("mullw      r0,r5,r6");     // r0 = r5 * r6; multiply low word
    asm("mulhw      r5,r5,r6");     // r5 = (r5*r6)>>32; multiply high word
    asm("e_slwi     r7,r5,1");      // r7 = r5<<1; convert high word to frac format
    asm("e_rlwimi   r7,r0,1,31,31");// LSB of r7 = MSB of r0 (low word)

    /*--------------
    (b1>>3)*x(k-1)
    --------------*/
    asm("e_lwz      r5,0x4(r4)");   // r5 = b1>>3
    asm("e_lwz      r6,0x14(r4)");  // r6 = x(k-1)
    asm("se_stw     r6,0x18(r4)");  // store x(k-1) into x(k-2) in filter structure

    asm("mullw      r0,r5,r6");     // r0 = r5 * r6; multiply low word
    asm("mulhw      r5,r5,r6");     // r5 = (r5*r6)>>32; multiply high word
    asm("e_slwi     r6,r5,1");      // r6 = r5<<1; convert high word to frac format
    asm("e_rlwimi   r6,r0,1,31,31");// LSB of r6 = MSB of r0 (low word)

    /*--------------
    (b0>>3)*x(k)
    --------------*/
    asm("e_lwz      r5,0(r4)");     // r5 = b0>>3
    asm("se_stw     r3,0x14(r4)");  // store x(k) into x(k-1) in filter structure

    asm("mullw      r0,r3,r5");     // r0 = r3 * r5; multiply low word
    asm("mulhw      r3,r3,r5");     // r3 = (r3*r5)>>32; multiply high word
    asm("e_slwi     r3,r3,1");      // r3 = r3<<1; convert high word to frac format
    asm("e_rlwimi   r3,r0,1,31,31");// LSB of r3 = MSB of r0 (low word)

    /*--------------
    (b0>>3)*x(k) + (b1>>3)*x(k-1)
    --------------*/
    asm("se_add     r3,r6");        // r3 = (b0>>3)*x(k) + (b1>>3)*x(k-1)

    /*--------------
    (b0>>3)*x(k) + (b1>>3)*x(k-1) + (b2>>3)*x(k-2)
    --------------*/
    asm("se_add     r3,r7");        // r3 = (b0>>3)*x(k) + (b1>>3)*x(k-1) + (b2>>3)*x(k-2)

    /*--------------
    (a2>>3)*y(k-2)
    --------------*/
    asm("e_lwz      r5,0x10(r4)");  // r5 = a2>>3
    asm("e_lwz      r6,0x24(r4)");  // r6 = y(k-2)

    asm("mullw      r0,r5,r6");     // r0 = r5 * r6; multiply low word
    asm("mulhw      r5,r5,r6");     // r5 = (r5*r6)>>32; multiply high word
    asm("e_slwi     r5,r5,1");      // r5 = r5<<1; convert high word to frac format
    asm("e_rlwimi   r5,r0,1,31,31");// LSB of r5 = MSB of r0 (low word)

    /*--------------
    Acc = (b0>>3)*x(k) + (b1>>3)*x(k-1) + (b2>>3)*x(k-2) - (a2>>3)*y(k-2)
    --------------*/
    asm("se_sub     r3,r5");        // r3 = (b0>>3)*x(k) + (b1>>3)*x(k-1) + (b2>>3)*x(k-2) - (a2>>3)*y(k-2)

    /*--------------
    (a1>>3)*y(k-1)
    --------------*/
    asm("e_lwz      r5,0xC(r4)");   // r5 = a1>>3
    asm("e_lwz      r6,0x20(r4)");  // r6 = y(k-1)
    asm("se_stw     r6,0x24(r4)");  // store y(k-1) into y(k-2) in filter structure

    asm("mullw      r0,r5,r6");     // r0 = r5 * r6; multiply low word
    asm("mulhw      r5,r5,r6");     // r5 = (r5*r6)>>32; multiply high word
    asm("e_slwi     r5,r5,1");      // r5 = r5<<1; convert high word to frac format
    asm("e_rlwimi   r5,r0,1,31,31");// LSB of r5 = MSB of r0 (low word)

    /*--------------
    Acc = (b0>>3)*x(k) + (b1>>3)*x(k-1) + (b2>>3)*x(k-2) - (a1>>3)*y(k-1) - (a2>>3)*y(k-2)
    --------------*/
    asm("se_sub     r3,r5");        // r3 = (b0>>3)*x(k) + (b1>>3)*x(k-1) + (b2>>3)*x(k-2) - (a1>>3)*y(k-1) - (a2>>3)*y(k-2)

    /*--------------
     y(k) = Acc * 8;
     Correction due to scaled filter coefficients.
     Result saturates if overflow caused by arithmetic left shift occurs.
     Acc is kept untouched in r3 until both range checks are done; the
     candidate result is built in r5, so the lower-limit check compares the
     unshifted accumulator (not the already shifted value) with Xmin.
    --------------*/
    asm("e_slwi     r5,r3,3");      // temp = Acc<<3;  r5 = r3<<3;
                                    // transform result from Q4.28 back to
                                    // Q1.31 format

    //temp = (Acc > Xmax)? 0x7fff ffff : temp
    asm("se_bmaski  r6,31");        // r6 = 0x7FFFFFFF
    asm("se_bmaski  r7,28");        // r7 = 0x7FFFFFFF>>3 = Xmax
    asm("se_cmp     r3,r7");        // compare Acc with Xmax
    asm("iselgt     r5,r6,r5");     // r5 = (r3 > r7) ? r6 : r5

    //y(k) = (Acc < Xmin)? 0x8000 0000 : temp
    //(Acc == Xmin needs no special case: Xmin<<3 is exactly 0x8000 0000)
    asm("e_lis      r6,0x8000");    // r6 = 0x80000000
    asm("e_lis      r7,0xf000");    // r7 = 0x80000000>>3 = Xmin
    asm("se_cmp     r3,r7");        // compare Acc with Xmin
    asm("isellt     r3,r6,r5");     // r3 = (r3 < r7) ? r6 : r5

    asm("se_stw     r3,0x20(r4)");  // y(k-1) <- r3
#endif

#if defined (USE_FRAC32_ARITHMETIC) && !defined(USE_ASM)
//USE_FRAC32_ARITHMETIC
    register tFrac32 s32M1;
    register tFrac32 s32M2;
    register tFrac32 s32M3;
    register tFrac32 s32M4;
    register tFrac32 s32M5;
    register tFrac32 s32Acc;
    register tFrac32 s32Acc1;
    register tFrac32 s32Acc2;
    register tFrac32 s32Out;

    /*
     * Implemented equation:
     * y(k) = b0*x(k) + b1*x(k-1) +b2*x(k-2) - a1*y(k-1) - a2*y(k-2)
     *
     * Calculation steps:
     * M1   = b0*x(k)
     * M2   = b1*x(k-1)
     * M3   = b2*x(k-2)
     * M4   = a1*y(k-1)
     * M5   = a2*y(k-2)
     * Acc1 = M3+(-M5)
     * Acc2 = M2+(-M4)
     * Acc  = Acc1+Acc2
     * Acc  = Acc+M1
     * y(k) = Acc * 8
     */

    // M1   = b0*x(k), number format Q1.31
    s32M1   = F32Mul(pParam->trFiltCoeff.s32B0,s32In);

    // M2   = b1*x(k-1), number format Q1.31
    s32M2   = F32Mul(pParam->trFiltCoeff.s32B1,pParam->s32FiltBufferX[1]);

    // M3   = b2*x(k-2), number format Q1.31
    s32M3   = F32Mul(pParam->trFiltCoeff.s32B2,pParam->s32FiltBufferX[2]);

    // M4   = a1*y(k-1), number format Q1.31
    s32M4   = F32Mul(pParam->trFiltCoeff.s32A1,pParam->s32FiltBufferY[1]);

    // M5   = a2*y(k-2), number format Q1.31
    s32M5   = F32Mul(pParam->trFiltCoeff.s32A2,pParam->s32FiltBufferY[2]);

    // Feedback terms are subtracted by adding their negation
    s32Acc1 = F32Add(s32M3,F32Neg((tFrac32)s32M5));
    s32Acc2 = F32Add(s32M2,F32Neg((tFrac32)s32M4));
    s32Acc  = F32Add(s32Acc1,s32Acc2);
    s32Acc  = F32Add(s32Acc,s32M1);

    /* y(k) = Acc * 8; correction due to scaled filter coefficients
     * Result saturates if overflow caused by arithmetic left shift occurs.
     */
    s32Out  = F32ShlSat(s32Acc,3);

    // Storing filter states in the buffer
    pParam->s32FiltBufferX[2]   = pParam->s32FiltBufferX[1];
    pParam->s32FiltBufferX[1]   = s32In;
    pParam->s32FiltBufferY[2]   = pParam->s32FiltBufferY[1];
    pParam->s32FiltBufferY[1]   = s32Out;

    // Returning de-scaled value of internal accumulator
    return(s32Out);
#endif

#if !defined (USE_FRAC32_ARITHMETIC) && defined(USE_ASM)
// ASM 16bit arithmetic
    /*
     * Implemented equation:
     * y(k) = b0*x(k) + b1*x(k-1) + b2*x(k-2) - a1*y(k-1) - a2*y(k-2)
     *
     * Unlike the USE_FRAC32_ARITHMETIC assembler variant, only the high word
     * of each product is used (no mullw / e_rlwimi correction of the LSB).
     *
     * r3 <- x(k)
     * 0x0(r4) <- b0>>3     parameter is scaled down by eight to format Q4.28
     * 0x4(r4) <- b1>>3     parameter is scaled down by eight to format Q4.28
     * 0x8(r4) <- b2>>3     parameter is scaled down by eight to format Q4.28
     * 0xC(r4) <- a1>>3     parameter is scaled down by eight to format Q4.28
     * 0x10(r4) <- a2>>3    parameter is scaled down by eight to format Q4.28
     * 0x14(r4) <- x(k-1)
     * 0x18(r4) <- x(k-2)
     * 0x1C(r4) <- x(k-3)
     * 0x20(r4) <- y(k-1)
     * 0x24(r4) <- y(k-2)
     * 0x28(r4) <- y(k-3)
     */

    /*--------------
    (b2>>3)*x(k-2)
    --------------*/
    asm("e_lwz      r5,0x8(r4)");   // r5 = b2>>3
    asm("e_lwz      r6,0x18(r4)");  // r6 = x(k-2)

    asm("mulhw      r5,r5,r6");     // r5 = (r5*r6)>>32; multiply high word
    asm("e_slwi     r7,r5,1");      // r7 = r5<<1; convert high word to frac format

    /*--------------
    (b1>>3)*x(k-1)
    --------------*/
    asm("e_lwz      r5,0x4(r4)");   // r5 = b1>>3
    asm("e_lwz      r6,0x14(r4)");  // r6 = x(k-1)
    asm("se_stw     r6,0x18(r4)");  // store x(k-1) into x(k-2) in filter structure

    asm("mulhw      r5,r5,r6");     // r5 = (r5*r6)>>32; multiply high word
    asm("e_slwi     r6,r5,1");      // r6 = r5<<1; convert high word to frac format

    /*--------------
    (b0>>3)*x(k)
    --------------*/
    asm("e_lwz      r5,0(r4)");     // r5 = b0>>3
    asm("se_stw     r3,0x14(r4)");  // store x(k) into x(k-1) in filter structure

    asm("mulhw      r3,r3,r5");     // r3 = (r3*r5)>>32; multiply high word
    asm("e_slwi     r3,r3,1");      // r3 = r3<<1; convert high word to frac format

    /*--------------
    (b0>>3)*x(k) + (b1>>3)*x(k-1)
    --------------*/
    asm("se_add     r3,r6");        // r3 = (b0>>3)*x(k) + (b1>>3)*x(k-1)

    /*--------------
    (b0>>3)*x(k) + (b1>>3)*x(k-1) + (b2>>3)*x(k-2)
    --------------*/
    asm("se_add     r3,r7");        // r3 = (b0>>3)*x(k) + (b1>>3)*x(k-1) + (b2>>3)*x(k-2)

    /*--------------
    (a2>>3)*y(k-2)
    --------------*/
    asm("e_lwz      r5,0x10(r4)");  // r5 = a2>>3
    asm("e_lwz      r6,0x24(r4)");  // r6 = y(k-2)

    asm("mulhw      r5,r5,r6");     // r5 = (r5*r6)>>32; multiply high word
    asm("e_slwi     r5,r5,1");      // r5 = r5<<1; convert high word to frac format

    /*--------------
    Acc = (b0>>3)*x(k) + (b1>>3)*x(k-1) + (b2>>3)*x(k-2) - (a2>>3)*y(k-2)
    --------------*/
    asm("se_sub     r3,r5");        // r3 = (b0>>3)*x(k) + (b1>>3)*x(k-1) + (b2>>3)*x(k-2) - (a2>>3)*y(k-2)

    /*--------------
    (a1>>3)*y(k-1)
    --------------*/
    asm("e_lwz      r5,0xC(r4)");   // r5 = a1>>3
    asm("e_lwz      r6,0x20(r4)");  // r6 = y(k-1)
    asm("se_stw     r6,0x24(r4)");  // store y(k-1) into y(k-2) in filter structure

    asm("mulhw      r5,r5,r6");     // r5 = (r5*r6)>>32; multiply high word
    asm("e_slwi     r5,r5,1");      // r5 = r5<<1; convert high word to frac format

    /*--------------
    Acc = (b0>>3)*x(k) + (b1>>3)*x(k-1) + (b2>>3)*x(k-2) - (a1>>3)*y(k-1) - (a2>>3)*y(k-2)
    --------------*/
    asm("se_sub     r3,r5");        // r3 = (b0>>3)*x(k) + (b1>>3)*x(k-1) + (b2>>3)*x(k-2) - (a1>>3)*y(k-1) - (a2>>3)*y(k-2)

    /*--------------
     y(k) = Acc * 8;
     Correction due to scaled filter coefficients.
     Result saturates if overflow caused by arithmetic left shift occurs.
     Acc is kept untouched in r3 until both range checks are done; the
     candidate result is built in r5, so the lower-limit check compares the
     unshifted accumulator (not the already shifted value) with Xmin.
    --------------*/
    asm("e_slwi     r5,r3,3");      // temp = Acc<<3;  r5 = r3<<3;
                                    // transform result from Q4.28 back to
                                    // Q1.31 format

    //temp = (Acc > Xmax)? 0x7fff ffff : temp
    asm("se_bmaski  r6,31");        // r6 = 0x7FFFFFFF
    asm("se_bmaski  r7,28");        // r7 = 0x7FFFFFFF>>3 = Xmax
    asm("se_cmp     r3,r7");        // compare Acc with Xmax
    asm("iselgt     r5,r6,r5");     // r5 = (r3 > r7) ? r6 : r5

    //y(k) = (Acc < Xmin)? 0x8000 0000 : temp
    //(Acc == Xmin needs no special case: Xmin<<3 is exactly 0x8000 0000)
    asm("e_lis      r6,0x8000");    // r6 = 0x80000000
    asm("e_lis      r7,0xf000");    // r7 = 0x80000000>>3 = Xmin
    asm("se_cmp     r3,r7");        // compare Acc with Xmin
    asm("isellt     r3,r6,r5");     // r3 = (r3 < r7) ? r6 : r5

    asm("se_stw     r3,0x20(r4)");  // y(k-1) <- r3
#endif

#if !defined (USE_FRAC32_ARITHMETIC) && !defined(USE_ASM)
//USE_FRAC16_ARITHMETIC
    register tFrac32 s32M1;
    register tFrac32 s32M2;
    register tFrac32 s32A1;
    register tFrac32 s32Acc;
    register tFrac32 s32Out;

    /*
     * Implemented equation:
     * y(k) = b0*x(k) + b1*x(k-1) +b2*x(k-2) - a1*y(k-1) - a2*y(k-2)
     *
     * Calculation steps:
     * M1   = a1*y(k-1)
     * M2   = a2*y(k-2)
     * A1   = -(M1+M2)
     * Acc  = A1  + b0*x(k)
     * Acc  = Acc + b1*x(k-1)
     * Acc  = Acc + b2*x(k-2)
     * y(k) = Acc * 8
     *
     * The feedback products use full 32-bit operands; the feed-forward
     * products use only the upper 16 bits of coefficients and inputs.
     */

    //M1    = a1*y(k-1)
    s32M1   = F32Mul(pParam->trFiltCoeff.s32A1,pParam->s32FiltBufferY[1]);

    //M2    = a2*y(k-2)
    s32M2   = F32Mul(pParam->trFiltCoeff.s32A2,pParam->s32FiltBufferY[2]);

    /*A1    = -(M1+M2), note: all coefficients A1,A2,B0,B1,B2 are divided by 8
     *    in order to avoid overflow during calculation, hence F32Add is used without
     *     saturation detection
     */
    s32A1   = F32NegSat(F32Add(s32M1,s32M2));

    //Acc   = -a1*y(k-1) - a2*y(k-2) + b0*x(k)
    s32Acc  = F32MacF16F16(s32A1,
                (tFrac16)((pParam->trFiltCoeff.s32B0)>>16),
                 (tFrac16)(s32In>>16));

    //Acc   = -a1*y(k-1) - a2*y(k-2) + b0*x(k) + b1*x(k-1)
    s32Acc  = F32MacF16F16(s32Acc,
                (tFrac16)((pParam->trFiltCoeff.s32B1)>>16),
                (tFrac16)((pParam->s32FiltBufferX[1])>>16));

    //Acc   = -a1*y(k-1) - a2*y(k-2) + b0*x(k) + b1*x(k-1) + b2*x(k-2)
    s32Acc  = F32MacF16F16(s32Acc,
                (tFrac16)((pParam->trFiltCoeff.s32B2)>>16),
                (tFrac16)((pParam->s32FiltBufferX[2])>>16));

    /* y(k) = Acc * 8; correction due to scaled filter coefficients
     * Result saturates if overflow caused by arithmetic left shift occurs.
     */
    s32Out  = F32ShlSat(s32Acc,3);

    // Storing filter states in the buffer
    pParam->s32FiltBufferX[2]   = pParam->s32FiltBufferX[1];
    pParam->s32FiltBufferX[1]   = s32In;
    pParam->s32FiltBufferY[2]   = pParam->s32FiltBufferY[1];
    pParam->s32FiltBufferY[1]   = s32Out;

    // Returning de-scaled value of internal accumulator
    return(s32Out);
#endif
}

#ifdef __cplusplus
}
#endif

/* End of file */
