#************************************************************************************/
#*                                                                         			*/
#* DESCRIPTION:                                                            			*/
#*                                                                         			*/
#* MPC57xx FIR Filter using Lightweight Signal Processor                   			*/
#*==================================================================================*/
#*                                                                        			*/
#* REV      AUTHOR                   DATE       DESCRIPTION OF CHANGE               */
#* ---   -----------              ----------   ---------------------                */
#* 0.1  I Howie and A Turner  		25/4/13		Initial 10 TAP FIR					*/
#*    																				*/
#*                                           										*/
#************************************************************************************/

  # Export the entry label so that other objects can link against it.
  .globl fir_Signed16
 # .type fir_frac16,@function
  # NOTE(review): the disabled .type line above names fir_frac16, not
  # fir_Signed16 - presumably left over from another routine.
  # The code is emitted into the .vletext section; .vle presumably selects the
  # VLE instruction encoding (the routine uses e_/se_ mnemonics) - confirm
  # against the assembler manual.
  .section .vletext  , axv
  .vle

  # Arguments, as bound by the register aliases at the top of fir_Signed16:
  # N  (r3) - number of output samples
  # x  (r4) - input array of size N+ntaps-1
  # y  (r5) - output array of size N
  # h  (r6) - array of coefficients of length ntaps, stored in reversed order
  # ntaps (number of filter coefficients) is not taken by the routine; the
  # tap length is fixed at 10 (ten coefficient registers h0..h9).
            
  # Prototype and example call as recorded by the original authors:
  # void fir_Signed_c(unsigned short N, unsigned short ntaps, short *x, short *y, short *h);
  #fir_Signed16                       (N, ntaps, SDADC1_RESULTS, LSP_FIR_OUT, hr1);
  # NOTE(review): both lines list five arguments including ntaps, while the
  # routine binds x, y and h to r4, r5 and r6, i.e. directly after N.
  # Confirm the real prototype against the callers.
  # NOTE(review): whether ".align 16" means 16 bytes or 2^16 bytes depends on
  # the assembler in use - confirm that the intended alignment is produced.
  .align 16
fir_Signed16:
#-----------------------------------------------------------------------
# fir_Signed16 - fixed 10-tap FIR filter on 16-bit samples using the LSP
#                vector multiply-accumulate instructions. Every pass of
#                the loop stores two output samples.
#
# In (as bound by the register aliases below):
#   r3 = N   number of output samples. Expected to be even; an odd N is
#            rounded up to the next even count, N <= 0 returns at once.
#   r4 = x   input samples (16-bit)
#   r5 = y   output samples (16-bit)
#   r6 = h   10 coefficients (16-bit), loaded in memory order into h0..h9
# Out:   none (results are stored through y)
# Clobb: r0, r4-r12, CR0 (x, y and h are advanced while running)
# Saved: r14-r31 (nonvolatile) are stored on entry and reloaded on exit
# Stack: 80-byte frame; 0(r1) back chain, 4(r1) left free for the LR
#        save word, 8(r1)..79(r1) register save area
#
# NOTE(review): the routine fetches 10 + N input halfwords in total
# (assuming zlddu / zlwwu transfer 8 / 4 bytes, as their update
# displacements suggest), which is one halfword more than N+ntaps-1.
# Confirm the size and alignment of the caller's input buffer.
#-----------------------------------------------------------------------
#<# register definition
.equ N, r3
.equ x, r4
.equ y, r5
.equ h, r6


#co-efficients (each register holds one coefficient, loaded by zlhhsplatu)
.equ h0, r10
.equ h1, r11
.equ h2, r12
.equ h3, r0                                  # volatile scratch register; r13 must not be used here because the EABI dedicates it to the small data area base
.equ h4, r14
.equ h5, r15
.equ h6, r16
.equ h7, r17
.equ h8, r18
.equ h9, r19

#Data (x0..x4 hold the current window, x5 receives the newly loaded pair)
.equ x0, r20
.equ x1, r21
.equ x2, r22
.equ x3, r23
.equ x4, r24
.equ x5, r25
# x6..x10 are not referenced by this routine
.equ x6, r26
.equ x7, r27
.equ x8, r28
.equ x9, r29
.equ x10,r30

# temp is the multiply-accumulate destination. temp1 is never named by an
# instruction; presumably it is the odd half of the temp register pair
# written by the zvmhulsf* instructions (confirm against the LSP manual),
# so r9 must be treated as clobbered and must not be reused.
.equ temp, r8
.equ temp1,r9
.equ cnt, r7

#>#


  # Clear the output counter and leave immediately when there is nothing to
  # produce. Without this guard N == 0 would run the loop until cnt wraps.
  e_li        cnt, 0
  cmpw        cnt, N
  e_bge       fir_Signed16_exit              # N <= 0: no frame created, nothing touched

  # Create the stack frame. stwu stores the old stack pointer (back chain)
  # at 0(r1), so the register save area starts at 8(r1) and leaves both the
  # back chain and the LR save word intact.
  e_stwu      r1, -80(r1)
  # r14..r25 are written below; stmw saves from the first named register up
  # to r31, so every nonvolatile register used here is preserved.
  e_stmw      r14, 8(r1)

  # The update-form loads add the displacement to the pointer BEFORE the
  # access, so each pointer is biased backwards by its first displacement.
  se_subi     h, 2

  #load the co-efficients, one halfword each, advancing h by 2 per load
  zlhhsplatu  h0, 2(h)
  zlhhsplatu  h1, 2(h)
  zlhhsplatu  h2, 2(h)
  zlhhsplatu  h3, 2(h)
  zlhhsplatu  h4, 2(h)
  zlhhsplatu  h5, 2(h)
  zlhhsplatu  h6, 2(h)
  zlhhsplatu  h7, 2(h)
  zlhhsplatu  h8, 2(h)
  zlhhsplatu  h9, 2(h)

  # Bias the data pointers for the update-form accesses below
  se_subi     x, 8
  se_subi     y, 4

  #Load the X-data: the first 20 bytes of input fill x0..x4
  # (zlddu presumably fills the pairs x0:x1 and x2:x3 - confirm)
  zlddu       x0, 8(x)
  zlddu       x2, 8(x)
  zlwwu       x4, 8(x)

  #####
  #Loop - one pass produces two output samples
  #####
fir_Signed16_loop:

  zlwwu       x5, 4(x)                       # fetch the next 4 bytes of input and advance x

  # multiply and accumulate, first half of the coefficients
  zvmhulsf    temp, h9, x0                   # non-accumulating form: starts a fresh sum, so temp never needs clearing
  zvmhulsfaas temp, h7, x1
  zvmhulsfaas temp, h5, x2
  zvmhulsfaas temp, h3, x3
  zvmhulsfaas temp, h1, x4

  #merge to rotate input data vectors (presumably a shift of the window by one sample)
  zvmergelohih x0, x0, x1
  zvmergelohih x1, x1, x2
  zvmergelohih x2, x2, x3
  zvmergelohih x3, x3, x4
  zvmergelohih x4, x4, x5                    # the new data enters the window here

  # multiply and accumulate, second half of the coefficients
  zvmhulsfaas temp, h8, x0
  zvmhulsfaas temp, h6, x1
  zvmhulsfaas temp, h4, x2
  zvmhulsfaas temp, h2, x3
  zvmhulsfaas temp, h0, x4

  #store output: advances y by 4 bytes, i.e. two 16-bit results
  zstwhedu    temp, 4(y)

  #merge again so that the window has moved on by two samples in total
  zvmergelohih x0, x0, x1
  zvmergelohih x1, x1, x2
  zvmergelohih x2, x2, x3
  zvmergelohih x3, x3, x4
  zvmergehiloh x4, x5, x5                    # both sources are x5, so x4 takes over the newly loaded data

  # Two outputs were written. Loop while cnt < N, so that an odd N stops
  # after the next even count instead of never matching an equality test.
  e_addi      cnt, cnt, 2
  cmpw        cnt, N
  e_blt       fir_Signed16_loop

  # restore non-volatile regs
  e_lmw       r14, 8(r1)
  # delete stack frame
  e_addi      r1, r1, 80

fir_Signed16_exit:
  se_blr
