/*
 * Copyright (c) 2015, Freescale Semiconductor, Inc.
 * Copyright 2016-2020 NXP
 * All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */
#include <math.h>
#include "fsl_common.h"
#include "fsl_debug_console.h"
#include "fsl_mu.h"
#include "board.h"
#include "arm_math.h"
#include "fsl_usart.h"

#include "clock_config.h"
#include "pin_mux.h"
#include "dsp_support.h"
#include "fsl_power.h"
/*******************************************************************************
 * Definitions
 ******************************************************************************/
/* Messaging unit instance passed to MU_Init(), MU_GetFlags() and MU_SendMsg() in this file. */
#define APP_MU MUA
/* Flag value main() waits to read back from MU_GetFlags() (core boot-up indication). */
#define BOOT_FLAG 0x01U

/* Index of the MU transmit/receive register used by MU_SendMsg(). */
#define CHN_MU_REG_NUM 0U

/* Number of messages used to test message sending (not referenced in the visible part of this file). */
#define MSG_LENGTH 32U

/* Scale x by 2^31 and truncate to int32_t; x must stay below 1.0 for the result to fit. */
#define FLOAT_2_Q31(x) ((int32_t)((x)*2147483648.0f))
/* Element count of the vec_add_* arrays. */
#define VEC_ADD_LENGTH 200
/* Element count of the vec_dot_* arrays. */
#define VEC_DOT_LENGTH 16
/* Number of times each benchmark repeats the measured call; also the divisor for the per-call average. */
#define LOOP_COUNT     100

/* USART instance, clock and interrupt used for keyboard input.
 * DEMO_USART_CLK_SRC is not referenced in the visible part of this file. */
#define DEMO_USART              USART0
#define DEMO_USART_CLK_SRC      kCLOCK_Flexcomm0
#define DEMO_USART_CLK_FREQ     CLOCK_GetFlexCommClkFreq(0U)
#define DEMO_USART_IRQHandler   FLEXCOMM0_IRQHandler
#define DEMO_USART_IRQn         FLEXCOMM0_IRQn

/* Core clock cycles per SysTick period (a double); multiplies a tick delta to turn it into cycles. */
#define CYCLES_PER_SYSTICK  ((BOARD_BOOTCLOCKRUN_CORE_CLOCK)*(.0000005))  /* .0000005 s = 500 ns tick period, about 125 cycles at the 250105263 Hz quoted in TEST_InitTime */

/*******************************************************************************
 * Prototypes
 ******************************************************************************/
/* Reset the tick counter and start SysTick. */
static void TEST_InitTime(void);
/* Return the current tick counter value. */
static uint32_t TEST_GetTime(void);
/* Benchmarks: each one runs a CMSIS-DSP routine LOOP_COUNT times, checks the
 * result against a reference and prints the average cycle count per call. */
static void arm_mat_sqrt_Test();
static void arm_mat_sine_Test();
static void arm_mat_vec_add_Test();
static void arm_mat_vec_dot_Test();
static void arm_mat_mtx_inv_Test();
static void arm_mat_mtx_tnsp_Test();

/* Enable and reset the GPIO block, then initialise the red, green and blue LEDs to off. */
void LED_INIT();

/*******************************************************************************
 * Variables
 ******************************************************************************/
/* Tick counter incremented by SysTick_Handler. Despite the "Ms" suffix, TEST_InitTime
 * programs a period of SystemCoreClock / 2000000 core cycles (nominally 500 ns). */
static volatile uint32_t s_timeMs;
/* Set by the USART interrupt handler when a byte has been read; cleared by main(). */
static volatile bool uartTyped = false;
/* Most recent byte read by the USART interrupt handler. */
static volatile uint8_t dataTyped = 0;

/* Operands, result and expected result of the vector-add benchmark. */
static q31_t vec_add_x[VEC_ADD_LENGTH];
static q31_t vec_add_y[VEC_ADD_LENGTH];
static q31_t vec_add_out[VEC_ADD_LENGTH];
static q31_t vec_add_out_ref[VEC_ADD_LENGTH];

/* Operands of the dot-product benchmark; vec_dot_b equals vec_dot_a with every other element negated. */
static float32_t vec_dot_a[VEC_DOT_LENGTH] = {1.01, 2.02,  3.03,  4.04,  5.05,  6.06,  7.07,  8.08, 9.09, 10.10, 11.11, 12.12, 13.13, 14.14, 15.15, 16.16};
static float32_t vec_dot_b[VEC_DOT_LENGTH] = {-1.01, 2.02,  -3.03,  4.04,  -5.05,  6.06,  -7.07,  8.08, -9.09, 10.10, -11.11, 12.12, -13.13, 14.14, -15.15, 16.16};

/* 8x8 input, output and expected output of the matrix-transpose benchmark. */
static float32_t transpose[64]       = {0.0};
static float32_t transposeResult[64];
static float32_t transposeRel[64]    = {0.0};

/*******************************************************************************
 * Code
 ******************************************************************************/
/*!
 * @brief USART interrupt handler for keyboard input.
 *
 * When the status flags report a non-empty RX FIFO or an RX error, one byte is
 * read into dataTyped and uartTyped is raised for the main loop.
 */
void DEMO_USART_IRQHandler(void)
{
    const uint32_t status = USART_GetStatusFlags(DEMO_USART);

    /* React to newly arrived data as well as to receive errors. */
    if ((status & (kUSART_RxFifoNotEmptyFlag | kUSART_RxError)) != 0U)
    {
        dataTyped = USART_ReadByte(DEMO_USART);
        uartTyped = true;
    }

/* Workaround for ARM errata 838869 (Cortex-M4, Cortex-M4F): a store immediate
  overlapping the exception return might vector to the incorrect interrupt.
  Only compiled in when __CORTEX_M is 4. */
#if defined __CORTEX_M && (__CORTEX_M == 4U)
    __DSB();
#endif
}

/*!
 * @brief Busy-wait for 5000000 loop iterations.
 *
 * The counter is volatile so the loop is not optimised away; each pass
 * executes one NOP.
 */
void delay(void)
{
    volatile uint32_t spins = 0;

    while (spins < 5000000)
    {
        __NOP();
        ++spins;
    }
}

/*!
 * @brief Main function
 */
int main(void)
{
    usart_config_t config;

    /* Init board hardware.*/
    BOARD_InitPins();
    BOARD_BootClockRUN();
    BOARD_InitDebugConsole();

    /* Initialize LED */
    LED_INIT();

    /* Initialize SysTick */
    TEST_InitTime();

    /* Clear MUA reset */
    RESET_PeripheralReset(kMU_RST_SHIFT_RSTn);

    /* MUA init */
    MU_Init(APP_MU);

    /* Copy DSP image to RAM and start DSP core. */
    BOARD_DSP_Init();

    /* Wait DSP core is Boot Up */
    while (BOOT_FLAG != MU_GetFlags(APP_MU));

    /* Enable Rx and Tx on UART */
    USART_GetDefaultConfig(&config);
    config.baudRate_Bps = BOARD_DEBUG_UART_BAUDRATE;
    config.enableTx     = true;
    config.enableRx     = true;

    USART_Init(DEMO_USART, &config, DEMO_USART_CLK_FREQ);
    /* Enable RX interrupt. */
    USART_EnableInterrupts(DEMO_USART, kUSART_RxLevelInterruptEnable | kUSART_RxErrorInterruptEnable);
    EnableIRQ(DEMO_USART_IRQn);

    /* Erase to beginning of screen */
    PRINTF("\033[1J");
    /* Place cursor at top */
    PRINTF("\033[f");
    /* Print the initial banner */
    PRINTF("Type a number between 1 - 6 to select a function and execute it on \r\nCM33 and HiFi4: \r\n1.SQRT\r\n2.SINE\r\n3.VECTOR ADD\r\n4.VECTOR DOT\r\n5.INV MATRIX\r\n6.MATRIX TRANSPOSE\r\n\r\n");

    while (1)
    {
      if(uartTyped)
      {
	      /* Erase to beginning of screen */
          PRINTF("\033[1J");
          /* Place cursor at top */
          PRINTF("\033[f");
          PRINTF("Type a number between 1 - 6 to select a function and execute it on \r\nCM33 and HiFi4: \r\n1.SQRT\r\n2.SINE\r\n3.VECTOR ADD\r\n4.VECTOR DOT\r\n5.INV MATRIX\r\n6.MATRIX TRANSPOSE\r\n\r\n");

          switch(dataTyped)
          {
            case '1':
              /* Execute square root */
              arm_mat_sqrt_Test();
              /* Communicate with HiFi4 to execute math function */
              MU_SendMsg(APP_MU, CHN_MU_REG_NUM, 1);
              dataTyped = 0;
              break;
            case '2':
              /* Execute sine */
              arm_mat_sine_Test();
              /* Communicate with HiFi4 to execute math function */
              MU_SendMsg(APP_MU, CHN_MU_REG_NUM, 2);
              dataTyped = 0;
              break;
            case '3':
              /* Execute vector add */
              arm_mat_vec_add_Test();
              /* Communicate with HiFi4 to execute math function */
              MU_SendMsg(APP_MU, CHN_MU_REG_NUM, 3);
              dataTyped = 0;
              break;
            case '4':
              /* Execute vector dot product */
              arm_mat_vec_dot_Test();
              /* Communicate with HiFi4 to execute math function */
              MU_SendMsg(APP_MU, CHN_MU_REG_NUM, 4);
              dataTyped = 0;
              break;
            case '5':
              /* Execute inverse matrix */
              arm_mat_mtx_inv_Test();
              /* Communicate with HiFi4 to execute math function */
              MU_SendMsg(APP_MU, CHN_MU_REG_NUM, 5);
              dataTyped = 0;
              break;
            case '6':
              /* Execute matrix transpose */
              arm_mat_mtx_tnsp_Test();
              /* Communicate with HiFi4 to execute math function */
              MU_SendMsg(APP_MU, CHN_MU_REG_NUM, 6);
              dataTyped = 0;
              break;

          default:
            break;
          }
        uartTyped = false;
      }
      delay();
      /* Toggle led */
      LED_RED_TOGGLE();
    }
}

/*!
 * @brief Benchmark arm_sqrt_q31.
 *
 * Computes the Q31 square root of 0.25 LOOP_COUNT times, prints an error when
 * the last result differs from Q31 0.5 by more than 2, and prints the average
 * number of core cycles per call.
 */
static void arm_mat_sqrt_Test()
{
    uint16_t iter;
    uint32_t startTick;
    uint32_t endTick;
    uint32_t elapsedTicks;
    uint32_t cycles;
    q31_t operand;
    q31_t expected;
    q31_t result = 0;

    PRINTF("----------------------------------\r\n");
    PRINTF("        SQRT FUNCTION\r\n");

    operand  = FLOAT_2_Q31(0.25f);
    expected = FLOAT_2_Q31(0.5f);

    /* Timed section: LOOP_COUNT back-to-back calls between two tick samples. */
    startTick = TEST_GetTime();
    for (iter = 0; iter < LOOP_COUNT; iter++)
    {
        arm_sqrt_q31(operand, &result);
    }
    endTick = TEST_GetTime();

    /* Verify the result */
    if (abs(expected - result) > 2)
    {
        PRINTF("ERROR on SQRT\r\n");
    }

    /* Convert systick counts to cycles, averaged over the loop */
    elapsedTicks = endTick - startTick;
    cycles       = (uint32_t)((elapsedTicks * CYCLES_PER_SYSTICK) / LOOP_COUNT);

    PRINTF("CM33 SQRT takes %d cycles\r\n\r\n", cycles);
}

/*!
 * @brief Benchmark arm_sin_q31.
 *
 * Evaluates the Q31 sine of 0.5/6 (one twelfth of the input range) LOOP_COUNT
 * times, prints an error when the last result deviates from Q31 0.5 by more
 * than 20000 in either direction, and prints the average number of core
 * cycles per call.
 */
static void arm_mat_sine_Test()
{
  uint16_t i;
  uint32_t tic, toc, cycles;
  int64_t deviation;
  PRINTF("----------------------------------\r\n");
  PRINTF("         SINE FUNCTION\r\n");

  q31_t input      = FLOAT_2_Q31(0.5f / 6.0f);
  q31_t sinResult  = 0;
  q31_t sinRef     = FLOAT_2_Q31(0.5f);

  /* Obtain init time */
  tic = TEST_GetTime();

  /* Execute math function */
  for(i = 0; i < LOOP_COUNT; i++)
  {
    sinResult = arm_sin_q31(input);
  }

  /* Obtain end time */
  toc = TEST_GetTime();

  /*
   * Verify the result. The tolerance is applied to the absolute deviation so
   * that a result that is too large is reported as well as one that is too
   * small; the difference is formed in 64 bits so it cannot overflow for any
   * pair of q31_t values.
   */
  deviation = (int64_t)sinRef - (int64_t)sinResult;
  if(deviation < 0)
  {
    deviation = -deviation;
  }
  if(deviation > 20000)
  {
    PRINTF("ERROR on SINE\r\n");
  }

  /* Convert systick counts to cycles */
  cycles = (uint32_t)(((toc - tic)*CYCLES_PER_SYSTICK)/LOOP_COUNT);

  PRINTF("CM33 SINE takes %d cycles\r\n\r\n", cycles);
}

static void arm_mat_vec_add_Test()
{
  uint32_t i;
  uint32_t tic, toc, cycles;
  PRINTF("----------------------------------\r\n");
  PRINTF("       VECTOR ADD FUNCTION\r\n");

  /* Initialise vector values */
  for(i=0;i<VEC_ADD_LENGTH;i++)
  {
    vec_add_x[i] = i;
    vec_add_y[i] = i;
    vec_add_out_ref[i] = i+i;
  }

  arm_matrix_instance_q31 vecA;
  arm_matrix_instance_q31 vecB;
  arm_matrix_instance_q31 vecR;
  /* Initialise Vector Instance vecA with numRows, numCols and data array(vec_add_x) */
  arm_mat_init_q31(&vecA, 1, VEC_ADD_LENGTH, vec_add_x);
  /* Initialise Vector Instance vecB with numRows, numCols and data array(vec_add_y) */
  arm_mat_init_q31(&vecB, 1, VEC_ADD_LENGTH, vec_add_y);
  /* Initialise Vector Instance vecR with numRows, numCols and data array(vec_add_out) */
  arm_mat_init_q31(&vecR, 1, VEC_ADD_LENGTH, vec_add_out);

  /* Obtain init time */
  tic = TEST_GetTime();

  /* Execute math function */
  for(i = 0; i < LOOP_COUNT; i++)
  {
    arm_mat_add_q31(&vecA, &vecB, &vecR);
  }

  /* Obtain end time */
  toc = TEST_GetTime();

  /* Verify the result */
  for (i = 0; i < VEC_ADD_LENGTH; i++)
  {
      if (vec_add_out[i] != vec_add_out_ref[i])
      {
          PRINTF("ERROR on dot vector\r\n");
      }
  }

  /* Convert systick counts to cycles */
  cycles = (uint32_t)(((toc - tic)*CYCLES_PER_SYSTICK)/LOOP_COUNT);

  PRINTF("CM33 VECTOR ADD takes %d cycles\r\n\r\n", cycles);
}

/*!
 * @brief Benchmark arm_dot_prod_f32 on vec_dot_a and vec_dot_b.
 *
 * Computes the dot product of the two VEC_DOT_LENGTH-element vectors
 * LOOP_COUNT times, prints an error when the last result deviates from the
 * reference value by more than 0.001 in either direction, and prints the
 * average number of core cycles per call.
 */
static void arm_mat_vec_dot_Test()
{
  uint32_t i;
  uint32_t tic, toc, cycles;
  PRINTF("----------------------------------\r\n");
  PRINTF("    VECTOR DOT PRODUCT FUNCTION\r\n");

  /* Initialised so the check below never reads an indeterminate value. */
  float32_t vec_dot_out = 0.0f;
  float32_t vec_dot_out_ref = 138.733643;

  /* Obtain init time */
  tic = TEST_GetTime();

  /* Execute math function */
  for(i = 0; i < LOOP_COUNT; i++)
  {
    arm_dot_prod_f32(vec_dot_a, vec_dot_b, VEC_DOT_LENGTH, &vec_dot_out);
  }

  /* Obtain end time */
  toc = TEST_GetTime();

  /*
   * Verify the result. The absolute deviation is compared so that a result
   * below the reference is reported as well as one above it.
   */
  if(fabsf(vec_dot_out - vec_dot_out_ref) > 0.001)
  {
    PRINTF("ERROR on dot vector\r\n");
  }

  /* Convert systick counts to cycles */
  cycles = (uint32_t)(((toc - tic)*CYCLES_PER_SYSTICK)/LOOP_COUNT);

  PRINTF("CM33 VECTOR DOT PRODUCT takes %d cycles\r\n\r\n", cycles);
}

/*!
 * @brief Benchmark arm_mat_inverse_f32 on a 2x2 identity matrix.
 *
 * Inverts the identity LOOP_COUNT times, prints an error for every output
 * element that differs from the identity, and prints the average number of
 * core cycles per call.
 */
static void arm_mat_mtx_inv_Test()
{
    uint32_t n;
    uint32_t startTick;
    uint32_t endTick;
    uint32_t elapsedTicks;
    uint32_t cycles;
    float32_t source[4]   = {0.0, 0.0, 0.0, 0.0};
    float32_t result[4]   = {0.0, 0.0, 0.0, 0.0};
    float32_t expected[4] = {0.0, 0.0, 0.0, 0.0};
    arm_matrix_instance_f32 srcMatrix;
    arm_matrix_instance_f32 dstMatrix;

    PRINTF("----------------------------------\r\n");
    PRINTF("     INVERSE MATRIX FUNCTION\r\n");

    /* inv(I) = I: put ones on the diagonal (flat indices 0 and 3) of input and reference. */
    for (n = 0; n < 2; n++)
    {
        const uint32_t diag = 3U * n;

        source[diag]   = 1.0f;
        expected[diag] = 1.0f;
    }

    /* Bind the 2x2 matrix instances to the input and output arrays. */
    arm_mat_init_f32(&srcMatrix, 2, 2, source);
    arm_mat_init_f32(&dstMatrix, 2, 2, result);

    /* Timed section: LOOP_COUNT back-to-back calls between two tick samples. */
    startTick = TEST_GetTime();
    for (n = 0; n < LOOP_COUNT; n++)
    {
        arm_mat_inverse_f32(&srcMatrix, &dstMatrix);
    }
    endTick = TEST_GetTime();

    /* Verify the result element by element */
    for (n = 0; n < 4; n++)
    {
        if (result[n] != expected[n])
        {
            PRINTF("ERROR on inverse matrix\r\n");
        }
    }

    /* Convert systick counts to cycles, averaged over the loop */
    elapsedTicks = endTick - startTick;
    cycles       = (uint32_t)((elapsedTicks * CYCLES_PER_SYSTICK) / LOOP_COUNT);

    PRINTF("CM33 INVERSE MATRIX takes %d cycles\r\n\r\n", cycles);
}

/*!
 * @brief Benchmark arm_mat_trans_f32 on an 8x8 matrix.
 *
 * Sets the first row of the input and the first column of the reference to
 * 1.0, transposes the input LOOP_COUNT times, prints an error for every output
 * element that differs from the reference, and prints the average number of
 * core cycles per call.
 */
static void arm_mat_mtx_tnsp_Test()
{
    uint32_t n;
    uint32_t startTick;
    uint32_t endTick;
    uint32_t elapsedTicks;
    uint32_t cycles;
    arm_matrix_instance_f32 srcMatrix;
    arm_matrix_instance_f32 dstMatrix;

    PRINTF("----------------------------------\r\n");
    PRINTF("     TANSPOSE MATRIX FUNCTION\r\n");

    /* Row 0 of the input becomes column 0 of the expected output. */
    for (n = 0; n < 8; n++)
    {
        transpose[n]        = 1.0f;
        transposeRel[n * 8] = 1.0f;
    }

    /* Bind the 8x8 matrix instances to the input and output arrays. */
    arm_mat_init_f32(&srcMatrix, 8, 8, transpose);
    arm_mat_init_f32(&dstMatrix, 8, 8, transposeResult);

    /* Timed section: LOOP_COUNT back-to-back calls between two tick samples. */
    startTick = TEST_GetTime();
    for (n = 0; n < LOOP_COUNT; n++)
    {
        arm_mat_trans_f32(&srcMatrix, &dstMatrix);
    }
    endTick = TEST_GetTime();

    /* Verify the result element by element */
    for (n = 0; n < 64; n++)
    {
        if (dstMatrix.pData[n] != transposeRel[n])
        {
            PRINTF("ERROR on transpose matrix\r\n");
        }
    }

    /* Convert systick counts to cycles, averaged over the loop */
    elapsedTicks = endTick - startTick;
    cycles       = (uint32_t)((elapsedTicks * CYCLES_PER_SYSTICK) / LOOP_COUNT);

    PRINTF("CM33 MATRIX TANSPOSE takes %d cycles\r\n\r\n", cycles);
}

/*!
 * @brief SysTick interrupt handler: advances the tick counter by one.
 */
void SysTick_Handler(void)
{
    s_timeMs += 1U;
}

/*!
 * @brief Reset the tick counter and start SysTick.
 *
 * SysTick is configured with SystemCoreClock / 2000000 core cycles per tick,
 * a nominal tick period of 500 ns (125 cycles at a 250105263 Hz core clock).
 */
static void TEST_InitTime(void)
{
    uint32_t cyclesPerTick;

    s_timeMs = 0;

    /* One tick every 1/2000000 s = 500 ns. */
    cyclesPerTick = SystemCoreClock / 2000000;
    SysTick_Config(cyclesPerTick);
}

/*!
 * @brief Read the tick counter.
 *
 * @return Current value of the counter advanced by SysTick_Handler.
 */
static uint32_t TEST_GetTime(void)
{
    const uint32_t ticksNow = s_timeMs;

    return ticksNow;
}

/*!
 * @brief Fill in a floating-point matrix instance.
 *
 * @param S        Matrix instance to initialise.
 * @param nRows    Number of rows stored in the instance.
 * @param nColumns Number of columns stored in the instance.
 * @param pData    Data array the instance will point at (not copied).
 */
void arm_mat_init_f32(arm_matrix_instance_f32 *S, uint16_t nRows, uint16_t nColumns, float32_t *pData)
{
    /* Attach the storage, then record the dimensions. */
    S->pData   = pData;
    S->numCols = nColumns;
    S->numRows = nRows;
}

/*!
 * @brief Fill in a Q31 matrix instance.
 *
 * @param S        Matrix instance to initialise.
 * @param nRows    Number of rows stored in the instance.
 * @param nColumns Number of columns stored in the instance.
 * @param pData    Data array the instance will point at (not copied).
 */
void arm_mat_init_q31(arm_matrix_instance_q31 *S, uint16_t nRows, uint16_t nColumns, q31_t *pData)
{
    /* Attach the storage, then record the dimensions. */
    S->pData   = pData;
    S->numCols = nColumns;
    S->numRows = nRows;
}

/*!
 * @brief Prepare the board LEDs.
 *
 * Enables the kCLOCK_HsGpio0 clock, resets the kHSGPIO0_RST_SHIFT_RSTn
 * peripheral and then initialises the red, green and blue LEDs with
 * LOGIC_LED_OFF.
 */
void LED_INIT()
{
    /* Clock and reset come first, before the LED init macros below. */
    CLOCK_EnableClock(kCLOCK_HsGpio0);
    RESET_PeripheralReset(kHSGPIO0_RST_SHIFT_RSTn);
    LED_RED_INIT(LOGIC_LED_OFF);
    LED_GREEN_INIT(LOGIC_LED_OFF);
    LED_BLUE_INIT(LOGIC_LED_OFF);
}
