/*
 * Copyright 2018-2021 NXP
 * All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>


#include "fsl_debug_console.h"
#include "srtm_naturedsp_test.h"

/*******************************************************************************
 * Commands processing for NatureDSP libraries
 ******************************************************************************/
/*
 * Convert a floating-point value to Q31 fixed point: the argument is scaled
 * by 2^31 (2147483648) and the product is truncated to int32_t.
 * NOTE(review): x = 1.0f scales to 2^31, which does not fit in int32_t, so
 * callers should keep x strictly below 1.0f.
 */
#define FLOAT_2_Q31(x) ((int32_t)((x)*2147483648.0f))

/* Cycle-counter snapshots taken before (tic) and after (toc) each timed loop. */
static int tic, toc;
/*
 * Return the current value of the CCOUNT special register, read with the
 * `rsr.ccount` instruction.
 *
 * NOTE(review): every caller in this file stores the unsigned long result
 * in the int variables tic/toc, so the value is converted at the
 * assignment - confirm that this is the intended width/signedness.
 */
static unsigned long inline get_ccount(void)
{
    unsigned long r;
    /* `volatile` so the compiler does not drop or merge the register reads. */
    __asm__ volatile("rsr.ccount %0" : "=r"(r));
    return r;
}


/*
 * Benchmark and sanity-check scl_sqrt32x32().
 *
 * Calls scl_sqrt32x32() LOOP_COUNT times on the Q31 encoding of 0.25, then
 * compares the last result with the Q31 encoding of 0.5 and prints
 * "ERROR on SQRT" when they differ by more than 1000 LSBs in either
 * direction. Finally prints the average cycle count per call, computed as
 * (toc - tic) / LOOP_COUNT.
 */
void TEST_SQRT()
{
	uint32_t i;
	const int32_t input   = FLOAT_2_Q31(0.25f);
	int32_t sqrtResult    = 0;
	const int32_t sqrtRef = FLOAT_2_Q31(0.5f);
	int64_t error;

	/* Obtain init cycle count */
	tic = get_ccount();

	/* Execute math function */
	for(i = 0; i < LOOP_COUNT; i++)
	{
		sqrtResult = scl_sqrt32x32(input);
	}

	/* Obtain end cycle count */
	toc = get_ccount();

	/*
	 * Verify the result. The difference is formed in 64 bits so it cannot
	 * overflow for any pair of int32_t values, and its magnitude is used so
	 * that a result that is too large is reported as well as one that is
	 * too small.
	 */
	error = (int64_t)sqrtRef - (int64_t)sqrtResult;
	if(error < 0)
	{
		error = -error;
	}
	if(error > 1000)
	{
		PRINTF("ERROR on SQRT\r\n");
	}

	PRINTF("Fusion F1 SQRT takes %d cycles\r\n\r\n", (toc - tic)/LOOP_COUNT);
}

/*
 * Benchmark and sanity-check scl_sine32x32().
 *
 * Calls scl_sine32x32() LOOP_COUNT times on a Q31 phase value, compares the
 * last result with the Q31 encoding of 0.5 and prints "ERROR on SINE" when
 * the absolute error exceeds the Q31 encoding of 0.001. Finally prints the
 * average cycle count per call, computed as (toc - tic) / LOOP_COUNT.
 *
 * The operands are converted with FLOAT_2_Q31(); assigning the float values
 * directly to int32_t would truncate both the input and the reference to 0
 * and reduce the check to sin(0) == 0.
 */
void TEST_SINE()
{
	uint32_t i;

	/*
	 * NOTE(review): assumes scl_sine32x32() evaluates sin(pi * x) for a Q31
	 * argument x, as described in the NatureDSP library reference, so that
	 * x = 1/6 corresponds to sin(pi / 6) = 0.5 - confirm against the
	 * library version in use.
	 */
	const int32_t input     = FLOAT_2_Q31(1.0f / 6.0f);
	int32_t sinResult       = 0;
	const int32_t sinRef    = FLOAT_2_Q31(0.5f);
	const int32_t tolerance = FLOAT_2_Q31(0.001f);
	int64_t error;

	/* Obtain init cycle count */
	tic = get_ccount();

	/* Execute math function */
	for(i = 0; i < LOOP_COUNT; i++)
	{
		sinResult = scl_sine32x32(input);
	}

	/* Obtain end cycle count */
	toc = get_ccount();

	/*
	 * Verify the result. The difference is formed in 64 bits so it cannot
	 * overflow, and its magnitude is compared so that errors in either
	 * direction are reported.
	 */
	error = (int64_t)sinRef - (int64_t)sinResult;
	if(error < 0)
	{
		error = -error;
	}
	if(error > tolerance)
	{
		PRINTF("ERROR on SINE\r\n");
	}

	PRINTF("Fusion F1 SINE takes %d cycles\r\n\r\n", (toc - tic)/LOOP_COUNT);
}

#define INV_MTX_SIZE 2
/*
 * Benchmark and sanity-check mtx_inv2x2f().
 *
 * Runs mtx_inv2x2f() LOOP_COUNT times on a buffer that starts out as the
 * 2x2 identity, then compares every element of that buffer with the
 * identity again (inv(I) = I) and prints one error line per differing
 * element. Finally prints the average cycle count per call, computed as
 * (toc - tic) / LOOP_COUNT.
 */
void TEST_MATRIX_INV()
{
	uint32_t idx;
	/* Both the operand and the expected result are the identity matrix. */
	float32_t mtx[INV_MTX_SIZE*INV_MTX_SIZE]      = {1.0f, 0.0f, 0.0f, 1.0f};
	float32_t expected[INV_MTX_SIZE*INV_MTX_SIZE] = {1.0f, 0.0f, 0.0f, 1.0f};

	/* Cycle count before the timed loop */
	tic = get_ccount();

	/* Timed loop: the same buffer is passed to the routine on every pass */
	for(idx = 0; idx < LOOP_COUNT; ++idx)
	{
		mtx_inv2x2f(mtx);
	}

	/* Cycle count after the timed loop */
	toc = get_ccount();

	/* Element-wise comparison against the expected matrix */
	idx = 0;
	while(idx < (INV_MTX_SIZE*INV_MTX_SIZE))
	{
		if(mtx[idx] != expected[idx])
		{
			PRINTF("ERROR on inverse matrix\r\n");
		}
		++idx;
	}

	PRINTF("Fusion F1 INVERSE MATRIX takes %d cycles\r\n\r\n", (toc - tic)/LOOP_COUNT);
}

#define TRAN_MTX_SIZE 4
/*
 * Benchmark and sanity-check mtx_tran4x4f().
 *
 * The input buffer has its first TRAN_MTX_SIZE elements set to 1 and the
 * expected buffer has every TRAN_MTX_SIZE-th element set to 1 (row 0 and
 * column 0 respectively when the buffers are read row-major). The routine
 * is run LOOP_COUNT times, its output is compared element by element with
 * the expected buffer, one error line is printed per differing element,
 * and the average cycle count per call is printed as
 * (toc - tic) / LOOP_COUNT.
 */
void TEST_MATRIX_TRANSPOSE()
{
	uint32_t idx;
	float32_t src[TRAN_MTX_SIZE*TRAN_MTX_SIZE]      = {0.0};
	float32_t dst[TRAN_MTX_SIZE*TRAN_MTX_SIZE]      = {0.0};
	float32_t expected[TRAN_MTX_SIZE*TRAN_MTX_SIZE] = {0.0};

	/* Fill the input and the expected output */
	idx = 0;
	while(idx < TRAN_MTX_SIZE)
	{
		src[idx]                      = 1.0f;
		expected[idx * TRAN_MTX_SIZE] = 1.0f;
		++idx;
	}

	/* Cycle count before the timed loop */
	tic = get_ccount();

	/* Timed loop; the last argument is presumably the matrix count - TODO confirm */
	for(idx = 0; idx < LOOP_COUNT; ++idx)
	{
		mtx_tran4x4f(dst, src, 1);
	}

	/* Cycle count after the timed loop */
	toc = get_ccount();

	/* Element-wise comparison against the expected matrix */
	idx = 0;
	while(idx < TRAN_MTX_SIZE*TRAN_MTX_SIZE)
	{
		if(dst[idx] != expected[idx])
		{
			PRINTF("ERROR on transpose matrix\r\n");
		}
		++idx;
	}

	PRINTF("Fusion F1 MATRIX TRANSPOSE takes %d cycles\r\n\r\n", (toc - tic)/LOOP_COUNT);
}

#define VEC_DOT_LENGTH 16
const static float32_t vec_dot_out_ref = 138.733643;
const static float32_t vec_dot_x[VEC_DOT_LENGTH] = {1.01, 2.02,  3.03,  4.04,  5.05,  6.06,  7.07,  8.08,
                                                    9.09, 10.10, 11.11, 12.12, 13.13, 14.14, 15.15, 16.16};
const static float32_t vec_dot_y[VEC_DOT_LENGTH] = {-1.01, 2.02,  -3.03,  4.04,  -5.05,  6.06,  -7.07,  8.08,
                                                    -9.09, 10.10, -11.11, 12.12, -13.13, 14.14, -15.15, 16.16};
static float32_t vec_dot_out;
void TEST_VEC_DOT()
{
	int i;

    /* Obtain init cycle count */
    tic = get_ccount();

    /* Execute math function */
    for(i = 0; i < LOOP_COUNT; i++)
    {
    	vec_dot_out = vec_dotf(vec_dot_x, vec_dot_y, VEC_DOT_LENGTH);
    }

    /* Obtain end cycle count */
    toc = get_ccount();

    /* Verify the result */
    if (abs(vec_dot_out - vec_dot_out_ref) > 0.001)
    {
        PRINTF("MISMATCH @ \r\n", (uint32_t)vec_dot_out_ref, (uint32_t)vec_dot_out);
    }

    PRINTF("Fusion F1 VECTOR DOT PRODUCT takes %d cycles\r\n\r\n", (toc - tic)/LOOP_COUNT);
}

#define VEC_ADD_LENGTH 200

/*
 * Benchmark and sanity-check vec_add32x32_fast().
 *
 * Fills two input vectors with 0..VEC_ADD_LENGTH-1, runs
 * vec_add32x32_fast() LOOP_COUNT times, compares every output element with
 * the expected sum and prints one mismatch line per differing element.
 * Finally prints the average cycle count per call, computed as
 * (toc - tic) / LOOP_COUNT.
 */
void TEST_VEC_ADD()
{
	/*
	 * NOTE(review): the NatureDSP reference lists 8-byte aligned buffers
	 * (and a length that is a multiple of 4) as restrictions of the *_fast
	 * vector routines; int32_t arrays on the stack are otherwise only
	 * guaranteed 4-byte alignment. Confirm the exact requirement for the
	 * library version in use.
	 */
	int32_t vec_add_x[VEC_ADD_LENGTH] __attribute__((aligned(8)))   = {0};
	int32_t vec_add_y[VEC_ADD_LENGTH] __attribute__((aligned(8)))   = {0};
	int32_t vec_add_out[VEC_ADD_LENGTH] __attribute__((aligned(8))) = {0};
	int32_t vec_add_out_ref[VEC_ADD_LENGTH];
	int i;

	/* Initialise vector values; every element of the reference is written here */
	for (i = 0; i < VEC_ADD_LENGTH; i++)
	{
		vec_add_x[i]       = i;
		vec_add_y[i]       = i;
		vec_add_out_ref[i] = i + i;
	}

	/* Obtain init cycle count */
	tic = get_ccount();

	/* Execute math function */
	for (i = 0; i < LOOP_COUNT; i++)
	{
		vec_add32x32_fast(vec_add_out, vec_add_x, vec_add_y, VEC_ADD_LENGTH);
	}

	/* Obtain end cycle count */
	toc = get_ccount();

	/* Verify the result */
	for (i = 0; i < VEC_ADD_LENGTH; i++)
	{
		if (vec_add_out[i] != vec_add_out_ref[i])
		{
			PRINTF("MISMATCH @ %d sample expected %d result %d\r\n", i, vec_add_out_ref[i], vec_add_out[i]);
		}
	}

	PRINTF("Fusion F1 VECTOR ADD takes %d cycles\r\n\r\n", (toc - tic)/LOOP_COUNT);

}
