#include "noisecancellation.h"

// Upper bound on the number of sources: the constructor allocates
// dev_pinv_multiplier with room for NofSamples * MaxNumberOfsources floats.
#define MaxNumberOfsources 10

// File-scope variable that receives the return code of the CUDA runtime calls
// checked by the constructor and by getwienermatrices().
// NOTE(review): this is a non-static global, so it has external linkage --
// confirm no other translation unit defines a symbol with the same name.
cudaError_t cudaStatus;

/*
 * Creates the HEALPix kernel helper, caches the problem dimensions and
 * allocates (and zeroes) the device work buffers used by this class.
 *
 * windowsize : NofFrequency is set to windowsize / 2.
 * blocksize  : stored as AudioBlockSize.
 *
 * Device buffers allocated here:
 *   dev_pinv_multiplier   NofSamples * MaxNumberOfsources floats
 *   dev_covariance        AudioBlockSize * NofFrequency * NofSamples * NofSamples floats
 *   dev_multip            same length as dev_covariance, zero-filled
 *   dev_multip_im         same length as dev_covariance, zero-filled
 *   dev_sum_tfmultiplier  AudioBlockSize * NofFrequency floats, zero-filled
 *
 * Failures are reported on stderr and construction continues; a buffer whose
 * allocation failed is left as a null pointer and is not memset.
 */
noisecancellation::noisecancellation(int windowsize, int blocksize)
{
	hp_kernel = new healpixkernel_nc(HEALPixOrder);

	//Init variables
	magkernels = hp_kernel->calculatelegendrekernels(MaxSphericalHarmonicsOrder);
	NofSamples = hp_kernel->gethealpixnumberofpixels();
	AudioBlockSize = blocksize;
	NofFrequency = windowsize / 2;
	nofsources = 0;

	// Start from a known state so that a failed allocation leaves a null
	// pointer behind instead of an indeterminate one.
	dev_pinv_multiplier = nullptr;
	dev_covariance = nullptr;
	dev_multip = nullptr;
	dev_multip_im = nullptr;
	dev_sum_tfmultiplier = nullptr;

	// Allocates `bytes` of device memory into *slot and reports any failure.
	auto allocate = [](void** slot, size_t bytes, const char* what) -> bool {
		cudaStatus = cudaMalloc(slot, bytes);
		if (cudaStatus != cudaSuccess) {
			fprintf(stderr, "cudaMalloc failed! (%s, %llu bytes): %s\n", what,
				(unsigned long long)bytes, cudaGetErrorString(cudaStatus));
			return false;
		}
		return true;
	};

	// Zero-fills a device buffer and reports any failure.
	auto zero_fill = [](void* buffer, size_t bytes, const char* what) {
		cudaStatus = cudaMemset(buffer, 0, bytes);
		if (cudaStatus != cudaSuccess) {
			fprintf(stderr, "cudaMemset failed! (%s, %llu bytes): %s\n", what,
				(unsigned long long)bytes, cudaGetErrorString(cudaStatus));
		}
	};

	// All sizes are computed in size_t: the product of the four dimensions
	// exceeds INT_MAX for larger block / window sizes.
	const size_t samples = (size_t)NofSamples;
	const size_t tf_bins = (size_t)AudioBlockSize * (size_t)NofFrequency;
	const size_t pinv_bytes = samples * (size_t)MaxNumberOfsources * sizeof(float);
	const size_t covariance_bytes = tf_bins * samples * samples * sizeof(float);
	const size_t tfsum_bytes = tf_bins * sizeof(float);

	allocate((void**)&dev_pinv_multiplier, pinv_bytes, "dev_pinv_multiplier");

	// Covariance matrix, e.g. 2 x 1024 x 192 x 192
	allocate((void**)&dev_covariance, covariance_bytes, "dev_covariance");

	// Real and imaginary work buffers, sized like the covariance matrix.
	const bool have_multip = allocate((void**)&dev_multip, covariance_bytes, "dev_multip");
	const bool have_multip_im = allocate((void**)&dev_multip_im, covariance_bytes, "dev_multip_im");

	if (have_multip) {
		zero_fill((void*)dev_multip, covariance_bytes, "dev_multip");
	}
	if (have_multip_im) {
		zero_fill((void*)dev_multip_im, covariance_bytes, "dev_multip_im");
	}

	// One float per (block, frequency) bin.
	if (allocate((void**)&dev_sum_tfmultiplier, tfsum_bytes, "dev_sum_tfmultiplier")) {
		zero_fill((void*)dev_sum_tfmultiplier, tfsum_bytes, "dev_sum_tfmultiplier");
	}
}

/*
 * Reads the speech-source directions from the scene description and uploads
 * the matrix returned by hp_kernel->calc_pinv_matrices() to dev_pinv_multiplier.
 *
 * audioscenegeometry : JSON object read through the keys "SpeechCount" and
 *                      "SpeechList"[i]["AzimuthDirection" / "ElevationDirection"].
 *
 * Side effects: updates nofsources and overwrites the first
 * NofSamples * nofsources floats of dev_pinv_multiplier.
 *
 * dev_pinv_multiplier only has room for MaxNumberOfsources sources, so the
 * requested count is limited to the range [0, MaxNumberOfsources]; anything
 * outside that range is reported on stderr and clamped.
 */
void noisecancellation::getwienermatrices(json audioscenegeometry)
{
	// The by-value parameter is already a private copy, so it is read directly
	// instead of being copied a second time.
	int requested = (int)audioscenegeometry["SpeechCount"];

	if (requested < 0) {
		fprintf(stderr, "getwienermatrices: negative SpeechCount %d, using 0\n", requested);
		requested = 0;
	}
	else if (requested > MaxNumberOfsources) {
		// Copying more would write past the end of dev_pinv_multiplier.
		fprintf(stderr, "getwienermatrices: SpeechCount %d exceeds the maximum of %d, using the first %d sources\n",
			requested, MaxNumberOfsources, MaxNumberOfsources);
		requested = MaxNumberOfsources;
	}

	nofsources = requested;

	vector<float> Az, El;
	Az.reserve((size_t)nofsources);
	El.reserve((size_t)nofsources);

	for (int klm = 0; klm < nofsources; klm++)
	{
		Az.push_back(static_cast<float>(audioscenegeometry["SpeechList"][klm]["AzimuthDirection"]));
		El.push_back(static_cast<float>(audioscenegeometry["SpeechList"][klm]["ElevationDirection"]));
	}

/* This matrix is used for pinv matrix calculation and memcpy*/
/***********************************************************/
	// NOTE(review): ownership of the buffer returned by calc_pinv_matrices is
	// not visible here -- confirm whether the caller is expected to free it.
	const float* host_pinv = (float*)hp_kernel->calc_pinv_matrices(nofsources, Az, El);
	const size_t pinv_bytes = (size_t)NofSamples * (size_t)nofsources * sizeof(float);

	cudaStatus = cudaMemcpy((void*)dev_pinv_multiplier, host_pinv, pinv_bytes, cudaMemcpyHostToDevice);
	if (cudaStatus != cudaSuccess) {
		fprintf(stderr, "cudaMemcpy failed 21! %s\n", cudaGetErrorString(cudaStatus));
	}

/*FOR TESTING PURPOSES CREATE SRF FUNCTION AND COPY THE MAGKERNELS*/
	/*******************************************************************/
	/*******************************************************************/
/*	cudaStatus = cudaMalloc((void**)&SRF_multiplier, AudioBlockSize * NofFrequency * NofSamples * sizeof(float));
	if (cudaStatus != cudaSuccess) {
		fprintf(stderr, "cudaMalloc failed!");
	}

	cudaStatus = cudaMalloc((void**)&SRF_multiplier_im, AudioBlockSize * NofFrequency * NofSamples * sizeof(float));
	if (cudaStatus != cudaSuccess) {
		fprintf(stderr, "cudaMalloc failed!");
	}

	cudaMemset((void*)SRF_multiplier_im, 0, AudioBlockSize * NofFrequency * NofSamples * sizeof(float));

	for (int klm = 0; klm < AudioBlockSize * NofFrequency; klm++)
	{

		cudaStatus = cudaMemcpy((void*)(SRF_multiplier+ klm * NofSamples), (float*)(summ_magnitude), NofSamples * sizeof(float), cudaMemcpyHostToDevice);
		if (cudaStatus != cudaSuccess) {
			fprintf(stderr, "cudaMemcpy failed 21!");
		}
	}*/
	/*******************************************************************/
	/*******************************************************************/

}
/*
 * Runs the noise-cancellation chain on one block of data.
 *
 * srf_multiplier, srf_multiplier_im :
 *     host buffers (real / imaginary), each read as
 *     AudioBlockSize * NofFrequency * NofSamples floats.
 * separated_sources, separated_sources_im :
 *     host buffers (real / imaginary), each holding
 *     AudioBlockSize * NofFrequency * nofsources floats; they are uploaded,
 *     processed by Device_TFBinMasking and overwritten in place with the result.
 *
 * Steps, in order: upload the SRF data, Device_CovarianceSRF,
 * Device_LeftMultiplier, Device_RightMultiplier, upload the separated
 * sources, Device_TFBinSummation, Device_TFBinMasking, download the result.
 *
 * Returns a complex_num_out whose real / imag members point at the caller's
 * separated_sources / separated_sources_im buffers.
 *
 * Every failing CUDA call is reported on stderr; processing then continues
 * with the remaining steps (best effort), as no error channel is available
 * in the return type.
 */
complex_num_out noisecancellation::Operate_NoiseCancellation(void* srf_multiplier,void* srf_multiplier_im,void* separated_sources, void* separated_sources_im)
{

	cudaError_t cudaStatus;

	// Prints `what` together with the CUDA error text when `status` is a failure.
	auto report = [](cudaError_t status, const char* what) {
		if (status != cudaSuccess) {
			fprintf(stderr, "%s: %s\n", what, cudaGetErrorString(status));
		}
	};

	// Sizes are kept in size_t so that the products cannot overflow int.
	const size_t tf_bins = (size_t)AudioBlockSize * (size_t)NofFrequency;
	const size_t samples = (size_t)NofSamples;
	const size_t sources = (size_t)nofsources;

	const size_t srf_bytes = tf_bins * samples * sizeof(float);
	const size_t covariance_bytes = tf_bins * samples * samples * sizeof(float);
	const size_t tfsum_bytes = tf_bins * sizeof(float);
	const size_t left_bytes = tf_bins * sources * samples * sizeof(float);
	const size_t right_bytes = tf_bins * sources * sources * sizeof(float);
	const size_t sepsources_bytes = tf_bins * sources * sizeof(float);

	// Clear the work buffers left over from the previous call.
	cudaStatus = cudaMemset((void*)dev_multip, 0, covariance_bytes);
	report(cudaStatus, "Failed to clear dev_multip");

	cudaStatus = cudaMemset((void*)dev_multip_im, 0, covariance_bytes);
	report(cudaStatus, "Failed to clear dev_multip_im");

	cudaStatus = cudaMemset((void*)dev_sum_tfmultiplier, 0, tfsum_bytes);
	report(cudaStatus, "Failed to clear dev_sum_tfmultiplier");

	// Upload the SRF data (real and imaginary parts).
	cudaStatus = cudaMemcpy((void*)dev_multip, (float*)srf_multiplier, srf_bytes, cudaMemcpyHostToDevice);
	report(cudaStatus, "Failed SRF multiplier real memcpy!");

	cudaStatus = cudaMemcpy((void*)dev_multip_im, (float*)srf_multiplier_im, srf_bytes, cudaMemcpyHostToDevice);
	report(cudaStatus, "Failed SRF multiplier imag memcpy");

	// Calculate covariance matrix
	cudaStatus = Device_CovarianceSRF((float*)dev_covariance, (float*)dev_multip, (float*)dev_multip_im, NofFrequency, AudioBlockSize, NofFrequency, 0, NofSamples);
	report(cudaStatus, "Device_CovarianceSRF failed");

	// Reset the variable to use again
	cudaStatus = cudaMemset((void*)dev_multip, 0, left_bytes);
	report(cudaStatus, "Failed to reset dev_multip before left multiplier");

	// Left multiplier
	cudaStatus = Device_LeftMultiplier((float*)dev_multip, (float*)dev_covariance, (float*)dev_pinv_multiplier, NofFrequency, AudioBlockSize, NofFrequency, 0, NofSamples, nofsources);
	report(cudaStatus, "Device_LeftMultiplier failed");

	// Reset the variable to use again
	cudaStatus = cudaMemset((void*)dev_covariance, 0, right_bytes);
	report(cudaStatus, "Failed to reset dev_covariance before right multiplier");

	// Right multiplier
	cudaStatus = Device_RightMultiplier((float*)dev_covariance, (float*)dev_multip, (float*)dev_pinv_multiplier, NofFrequency, AudioBlockSize, NofFrequency, 0, NofSamples, nofsources);
	report(cudaStatus, "Device_RightMultiplier failed");

	// Upload the separated sources; dev_multip / dev_multip_im are reused as
	// their device-side storage.
	cudaStatus = cudaMemcpy((void*)dev_multip, (float*)separated_sources, sepsources_bytes, cudaMemcpyHostToDevice);
	report(cudaStatus, "Failed separated sources real memcpy (host to device)");

	cudaStatus = cudaMemcpy((void*)dev_multip_im, (float*)separated_sources_im, sepsources_bytes, cudaMemcpyHostToDevice);
	report(cudaStatus, "Failed separated sources imag memcpy (host to device)");

	cudaStatus = Device_TFBinSummation((float*)dev_sum_tfmultiplier, (float*)dev_covariance, NofFrequency, AudioBlockSize, NofFrequency, 0, NofSamples, nofsources);
	report(cudaStatus, "Device_TFBinSummation failed");

	cudaStatus = Device_TFBinMasking((float*)dev_multip, (float*)dev_multip_im, (float*)dev_covariance, (float*)dev_sum_tfmultiplier, NofFrequency, AudioBlockSize, NofFrequency, 0, NofSamples, nofsources);
	report(cudaStatus, "Device_TFBinMasking failed");

	// Download the processed sources back into the caller's buffers.
	cudaStatus = cudaMemcpy((void*)separated_sources, (float*)dev_multip, sepsources_bytes, cudaMemcpyDeviceToHost);
	report(cudaStatus, "Failed separated sources real memcpy (device to host)");

	cudaStatus = cudaMemcpy((void*)separated_sources_im, (float*)dev_multip_im, sepsources_bytes, cudaMemcpyDeviceToHost);
	report(cudaStatus, "Failed separated sources imag memcpy (device to host)");

	complex_num_out res_wpf = complex_num_out();

	res_wpf.real = (float*)separated_sources;
	res_wpf.imag = (float*)separated_sources_im;

	// Return the populated result; the previous code returned a fresh, empty
	// complex_num_out and discarded res_wpf.
	return res_wpf;
}


