/*
* SACD Decoder plugin
* Copyright (c) 2011-2012 Maxim V.Anisiutkin <maxim.anisiutkin@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <math.h>
#include <memory.h>
#include "dsdpcm_converter_integer.h"

// Attaches the filter to its coefficient tables and history buffer and resets
// the ring position.
//   fir_ctables - lookup tables, one per group of 8 taps, indexed by input byte
//   fir_length  - filter length in taps; stored as CTABLES(fir_length) tables
//   channels    - number of interleaved channels
//   decimation  - decimation factor; stored divided by 8
//   fir_buffer  - caller-provided history storage; at least
//                 2 * CTABLES(fir_length) * channels bytes are written here
// The parameters shadow the members of the same name, hence the explicit this->.
void dsdpcm_fir_i::init(ctable_i* fir_ctables, int fir_length, int channels, int decimation, uint8_t* fir_buffer) {
	this->fir_buffer  = fir_buffer;
	this->fir_ctables = fir_ctables;
	this->channels    = channels;
	this->decimation  = decimation / 8;
	this->fir_order   = fir_length - 1;
	this->fir_length  = CTABLES(fir_length);
	fir_index = 0;
	// The history is kept twice back to back (see run()), hence the factor 2.
	// 0xAA is the alternating bit pattern 10101010 - presumably used as DSD
	// "silence" so the filter starts from a neutral state (TODO confirm).
	memset(this->fir_buffer, 0xAA, 2 * this->fir_length * this->channels * sizeof(*this->fir_buffer));
}

// Returns half the filter order, divided by 8 and by the stored decimation
// (presumably the filter's group delay expressed in output samples).
float dsdpcm_fir_i::get_delay() {
	const float half_order = (float)fir_order / 2;
	return half_order / 8 / decimation;
}

// Filters and decimates interleaved DSD bytes into interleaved 32-bit values.
//   dsd_data    - input bytes, channel-interleaved
//   pcm_data    - output buffer, channel-interleaved
//   dsd_samples - number of input bytes (all channels together)
// Returns dsd_samples / decimation, the number of output values. Each outer
// iteration consumes decimation * channels input bytes and writes `channels`
// outputs.
int dsdpcm_fir_i::run(uint8_t* dsd_data, int32_t* pcm_data, int dsd_samples) {
	int pcm_samples = dsd_samples / decimation;
	for (int sample = 0; sample < pcm_samples; sample += channels) {
		for (int i = 0; i < decimation; i++) {
			for (int ch = 0; ch < channels; ch++) {
				// Every byte is stored twice, fir_length frames apart, so the window
				// starting at fir_index can always be read linearly without wrapping.
				fir_buffer[fir_index * channels + ch + fir_length * channels] = fir_buffer[fir_index * channels + ch] = *dsd_data;
				dsd_data++;
			}
			// Advance the ring position. Written without the pre-increment so that
			// fir_index is modified exactly once per expression.
			fir_index = (fir_index + 1) % fir_length;
		}
		for (int ch = 0; ch < channels; ch++) {
			// Sum into a local so the compiler need not reload/store pcm_data per tap.
			int32_t acc = 0;
			for (int j = 0; j < fir_length; j++) {
				// One table lookup replaces 8 multiply-adds: the input byte selects
				// the precomputed partial sum for its group of taps.
				acc += fir_ctables[fir_length - 1 - j][fir_buffer[(fir_index + j) * channels + ch]];
			}
			pcm_data[sample + ch] = acc;
		}
	}
	return pcm_samples;
}

// Attaches the filter to its coefficients and history buffer and resets the
// ring position.
//   fir_coefs  - filter coefficients, fir_length entries
//   fir_length - number of taps
//   channels   - number of interleaved channels
//   decimation - input samples consumed per output sample
//   fir_buffer - caller-provided history storage; at least
//                2 * fir_length * channels entries are zeroed here
//   offset     - value added to each accumulated sum before scaling
//   scale      - the sum is divided by (1 << scale)
// The parameters shadow the members of the same name, hence the explicit this->.
void pcmpcm_fir_i::init(int32_t* fir_coefs, int fir_length, int channels, int decimation, int32_t* fir_buffer, int offset, int scale) {
	this->fir_buffer = fir_buffer;
	this->fir_coefs  = fir_coefs;
	this->channels   = channels;
	this->decimation = decimation;
	this->fir_length = fir_length;
	this->fir_order  = fir_length - 1;
	this->scale      = scale;
	this->offset     = offset;
	fir_index = 0;
	// The history is kept twice back to back (see run()), hence the factor 2.
	memset(this->fir_buffer, 0x00, 2 * this->fir_length * this->channels * sizeof(*this->fir_buffer));
}

// Returns half the filter order divided by the decimation factor
// (presumably the filter's group delay expressed in output samples).
float pcmpcm_fir_i::get_delay() {
	const float half_order = (float)fir_order / 2;
	return half_order / decimation;
}

// Filters and decimates interleaved 32-bit samples.
//   pcm_data    - input values, channel-interleaved
//   out_data    - output buffer, channel-interleaved
//   pcm_samples - number of input values (all channels together)
// Returns pcm_samples / decimation, the number of output values. Each outer
// iteration consumes decimation * channels inputs and writes `channels` outputs.
//
// The 2-, 5- and 6-channel branches are hand-unrolled copies of the generic
// branch; all four compute the same per-channel sums in the same order.
// Sums are accumulated in double, then (sum + offset) / (1 << scale) is
// truncated to int.
int pcmpcm_fir_i::run(int32_t* pcm_data, int32_t* out_data, int pcm_samples) {
	int out_samples = pcm_samples / decimation;
	switch (channels) {
	case 2:
		for (int sample = 0; sample < out_samples; sample += 2) {
			for (int i = 0; i < decimation; i++) {
				// Each frame is stored twice, fir_length frames apart, so the window
				// starting at fir_index can be read linearly without wrapping.
				fir_buffer[(fir_index + fir_length) * 2 + 0] = fir_buffer[fir_index * 2 + 0] = *(pcm_data++);
				fir_buffer[(fir_index + fir_length) * 2 + 1] = fir_buffer[fir_index * 2 + 1] = *(pcm_data++);
				// Single write to fir_index (no pre-increment inside the assignment).
				fir_index = (fir_index + 1) % fir_length;
			}
			double out_value[2];
			out_value[0] = 0;
			out_value[1] = 0;
			for (int j = 0; j < fir_length; j++) {
				out_value[0] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 2 + 0];
				out_value[1] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 2 + 1];
			}
			out_data[sample + 0] = (int)((out_value[0] + (double)offset) / (1 << scale));
			out_data[sample + 1] = (int)((out_value[1] + (double)offset) / (1 << scale));
		}
		break;
	case 5:
		for (int sample = 0; sample < out_samples; sample += 5) {
			for (int i = 0; i < decimation; i++) {
				fir_buffer[(fir_index + fir_length) * 5 + 0] = fir_buffer[fir_index * 5 + 0] = *(pcm_data++);
				fir_buffer[(fir_index + fir_length) * 5 + 1] = fir_buffer[fir_index * 5 + 1] = *(pcm_data++);
				fir_buffer[(fir_index + fir_length) * 5 + 2] = fir_buffer[fir_index * 5 + 2] = *(pcm_data++);
				fir_buffer[(fir_index + fir_length) * 5 + 3] = fir_buffer[fir_index * 5 + 3] = *(pcm_data++);
				fir_buffer[(fir_index + fir_length) * 5 + 4] = fir_buffer[fir_index * 5 + 4] = *(pcm_data++);
				fir_index = (fir_index + 1) % fir_length;
			}
			double out_value[5];
			out_value[0] = 0;
			out_value[1] = 0;
			out_value[2] = 0;
			out_value[3] = 0;
			out_value[4] = 0;
			for (int j = 0; j < fir_length; j++) {
				out_value[0] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 5 + 0];
				out_value[1] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 5 + 1];
				out_value[2] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 5 + 2];
				out_value[3] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 5 + 3];
				out_value[4] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 5 + 4];
			}
			out_data[sample + 0] = (int)((out_value[0] + (double)offset) / (1 << scale));
			out_data[sample + 1] = (int)((out_value[1] + (double)offset) / (1 << scale));
			out_data[sample + 2] = (int)((out_value[2] + (double)offset) / (1 << scale));
			out_data[sample + 3] = (int)((out_value[3] + (double)offset) / (1 << scale));
			out_data[sample + 4] = (int)((out_value[4] + (double)offset) / (1 << scale));
		}
		break;
	case 6:
		for (int sample = 0; sample < out_samples; sample += 6) {
			for (int i = 0; i < decimation; i++) {
				fir_buffer[(fir_index + fir_length) * 6 + 0] = fir_buffer[fir_index * 6 + 0] = *(pcm_data++);
				fir_buffer[(fir_index + fir_length) * 6 + 1] = fir_buffer[fir_index * 6 + 1] = *(pcm_data++);
				fir_buffer[(fir_index + fir_length) * 6 + 2] = fir_buffer[fir_index * 6 + 2] = *(pcm_data++);
				fir_buffer[(fir_index + fir_length) * 6 + 3] = fir_buffer[fir_index * 6 + 3] = *(pcm_data++);
				fir_buffer[(fir_index + fir_length) * 6 + 4] = fir_buffer[fir_index * 6 + 4] = *(pcm_data++);
				fir_buffer[(fir_index + fir_length) * 6 + 5] = fir_buffer[fir_index * 6 + 5] = *(pcm_data++);
				fir_index = (fir_index + 1) % fir_length;
			}
			double out_value[6];
			out_value[0] = 0;
			out_value[1] = 0;
			out_value[2] = 0;
			out_value[3] = 0;
			out_value[4] = 0;
			out_value[5] = 0;
			for (int j = 0; j < fir_length; j++) {
				out_value[0] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 6 + 0];
				out_value[1] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 6 + 1];
				out_value[2] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 6 + 2];
				out_value[3] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 6 + 3];
				out_value[4] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 6 + 4];
				out_value[5] += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * 6 + 5];
			}
			out_data[sample + 0] = (int)((out_value[0] + (double)offset) / (1 << scale));
			out_data[sample + 1] = (int)((out_value[1] + (double)offset) / (1 << scale));
			out_data[sample + 2] = (int)((out_value[2] + (double)offset) / (1 << scale));
			out_data[sample + 3] = (int)((out_value[3] + (double)offset) / (1 << scale));
			out_data[sample + 4] = (int)((out_value[4] + (double)offset) / (1 << scale));
			out_data[sample + 5] = (int)((out_value[5] + (double)offset) / (1 << scale));
		}
		break;
	default:
		// Generic path for any other channel count.
		for (int sample = 0; sample < out_samples; sample += channels) {
			for (int i = 0; i < decimation; i++) {
				for (int ch = 0; ch < channels; ch++) {
					fir_buffer[fir_index * channels + ch + fir_length * channels] = fir_buffer[fir_index * channels + ch] = *pcm_data;
					pcm_data++;
				}
				fir_index = (fir_index + 1) % fir_length;
			}
			for (int ch = 0; ch < channels; ch++) {
				// A scalar accumulator is enough here and, unlike a fixed-size
				// per-channel array, cannot be overrun by a large channel count.
				double acc = 0;
				for (int j = 0; j < fir_length; j++) {
					acc += (double)fir_coefs[fir_length - 1 - j] * (double)fir_buffer[(fir_index + j) * channels + ch];
				}
				out_data[sample + ch] = (int)((acc + (double)offset) / (1 << scale));
			}
		}
		break;
	}
	return out_samples;
}

// Storage for the class-wide first-stage lookup tables: one 256-entry table
// (indexed by input byte value) per group of 8 filter taps. They are filled
// by preinit() via set_ctables().
int32_t dsdpcm_converter_i::dsd_fir1_8_ctables[CTABLES(DSDFIR1_8_LENGTH)][256];
int32_t dsdpcm_converter_i::dsd_fir1_16_ctables[CTABLES(DSDFIR1_16_LENGTH)][256];

// Forwards conv_type to the dsdpcm_conv_impl_t base. No other work is done
// here; the filters are configured later by init().
dsdpcm_converter_i::dsdpcm_converter_i(conv_type_t conv_type) : dsdpcm_conv_impl_t(conv_type) {
}

// Nothing to release explicitly in this destructor body.
dsdpcm_converter_i::~dsdpcm_converter_i() {
}

int dsdpcm_converter_i::init(int channels, int dsd_samplerate, int pcm_samplerate) {
	static bool preinitialized = false;
	if (!preinitialized) {
		preinit();
		preinitialized = true;
	}
	this->channels = channels;
	this->dsd_samplerate = dsd_samplerate;
	this->pcm_samplerate = pcm_samplerate;
	switch (dsd_samplerate) {
	case DSDxFs64:
		switch (pcm_samplerate) {
		case DSDxFs1:
			conv_mode = DSD64_44100;
			break;
		case DSDxFs2:
			conv_mode = DSD64_88200;
			break;
		case DSDxFs4:
			conv_mode = DSD64_176400;
			break;
		case DSDxFs8:
			conv_mode = DSD64_352800;
			break;
		default:
			return -2;
		}
		break;
	case DSDxFs128:
		switch (pcm_samplerate) {
		case DSDxFs1:
			conv_mode = DSD128_44100;
			break;
		case DSDxFs2:
			conv_mode = DSD128_88200;
			break;
		case DSDxFs4:
			conv_mode = DSD128_176400;
			break;
		case DSDxFs8:
			conv_mode = DSD128_352800;
			break;
		default:
			return -2;
		}
		break;
	default:
		return -1;
		break;
	}
	out_maxval = 0x7fffffff;
	out_minval = -out_maxval - 1;
	float dB_gain_adjust = 12.0f;
	switch (conv_mode) {
	case DSD64_44100:
		dsd_fir1.init(dsd_fir1_16_ctables, DSDFIR1_16_LENGTH, channels, 16, dsd_fir1_buffer);
		pcm_fir2a.init((int32_t*)PCMFIR2_2_COEFS, PCMFIR2_2_LENGTH, channels, 2, pcm_fir2a_buffer);
		pcm_fir3.init((int32_t*)PCMFIR3_2_COEFS, PCMFIR3_2_LENGTH, channels, 2, pcm_fir3_buffer);
		delay = (dsd_fir1.get_delay() / pcm_fir2a.get_decimation() + pcm_fir2a.get_delay()) / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
		break;
	case DSD64_88200:
		dsd_fir1.init(dsd_fir1_8_ctables, DSDFIR1_8_LENGTH, channels, 8, dsd_fir1_buffer);
		pcm_fir2a.init((int32_t*)PCMFIR2_2_COEFS, PCMFIR2_2_LENGTH, channels, 2, pcm_fir2a_buffer);
		pcm_fir3.init((int32_t*)PCMFIR3_2_COEFS, PCMFIR3_2_LENGTH, channels, 2, pcm_fir3_buffer);
		delay = (dsd_fir1.get_delay() / pcm_fir2a.get_decimation() + pcm_fir2a.get_delay()) / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
		break;
	case DSD64_176400:
		dsd_fir1.init(dsd_fir1_8_ctables, DSDFIR1_8_LENGTH, channels, 8, dsd_fir1_buffer);
		pcm_fir3.init((int32_t*)PCMFIR3_2_COEFS, PCMFIR3_2_LENGTH, channels, 2, pcm_fir3_buffer);
		delay = dsd_fir1.get_delay() / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
		break;
	case DSD64_352800:
		dsd_fir1.init(dsd_fir1_8_ctables, DSDFIR1_8_LENGTH, channels, 8, dsd_fir1_buffer);
		delay = dsd_fir1.get_delay();
		break;
	case DSD128_44100:
		dsd_fir1.init(dsd_fir1_16_ctables, DSDFIR1_16_LENGTH, channels, 16, dsd_fir1_buffer);
		pcm_fir2a.init((int32_t*)PCMFIR2_2_COEFS, PCMFIR2_2_LENGTH, channels, 2, pcm_fir2a_buffer);
		pcm_fir2b.init((int32_t*)PCMFIR2_2_COEFS, PCMFIR2_2_LENGTH, channels, 2, pcm_fir2b_buffer);
		pcm_fir3.init((int32_t*)PCMFIR3_2_COEFS, PCMFIR3_2_LENGTH, channels, 2, pcm_fir3_buffer);
		delay = ((dsd_fir1.get_delay() / pcm_fir2a.get_decimation() + pcm_fir2a.get_delay()) / pcm_fir2b.get_decimation() + pcm_fir2b.get_delay()) / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
		break;
	case DSD128_88200:
		dsd_fir1.init(dsd_fir1_16_ctables, DSDFIR1_16_LENGTH, channels, 16, dsd_fir1_buffer);
		pcm_fir2a.init((int32_t*)PCMFIR2_2_COEFS, PCMFIR2_2_LENGTH, channels, 2, pcm_fir2a_buffer);
		pcm_fir3.init((int32_t*)PCMFIR3_2_COEFS, PCMFIR3_2_LENGTH, channels, 2, pcm_fir3_buffer);
		delay = (dsd_fir1.get_delay() / pcm_fir2a.get_decimation() + pcm_fir2a.get_delay()) / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
		break;
	case DSD128_176400:
		dsd_fir1.init(dsd_fir1_16_ctables, DSDFIR1_16_LENGTH, channels, 16, dsd_fir1_buffer);
		pcm_fir3.init((int32_t*)PCMFIR3_2_COEFS, PCMFIR3_2_LENGTH, channels, 2, pcm_fir3_buffer);
		delay = dsd_fir1.get_delay() / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
		break;
	case DSD128_352800:
		dsd_fir1.init(dsd_fir1_16_ctables, DSDFIR1_16_LENGTH, channels, 16, dsd_fir1_buffer);
		delay = dsd_fir1.get_delay();
		break;
	}
	gain0 = DSDPCM_GAIN_0 * pow(10.0f, dB_gain_adjust / 20.0f);
	gain = (int32_t)gain0;
	conv_called = false;
	return 0;
}

// Returns the chain delay computed by init(), reduced by one.
float dsdpcm_converter_i::get_delay() {
	const float reported = delay - 1;
	return reported;
}

// Reports whether a conversion has run since the last init(): the flag is
// cleared by init() and set by convert_internal().
bool dsdpcm_converter_i::is_convert_called() {
	return conv_called;
}

// Converts DSD bytes to 32-bit integer PCM.
// The conversion runs into the internal pcm_temp4 buffer and the result is
// then copied to pcm_data. Returns the number of PCM values written.
int dsdpcm_converter_i::convert(uint8_t* dsd_data, int32_t* pcm_data, int dsd_samples) {
	const int produced = convert_internal(dsd_data, pcm_temp4, dsd_samples);
	int32_t* dst = pcm_data;
	for (int n = 0; n < produced; ++n) {
		*dst++ = pcm_temp4[n];
	}
	return produced;
}

// Converts DSD bytes to floating-point PCM.
// The conversion runs into the internal pcm_temp4 buffer as 32-bit integers
// (already clamped to the int32 range by fracmul), which are then scaled to
// float. Returns the number of PCM values written.
int dsdpcm_converter_i::convert(uint8_t* dsd_data, float* pcm_data, int dsd_samples) {
	// Exactly 2^-31, so the int32 range maps onto [-1.0, 1.0]. The previous
	// rounded constant 4.66e-10f pushed full-scale samples to about +/-1.0007.
	const float int32_to_unit = 1.0f / 2147483648.0f;
	int pcm_samples;
	pcm_samples = convert_internal(dsd_data, pcm_temp4, dsd_samples);
	for (int i = 0; i < pcm_samples; i++) {
		pcm_data[i] = int32_to_unit * (float)pcm_temp4[i];
	}
	return pcm_samples;
}

// Sets the output gain relative to the base gain computed by init().
//   dB_gain - adjustment in decibels; converted to a linear amplitude factor
//             as 10^(dB_gain / 20) and multiplied into gain0.
// The product is truncated to int32_t; it is not range-checked here.
void dsdpcm_converter_i::set_gain(float dB_gain) {
	gain = (int32_t)(gain0 * pow(10.0f, dB_gain / 20.0f));
}

// Builds the byte-indexed lookup tables for a filter.
//   fir_coefs   - filter coefficients, fir_length entries
//   fir_length  - number of taps
//   fir_ctables - output: CTABLES(fir_length) tables of 256 entries each
// Table t covers taps [8*t, 8*t + 8) (fewer for the last table). Entry p is
// the sum of those coefficients, each taken with sign +1 where the matching
// bit of p (bit 0 = first tap of the group) is set and -1 where it is clear.
// Returns the number of tables written.
int dsdpcm_converter_i::set_ctables(int32_t* fir_coefs, int fir_length, ctable_i* fir_ctables) {
	const int table_count = CTABLES(fir_length);
	for (int table = 0; table < table_count; ++table) {
		const int first_tap = table * 8;
		const int remaining = fir_length - first_tap;
		// The last table may cover fewer than 8 taps.
		const int taps = (remaining > 8) ? 8 : remaining;
		for (int pattern = 0; pattern < 256; ++pattern) {
			int sum = 0;
			for (int bit = 0; bit < taps; ++bit) {
				const int sign = ((pattern >> bit) & 1) * 2 - 1;
				sum += sign * fir_coefs[first_tap + bit];
			}
			fir_ctables[table][pattern] = sum;
		}
	}
	return table_count;
}

// Fills both class-wide first-stage lookup tables from their coefficient sets.
// Called once from init().
void dsdpcm_converter_i::preinit() {
	set_ctables((int32_t*)DSDFIR1_8_COEFS, DSDFIR1_8_LENGTH, (ctable_i*)dsd_fir1_8_ctables);
	set_ctables((int32_t*)DSDFIR1_16_COEFS, DSDFIR1_16_LENGTH, (ctable_i*)dsd_fir1_16_ctables);
}

// Runs the filter chain selected by conv_mode and applies the output gain.
//   dsd_data    - input DSD bytes, channel-interleaved
//   pcm_data    - output buffer for the final 32-bit values
//   dsd_samples - number of input bytes
// Intermediate stages write to pcm_temp1..3; the last stage writes to
// pcm_data, which is then gain-scaled and clamped in place by fracmul().
// Returns the number of output values (0 if conv_mode is not a known mode).
int dsdpcm_converter_i::convert_internal(uint8_t* dsd_data, int32_t* pcm_data, int dsd_samples) {
	// Initialised so an unrecognised conv_mode (e.g. convert() before a
	// successful init()) yields "no output" instead of an indeterminate count.
	int pcm_samples = 0;
	switch (conv_mode) {
	case DSD64_44100:
	case DSD64_88200:
	case DSD128_88200:
		// DSD stage + two PCM decimators.
		pcm_samples = dsd_fir1.run(dsd_data, pcm_temp1, dsd_samples);
		pcm_samples = pcm_fir2a.run(pcm_temp1, pcm_temp2, pcm_samples);
		pcm_samples = pcm_fir3.run(pcm_temp2, pcm_data, pcm_samples);
		break;
	case DSD64_176400:
	case DSD128_176400:
		// DSD stage + one PCM decimator.
		pcm_samples = dsd_fir1.run(dsd_data, pcm_temp1, dsd_samples);
		pcm_samples = pcm_fir3.run(pcm_temp1, pcm_data, pcm_samples);
		break;
	case DSD64_352800:
	case DSD128_352800:
		// DSD stage only.
		pcm_samples = dsd_fir1.run(dsd_data, pcm_data, dsd_samples);
		break;
	case DSD128_44100:
		// DSD stage + three PCM decimators.
		pcm_samples = dsd_fir1.run(dsd_data, pcm_temp1, dsd_samples);
		pcm_samples = pcm_fir2a.run(pcm_temp1, pcm_temp2, pcm_samples);
		pcm_samples = pcm_fir2b.run(pcm_temp2, pcm_temp3, pcm_samples);
		pcm_samples = pcm_fir3.run(pcm_temp3, pcm_data, pcm_samples);
		break;
	default:
		// Unknown mode: nothing is converted.
		break;
	}
	pcm_samples = fracmul(pcm_data, pcm_data, pcm_samples);
	conv_called = true;
	return pcm_samples;
}

// Applies the fixed-point gain to each value: value * gain / 2^22, clamped to
// [out_minval, out_maxval] and truncated to int32_t.
//   pcm_data    - input values
//   out_data    - output values (may be the same buffer as pcm_data)
//   pcm_samples - number of values to process
// Returns pcm_samples unchanged.
int dsdpcm_converter_i::fracmul(int32_t* pcm_data, int32_t* out_data, int pcm_samples) {
	int idx = 0;
	while (idx < pcm_samples) {
		// The product is formed in double so it cannot overflow 32 bits.
		double scaled = ((double)pcm_data[idx] * (double)gain) / (1 << 22);
		scaled = (scaled > out_maxval) ? (double)out_maxval : scaled;
		scaled = (scaled < out_minval) ? (double)out_minval : scaled;
		out_data[idx] = (int32_t)scaled;
		++idx;
	}
	return pcm_samples;
}
