/*
* SACD Decoder plugin
* Copyright (c) 2011 Maxim V.Anisiutkin <maxim.anisiutkin@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/

#include <math.h>
#include <memory.h>
#include "dsdpcm_converter_real.h"

/*
* Configure the DSD->PCM FIR stage and (re)allocate its per-channel history buffers.
*
* fir_ctables - lookup tables indexed as [table][input byte]; stored, not copied
* fir_length  - filter length in taps; converted to a table count with CTABLES()
* channels    - number of channels for which a history buffer is allocated
* decimation  - decimation factor; stored divided by 8
*               (presumably because one input byte carries 8 DSD bits - confirm)
*
* Each history buffer holds 2 * fir_size bytes and is filled with 0x55.
* NOTE(review): the result of _aligned_malloc() is not checked before memset().
*/
void dsdpcm_fir_r::init(ctable_r* fir_ctables, int fir_length, int channels, int decimation) {
	// Drop buffers from any previous configuration first.
	free();
	this->fir_ctables = fir_ctables;
	this->fir_order   = fir_length - 1;
	this->fir_length  = CTABLES(fir_length);
	this->fir_size    = SSE_ASIZE(this->fir_length, sizeof(real_t));
	this->channels    = channels;
	this->decimation  = decimation / 8;
	this->fir_index   = 0;
	// Twice the padded length: run()/run_sse() mirror every write at fir_index + fir_length.
	int buf_size = 2 * this->fir_size * sizeof(uint8_t);
	int ch = 0;
	while (ch < this->channels) {
		uint8_t* history = (uint8_t*)_aligned_malloc(buf_size, SSE_ALIGN);
		this->fir_buffer[ch] = history;
		memset(history, 0x55, buf_size);
		++ch;
	}
}

/*
* Release every allocated history buffer and reset its slot to NULL.
* All DSDPCM_MAX_CHANNELS slots are visited; empty slots are skipped.
*/
void dsdpcm_fir_r::free() {
	for (int slot = 0; slot < DSDPCM_MAX_CHANNELS; ++slot) {
		if (!fir_buffer[slot]) {
			continue;
		}
		_aligned_free(fir_buffer[slot]);
		fir_buffer[slot] = NULL;
	}
}

/*
* Return fir_order / 2 / 8 / decimation as a float
* (presumably the filter delay expressed in output samples - confirm).
*/
float dsdpcm_fir_r::get_delay() {
	const float half_order = (float)fir_order / 2;
	return half_order / 8 / decimation;
}

/*
* Scalar DSD->PCM FIR pass.
*
* dsd_data    - input bytes, read interleaved by channel (one byte per channel per step)
* pcm_data    - output samples, written interleaved by channel
* dsd_samples - number of input bytes to process
*
* Returns dsd_samples / decimation, or 0 when the filter has no usable
* configuration (non-positive decimation, channel count or table count).
*/
int dsdpcm_fir_r::run(uint8_t* dsd_data, real_t* pcm_data, int dsd_samples) {
	// Guard against integer division by zero and a loop that would never advance.
	if (decimation <= 0 || channels <= 0 || fir_length <= 0) {
		return 0;
	}
	int pcm_samples = dsd_samples / decimation;
	for (int sample = 0; sample < pcm_samples; sample += channels) {
		for (int i = 0; i < decimation; i++) {
			for (int ch = 0; ch < channels; ch++) {
				// Store each byte twice, fir_length apart, so the window starting
				// at fir_index is always contiguous and needs no wrap-around.
				fir_buffer[ch][fir_index + fir_length] = fir_buffer[ch][fir_index] = *(dsd_data++);
			}
			// Advance the ring index; avoids modifying fir_index twice in one expression.
			fir_index = (fir_index + 1) % fir_length;
		}
		for (int ch = 0; ch < channels; ch++) {
			const uint8_t* window = &fir_buffer[ch][fir_index];
			pcm_data[sample + ch] = 0;
			for (int j = 0; j < fir_length; j++) {
				// One table lookup per stored byte replaces the per-bit multiply-adds.
				pcm_data[sample + ch] += fir_ctables[j][window[j]];
			}
		}
	}
	return pcm_samples;
}

/*
* SSE variant of run(): same inputs and outputs, but sums four table lookups per step.
*
* dsd_data    - input bytes, read interleaved by channel
* pcm_data    - output samples, written interleaved by channel
* dsd_samples - number of input bytes to process
*
* Returns dsd_samples / decimation, or 0 when the filter has no usable
* configuration (non-positive decimation, channel count or table count).
*
* The inner loop runs to fir_size (the padded length) and hard-codes offsets
* j+0..j+3, so it assumes SSE_ALIGN / sizeof(real_t) == 4 - TODO confirm.
*/
int dsdpcm_fir_r::run_sse(uint8_t* dsd_data, real_t* pcm_data, int dsd_samples) {
	// Guard against integer division by zero and a loop that would never advance.
	if (decimation <= 0 || channels <= 0 || fir_length <= 0) {
		return 0;
	}
	int pcm_samples = dsd_samples / decimation;
	for (int sample = 0; sample < pcm_samples; sample += channels) {
		for (int i = 0; i < decimation; i++) {
			for (int ch = 0; ch < channels; ch++) {
				// Mirrored write keeps the window starting at fir_index contiguous.
				fir_buffer[ch][fir_index + fir_length] = fir_buffer[ch][fir_index] = *(dsd_data++);
			}
			// Advance the ring index; avoids modifying fir_index twice in one expression.
			fir_index = (fir_index + 1) % fir_length;
		}
		for (int ch = 0; ch < channels; ch++) {
			const uint8_t* window = &fir_buffer[ch][fir_index];
			__m128 s = _mm_setzero_ps();
			for (int j = 0; j < fir_size; j += SSE_ALIGN / sizeof(real_t)) {
				// _mm_set_ps takes its arguments from the highest lane to the lowest.
				__m128 x = _mm_set_ps(
					fir_ctables[j + 3][window[j + 3]],
					fir_ctables[j + 2][window[j + 2]],
					fir_ctables[j + 1][window[j + 1]],
					fir_ctables[j + 0][window[j + 0]]
				);
				s = _mm_add_ps(s, x);
			}
			// Horizontal sum: add the swapped halves, then the reversed vector,
			// which leaves the total of all four lanes in lane 0.
			s = _mm_add_ps(s, _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 0, 3, 2)));
			s = _mm_add_ps(s, _mm_shuffle_ps(s, s, _MM_SHUFFLE(0, 1, 2, 3)));
			_mm_store_ss(&pcm_data[sample + ch], s);
		}
	}
	return pcm_samples;
}

/*
* Configure the PCM->PCM FIR stage and (re)allocate its per-channel history buffers.
*
* fir_coefs  - coefficient array; stored, not copied
* fir_length - filter length in taps
* channels   - number of channels for which a history buffer is allocated
* decimation - decimation factor, stored unchanged
*
* Each history buffer holds 2 * fir_size real_t values and is zero-filled.
* NOTE(review): the result of _aligned_malloc() is not checked before memset().
*/
void pcmpcm_fir_r::init(real_t* fir_coefs, int fir_length, int channels, int decimation) {
	// Drop buffers from any previous configuration first.
	free();
	this->fir_coefs  = fir_coefs;
	this->fir_order  = fir_length - 1;
	this->fir_length = fir_length;
	this->fir_size   = SSE_ASIZE(this->fir_length, sizeof(real_t));
	this->channels   = channels;
	this->decimation = decimation;
	this->fir_index  = 0;
	// Twice the padded length: run()/run_sse() mirror every write at fir_index + fir_length.
	int buf_size = 2 * this->fir_size * sizeof(real_t);
	int ch = 0;
	while (ch < this->channels) {
		real_t* history = (real_t*)_aligned_malloc(buf_size, SSE_ALIGN);
		this->fir_buffer[ch] = history;
		memset(history, 0, buf_size);
		++ch;
	}
}

/*
* Release every allocated history buffer and reset its slot to NULL.
* All DSDPCM_MAX_CHANNELS slots are visited; empty slots are skipped.
*/
void pcmpcm_fir_r::free() {
	for (int slot = 0; slot < DSDPCM_MAX_CHANNELS; ++slot) {
		if (!fir_buffer[slot]) {
			continue;
		}
		_aligned_free(fir_buffer[slot]);
		fir_buffer[slot] = NULL;
	}
}

/*
* Return fir_order / 2 / decimation as a float
* (presumably the filter delay expressed in output samples - confirm).
*/
float pcmpcm_fir_r::get_delay() {
	const float half_order = (float)fir_order / 2;
	return half_order / decimation;
}

/*
* Scalar PCM->PCM decimating FIR pass.
*
* pcm_data    - input samples, read interleaved by channel
* out_data    - output samples, written interleaved by channel
* pcm_samples - number of input samples to process
*
* Returns pcm_samples / decimation, or 0 when the filter has no usable
* configuration (non-positive decimation, channel count or length).
*/
int pcmpcm_fir_r::run(real_t* pcm_data, real_t* out_data, int pcm_samples) {
	// Guard against integer division by zero and a loop that would never advance.
	if (decimation <= 0 || channels <= 0 || fir_length <= 0) {
		return 0;
	}
	int out_samples = pcm_samples / decimation;
	for (int sample = 0; sample < out_samples; sample += channels) {
		for (int i = 0; i < decimation; i++) {
			for (int ch = 0; ch < channels; ch++) {
				// Store each sample twice, fir_length apart, so the window starting
				// at fir_index is always contiguous and needs no wrap-around.
				fir_buffer[ch][fir_index + fir_length] = fir_buffer[ch][fir_index] = *(pcm_data++);
			}
			// Advance the ring index; avoids modifying fir_index twice in one expression.
			fir_index = (fir_index + 1) % fir_length;
		}
		for (int ch = 0; ch < channels; ch++) {
			out_data[sample + ch] = (real_t)0;
			for (int j = 0; j < fir_length; j++) {
				out_data[sample + ch] += fir_coefs[j] * fir_buffer[ch][fir_index + j];
			}
		}
	}
	return out_samples;
}

/*
* SSE variant of run(): same inputs and outputs, multiplying and summing one
* vector of coefficients and history samples per step.
*
* pcm_data    - input samples, read interleaved by channel
* out_data    - output samples, written interleaved by channel
* pcm_samples - number of input samples to process
*
* Returns pcm_samples / decimation, or 0 when the filter has no usable
* configuration (non-positive decimation, channel count or length).
*
* The coefficients are read with an aligned load, so fir_coefs must be
* 16-byte aligned - NOTE(review): confirm the alignment of the arrays passed to init().
* The loop runs to fir_size (the padded length) rather than fir_length.
*/
int pcmpcm_fir_r::run_sse(real_t* pcm_data, real_t* out_data, int pcm_samples) {
	// Guard against integer division by zero and a loop that would never advance.
	if (decimation <= 0 || channels <= 0 || fir_length <= 0) {
		return 0;
	}
	int out_samples = pcm_samples / decimation;
	for (int sample = 0; sample < out_samples; sample += channels) {
		for (int i = 0; i < decimation; i++) {
			for (int ch = 0; ch < channels; ch++) {
				// Mirrored write keeps the window starting at fir_index contiguous.
				fir_buffer[ch][fir_index + fir_length] = fir_buffer[ch][fir_index] = *(pcm_data++);
			}
			// Advance the ring index; avoids modifying fir_index twice in one expression.
			fir_index = (fir_index + 1) % fir_length;
		}
		for (int ch = 0; ch < channels; ch++) {
			__m128 s = _mm_setzero_ps();
			for (int j = 0; j < fir_size; j += SSE_ALIGN / sizeof(real_t)) {
				// The history window starts at an arbitrary fir_index, hence the unaligned load.
				s = _mm_add_ps(s, _mm_mul_ps(_mm_load_ps(&fir_coefs[j]), _mm_loadu_ps(&fir_buffer[ch][fir_index + j])));
			}
			// Horizontal sum: add the swapped halves, then the reversed vector,
			// which leaves the total of all four lanes in lane 0.
			s = _mm_add_ps(s, _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 0, 3, 2)));
			s = _mm_add_ps(s, _mm_shuffle_ps(s, s, _MM_SHUFFLE(0, 1, 2, 3)));
			_mm_store_ss(&out_data[sample + ch], s);
		}
	}
	return out_samples;
}

// Storage for the class-wide filter data, filled in by preinit().
// The first dimension of each lookup table is the padded table count; the
// second dimension (256) is indexed by the value of one input byte.
real_t dsdpcm_converter_r::dsd_fir1_8_ctables[SSE_ASIZE(CTABLES(DSDFIR1_8_LENGTH), sizeof(real_t))][256];
real_t dsdpcm_converter_r::dsd_fir1_16_ctables[SSE_ASIZE(CTABLES(DSDFIR1_16_LENGTH), sizeof(real_t))][256];
real_t dsdpcm_converter_r::dsd_fir1_64_ctables[SSE_ASIZE(CTABLES(DSDFIR1_64_LENGTH), sizeof(real_t))][256];
// Coefficient arrays for the PCM stages, padded to the SSE_ASIZE length.
real_t dsdpcm_converter_r::pcm_fir2_2_coefs[SSE_ASIZE(PCMFIR2_2_LENGTH, sizeof(real_t))];
real_t dsdpcm_converter_r::pcm_fir3_2_coefs[SSE_ASIZE(PCMFIR3_2_LENGTH, sizeof(real_t))];

/*
* Construct a converter; the conversion type is forwarded to the base class.
* No other work is done here - filters are set up later by init().
*/
dsdpcm_converter_r::dsdpcm_converter_r(conv_type_t conv_type)
	: dsdpcm_conv_impl_t(conv_type) {}

/*
* Destructor; performs no explicit cleanup of its own.
*/
dsdpcm_converter_r::~dsdpcm_converter_r() {}

/*
* Prepare the converter for one input/output rate combination.
*
* channels       - channel count, passed on to every filter stage
* dsd_samplerate - input rate; DSDxFs64 and DSDxFs128 are accepted
* pcm_samplerate - output rate; DSDxFs1, DSDxFs2, DSDxFs4 and DSDxFs8 are accepted
*
* Returns 0 on success, -1 for an unsupported DSD rate and -2 for an
* unsupported PCM rate.
*
* The shared lookup tables are built on the first call.
* NOTE(review): the "preinitialized" flag is a plain static bool with no
* synchronization visible in this file.
*/
int dsdpcm_converter_r::init(int channels, int dsd_samplerate, int pcm_samplerate) {
	static bool preinitialized = false;
	if (!preinitialized) {
		preinit();
		preinitialized = true;
	}
	// CPUID leaf 1: bit 26 of EDX (CPUInfo[3]) reports SSE2 support.
	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	sse2_enabled = (CPUInfo[3] & (1L << 26)) != 0;
	this->channels = channels;
	this->dsd_samplerate = dsd_samplerate;
	this->pcm_samplerate = pcm_samplerate;
	switch (dsd_samplerate) {
	case DSDxFs64:
		switch (pcm_samplerate) {
		case DSDxFs1:
			conv_mode = DSD64_44100;
			break;
		case DSDxFs2:
			conv_mode = DSD64_88200;
			break;
		case DSDxFs4:
			conv_mode = DSD64_176400;
			break;
		case DSDxFs8:
			conv_mode = DSD64_352800;
			break;
		default:
			return -2;
		}
		break;
	case DSDxFs128:
		switch (pcm_samplerate) {
		case DSDxFs1:
			conv_mode = DSD128_44100;
			break;
		case DSDxFs2:
			conv_mode = DSD128_88200;
			break;
		case DSDxFs4:
			conv_mode = DSD128_176400;
			break;
		case DSDxFs8:
			conv_mode = DSD128_352800;
			break;
		default:
			return -2;
		}
		break;
	default:
		return -1;
	}
	// Initialized so that a conv_type handled by neither case below yields
	// unity gain instead of reading an indeterminate value.
	float dB_gain_adjust = 0.0f;
	// In every case the total delay is built stage by stage: the delay so far
	// is divided by the next stage's decimation, then that stage's delay is added.
	switch (conv_type) {
	case DSDPCM_CONV_MULTISTAGE_SINGLE:
		dB_gain_adjust = 18.0f;
		switch (conv_mode) {
		case DSD64_44100:
			dsd_fir1.init(dsd_fir1_16_ctables, DSDFIR1_16_LENGTH, channels, 16);
			pcm_fir2a.init(pcm_fir2_2_coefs, PCMFIR2_2_LENGTH, channels, 2);
			pcm_fir3.init(pcm_fir3_2_coefs, PCMFIR3_2_LENGTH, channels, 2);
			delay = (dsd_fir1.get_delay() / pcm_fir2a.get_decimation() + pcm_fir2a.get_delay()) / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
			break;
		case DSD64_88200:
			dsd_fir1.init(dsd_fir1_8_ctables, DSDFIR1_8_LENGTH, channels, 8);
			pcm_fir2a.init(pcm_fir2_2_coefs, PCMFIR2_2_LENGTH, channels, 2);
			pcm_fir3.init(pcm_fir3_2_coefs, PCMFIR3_2_LENGTH, channels, 2);
			delay = (dsd_fir1.get_delay() / pcm_fir2a.get_decimation() + pcm_fir2a.get_delay()) / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
			break;
		case DSD64_176400:
			dsd_fir1.init(dsd_fir1_8_ctables, DSDFIR1_8_LENGTH, channels, 8);
			pcm_fir3.init(pcm_fir3_2_coefs, PCMFIR3_2_LENGTH, channels, 2);
			delay = dsd_fir1.get_delay() / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
			break;
		case DSD64_352800:
			dsd_fir1.init(dsd_fir1_8_ctables, DSDFIR1_8_LENGTH, channels, 8);
			delay = dsd_fir1.get_delay();
			break;
		case DSD128_44100:
			dsd_fir1.init(dsd_fir1_16_ctables, DSDFIR1_16_LENGTH, channels, 16);
			pcm_fir2a.init(pcm_fir2_2_coefs, PCMFIR2_2_LENGTH, channels, 2);
			pcm_fir2b.init(pcm_fir2_2_coefs, PCMFIR2_2_LENGTH, channels, 2);
			pcm_fir3.init(pcm_fir3_2_coefs, PCMFIR3_2_LENGTH, channels, 2);
			delay = ((dsd_fir1.get_delay() / pcm_fir2a.get_decimation() + pcm_fir2a.get_delay()) / pcm_fir2b.get_decimation() + pcm_fir2b.get_delay()) / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
			break;
		case DSD128_88200:
			dsd_fir1.init(dsd_fir1_16_ctables, DSDFIR1_16_LENGTH, channels, 16);
			pcm_fir2a.init(pcm_fir2_2_coefs, PCMFIR2_2_LENGTH, channels, 2);
			pcm_fir3.init(pcm_fir3_2_coefs, PCMFIR3_2_LENGTH, channels, 2);
			delay = (dsd_fir1.get_delay() / pcm_fir2a.get_decimation() + pcm_fir2a.get_delay()) / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
			break;
		case DSD128_176400:
			dsd_fir1.init(dsd_fir1_16_ctables, DSDFIR1_16_LENGTH, channels, 16);
			pcm_fir3.init(pcm_fir3_2_coefs, PCMFIR3_2_LENGTH, channels, 2);
			delay = dsd_fir1.get_delay() / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
			break;
		case DSD128_352800:
			dsd_fir1.init(dsd_fir1_16_ctables, DSDFIR1_16_LENGTH, channels, 16);
			delay = dsd_fir1.get_delay();
			break;
		}
		break;
	case DSDPCM_CONV_DIRECT_SINGLE:
		dB_gain_adjust = 0.0f;
		switch (conv_mode) {
		case DSD64_44100:
			dsd_fir1.init(dsd_fir1_64_ctables, DSDFIR1_64_LENGTH, channels, 32);
			pcm_fir3.init(pcm_fir3_2_coefs, PCMFIR3_2_LENGTH, channels, 2);
			delay = dsd_fir1.get_delay() / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
			break;
		case DSD64_88200:
			dsd_fir1.init(dsd_fir1_64_ctables, DSDFIR1_64_LENGTH, channels, 32);
			delay = dsd_fir1.get_delay();
			break;
		case DSD64_176400:
			dsd_fir1.init(dsd_fir1_64_ctables, DSDFIR1_64_LENGTH, channels, 16);
			delay = dsd_fir1.get_delay();
			break;
		case DSD64_352800:
			dsd_fir1.init(dsd_fir1_64_ctables, DSDFIR1_64_LENGTH, channels, 8);
			delay = dsd_fir1.get_delay();
			break;
		case DSD128_44100:
			dsd_fir1.init(dsd_fir1_64_ctables, DSDFIR1_64_LENGTH, channels, 64);
			pcm_fir3.init(pcm_fir3_2_coefs, PCMFIR3_2_LENGTH, channels, 2);
			delay = dsd_fir1.get_delay() / pcm_fir3.get_decimation() + pcm_fir3.get_delay();
			break;
		case DSD128_88200:
			dsd_fir1.init(dsd_fir1_64_ctables, DSDFIR1_64_LENGTH, channels, 64);
			delay = dsd_fir1.get_delay();
			break;
		case DSD128_176400:
			dsd_fir1.init(dsd_fir1_64_ctables, DSDFIR1_64_LENGTH, channels, 32);
			delay = dsd_fir1.get_delay();
			break;
		case DSD128_352800:
			dsd_fir1.init(dsd_fir1_64_ctables, DSDFIR1_64_LENGTH, channels, 16);
			delay = dsd_fir1.get_delay();
			break;
		}
		break;
	default:
		// Other conversion types configure no filters here; gain stays at 0 dB.
		break;
	}
	// Convert the dB adjustment to a linear factor and make it the current gain.
	gain0 = pow(10.0f, dB_gain_adjust / 20.0f);
	gain = (real_t)gain0;
	conv_called = false;
	return 0;
}

/*
* Return the delay computed by init(), reduced by one.
*/
float dsdpcm_converter_r::get_delay() {
	const float adjusted_delay = delay - 1;
	return adjusted_delay;
}

bool dsdpcm_converter_r::is_convert_called() {
	return conv_called;
}

int dsdpcm_converter_r::convert(uint8_t* dsd_data, int32_t* pcm_data, int dsd_samples) {
	int pcm_samples;
	if (sse2_enabled) {
		pcm_samples = convert_internal_sse(dsd_data, pcm_tempo, dsd_samples);
	}
	else {
		pcm_samples = convert_internal(dsd_data, pcm_tempo, dsd_samples);
	}
	for (int i = 0; i < pcm_samples; i++) {
		pcm_data[i] = (int32_t)pcm_tempo[i];
	}
	return pcm_samples;
}

int dsdpcm_converter_r::convert(uint8_t* dsd_data, float* pcm_data, int dsd_samples) {
	int pcm_samples;
	if (sse2_enabled) {
		pcm_samples = convert_internal_sse(dsd_data, pcm_tempo, dsd_samples);
	}
	else {
		pcm_samples = convert_internal(dsd_data, pcm_tempo, dsd_samples);
	}
	for (int i = 0; i < pcm_samples; i++) {
		pcm_data[i] = (float)(gain * pcm_tempo[i]);
	}
	return pcm_samples;
}

/*
* Set the output gain.
*
* dB_gain - gain in decibels; it is converted to a linear factor
*           (10^(dB/20)) and multiplied with the base gain from init().
*/
void dsdpcm_converter_r::set_gain(float dB_gain) {
	gain = (real_t)(pow(10.0f, dB_gain / 20.0f) * gain0);
}

int dsdpcm_converter_r::set_ctables(int32_t* fir_coefs, int fir_length, ctable_r* fir_ctables) {
	int ctables = SSE_ASIZE(CTABLES(fir_length), sizeof(real_t));
	for (int ct = 0; ct < ctables; ct++) {
		int k = fir_length - ct * 8;
		if (k > 8) {
			k = 8;
		}
		if (k < 0) {
			k = 0;
		}
		for (int i = 0; i < 256; i++) {
			real_t cvalue = (real_t)0;
			for (int j = 0; j < k; j++) {
				cvalue += (real_t)(((i >> (7 - j)) & 1) * 2 - 1) * (real_t)fir_coefs[fir_length - 1 - (ct * 8 + j)] * NORM;
			}
			fir_ctables[ct][i] = cvalue;
		}
	}
	return ctables;
}

/*
* Convert integer coefficients to real_t, reversed in order and scaled by NORM.
*
* int_coefs  - source coefficients, fir_length entries
* fir_length - number of coefficients
* real_coefs - destination; must hold SSE_ASIZE(fir_length, sizeof(real_t))
*              entries, because the padding tail is cleared to zero
*/
void dsdpcm_converter_r::set_coefs(const int32_t* int_coefs, int fir_length, real_t* real_coefs) {
	const int last = fir_length - 1;
	for (int dst = 0; dst < fir_length; ++dst) {
		real_coefs[dst] = (real_t)int_coefs[last - dst] * NORM;
	}
	// Zero the padding so a padded-length SSE loop adds nothing for these slots.
	const int padded_length = (int)SSE_ASIZE(fir_length, sizeof(real_t));
	int pad = fir_length;
	while (pad < padded_length) {
		real_coefs[pad] = (real_t)0;
		++pad;
	}
}

/*
* Fill the class-wide coefficient arrays and lookup tables from the
* integer coefficient sets. Called once from init().
*/
void dsdpcm_converter_r::preinit() {
	// PCM stage coefficients.
	set_coefs(PCMFIR2_2_COEFS, PCMFIR2_2_LENGTH, pcm_fir2_2_coefs);
	set_coefs(PCMFIR3_2_COEFS, PCMFIR3_2_LENGTH, pcm_fir3_2_coefs);
	// DSD stage lookup tables, one set per filter variant.
	set_ctables((int32_t*)DSDFIR1_8_COEFS, DSDFIR1_8_LENGTH, (ctable_r*)dsd_fir1_8_ctables);
	set_ctables((int32_t*)DSDFIR1_16_COEFS, DSDFIR1_16_LENGTH, (ctable_r*)dsd_fir1_16_ctables);
	set_ctables((int32_t*)DSDFIR1_64_COEFS, DSDFIR1_64_LENGTH, (ctable_r*)dsd_fir1_64_ctables);
}

/*
* Run the scalar filter chain selected by conv_type and conv_mode.
*
* dsd_data    - input bytes
* pcm_data    - receives the output of the last stage
* dsd_samples - number of input bytes
*
* Intermediate stages write to the pcm_temp1..pcm_temp3 scratch buffers.
* Returns the sample count reported by the last stage, or 0 when no chain
* matches the current conv_type/conv_mode. Sets conv_called in every case.
*/
int dsdpcm_converter_r::convert_internal(uint8_t* dsd_data, real_t* pcm_data, int dsd_samples) {
	// Starts at 0 so an unmatched type/mode returns a defined value.
	int pcm_samples = 0;
	switch (conv_type) {
	case DSDPCM_CONV_MULTISTAGE_SINGLE:
		switch (conv_mode) {
		case DSD64_44100:
		case DSD64_88200:
		case DSD128_88200:
			// dsd_fir1 -> pcm_fir2a -> pcm_fir3
			pcm_samples = dsd_fir1.run(dsd_data, pcm_temp1, dsd_samples);
			pcm_samples = pcm_fir2a.run(pcm_temp1, pcm_temp2, pcm_samples);
			pcm_samples = pcm_fir3.run(pcm_temp2, pcm_data, pcm_samples);
			break;
		case DSD64_176400:
		case DSD128_176400:
			// dsd_fir1 -> pcm_fir3
			pcm_samples = dsd_fir1.run(dsd_data, pcm_temp1, dsd_samples);
			pcm_samples = pcm_fir3.run(pcm_temp1, pcm_data, pcm_samples);
			break;
		case DSD64_352800:
		case DSD128_352800:
			// dsd_fir1 only
			pcm_samples = dsd_fir1.run(dsd_data, pcm_data, dsd_samples);
			break;
		case DSD128_44100:
			// dsd_fir1 -> pcm_fir2a -> pcm_fir2b -> pcm_fir3
			pcm_samples = dsd_fir1.run(dsd_data, pcm_temp1, dsd_samples);
			pcm_samples = pcm_fir2a.run(pcm_temp1, pcm_temp2, pcm_samples);
			pcm_samples = pcm_fir2b.run(pcm_temp2, pcm_temp3, pcm_samples);
			pcm_samples = pcm_fir3.run(pcm_temp3, pcm_data, pcm_samples);
			break;
		default:
			break;
		}
		break;
	case DSDPCM_CONV_DIRECT_SINGLE:
		switch (conv_mode) {
		case DSD64_44100:
		case DSD128_44100:
			// dsd_fir1 -> pcm_fir3
			pcm_samples = dsd_fir1.run(dsd_data, pcm_temp1, dsd_samples);
			pcm_samples = pcm_fir3.run(pcm_temp1, pcm_data, pcm_samples);
			break;
		case DSD64_88200:
		case DSD64_176400:
		case DSD64_352800:
		case DSD128_88200:
		case DSD128_176400:
		case DSD128_352800:
			// dsd_fir1 only
			pcm_samples = dsd_fir1.run(dsd_data, pcm_data, dsd_samples);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	conv_called = true;
	return pcm_samples;
}

/*
* Run the SSE filter chain selected by conv_type and conv_mode.
*
* dsd_data    - input bytes
* pcm_data    - receives the output of the last stage
* dsd_samples - number of input bytes
*
* Intermediate stages write to the pcm_temp1..pcm_temp3 scratch buffers.
* Returns the sample count reported by the last stage, or 0 when no chain
* matches the current conv_type/conv_mode. Sets conv_called in every case.
*
* Every stage uses run_sse(), including the direct DSD64 -> 44.1 kHz chain.
*/
int dsdpcm_converter_r::convert_internal_sse(uint8_t* dsd_data, real_t* pcm_data, int dsd_samples) {
	// Starts at 0 so an unmatched type/mode returns a defined value.
	int pcm_samples = 0;
	switch (conv_type) {
	case DSDPCM_CONV_MULTISTAGE_SINGLE:
		switch (conv_mode) {
		case DSD64_44100:
		case DSD64_88200:
		case DSD128_88200:
			// dsd_fir1 -> pcm_fir2a -> pcm_fir3
			pcm_samples = dsd_fir1.run_sse(dsd_data, pcm_temp1, dsd_samples);
			pcm_samples = pcm_fir2a.run_sse(pcm_temp1, pcm_temp2, pcm_samples);
			pcm_samples = pcm_fir3.run_sse(pcm_temp2, pcm_data, pcm_samples);
			break;
		case DSD64_176400:
		case DSD128_176400:
			// dsd_fir1 -> pcm_fir3
			pcm_samples = dsd_fir1.run_sse(dsd_data, pcm_temp1, dsd_samples);
			pcm_samples = pcm_fir3.run_sse(pcm_temp1, pcm_data, pcm_samples);
			break;
		case DSD64_352800:
		case DSD128_352800:
			// dsd_fir1 only
			pcm_samples = dsd_fir1.run_sse(dsd_data, pcm_data, dsd_samples);
			break;
		case DSD128_44100:
			// dsd_fir1 -> pcm_fir2a -> pcm_fir2b -> pcm_fir3
			pcm_samples = dsd_fir1.run_sse(dsd_data, pcm_temp1, dsd_samples);
			pcm_samples = pcm_fir2a.run_sse(pcm_temp1, pcm_temp2, pcm_samples);
			pcm_samples = pcm_fir2b.run_sse(pcm_temp2, pcm_temp3, pcm_samples);
			pcm_samples = pcm_fir3.run_sse(pcm_temp3, pcm_data, pcm_samples);
			break;
		default:
			break;
		}
		break;
	case DSDPCM_CONV_DIRECT_SINGLE:
		switch (conv_mode) {
		case DSD64_44100:
		case DSD128_44100:
			// dsd_fir1 -> pcm_fir3
			pcm_samples = dsd_fir1.run_sse(dsd_data, pcm_temp1, dsd_samples);
			pcm_samples = pcm_fir3.run_sse(pcm_temp1, pcm_data, pcm_samples);
			break;
		case DSD64_88200:
		case DSD64_176400:
		case DSD64_352800:
		case DSD128_88200:
		case DSD128_176400:
		case DSD128_352800:
			// dsd_fir1 only
			pcm_samples = dsd_fir1.run_sse(dsd_data, pcm_data, dsd_samples);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
	conv_called = true;
	return pcm_samples;
}
