#include "gba.h"
#include "fixed.h"
#include "sound.h"
#include <malloc.h>
#include <string.h>
#include <stdio.h>

/* Interrupt flag bits for timers 0 and 1.  INT_TM1 is OR'd into REG_IE by
 * sfxSystemWorker(); INT_TM0 is not used in the visible part of this file. */
#define	INT_TM0		(0x0008)
#define	INT_TM1		(0x0010)

/* NOTE(review): no definition or caller of play_sfx is visible in this part
 * of the file - confirm it is still needed. */
void play_sfx(const s8* data, int length);

/* Mixing buffers for the sound system.  Allocated by sfxSystemInit() and
 * released by sfxSystemStop().  sfxSystemWorker() streams entries 0/1 to
 * Direct Sound FIFO A and entries 2/3 to FIFO B, switching between the two
 * entries of each pair on every call. */
s8* soundbuffers[4];
/* Index (0 or 1) of the buffer within each pair that the most recent
 * sfxSystemWorker() call filled; toggled at the start of every call. */
u32 sbuffer = 0;

/* Non-zero requests that the next sfxSystemWorker() call prime both FIFOs
 * with one sample and set their reset bits; the worker then clears it.
 * Set again by sfxSystemStop().
 * NOTE(review): if an interrupt handler elsewhere also writes this flag it
 * should be declared volatile - confirm. */
u16 fifo_stalled = 1;

/* Length of each mixing buffer in 8-bit samples (and therefore in bytes);
 * also the size of the worker's 16-bit accumulation array and the count
 * loaded into timer 1. */
#define SOUNDBUFFER_LENGTH 300

/* State of one software mixing channel.  A channel with length == 0 is idle;
 * sfxSystemPlay() and sfxSystemAllocChannel() only pick channels that are
 * both idle and not allocated. */
typedef struct
{
	const s8* data; // signed 8-bit source samples
	u32 length, cur; // total source samples, and current playback position in source samples
	u16 volume; // volume value from 0-16
	s16 panning; // panning value from -8 to 8
	u16 volcache[2]; // per-output volume: [0] = volume * (8 - panning), [1] = volume * (8 + panning); refreshed by sfxSystemWorker
	u32 freq;				// playback frequency
	u16 allocated;	// channel is allocated (set by sfxSystemAllocChannel, cleared by sfxSystemFreeChannel)
	u32 loop_start, loop_end; // looping: positions in source samples; loop_end == 0 means the sound does not loop
} buffer_sound;

/* Number of entries in the channel table below. */
#define SOUNDBUFFER_CHANNELS 10

/* Channel table shared by every sfxSystem* function in this file. */
buffer_sound sounds[SOUNDBUFFER_CHANNELS];

// sfxSystemInit - enables the sound hardware, configures Direct Sound A/B,
//                 allocates the four mixing buffers, starts timer 0 and
//                 clears the channel table
// PARAMETERS:  none
// RETURNS:     none
// NOTE:        the malloc results are not checked here.
void sfxSystemInit()
{
	int buffer_index;

	// master enable for the sound chip
	REG_SOUNDCNT_X = SND_ENABLED;

	// sound channels 1-4 stay silent
	REG_SOUNDCNT_L = 0;

	// Direct Sound routing: A goes to the left speaker, B to the right,
	// both at 100% and both clocked by timer 0; both FIFOs are reset
	REG_SOUNDCNT_H = SND_OUTPUT_RATIO_100 |
		DSA_OUTPUT_RATIO_100 | DSB_OUTPUT_RATIO_100 |
		DSA_OUTPUT_TO_LEFT | DSB_OUTPUT_TO_RIGHT |
		DSA_TIMER0 | DSB_TIMER0 |
		DSA_FIFO_RESET | DSB_FIFO_RESET;

	// one buffer of SOUNDBUFFER_LENGTH bytes per slot of soundbuffers[]
	for(buffer_index = 0; buffer_index < 4; buffer_index++)
	{
		soundbuffers[buffer_index] = malloc(SOUNDBUFFER_LENGTH);
	}

	// timer 0 sets the playback rate
	REG_TM0D   = TIMER_INTERVAL;
	REG_TM0CNT = TIMER_ENABLE;

	// every channel starts idle and unallocated
	memset(sounds, 0, sizeof(sounds));
}

// sfxSystemStop - halts the sound DMA channels and timers 0/1, flags the
//                 FIFOs as stalled and releases the mixing buffers
// PARAMETERS:  none
// RETURNS:     none
void sfxSystemStop()
{
	int i;
	
	REG_DMA1CNT = 0;
	REG_DMA2CNT = 0;
	REG_TM0CNT = 0;
	REG_TM1CNT = 0;
	// the next sfxSystemWorker() call must re-prime the FIFOs
	fifo_stalled = 1;
	
	for(i=0; i<4; i++)
	{
		free(soundbuffers[i]);
		// clear the slot so a second stop does not free the same block twice
		soundbuffers[i] = NULL;
	}
}
	
// dump_samples - debug helper: prints length, position, volume, panning and
//                frequency of every mixing channel through print()
// PARAMETERS:  none
// RETURNS:     none
void dump_samples()
{
	int i;
	char foo[100];

	for(i=0; i < SOUNDBUFFER_CHANNELS; i++)
	{
		// u32 fields are cast to unsigned long and printed with %lu so the
		// format matches the argument whatever u32 is typedef'd to
		snprintf(foo, sizeof(foo), "channel: %d\n", i); print(foo);
		snprintf(foo, sizeof(foo), "\tlen: %lu\n", (unsigned long)sounds[i].length); print(foo);
		snprintf(foo, sizeof(foo), "\tcur: %lu\n", (unsigned long)sounds[i].cur); print(foo);
		snprintf(foo, sizeof(foo), "\tvol: %d\n", sounds[i].volume); print(foo);
		snprintf(foo, sizeof(foo), "\tpan: %d\n", sounds[i].panning); print(foo);
		snprintf(foo, sizeof(foo), "\tfre: %lu\n", (unsigned long)sounds[i].freq); print(foo);
	}
}

/* C implementations of buffer mix functions - replaced with asm versions */
#if 0
/* Reference C mixer (disabled): for each of the first mix_len source samples,
 * adds (sample * vol) >> 8 to the matching accumulator slot in vals. */
void mix_buffer(u16* vals, const s8* data, u32 vol, u32 mix_len)
{
	u32 left;

	for(left = mix_len; left != 0; left--)
	{
		*vals += ((*data * vol) >> 8);
		vals++;
		data++;
	}
}

/* Reference C resampling mixer (disabled).  Writes mix_len accumulator slots;
 * after each one the source index advances by scale_high whole samples plus
 * one extra sample whenever the scale_low accumulator exceeds 1 << 16. */
void downsample_mix_buffer(u16* vals, const s8* data, u32 vol, u32 mix_len, u32 scale_low, u32 scale_high)
{
	u32 frac = 0;	/* fractional step accumulator */
	u32 src = 0;	/* index of the source sample being read */
	u32 out;

	for(out = 0; out != mix_len; out++)
	{
		vals[out] += ((data[src] * vol) >> 8);

		/* whole-sample part of the step */
		src += scale_high;

		/* fractional part of the step, with carry into the index */
		frac += scale_low;
		if(frac > (1 << 16))
		{
			frac -= (1 << 16);
			src++;
		}
	}
}

/* Reference C resampling mixer with looping (disabled).  Same stepping as
 * downsample_mix_buffer, but reading starts at start_offset and, whenever the
 * source index moves past loop_end, it is pulled back by the loop length
 * (loop_end - loop_start). */
void downsample_mix_buffer_looped(u16* vals, const s8* data, u32 vol, u32 mix_len, u32 scale_low, u32 scale_high, u32 start_offset, u32 loop_start, u32 loop_end)
{
	u32 frac = 0;			/* fractional step accumulator */
	u32 src = start_offset;	/* index of the source sample being read */
	u32 out;

	for(out = 0; out != mix_len; out++)
	{
		vals[out] += ((data[src] * vol) >> 8);

		/* whole-sample part of the step */
		src += scale_high;

		/* fractional part of the step, with carry into the index */
		frac += scale_low;
		if(frac > (1 << 16))
		{
			frac -= (1 << 16);
			src++;
		}

		/* wrap back into the loop region */
		if(src > loop_end)
		{
			src -= (loop_end - loop_start);
		}
	}
}
#endif

/* Mixing routines defined outside this file and called by sfxSystemWorker().
 * Per the comment on the disabled C versions above, these are the assembly
 * replacements for mix_buffer / downsample_mix_buffer /
 * downsample_mix_buffer_looped and take the same argument lists.
 * NOTE(review): CODE_IN_IWRAM is presumably a section attribute placing the
 * routines in fast internal RAM - confirm in the project headers.
 * NOTE(review): asm_mix_buffer takes s16* while the downsample variants take
 * u16*; the worker passes the same s16 accumulator array to all of them. */
void asm_mix_buffer(s16* vals, const s8* data, u32 vol, u32 mix_len) CODE_IN_IWRAM;
void asm_downsample_mix_buffer(u16* vals, const s8* data, u32 vol, u32 mix_len, u32 scale_low, u32 scale_high) CODE_IN_IWRAM;
void asm_downsample_mix_buffer_unroll(u16* vals, const s8* data, u32 vol, u32 mix_len, u32 scale_low, u32 scale_high) CODE_IN_IWRAM;
void asm_downsample_mix_buffer_looped(u16* vals, const s8* data, u32 vol, u32 mix_len, u32 scale_low, u32 scale_high, u32 start_offset, u32 loop_start, u32 loop_end) CODE_IN_IWRAM;

/* Called once per output after mixing, with the s16 accumulator array as
 * input and one of the s8 sound buffers as output.
 * NOTE(review): presumably narrows the 16-bit sums to 8-bit samples - confirm
 * against the assembly source. */
void asm_flush_work_buffer(s16* in, s8* out, u32 len) CODE_IN_IWRAM;

u32 sfxSystemWorker()
{
	//char foo[100];
	int i, s;
	int channel;
	u32 process_time;
	u32 samples_played;
	s16 vals[SOUNDBUFFER_LENGTH];
	s8* buf[2];

	if(sbuffer)
	{
		sbuffer = 0;
	} else {
		sbuffer = 1;
	}
	buf[0] = soundbuffers[sbuffer];
	buf[1] = soundbuffers[sbuffer + 2];

	//dump_samples();

	REG_TM2D = 0;
	REG_TM2CNT = TIMER_FREQ_1024 | TIMER_ENABLE;
	/* pre-calculate volume levels for each channel of each sound */
	for(s=0; s < SOUNDBUFFER_CHANNELS; s++)
	{
		sounds[s].volcache[0] = sounds[s].volume * (8 - sounds[s].panning);
		sounds[s].volcache[1] = sounds[s].volume * (8 + sounds[s].panning);
	}
	
	for(channel=0; channel < 2; channel++)
	{
		memset(vals, 0, SOUNDBUFFER_LENGTH * sizeof(s16));
		
		for(s=0; s < SOUNDBUFFER_CHANNELS; s++)
		{
			if(sounds[s].cur < sounds[s].length)
			{
				u16 mix_len;

				if(sounds[s].freq == FREQUENCY)
				{
					mix_len = sounds[s].length - sounds[s].cur;
					if(mix_len > SOUNDBUFFER_LENGTH) mix_len = SOUNDBUFFER_LENGTH;
				
					asm_mix_buffer(vals, sounds[s].data + sounds[s].cur, sounds[s].volcache[channel], mix_len);
				} else {
					u32 scale_low, scale_high;
					
					mix_len = (((sounds[s].length - sounds[s].cur) * FREQUENCY) / sounds[s].freq);
					if(mix_len > SOUNDBUFFER_LENGTH)
					{
						mix_len = SOUNDBUFFER_LENGTH;
					}	
					scale_high = sounds[s].freq / FREQUENCY;
					scale_low = (((sounds[s].freq % FREQUENCY) << 16) / FREQUENCY);
					//sprintf(foo,"sh: %d sl: %d ml: %d\n", scale_high, scale_low, mix_len); print(foo);
					if(sounds[s].loop_end)
					{	
						asm_downsample_mix_buffer_looped(vals, sounds[s].data, sounds[s].volcache[channel], mix_len, scale_low, scale_high, sounds[s].cur, sounds[s].loop_start, sounds[s].loop_end);
					} else {
						asm_downsample_mix_buffer(vals, sounds[s].data + sounds[s].cur, sounds[s].volcache[channel], mix_len, scale_low, scale_high);
					}
				}
			}
		}
		asm_flush_work_buffer(vals, buf[channel], SOUNDBUFFER_LENGTH);
	}
	process_time = REG_TM2D;
	
	samples_played = 265;
	for(i=0; i < SOUNDBUFFER_CHANNELS; i++)
	{
		if(sounds[i].length)
		{
			sounds[i].cur += ((samples_played * sounds[i].freq) / FREQUENCY);
			
			if(sounds[i].loop_end)
			{
				//sprintf(foo, "cur: %d loopend: %d len: %d\n", soud
				if(sounds[i].cur > sounds[i].loop_end)
				{
					sounds[i].cur -= (sounds[i].loop_end - sounds[i].loop_start);
					//sounds[i].cur = sounds[i].loop_start + (sounds[i].cur - sounds[i].loop_end);
				}
			} else {
				if(sounds[i].cur > sounds[i].length)
				{
					sounds[i].cur = 0; sounds[i].length = 0;
				}
			}
		}
	}
	// make sure Timer 0 is off
	REG_TM0CNT = 0;
	REG_TM1CNT = 0;

	// make sure DMA channel 1 is turned off
	REG_DMA1CNT = 0;
	REG_DMA2CNT = 0;

	// make sure the FIFO is reset
	if(fifo_stalled)
	{
		*(volatile u8*)REG_FIFO_A = buf[0][0];
		*(volatile u8*)REG_FIFO_B = buf[1][0];
		REG_SOUNDCNT_H |= (DSA_FIFO_RESET | DSB_FIFO_RESET);   // just set the reset bit and leave the other ones alone
		fifo_stalled = 0;
	}

	// start the timer using the appropriate frequency
	REG_TM0D   = TIMER_INTERVAL;
	REG_TM0CNT = TIMER_ENABLE;

	// start the DMA transfer on channel 1
	REG_DMA1SAD = (u32)(buf[0]);
	REG_DMA1DAD = (u32)REG_FIFO_A;
	REG_DMA1CNT = ENABLE_DMA | START_ON_FIFO_EMPTY | WORD_DMA | DMA_REPEAT;
	
	REG_DMA2SAD = (u32)(buf[1]);
	REG_DMA2DAD = (u32)REG_FIFO_B;
	REG_DMA2CNT = ENABLE_DMA | START_ON_FIFO_EMPTY | WORD_DMA | DMA_REPEAT;

	// set up timer 1 as a sample length counter
	REG_TM1D = 0xffff - (SOUNDBUFFER_LENGTH);
	REG_TM1CNT = TIMER_CASCADE | TIMER_IRQ_ENABLE | TIMER_ENABLE;

	// and enable interrupt used to stop the sample playing when it's finished
	REG_IE |= INT_TM1;
	REG_IME = 1;

	return(process_time);
}

void sfxSystemPlay(const sfxSystemSound* s)
{
	int i;
	for(i=0; i < SOUNDBUFFER_CHANNELS; i++)
	{
		if(sounds[i].allocated == 0 && sounds[i].length == 0)
		{
			//print("start play!\n");
			sounds[i].data = s->pData;
			sounds[i].cur = s->start_offset;
			sounds[i].volume = s->volume;
			sounds[i].panning = s->panning;	
			sounds[i].length = s->length;
			sounds[i].freq = s->freq;
			sounds[i].loop_start = s->loop_start;
			sounds[i].loop_end = s->loop_end;
			return;
		}
	}
	//print("play failed!\n");
}

// sfxSystemAllocChannel - reserves the first channel that is neither
//                         allocated nor currently playing
// PARAMETERS:  none
// RETURNS:     index of the reserved channel, or -1 when none is available
int sfxSystemAllocChannel()
{
	int slot = 0;

	while(slot < SOUNDBUFFER_CHANNELS)
	{
		if(!sounds[slot].allocated && !sounds[slot].length)
		{
			sounds[slot].allocated = 1;
			return slot;
		}
		slot++;
	}

	return -1;
}

// sfxSystemFreeChannel - clears the allocated flag of a channel
// PARAMETERS:  channel - index into the channel table; out-of-range values
//                        (such as the -1 failure result of
//                        sfxSystemAllocChannel converted to u16) are ignored
// RETURNS:     none
void sfxSystemFreeChannel(u16 channel)
{
	if(channel >= SOUNDBUFFER_CHANNELS) return;

	sounds[channel].allocated = 0;
}

// sfxSystemSetChannel - loads a sound into a specific channel, replacing
//                       whatever that channel was playing
// PARAMETERS:  i - index into the channel table; out-of-range values are
//                  ignored instead of writing outside the table
//              s - description of the sound to load
// RETURNS:     none
void sfxSystemSetChannel(u16 i, const sfxSystemSound* s)
{
	if(i >= SOUNDBUFFER_CHANNELS) return;

	sounds[i].data = s->pData;
	sounds[i].cur = s->start_offset;
	sounds[i].volume = s->volume;
	sounds[i].panning = s->panning;	
	sounds[i].length = s->length;
	sounds[i].freq = s->freq;
	sounds[i].loop_start = s->loop_start;
	sounds[i].loop_end = s->loop_end;
}

// sfxSystemChannelSetVolume - changes the volume of one channel
// PARAMETERS:  i   - index into the channel table; out-of-range values are
//                    ignored
//              vol - new volume
// RETURNS:     none
void sfxSystemChannelSetVolume(u16 i, u16 vol)
{
	if(i >= SOUNDBUFFER_CHANNELS) return;

	sounds[i].volume = vol;
}

// sfxSystemChannelSetPanning - changes the panning of one channel
// PARAMETERS:  i   - index into the channel table; out-of-range values are
//                    ignored
//              pan - new panning
// RETURNS:     none
void sfxSystemChannelSetPanning(u16 i, s16 pan)
{
	if(i >= SOUNDBUFFER_CHANNELS) return;

	sounds[i].panning = pan;
}

// sfxSystemChannelSetOffset - moves the playback position of one channel
// PARAMETERS:  i   - index into the channel table; out-of-range values are
//                    ignored
//              off - new playback position
// RETURNS:     none
void sfxSystemChannelSetOffset(u16 i, u32 off)
{
	if(i >= SOUNDBUFFER_CHANNELS) return;

	sounds[i].cur = off;
}

// sfxSystemChannelSetFreq - changes the playback frequency of one channel
// PARAMETERS:  i    - index into the channel table; out-of-range values are
//                     ignored
//              freq - new playback frequency
// RETURNS:     none
void sfxSystemChannelSetFreq(u16 i, u16 freq)
{
	if(i >= SOUNDBUFFER_CHANNELS) return;

	sounds[i].freq = freq;
}

