
	@ Export the three mixer routines defined in this file so they can be
	@ linked against from other translation units.
	.global asm_mix_buffer
	.global asm_downsample_mix_buffer
	.global asm_flush_work_buffer
	@ Select an executable ("ax") section named .iwram, assemble as 32-bit
	@ ARM (not Thumb) code, and align the location counter.
	.section	.iwram,"ax",%progbits
	.arm
	.align
	@ NOTE(review): in GNU as, `.text` switches the current section back to
	@ .text, which would override the `.section .iwram` request above and
	@ place everything that follows in .text instead. If these routines are
	@ meant to live in .iwram this directive probably has to go -- confirm
	@ against the linker script and the callers before changing it.
	.text

@-----------------------------------------------------------------------
@ void asm_mix_buffer(u16* vals, const s8* data, u32 vol, u32 mix_len)
@
@ Adds a volume-scaled copy of `data` into the 16-bit accumulator `vals`:
@     vals[i] += (data[i] * vol) >> 8        for i in [0, mix_len)
@
@ In:    r0 = vals     16-bit accumulator buffer (read, modified, written)
@        r1 = data     signed 8-bit source samples
@        r2 = vol      volume multiplier; the product is shifted right by
@                      8, so 256 leaves the sample amplitude unchanged
@        r3 = mix_len  number of samples to mix
@ Out:   none
@ Clobb: r0-r3, flags (r4-r5 are used as scratch but saved/restored)
@
@ The accumulator is only 16 bits wide; sums wrap, they are not clamped.
@ The loop test is signed (bgt), so a mix_len with the top bit set mixes
@ exactly one sample.
@-----------------------------------------------------------------------
asm_mix_buffer:
	@ Nothing to mix: return before touching the stack or either buffer.
	@ Without this guard the do-while loop below would still process one
	@ sample for mix_len == 0.
	cmp		r3, #0
	bxeq	lr

	stmfd	sp!, {r4-r5}

1:
	@ r4 = *data++ (sign-extended 8-bit sample)
	ldrsb	r4, [r1], #1
	@ r5 = sample * vol
	mul		r5, r4, r2
	@ r4 = current accumulator value
	ldrsh	r4, [r0]
	@ r5 = accumulator + (sample * vol) / 256
	@ The product is signed, so shift arithmetically. Only the low 16 bits
	@ are stored, which are the same for asr and lsr here.
	add		r5, r4, r5, asr #8
	@ *vals++ = r5 (low halfword)
	strh	r5, [r0], #2

	@ one sample done; keep going while samples remain
	subs	r3, r3, #1
	bgt		1b

	ldmfd	sp!, {r4-r5}
	bx		lr

@-----------------------------------------------------------------------
@ void asm_downsample_mix_buffer(u16* vals, const s8* data, u32 vol,
@                                u32 mix_len, u32 scale_low, u32 scale_high)
@
@ Like asm_mix_buffer, but steps through `data` by a fixed-point increment
@ instead of by one sample per output:
@     vals[i] += (data[pos] * vol) >> 8
@     pos     += scale_high + scale_low / 65536
@
@ In:    r0      = vals        16-bit accumulator buffer
@        r1      = data        signed 8-bit source samples
@        r2      = vol         volume multiplier (product is shifted right 8)
@        r3      = mix_len     number of output samples to produce
@        [sp]    = scale_low   fractional step; only the low 16 bits are used
@        [sp,#4] = scale_high  integer step
@ Out:   none
@ Clobb: r0, r2-r3, ip, flags (r4-r9 are used as scratch but saved/restored)
@
@ Register roles inside the loop:
@        r4 = scale_low << 16      r5 = scale_high
@        r6 = fractional position  r7 = integer position (index into data)
@        r8, r9 = temporaries
@-----------------------------------------------------------------------
asm_downsample_mix_buffer:
	@ Nothing to mix: return before touching the stack or either buffer.
	@ Without this guard the do-while loop below would still process one
	@ sample for mix_len == 0.
	cmp		r3, #0
	bxeq	lr

	@ Remember where the stacked arguments are before pushing.
	mov		ip, sp
	stmfd	sp!, {r4-r9}
	@ Fetch args 5 and 6 (scale_low, scale_high). Only these two words
	@ are arguments, so only these two are read from the caller's stack.
	ldmfd	ip, {r4-r5}

	@ Move the 16-bit fraction into the top half of the register so that
	@ a fractional overflow shows up as the carry flag.
	mov		r4, r4, lsl #16

	mov		r6, #0	@ fractional part of the source position
	mov		r7, #0	@ integer part of the source position
1:
	@ r8 = data[pos] (sign-extended 8-bit sample)
	ldrsb	r8, [r1, +r7]
	@ r9 = sample * vol
	mul		r9, r8, r2
	@ r8 = current accumulator value
	ldrsh	r8, [r0]
	@ r9 = accumulator + (sample * vol) / 256
	@ The product is signed, so shift arithmetically. Only the low 16 bits
	@ are stored, which are the same for asr and lsr here.
	add		r9, r8, r9, asr #8
	@ *vals++ = r9 (low halfword)
	strh	r9, [r0], #2

	@ Advance the source position: add the fraction first, then add the
	@ integer step plus the carry out of the fraction.
	adds	r6, r6, r4
	adc		r7, r7, r5

	@ one output sample done; keep going while samples remain
	subs	r3, r3, #1
	bgt		1b

	ldmfd	sp!, {r4-r9}
	bx		lr



@-----------------------------------------------------------------------
@ void asm_flush_work_buffer(s16* in, s8* out, u32 mix_len)
@
@ Converts the 16-bit work buffer to 8-bit output:
@     out[i] = (s8)(in[i] >> 1)              for i in [0, mix_len)
@
@ In:    r0 = in       signed 16-bit input samples
@        r1 = out      8-bit output buffer
@        r2 = mix_len  number of samples to convert
@ Out:   none
@ Clobb: r0-r3, flags (leaf routine, no stack use)
@
@ Only bits 1..8 of each input sample reach the output byte; values that
@ do not fit in 8 bits after the shift wrap, they are not clamped.
@-----------------------------------------------------------------------
asm_flush_work_buffer:
	@ Nothing to convert: return before touching either buffer. Without
	@ this guard the do-while loop below would still process one sample
	@ for mix_len == 0.
	cmp		r2, #0
	bxeq	lr

1:
	@ r3 = *in++ (sign-extended 16-bit sample)
	ldrsh	r3, [r0], #2
	@ Halve the sample. It is signed, so shift arithmetically; the stored
	@ low byte is the same for asr and lsr here.
	mov		r3, r3, asr #1
	@ *out++ = low byte of r3
	strb	r3, [r1], #1

	@ one sample done; keep going while samples remain
	subs	r2, r2, #1
	bgt		1b

	bx		lr
