/* Copyright (c) 2007, Ingo Elsen

   Portions of documentation Copyright (c) 1990 - 1994
   The Regents of the University of California.

   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright
     notice, this list of conditions and the following disclaimer.

   * Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in
     the documentation and/or other materials provided with the
     distribution.

   * Neither the name of the copyright holders nor the names of
     contributors may be used to endorse or promote products derived
     from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE. */

#include <inttypes.h>
#include "fixedPointArithmetics.h"


/**
 * Saturating addition of two signed (two's complement) 8 bit values.
 *
 * @param op1 first summand
 * @param op2 second summand
 * @return op1 + op2, clipped to 0x7f on positive overflow and to 0x81 on
 *         negative overflow (0x81 instead of 0x80, this prevents fmul
 *         overflows). A sum of exactly -128 is no overflow and is returned
 *         as 0x80.
 */
uint8_t fadd8(uint8_t op1, uint8_t op2)
{
#if defined(__AVR__)
	uint8_t result = op1;

	// clipping:
	// C=0 & V=1 -> pos overflow, result = 0x7f
	// C=1 & V=1 -> neg overflow, result = 0x81
	// ldi does not touch the flags, so brcc still sees the carry of the add.
	// ldi only works on r16..r31, therefore result uses the "d" constraint.
	__asm__ __volatile__ (
		"add %0, %1"	"\n\t"
		"brvc 0f"		"\n\t" // no overflow -> done
		"ldi %0, 0x7f"	"\n\t" // overflow -> set max pos value (might still be negative overflow)
		"brcc 0f"		"\n\t" // no carry (thus no negative overflow) -> done
		"ldi %0, 0x81"	"\n\t" // negative overflow -> set min neg value
		"0:"			"\n\t"
		: "+d" (result)
		: "r" (op2)
		: "cc"
	);

	return result;
#else
	// Portable fallback for non-AVR builds (e.g. host side unit tests).
	// (x ^ 0x80) - 0x80 sign extends the byte without relying on
	// implementation-defined conversions.
	int sum = ((op1 ^ 0x80) - 0x80) + ((op2 ^ 0x80) - 0x80);

	if (sum > 127)
	{
		return 0x7f;
	}
	if (sum < -128)
	{
		return 0x81;
	}
	return (uint8_t)sum;
#endif
}


/**
 * Saturating subtraction of two signed (two's complement) 8 bit values.
 *
 * @param op1 minuend
 * @param op2 subtrahend
 * @return op1 - op2, clipped to 0x7f on positive overflow and to 0x81 on
 *         negative overflow. A difference of exactly -128 is no overflow
 *         and is returned as 0x80.
 */
uint8_t fsub8(uint8_t op1, uint8_t op2)
{
#if defined(__AVR__)
	uint8_t result = op1;

	// clipping:
	// C=1 & V=1 -> pos overflow, result = 0x7f
	// C=0 & V=1 -> neg overflow, result = 0x81
	// ldi does not touch the flags, so brcc still sees the borrow of the sub.
	// ldi only works on r16..r31, therefore result uses the "d" constraint.
	__asm__ __volatile__ (
		"sub %0, %1"	"\n\t"
		"brvc 0f"		"\n\t" // no overflow -> done
		"ldi %0, 0x81"	"\n\t" // overflow -> set min neg value (might still be positive overflow)
		"brcc 0f"		"\n\t" // no carry (thus no positive overflow) -> done
		"ldi %0, 0x7f"	"\n\t" // positive overflow -> set max pos value
		"0:"			"\n\t"
		: "+d" (result)
		: "r" (op2)
		: "cc"
	);

	return result;
#else
	// Portable fallback for non-AVR builds (e.g. host side unit tests).
	int diff = ((op1 ^ 0x80) - 0x80) - ((op2 ^ 0x80) - 0x80);

	if (diff > 127)
	{
		return 0x7f;
	}
	if (diff < -128)
	{
		return 0x81;
	}
	return (uint8_t)diff;
#endif
}


/**
 * Signed fractional 8 bit multiply.
 *
 * Both operands are multiplied as signed bytes, the product is shifted left
 * by one bit (this is what the AVR fmuls instruction does) and the most
 * significant 8 bits of the 16 bit result are returned; the low byte is
 * discarded.
 *
 * @param op1 first factor
 * @param op2 second factor
 * @return high byte of (op1 * op2) << 1. 0x80 * 0x80 would overflow, in that
 *         case op1 is replaced by 0x81 and the result is 0x7f.
 */
uint8_t fmul8(uint8_t op1, uint8_t op2)
{
	// range check to prevent overflow. It is necessary that at least one
	// operand is != 0x80
	if (op1 == 0x80 && op2 == 0x80)
	{
		op1 = 0x81;
	}

#if defined(__AVR_HAVE_MUL__)
	uint8_t result;

	// fmuls only accepts r16..r23 ("a" constraint) and leaves its result in
	// r1:r0. r1 is the compiler's zero register and has to be cleared again.
	__asm__ __volatile__ (
		"fmuls %1, %2"	"\n\t" // result of the operation is a 16 bit fixed point value
		"mov %0, r1"	"\n\t" // of which we only take the most significant 8 bits
		"clr r1"		"\n\t"
		: "=r" (result)
		: "a" (op1), "a" (op2)
		: "r0", "cc"
	);

	return result;
#else
	// Portable fallback for targets without fmuls (e.g. host side unit tests).
	// (x ^ 0x80) - 0x80 sign extends the byte.
	int32_t prod = 2 * (int32_t)((op1 ^ 0x80) - 0x80) * ((op2 ^ 0x80) - 0x80);

	return (uint8_t)((uint16_t)prod >> 8);
#endif
}


/**
 * Signed fractional 8 bit multiply-accumulate: op1 * op2 + op3.
 *
 * The multiply works like fmul8 (signed product shifted left by one bit,
 * only the high byte is kept). op3 is then added to that byte with
 * saturation.
 *
 * @param op1 first factor
 * @param op2 second factor
 * @param op3 summand
 * @return high byte of (op1 * op2) << 1, plus op3; clipped to 0x7f on
 *         positive overflow and to 0x80 on negative overflow (note: fadd8
 *         clips to 0x81 instead).
 */
uint8_t fmac8(uint8_t op1, uint8_t op2, uint8_t op3)
{
	// range check to prevent overflow. It is necessary that at least one
	// operand is != 0x80
	if (op1 == 0x80 && op2 == 0x80)
	{
		op1 = 0x81;
	}

#if defined(__AVR_HAVE_MUL__)
	uint8_t result;

	// fmuls only accepts r16..r23 ("a"), ldi only r16..r31 ("d").
	// mov and ldi leave the flags alone, so brvc/brcc test the flags of the add.
	// r1 is the compiler's zero register and has to be cleared again.
	__asm__ __volatile__ (
		"fmuls %1, %2"	"\n\t" // do the fixed point multiply
		"add r1, %3"	"\n\t" // add op3 to the result
		"mov %0, r1"	"\n\t" // move to result variable
		"brvc 0f"		"\n\t" // check for pos/neg overflow (@see fadd8)
		"ldi %0, 0x7f"	"\n\t"
		"brcc 0f"		"\n\t"
		"ldi %0, 0x80"	"\n\t"
		"0:"			"\n\t"
		"clr r1"		"\n\t"
		: "=d" (result)
		: "a" (op1), "a" (op2), "r" (op3)
		: "r0", "cc"
	);

	return result;
#else
	// Portable fallback for targets without fmuls (e.g. host side unit tests).
	// (x ^ 0x80) - 0x80 sign extends a byte.
	int32_t prod = 2 * (int32_t)((op1 ^ 0x80) - 0x80) * ((op2 ^ 0x80) - 0x80);
	int hi = (int)(((uint16_t)prod >> 8) ^ 0x80) - 0x80;
	int sum = hi + ((op3 ^ 0x80) - 0x80);

	if (sum > 127)
	{
		return 0x7f;
	}
	if (sum < -128)
	{
		return 0x80;
	}
	return (uint8_t)sum;
#endif
}



//---------------------------------- 8 -> 16 bit section starts here --------------------------------



/**
 * Saturating addition of two signed 8 bit values, returned as a 16 bit value.
 *
 * The clipped 8 bit sum is placed in the high byte of the result, the low
 * byte is always zero.
 *
 * @param op1 first summand
 * @param op2 second summand
 * @return (op1 + op2) << 8; the high byte is clipped to 0x7f on positive
 *         overflow and to 0x81 on negative overflow (0x81 instead of 0x80,
 *         this prevents fmul overflows).
 */
uint16_t fadd816(uint8_t op1, uint8_t op2)
{
#if defined(__AVR__)
	uint16_t result;

	// clipping:
	// C=0 & V=1 -> pos overflow, high byte = 0x7f
	// C=1 & V=1 -> neg overflow, high byte = 0x81
	// The low byte is cleared before the add because clr changes the flags.
	// ldi only works on r16..r31, therefore result uses the "d" constraint.
	__asm__ __volatile__ (
		"clr %A0"		"\n\t"
		"mov %B0, %1"	"\n\t"
		"add %B0, %2"	"\n\t"
		"brvc 0f"		"\n\t"
		"ldi %B0, 0x7f"	"\n\t"
		"brcc 0f"		"\n\t"
		"ldi %B0, 0x81"	"\n\t"
		"0:"			"\n\t"
		: "=&d" (result)
		: "r" (op1), "r" (op2)
		: "cc"
	);

	return result;
#else
	// Portable fallback for non-AVR builds (e.g. host side unit tests).
	// (x ^ 0x80) - 0x80 sign extends the byte.
	int sum = ((op1 ^ 0x80) - 0x80) + ((op2 ^ 0x80) - 0x80);

	if (sum > 127)
	{
		return 0x7f00;
	}
	if (sum < -128)
	{
		return 0x8100;
	}
	return (uint16_t)((uint16_t)(uint8_t)sum << 8);
#endif
}


/**
 * Saturating subtraction of two signed 8 bit values, returned as a 16 bit
 * value.
 *
 * Without overflow the 8 bit difference is placed in the high byte of the
 * result and the low byte is zero.
 *
 * @param op1 minuend
 * @param op2 subtrahend
 * @return (op1 - op2) << 8; 0x7fff on positive overflow, 0x8001 on negative
 *         overflow.
 */
uint16_t fsub816(uint8_t op1, uint8_t op2)
{
#if defined(__AVR__)
	uint16_t result;

	// clipping:
	// C=1 & V=1 -> pos overflow, result = 0x7fff
	// C=0 & V=1 -> neg overflow, result = 0x8001
	// The low byte is cleared before the sub because clr changes the flags.
	// ldi only works on r16..r31, therefore result uses the "d" constraint.
	__asm__ __volatile__ (
		"clr %A0"		"\n\t"
		"mov %B0, %1"	"\n\t"
		"sub %B0, %2"	"\n\t"
		"brvc 0f"		"\n\t"
		"ldi %A0, 0x01"	"\n\t"
		"ldi %B0, 0x80"	"\n\t"
		"brcc 0f"		"\n\t"
		"ldi %A0, 0xff"	"\n\t"
		"ldi %B0, 0x7f"	"\n\t"
		"0:"			"\n\t"
		: "=&d" (result)
		: "r" (op1), "r" (op2)
		: "cc"
	);

	return result;
#else
	// Portable fallback for non-AVR builds (e.g. host side unit tests).
	// (x ^ 0x80) - 0x80 sign extends the byte.
	int diff = ((op1 ^ 0x80) - 0x80) - ((op2 ^ 0x80) - 0x80);

	if (diff > 127)
	{
		return 0x7fff;
	}
	if (diff < -128)
	{
		return 0x8001;
	}
	return (uint16_t)((uint16_t)(uint8_t)diff << 8);
#endif
}


/**
 * Signed fractional 8 x 8 -> 16 bit multiply.
 *
 * Both operands are multiplied as signed bytes and the product is shifted
 * left by one bit (this is what the AVR fmuls instruction does). The complete
 * 16 bit result is returned.
 *
 * @param op1 first factor
 * @param op2 second factor
 * @return (op1 * op2) << 1. 0x80 * 0x80 would overflow, in that case op1 is
 *         replaced by 0x81 and the result is 0x7f00.
 */
uint16_t fmul816(uint8_t op1, uint8_t op2)
{
	// range check to prevent overflow. It is necessary that at least one
	// operand is != 0x80
	if (op1 == 0x80 && op2 == 0x80)
	{
		op1 = 0x81;
	}

#if defined(__AVR_HAVE_MUL__)
	uint16_t result;

	// fmuls only accepts r16..r23 ("a" constraint) and leaves its result in
	// r1:r0. r1 is the compiler's zero register and has to be cleared again.
	__asm__ __volatile__ (
		"fmuls %1, %2"	"\n\t"
		"movw %A0, r0"	"\n\t"
		"clr r1"		"\n\t"
		: "=&r" (result)
		: "a" (op1), "a" (op2)
		: "r0", "cc"
	);

	return result;
#else
	// Portable fallback for targets without fmuls (e.g. host side unit tests).
	// (x ^ 0x80) - 0x80 sign extends the byte.
	int32_t prod = 2 * (int32_t)((op1 ^ 0x80) - 0x80) * ((op2 ^ 0x80) - 0x80);

	return (uint16_t)prod;
#endif
}


/**
 * Signed fractional multiply-accumulate with a 16 bit accumulator.
 *
 * The high byte of op1 is multiplied with op2 (signed product shifted left
 * by one bit, as done by the AVR fmuls instruction); the low byte of op1 is
 * ignored. op3 is added to the 16 bit product with saturation.
 *
 * @param op1 first factor, only the high byte is used
 * @param op2 second factor
 * @param op3 16 bit summand
 * @return ((op1 >> 8) * op2) << 1, plus op3; clipped to 0x7fff on positive
 *         overflow and to 0x8000 on negative overflow.
 */
uint16_t fmac816(uint16_t op1, uint8_t op2, uint16_t op3)
{
	// range check to prevent overflow. It is necessary that at least one
	// of the multiplied bytes is != 0x80
	if ((op1 >> 8) == 0x80 && op2 == 0x80)
	{
		op1 = 0x8100;
	}

#if defined(__AVR_HAVE_MUL__)
	uint16_t result;

	// fmuls only accepts r16..r23 ("a"), ldi only r16..r31 ("d").
	// movw and ldi leave the flags alone, so brvc/brcc test the flags of adc.
	// r1 is the compiler's zero register and has to be cleared again.
	__asm__ __volatile__ (
		"fmuls %B1, %2"	"\n\t" // take only the high byte of op1
		"add r0, %A3"	"\n\t" // do a 16 bit add
		"adc r1, %B3"	"\n\t"
		"movw %A0, r0"	"\n\t"
		"brvc 0f"		"\n\t" // check for overflow (pos/neg) @see fadd8
		"ldi %A0, 0xff"	"\n\t"
		"ldi %B0, 0x7f"	"\n\t"
		"brcc 0f"		"\n\t"
		"ldi %A0, 0x00"	"\n\t"
		"ldi %B0, 0x80"	"\n\t"
		"0:"			"\n\t"
		"clr r1"		"\n\t"
		: "=&d" (result)
		: "a" (op1), "a" (op2), "r" (op3)
		: "r0", "cc"
	);

	return result;
#else
	// Portable fallback for targets without fmuls (e.g. host side unit tests).
	// (x ^ 0x80) - 0x80 sign extends a byte, (x ^ 0x8000) - 0x8000 a word.
	int32_t prod = 2 * (int32_t)((int)((op1 >> 8) ^ 0x80) - 0x80) * ((op2 ^ 0x80) - 0x80);
	int32_t sum = prod + ((int32_t)(op3 ^ 0x8000u) - 0x8000L);

	if (sum > 32767L)
	{
		return 0x7fff;
	}
	if (sum < -32768L)
	{
		return 0x8000;
	}
	return (uint16_t)sum;
#endif
}

//---------------------------------- 16 bit section starts here --------------------------------






/**
 * Saturating addition of two signed (two's complement) 16 bit values.
 *
 * @param op1 first summand
 * @param op2 second summand
 * @return op1 + op2, clipped to 0x7fff on positive overflow and to 0x8001 on
 *         negative overflow (high byte 0x80, low byte 0x01). A sum of exactly
 *         -32768 is no overflow and is returned as 0x8000.
 */
uint16_t fadd16(uint16_t op1, uint16_t op2)
{
#if defined(__AVR__)
	uint16_t result = op1;

	// clipping:
	// C=0 & V=1 -> pos overflow, result = 0x7fff
	// C=1 & V=1 -> neg overflow, result = 0x8001
	// ldi does not touch the flags, so brcc still sees the carry of the adc.
	// ldi only works on r16..r31, therefore result uses the "d" constraint.
	__asm__ __volatile__ (
		"add %A0, %A1"	"\n\t"
		"adc %B0, %B1"	"\n\t"
		"brvc 0f"		"\n\t"
		"ldi %A0, 0xff"	"\n\t"
		"ldi %B0, 0x7f"	"\n\t"
		"brcc 0f"		"\n\t"
		"ldi %A0, 0x01"	"\n\t"
		"ldi %B0, 0x80"	"\n\t"
		"0:"			"\n\t"
		: "+d" (result)
		: "r" (op2)
		: "cc"
	);

	return result;
#else
	// Portable fallback for non-AVR builds (e.g. host side unit tests).
	// (x ^ 0x8000) - 0x8000 sign extends the word.
	int32_t sum = ((int32_t)(op1 ^ 0x8000u) - 0x8000L)
	            + ((int32_t)(op2 ^ 0x8000u) - 0x8000L);

	if (sum > 32767L)
	{
		return 0x7fff;
	}
	if (sum < -32768L)
	{
		return 0x8001;
	}
	return (uint16_t)sum;
#endif
}


/**
 * Saturating subtraction of two signed (two's complement) 16 bit values.
 *
 * @param op1 minuend
 * @param op2 subtrahend
 * @return op1 - op2, clipped to 0x7fff on positive overflow and to 0x8001 on
 *         negative overflow (high byte 0x80, low byte 0x01). A difference of
 *         exactly -32768 is no overflow and is returned as 0x8000.
 */
uint16_t fsub16(uint16_t op1, uint16_t op2)
{
#if defined(__AVR__)
	uint16_t result = op1;

	// clipping:
	// C=1 & V=1 -> pos overflow, result = 0x7fff
	// C=0 & V=1 -> neg overflow, result = 0x8001
	// ldi does not touch the flags, so brcc still sees the borrow of the sbc.
	// ldi only works on r16..r31, therefore result uses the "d" constraint.
	__asm__ __volatile__ (
		"sub %A0, %A1"	"\n\t"
		"sbc %B0, %B1"	"\n\t"
		"brvc 0f"		"\n\t"
		"ldi %A0, 0x01"	"\n\t" // low byte
		"ldi %B0, 0x80"	"\n\t" // high byte
		"brcc 0f"		"\n\t"
		"ldi %A0, 0xff"	"\n\t"
		"ldi %B0, 0x7f"	"\n\t"
		"0:"			"\n\t"
		: "+d" (result)
		: "r" (op2)
		: "cc"
	);

	return result;
#else
	// Portable fallback for non-AVR builds (e.g. host side unit tests).
	// (x ^ 0x8000) - 0x8000 sign extends the word.
	int32_t diff = ((int32_t)(op1 ^ 0x8000u) - 0x8000L)
	             - ((int32_t)(op2 ^ 0x8000u) - 0x8000L);

	if (diff > 32767L)
	{
		return 0x7fff;
	}
	if (diff < -32768L)
	{
		return 0x8001;
	}
	return (uint16_t)diff;
#endif
}


/**
 * Signed fractional 16 bit multiply using only the high bytes of the operands.
 *
 * The high bytes of op1 and op2 are multiplied as signed bytes and the
 * product is shifted left by one bit (this is what the AVR fmuls instruction
 * does). The low bytes of both operands are ignored.
 *
 * @param op1 first factor, only the high byte is used
 * @param op2 second factor, only the high byte is used
 * @return ((op1 >> 8) * (op2 >> 8)) << 1. If both high bytes are 0x80 the
 *         multiply would overflow, in that case op1 is replaced by 0x8100 and
 *         the result is 0x7f00.
 */
uint16_t fmul16(uint16_t op1, uint16_t op2)
{
	// range check to prevent overflow. It is necessary that at least one
	// operand's high byte is != 0x80
	if ((op1 >> 8) == 0x80 && (op2 >> 8) == 0x80)
	{
		op1 = 0x8100;
	}

#if defined(__AVR_HAVE_MUL__)
	uint16_t result;

	// fmuls only accepts r16..r23 ("a" constraint) and leaves its result in
	// r1:r0. r1 is the compiler's zero register and has to be cleared again.
	__asm__ __volatile__ (
		"fmuls %B1, %B2"	"\n\t" // take high byte of op1 and op2 only
		"movw %A0, r0"		"\n\t"
		"clr r1"			"\n\t"
		: "=&r" (result)
		: "a" (op1), "a" (op2)
		: "r0", "cc"
	);

	return result;
#else
	// Portable fallback for targets without fmuls (e.g. host side unit tests).
	// (x ^ 0x80) - 0x80 sign extends a byte.
	int32_t prod = 2 * (int32_t)((int)((op1 >> 8) ^ 0x80) - 0x80)
	                 * ((int)((op2 >> 8) ^ 0x80) - 0x80);

	return (uint16_t)prod;
#endif
}


/**
 * Signed fractional 16 bit multiply-accumulate.
 *
 * The high bytes of op1 and op2 are multiplied as signed bytes and the
 * product is shifted left by one bit (as done by the AVR fmuls instruction);
 * the low bytes of op1 and op2 are ignored. op3 is added to the 16 bit
 * product with saturation.
 *
 * @param op1 first factor, only the high byte is used
 * @param op2 second factor, only the high byte is used
 * @param op3 16 bit summand
 * @return ((op1 >> 8) * (op2 >> 8)) << 1, plus op3; clipped to 0x7fff on
 *         positive overflow and to 0x8000 on negative overflow.
 */
uint16_t fmac16(uint16_t op1, uint16_t op2, uint16_t op3)
{
	// range check to prevent overflow. It is necessary that at least one
	// operand's high byte is != 0x80
	if ((op1 >> 8) == 0x80 && (op2 >> 8) == 0x80)
	{
		op1 = 0x8100;
	}

#if defined(__AVR_HAVE_MUL__)
	uint16_t result;

	// fmuls only accepts r16..r23 ("a"), ldi only r16..r31 ("d").
	// movw and ldi leave the flags alone, so brvc/brcc test the flags of adc.
	// r1 is the compiler's zero register and has to be cleared again.
	__asm__ __volatile__ (
		"fmuls %B1, %B2"	"\n\t" // take high byte of op1 and op2 only
		"add r0, %A3"		"\n\t" // do a 16 bit add
		"adc r1, %B3"		"\n\t"
		"movw %A0, r0"		"\n\t"
		"brvc 0f"			"\n\t" // check for overflow (pos/neg) @see fadd8
		"ldi %A0, 0xff"		"\n\t"
		"ldi %B0, 0x7f"		"\n\t"
		"brcc 0f"			"\n\t"
		"ldi %A0, 0x00"		"\n\t"
		"ldi %B0, 0x80"		"\n\t"
		"0:"				"\n\t"
		"clr r1"			"\n\t"
		: "=&d" (result)
		: "a" (op1), "a" (op2), "r" (op3)
		: "r0", "cc"
	);

	return result;
#else
	// Portable fallback for targets without fmuls (e.g. host side unit tests).
	// (x ^ 0x80) - 0x80 sign extends a byte, (x ^ 0x8000) - 0x8000 a word.
	int32_t prod = 2 * (int32_t)((int)((op1 >> 8) ^ 0x80) - 0x80)
	                 * ((int)((op2 >> 8) ^ 0x80) - 0x80);
	int32_t sum = prod + ((int32_t)(op3 ^ 0x8000u) - 0x8000L);

	if (sum > 32767L)
	{
		return 0x7fff;
	}
	if (sum < -32768L)
	{
		return 0x8000;
	}
	return (uint16_t)sum;
#endif
}