Multiplication in Assembler

From the Mikrocontroller.net article collection, with contributions by various authors (see version history)

Author: Steffen H.

This is a collection of assembler include files for 8-bit AVRs that implement multiplication in hardware or in software.

Introduction

I created this wiki article on multiplication in assembler because the code snippets I found were mostly for "unsigned" multiplication, or were not available for the operand sizes I needed. You will therefore find here various subroutines for both "unsigned" and "signed" multiplication.

How does it all work?

The subroutines are include files. They have to be included in the .cseg part of the program. If the include files are located in the root directory of the assembler project, i.e. in the same place as "main.asm", they are included like this:

 
.include  "Datei.inc"

The include files can also be stored anywhere else. In that case they have to be included with the full path. Example:

 
.include  "C:\AVR\Libraries\Datei.inc"

For the pure software solutions, a file named "sw_mul.inc" has to be included before all other MUL includes. The software MUL includes differ from the hardware MUL includes by a leading "_", for example _muls_32x16_48.inc.
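A minimal sketch of how this could look in a project that uses the software routines (the file names are only examples following the naming scheme described below):

.cseg
.include  "sw_mul.inc"            ; software MUL macros, must be included first
.include  "_muls_16x16_32.inc"    ; e.g. software signed 16x16 -> 32 bit multiply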

The structure of the file names is almost self-explanatory.

  • mul -> unsigned x unsigned multiplication
  • mulsu -> signed x unsigned multiplication
  • muls -> signed x signed multiplication

Then comes the operand size, for example _32x16, followed by the result size _48. The input registers are fixed! In addition, the lower registers r11..r0 are destroyed; anyone using them should save them beforehand.
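A small sketch of how registers that are in use elsewhere can be saved around a call (the routine name is just an example):

		push	r0				; save low registers that hold live data
		push	r1
		; (operands already loaded, e.g. in r21:r20 and r17:r16)
		rcall	mul_16x16		; result appears in r5:r4:r3:r2
		pop		r1				; restore the saved registers afterwards
		pop		r0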

One more thing to watch out for: "rcall" has to be changed to "call" if the subroutine is more than about 2K words (4 KiB) of program memory away from the call site; Atmel Studio reports this with an error message.
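For example (on devices that support the CALL instruction):

		rcall	mul_32x32		; fine while the routine is within +/-2K words
		call	mul_32x32		; needed when the routine is further away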

Registers used

All include files use the registers according to the following pattern (A = operand A bytes, B = operand B bytes, E = result bytes):

MULT    Operand A           Operand B           Result E
16x8    A1:A0 = r21:r20     B0    = r16         E2..E0 = r4..r2
16x16   A1:A0 = r21:r20     B1:B0 = r17:r16     E3..E0 = r5..r2
24x8    A2:A0 = r22:r20     B0    = r16         E3..E0 = r5..r2
24x16   A2:A0 = r22:r20     B1:B0 = r17:r16     E4..E0 = r6..r2
24x24   A2:A0 = r22:r20     B2:B0 = r18:r16     E5..E0 = r7..r2
32x8    A3:A0 = r23:r20     B0    = r16         E4..E0 = r6..r2
32x16   A3:A0 = r23:r20     B1:B0 = r17:r16     E5..E0 = r7..r2
32x24   A3:A0 = r23:r20     B2:B0 = r18:r16     E6..E0 = r8..r2
32x32   A3:A0 = r23:r20     B3:B0 = r19:r16     E7..E0 = r9..r2
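For example, a signed 16x16 multiplication follows this table like so (operand values chosen arbitrarily for illustration):

		ldi		r21,HIGH(-1234)	; A1:A0 = operand A (signed)
		ldi		r20,LOW(-1234)
		ldi		r17,HIGH(500)	; B1:B0 = operand B
		ldi		r16,LOW(500)
		rcall	muls_16x16		; E3..E0 = -617000 in r5:r4:r3:r2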

Software Version

MACRO

The file "sw_mult.inc" must be included before anything else here. It consists partly of macros that provide the multiply operations used in the other MUL include files, above all the correct generation of the zero and carry flags!

 
.macro mul_
        ;---------------------------------
        ;mul instruction implemented in software as a macro
        ;---------------------------------
        clr     r1      ;result = 0
        push    @0      ;save the multiplicand
        sec             ;the multiplicand also serves as the loop counter
        ror     @0      ;shift a 1 in, LSB out into carry
        rjmp    mul2    ;on the first pass, evaluate the LSB right away
mul1:   lsr     @0      ;next bit (1..7, low bits first) of the multiplicand into carry
        breq    mul4    ;when the 1 from sec (above) is back in the carry, done
mul2:   brcc    mul3    ;if 0, skip the addition
        add     r1,@1   ;otherwise add the multiplier to the high byte
mul3:   ror     r1      ;shift the product right, including a possible carry
        ror     r0
        rjmp    mul1    ;one more round (8 in total)
mul4:   push	r16
		in		r16,SREG		;keep only the I flag of SREG
		andi	r16,0x80
		out		SREG,r16
		sbrc	r1,7			;set carry if bit 15 of the result is set,
		sec						;as the hardware MUL would do
		pop		r16
		pop     @0      ;restore the multiplicand
.endm
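Used on its own, the macro behaves like the hardware MUL instruction; a minimal sketch:

		ldi		r20,200
		ldi		r16,100
		mul_	r20,r16			; r1:r0 = 20000 (0x4E20), same as "mul r20,r16"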



.macro	muls_
		push	r16
		push	r17
		push	r18
		clt
		mov		r1,@0
		mov		r0,@1
		clr		r17  
		sbrc	r1,7
		inc		r17
		sbrc	r0,7
		inc		r17
		sbrc	r17,0
		set
		mov		r16,r1
		rcall	_muls_
		andi	r17,0x82
		sbrc	r17,SREG_Z	
		rjmp	PC+3
		brtc	PC+2
		sbr		r17,(1<<SREG_C)
		out		SREG,r17
		pop		r18
		pop		r17
		pop		r16
.endm


_muls_:	sub		r1,r1			; clear result High byte and carry
		ldi		r18,8			; init loop counter
_m8s_1:	brcc	_m8s_2			; if carry (previous bit) set
		add		r1,r16			; add multiplicand to result High byte
_m8s_2:	sbrc	r0,0			; if current bit set
		sub		r1,r16			; subtract multiplicand from result High
		asr		r1				; shift right result High byte
		ror		r0				; shift right result L byte and multiplier
		in		r17,SREG
		dec		r18				; decrement loop counter
		brne	_m8s_1			; if not done, loop more
		ret

.macro	mulsu_
		push	r16
		push	r17
		push	r18
		mov		r1,@0
		mov		r0,@1
		mov		r16,r1
		clt
		sbrc	r16,7
		set
		sbrc	r16,7
		neg		r16
		rcall	_mulsu_
		andi	r17,0x83
		brtc	PC+6
		com		r1
		com		r0
		ldi		r16,1	
		add		r0,r16
		adc		r1,r18
		sbrc	r1,7   
		sbr		r17,(1<<SREG_C)
		out		SREG,r17	
		pop		r18
		pop		r17
		pop		r16
.endm

_mulsu_:clr		r1			;clear result High byte
		ldi		r18,8		;init loop counter
		lsr		r0			;rotate multiplier	
m8su_1:	brcc	m8su_2		; carry set 
		add 	r1,r16		; add multiplicand to result High byte
m8su_2:	ror		r1			; rotate right result High byte
		ror		r0			; rotate right result L byte and multiplier
		in		r17,SREG
		dec		r18			; decrement loop counter
		brne	m8su_1		; if not done, loop more
		ret

unsigned x unsigned

_mul_16x8_24

_mul_16x16_32

_mul_24x8_32

_mul_24x16_40

_mul_24x24_48

_mul_32x8_40

_mul_32x16_48

_mul_32x24_56

_mul_32x32_64

 
;***************************************************
;* Multiply 32x32 -> 64 bit unsigned
;*  AH   AM  AL AXL   BH  BM  BL  BXL    E7 E6 E5 E4 E3 E2 E1 E0
;*  R23:R22:R21:R20 x R19:R18:R17:R16 -> R9:R8:R7:R6:R5:R4:R3:R2
;*
;*	Scratch registers:	R10,R11,R12,R13,R14
;*  108 cycles + 4 (RET) = 112 Cycles
;*
mul32x32_64:
		push	R10
		push	R11
		push	R12
		push	R13
		push	R14
		clr		R14

;   R9:R2 = R19:R16 * R23:R20

		sub R14,R14                  ; zero
                      
		mul_ 	R20,R16                ; AXL*BXL
		movw 	R3:R2,R1:R0
		mul_ 	R21,R17                ; AL*BL
		movw 	R5:R4,R1:R0
		mul_ 	R22,R18                ; AM*BM
		movw 	R7:R6,R1:R0  
		mul_ 	R23,R19                ; AH*BH
		movw 	R9:R8,R1:R0

		mul_ 	R22,R19                ; AH*BM
		movw 	R13:R12,R1:R0
		mul_ 	R21,R18                ; AM*BL
		movw 	R11:R10,R1:R0
		mul_ 	R20,R17                ; AL*BXL

		add 	R3, R0
		adc 	R4, R1
		adc 	R5, R10
		adc 	R6, R11
		adc 	R7, R12
		adc 	R8,R13
		adc 	R9,R14

		mul_ 	R23,R18                ; AM*BH
		movw 	R13:R12,R1:R0
		mul_ 	R22,R17                ; AL*BM
		movw 	R11:R10,R1:R0
		mul_ 	R21,R16                ; AXL*BL

		add 	R3, R0
		adc 	R4, R1
		adc 	R5, R10
		adc 	R6, R11
		adc 	R7, R12
		adc 	R8,R13
		adc 	R9,R14

		mul_ 	R21,R19                ; AH*BL
		movw 	R11:R10,R1:R0
		mul_ 	R20,R18                ; AM*BXL

		add 	R4, R0
		adc 	R5, R1
		adc 	R6, R10
		adc 	R7, R11
		adc 	R8,R14
		adc 	R9,R14

		mul_ 	R23,R17                ; AL*BH
		movw 	R11:R10,R1:R0
		mul_ 	R22,R16                ; AXL*BM

		add 	R4, R0
		adc 	R5, R1
		adc 	R6, R10
		adc 	R7, R11
		adc 	R8,R14
		adc 	R9,R14

		mul_ 	R20,R19                ; AH*BXL
		movw 	R11:R10,R1:R0
		mul_ 	R23,R16                ; AXL*BH

		add 	R5, R0
		adc 	R6, R1
		adc 	R7, R14
		adc 	R8,R14
		adc 	R9,R14

		add 	R5, R10
		adc 	R6, R11
		adc 	R7, R14
		adc 	R8,R14
		adc 	R9,R14
		pop		R14
		pop		R13
		pop		R12
		pop		R11
		pop		R10  
		ret

signed x unsigned

_mulsu_16x8_24

_mulsu_16x16_32

_mulsu_24x8_32

_mulsu_24x16_40

_mulsu_24x24_48

_mulsu_32x8_40

 
;***************************************************
;* Multiply 32x8 -> 40 bit
;*       AH  AM  AL AXL   BL     E4 E3 E2 E1 E0
;*      R23:R22:R21:R20 x R16 -> R6:R5:R4:R3:R2
;*
;*  19 cycles + 4 (RET) = 23 Cycles
;*
mulsu_32x8:
		push	R10
		clr		R10
		clr		r4
		mul_	R20,R16			; AXL x BL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		mulsu_	R23,R16			; AH x BL
		mov		R5,R0			; r0 -> E3
		mov		R6,R1			; r1 -> E4
		mul_	R21,R16			; AL x BL	
		add		R3,R0			; R0 -> E1
		adc		R4,R1			; R1 -> E2
		adc		R5,R10			; T0 -> E3
		adc		R6,R10			; T0 -> E4
		mul_	R22,R16			; AM x BL
		add		R4,R0			; R0 -> E2
		adc		R5,R1			; R1 -> E3
		adc		R6,R10			; T0 -> E4
		pop		R10
		ret

_mulsu_32x16_48

 
;*****************************************************************************
;* Multiply 32x16 -> 48 bit
;*
;*			signed  x  unsigned
;*  AH   AM  AL AXL   BH  BL     E5 E4 E3 E2 E1 E0
;*  R23:R22:R21:R20 x R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;*  56 cycles + 4 (RET) = 60 Cycles
;*******************************************************************************
mulsu_32x16:
		push	R8
		push	R9
		push	R10	
		clr		R10
		mul_	R20,R16			; AXL x BL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		mul_	R22,R16			; AM x BL
		movw	R5:R4,R1:R0		; r1:r0 -> E3:E2
		mulsu_	R23,R17			; AH x BH
		movw	R7:R6,R1:R0		; r1:r0 -> E5:E4

		mulsu_	R23,R16			; AH x BL
		movw	R9:R8,R1:R0		; r1:r0 -> T4:T3
		sbc		R7,R10			; signed DUMMY T5	
		mul_	R21,R16			; AL x BL
		add		R3,R0			; r0 ----> + E1
		adc		R4,R1			; r1 +c -> + E2
		adc		R5,R8			; T3 +c -> + E3
		adc		R6,R9			; T4 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5

		mul_	R17,R20			; AXL x BH -> BH x AXL
		add		R3,R0			; r0 ----> + E1
		adc		R4,R1			; r1 +c -> + E2
		adc		R5,R10			; T5 +c -> + E3
		adc		R6,R10			; T5 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5
		
		mul_	R17,R21			; AL x BH -> BH x AL
		add		R4,R0			; r0 +c -> + E2
		adc		R5,R1			; r1 +c -> + E3
		adc		R6,R10			; T5 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5
		
		mul_	R17,R22			; BH x AM
		add		R5,R0			; r0 +c -> + E3
		adc		R6,R1			; r1 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5
		pop		R10
		pop		R9
		pop		R8	
		ret

_mulsu_32x24_56

_mulsu_32x32_64

signed x signed

_muls_16x8_24

_muls_16x16_32

 
;******************************************************************************
;*
;* FUNCTION
;*	muls16x16_32
;* DESCRIPTION
;*	Signed multiply of two 16bits numbers with 32bits result.
;* USAGE		E3:E2:E1:E0 =  AH:AL  *  BH:BL
;*				r5:r4:r3:r2 = r21:r20 * r17:r16
;* STATISTICS
;*	Cycles :	19 + ret
;*	Words :		15 + ret
;*	Scratch register usage: r6
;* NOTE
;*	The routine is non-destructive to the operands.
;*
;******************************************************************************

muls_16x16:
		push	R6
		clr		R6
		muls_	R21,R17			; (signed)ah * (signed)bh
		movw	R5:R4,R1:R0
		mul_	R20,R16			; al * bl
		movw	R3:R2,R1:R0
		mulsu_	R21, R16		; (signed)ah * bl
		sbc		R5, R6
		add		R3, R0
		adc		R4, R1
		adc		R5, R6
		mulsu_	R17, R20		; (signed)bh * al
		sbc		R5, R6
		add		R3, R0
		adc		R4, R1
		adc		R5, R6
		pop		R6
		ret

_muls_24x8_32

 
;***************************************************
;* Multiply 24x8 -> 32 bit
;*       AH  AM  AL   BL     E3 E2 E1 E0
;*      R22:R21:R20 x R16 -> R5:R4:R3:R2
;*
;*  19 cycles + 4 (RET) = 23 Cycles
;*
muls24x8_32:
		push	R10
		clr		R10
		mulsu_	R16,R20			; AL x BL -> BL x AL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		muls_	R22,R16			; AH x BL
		movw	R5:R4,R1:R0		; r1:r0 -> E3:E2
		mul_	R21,R16			; AM x BL
		add		R3,R0			; R0 -> E1
		adc		R4,R1			; R1 -> E2
		adc		R5,R10			; T0 -> E3
		pop		R10  
		ret

_muls_24x16_40

 
;***************************************************
;* Multiply 24x16 -> 40 bit
;*       AH  AM  AL   BH  BL     E4 E3 E2 E1 E0
;*      R22:R21:R20 x R17:R16 -> R6:R5:R4:R3:R2
;*
;*  108 cycles + 4 (RET) = 112 Cycles
;*
muls_24x16:
		push	R10		
		clr		R10
		mul_	R20,R16			; AL x BL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		muls_	R22,R17			; AH x BH
		mov		R5,R0			; r0 -> E3
		mov		R6,R1			; r1 -> E4
		clr		R4				;clr -> E2
		
		mul_	R21,R16			; AM x BL
		add		R3,R0			; r0 -> E1
		adc		R4,R1			; r1 -> E2
		
		mulsu_	R22,R16			; AH x BL
		sbc		R6,R10			; signed DUMMY T4
		add		R4,R0			; r0 -> E2
		adc		R5,R1			; r1 -> E3
		adc		R6,R10			; T4 -> E4

		mulsu_	R17,R20			; AL x BH  ->  BH x AL
		sbc		R5,R10			; signed DUMMY T3
		sbc		R6,R10			; signed DUMMY T4
		add		R3,R0			; r0 -> E1
		adc		R4,R1			; r1 -> E2
		adc		R5,R10			; T3 -> E3
		adc		R6,R10			; T4 -> E4
		
		mulsu_	R17,R21			; AM x BH  ->  BH x AM
		sbc		R6,R10			; signed DUMMY T4
		add		R4,R0			; r0 -> E2
		adc		R5,R1			; r1 -> E3
		adc		R6,R10			; T4 -> E4
		pop		R10  
		ret

_muls_24x24_48

 
;***************************************************
;* Multiply 24x24 -> 48 bit
;*       AH  AM  AL   BH  BM  BL     E5 E4 E3 E2 E1 E0
;*      R22:R21:R20 x R18:R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;*  108 cycles + 4 (RET) = 112 Cycles
;*
muls_24x24:
		push	R8
		push	R9
		push	R10
		clr		R10
		mul_	R20,R16			; AL x BL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		mul_	R21,R17			; AM x BM
		movw	R5:R4,R1:R0		; r1:r0 -> E3:E2
		muls_	R22,R18			; AH x BH
		movw	R7:R6,R1:R0		; r1:r0 -> E5:E4
		
		mulsu_	R22,R17			; AH x BM
		sbc		R7,R10			; signed DUMMY T2
		movw	R9:R8,R1:R0		; r1:r0 -> T1:T0
		mul_	R21,R16			; AM x BL
		add		R3,R0			; R0 -> E1
		adc		R4,R1			; R1 -> E2
		adc		R5,R8			; T0 -> E3
		adc		R6,R9			; T1 -> E4
		adc		R7,R10			; T2 -> E5
		
		mulsu_	R18,R21			; AM x BH  ->  BH x AM
		sbc		R7,R10			; signed DUMMY T2
		movw	R9:R8,R1:R0		; r1:r0 -> T1:T0
		mul_	R20,R17			; AL x BM
		add		R3,R0			; R0 -> E1
		adc		R4,R1			; R1 -> E2
		adc		R5,R8			; T0 -> E3
		adc		R6,R9			; T1 -> E4
		adc		R7,R10			; T2 -> E5

		mulsu_	R22,R16			; AH x BL
		sbc		R6,R10			; signed DUMMY T2
		sbc		R7,R10			; signed DUMMY T2
		add		R4,R0			; R0 -> E2
		adc		R5,R1			; R1 -> E3
		adc		R6,R10			; T2 -> E4
		adc		R7,R10			; T2 -> E5

		mulsu_	R18,R20			; AL x BH -> BH x AL
		sbc		R6,R10			; signed DUMMY T2
		sbc		R7,R10			; signed DUMMY T2
		add		R4,R0			; R0 -> E2
		adc		R5,R1			; R1 -> E3
		adc		R6,R10			; T2 -> E4
		adc		R7,R10			; T2 -> E5
		pop		R10
		pop		R9
		pop		R8  
		ret

_muls_32x8_40

 
;***************************************************
;* Multiply 32x8 -> 40 bit
;*       AH  AM  AL AXL   BL     E4 E3 E2 E1 E0
;*      R23:R22:R21:R20 x R16 -> R6:R5:R4:R3:R2
;*
;*  19 cycles + 4 (RET) = 23 Cycles
;*
muls_32x8:
		push	R10
		clr		R10
		clr		R6
		clr		r5
		clr		r4
		mulsu_	R16,R20			; BL x AXL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		mulsu_	R16,R21			; BL x AL	
		sbc		R5,R10			; signed DUMMY T0
		sbc		R6,R10			; signed DUMMY T0
		add		R3,R0			; R0 -> E1
		adc		R4,R1			; R1 -> E2
		adc		R5,R10			; T0 -> E3
		adc		R6,R10			; T0 -> E4
		mulsu_	R16,R22			; BL x AM
		sbc		R6,R10			; signed DUMMY T0
		add		R4,R0			; R0 -> E2
		adc		R5,R1			; R1 -> E3
		adc		R6,R10			; T0 -> E4
		muls_	R16,R23			; BL x AH
		add		R5,R0			; R0 -> E3
		adc		R6,R1			; R1 -> E4
		pop		R10
		ret

_muls_32x16_48

 
;***************************************************
;* Multiply 32x16 -> 48 bit
;*  AH   AM  AL AXL   BH  BL     E5 E4 E3 E2 E1 E0
;*  R23:R22:R21:R20 x R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;*  56 cycles + 4 (RET) = 60 Cycles
;*
muls_32x16:
		push	R8
		push	R9
		push	R10	
		clr		R10
		mul_	R20,R16			; AXL x BL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		mul_	R22,R16			; AM x BL
		movw	R5:R4,R1:R0		; r1:r0 -> E3:E2
		muls_	R23,R17			; AH x BH
		movw	R7:R6,R1:R0		; r1:r0 -> E5:E4

		mulsu_	R23,R16			; AH x BL
		movw	R9:R8,R1:R0		; r1:r0 -> T4:T3
		sbc		R7,R10			; signed DUMMY T5	
		mul_	R21,R16			; AL x BL
		add		R3,R0			; r0 ----> + E1
		adc		R4,R1			; r1 +c -> + E2
		adc		R5,R8			; T3 +c -> + E3
		adc		R6,R9			; T4 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5

		mulsu_	R17,R20			; AXL x BH -> BH x AXL
		sbc		R5,R10			; signed DUMMY T5
		sbc		R6,R10			; signed DUMMY T5
		sbc		R7,R10			; signed DUMMY T5
		add		R3,R0			; r0 ----> + E1
		adc		R4,R1			; r1 +c -> + E2
		adc		R5,R10			; T5 +c -> + E3
		adc		R6,R10			; T5 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5
		
		mulsu_	R17,R21			; AL x BH -> BH x AL
		sbc		R6,R10			; signed DUMMY T5
		sbc		R7,R10			; signed DUMMY T5
		add		R4,R0			; r0 +c -> + E2
		adc		R5,R1			; r1 +c -> + E3
		adc		R6,R10			; T5 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5
		
		mulsu_	R17,R22			; BH x AM
		sbc		R7,R10			; signed DUMMY T5
		add		R5,R0			; r0 +c -> + E3
		adc		R6,R1			; r1 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5
		pop		R10
		pop		R9
		pop		R8	
		ret

_muls_32x24_56

_muls_32x32_64

 
;***************************************************
;* Multiply 32x32 -> 64 bit
;*  AH   AM  AL AXL   BH  BM  BL  BXL    E7 E6 E5 E4 E3 E2 E1 E0
;*  R23:R22:R21:R20 x R19:R18:R17:R16 -> R9:R8:R7:R6:R5:R4:R3:R2
;*
;*  108 cycles + 4 (RET) = 112 Cycles
;*
muls_32x32:
		push	R10
		push	R11
		push	r12
		push	r13
		push	r14
		clr		R14
		mul_	R20,R16			; AXL x BXL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		mul_	R21,R17			; AL x BL
		movw	R5:R4,R1:R0		; r1:r0 -> E3:E2
		mul_	R22,R18			; AM x BM
		movw	R7:R6,R1:R0		; r1:r0 -> E5:E4
		muls_	R23,R19			; AH x BH
		movw	R9:R8,R1:R0		; r1:r0 -> E7:E6

		mulsu_	R23,R18			; AH x BM
		movw	R13:R12,R1:R0	; r1:r0 -> T5:T4
		sbc		R9,R14			; signed DUMMY T6
		mul_	R22,R17			; AM x BL
		movw	R11:R10,R1:R0	; r1:r0 -> T3:T2
		mul_	R21,R16			; AL x BXL
		add		R3,R0			; r0 ----> + E1
		adc		R4,R1			; r1 +c -> + E2
		adc		R5,R10			; T2 +c -> + E3
		adc		R6,R11			; T3 +c -> + E4
		adc		R7,R12			; T4 +c -> + E5
		adc		R8,R13			; T5 +c -> + E6
		adc		R9,R14			; T6 +c -> + E7

		mulsu_	R19,R22			; AM x BH -> BH x AM
		movw	R13:R12,R1:R0	; r1:r0 -> T5:T4
		sbc		R9,R14			; signed DUMMY T6
		mul_	R21,R18			; AL x BM
		movw	R11:R10,R1:R0	; r1:r0 -> T3:T2
		mul_	R20,R17			; AXL x BL
		add		R3,R0			; r0 ----> + E1
		adc		R4,R1			; r1 +c -> + E2
		adc		R5,R10			; T2 +c -> + E3
		adc		R6,R11			; T3 +c -> + E4
		adc		R7,R12			; T4 +c -> + E5
		adc		R8,R13			; T5 +c -> + E6
		adc		R9,R14			; T6 +c -> + E7

		mulsu_	R23,R17			; AH x BL
		movw	R11:R10,R1:R0	; r1:r0 -> T3:T2
		sbc		R9,R14			; signed DUMMY T6
		sbc		R8,R14			; signed DUMMY T6
		mul_	R20,R18			; AXL x BM
		add		R4,R0			; r0 +c -> + E2
		adc		R5,R1			; r1 +c -> + E3
		adc		R6,R10			; T3 +c -> + E4
		adc		R7,R11			; T4 +c -> + E5
		adc		R8,R14			; T5 +c -> + E6
		adc		R9,R14			; T6 +c -> + E7

		mulsu_	R19,R21			; AL x BH -> BH x AL
		movw	R11:R10,R1:R0	; r1:r0 -> T3:T2
		sbc		R9,R14			; signed DUMMY T6
		sbc		R8,R14			; signed DUMMY T6
		mul_	R22,R16			; AM x BXL
		add		R4,R0			; r0 +c -> + E2
		adc		R5,R1			; r1 +c -> + E3
		adc		R6,R10			; T3 +c -> + E4
		adc		R7,R11			; T4 +c -> + E5
		adc		R8,R14			; T5 +c -> + E6
		adc		R9,R14			; T6 +c -> + E7

		mulsu_	R19,R20			; AXL x BH -> BH x AXL
		sbc		R9,R14			; signed DUMMY T6
		sbc		R8,R14			; signed DUMMY T6
		sbc		R7,R14			; signed DUMMY T6
		add		R5,R0			; r0 +c -> + E3
		adc		R6,R1			; r1 +c -> + E4
		adc		R7,R14			; T6 +c -> + E5
		adc		R8,R14			; T6 +c -> + E6
		adc		R9,R14			; T6 +c -> + E7

		mulsu_	R23,R16			; AH x BXL
		sbc		R9,R14			; signed DUMMY T6
		sbc		R8,R14			; signed DUMMY T6
		sbc		R7,R14			; signed DUMMY T6
		add		R5,R0			; r0 +c -> + E3
		adc		R6,R1			; r1 +c -> + E4
		adc		R7,R14			; T6 +c -> + E5
		adc		R8,R14			; T6 +c -> + E6
		adc		R9,R14			; T6 +c -> + E7
		pop		R14
		pop		R13
		pop		R12
		pop		R11
		pop		R10  
		ret

Hardware Version

The cycle times of the individual functions are summarized in the following table.

HW      16x8  16x16  24x8  24x16  24x24  32x8  32x16  32x24  32x32
mul      14    26     21    40     57     29    49     75    102
mulsu    20    27     21    41     60     29    54     78    106
muls     19    28     23    42     63     38    60     86    112
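At a clock frequency of 16 MHz, for example, the 49 cycles of the hardware 32x16 mul routine correspond to roughly 3.1 µs (49 / 16 MHz).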

unsigned x unsigned

mul_16x8_24

;******************************************************************************
;*  FUNCTION	mul16x8_24.inc
;*  Multiply 	16x8 -> 24 bit 		(unsigned * unsigned)
;*
;* 		AH  AL    BL     E2 E1 E0
;*  	R21:R20 x R16 -> R4:R3:R2
;*
;*	Scratch registers:	-
;*  10 cycles + 4 (RET) = 14 Cycles
;*
;******************************************************************************
mul_16x8:
;   R5:R2 = R17:R16 * R21:R20

		mul 	R20,R16					; AL * BL
		movw 	R3:R2,R1:R0
		clr		R4
		mul 	R21,R16					; AH * BL
		add		R3,R0
		adc		R4,R1
		ret

mul_16x16_32

;******************************************************************************
;*  FUNCTION	mul16x16_32.inc
;*  Multiply 	16x16 -> 32 bit 		(unsigned * unsigned)
;*
;* 		AH  AL    BH  BL     E3 E2 E1 E0
;*  	R21:R20 x R17:R16 -> R5:R4:R3:R2
;*
;*	Scratch registers:	-
;*  22 cycles + 4 (RET) = 26 Cycles
;*
;******************************************************************************
mul_16x16:
		push	R9
;   R5:R2 = R17:R16 * R21:R20
        sub		R9,R9            
		mul 	R20,R16					; AL * BL
		movw 	R3:R2,R1:R0
		mul 	R21,R17					; AH * BH
		movw 	R5:R4,R1:R0
		
		mul 	R21,R16					; AH * BL
		add		R3,R0
		adc		R4,R1
		adc		R5,R9
		
		mul		R20,R17					; AL * BH
		add		R3,R0
		adc		R4,R1
		adc		R5,R9
		pop		R9
		ret

mul_24x8_32

;******************************************************************************
;*  FUNCTION	mul24x8_32.inc
;*  Multiply 	24x8 -> 32 bit 		(unsigned * unsigned)
;*
;* 		AH  AM  AL    BL     E3 E2 E1 E0
;*  	R22:R21:R20 x R16 -> R5:R4:R3:R2
;*
;*	Scratch registers:	-
;*  17 cycles + 4 (RET) = 21 Cycles
;*
;******************************************************************************
mul_24x8:
		push	R9
;   R5:R2 = R16 * R22:R20
		sub		R9,R9				; NULL
		mul 	R20,R16				; AL*BL
		movw 	R3:R2,R1:R0
		mul 	R22,R16				; AH*BL
		movw 	R5:R4,R1:R0

		mul		R21,R16				; AM*BL
		add		R3,R0
		adc		R4,R1
		adc		R5,R9
		pop		R9		
		ret

mul_24x16_40

;******************************************************************************
;*  FUNCTION	mul24x16_40.inc
;*  Multiply 	24x16 -> 40 bit 		(unsigned * unsigned)
;*
;* 		AH  AM  AL    BH  BL     E4 E3 E2 E1 E0
;*  	R22:R21:R20 x R17:R16 -> R6:R5:R4:R3:R2
;*
;*	Scratch registers:	-
;*  36 cycles + 4 (RET) = 40 Cycles
;*
;******************************************************************************
mul_24x16:
		push	R9
;   R6:R2 = R17:R16 * R22:R20

		sub		R4,R4				; NULL
		sub		R9,R9
		mul 	R20,R16				; AL*BL
		movw 	R3:R2,R1:R0
		mul 	R22,R17				; AH*BH
		mov 	R5,R0
		mov 	R6,R1

		mul		R21,R16				; AM*BL
		add		R3,R0
		adc		R4,R1
		adc		R5,R9
		adc		R6,R9
		
		mul		R22,R16				; AH*BL
		add		R4,R0
		adc		R5,R1
		adc		R6,R9

		mul		R20,R17				; AL*BH
		add		R3,R0
		adc		R4,R1
		adc		R5,R9
		adc		R6,R9

		mul		R21,R17				; AM*BH
		add		R4,R0
		adc		R5,R1
		adc		R6,R9
		pop		R9		
		ret

mul_24x24_48

;******************************************************************************
;*  FUNCTION	mul24x24_48.inc
;*  Multiply 	24x24 -> 48 bit 		(unsigned * unsigned)
;*
;* 		AH  AM  AL    BH  BM  BL     E5 E4 E3 E2 E1 E0
;*  	R22:R21:R20 x R18:R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;*	Scratch registers:	-
;*  53 cycles + 4 (RET) = 57 Cycles
;*
;******************************************************************************
mul_24x24:
		push	R9
		push	R10
		push	R11
		
;   R7:R2 = R18:R16 * R22:R20

		sub		R9,R9				; NULL
		mul 	R20,R16				; AL*BL
		movw 	R3:R2,R1:R0
		mul 	R22,R16				; AH*BL
		movw 	R5:R4,R1:R0
		mul 	R22,R18				; AH*BH
		movw 	R7:R6,R1:R0

		mul		R21,R16				; AM*BL
		movw	R11:R10,R1:R0
		mul		R22,R17				; AH*BM		
		add		R3,R10
		adc		R4,R11
		adc		R5,R0
		adc		R6,R1
		adc		R7,R9

		mul		R20,R17				; AL*BM
		movw	R11:R10,R1:R0
		mul		R21,R18				; AM*BH
		add		R3,R10
		adc		R4,R11
		adc		R5,R0
		adc		R6,R1
		adc		R7,R9
		
		mul		R21,R17				; AM*BM	
		add		R4,R0
		adc		R5,R1
		adc		R6,R9
		adc		R7,R9

		mul		R20,R18				; AL*BH
		add		R4,R0
		adc		R5,R1
		adc		R6,R9
		adc		R7,R9
				
		pop		R11
		pop		R10
		pop		R9
		ret

mul_32x8_40

;******************************************************************************
;*  FUNCTION	mul32x8_40.inc
;*  Multiply 	32x8 -> 40 bit 		(unsigned * unsigned)
;*
;* 		AH  AM  AL  AXL   BL     E4 E3 E2 E1 E0
;*  	R23:R22:R21:R20 x R16 -> R6:R5:R4:R3:R2
;*
;*	Scratch registers:	-
;*  25 cycles + 4 (RET) = 29 Cycles
;*
;******************************************************************************
mul_32x8:
		push	R9
;   R6:R2 = R16 * R23:R20

		sub		R4,R4				; NULL
        sub		R9,R9            
		mul 	R20,R16				; AXL*BL
		movw 	R3:R2,R1:R0 
		mul 	R23,R16				; AH*BL
		mov 	R5,R0
		mov 	R6,R1

		mul 	R21,R16				; AL*BL
		add		R3,R0
		adc		R4,R1
		adc		R5,R9
		adc		R6,R9

		mul 	R22,R16				; AM*BL
		add		R4,R0
		adc		R5,R1
		adc		R6,R9
		pop		R9
		ret

mul_32x16_48

;******************************************************************************
;*  FUNCTION	mul32x16_48.inc
;*  Multiply 	32x16 -> 48 bit 		(unsigned * unsigned)
;*
;* 		AH  AM  AL  AXL   BH  BL     E5 E4 E3 E2 E1 E0
;*  	R23:R22:R21:R20 x R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;*	Scratch registers:	-
;*  45 cycles + 4 (RET) = 49 Cycles
;*
;******************************************************************************
mul_32x16:
		push	R9
;   R7:R2 = R17:R16 * R23:R20
		sub		R9,R9				; NULL
		mul 	R20,R16				; AXL*BL
		movw 	R3:R2,R1:R0
		mul 	R22,R16				; AM*BL
		movw 	R5:R4,R1:R0
		mul 	R23,R17				; AH*BH
		movw 	R7:R6,R1:R0

		mul		R21,R16				; AL*BL
		add		R3,R0
		adc		R4,R1
		adc		R5,R9
		adc		R6,R9
		adc		R7,R9
		
		mul		R23,R16				; AH*BL
		add		R5,R0
		adc		R6,R1
		adc		R7,R9
		
		mul		R20,R17				; AXL*BH
		add		R3,R0
		adc		R4,R1
		adc		R5,R9
		adc		R6,R9
		adc		R7,R9
		
		mul		R21,R17				; AL*BH
		add		R4,R0
		adc		R5,R1
		adc		R6,R9
		adc		R7,R9
		
		mul		R22,R17				; AM*BH
		add		R5,R0
		adc		R6,R1
		adc		R7,R9
		pop		R9
		ret

mul_32x24_56

;******************************************************************************
;*  FUNCTION	mul32x24_56.inc
;*  Multiply 	32x24 -> 56 bit 		(unsigned * unsigned)
;*
;* 		AH  AM  AL  AXL   BH  BM  BL     E6 E5 E4 E3 E2 E1 E0
;*  	R23:R22:R21:R20 x R18:R17:R16 -> R8:R7:R6:R5:R4:R3:R2
;*
;*	Scratch registers:	-
;*  71 cycles + 4 (RET) = 75 Cycles
;*
;******************************************************************************
mul_32x24:
		push	R9
		push	R10
		push	R11
		
;   R8:R2 = R18:R16 * R23:R20

		sub		R6,R6				; NULL
		sub		R9,R9				; NULL
		mul 	R20,R16				; AXL*BL
		movw 	R3:R2,R1:R0
		mul 	R22,R16				; AM*BL
		movw 	R5:R4,R1:R0
		mul 	R23,R18				; AH*BH
		mov 	R7,R0
		mov 	R8,R1

		mul		R21,R16				; AL*BL
		movw	R11:R10,R1:R0
		mul		R23,R16				; AH*BL		
		add		R3,R10
		adc		R4,R11
		adc		R5,R0
		adc		R6,R1
		adc		R7,R9
		adc		R8,R9

		mul		R20,R17				; AXL*BM
		movw	R11:R10,R1:R0
		mul		R22,R17				; AM*BM
		add		R3,R10
		adc		R4,R11
		adc		R5,R0
		adc		R6,R1
		adc		R7,R9
		adc		R8,R9
		
		mul		R21,R17				; AL*BM
		movw	R11:R10,R1:R0
		mul		R23,R17				; AH*BM		
		add		R4,R10
		adc		R5,R11
		adc		R6,R0
		adc		R7,R1
		adc		R8,R9

		mul		R20,R18				; AXL*BH
		movw	R11:R10,R1:R0
		mul		R22,R18				; AM*BH
		add		R4,R10
		adc		R5,R11
		adc		R6,R0
		adc		R7,R1
		adc		R8,R9
		
		mul		R21,R18				; AL*BH
		add		R5,R0
		adc		R6,R1
		adc		R7,R9
		adc		R8,R9
				
		pop		R11
		pop		R10
		pop		R9
		ret

mul_32x32_64

;******************************************************************************
;*  FUNCTION	mul32x32_64.inc
;*  Multiply 	32x32 -> 64 bit 		(unsigned * unsigned)
;*
;* 		AH  AM  AL  AXL   BH  BM  BL  BXL    E7 E6 E5 E4 E3 E2 E1 E0
;*  	R23:R22:R21:R20 x R19:R18:R17:R16 -> R9:R8:R7:R6:R5:R4:R3:R2
;*
;*	Scratch registers:	-
;*  98 cycles + 4 (RET) = 102 Cycles
;*
;******************************************************************************
mul_32x32:
		push	R10
		push	R11
		push	R12
		push	R13
		push	R14
		clr		R14

;   R9:R2 = R19:R16 * R23:R20

		sub R14,R14                  ; zero
                      
		mul 	R20,R16                ; AXL*BXL
		movw 	R3:R2,R1:R0
		mul 	R21,R17                ; AL*BL
		movw 	R5:R4,R1:R0
		mul 	R22,R18                ; AM*BM
		movw 	R7:R6,R1:R0  
		mul 	R23,R19                ; AH*BH
		movw 	R9:R8,R1:R0

		mul 	R22,R19                ; AH*BM
		movw 	R13:R12,R1:R0
		mul 	R21,R18                ; AM*BL
		movw 	R11:R10,R1:R0
		mul 	R20,R17                ; AL*BXL

		add 	R3, R0
		adc 	R4, R1
		adc 	R5, R10
		adc 	R6, R11
		adc 	R7, R12
		adc 	R8,R13
		adc 	R9,R14

		mul 	R23,R18                ; AM*BH
		movw 	R13:R12,R1:R0
		mul 	R22,R17                ; AL*BM
		movw 	R11:R10,R1:R0
		mul 	R21,R16                ; AXL*BL

		add 	R3, R0
		adc 	R4, R1
		adc 	R5, R10
		adc 	R6, R11
		adc 	R7, R12
		adc 	R8,R13
		adc 	R9,R14

		mul 	R21,R19                ; AH*BL
		movw 	R11:R10,R1:R0
		mul 	R20,R18                ; AM*BXL

		add 	R4, R0
		adc 	R5, R1
		adc 	R6, R10
		adc 	R7, R11
		adc 	R8,R14
		adc 	R9,R14

		mul 	R23,R17                ; AL*BH
		movw 	R11:R10,R1:R0
		mul 	R22,R16                ; AXL*BM

		add 	R4, R0
		adc 	R5, R1
		adc 	R6, R10
		adc 	R7, R11
		adc 	R8,R14
		adc 	R9,R14

		mul 	R20,R19                ; AH*BXL
		movw 	R11:R10,R1:R0
		mul 	R23,R16                ; AXL*BH

		add 	R5, R0
		adc 	R6, R1
		adc 	R7, R14
		adc 	R8,R14
		adc 	R9,R14

		add 	R5, R10
		adc 	R6, R11
		adc 	R7, R14
		adc 	R8,R14
		adc 	R9,R14
		pop		R14
		pop		R13
		pop		R12
		pop		R11
		pop		R10  
		ret

signed x unsigned

mulsu_16x8_24

;******************************************************************************
;*  FUNCTION	mulsu16x8_24.inc
;*  Multiply 	16x8 -> 24 bit 		(signed * unsigned)
;*
;* 		AH  AL    BL     E2 E1 E0
;*  	R21:R20 x R16 -> R4:R3:R2
;*
;*	Scratch registers:	-
;*  16 cycles + 4 (RET) = 20 Cycles
;*
;******************************************************************************
mulsu_16x8:
		push	R10
		sub		R10,R10			; NULL
		clr		R4				
		mul		R20,R16			; AL x BL		(u*u)		
		movw	R3:R2,R1:R0
		adc		R4,R10
		mulsu	R21,R16			; AH x BL		(s*u)
		sbc		R4,R10		
		add		R3,R0
		adc		R4,R1
		pop		R10
		ret

mulsu_16x16_32

;******************************************************************************
;*  FUNCTION	mulsu16x16_32.inc
;*  Multiply 	16x16 -> 32 bit 		(signed * unsigned)
;*
;* 		AH  AL    BH  BL     E3 E2 E1 E0
;*  	R21:R20 x R17:R16 -> R5:R4:R3:R2
;*
;*	Scratch registers:	-
;*  23 cycles + 4 (RET) = 27 Cycles
;*
;******************************************************************************
mulsu_16x16:
		push	R10
		sub		R10,R10			; NULL
		
		mul		R20,R16			; AL x BL		(u*u)
		movw	R3:R2,R1:R0
		mulsu	R21,R17			; AH x BH		(s*u)
		movw	R5:R4,R1:R0
		
		mulsu	R21,R16			; AH x BL		(s*u)
		sbc		R5,R10
		add		R3,R0
		adc		R4,R1
		adc		R5,R10
		
		mul		R20,R17			; AL x BH		(u*u)
		add		R3,R0
		adc		R4,R1
		adc		R5,R10
		pop		R10
		ret

mulsu_24x8_32

;***********************************************************************************
;*
;*		FUNCTION	mulsu24x8_32.inc
;*		Multiply 	24x8 -> 32 bit			(signed * unsigned)
;*
;*      AH  AM  AL    BL     E3 E2 E1 E0
;*      R22:R21:R20 x R16 -> R5:R4:R3:R2
;*
;*  17 cycles + 4 (RET) = 21 Cycles
;*
;***********************************************************************************
mulsu_24x8:
		push	R10
		sub		R10,R10		
		mul		R20,R16			; AL x BL
		movw	R3:R2,R1:R0
		mulsu	R22,R16			; AH x BL
		movw	R5:R4,R1:R0	
		mul		R21,R16			; AM x BL				
		add		R3,R0
		adc		R4,R1
		adc		R5,R10
		pop		R10
		ret

mulsu_24x16_40

;***********************************************************************************
;*
;*		FUNCTION	mulsu24x16_40.inc
;*		Multiply 	24x16 -> 40 bit			(signed * unsigned)
;*
;*      AH  AM  AL    BH  BL     E4 E3 E2 E1 E0
;*      R22:R21:R20 x R17:R16 -> R6:R5:R4:R3:R2
;*
;*  37 cycles + 4 (RET) = 41 Cycles
;*
;***********************************************************************************
mulsu_24x16:
		push	R10
		clr		R4
		sub		R10,R10			; NULL
		
		mul		R20,R16			; AL x BL		(u*u)
		movw	R3:R2,R1:R0		
		mulsu	R22,R17			; AH x BH		(s*u)
		mov		R5,R0
		mov		R6,R1
		
		mul		R21,R16			; AM x BL		(u*u)
		add		R3,R0
		adc		R4,R1
		adc		R5,R10
		adc		R6,R10
		
		mulsu	R22,R16			; AH x BL		(s*u)
		sbc		R6,R10
		add		R4,R0
		adc		R5,R1
		adc		R6,R10

		mul		R20,R17			; AL x BH		(u*u)
		add		R3,R0
		adc		R4,R1
		adc		R5,R10
		adc		R6,R10
		
		mul		R21,R17			; AM x BH		(u*u)
		add		R4,R0
		adc		R5,R1
		adc		R6,R10
		
		pop		R10
		ret

mulsu_24x24_48

;***********************************************************************************
;*
;*		FUNCTION	mulsu24x24_48.inc
;*		Multiply 	24x24 -> 48 bit			(signed * unsigned)
;*
;*      AH  AM  AL    BH  BM  BL     E5 E4 E3 E2 E1 E0
;*      R22:R21:R20 x R18:R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;*  56 cycles + 4 (RET) = 60 Cycles
;*
;***********************************************************************************
mulsu_24x24:
		push	R8
		push	R9
		push	R10
		sub		R10,R10			; NULL
		mul		R20,R16			; AL x BL		(u*u)
		movw	R3:R2,R1:R0
		mul		R21,R17			; AM x BM		(u*u)
		movw	R5:R4,R1:R0
		mulsu	R22,R18			; AH x BH		(s*u)
		movw	R7:R6,R1:R0
		
		mulsu	R22,R17			; AH x BM		(s*u)
		sbc		R7,R10			; signed DUMMY
		movw	R9:R8,R1:R0
		mul		R21,R16			; AM x BL		(u*u)
		add		R3,R0
		adc		R4,R1
		adc		R5,R8
		adc		R6,R9
		adc		R7,R10
		
		mul		R21,R18			; AM x BH		(u*u)
		movw	R9:R8,R1:R0
		mul		R20,R17			; AL x BM		(u*u)
		add		R3,R0
		adc		R4,R1
		adc		R5,R8
		adc		R6,R9
		adc		R7,R10

		mulsu	R22,R16			; AH x BL		(s*u)
		sbc		R6,R10			; signed DUMMY
		sbc		R7,R10			; signed DUMMY
		add		R4,R0
		adc		R5,R1
		adc		R6,R10
		adc		R7,R10

		mul		R20,R18			; AL x BH		(u*u)
		add		R4,R0
		adc		R5,R1
		adc		R6,R10
		adc		R7,R10
		pop		R10
		pop		R9
		pop		R8  
		ret

mulsu_32x8_40

;***********************************************************************************
;*
;*		FUNCTION	mulsu32x8_40.inc
;*		Multiply 	32x8 -> 40 bit			(signed * unsigned)
;*
;*      AH  AM  AL  AXL   BL     E4 E3 E2 E1 E0
;*      R23:R22:R21:R20 x R16 -> R6:R5:R4:R3:R2
;*
;*  25 cycles + 4 (RET) = 29 Cycles
;*
;***********************************************************************************
mulsu_32x8:
		push	R10
		clr		R10
		clr		r4
		mul		R20,R16			; AXL x BL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		mulsu	R23,R16			; AH x BL
		mov		R5,R0			; r0 -> E3
		mov		R6,R1			; r1 -> E4
		mul		R21,R16			; AL x BL	
		add		R3,R0			; R0 -> E1
		adc		R4,R1			; R1 -> E2
		adc		R5,R10			; T0 -> E3
		adc		R6,R10			; T0 -> E4
		mul		R22,R16			; AM x BL
		add		R4,R0			; R0 -> E2
		adc		R5,R1			; R1 -> E3
		adc		R6,R10			; T0 -> E4
		pop		R10
		ret

mulsu_32x16_48

;***********************************************************************************
;*
;*		FUNCTION	mulsu32x16_48.inc
;*		Multiply 	32x16 -> 48 bit			(signed * unsigned)
;*
;*      AH  AM  AL  AXL   BH  BL     E5 E4 E3 E2 E1 E0
;*      R23:R22:R21:R20 x R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;*  50 cycles + 4 (RET) = 54 Cycles
;*
;***********************************************************************************
mulsu_32x16:
		push	R8
		push	R9
		push	R10	
		clr		R10
		mul		R20,R16			; AXL x BL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		mul		R22,R16			; AM x BL
		movw	R5:R4,R1:R0		; r1:r0 -> E3:E2
		mulsu	R23,R17			; AH x BH
		movw	R7:R6,R1:R0		; r1:r0 -> E5:E4

		mulsu	R23,R16			; AH x BL
		movw	R9:R8,R1:R0		; r1:r0 -> T4:T3
		sbc		R7,R10			; signed DUMMY T5	
		mul		R21,R16			; AL x BL
		add		R3,R0			; r0 ----> + E1
		adc		R4,R1			; r1 +c -> + E2
		adc		R5,R8			; T3 +c -> + E3
		adc		R6,R9			; T4 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5

		mul		R17,R20			; AXL x BH -> BH x AXL
		add		R3,R0			; r0 ----> + E1
		adc		R4,R1			; r1 +c -> + E2
		adc		R5,R10			; T5 +c -> + E3
		adc		R6,R10			; T5 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5
		
		mul		R17,R21			; AL x BH -> BH x AL
		add		R4,R0			; r0 +c -> + E2
		adc		R5,R1			; r1 +c -> + E3
		adc		R6,R10			; T5 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5
		
		mul		R17,R22			; BH x AM
		add		R5,R0			; r0 +c -> + E3
		adc		R6,R1			; r1 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5
		pop		R10
		pop		R9
		pop		R8	
		ret

mulsu_32x24_56

;***********************************************************************************
;*
;*		FUNCTION	mulsu32x24_56.inc
;*		Multiply 	32x24 -> 56 bit			(signed * unsigned)
;*
;*      AH  AM  AL  AXL   BH  BM  BL     E6 E5 E4 E3 E2 E1 E0
;*      R23:R22:R21:R20 x R18:R17:R16 -> R8:R7:R6:R5:R4:R3:R2
;*
;*  74 cycles + 4 (RET) = 78 Cycles
;*
;***********************************************************************************
mulsu_32x24:
		push	R9
		push	R10
		push	R11	
		sub		R9,R9
		clr		R6
		mul		R20,R16			; AXL x BL		(u*u)
		movw	R3:R2,R1:R0
		mul		R22,R16			; AM x BL		(u*u)
		movw	R5:R4,R1:R0
		mulsu	R23,R18			; AH x BH		(s*u)
		mov		R7,R0
		mov		R8,R1			

		mul		R21,R16			; AL x BL		(u*u)
		movw	R11:R10,R1:R0
		mulsu	R23,R16			; AH x BL		(s*u)
		sbc		R7,R9			; signed DUMMY
		sbc		R8,R9			; signed DUMMY
		add		R3,R10
		adc		R4,R11
		adc		R5,R0
		adc		R6,R1
		adc		R7,R9
		adc		R8,R9
		
		mul		R20,R17			; AXL x BM		(u*u)
		movw	R11:R10,R1:R0
		mul		R21,R18			; AL x BH		(u*u)
		add		R3,R10
		adc		R4,R11
		adc		R5,R0
		adc		R6,R1
		adc		R7,R9
		adc		R8,R9

		mul		R21,R17			; AL x BM		(u*u)
		movw	R11:R10,R1:R0
		mulsu	R23,R17			; AH x BM		(s*u)
		sbc		R8,R9			; signed DUMMY
		add		R4,R10
		adc		R5,R11
		adc		R6,R0
		adc		R7,R1
		adc		R8,R9
		
		mul		R22,R17			; AM*BM			(u*u)
		add		R5,R0
		adc		R6,R1
		adc		R7,R9
		adc		R8,R9
		
		mul		R20,R18			; AXL x BH		(u*u)
		movw	R11:R10,R1:R0
		mul		R22,R18			; AM x BH		(u*u)
		add		R4,R10
		adc		R5,R11
		adc		R6,R0
		adc		R7,R1
		adc		R8,R9
		
		pop		R11
		pop		R10
		pop		R9	
		ret

mulsu_32x32_64

;***********************************************************************************
;*
;*		FUNCTION	mulsu32x32_64.inc
;*		Multiply 	32x32 -> 64 bit			(signed * unsigned)
;*
;*      AH  AM  AL  AXL   BH  BM  BL  BXL    E7 E6 E5 E4 E3 E2 E1 E0
;*      R23:R22:R21:R20 x R19:R18:R17:R16 -> R9:R8:R7:R6:R5:R4:R3:R2
;*
;*  102 cycles + 4 (RET) = 106 Cycles
;*
;***********************************************************************************
mulsu_32x32:
		push	R10
		push	R11
		push	R12
		push	R13
		push	R14
		sub		R14,R14			; NULL
		
		mul		R20,R16			; AXL x BXL		(u*u)
		movw	R3:R2,R1:R0
		mul		R21,R17			; AL x BL		(u*u)
		movw	R5:R4,R1:R0
		mul		R22,R18			; AM x BM		(u*u)
		movw	R7:R6,R1:R0
		mulsu	R23,R19			; AH x BH		(s*u)
		movw	R9:R8,R1:R0		

		mul		R20,R17			; AXL x BL		(u*u)
		movw	R11:R10,R1:R0
		mul		R21,R18			; AL x BM		(u*u)
		movw	R13:R12,R1:R0
		mul		R22,R19			; AM x BH		(u*u)	
		add		R3,R10
		adc		R4,R11
		adc		R5,R12
		adc		R6,R13
		adc		R7,R0
		adc		R8,R1
		adc		R9,R14
		
		mul		R21,R16			; AL x BXL		(u*u)
		movw	R11:R10,R1:R0
		mul		R22,R17			; AM x BL		(u*u)
		movw	R13:R12,R1:R0
		mulsu	R23,R18			; AH x BM		(s*u)
		sbc		R9,R14
		add		R3,R10
		adc		R4,R11
		adc		R5,R12
		adc		R6,R13
		adc		R7,R0
		adc		R8,R1
		adc		R9,R14
				
		mul		R20,R18			; AXL x BM		(u*u)
		movw	R11:R10,R1:R0
		mul		R21,R19			; AL x BH		(u*u)
		add		R4,R10
		adc		R5,R11
		adc		R6,R0
		adc		R7,R1
		adc		R8,R14
		adc		R9,R14

		mul		R22,R16			; AM x BXL		(u*u)
		movw	R11:R10,R1:R0
		mulsu	R23,R17			; AH x BL		(s*u)
		sbc		R8,R14			; signed DUMMY
		sbc		R9,R14			; signed DUMMY
		add		R4,R10
		adc		R5,R11
		adc		R6,R0
		adc		R7,R1
		adc		R8,R14
		adc		R9,R14

		mulsu	R23,R16			; AH*BXL		(s*u)
		sbc		R7,R14
		sbc		R8,R14
		sbc		R9,R14
		add		R5,R0
		adc		R6,R1
		adc		R7,R14
		adc		R8,R14
		adc		R9,R14
		
		mul		R20,R19			; AXL x BH		(u*u)
		add		R5,R0
		adc		R6,R1
		adc		R7,R14
		adc		R8,R14
		adc		R9,R14
		
		pop		R14
		pop		R13
		pop		R12
		pop		R11
		pop		R10	
		ret

signed x signed

muls_16x8_24

;******************************************************************************
;*  FUNCTION	muls16x8_24.inc
;*  Multiply 	16x8 -> 24 bit 		(signed * signed)
;*
;* 		AH  AL    BL     E2 E1 E0
;*  	R21:R20 x R16 -> R4:R3:R2
;*
;*	Scratch registers:	-
;*  11 cycles + 4 (RET) = 15 Cycles
;*
;******************************************************************************
muls_16x8:
		push	R10
		sub		R10,R10			; NULL
		sub		R4,R4
		mulsu	R16,R20			; BL*AL	(s*u)
		sbc		R4,R10
		movw	R3:R2,R1:R0
		muls	R21,R16			; AH*BL	(s*s)		
		add		R3,R0
		adc		R4,R1
		pop		R10
		ret

muls_16x16_32

;******************************************************************************
;*  FUNCTION	muls16x16_32.inc
;*  Multiply 	16x16 -> 32 bit 		(signed * signed)
;*
;* 		AH  AL    BH  BL     E3 E2 E1 E0
;*  	R21:R20 x R17:R16 -> R5:R4:R3:R2
;*
;*	Scratch registers:	-
;*  13 cycles + 4 (RET) = 17 Cycles
;*
;******************************************************************************
muls_16x16:
		push	R10
		clr		R10
		muls	R21,R17			; (signed)ah * (signed)bh
		movw	R5:R4,R1:R0
		mul		R20,R16			; al * bl
		movw	R3:R2,R1:R0
		mulsu	R21, R16		; (signed)ah * bl
		sbc		R5, R10
		add		R3, R0
		adc		R4, R1
		adc		R5, R10
		mulsu	R17, R20		; (signed)bh * al
		sbc		R5, R10
		add		R3, R0
		adc		R4, R1
		adc		R5, R10
		pop		R10
		ret

muls_24x8_32

;******************************************************************************
;*  FUNCTION	muls24x8_32.inc
;*  Multiply 	24x8 -> 32 bit 		(signed * signed)
;*
;*      AH  AM  AL    BL     E3 E2 E1 E0
;*      R22:R21:R20 x R16 -> R5:R4:R3:R2
;*
;*	Scratch registers:	-
;*  19 cycles + 4 (RET) = 23 Cycles
;*
;******************************************************************************
muls_24x8:
		push	R10
		clr		R10
		mulsu	R16,R20			; AL x BL -> BL x AL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		muls	R22,R16			; AH x BL
		movw	R5:R4,R1:R0		; r1:r0 -> E3:E2
		mul		R21,R16			; AM x BL
		add		R3,R0			; R0 -> E1
		adc		R4,R1			; R1 -> E2
		adc		R5,R10			; T0 -> E3
		pop		R10  
		ret

muls_24x16_40

;******************************************************************************
;*  FUNCTION	muls24x16_40.inc
;*  Multiply 	24x16 -> 40 bit 		(signed * signed)
;*
;*      AH  AM  AL    BH  BL     E4 E3 E2 E1 E0
;*      R22:R21:R20 x R17:R16 -> R6:R5:R4:R3:R2
;*
;*	Scratch registers:	-
;*  38 cycles + 4 (RET) = 42 Cycles
;*
;******************************************************************************
muls_24x16:
		push	R10		
		clr		R10
		mul		R20,R16			; AL x BL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		muls	R22,R17			; AH x BH
		mov		R5,R0			; r0 -> E3
		mov		R6,R1			; r1 -> E4
		clr		R4				;clr -> E2
		
		mul		R21,R16			; AM x BL
		add		R3,R0			; r0 -> E1
		adc		R4,R1			; r1 -> E2
		
		mulsu	R22,R16			; AH x BL
		sbc		R6,R10			; signed DUMMY T4
		add		R4,R0			; r0 -> E2
		adc		R5,R1			; r1 -> E3
		adc		R6,R10			; T4 -> E4

		mulsu	R17,R20			; AL x BH  ->  BH x AL
		sbc		R5,R10			; signed DUMMY T3
		sbc		R6,R10			; signed DUMMY T4
		add		R3,R0			; r0 -> E1
		adc		R4,R1			; r1 -> E2
		adc		R5,R10			; T3 -> E3
		adc		R6,R10			; T4 -> E4
		
		mulsu	R17,R21			; AM x BH  ->  BH x AM
		sbc		R6,R10			; signed DUMMY T4
		add		R4,R0			; r0 -> E2
		adc		R5,R1			; r1 -> E3
		adc		R6,R10			; T4 -> E4
		pop		R10  
		ret

muls_24x24_48

;***********************************************************************************
;*
;*		FUNCTION	muls24x24_48.inc
;*		Multiply 	24x24 -> 48 bit			(signed * signed)
;*
;*      AH  AM  AL    BH  BM  BL     E5 E4 E3 E2 E1 E0
;*      R22:R21:R20 x R18:R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;*  59 cycles + 4 (RET) = 63 Cycles
;*
;***********************************************************************************
muls_24x24:
		push	R8
		push	R9
		push	R10
		clr		R10
		mul		R20,R16			; AL x BL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		mul		R21,R17			; AM x BM
		movw	R5:R4,R1:R0		; r1:r0 -> E3:E2
		muls	R22,R18			; AH x BH
		movw	R7:R6,R1:R0		; r1:r0 -> E5:E4
		
		mulsu	R22,R17			; AH x BM
		sbc		R7,R10			; signed DUMMY T2
		movw	R9:R8,R1:R0		; r1:r0 -> T1:T0
		mul		R21,R16			; AM x BL
		add		R3,R0			; R0 -> E1
		adc		R4,R1			; R1 -> E2
		adc		R5,R8			; T0 -> E3
		adc		R6,R9			; T1 -> E4
		adc		R7,R10			; T2 -> E5
		
		mulsu	R18,R21			; AM x BH  ->  BH x AM
		sbc		R7,R10			; signed DUMMY T2
		movw	R9:R8,R1:R0		; r1:r0 -> T1:T0
		mul		R20,R17			; AL x BM
		add		R3,R0			; R0 -> E1
		adc		R4,R1			; R1 -> E2
		adc		R5,R8			; T0 -> E3
		adc		R6,R9			; T1 -> E4
		adc		R7,R10			; T2 -> E5

		mulsu	R22,R16			; AH x BL
		sbc		R6,R10			; signed DUMMY T2
		sbc		R7,R10			; signed DUMMY T2
		add		R4,R0			; R0 -> E2
		adc		R5,R1			; R1 -> E3
		adc		R6,R10			; T2 -> E4
		adc		R7,R10			; T2 -> E5

		mulsu	R18,R20			; AL x BH -> BH x AL
		sbc		R6,R10			; signed DUMMY T2
		sbc		R7,R10			; signed DUMMY T2
		add		R4,R0			; R0 -> E2
		adc		R5,R1			; R1 -> E3
		adc		R6,R10			; T2 -> E4
		adc		R7,R10			; T2 -> E5
		pop		R10
		pop		R9
		pop		R8  
		ret

muls_32x8_40

;***********************************************************************************
;*
;*		FUNCTION	muls32x8_40.inc
;*		Multiply 	32x8 -> 40 bit			(signed * signed)
;*
;*      AH  AM  AL  AXL   BL     E4 E3 E2 E1 E0
;*      R23:R22:R21:R20 x R16 -> R6:R5:R4:R3:R2
;*
;*  34 cycles + 4 (RET) = 38 Cycles
;*
;***********************************************************************************
muls_32x8:
		push	R10
		clr		R4
		sub		R10,R10			; NULL

		muls	R23,R16			; AH x BL			(s*s)
		mov		R5,R0
		mov		R6,R1
		
		mulsu	R16,R20			; BL x AXL			(s*u)
		sbc		R4,R10
		sbc		R5,R10
		sbc		R6,R10
		movw	R3:R2,R1:R0
		adc		R4,R10
		adc		R5,R10
		adc		R6,R10
		
		mulsu	R16,R21			; BL x AL			(s*u)
		sbc		R5,R10			; signed DUMMY
		sbc		R6,R10			; signed DUMMY
		add		R3,R0
		adc		R4,R1
		adc		R5,R10
		adc		R6,R10
		
		mulsu	R16,R22			; BL x AM			(s*u)
		sbc		R6,R10			; signed DUMMY
		add		R4,R0
		adc		R5,R1
		adc		R6,R10
		
		pop		R10
		ret

muls_32x16_48

;***********************************************************************************
;*
;*		FUNCTION	muls32x16_48.inc
;*		Multiply 	32x16 -> 48 bit			(signed * signed)
;*
;*      AH  AM  AL  AXL   BH  BL     E5 E4 E3 E2 E1 E0
;*      R23:R22:R21:R20 x R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;*  56 cycles + 4 (RET) = 60 Cycles
;*
;***********************************************************************************
muls_32x16:
		push	R8
		push	R9
		push	R10	
		clr		R10
		mul		R20,R16			; AXL x BL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		mul		R22,R16			; AM x BL
		movw	R5:R4,R1:R0		; r1:r0 -> E3:E2
		muls	R23,R17			; AH x BH
		movw	R7:R6,R1:R0		; r1:r0 -> E5:E4

		mulsu	R23,R16			; AH x BL
		movw	R9:R8,R1:R0		; r1:r0 -> T4:T3
		sbc		R7,R10			; signed DUMMY T5	
		mul		R21,R16			; AL x BL
		add		R3,R0			; r0 ----> + E1
		adc		R4,R1			; r1 +c -> + E2
		adc		R5,R8			; T3 +c -> + E3
		adc		R6,R9			; T4 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5

		mulsu	R17,R20			; AXL x BH -> BH x AXL
		sbc		R5,R10			; signed DUMMY T5
		sbc		R6,R10			; signed DUMMY T5
		sbc		R7,R10			; signed DUMMY T5
		add		R3,R0			; r0 ----> + E1
		adc		R4,R1			; r1 +c -> + E2
		adc		R5,R10			; T5 +c -> + E3
		adc		R6,R10			; T5 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5
		
		mulsu	R17,R21			; AL x BH -> BH x AL
		sbc		R6,R10			; signed DUMMY T5
		sbc		R7,R10			; signed DUMMY T5
		add		R4,R0			; r0 +c -> + E2
		adc		R5,R1			; r1 +c -> + E3
		adc		R6,R10			; T5 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5
		
		mulsu	R17,R22			; BH x AM
		sbc		R7,R10			; signed DUMMY T5
		add		R5,R0			; r0 +c -> + E3
		adc		R6,R1			; r1 +c -> + E4
		adc		R7,R10			; T5 +c -> + E5
		pop		R10
		pop		R9
		pop		R8	
		ret

muls_32x24_56

;***********************************************************************************
;*
;*		FUNCTION	muls32x24_56.inc
;*		Multiply 	32x24 -> 56 bit			(signed * signed)
;*
;*      AH  AM  AL  AXL   BH  BM  BL     E6 E5 E4 E3 E2 E1 E0
;*      R23:R22:R21:R20 x R18:R17:R16 -> R8:R7:R6:R5:R4:R3:R2
;*
;*  82 cycles + 4 (RET) = 86 Cycles
;*
;***********************************************************************************
muls_32x24:
		push	R9
		push	R10
		push	R11
		sub		R9,R9
		sub		R6,R6			; NULL
		
		mul		R20,R16			; AXL*BL	(u*u)
		movw	R3:R2,R1:R0
		mul		R22,R16			; AM*BL		(u*u)
		movw	R5:R4,R1:R0
		muls	R23,R18			; AH*BH		(s*s)
		mov		R7,R0
		mov		R8,R1


		mul		R21,R16			; AL*BL		(u*u)
		movw	R11:R10,R1:R0		
		mulsu	R23,R16			; AH*BL		(s*u)
		sbc		R7,R9
		sbc		R8,R9
		add		R3,R10
		adc		R4,R11
		adc		R5,R0
		adc		R6,R1
		adc		R7,R9
		adc		R8,R9

		mul		R20,R17			; AXL*BM	(u*u)
		movw	R11:R10,R1:R0		
		mulsu	R18,R21			; BH*AL		(s*u)
		sbc		R7,R9
		sbc		R8,R9
		add		R3,R10
		adc		R4,R11
		adc		R5,R0
		adc		R6,R1
		adc		R7,R9
		adc		R8,R9

		mul		R21,R17			; AL*BM		(u*u)
		movw	R11:R10,R1:R0		
		mulsu	R23,R17			; AH*BM		(s*u)
		sbc		R8,R9
		add		R4,R10
		adc		R5,R11
		adc		R6,R0
		adc		R7,R1
		adc		R8,R9
		
		mul		R22,R17			; AM*BM		(u*u)		
		add		R5,R0
		adc		R6,R1
		adc		R7,R9
		adc		R8,R9
		
		mulsu	R18,R20			; BH*AXL	(s*u)
		sbc		R6,R9
		sbc		R7,R9
		sbc		R8,R9
		add		R4,R0
		adc		R5,R1
		adc		R6,R9
		adc		R7,R9
		adc		R8,R9

		mulsu	R18,R22			; BH*AM		(s*u)
		sbc		R8,R9
		add		R6,R0
		adc		R7,R1
		adc		R8,R9

		pop		R11
		pop		R10
		pop		R9
		ret

muls_32x32_64

;***********************************************************************************
;*
;*		FUNCTION	muls32x32_64.inc
;*		Multiply 	32x32 -> 64 bit			(signed * signed)
;*
;*      AH  AM  AL  AXL   BH  BM  BL  BXL    E7 E6 E5 E4 E3 E2 E1 E0
;*      R23:R22:R21:R20 x R19:R18:R17:R16 -> R9:R8:R7:R6:R5:R4:R3:R2
;*
;*  108 cycles + 4 (RET) = 112 Cycles
;*
;***********************************************************************************
muls_32x32:
		push	R10
		push	R11
		push	r12
		push	r13
		push	r14
		clr		R14
		mul		R20,R16			; AXL x BXL
		movw	R3:R2,R1:R0		; r1:r0 -> E1:E0
		mul		R21,R17			; AL x BL
		movw	R5:R4,R1:R0		; r1:r0 -> E3:E2
		mul		R22,R18			; AM x BM
		movw	R7:R6,R1:R0		; r1:r0 -> E5:E4
		muls	R23,R19			; AH x BH
		movw	R9:R8,R1:R0		; r1:r0 -> E7:E6

		mulsu	R23,R18			; AH x BM
		movw	R13:R12,R1:R0	; r1:r0 -> T5:T4
		sbc		R9,R14			; signed DUMMY T6
		mul		R22,R17			; AM x BL
		movw	R11:R10,R1:R0	; r1:r0 -> T3:T2
		mul		R21,R16			; AL x BXL
		add		R3,R0			; r0 ----> + E1
		adc		R4,R1			; r1 +c -> + E2
		adc		R5,R10			; T2 +c -> + E3
		adc		R6,R11			; T3 +c -> + E4
		adc		R7,R12			; T4 +c -> + E5
		adc		R8,R13			; T5 +c -> + E6
		adc		R9,R14			; T6 +c -> + E7

		mulsu	R19,R22			; AM x BH -> BH x AM
		movw	R13:R12,R1:R0	; r1:r0 -> T5:T4
		sbc		R9,R14			; signed DUMMY T6
		mul		R21,R18			; AL x BM
		movw	R11:R10,R1:R0	; r1:r0 -> T3:T2
		mul		R20,R17			; AXL x BL
		add		R3,R0			; r0 ----> + E1
		adc		R4,R1			; r1 +c -> + E2
		adc		R5,R10			; T2 +c -> + E3
		adc		R6,R11			; T3 +c -> + E4
		adc		R7,R12			; T4 +c -> + E5
		adc		R8,R13			; T5 +c -> + E6
		adc		R9,R14			; T6 +c -> + E7

		mulsu	R23,R17			; AH x BL
		movw	R11:R10,R1:R0	; r1:r0 -> T3:T2
		sbc		R8,R14			; signed DUMMY T6
		sbc		R9,R14			; signed DUMMY T6
		mul		R20,R18			; AXL x BM
		add		R4,R0			; r0 +c -> + E2
		adc		R5,R1			; r1 +c -> + E3
		adc		R6,R10			; T3 +c -> + E4
		adc		R7,R11			; T4 +c -> + E5
		adc		R8,R14			; T5 +c -> + E6
		adc		R9,R14			; T6 +c -> + E7

		mulsu	R19,R21			; AL x BH -> BH x AL
		movw	R11:R10,R1:R0	; r1:r0 -> T3:T2
		sbc		R8,R14			; signed DUMMY T6
		sbc		R9,R14			; signed DUMMY T6
		mul		R22,R16			; AM x BXL
		add		R4,R0			; r0 +c -> + E2
		adc		R5,R1			; r1 +c -> + E3
		adc		R6,R10			; T3 +c -> + E4
		adc		R7,R11			; T4 +c -> + E5
		adc		R8,R14			; T5 +c -> + E6
		adc		R9,R14			; T6 +c -> + E7

		mulsu	R19,R20			; AXL x BH -> BH x AXL
		sbc		R7,R14			; signed DUMMY T6
		sbc		R8,R14			; signed DUMMY T6
		sbc		R9,R14			; signed DUMMY T6
		add		R5,R0			; r0 +c -> + E3
		adc		R6,R1			; r1 +c -> + E4
		adc		R7,R14			; T6 +c -> + E5
		adc		R8,R14			; T6 +c -> + E6
		adc		R9,R14			; T6 +c -> + E7

		mulsu	R23,R16			; AH x BXL
		sbc		R7,R14			; signed DUMMY T6
		sbc		R8,R14			; signed DUMMY T6
		sbc		R9,R14			; signed DUMMY T6
		add		R5,R0			; r0 +c -> + E3
		adc		R6,R1			; r1 +c -> + E4
		adc		R7,R14			; T6 +c -> + E5
		adc		R8,R14			; T6 +c -> + E6
		adc		R9,R14			; T6 +c -> + E7
		pop		R14
		pop		R13
		pop		R12
		pop		R11
		pop		R10  
		ret

Download

SOFTWARE VERSION --> Datei:

HARDWARE VERSION --> Datei:MULT ASM.zip

Discussion