Multiplikation in Assembler
Autor: Steffen H.
Dies ist eine Sammlung von verschiedenen Assembler-Include-Dateien für einen 8-Bit-AVR zur Multiplikation in Hardware oder Software.
Einleitung
Ich habe dieses Wiki zur Multiplikation in Assembler erstellt, da ich hauptsächlich Programmschnipsel für eine "unsigned" Multiplikation gefunden habe oder diese nicht in der benötigten Bytegröße vorhanden waren. Deshalb findet ihr hier verschiedene Sub-Programme zur "unsigned" und "signed" Multiplikation.
Wie funktioniert das ganze?
Die Sub-Programme sind Include-Dateien. Diese müssen im .cseg-Teil des Programms eingebunden werden. Liegen diese Include-Dateien im Hauptverzeichnis des Assembler-Projektes, also dort, wo auch die "main.asm" liegt, werden diese so eingebunden:
.include "Datei.inc"
Man kann die Include-Dateien aber auch an beliebiger anderer Stelle ablegen. Dann müssen diese aber mit voller Baumstruktur eingebunden werden. Beispiel:
.include "C:\AVR\Libraries\Datei.inc"
Für die reinen Software Lösungen ist eine Datei Namens "sw_mul.inc" vor allen anderen MUL-Includes einzubinden. Und die Software MUL-Includes unterscheiden sich zu den Hardware MUL-Includes im vorangestellten "_" wie zum Beispiel _muls_32x16_48.inc.
Der Aufbau des Dateinamens ist schon fast selbsterklärend.
- mul -> unsigned x unsigned Multiplikation
- mulsu -> signed x unsigned Multiplikation
- muls -> signed x signed Multiplikation
Dann kommt die Byte-Größe wie zum Beispiel _32x16 und dann die Result-Größe _48. Die Eingangsbytes sind dabei fest vorgeschrieben! Und die Unteren r11..r0 Register werden zerstört. Wer diese in Benutzung hat, sollte diese vorher sichern.
Was es noch zu beachten gibt, sind eventuelle Änderungen an "rcall"s, die in "call"s umgewandelt werden müssen, wenn die Sprungadresse zum Unterprogramm weiter als ±2K Worte (±4 KByte) vom Programmcounter entfernt liegt. Dies bemängelt dann aber das Atmel Studio in einer Fehlermeldung.
Benutzte Register
In allen Include Dateien werden nach folgendem Muster die Register wie folgt benutzt:
MULT | r23 | r22 | r21 | r20 | r19 | r18 | r17 | r16 | r9 | r8 | r7 | r6 | r5 | r4 | r3 | r2 |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
16x8 | A1 | A0 | B0 | E2 | E1 | E0 | ||||||||||
16x16 | A1 | A0 | B1 | B0 | E3 | E2 | E1 | E0 ||||||||
24x8 | A2 | A1 | A0 | B0 | E3 | E2 | E1 | E0 | ||||||||
24x16 | A2 | A1 | A0 | B1 | B0 | E4 | E3 | E2 | E1 | E0 | ||||||
24x24 | A2 | A1 | A0 | B2 | B1 | B0 | E5 | E4 | E3 | E2 | E1 | E0 | ||||
32x8 | A3 | A2 | A1 | A0 | B0 | E4 | E3 | E2 | E1 | E0 | ||||||
32x16 | A3 | A2 | A1 | A0 | B1 | B0 | E5 | E4 | E3 | E2 | E1 | E0 | ||||
32x24 | A3 | A2 | A1 | A0 | B2 | B1 | B0 | E6 | E5 | E4 | E3 | E2 | E1 | E0 | ||
32x32 | A3 | A2 | A1 | A0 | B3 | B2 | B1 | B0 | E7 | E6 | E5 | E4 | E3 | E2 | E1 | E0 |
Software Version
MACRO
Hier muss vorher zwingend die "sw_mul.inc" Datei eingebunden werden. Diese besteht teilweise aus Makros, um die in den weiteren MUL-Include-Dateien verwendeten Funktionen nutzen zu können. Vor allem die richtige Erzeugung des Zero- und Carry-Flags!
.macro mul_
;----------------------------------------------------------------
; Software replacement for the unsigned 8x8 -> 16 bit MUL:
;   r1:r0 = (unsigned)@0 * (unsigned)@1
; Shift-and-add loop, 8 iterations; @0 doubles as loop counter
; (a 1 is rotated in and the loop ends when it reaches carry).
; @0 is saved/restored on the stack; r1:r0 are overwritten.
; On exit C = bit 15 of the product; all other flags except I
; are cleared.
; NOTE(review): the labels mul1..mul4 are plain (non-local)
; labels inside a macro - confirm the assembler accepts more
; than one expansion of mul_ per program.
;----------------------------------------------------------------
clr r1 ;result high byte = 0
push @0 ;save multiplicand
sec ;multiplicand also serves as the loop counter:
ror @0 ;rotate a 1 in, LSB out
rjmp mul2 ;first pass: evaluate the LSB immediately
mul1: lsr @0 ;next bit (LSB first) into carry (multiplicand)
breq mul4 ;when the 1 from sec above is back in carry -> done
mul2: brcc mul3 ;bit = 0 -> skip the addition
add r1,@1 ;otherwise add multiplier into the high byte
mul3: ror r1 ;shift product (with possible carry) right
ror r0
rjmp mul1 ;next round (8 in total)
mul4: push r16 ;--- rebuild SREG: keep only the I flag ...
in r16,SREG
andi r16,0x80 ;mask everything except I
out SREG,r16
sbrc r1,7 ;... then C = bit 15 of the product (like MUL)
sec
pop r16
pop @0 ;restore multiplicand
.endm
.macro muls_
;----------------------------------------------------------------
; Software replacement for the signed 8x8 -> 16 bit MULS:
;   r1:r0 = (signed)@0 * (signed)@1
; Calls _muls_ for the shift/add-subtract core. The T flag
; remembers whether exactly one operand is negative (product
; negative). SREG is rebuilt so Z and C resemble hardware MULS
; (C = sign of a non-zero product).
;----------------------------------------------------------------
push r16
push r17
push r18
clt ;T = 0: assume product not negative
mov r1,@0
mov r0,@1
clr r17
sbrc r1,7 ;count negative operands in r17
inc r17
sbrc r0,7
inc r17
sbrc r17,0 ;odd count -> product negative -> T = 1
set
mov r16,r1
rcall _muls_
andi r17,0x82 ;keep I and Z from the SREG sampled in _muls_
sbrc r17,SREG_Z ;product zero -> leave C cleared
rjmp PC+3
brtc PC+2 ;product negative (T set) -> set C like MULS
sbr r17,(1<<SREG_C)
out SREG,r17
pop r18
pop r17
pop r16
.endm
;----------------------------------------------------------------
; _muls_: core of the signed 8x8 -> 16 bit software multiply.
; In:  r16 = multiplicand, r0 = multiplier (both signed)
; Out: r1:r0 = product; r17 = SREG snapshot after the last shift
; Clobbers: r18 (loop counter, 0 on exit)
;----------------------------------------------------------------
_muls_: sub r1,r1 ; clear result High byte and carry
ldi r18,8 ; init loop counter (8 bits)
_m8s_1: brcc _m8s_2 ; if carry (previous bit) set
add r1,r16 ; add multiplicand to result High byte
_m8s_2: sbrc r0,0 ; if current multiplier bit set
sub r1,r16 ; subtract multiplicand from result High
asr r1 ; arithmetic shift right result High byte
ror r0 ; shift right result Low byte / multiplier
in r17,SREG ; snapshot flags for the muls_ macro
dec r18 ; decrement loop counter
brne _m8s_1 ; if not done, loop more
ret
.macro mulsu_
;----------------------------------------------------------------
; Software replacement for the signed x unsigned 8x8 -> 16 bit
; MULSU: r1:r0 = (signed)@0 * (unsigned)@1.
; |@0| is multiplied unsigned via _mulsu_, then the 16-bit
; product is negated when @0 was negative. On exit C = bit 15
; of the product, as the hardware MULSU produces it.
;----------------------------------------------------------------
push r16
push r17
push r18
mov r1,@0
mov r0,@1
mov r16,r1
clt ;T remembers the sign of @0
sbrc r16,7
set
sbrc r16,7 ;make the multiplicand positive
neg r16
rcall _mulsu_
andi r17,0x83 ;keep I, Z, C from the SREG sampled in _mulsu_
brtc PC+6 ;@0 was positive -> product already correct
com r1 ;two's-complement negate r1:r0
com r0
ldi r16,1
add r0,r16
adc r1,r18 ;r18 is 0 here (loop counter ran down)
sbrc r1,7 ;C = bit 15 of the (possibly negated) product
sbr r17,(1<<SREG_C)
out SREG,r17
pop r18
pop r17
pop r16
.endm
;----------------------------------------------------------------
; _mulsu_: core unsigned 8x8 -> 16 bit software multiply.
; In:  r16 = multiplicand (made non-negative by mulsu_),
;      r0  = multiplier (unsigned)
; Out: r1:r0 = product; r17 = SREG snapshot after the last shift
; Clobbers: r18 (loop counter, 0 on exit)
;----------------------------------------------------------------
_mulsu_:clr r1 ;clear result High byte
ldi r18,8 ;init loop counter
lsr r0 ;first multiplier bit into carry
m8su_1: brcc m8su_2 ; bit clear -> skip the addition
add r1,r16 ; add multiplicand to result High byte
m8su_2: ror r1 ; rotate right result High byte
ror r0 ; rotate right result Low byte / multiplier
in r17,SREG ; snapshot flags for the mulsu_ macro
dec r18 ; decrement loop counter
brne m8su_1 ; if not done, loop more
ret
unsigned x unsigned
_mul_16x8_24
_mul_16x16_32
_mul_24x8_32
_mul_24x16_40
_mul_24x24_48
_mul_32x8_40
_mul_32x16_48
_mul_32x24_56
_mul_32x32_64
;***************************************************
;* Multiply 32x32 -> 64 bit unsigned (software MUL macros)
;* AH AM AL AXL BH BM BL BXL E7 E6 E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R19:R18:R17:R16 -> R9:R8:R7:R6:R5:R4:R3:R2
;*
;* Scratched Register: R10,R11,R12,R13,R14 (plus r11..r0 via mul_)
;* NOTE(review): the 108/112-cycle figure of the hardware twin
;* does not apply here - every mul_ runs a full 8-step loop.
;*
mul32x32_64:
push R10
push R11
push R12
push R13
push R14
clr R14
; R9:R2 = R23:R20 * R19:R16
sub R14,R14 ; zero (redundant: R14 already cleared above)
mul_ R20,R16 ; AXL*BXL
movw R3:R2,R1:R0
mul_ R21,R17 ; AL*BL
movw R5:R4,R1:R0
mul_ R22,R18 ; AM*BM
movw R7:R6,R1:R0
mul_ R23,R19 ; AH*BH
movw R9:R8,R1:R0
mul_ R22,R19 ; AM*BH
movw R13:R12,R1:R0
mul_ R21,R18 ; AL*BM
movw R11:R10,R1:R0
mul_ R20,R17 ; AXL*BL
add R3, R0
adc R4, R1
adc R5, R10
adc R6, R11
adc R7, R12
adc R8,R13
adc R9,R14
mul_ R23,R18 ; AH*BM
movw R13:R12,R1:R0
mul_ R22,R17 ; AM*BL
movw R11:R10,R1:R0
mul_ R21,R16 ; AL*BXL
add R3, R0
adc R4, R1
adc R5, R10
adc R6, R11
adc R7, R12
adc R8,R13
adc R9,R14
mul_ R21,R19 ; AL*BH
movw R11:R10,R1:R0
mul_ R20,R18 ; AXL*BM
add R4, R0
adc R5, R1
adc R6, R10
adc R7, R11
adc R8,R14
adc R9,R14
mul_ R23,R17 ; AH*BL
movw R11:R10,R1:R0
mul_ R22,R16 ; AM*BXL
add R4, R0
adc R5, R1
adc R6, R10
adc R7, R11
adc R8,R14
adc R9,R14
mul_ R20,R19 ; AXL*BH
movw R11:R10,R1:R0
mul_ R23,R16 ; AH*BXL
add R5, R0
adc R6, R1
adc R7, R14
adc R8,R14
adc R9,R14
add R5, R10
adc R6, R11
adc R7, R14
adc R8,R14
adc R9,R14
pop R14
pop R13
pop R12
pop R11
pop R10
ret
signed x unsigned
_mulsu_16x8_24
_mulsu_16x16_32
_mulsu_24x8_32
_mulsu_24x16_40
_mulsu_24x24_48
_mulsu_32x8_40
;***************************************************
;* Multiply 32x8 -> 40 bit (signed x unsigned, software macros)
;* AH AM AL AXL BL E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R16 -> R6:R5:R4:R3:R2
;*
;* NOTE(review): cycle figure of the hardware twin removed -
;* each mul_/mulsu_ macro runs a full software loop here.
;*
mulsu_32x8:
push R10
clr R10
clr r4
mul_ R20,R16 ; AXL x BL
movw R3:R2,R1:R0 ; r1:r0 -> E1:E0
mulsu_ R23,R16 ; AH x BL (top product, no extension needed)
mov R5,R0 ; r0 -> E3
mov R6,R1 ; r1 -> E4
mul_ R21,R16 ; AL x BL
add R3,R0 ; R0 -> E1
adc R4,R1 ; R1 -> E2
adc R5,R10 ; T0 -> E3
adc R6,R10 ; T0 -> E4
mul_ R22,R16 ; AM x BL
add R4,R0 ; R0 -> E2
adc R5,R1 ; R1 -> E3
adc R6,R10 ; T0 -> E4
pop R10
ret
_mulsu_32x16_48
;*****************************************************************************
;* Multiply 32x16 -> 48 bit (software macros)
;*
;* signed x unsigned
;* AH AM AL AXL BH BL E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;* NOTE(review): cycle figure of the hardware twin removed -
;* each mul_/mulsu_ macro runs a full software loop here.
;*******************************************************************************
mulsu_32x16:
push R8
push R9
push R10
clr R10
mul_ R20,R16 ; AXL x BL
movw R3:R2,R1:R0 ; r1:r0 -> E1:E0
mul_ R22,R16 ; AM x BL
movw R5:R4,R1:R0 ; r1:r0 -> E3:E2
mulsu_ R23,R17 ; AH x BH (top product)
movw R7:R6,R1:R0 ; r1:r0 -> E5:E4
mulsu_ R23,R16 ; AH x BL
movw R9:R8,R1:R0 ; r1:r0 -> T4:T3
sbc R7,R10 ; signed DUMMY T5 (sign extension into E5)
mul_ R21,R16 ; AL x BL
add R3,R0 ; r0 ----> + E1
adc R4,R1 ; r1 +c -> + E2
adc R5,R8 ; T3 +c -> + E3
adc R6,R9 ; T4 +c -> + E4
adc R7,R10 ; T5 +c -> + E5
mul_ R17,R20 ; AXL x BH (u*u)
add R3,R0 ; r0 ----> + E1
adc R4,R1 ; r1 +c -> + E2
adc R5,R10 ; 0 +c -> + E3
adc R6,R10 ; 0 +c -> + E4
adc R7,R10 ; 0 +c -> + E5
mul_ R17,R21 ; AL x BH (u*u)
add R4,R0 ; r0 +c -> + E2
adc R5,R1 ; r1 +c -> + E3
adc R6,R10 ; 0 +c -> + E4
adc R7,R10 ; 0 +c -> + E5
mul_ R17,R22 ; AM x BH (u*u)
add R5,R0 ; r0 +c -> + E3
adc R6,R1 ; r1 +c -> + E4
adc R7,R10 ; 0 +c -> + E5
pop R10
pop R9
pop R8
ret
_mulsu_32x24_56
_mulsu_32x32_64
signed x signed
_muls_16x8_24
_muls_16x16_32
;******************************************************************************
;*
;* FUNCTION
;* muls16x16_32 (software-macro version)
;* DESCRIPTION
;* Signed multiply of two 16-bit numbers with 32-bit result.
;* USAGE E3:E2:E1:E0 = AH:AL * BH:BL
;* r5:r4:r3:r2 = r21:r20 * r17:r16
;* STATISTICS
;* NOTE(review): hardware cycle counts do not apply - the
;* muls_/mulsu_/mul_ macros each run a full software loop.
;* Scratched Register usage: r6 (plus r11..r0 via the macros)
;* NOTE
;* The routine is non-destructive to the operands.
;*
;******************************************************************************
muls_16x16:
push R6
clr R6
muls_ R21,R17 ; (signed)ah * (signed)bh
movw R5:R4,R1:R0
mul_ R20,R16 ; al * bl
movw R3:R2,R1:R0
mulsu_ R21, R16 ; (signed)ah * bl
sbc R5, R6 ; sign extension of the cross product
add R3, R0
adc R4, R1
adc R5, R6
mulsu_ R17, R20 ; (signed)bh * al
sbc R5, R6 ; sign extension of the cross product
add R3, R0
adc R4, R1
adc R5, R6
pop R6
ret
_muls_24x8_32
;***************************************************
;* Multiply 24x8 -> 32 bit (signed x signed, software macros)
;* AH AM AL BL E3 E2 E1 E0
;* R22:R21:R20 x R16 -> R5:R4:R3:R2
;*
;* Fix: the partial products BL x AL and BL x AM are
;* signed*unsigned and must be sign-extended into the upper
;* result bytes (sbc "signed DUMMY" idiom); the original
;* dropped that extension and used an unsigned mul_ for
;* BL x AM, e.g. 1 * (-1) returned 0x0000FFFF instead of -1.
;*
muls24x8_32:
push R10
sub R10,R10 ; R10 = 0, carry cleared
sub R4,R4 ; E2 = 0
sub R5,R5 ; E3 = 0
mulsu_ R16,R20 ; BL x AL (s*u)
sbc R4,R10 ; sign extend into E2
sbc R5,R10 ; ... and E3
movw R3:R2,R1:R0 ; r1:r0 -> E1:E0
muls_ R22,R16 ; AH x BL (s*s) -> E3:E2
add R4,R0
adc R5,R1
mulsu_ R16,R21 ; BL x AM (s*u) -> E2:E1
sbc R5,R10 ; sign extend into E3
add R3,R0
adc R4,R1
adc R5,R10
pop R10
ret
_muls_24x16_40
;***************************************************
;* Multiply 24x16 -> 40 bit (signed x signed, software macros)
;* AH AM AL BH BL E4 E3 E2 E1 E0
;* R22:R21:R20 x R17:R16 -> R6:R5:R4:R3:R2
;*
;* NOTE(review): cycle figure removed - the macros each run a
;* full software loop.
;*
muls_24x16:
push R10
clr R10
mul_ R20,R16 ; AL x BL (u*u)
movw R3:R2,R1:R0 ; r1:r0 -> E1:E0
muls_ R22,R17 ; AH x BH (s*s, top product)
mov R5,R0 ; r0 -> E3
mov R6,R1 ; r1 -> E4
clr R4 ;clr -> E2
mul_ R21,R16 ; AM x BL (u*u)
add R3,R0 ; r0 -> E1
adc R4,R1 ; r1 -> E2
mulsu_ R22,R16 ; AH x BL (s*u)
sbc R6,R10 ; signed DUMMY T4 (sign extension into E4)
add R4,R0 ; r0 -> E2
adc R5,R1 ; r1 -> E3
adc R6,R10 ; T4 -> E4
mulsu_ R17,R20 ; BH x AL (s*u)
sbc R5,R10 ; signed DUMMY T3 (extension, low byte first)
sbc R6,R10 ; signed DUMMY T4
add R3,R0 ; r0 -> E1
adc R4,R1 ; r1 -> E2
adc R5,R10 ; T3 -> E3
adc R6,R10 ; T4 -> E4
mulsu_ R17,R21 ; BH x AM (s*u)
sbc R6,R10 ; signed DUMMY T4
add R4,R0 ; r0 -> E2
adc R5,R1 ; r1 -> E3
adc R6,R10 ; T4 -> E4
pop R10
ret
_muls_24x24_48
;***************************************************
;* Multiply 24x24 -> 48 bit (signed x signed, software macros)
;* AH AM AL BH BM BL E5 E4 E3 E2 E1 E0
;* R22:R21:R20 x R18:R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;* Fixes: AH x BH used R17 (= BM) instead of R18 (= BH);
;* the two sums after the sign-extension sbc chains must start
;* with add (not adc), as in the hardware mulsu_24x24 - the sbc
;* chain can leave a borrow in C.
;*
muls_24x24:
push R8
push R9
push R10
clr R10
mul_ R20,R16 ; AL x BL
movw R3:R2,R1:R0 ; r1:r0 -> E1:E0
mul_ R21,R17 ; AM x BM
movw R5:R4,R1:R0 ; r1:r0 -> E3:E2
muls_ R22,R18 ; AH x BH (fix: was R22,R17)
movw R7:R6,R1:R0 ; r1:r0 -> E5:E4
mulsu_ R22,R17 ; AH x BM
sbc R7,R10 ; signed DUMMY T2 (extension into E5)
movw R9:R8,R1:R0 ; r1:r0 -> T1:T0
mul_ R21,R16 ; AM x BL
add R3,R0 ; R0 -> E1
adc R4,R1 ; R1 -> E2
adc R5,R8 ; T0 -> E3
adc R6,R9 ; T1 -> E4
adc R7,R10 ; T2 -> E5
mulsu_ R18,R21 ; BH x AM
sbc R7,R10 ; signed DUMMY T2
movw R9:R8,R1:R0 ; r1:r0 -> T1:T0
mul_ R20,R17 ; AL x BM
add R3,R0 ; R0 -> E1
adc R4,R1 ; R1 -> E2
adc R5,R8 ; T0 -> E3
adc R6,R9 ; T1 -> E4
adc R7,R10 ; T2 -> E5
mulsu_ R22,R16 ; AH x BL
sbc R6,R10 ; signed DUMMY (extension, low byte first)
sbc R7,R10 ; signed DUMMY
add R4,R0 ; R0 -> E2 (fix: was adc)
adc R5,R1 ; R1 -> E3
adc R6,R10 ; -> E4
adc R7,R10 ; -> E5
mulsu_ R18,R20 ; BH x AL
sbc R6,R10 ; signed DUMMY
sbc R7,R10 ; signed DUMMY
add R4,R0 ; R0 -> E2 (fix: was adc)
adc R5,R1 ; R1 -> E3
adc R6,R10 ; -> E4
adc R7,R10 ; -> E5
pop R10
pop R9
pop R8
ret
_muls_32x8_40
;***************************************************
;* Mutiply 32x8 -> 40 bit
;* AH AM AL AXL BL E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R16 -> R6:R5:R4:R3:R2
;*
;* 19 cycles + 4 (RET) = 23 Cycles
;*
muls_32x8:
push R10
clr R10
clr R6
clr r5
clr r4
mulsu_ R16,R20 ; BL x AXL
movw R3:R2,R1:R0 ; r1:r0 -> E1:E0
mulsu_ R16,R21 ; BL x AL
sbc R5,R10 ; signed DUMMY T0
sbc R6,R10 ; signed DUMMY T0
add R3,R0 ; R0 -> E1
adc R4,R1 ; R1 -> E2
adc R5,R10 ; T0 -> E3
adc R6,R10 ; T0 -> E4
mulsu_ R16,R22 ; BL x AM
sbc R6,R10 ; signed DUMMY T0
add R4,R0 ; R0 -> E2
adc R5,R1 ; R1 -> E3
adc R6,R10 ; T0 -> E4
muls_ R16,R23 ; BL x AH
add R5,R0 ; R0 -> E3
adc R6,R1 ; R1 -> E4
pop R10
ret
_muls_32x16_48
;***************************************************
;* Multiply 32x16 -> 48 bit (signed x signed, software macros)
;* AH AM AL AXL BH BL E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;* NOTE(review): cycle figure removed - the macros each run a
;* full software loop.
;*
muls_32x16:
push R8
push R9
push R10
clr R10
mul_ R20,R16 ; AXL x BL
movw R3:R2,R1:R0 ; r1:r0 -> E1:E0
mul_ R22,R16 ; AM x BL
movw R5:R4,R1:R0 ; r1:r0 -> E3:E2
muls_ R23,R17 ; AH x BH (top product)
movw R7:R6,R1:R0 ; r1:r0 -> E5:E4
mulsu_ R23,R16 ; AH x BL
movw R9:R8,R1:R0 ; r1:r0 -> T4:T3
sbc R7,R10 ; signed DUMMY T5 (extension into E5)
mul_ R21,R16 ; AL x BL
add R3,R0 ; r0 ----> + E1
adc R4,R1 ; r1 +c -> + E2
adc R5,R8 ; T3 +c -> + E3
adc R6,R9 ; T4 +c -> + E4
adc R7,R10 ; T5 +c -> + E5
mulsu_ R17,R20 ; BH x AXL (s*u)
sbc R5,R10 ; signed DUMMY (extension, low byte first)
sbc R6,R10 ; signed DUMMY
sbc R7,R10 ; signed DUMMY
add R3,R0 ; r0 ----> + E1
adc R4,R1 ; r1 +c -> + E2
adc R5,R10 ; -> + E3
adc R6,R10 ; -> + E4
adc R7,R10 ; -> + E5
mulsu_ R17,R21 ; BH x AL (s*u)
sbc R6,R10 ; signed DUMMY
sbc R7,R10 ; signed DUMMY
add R4,R0 ; r0 +c -> + E2
adc R5,R1 ; r1 +c -> + E3
adc R6,R10 ; -> + E4
adc R7,R10 ; -> + E5
mulsu_ R17,R22 ; BH x AM (s*u)
sbc R7,R10 ; signed DUMMY
add R5,R0 ; r0 +c -> + E3
adc R6,R1 ; r1 +c -> + E4
adc R7,R10 ; -> + E5
pop R10
pop R9
pop R8
ret
_muls_32x24_56
_muls_32x32_64
;***************************************************
;* Multiply 32x32 -> 64 bit (signed x signed, software macros)
;* AH AM AL AXL BH BM BL BXL E7 E6 E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R19:R18:R17:R16 -> R9:R8:R7:R6:R5:R4:R3:R2
;*
;* Fix: the sign-extension chains (sbc "signed DUMMY") must run
;* from the low extension byte to the high one so the borrow
;* propagates upwards - exactly as in the hardware mulsu_32x32.
;* The original ran them high-to-low.
;*
muls_32x32:
push R10
push R11
push R12
push R13
push R14
clr R14
mul_ R20,R16 ; AXL x BXL
movw R3:R2,R1:R0 ; r1:r0 -> E1:E0
mul_ R21,R17 ; AL x BL
movw R5:R4,R1:R0 ; r1:r0 -> E3:E2
mul_ R22,R18 ; AM x BM
movw R7:R6,R1:R0 ; r1:r0 -> E5:E4
muls_ R23,R19 ; AH x BH
movw R9:R8,R1:R0 ; r1:r0 -> E7:E6
mulsu_ R23,R18 ; AH x BM
movw R13:R12,R1:R0 ; r1:r0 -> T5:T4
sbc R9,R14 ; signed DUMMY (extension into E7)
mul_ R22,R17 ; AM x BL
movw R11:R10,R1:R0 ; r1:r0 -> T3:T2
mul_ R21,R16 ; AL x BXL
add R3,R0 ; r0 ----> + E1
adc R4,R1 ; r1 +c -> + E2
adc R5,R10 ; T2 +c -> + E3
adc R6,R11 ; T3 +c -> + E4
adc R7,R12 ; T4 +c -> + E5
adc R8,R13 ; T5 +c -> + E6
adc R9,R14 ; +c -> + E7
mulsu_ R19,R22 ; BH x AM
movw R13:R12,R1:R0 ; r1:r0 -> T5:T4
sbc R9,R14 ; signed DUMMY (extension into E7)
mul_ R21,R18 ; AL x BM
movw R11:R10,R1:R0 ; r1:r0 -> T3:T2
mul_ R20,R17 ; AXL x BL
add R3,R0 ; r0 ----> + E1
adc R4,R1 ; r1 +c -> + E2
adc R5,R10 ; T2 +c -> + E3
adc R6,R11 ; T3 +c -> + E4
adc R7,R12 ; T4 +c -> + E5
adc R8,R13 ; T5 +c -> + E6
adc R9,R14 ; +c -> + E7
mulsu_ R23,R17 ; AH x BL
movw R11:R10,R1:R0 ; r1:r0 -> T3:T2
sbc R8,R14 ; signed DUMMY E6 (fix: low byte first)
sbc R9,R14 ; signed DUMMY E7
mul_ R20,R18 ; AXL x BM
add R4,R0 ; r0 +c -> + E2
adc R5,R1 ; r1 +c -> + E3
adc R6,R10 ; T2 +c -> + E4
adc R7,R11 ; T3 +c -> + E5
adc R8,R14 ; +c -> + E6
adc R9,R14 ; +c -> + E7
mulsu_ R19,R21 ; BH x AL
movw R11:R10,R1:R0 ; r1:r0 -> T3:T2
sbc R8,R14 ; signed DUMMY E6 (fix: low byte first)
sbc R9,R14 ; signed DUMMY E7
mul_ R22,R16 ; AM x BXL
add R4,R0 ; r0 +c -> + E2
adc R5,R1 ; r1 +c -> + E3
adc R6,R10 ; T2 +c -> + E4
adc R7,R11 ; T3 +c -> + E5
adc R8,R14 ; +c -> + E6
adc R9,R14 ; +c -> + E7
mulsu_ R19,R20 ; BH x AXL
sbc R7,R14 ; signed DUMMY E5 (fix: low byte first)
sbc R8,R14 ; signed DUMMY E6
sbc R9,R14 ; signed DUMMY E7
add R5,R0 ; r0 +c -> + E3
adc R6,R1 ; r1 +c -> + E4
adc R7,R14 ; +c -> + E5
adc R8,R14 ; +c -> + E6
adc R9,R14 ; +c -> + E7
mulsu_ R23,R16 ; AH x BXL
sbc R7,R14 ; signed DUMMY E5 (fix: low byte first)
sbc R8,R14 ; signed DUMMY E6
sbc R9,R14 ; signed DUMMY E7
add R5,R0 ; r0 +c -> + E3
adc R6,R1 ; r1 +c -> + E4
adc R7,R14 ; +c -> + E5
adc R8,R14 ; +c -> + E6
adc R9,R14 ; +c -> + E7
pop R14
pop R13
pop R12
pop R11
pop R10
ret
Hardware Version
Die Cycle Time der einzelnen Funktionen sind in der Tabelle zusammengefasst.
HW | 16x8 | 16x16 | 24x8 | 24x16 | 24x24 | 32x8 | 32x16 | 32x24 | 32x32 |
---|---|---|---|---|---|---|---|---|---|
mul | 14 | 26 | 21 | 40 | 57 | 29 | 49 | 75 | 102 |
mulsu | 20 | 27 | 21 | 41 | 60 | 29 | 54 | 78 | 106 |
muls | 19 | 28 | 23 | 42 | 63 | 38 | 60 | 86 | 112 |
unsigned x unsigned
mul_16x8_24
;******************************************************************************
;* FUNCTION mul16x8_24.inc
;* Multiply 16x8 -> 24 bit (unsigned * unsigned)
;*
;* AH AL BL E2 E1 E0
;* R21:R20 x R16 -> R4:R3:R2
;*
;* Scratched Register: -
;* 10 cycles + 4 (RET) = 14 Cycles
;*
;******************************************************************************
mul_16x8:
; R4:R2 = R16 * R21:R20
mul R20,R16 ; AL * BL
movw R3:R2,R1:R0
clr R4
mul R21,R16 ; AH * BL
add R3,R0
adc R4,R1
ret
mul_16x16_32
;******************************************************************************
;* FUNCTION mul16x16_32.inc
;* Multiply 16x16 -> 32 bit (unsigned * unsigned)
;*
;* AH AL BH BL E3 E2 E1 E0
;* R21:R20 x R17:R16 -> R5:R4:R3:R2
;*
;* Scratched Register: -
;* 22 cycles + 4 (RET) = 26 Cycles
;*
;******************************************************************************
mul_16x16:
push R9
; R5:R2 = R17:R16 * R21:R20
sub R9,R9 ; zero
mul R20,R16 ; AL * BL
movw R3:R2,R1:R0
mul R21,R17 ; AH * BH
movw R5:R4,R1:R0
mul R21,R16 ; AH * BL
add R3,R0
adc R4,R1
adc R5,R9
mul R20,R17 ; AL * BH
add R3,R0
adc R4,R1
adc R5,R9
pop R9
ret
mul_24x8_32
;******************************************************************************
;* FUNCTION mul24x8_32.inc
;* Multiply 24x8 -> 32 bit (unsigned * unsigned)
;*
;* AH AM AL BL E3 E2 E1 E0
;* R22:R21:R20 x R16 -> R5:R4:R3:R2
;*
;* Scratched Register: -
;* 17 cycles + 4 (RET) = 21 Cycles
;*
;******************************************************************************
mul_24x8:
push R9
; R5:R2 = R16 * R22:R20
sub R9,R9 ; zero
mul R20,R16 ; AL*BL
movw R3:R2,R1:R0
mul R22,R16 ; AH*BL
movw R5:R4,R1:R0
mul R21,R16 ; AM*BL
add R3,R0
adc R4,R1
adc R5,R9
pop R9
ret
mul_24x16_40
;******************************************************************************
;* FUNCTION mul24x16_40.inc
;* Multiply 24x16 -> 40 bit (unsigned * unsigned)
;*
;* AH AM AL BH BL E4 E3 E2 E1 E0
;* R22:R21:R20 x R17:R16 -> R6:R5:R4:R3:R2
;*
;* Scratched Register: -
;* 36 cycles + 4 (RET) = 40 Cycles
;*
;******************************************************************************
mul_24x16:
push R9
; R6:R2 = R17:R16 * R22:R20
sub R4,R4 ; E2 = 0
sub R9,R9 ; zero
mul R20,R16 ; AL*BL
movw R3:R2,R1:R0
mul R22,R17 ; AH*BH
mov R5,R0
mov R6,R1
mul R21,R16 ; AM*BL
add R3,R0
adc R4,R1
adc R5,R9
adc R6,R9
mul R22,R16 ; AH*BL
add R4,R0
adc R5,R1
adc R6,R9
mul R20,R17 ; AL*BH
add R3,R0
adc R4,R1
adc R5,R9
adc R6,R9
mul R21,R17 ; AM*BH
add R4,R0
adc R5,R1
adc R6,R9
pop R9
ret
mul_24x24_48
;******************************************************************************
;* FUNCTION mul24x24_48.inc
;* Multiply 24x24 -> 48 bit (unsigned * unsigned)
;*
;* AH AM AL BH BM BL E5 E4 E3 E2 E1 E0
;* R22:R21:R20 x R18:R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;* Scratched Register: -
;* 53 cycles + 4 (RET) = 57 Cycles
;*
;******************************************************************************
mul_24x24:
push R9
push R10
push R11
; R7:R2 = R18:R16 * R22:R20
sub R9,R9 ; zero
mul R20,R16 ; AL*BL
movw R3:R2,R1:R0
mul R22,R16 ; AH*BL
movw R5:R4,R1:R0
mul R22,R18 ; AH*BH
movw R7:R6,R1:R0
mul R21,R16 ; AM*BL
movw R11:R10,R1:R0
mul R22,R17 ; AH*BM
add R3,R10
adc R4,R11
adc R5,R0
adc R6,R1
adc R7,R9
mul R20,R17 ; AL*BM
movw R11:R10,R1:R0
mul R21,R18 ; AM*BH
add R3,R10
adc R4,R11
adc R5,R0
adc R6,R1
adc R7,R9
mul R21,R17 ; AM*BM
add R4,R0
adc R5,R1
adc R6,R9
adc R7,R9
mul R20,R18 ; AL*BH
add R4,R0
adc R5,R1
adc R6,R9
adc R7,R9
pop R11
pop R10
pop R9
ret
mul_32x8_40
;******************************************************************************
;* FUNCTION mul32x8_40.inc
;* Multiply 32x8 -> 40 bit (unsigned * unsigned)
;*
;* AH AM AL AXL BL E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R16 -> R6:R5:R4:R3:R2
;*
;* Scratched Register: -
;* 25 cycles + 4 (RET) = 29 Cycles
;*
;******************************************************************************
mul_32x8:
push R9
; R6:R2 = R16 * R23:R20
sub R4,R4 ; E2 = 0
sub R9,R9 ; zero
mul R20,R16 ; AXL*BL
movw R3:R2,R1:R0
mul R23,R16 ; AH*BL
mov R5,R0
mov R6,R1
mul R21,R16 ; AL*BL
add R3,R0
adc R4,R1
adc R5,R9
adc R6,R9
mul R22,R16 ; AM*BL
add R4,R0
adc R5,R1
adc R6,R9
pop R9
ret
mul_32x16_48
;******************************************************************************
;* FUNCTION mul32x16_48.inc
;* Multiply 32x16 -> 48 bit (unsigned * unsigned)
;*
;* AH AM AL AXL BH BL E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;* Scratched Register: -
;* 45 cycles + 4 (RET) = 49 Cycles
;*
;******************************************************************************
mul_32x16:
push R9
; R7:R2 = R17:R16 * R23:R20
sub R9,R9 ; zero
mul R20,R16 ; AXL*BL
movw R3:R2,R1:R0
mul R22,R16 ; AM*BL
movw R5:R4,R1:R0
mul R23,R17 ; AH*BH
movw R7:R6,R1:R0
mul R21,R16 ; AL*BL
add R3,R0
adc R4,R1
adc R5,R9
adc R6,R9
adc R7,R9
mul R23,R16 ; AH*BL
add R5,R0
adc R6,R1
adc R7,R9
mul R20,R17 ; AXL*BH
add R3,R0
adc R4,R1
adc R5,R9
adc R6,R9
adc R7,R9
mul R21,R17 ; AL*BH
add R4,R0
adc R5,R1
adc R6,R9
adc R7,R9
mul R22,R17 ; AM*BH
add R5,R0
adc R6,R1
adc R7,R9
pop R9
ret
mul_32x24_56
;******************************************************************************
;* FUNCTION mul32x24_56.inc
;* Multiply 32x24 -> 56 bit (unsigned * unsigned)
;*
;* AH AM AL AXL BH BM BL E6 E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R18:R17:R16 -> R8:R7:R6:R5:R4:R3:R2
;*
;* Scratched Register: -
;* 71 cycles + 4 (RET) = 75 Cycles
;*
;******************************************************************************
mul_32x24:
push R9
push R10
push R11
; R8:R2 = R18:R16 * R23:R20
sub R6,R6 ; E4 = 0
sub R9,R9 ; zero
mul R20,R16 ; AXL*BL
movw R3:R2,R1:R0
mul R22,R16 ; AM*BL
movw R5:R4,R1:R0
mul R23,R18 ; AH*BH
mov R7,R0
mov R8,R1
mul R21,R16 ; AL*BL
movw R11:R10,R1:R0
mul R23,R16 ; AH*BL
add R3,R10
adc R4,R11
adc R5,R0
adc R6,R1
adc R7,R9
adc R8,R9
mul R20,R17 ; AXL*BM
movw R11:R10,R1:R0
mul R22,R17 ; AM*BM
add R3,R10
adc R4,R11
adc R5,R0
adc R6,R1
adc R7,R9
adc R8,R9
mul R21,R17 ; AL*BM
movw R11:R10,R1:R0
mul R23,R17 ; AH*BM
add R4,R10
adc R5,R11
adc R6,R0
adc R7,R1
adc R8,R9
mul R20,R18 ; AXL*BH
movw R11:R10,R1:R0
mul R22,R18 ; AM*BH
add R4,R10
adc R5,R11
adc R6,R0
adc R7,R1
adc R8,R9
mul R21,R18 ; AL*BH
add R5,R0
adc R6,R1
adc R7,R9
adc R8,R9
pop R11
pop R10
pop R9
ret
mul_32x32_64
;******************************************************************************
;* FUNCTION mul32x32_64.inc
;* Multiply 32x32 -> 64 bit (unsigned * unsigned)
;*
;* AH AM AL AXL BH BM BL BXL E7 E6 E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R19:R18:R17:R16 -> R9:R8:R7:R6:R5:R4:R3:R2
;*
;* Scratched Register: -
;* 98 cycles + 4 (RET) = 102 Cycles
;*
;******************************************************************************
mul_32x32:
push R10
push R11
push R12
push R13
push R14
clr R14
; R9:R2 = R23:R20 * R19:R16
sub R14,R14 ; zero (redundant: R14 already cleared above)
mul R20,R16 ; AXL*BXL
movw R3:R2,R1:R0
mul R21,R17 ; AL*BL
movw R5:R4,R1:R0
mul R22,R18 ; AM*BM
movw R7:R6,R1:R0
mul R23,R19 ; AH*BH
movw R9:R8,R1:R0
mul R22,R19 ; AM*BH
movw R13:R12,R1:R0
mul R21,R18 ; AL*BM
movw R11:R10,R1:R0
mul R20,R17 ; AXL*BL
add R3, R0
adc R4, R1
adc R5, R10
adc R6, R11
adc R7, R12
adc R8,R13
adc R9,R14
mul R23,R18 ; AH*BM
movw R13:R12,R1:R0
mul R22,R17 ; AM*BL
movw R11:R10,R1:R0
mul R21,R16 ; AL*BXL
add R3, R0
adc R4, R1
adc R5, R10
adc R6, R11
adc R7, R12
adc R8,R13
adc R9,R14
mul R21,R19 ; AL*BH
movw R11:R10,R1:R0
mul R20,R18 ; AXL*BM
add R4, R0
adc R5, R1
adc R6, R10
adc R7, R11
adc R8,R14
adc R9,R14
mul R23,R17 ; AH*BL
movw R11:R10,R1:R0
mul R22,R16 ; AM*BXL
add R4, R0
adc R5, R1
adc R6, R10
adc R7, R11
adc R8,R14
adc R9,R14
mul R20,R19 ; AXL*BH
movw R11:R10,R1:R0
mul R23,R16 ; AH*BXL
add R5, R0
adc R6, R1
adc R7, R14
adc R8,R14
adc R9,R14
add R5, R10
adc R6, R11
adc R7, R14
adc R8,R14
adc R9,R14
pop R14
pop R13
pop R12
pop R11
pop R10
ret
signed x unsigned
mulsu_16x8_24
;******************************************************************************
;* FUNCTION mulsu16x8_24.inc
;* Multiply 16x8 -> 24 bit (signed * unsigned)
;*
;* AH AL BL E2 E1 E0
;* R21:R20 x R16 -> R4:R3:R2
;*
;* Fix: removed the spurious "adc R4,R10" (it added C = bit 15
;* of the unsigned AL x BL product) and the spurious
;* "sbc R4,R10" (AH x BL is the top partial product at E2:E1,
;* no further sign extension exists). E.g. 0x00FF * 0xFF
;* returned 0x01FE01 instead of 0x00FE01. R10 is no longer
;* needed at all.
;*
;* Scratched Register: -
;* 8 cycles + 4 (RET) = 12 Cycles
;*
;******************************************************************************
mulsu_16x8:
clr R4
mul R20,R16 ; AL x BL (u*u) -> E1:E0
movw R3:R2,R1:R0
mulsu R21,R16 ; AH x BL (s*u) -> E2:E1
add R3,R0
adc R4,R1
ret
mulsu_16x16_32
;******************************************************************************
;* FUNCTION mulsu16x16_32.inc
;* Multiply 16x16 -> 32 bit (signed * unsigned)
;*
;* AH AL BH BL E3 E2 E1 E0
;* R21:R20 x R17:R16 -> R5:R4:R3:R2
;*
;* Scratched Register: -
;* 23 cycles + 4 (RET) = 27 Cycles
;*
;******************************************************************************
mulsu_16x16:
push R10
sub R10,R10 ; zero
mul R20,R16 ; AL x BL (u*u)
movw R3:R2,R1:R0
mulsu R21,R17 ; AH x BH (s*u, top product)
movw R5:R4,R1:R0
mulsu R21,R16 ; AH x BL (s*u)
sbc R5,R10 ; sign extension into E3
add R3,R0
adc R4,R1
adc R5,R10
mul R20,R17 ; AL x BH (u*u)
add R3,R0
adc R4,R1
adc R5,R10
pop R10
ret
mulsu_24x8_32
;***********************************************************************************
;*
;* FUNCTION mulsu24x8_32.inc
;* Multiply 24x8 -> 32 bit (signed * unsigned)
;*
;* AH AM AL BL E3 E2 E1 E0
;* R22:R21:R20 x R16 -> R5:R4:R3:R2
;*
;* 17 cycles + 4 (RET) = 21 Cycles
;*
;***********************************************************************************
mulsu_24x8:
push R10
sub R10,R10 ; zero
mul R20,R16 ; AL x BL (u*u)
movw R3:R2,R1:R0
mulsu R22,R16 ; AH x BL (s*u, top product)
movw R5:R4,R1:R0
mul R21,R16 ; AM x BL (u*u)
add R3,R0
adc R4,R1
adc R5,R10
pop R10
ret
mulsu_24x16_40
;***********************************************************************************
;*
;* FUNCTION mulsu24x16_40.inc
;* Multiply 24x16 -> 40 bit (signed * unsigned)
;*
;* AH AM AL BH BL E4 E3 E2 E1 E0
;* R22:R21:R20 x R17:R16 -> R6:R5:R4:R3:R2
;*
;* 37 cycles + 4 (RET) = 41 Cycles
;*
;***********************************************************************************
mulsu_24x16:
push R10
clr R4 ;E2 = 0
sub R10,R10 ; zero
mul R20,R16 ; AL x BL (u*u)
movw R3:R2,R1:R0
mulsu R22,R17 ; AH x BH (s*u, top product)
mov R5,R0
mov R6,R1
mul R21,R16 ; AM x BL (u*u)
add R3,R0
adc R4,R1
adc R5,R10
adc R6,R10
mulsu R22,R16 ; AH x BL (s*u)
sbc R6,R10 ; sign extension into E4
add R4,R0
adc R5,R1
adc R6,R10
mul R20,R17 ; AL x BH (u*u)
add R3,R0
adc R4,R1
adc R5,R10
adc R6,R10
mul R21,R17 ; AM x BH (u*u)
add R4,R0
adc R5,R1
adc R6,R10
pop R10
ret
mulsu_24x24_48
;***********************************************************************************
;*
;* FUNCTION mulsu24x24_48.inc
;* Multiply 24x24 -> 48 bit (signed * unsigned)
;*
;* AH AM AL BH BM BL E5 E4 E3 E2 E1 E0
;* R22:R21:R20 x R18:R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;* Fix: AH x BH must use R18 (= BH); the original used
;* "mulsu R22,R17" (= AH x BM, duplicated two lines later).
;*
;* 56 cycles + 4 (RET) = 60 Cycles
;*
;***********************************************************************************
mulsu_24x24:
push R8
push R9
push R10
sub R10,R10 ; zero
mul R20,R16 ; AL x BL (u*u)
movw R3:R2,R1:R0
mul R21,R17 ; AM x BM (u*u)
movw R5:R4,R1:R0
mulsu R22,R18 ; AH x BH (s*u, fix: was R22,R17)
movw R7:R6,R1:R0
mulsu R22,R17 ; AH x BM (s*u)
sbc R7,R10 ; signed DUMMY (sign extension into E5)
movw R9:R8,R1:R0
mul R21,R16 ; AM x BL (u*u)
add R3,R0
adc R4,R1
adc R5,R8
adc R6,R9
adc R7,R10
mul R21,R18 ; AM x BH (u*u)
movw R9:R8,R1:R0
mul R20,R17 ; AL x BM (u*u)
add R3,R0
adc R4,R1
adc R5,R8
adc R6,R9
adc R7,R10
mulsu R22,R16 ; AH x BL (s*u)
sbc R6,R10 ; signed DUMMY (extension, low byte first)
sbc R7,R10 ; signed DUMMY
add R4,R0
adc R5,R1
adc R6,R10
adc R7,R10
mul R20,R18 ; AL x BH (u*u)
add R4,R0
adc R5,R1
adc R6,R10
adc R7,R10
pop R10
pop R9
pop R8
ret
mulsu_32x8_40
;***********************************************************************************
;*
;* FUNCTION mulsu32x8_40.inc
;* Multiply 32x8 -> 40 bit (signed * unsigned)
;*
;* AH AM AL AXL BL E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R16 -> R6:R5:R4:R3:R2
;*
;* 25 cycles + 4 (RET) = 29 Cycles
;*
;***********************************************************************************
mulsu_32x8:
push R10
clr R10
clr r4
mul R20,R16 ; AXL x BL
movw R3:R2,R1:R0 ; r1:r0 -> E1:E0
mulsu R23,R16 ; AH x BL (s*u, top product)
mov R5,R0 ; r0 -> E3
mov R6,R1 ; r1 -> E4
mul R21,R16 ; AL x BL
add R3,R0 ; R0 -> E1
adc R4,R1 ; R1 -> E2
adc R5,R10 ; T0 -> E3
adc R6,R10 ; T0 -> E4
mul R22,R16 ; AM x BL
add R4,R0 ; R0 -> E2
adc R5,R1 ; R1 -> E3
adc R6,R10 ; T0 -> E4
pop R10
ret
mulsu_32x16_48
;***********************************************************************************
;*
;* FUNCTION mulsu32x16_48.inc
;* Multiply 32x16 -> 48 bit (signed * unsigned)
;*
;* AH AM AL AXL BH BL E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;* 50 cycles + 4 (RET) = 54 Cycles
;*
;***********************************************************************************
mulsu_32x16:
push R8
push R9
push R10
clr R10
mul R20,R16 ; AXL x BL
movw R3:R2,R1:R0 ; r1:r0 -> E1:E0
mul R22,R16 ; AM x BL
movw R5:R4,R1:R0 ; r1:r0 -> E3:E2
mulsu R23,R17 ; AH x BH (s*u, top product)
movw R7:R6,R1:R0 ; r1:r0 -> E5:E4
mulsu R23,R16 ; AH x BL (s*u)
movw R9:R8,R1:R0 ; r1:r0 -> T4:T3
sbc R7,R10 ; signed DUMMY T5 (extension into E5)
mul R21,R16 ; AL x BL
add R3,R0 ; r0 ----> + E1
adc R4,R1 ; r1 +c -> + E2
adc R5,R8 ; T3 +c -> + E3
adc R6,R9 ; T4 +c -> + E4
adc R7,R10 ; T5 +c -> + E5
mul R17,R20 ; AXL x BH (u*u)
add R3,R0 ; r0 ----> + E1
adc R4,R1 ; r1 +c -> + E2
adc R5,R10 ; 0 +c -> + E3
adc R6,R10 ; 0 +c -> + E4
adc R7,R10 ; 0 +c -> + E5
mul R17,R21 ; AL x BH (u*u)
add R4,R0 ; r0 +c -> + E2
adc R5,R1 ; r1 +c -> + E3
adc R6,R10 ; 0 +c -> + E4
adc R7,R10 ; 0 +c -> + E5
mul R17,R22 ; AM x BH (u*u)
add R5,R0 ; r0 +c -> + E3
adc R6,R1 ; r1 +c -> + E4
adc R7,R10 ; 0 +c -> + E5
pop R10
pop R9
pop R8
ret
mulsu_32x24_56
;***********************************************************************************
;*
;* FUNCTION mulsu32x24_56.inc
;* Multiply 32x24 -> 56 bit (signed * unsigned)
;*
;* AH AM AL AXL BH BM BL E6 E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R18:R17:R16 -> R8:R7:R6:R5:R4:R3:R2
;*
;* 74 cycles + 4 (RET) = 78 Cycles
;*
;***********************************************************************************
mulsu_32x24:
push R9
push R10
push R11
sub R9,R9 ; zero
clr R6 ;E4 = 0
mul R20,R16 ; AXL x BL (u*u)
movw R3:R2,R1:R0
mul R22,R16 ; AM x BL (u*u)
movw R5:R4,R1:R0
mulsu R23,R18 ; AH x BH (s*u, top product)
mov R7,R0
mov R8,R1
mul R21,R16 ; AL x BL (u*u)
movw R11:R10,R1:R0
mulsu R23,R16 ; AH x BL (s*u)
sbc R7,R9 ; signed DUMMY (extension, low byte first)
sbc R8,R9 ; signed DUMMY
add R3,R10
adc R4,R11
adc R5,R0
adc R6,R1
adc R7,R9
adc R8,R9
mul R20,R17 ; AXL x BM (u*u)
movw R11:R10,R1:R0
mul R21,R18 ; AL x BH (u*u)
add R3,R10
adc R4,R11
adc R5,R0
adc R6,R1
adc R7,R9
adc R8,R9
mul R21,R17 ; AL x BM (u*u)
movw R11:R10,R1:R0
mulsu R23,R17 ; AH x BM (s*u)
sbc R8,R9 ; signed DUMMY (extension into E6)
add R4,R10
adc R5,R11
adc R6,R0
adc R7,R1
adc R8,R9
mul R22,R17 ; AM x BM (u*u)
add R5,R0
adc R6,R1
adc R7,R9
adc R8,R9
mul R20,R18 ; AXL x BH (u*u)
movw R11:R10,R1:R0
mul R22,R18 ; AM x BH (u*u)
add R4,R10
adc R5,R11
adc R6,R0
adc R7,R1
adc R8,R9
pop R11
pop R10
pop R9
ret
mulsu_32x32_64
;***********************************************************************************
;*
;* FUNCTION mulsu32x32_64.inc
;* Multiply 32x32 -> 64 bit (signed * unsigned)
;*
;* AH AM AL AXL BH BM BL BXL E7 E6 E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R19:R18:R17:R16 -> R9:R8:R7:R6:R5:R4:R3:R2
;*
;* 102 cycles + 4 (RET) = 106 Cycles
;*
;***********************************************************************************
mulsu_32x32:
push R10
push R11
push R12
push R13
push R14
sub R14,R14 ; zero
mul R20,R16 ; AXL x BXL (u*u)
movw R3:R2,R1:R0
mul R21,R17 ; AL x BL (u*u)
movw R5:R4,R1:R0
mul R22,R18 ; AM x BM (u*u)
movw R7:R6,R1:R0
mulsu R23,R19 ; AH x BH (s*u, top product)
movw R9:R8,R1:R0
mul R20,R17 ; AXL x BL (u*u)
movw R11:R10,R1:R0
mul R21,R18 ; AL x BM (u*u)
movw R13:R12,R1:R0
mul R22,R19 ; AM x BH (u*u)
add R3,R10
adc R4,R11
adc R5,R12
adc R6,R13
adc R7,R0
adc R8,R1
adc R9,R14
mul R21,R16 ; AL x BXL (u*u)
movw R11:R10,R1:R0
mul R22,R17 ; AM x BL (u*u)
movw R13:R12,R1:R0
mulsu R23,R18 ; AH x BM (s*u)
sbc R9,R14 ; signed DUMMY (sign extension into E7)
add R3,R10
adc R4,R11
adc R5,R12
adc R6,R13
adc R7,R0
adc R8,R1
adc R9,R14
mul R20,R18 ; AXL x BM (u*u)
movw R11:R10,R1:R0
mul R21,R19 ; AL x BH (u*u)
add R4,R10
adc R5,R11
adc R6,R0
adc R7,R1
adc R8,R14
adc R9,R14
mul R22,R16 ; AM x BXL (u*u)
movw R11:R10,R1:R0
mulsu R23,R17 ; AH x BL (s*u)
sbc R8,R14 ; signed DUMMY (extension, low byte first)
sbc R9,R14 ; signed DUMMY
add R4,R10
adc R5,R11
adc R6,R0
adc R7,R1
adc R8,R14
adc R9,R14
mulsu R23,R16 ; AH x BXL (s*u)
sbc R7,R14 ; signed DUMMY (extension, low byte first)
sbc R8,R14 ; signed DUMMY
sbc R9,R14 ; signed DUMMY
add R5,R0
adc R6,R1
adc R7,R14
adc R8,R14
adc R9,R14
mul R20,R19 ; AXL x BH (u*u)
add R5,R0
adc R6,R1
adc R7,R14
adc R8,R14
adc R9,R14
pop R14
pop R13
pop R12
pop R11
pop R10
ret
signed x signed
muls_16x8_24
;******************************************************************************
;* FUNCTION muls16x8_24.inc
;* Multiply 16x8 -> 24 bit (signed * signed)
;*
;* AH AL BL E2 E1 E0
;* R21:R20 x R16 -> R4:R3:R2
;*
;* Scratched Register: R0, R1 (R10 is saved/restored)
;* 11 cycles + 4 (RET) = 15 Cycles (original author's count; the push/pop of
;* R10 is not included in it)
;*
;* MULSU sets C to bit 15 of the product; "sbc Rx,R10" (R10 = 0) subtracts
;* that carry from the high byte -- the sign extension of the signed partial
;* product ("signed DUMMY" idiom).
;******************************************************************************
muls_16x8:
push R10 ; preserve R10 for the caller
sub R10,R10 ; R10 = 0 (zero register); clears carry
sub R4,R4 ; E2 = 0 (receives sign extension below)
mulsu R16,R20 ; BL x AL (s*u), weight 2^0; C = sign of product
sbc R4,R10 ; signed DUMMY: sign-extend BL*AL into E2
movw R3:R2,R1:R0 ; -> E1:E0 (MOVW does not touch flags)
muls R21,R16 ; AH x BL (s*s), weight 2^8
add R3,R0 ; E1 += lo(AH*BL)
adc R4,R1 ; E2 += hi(AH*BL) + C
pop R10 ; restore R10
ret
muls_16x16_32
;******************************************************************************
;* FUNCTION muls16x16_32.inc
;* Multiply 16x16 -> 32 bit (signed * signed)
;* (classic Atmel AVR201 "muls16x16_32" routine, here with R10 saved and used
;* as the zero register)
;*
;* AH AL BH BL E3 E2 E1 E0
;* R21:R20 x R17:R16 -> R5:R4:R3:R2
;*
;* Scratched Register: R0, R1 (R10 is saved/restored)
;* 13 cycles + 4 (RET) = 17 Cycles (original author's count)
;******************************************************************************
muls_16x16:
push R10 ; preserve R10 for the caller
clr R10 ; R10 = 0 (zero register; CLR does not affect C)
muls R21,R17 ; (signed)AH * (signed)BH, weight 2^16
movw R5:R4,R1:R0 ; -> E3:E2
mul R20,R16 ; AL * BL (u*u), weight 2^0
movw R3:R2,R1:R0 ; -> E1:E0
mulsu R21, R16 ; (signed)AH * BL, weight 2^8; C = sign of product
sbc R5, R10 ; signed DUMMY: sign-extend into E3
add R3, R0 ; E1 += lo(AH*BL)
adc R4, R1 ; E2 += hi(AH*BL) + C
adc R5, R10 ; E3 += C
mulsu R17, R20 ; (signed)BH * AL, weight 2^8; C = sign of product
sbc R5, R10 ; signed DUMMY: sign-extend into E3
add R3, R0 ; E1 += lo(BH*AL)
adc R4, R1 ; E2 += hi(BH*AL) + C
adc R5, R10 ; E3 += C
pop R10 ; restore R10
ret
muls_24x8_32
;******************************************************************************
;* FUNCTION muls24x8_32.inc
;* Multiply 24x8 -> 32 bit (signed * signed)
;*
;* AH AM AL BL E3 E2 E1 E0
;* R22:R21:R20 x R16 -> R5:R4:R3:R2
;*
;* Scratched Register: R0, R1 (R10 is saved/restored)
;* 19 cycles + 4 (RET) = 23 Cycles
;*
;* BUGFIX (review): the previous version computed AM x BL with an unsigned
;* MUL and never sign-extended the BL x AL partial product, so any negative
;* BL gave a wrong result (e.g. 1 * -1 returned 0x0000FFFF instead of
;* 0xFFFFFFFF). The partial products involving the signed BL now use MULSU
;* plus the "signed DUMMY" idiom: MULSU sets C to bit 15 (the sign) of the
;* product, and "sbc Rx,R10" (R10 = 0) subtracts that carry from the higher
;* result bytes, i.e. sign-extends the partial product.
;******************************************************************************
muls_24x8:
push R10 ; preserve R10 for the caller
sub R10,R10 ; R10 = 0 (zero register); clears carry
muls R22,R16 ; AH x BL (s*s), weight 2^16
movw R5:R4,R1:R0 ; -> E3:E2
mulsu R16,R20 ; BL x AL (s*u), weight 2^0; C = sign of product
sbc R4,R10 ; signed DUMMY: sign-extend BL*AL into E2...
sbc R5,R10 ; ...and E3 (16-bit subtraction of the sign carry)
movw R3:R2,R1:R0 ; -> E1:E0 (MOVW does not touch flags)
mulsu R16,R21 ; BL x AM (s*u), weight 2^8; C = sign of product
sbc R5,R10 ; signed DUMMY: sign-extend BL*AM into E3
add R3,R0 ; E1 += lo(BL*AM)
adc R4,R1 ; E2 += hi(BL*AM) + C
adc R5,R10 ; E3 += C
pop R10 ; restore R10
ret
muls_24x16_40
;******************************************************************************
;* FUNCTION muls24x16_40.inc
;* Multiply 24x16 -> 40 bit (signed * signed)
;*
;* AH AM AL BH BL E4 E3 E2 E1 E0
;* R22:R21:R20 x R17:R16 -> R6 R5:R4:R3:R2
;*
;* Scratched Register: R0, R1 (R10 is saved/restored)
;* 38 cycles + 4 (RET) = 42 Cycles (original author's count)
;*
;* "signed DUMMY" idiom: MULSU sets C to bit 15 (the sign) of the product;
;* "sbc Rx,R10" (R10 = 0) subtracts that carry from the higher result bytes,
;* i.e. sign-extends the signed partial product.
;******************************************************************************
muls_24x16:
push R10 ; preserve R10 for the caller
clr R10 ; R10 = 0 (zero register)
mul R20,R16 ; AL x BL (u*u), weight 2^0
movw R3:R2,R1:R0 ; -> E1:E0
muls R22,R17 ; AH x BH (s*s), weight 2^24
mov R5,R0 ; lo -> E3
mov R6,R1 ; hi -> E4
clr R4 ; E2 = 0
mul R21,R16 ; AM x BL (u*u), weight 2^8
add R3,R0 ; E1 += lo(AM*BL)
adc R4,R1 ; E2 += hi(AM*BL); no carry out possible (E2 was 0)
mulsu R22,R16 ; AH x BL (s*u), weight 2^16; C = sign of product
sbc R6,R10 ; signed DUMMY: sign-extend AH*BL into E4
add R4,R0 ; E2 += lo(AH*BL)
adc R5,R1 ; E3 += hi(AH*BL) + C
adc R6,R10 ; E4 += C
mulsu R17,R20 ; BH x AL (s*u), weight 2^8; C = sign of product
sbc R5,R10 ; signed DUMMY: sign-extend BH*AL into E3...
sbc R6,R10 ; ...and E4
add R3,R0 ; E1 += lo(BH*AL)
adc R4,R1 ; E2 += hi(BH*AL) + C
adc R5,R10 ; E3 += C
adc R6,R10 ; E4 += C
mulsu R17,R21 ; BH x AM (s*u), weight 2^16; C = sign of product
sbc R6,R10 ; signed DUMMY: sign-extend BH*AM into E4
add R4,R0 ; E2 += lo(BH*AM)
adc R5,R1 ; E3 += hi(BH*AM) + C
adc R6,R10 ; E4 += C
pop R10 ; restore R10
ret
muls_24x24_48
;***********************************************************************************
;*
;* FUNCTION muls24x24_48.inc
;* Multiply 24x24 -> 48 bit (signed * signed)
;*
;* AH AM AL BH BM BL E5 E4 E3 E2 E1 E0
;* R22:R21:R20 x R18:R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;* Scratched: R0, R1 and the result registers; R8..R10 are saved/restored.
;* 59 cycles + 4 (RET) = 63 Cycles (original author's count; unchanged by fix)
;*
;* BUGFIX (review): the AH x BH partial product was coded as "muls R22,R17",
;* which is AH x BM (BM = R17) -- the top partial product AH x BH (BH = R18)
;* was never computed, so e.g. 0x010000 * 0x010000 returned 0 instead of
;* 2^32. Corrected to "muls R22,R18" as the comment always intended.
;*
;* "signed DUMMY" idiom: MULSU sets C to bit 15 (the sign) of the product;
;* "sbc Rx,R10" (R10 = 0) subtracts that carry from the higher result bytes,
;* i.e. sign-extends the signed partial product.
;*
;***********************************************************************************
muls_24x24:
push R8 ; preserve R8..R10 for the caller
push R9
push R10
clr R10 ; R10 = 0 (zero register)
mul R20,R16 ; AL x BL (u*u), weight 2^0
movw R3:R2,R1:R0 ; -> E1:E0
mul R21,R17 ; AM x BM (u*u), weight 2^16
movw R5:R4,R1:R0 ; -> E3:E2
muls R22,R18 ; AH x BH (s*s), weight 2^32  (was: muls R22,R17 = AH x BM)
movw R7:R6,R1:R0 ; -> E5:E4
mulsu R22,R17 ; AH x BM (s*u), weight 2^24; C = sign of product
sbc R7,R10 ; signed DUMMY: sign-extend AH*BM into E5
movw R9:R8,R1:R0 ; park in T1:T0
mul R21,R16 ; AM x BL (u*u), weight 2^8
add R3,R0 ; E1 += lo(AM*BL)
adc R4,R1 ; E2 += hi(AM*BL) + C
adc R5,R8 ; E3 += lo(AH*BM) + C
adc R6,R9 ; E4 += hi(AH*BM) + C
adc R7,R10 ; E5 += C
mulsu R18,R21 ; BH x AM (s*u), weight 2^24; C = sign of product
sbc R7,R10 ; signed DUMMY: sign-extend BH*AM into E5
movw R9:R8,R1:R0 ; park in T1:T0
mul R20,R17 ; AL x BM (u*u), weight 2^8
add R3,R0 ; E1 += lo(AL*BM)
adc R4,R1 ; E2 += hi(AL*BM) + C
adc R5,R8 ; E3 += lo(BH*AM) + C
adc R6,R9 ; E4 += hi(BH*AM) + C
adc R7,R10 ; E5 += C
mulsu R22,R16 ; AH x BL (s*u), weight 2^16; C = sign of product
sbc R6,R10 ; signed DUMMY: sign-extend AH*BL into E4...
sbc R7,R10 ; ...and E5
add R4,R0 ; E2 += lo(AH*BL)
adc R5,R1 ; E3 += hi(AH*BL) + C
adc R6,R10 ; E4 += C
adc R7,R10 ; E5 += C
mulsu R18,R20 ; BH x AL (s*u), weight 2^16; C = sign of product
sbc R6,R10 ; signed DUMMY: sign-extend BH*AL into E4...
sbc R7,R10 ; ...and E5
add R4,R0 ; E2 += lo(BH*AL)
adc R5,R1 ; E3 += hi(BH*AL) + C
adc R6,R10 ; E4 += C
adc R7,R10 ; E5 += C
pop R10 ; restore R8..R10
pop R9
pop R8
ret
muls_32x8_40
;***********************************************************************************
;*
;* FUNCTION muls32x8_40.inc
;* Multiply 32x8 -> 40 bit (signed * signed)
;*
;* AH AM AL AXL BL E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R16 -> R6:R5:R4:R3:R2
;*
;* Scratched: R0, R1 and the result registers; R10 is saved/restored.
;* 31 cycles + 4 (RET) = 35 Cycles
;*
;* BUGFIX (review): after the MULSU/SBC sign extension of BL x AXL, the old
;* code executed "adc R4/R5/R6,R10", re-adding the borrow that the SBC chain
;* had just produced. Whenever the borrow rippled through all three bytes
;* (e.g. A = 1, B = -1) this undid the sign extension and the routine
;* returned 0x000000FFFF instead of 0xFFFFFFFFFF (-1). The three spurious
;* ADCs are removed; no carry needs propagating there because the low
;* product bytes are written with MOVW, not added.
;*
;* "signed DUMMY" idiom: MULSU sets C to bit 15 (the sign) of the product;
;* "sbc Rx,R10" (R10 = 0) subtracts that carry from the higher result bytes,
;* i.e. sign-extends the signed partial product.
;*
;***********************************************************************************
muls_32x8:
push R10 ; preserve R10 for the caller
clr R4 ; E2 = 0 (receives sign extension below)
sub R10,R10 ; R10 = 0 (zero register); clears carry
muls R23,R16 ; AH x BL (s*s), weight 2^24
mov R5,R0 ; lo -> E3
mov R6,R1 ; hi -> E4
mulsu R16,R20 ; BL x AXL (s*u), weight 2^0; C = sign of product
sbc R4,R10 ; signed DUMMY: sign-extend BL*AXL into E2...
sbc R5,R10 ; ...E3...
sbc R6,R10 ; ...and E4 (24-bit subtraction of the sign carry)
movw R3:R2,R1:R0 ; -> E1:E0 (MOVW does not touch flags)
mulsu R16,R21 ; BL x AL (s*u), weight 2^8; C = sign of product
sbc R5,R10 ; signed DUMMY: sign-extend BL*AL into E3...
sbc R6,R10 ; ...and E4
add R3,R0 ; E1 += lo(BL*AL)
adc R4,R1 ; E2 += hi(BL*AL) + C
adc R5,R10 ; E3 += C
adc R6,R10 ; E4 += C
mulsu R16,R22 ; BL x AM (s*u), weight 2^16; C = sign of product
sbc R6,R10 ; signed DUMMY: sign-extend BL*AM into E4
add R4,R0 ; E2 += lo(BL*AM)
adc R5,R1 ; E3 += hi(BL*AM) + C
adc R6,R10 ; E4 += C
pop R10 ; restore R10
ret
muls_32x16_48
;***********************************************************************************
;*
;* FUNCTION muls32x16_48.inc
;* Multiply 32x16 -> 48 bit (signed * signed)
;*
;* AH AM AL AXL BH BL E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R17:R16 -> R7:R6:R5:R4:R3:R2
;*
;* Scratched: R0, R1 and the result registers; R8..R10 are saved/restored.
;* 56 cycles + 4 (RET) = 60 Cycles (original author's count)
;*
;* "signed DUMMY" idiom: MULSU sets C to bit 15 (the sign) of the product;
;* "sbc Rx,R10" (R10 = 0) subtracts that carry from the higher result bytes,
;* i.e. sign-extends the signed partial product. MOVW does not touch flags,
;* so the parked products do not disturb the sign carry.
;*
;***********************************************************************************
muls_32x16:
push R8 ; preserve R8..R10 for the caller
push R9
push R10
clr R10 ; R10 = 0 (zero register)
mul R20,R16 ; AXL x BL (u*u), weight 2^0
movw R3:R2,R1:R0 ; -> E1:E0
mul R22,R16 ; AM x BL (u*u), weight 2^16
movw R5:R4,R1:R0 ; -> E3:E2
muls R23,R17 ; AH x BH (s*s), weight 2^32
movw R7:R6,R1:R0 ; -> E5:E4
mulsu R23,R16 ; AH x BL (s*u), weight 2^24; C = sign of product
movw R9:R8,R1:R0 ; park in T4:T3 (flags preserved)
sbc R7,R10 ; signed DUMMY: sign-extend AH*BL into E5
mul R21,R16 ; AL x BL (u*u), weight 2^8
add R3,R0 ; E1 += lo(AL*BL)
adc R4,R1 ; E2 += hi(AL*BL) + C
adc R5,R8 ; E3 += lo(AH*BL) + C
adc R6,R9 ; E4 += hi(AH*BL) + C
adc R7,R10 ; E5 += C
mulsu R17,R20 ; BH x AXL (s*u), weight 2^8; C = sign of product
sbc R5,R10 ; signed DUMMY: sign-extend BH*AXL into E3...
sbc R6,R10 ; ...E4...
sbc R7,R10 ; ...and E5
add R3,R0 ; E1 += lo(BH*AXL)
adc R4,R1 ; E2 += hi(BH*AXL) + C
adc R5,R10 ; E3 += C
adc R6,R10 ; E4 += C
adc R7,R10 ; E5 += C
mulsu R17,R21 ; BH x AL (s*u), weight 2^16; C = sign of product
sbc R6,R10 ; signed DUMMY: sign-extend BH*AL into E4...
sbc R7,R10 ; ...and E5
add R4,R0 ; E2 += lo(BH*AL)
adc R5,R1 ; E3 += hi(BH*AL) + C
adc R6,R10 ; E4 += C
adc R7,R10 ; E5 += C
mulsu R17,R22 ; BH x AM (s*u), weight 2^24; C = sign of product
sbc R7,R10 ; signed DUMMY: sign-extend BH*AM into E5
add R5,R0 ; E3 += lo(BH*AM)
adc R6,R1 ; E4 += hi(BH*AM) + C
adc R7,R10 ; E5 += C
pop R10 ; restore R8..R10
pop R9
pop R8
ret
muls_32x24_56
;***********************************************************************************
;*
;* FUNCTION muls32x24_56.inc
;* Multiply 32x24 -> 56 bit (signed * signed)
;*
;* AH AM AL AXL BH BM BL E6 E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R18:R17:R16 -> R8:R7:R6:R5:R4:R3:R2
;*
;* Scratched: R0, R1 and the result registers; R9..R11 are saved/restored.
;* 82 cycles + 4 (RET) = 86 Cycles (original author's count)
;*
;* "signed DUMMY" idiom: MULSU sets C to bit 15 (the sign) of the product;
;* "sbc Rx,R9" (R9 = 0) subtracts that carry from the higher result bytes,
;* i.e. sign-extends the signed partial product.
;*
;***********************************************************************************
muls_32x24:
push R9 ; preserve R9..R11 for the caller
push R10
push R11
sub R9,R9 ; R9 = 0 (zero register); clears carry
sub R6,R6 ; E4 = 0
mul R20,R16 ; AXL x BL (u*u), weight 2^0
movw R3:R2,R1:R0 ; -> E1:E0
mul R22,R16 ; AM x BL (u*u), weight 2^16
movw R5:R4,R1:R0 ; -> E3:E2
muls R23,R18 ; AH x BH (s*s), weight 2^40
mov R7,R0 ; lo -> E5
mov R8,R1 ; hi -> E6
mul R21,R16 ; AL x BL (u*u), weight 2^8
movw R11:R10,R1:R0 ; park in T1:T0
mulsu R23,R16 ; AH x BL (s*u), weight 2^24; C = sign of product
sbc R7,R9 ; signed DUMMY: sign-extend AH*BL into E5...
sbc R8,R9 ; ...and E6
add R3,R10 ; E1 += lo(AL*BL)
adc R4,R11 ; E2 += hi(AL*BL) + C
adc R5,R0 ; E3 += lo(AH*BL) + C
adc R6,R1 ; E4 += hi(AH*BL) + C
adc R7,R9 ; E5 += C
adc R8,R9 ; E6 += C
mul R20,R17 ; AXL x BM (u*u), weight 2^8
movw R11:R10,R1:R0 ; park in T1:T0
mulsu R18,R21 ; BH x AL (s*u), weight 2^24; C = sign of product
sbc R7,R9 ; signed DUMMY: sign-extend BH*AL into E5...
sbc R8,R9 ; ...and E6
add R3,R10 ; E1 += lo(AXL*BM)
adc R4,R11 ; E2 += hi(AXL*BM) + C
adc R5,R0 ; E3 += lo(BH*AL) + C
adc R6,R1 ; E4 += hi(BH*AL) + C
adc R7,R9 ; E5 += C
adc R8,R9 ; E6 += C
mul R21,R17 ; AL x BM (u*u), weight 2^16
movw R11:R10,R1:R0 ; park in T1:T0
mulsu R23,R17 ; AH x BM (s*u), weight 2^32; C = sign of product
sbc R8,R9 ; signed DUMMY: sign-extend AH*BM into E6
add R4,R10 ; E2 += lo(AL*BM)
adc R5,R11 ; E3 += hi(AL*BM) + C
adc R6,R0 ; E4 += lo(AH*BM) + C
adc R7,R1 ; E5 += hi(AH*BM) + C
adc R8,R9 ; E6 += C
mul R22,R17 ; AM x BM (u*u), weight 2^24
add R5,R0 ; E3 += lo(AM*BM)
adc R6,R1 ; E4 += hi(AM*BM) + C
adc R7,R9 ; E5 += C
adc R8,R9 ; E6 += C
mulsu R18,R20 ; BH x AXL (s*u), weight 2^16; C = sign of product
sbc R6,R9 ; signed DUMMY: sign-extend BH*AXL into E4...
sbc R7,R9 ; ...E5...
sbc R8,R9 ; ...and E6
add R4,R0 ; E2 += lo(BH*AXL)
adc R5,R1 ; E3 += hi(BH*AXL) + C
adc R6,R9 ; E4 += C
adc R7,R9 ; E5 += C
adc R8,R9 ; E6 += C
mulsu R18,R22 ; BH x AM (s*u), weight 2^32; C = sign of product
sbc R8,R9 ; signed DUMMY: sign-extend BH*AM into E6
add R6,R0 ; E4 += lo(BH*AM)
adc R7,R1 ; E5 += hi(BH*AM) + C
adc R8,R9 ; E6 += C
pop R11 ; restore R9..R11
pop R10
pop R9
ret
muls_32x32_64
;***********************************************************************************
;*
;* FUNCTION muls32x32_64.inc
;* Multiply 32x32 -> 64 bit (signed * signed)
;*
;* AH AM AL AXL BH BM BL BXL E7 E6 E5 E4 E3 E2 E1 E0
;* R23:R22:R21:R20 x R19:R18:R17:R16 -> R9:R8:R7:R6:R5:R4:R3:R2
;*
;* Scratched: R0, R1 and the result registers; R10..R14 are saved/restored.
;* 108 cycles + 4 (RET) = 112 Cycles (original author's count)
;*
;* "signed DUMMY" idiom: MULSU sets C to bit 15 (the sign) of the product;
;* "sbc Rx,R14" (R14 = 0) subtracts that carry from the higher result bytes,
;* i.e. sign-extends the signed partial product. MOVW does not touch flags,
;* so parking a product between MULSU and SBC is safe.
;*
;***********************************************************************************
muls_32x32:
push R10 ; preserve R10..R14 for the caller
push R11
push r12
push r13
push r14
clr R14 ; R14 = 0 (zero register)
mul R20,R16 ; AXL x BXL (u*u), weight 2^0
movw R3:R2,R1:R0 ; -> E1:E0
mul R21,R17 ; AL x BL (u*u), weight 2^16
movw R5:R4,R1:R0 ; -> E3:E2
mul R22,R18 ; AM x BM (u*u), weight 2^32
movw R7:R6,R1:R0 ; -> E5:E4
muls R23,R19 ; AH x BH (s*s), weight 2^48
movw R9:R8,R1:R0 ; -> E7:E6
mulsu R23,R18 ; AH x BM (s*u), weight 2^40; C = sign of product
movw R13:R12,R1:R0 ; park in T5:T4 (flags preserved)
sbc R9,R14 ; signed DUMMY: sign-extend AH*BM into E7
mul R22,R17 ; AM x BL (u*u), weight 2^24
movw R11:R10,R1:R0 ; park in T3:T2
mul R21,R16 ; AL x BXL (u*u), weight 2^8
add R3,R0 ; E1 += lo(AL*BXL)
adc R4,R1 ; E2 += hi(AL*BXL) + C
adc R5,R10 ; E3 += lo(AM*BL) + C
adc R6,R11 ; E4 += hi(AM*BL) + C
adc R7,R12 ; E5 += lo(AH*BM) + C
adc R8,R13 ; E6 += hi(AH*BM) + C
adc R9,R14 ; E7 += C
mulsu R19,R22 ; BH x AM (s*u), weight 2^40; C = sign of product
movw R13:R12,R1:R0 ; park in T5:T4 (flags preserved)
sbc R9,R14 ; signed DUMMY: sign-extend BH*AM into E7
mul R21,R18 ; AL x BM (u*u), weight 2^24
movw R11:R10,R1:R0 ; park in T3:T2
mul R20,R17 ; AXL x BL (u*u), weight 2^8
add R3,R0 ; E1 += lo(AXL*BL)
adc R4,R1 ; E2 += hi(AXL*BL) + C
adc R5,R10 ; E3 += lo(AL*BM) + C
adc R6,R11 ; E4 += hi(AL*BM) + C
adc R7,R12 ; E5 += lo(BH*AM) + C
adc R8,R13 ; E6 += hi(BH*AM) + C
adc R9,R14 ; E7 += C
mulsu R23,R17 ; AH x BL (s*u), weight 2^32; C = sign of product
movw R11:R10,R1:R0 ; park in T3:T2 (flags preserved)
sbc R8,R14 ; signed DUMMY: sign-extend AH*BL into E6...
sbc R9,R14 ; ...and E7
mul R20,R18 ; AXL x BM (u*u), weight 2^16
add R4,R0 ; E2 += lo(AXL*BM)
adc R5,R1 ; E3 += hi(AXL*BM) + C
adc R6,R10 ; E4 += lo(AH*BL) + C
adc R7,R11 ; E5 += hi(AH*BL) + C
adc R8,R14 ; E6 += C
adc R9,R14 ; E7 += C
mulsu R19,R21 ; BH x AL (s*u), weight 2^32; C = sign of product
movw R11:R10,R1:R0 ; park in T3:T2 (flags preserved)
sbc R8,R14 ; signed DUMMY: sign-extend BH*AL into E6...
sbc R9,R14 ; ...and E7
mul R22,R16 ; AM x BXL (u*u), weight 2^16
add R4,R0 ; E2 += lo(AM*BXL)
adc R5,R1 ; E3 += hi(AM*BXL) + C
adc R6,R10 ; E4 += lo(BH*AL) + C
adc R7,R11 ; E5 += hi(BH*AL) + C
adc R8,R14 ; E6 += C
adc R9,R14 ; E7 += C
mulsu R19,R20 ; BH x AXL (s*u), weight 2^24; C = sign of product
sbc R7,R14 ; signed DUMMY: sign-extend BH*AXL into E5...
sbc R8,R14 ; ...E6...
sbc R9,R14 ; ...and E7
add R5,R0 ; E3 += lo(BH*AXL)
adc R6,R1 ; E4 += hi(BH*AXL) + C
adc R7,R14 ; E5 += C
adc R8,R14 ; E6 += C
adc R9,R14 ; E7 += C
mulsu R23,R16 ; AH x BXL (s*u), weight 2^24; C = sign of product
sbc R7,R14 ; signed DUMMY: sign-extend AH*BXL into E5...
sbc R8,R14 ; ...E6...
sbc R9,R14 ; ...and E7
add R5,R0 ; E3 += lo(AH*BXL)
adc R6,R1 ; E4 += hi(AH*BXL) + C
adc R7,R14 ; E5 += C
adc R8,R14 ; E6 += C
adc R9,R14 ; E7 += C
pop R14 ; restore R10..R14
pop R13
pop R12
pop R11
pop R10
ret
Download
SOFTWARE VERSION --> Datei: (Dateiname in dieser Kopie verloren gegangen – Link im Original-Wiki nachschlagen)
HARDWARE VERSION --> Datei:MULT ASM.zip