From 947716c1872d210828122212d076d503ae68b928 Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Thu, 1 Sep 2016 10:39:15 +0200 Subject: MIPS assembly pack: adapt it for MIPS[32|64]R6. MIPS[32|64]R6 is binary and source incompatible with previous MIPS ISA specifications. Fortunately it's still possible to resolve differences in source code with standard pre-processor and switching to trap-free version of addition and subtraction instructions. Reviewed-by: Richard Levitte --- crypto/bn/asm/mips-mont.pl | 102 ++++--- crypto/bn/asm/mips.pl | 742 +++++++++++++++++++++++---------------------- crypto/bn/build.info | 16 +- 3 files changed, 436 insertions(+), 424 deletions(-) (limited to 'crypto/bn') diff --git a/crypto/bn/asm/mips-mont.pl b/crypto/bn/asm/mips-mont.pl index a907571bec..56d4202824 100644 --- a/crypto/bn/asm/mips-mont.pl +++ b/crypto/bn/asm/mips-mont.pl @@ -56,14 +56,14 @@ $flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64 if ($flavour =~ /64|n32/i) { - $PTR_ADD="dadd"; # incidentally works even on n32 - $PTR_SUB="dsub"; # incidentally works even on n32 + $PTR_ADD="daddu"; # incidentally works even on n32 + $PTR_SUB="dsubu"; # incidentally works even on n32 $REG_S="sd"; $REG_L="ld"; $SZREG=8; } else { - $PTR_ADD="add"; - $PTR_SUB="sub"; + $PTR_ADD="addu"; + $PTR_SUB="subu"; $REG_S="sw"; $REG_L="lw"; $SZREG=4; @@ -121,6 +121,8 @@ $m1=$s11; $FRAMESIZE=14; $code=<<___; +#include "mips_arch.h" + .text .set noat @@ -183,27 +185,27 @@ $code.=<<___; $PTR_SUB $sp,$num and $sp,$at - $MULTU $aj,$bi - $LD $alo,$BNSZ($ap) - $LD $nlo,$BNSZ($np) - mflo $lo0 - mfhi $hi0 - $MULTU $lo0,$n0 - mflo $m1 - - $MULTU $alo,$bi - mflo $alo - mfhi $ahi - - $MULTU $nj,$m1 - mflo $lo1 - mfhi $hi1 - $MULTU $nlo,$m1 + $MULTU ($aj,$bi) + $LD $ahi,$BNSZ($ap) + $LD $nhi,$BNSZ($np) + mflo ($lo0,$aj,$bi) + mfhi ($hi0,$aj,$bi) + $MULTU ($lo0,$n0) + mflo ($m1,$lo0,$n0) + + $MULTU ($ahi,$bi) + mflo ($alo,$ahi,$bi) + mfhi ($ahi,$ahi,$bi) + + $MULTU ($nj,$m1) + mflo ($lo1,$nj,$m1) + mfhi ($hi1,$nj,$m1) + $MULTU ($nhi,$m1) $ADDU $lo1,$lo0 sltu $at,$lo1,$lo0 $ADDU $hi1,$at - mflo $nlo - mfhi $nhi + mflo ($nlo,$nhi,$m1) + mfhi ($nhi,$nhi,$m1) move $tp,$sp li $j,2*$BNSZ @@ -215,25 +217,25 @@ $code.=<<___; $LD $aj,($aj) $LD $nj,($nj) - $MULTU $aj,$bi + $MULTU ($aj,$bi) $ADDU $lo0,$alo,$hi0 $ADDU $lo1,$nlo,$hi1 sltu $at,$lo0,$hi0 sltu $t0,$lo1,$hi1 $ADDU $hi0,$ahi,$at $ADDU $hi1,$nhi,$t0 - mflo $alo - mfhi $ahi + mflo ($alo,$aj,$bi) + mfhi ($ahi,$aj,$bi) $ADDU $lo1,$lo0 sltu $at,$lo1,$lo0 - $MULTU $nj,$m1 + $MULTU ($nj,$m1) $ADDU $hi1,$at addu $j,$BNSZ $ST $lo1,($tp) sltu $t0,$j,$num - mflo $nlo - mfhi $nhi + mflo ($nlo,$nj,$m1) + mfhi ($nhi,$nj,$m1) bnez $t0,.L1st $PTR_ADD $tp,$BNSZ @@ -263,34 +265,34 @@ $code.=<<___; $PTR_ADD $bi,$bp,$i $LD $bi,($bi) $LD $aj,($ap) - $LD $alo,$BNSZ($ap) + $LD $ahi,$BNSZ($ap) $LD $tj,($sp) - $MULTU $aj,$bi + $MULTU ($aj,$bi) $LD $nj,($np) - $LD $nlo,$BNSZ($np) - mflo $lo0 - mfhi $hi0 + $LD $nhi,$BNSZ($np) + mflo ($lo0,$aj,$bi) + mfhi ($hi0,$aj,$bi) $ADDU $lo0,$tj - $MULTU $lo0,$n0 + $MULTU ($lo0,$n0) sltu $at,$lo0,$tj $ADDU $hi0,$at - mflo $m1 + mflo ($m1,$lo0,$n0) - $MULTU $alo,$bi - mflo $alo - mfhi $ahi + $MULTU ($ahi,$bi) + mflo ($alo,$ahi,$bi) + mfhi ($ahi,$ahi,$bi) - $MULTU $nj,$m1 - mflo $lo1 - mfhi $hi1 + $MULTU ($nj,$m1) + mflo ($lo1,$nj,$m1) + mfhi ($hi1,$nj,$m1) - $MULTU $nlo,$m1 + $MULTU ($nhi,$m1) $ADDU $lo1,$lo0 sltu $at,$lo1,$lo0 $ADDU $hi1,$at - mflo $nlo - mfhi $nhi + mflo ($nlo,$nhi,$m1) + mfhi ($nhi,$nhi,$m1) move $tp,$sp li $j,2*$BNSZ @@ -303,19 +305,19 @@ $code.=<<___; $LD $aj,($aj) $LD $nj,($nj) - $MULTU $aj,$bi + $MULTU ($aj,$bi) $ADDU $lo0,$alo,$hi0 $ADDU $lo1,$nlo,$hi1 sltu $at,$lo0,$hi0 sltu $t0,$lo1,$hi1 $ADDU $hi0,$ahi,$at $ADDU $hi1,$nhi,$t0 - mflo $alo - mfhi $ahi + mflo ($alo,$aj,$bi) + mfhi ($ahi,$aj,$bi) $ADDU $lo0,$tj addu $j,$BNSZ - $MULTU $nj,$m1 + $MULTU ($nj,$m1) sltu $at,$lo0,$tj $ADDU $lo1,$lo0 $ADDU $hi0,$at @@ -323,8 +325,8 @@ $code.=<<___; $LD $tj,2*$BNSZ($tp) $ADDU $hi1,$t0 sltu $at,$j,$num - mflo $nlo - mfhi $nhi + mflo ($nlo,$nj,$m1) + mfhi ($nhi,$nj,$m1) $ST $lo1,($tp) bnez $at,.Linner $PTR_ADD $tp,$BNSZ diff --git a/crypto/bn/asm/mips.pl b/crypto/bn/asm/mips.pl index 420f01f3a4..102b656229 100644 --- a/crypto/bn/asm/mips.pl +++ b/crypto/bn/asm/mips.pl @@ -109,6 +109,22 @@ $gp=$v1 if ($flavour =~ /nubi/i); $minus4=$v1; $code.=<<___; +#include "mips_arch.h" + +#if defined(_MIPS_ARCH_MIPS64R6) +# define ddivu(rs,rt) +# define mfqt(rd,rs,rt) ddivu rd,rs,rt +# define mfrm(rd,rs,rt) dmodu rd,rs,rt +#elif defined(_MIPS_ARCH_MIPS32R6) +# define divu(rs,rt) +# define mfqt(rd,rs,rt) divu rd,rs,rt +# define mfrm(rd,rs,rt) modu rd,rs,rt +#else +# define $DIVU(rs,rt) $DIVU $zero,rs,rt +# define mfqt(rd,rs,rt) mflo rd +# define mfrm(rd,rs,rt) mfhi rd +#endif + .rdata .asciiz "mips3.s, Version 1.2" .asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov " @@ -151,7 +167,7 @@ $code.=<<___; .L_bn_mul_add_words_loop: $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,0($a0) $LD $t2,$BNSZ($a1) $LD $t3,$BNSZ($a0) @@ -161,11 +177,11 @@ $code.=<<___; sltu $v0,$t1,$v0 # All manuals say it "compares 32-bit # values", but it seems to work fine # even on 64-bit registers. - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 - $MULTU $t2,$a3 + $MULTU ($t2,$a3) sltu $at,$t1,$at $ST $t1,0($a0) $ADDU $v0,$at @@ -174,11 +190,11 @@ $code.=<<___; $LD $ta3,3*$BNSZ($a0) $ADDU $t3,$v0 sltu $v0,$t3,$v0 - mflo $at - mfhi $t2 + mflo ($at,$t2,$a3) + mfhi ($t2,$t2,$a3) $ADDU $t3,$at $ADDU $v0,$t2 - $MULTU $ta0,$a3 + $MULTU ($ta0,$a3) sltu $at,$t3,$at $ST $t3,$BNSZ($a0) $ADDU $v0,$at @@ -188,11 +204,11 @@ $code.=<<___; $PTR_ADD $a1,4*$BNSZ $ADDU $ta1,$v0 sltu $v0,$ta1,$v0 - mflo $at - mfhi $ta0 + mflo ($at,$ta0,$a3) + mfhi ($ta0,$ta0,$a3) $ADDU $ta1,$at $ADDU $v0,$ta0 - $MULTU $ta2,$a3 + $MULTU ($ta2,$a3) sltu $at,$ta1,$at $ST $ta1,-2*$BNSZ($a0) $ADDU $v0,$at @@ -201,8 +217,8 @@ $code.=<<___; and $ta0,$a2,$minus4 $ADDU $ta3,$v0 sltu $v0,$ta3,$v0 - mflo $at - mfhi $ta2 + mflo ($at,$ta2,$a3) + mfhi ($ta2,$ta2,$a3) $ADDU $ta3,$at $ADDU $v0,$ta2 sltu $at,$ta3,$at @@ -217,13 +233,13 @@ $code.=<<___; .L_bn_mul_add_words_tail: .set reorder $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,0($a0) subu $a2,1 $ADDU $t1,$v0 sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 sltu $at,$t1,$at @@ -232,13 +248,13 @@ $code.=<<___; beqz $a2,.L_bn_mul_add_words_return $LD $t0,$BNSZ($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,$BNSZ($a0) subu $a2,1 $ADDU $t1,$v0 sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 sltu $at,$t1,$at @@ -247,12 +263,12 @@ $code.=<<___; beqz $a2,.L_bn_mul_add_words_return $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t1,2*$BNSZ($a0) $ADDU $t1,$v0 sltu $v0,$t1,$v0 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $t1,$at $ADDU $v0,$t0 sltu $at,$t1,$at @@ -310,40 +326,40 @@ $code.=<<___; .L_bn_mul_words_loop: $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) $LD $ta2,3*$BNSZ($a1) - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at - $MULTU $t2,$a3 + $MULTU ($t2,$a3) $ST $v0,0($a0) $ADDU $v0,$t1,$t0 subu $a2,4 $PTR_ADD $a0,4*$BNSZ $PTR_ADD $a1,4*$BNSZ - mflo $at - mfhi $t2 + mflo ($at,$t2,$a3) + mfhi ($t2,$t2,$a3) $ADDU $v0,$at sltu $t3,$v0,$at - $MULTU $ta0,$a3 + $MULTU ($ta0,$a3) $ST $v0,-3*$BNSZ($a0) $ADDU $v0,$t3,$t2 - mflo $at - mfhi $ta0 + mflo ($at,$ta0,$a3) + mfhi ($ta0,$ta0,$a3) $ADDU $v0,$at sltu $ta1,$v0,$at - $MULTU $ta2,$a3 + $MULTU ($ta2,$a3) $ST $v0,-2*$BNSZ($a0) $ADDU $v0,$ta1,$ta0 and $ta0,$a2,$minus4 - mflo $at - mfhi $ta2 + mflo ($at,$ta2,$a3) + mfhi ($ta2,$ta2,$a3) $ADDU $v0,$at sltu $ta3,$v0,$at $ST $v0,-$BNSZ($a0) @@ -357,10 +373,10 @@ $code.=<<___; .L_bn_mul_words_tail: .set reorder $LD $t0,0($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) subu $a2,1 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at $ST $v0,0($a0) @@ -368,10 +384,10 @@ $code.=<<___; beqz $a2,.L_bn_mul_words_return $LD $t0,$BNSZ($a1) - $MULTU $t0,$a3 + $MULTU ($t0,$a3) subu $a2,1 - mflo $at - mfhi $t0 + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at $ST $v0,$BNSZ($a0) @@ -379,9 +395,9 @@ $code.=<<___; beqz $a2,.L_bn_mul_words_return $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$a3 - mflo $at - mfhi $t0 + $MULTU ($t0,$a3) + mflo ($at,$t0,$a3) + mfhi ($t0,$t0,$a3) $ADDU $v0,$at sltu $t1,$v0,$at $ST $v0,2*$BNSZ($a0) @@ -438,35 +454,35 @@ $code.=<<___; .L_bn_sqr_words_loop: $LD $t0,0($a1) - $MULTU $t0,$t0 + $MULTU ($t0,$t0) $LD $t2,$BNSZ($a1) $LD $ta0,2*$BNSZ($a1) $LD $ta2,3*$BNSZ($a1) - mflo $t1 - mfhi $t0 + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,0($a0) $ST $t0,$BNSZ($a0) - $MULTU $t2,$t2 + $MULTU ($t2,$t2) subu $a2,4 $PTR_ADD $a0,8*$BNSZ $PTR_ADD $a1,4*$BNSZ - mflo $t3 - mfhi $t2 + mflo ($t3,$t2,$t2) + mfhi ($t2,$t2,$t2) $ST $t3,-6*$BNSZ($a0) $ST $t2,-5*$BNSZ($a0) - $MULTU $ta0,$ta0 - mflo $ta1 - mfhi $ta0 + $MULTU ($ta0,$ta0) + mflo ($ta1,$ta0,$ta0) + mfhi ($ta0,$ta0,$ta0) $ST $ta1,-4*$BNSZ($a0) $ST $ta0,-3*$BNSZ($a0) - $MULTU $ta2,$ta2 + $MULTU ($ta2,$ta2) and $ta0,$a2,$minus4 - mflo $ta3 - mfhi $ta2 + mflo ($ta3,$ta2,$ta2) + mfhi ($ta2,$ta2,$ta2) $ST $ta3,-2*$BNSZ($a0) .set noreorder @@ -479,27 +495,27 @@ $code.=<<___; .L_bn_sqr_words_tail: .set reorder $LD $t0,0($a1) - $MULTU $t0,$t0 + $MULTU ($t0,$t0) subu $a2,1 - mflo $t1 - mfhi $t0 + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,0($a0) $ST $t0,$BNSZ($a0) beqz $a2,.L_bn_sqr_words_return $LD $t0,$BNSZ($a1) - $MULTU $t0,$t0 + $MULTU ($t0,$t0) subu $a2,1 - mflo $t1 - mfhi $t0 + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,2*$BNSZ($a0) $ST $t0,3*$BNSZ($a0) beqz $a2,.L_bn_sqr_words_return $LD $t0,2*$BNSZ($a1) - $MULTU $t0,$t0 - mflo $t1 - mfhi $t0 + $MULTU ($t0,$t0) + mflo ($t1,$t0,$t0) + mfhi ($t0,$t0,$t0) $ST $t1,4*$BNSZ($a0) $ST $t0,5*$BNSZ($a0) @@ -823,11 +839,11 @@ $code.=<<___; move $ta3,$ra bal bn_div_words_internal move $ra,$ta3 - $MULTU $ta2,$v0 + $MULTU ($ta2,$v0) $LD $t2,-2*$BNSZ($a3) move $ta0,$zero - mfhi $t1 - mflo $t0 + mfhi ($t1,$ta2,$v0) + mflo ($t0,$ta2,$v0) sltu $t8,$t1,$a1 .L_bn_div_3_words_inner_loop: bnez $t8,.L_bn_div_3_words_inner_loop_done @@ -930,15 +946,15 @@ $code.=<<___; $SRL $HH,$a0,4*$BNSZ # bits $SRL $QT,4*$BNSZ # q=0xffffffff beq $DH,$HH,.L_bn_div_words_skip_div1 - $DIVU $zero,$a0,$DH - mflo $QT + $DIVU ($a0,$DH) + mfqt ($QT,$a0,$DH) .L_bn_div_words_skip_div1: - $MULTU $a2,$QT + $MULTU ($a2,$QT) $SLL $t3,$a0,4*$BNSZ # bits $SRL $at,$a1,4*$BNSZ # bits or $t3,$at - mflo $t0 - mfhi $t1 + mflo ($t0,$a2,$QT) + mfhi ($t1,$a2,$QT) .L_bn_div_words_inner_loop1: sltu $t2,$t3,$t0 seq $t8,$HH,$t1 @@ -963,15 +979,15 @@ $code.=<<___; $SRL $HH,$a0,4*$BNSZ # bits $SRL $QT,4*$BNSZ # q=0xffffffff beq $DH,$HH,.L_bn_div_words_skip_div2 - $DIVU $zero,$a0,$DH - mflo $QT + $DIVU ($a0,$DH) + mfqt ($QT,$a0,$DH) .L_bn_div_words_skip_div2: - $MULTU $a2,$QT + $MULTU ($a2,$QT) $SLL $t3,$a0,4*$BNSZ # bits $SRL $at,$a1,4*$BNSZ # bits or $t3,$at - mflo $t0 - mfhi $t1 + mflo ($t0,$a2,$QT) + mfhi ($t1,$a2,$QT) .L_bn_div_words_inner_loop2: sltu $t2,$t3,$t0 seq $t8,$HH,$t1 @@ -1070,592 +1086,592 @@ $code.=<<___; $LD $b_0,0($a2) $LD $a_1,$BNSZ($a1) $LD $a_2,2*$BNSZ($a1) - $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$b_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_3,3*$BNSZ($a1) $LD $b_1,$BNSZ($a2) $LD $b_2,2*$BNSZ($a2) $LD $b_3,3*$BNSZ($a2) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$b_0) + mfhi ($c_2,$a_0,$b_0) $LD $a_4,4*$BNSZ($a1) $LD $a_5,5*$BNSZ($a1) - $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); + $MULTU ($a_0,$b_1) # mul_add_c(a[0],b[1],c2,c3,c1); $LD $a_6,6*$BNSZ($a1) $LD $a_7,7*$BNSZ($a1) $LD $b_4,4*$BNSZ($a2) $LD $b_5,5*$BNSZ($a2) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_1) + mfhi ($t_2,$a_0,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); + $MULTU ($a_1,$b_0) # mul_add_c(a[1],b[0],c2,c3,c1); $ADDU $c_3,$t_2,$at $LD $b_6,6*$BNSZ($a2) $LD $b_7,7*$BNSZ($a2) $ST $c_1,0($a0) # r[0]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_0) + mfhi ($t_2,$a_1,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$b_0) # mul_add_c(a[2],b[0],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 $ST $c_2,$BNSZ($a0) # r[1]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_0) + mfhi ($t_2,$a_2,$b_0) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); + $MULTU ($a_1,$b_1) # mul_add_c(a[1],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_1) + mfhi ($t_2,$a_1,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); + $MULTU ($a_0,$b_2) # mul_add_c(a[0],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_2) + mfhi ($t_2,$a_0,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$b_3) # mul_add_c(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) # r[2]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_3) + mfhi ($t_2,$a_0,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); + $MULTU ($a_1,$b_2) # mul_add_c(a[1],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_2) + mfhi ($t_2,$a_1,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); + $MULTU ($a_2,$b_1) # mul_add_c(a[2],b[1],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_1) + mfhi ($t_2,$a_2,$b_1) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); + $MULTU ($a_3,$b_0) # mul_add_c(a[3],b[0],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_0) + mfhi ($t_2,$a_3,$b_0) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_0 # mul_add_c(a[4],b[0],c2,c3,c1); + $MULTU ($a_4,$b_0) # mul_add_c(a[4],b[0],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,3*$BNSZ($a0) # r[3]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_0) + mfhi ($t_2,$a_4,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); + $MULTU ($a_3,$b_1) # mul_add_c(a[3],b[1],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_1) + mfhi ($t_2,$a_3,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); + $MULTU ($a_2,$b_2) # mul_add_c(a[2],b[2],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_2) + mfhi ($t_2,$a_2,$b_2) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); + $MULTU ($a_1,$b_3) # mul_add_c(a[1],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_3) + mfhi ($t_2,$a_1,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_0,$b_4 # mul_add_c(a[0],b[4],c2,c3,c1); + $MULTU ($a_0,$b_4) # mul_add_c(a[0],b[4],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_4) + mfhi ($t_2,$a_0,$b_4) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_0,$b_5 # mul_add_c(a[0],b[5],c3,c1,c2); + $MULTU ($a_0,$b_5) # mul_add_c(a[0],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) # r[4]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_5) + mfhi ($t_2,$a_0,$b_5) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_4 # mul_add_c(a[1],b[4],c3,c1,c2); + $MULTU ($a_1,$b_4) # mul_add_c(a[1],b[4],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_4) + mfhi ($t_2,$a_1,$b_4) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); + $MULTU ($a_2,$b_3) # mul_add_c(a[2],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_3) + mfhi ($t_2,$a_2,$b_3) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); + $MULTU ($a_3,$b_2) # mul_add_c(a[3],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_2) + mfhi ($t_2,$a_3,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_4,$b_1 # mul_add_c(a[4],b[1],c3,c1,c2); + $MULTU ($a_4,$b_1) # mul_add_c(a[4],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_1) + mfhi ($t_2,$a_4,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_0 # mul_add_c(a[5],b[0],c3,c1,c2); + $MULTU ($a_5,$b_0) # mul_add_c(a[5],b[0],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_0) + mfhi ($t_2,$a_5,$b_0) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_0 # mul_add_c(a[6],b[0],c1,c2,c3); + $MULTU ($a_6,$b_0) # mul_add_c(a[6],b[0],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,5*$BNSZ($a0) # r[5]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_0) + mfhi ($t_2,$a_6,$b_0) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_1 # mul_add_c(a[5],b[1],c1,c2,c3); + $MULTU ($a_5,$b_1) # mul_add_c(a[5],b[1],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_1) + mfhi ($t_2,$a_5,$b_1) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_2 # mul_add_c(a[4],b[2],c1,c2,c3); + $MULTU ($a_4,$b_2) # mul_add_c(a[4],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_2) + mfhi ($t_2,$a_4,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); + $MULTU ($a_3,$b_3) # mul_add_c(a[3],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_3) + mfhi ($t_2,$a_3,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_4 # mul_add_c(a[2],b[4],c1,c2,c3); + $MULTU ($a_2,$b_4) # mul_add_c(a[2],b[4],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_4) + mfhi ($t_2,$a_2,$b_4) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_5 # mul_add_c(a[1],b[5],c1,c2,c3); + $MULTU ($a_1,$b_5) # mul_add_c(a[1],b[5],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_5) + mfhi ($t_2,$a_1,$b_5) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_0,$b_6 # mul_add_c(a[0],b[6],c1,c2,c3); + $MULTU ($a_0,$b_6) # mul_add_c(a[0],b[6],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_6) + mfhi ($t_2,$a_0,$b_6) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_0,$b_7 # mul_add_c(a[0],b[7],c2,c3,c1); + $MULTU ($a_0,$b_7) # mul_add_c(a[0],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,6*$BNSZ($a0) # r[6]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_7) + mfhi ($t_2,$a_0,$b_7) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_6 # mul_add_c(a[1],b[6],c2,c3,c1); + $MULTU ($a_1,$b_6) # mul_add_c(a[1],b[6],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_6) + mfhi ($t_2,$a_1,$b_6) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_5 # mul_add_c(a[2],b[5],c2,c3,c1); + $MULTU ($a_2,$b_5) # mul_add_c(a[2],b[5],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_5) + mfhi ($t_2,$a_2,$b_5) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_4 # mul_add_c(a[3],b[4],c2,c3,c1); + $MULTU ($a_3,$b_4) # mul_add_c(a[3],b[4],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_4) + mfhi ($t_2,$a_3,$b_4) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_4,$b_3 # mul_add_c(a[4],b[3],c2,c3,c1); + $MULTU ($a_4,$b_3) # mul_add_c(a[4],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_3) + mfhi ($t_2,$a_4,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_5,$b_2 # mul_add_c(a[5],b[2],c2,c3,c1); + $MULTU ($a_5,$b_2) # mul_add_c(a[5],b[2],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_2) + mfhi ($t_2,$a_5,$b_2) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_6,$b_1 # mul_add_c(a[6],b[1],c2,c3,c1); + $MULTU ($a_6,$b_1) # mul_add_c(a[6],b[1],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_1) + mfhi ($t_2,$a_6,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_0 # mul_add_c(a[7],b[0],c2,c3,c1); + $MULTU ($a_7,$b_0) # mul_add_c(a[7],b[0],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_0) + mfhi ($t_2,$a_7,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_1 # mul_add_c(a[7],b[1],c3,c1,c2); + $MULTU ($a_7,$b_1) # mul_add_c(a[7],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,7*$BNSZ($a0) # r[7]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_1) + mfhi ($t_2,$a_7,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_2 # mul_add_c(a[6],b[2],c3,c1,c2); + $MULTU ($a_6,$b_2) # mul_add_c(a[6],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_2) + mfhi ($t_2,$a_6,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_3 # mul_add_c(a[5],b[3],c3,c1,c2); + $MULTU ($a_5,$b_3) # mul_add_c(a[5],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_3) + mfhi ($t_2,$a_5,$b_3) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_4,$b_4 # mul_add_c(a[4],b[4],c3,c1,c2); + $MULTU ($a_4,$b_4) # mul_add_c(a[4],b[4],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_4) + mfhi ($t_2,$a_4,$b_4) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_5 # mul_add_c(a[3],b[5],c3,c1,c2); + $MULTU ($a_3,$b_5) # mul_add_c(a[3],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_5) + mfhi ($t_2,$a_3,$b_5) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_6 # mul_add_c(a[2],b[6],c3,c1,c2); + $MULTU ($a_2,$b_6) # mul_add_c(a[2],b[6],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_6) + mfhi ($t_2,$a_2,$b_6) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_7 # mul_add_c(a[1],b[7],c3,c1,c2); + $MULTU ($a_1,$b_7) # mul_add_c(a[1],b[7],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_7) + mfhi ($t_2,$a_1,$b_7) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$b_7 # mul_add_c(a[2],b[7],c1,c2,c3); + $MULTU ($a_2,$b_7) # mul_add_c(a[2],b[7],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,8*$BNSZ($a0) # r[8]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_7) + mfhi ($t_2,$a_2,$b_7) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_6 # mul_add_c(a[3],b[6],c1,c2,c3); + $MULTU ($a_3,$b_6) # mul_add_c(a[3],b[6],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_6) + mfhi ($t_2,$a_3,$b_6) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_4,$b_5 # mul_add_c(a[4],b[5],c1,c2,c3); + $MULTU ($a_4,$b_5) # mul_add_c(a[4],b[5],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_5) + mfhi ($t_2,$a_4,$b_5) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_4 # mul_add_c(a[5],b[4],c1,c2,c3); + $MULTU ($a_5,$b_4) # mul_add_c(a[5],b[4],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_4) + mfhi ($t_2,$a_5,$b_4) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_6,$b_3 # mul_add_c(a[6],b[3],c1,c2,c3); + $MULTU ($a_6,$b_3) # mul_add_c(a[6],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_3) + mfhi ($t_2,$a_6,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_7,$b_2 # mul_add_c(a[7],b[2],c1,c2,c3); + $MULTU ($a_7,$b_2) # mul_add_c(a[7],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_2) + mfhi ($t_2,$a_7,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_7,$b_3 # mul_add_c(a[7],b[3],c2,c3,c1); + $MULTU ($a_7,$b_3) # mul_add_c(a[7],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,9*$BNSZ($a0) # r[9]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_3) + mfhi ($t_2,$a_7,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_6,$b_4 # mul_add_c(a[6],b[4],c2,c3,c1); + $MULTU ($a_6,$b_4) # mul_add_c(a[6],b[4],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_4) + mfhi ($t_2,$a_6,$b_4) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_5,$b_5 # mul_add_c(a[5],b[5],c2,c3,c1); + $MULTU ($a_5,$b_5) # mul_add_c(a[5],b[5],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_5) + mfhi ($t_2,$a_5,$b_5) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_4,$b_6 # mul_add_c(a[4],b[6],c2,c3,c1); + $MULTU ($a_4,$b_6) # mul_add_c(a[4],b[6],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_6) + mfhi ($t_2,$a_4,$b_6) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_3,$b_7 # mul_add_c(a[3],b[7],c2,c3,c1); + $MULTU ($a_3,$b_7) # mul_add_c(a[3],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_7) + mfhi ($t_2,$a_3,$b_7) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_4,$b_7 # mul_add_c(a[4],b[7],c3,c1,c2); + $MULTU ($a_4,$b_7) # mul_add_c(a[4],b[7],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,10*$BNSZ($a0) # r[10]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_4,$b_7) + mfhi ($t_2,$a_4,$b_7) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_5,$b_6 # mul_add_c(a[5],b[6],c3,c1,c2); + $MULTU ($a_5,$b_6) # mul_add_c(a[5],b[6],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_6) + mfhi ($t_2,$a_5,$b_6) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_6,$b_5 # mul_add_c(a[6],b[5],c3,c1,c2); + $MULTU ($a_6,$b_5) # mul_add_c(a[6],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_5) + mfhi ($t_2,$a_6,$b_5) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_7,$b_4 # mul_add_c(a[7],b[4],c3,c1,c2); + $MULTU ($a_7,$b_4) # mul_add_c(a[7],b[4],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_4) + mfhi ($t_2,$a_7,$b_4) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_7,$b_5 # mul_add_c(a[7],b[5],c1,c2,c3); + $MULTU ($a_7,$b_5) # mul_add_c(a[7],b[5],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,11*$BNSZ($a0) # r[11]=c3; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_5) + mfhi ($t_2,$a_7,$b_5) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_6,$b_6 # mul_add_c(a[6],b[6],c1,c2,c3); + $MULTU ($a_6,$b_6) # mul_add_c(a[6],b[6],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_6) + mfhi ($t_2,$a_6,$b_6) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_5,$b_7 # mul_add_c(a[5],b[7],c1,c2,c3); + $MULTU ($a_5,$b_7) # mul_add_c(a[5],b[7],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_5,$b_7) + mfhi ($t_2,$a_5,$b_7) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_6,$b_7 # mul_add_c(a[6],b[7],c2,c3,c1); + $MULTU ($a_6,$b_7) # mul_add_c(a[6],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,12*$BNSZ($a0) # r[12]=c1; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_6,$b_7) + mfhi ($t_2,$a_6,$b_7) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_6 # mul_add_c(a[7],b[6],c2,c3,c1); + $MULTU ($a_7,$b_6) # mul_add_c(a[7],b[6],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_6) + mfhi ($t_2,$a_7,$b_6) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_7,$b_7 # mul_add_c(a[7],b[7],c3,c1,c2); + $MULTU ($a_7,$b_7) # mul_add_c(a[7],b[7],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,13*$BNSZ($a0) # r[13]=c2; - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_7,$b_7) + mfhi ($t_2,$a_7,$b_7) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 $ADDU $t_2,$at @@ -1716,144 +1732,144 @@ $code.=<<___; $LD $b_0,0($a2) $LD $a_1,$BNSZ($a1) $LD $a_2,2*$BNSZ($a1) - $MULTU $a_0,$b_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$b_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_3,3*$BNSZ($a1) $LD $b_1,$BNSZ($a2) $LD $b_2,2*$BNSZ($a2) $LD $b_3,3*$BNSZ($a2) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$b_0) + mfhi ($c_2,$a_0,$b_0) $ST $c_1,0($a0) - $MULTU $a_0,$b_1 # mul_add_c(a[0],b[1],c2,c3,c1); - mflo $t_1 - mfhi $t_2 + $MULTU ($a_0,$b_1) # mul_add_c(a[0],b[1],c2,c3,c1); + mflo ($t_1,$a_0,$b_1) + mfhi ($t_2,$a_0,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_0 # mul_add_c(a[1],b[0],c2,c3,c1); + $MULTU ($a_1,$b_0) # mul_add_c(a[1],b[0],c2,c3,c1); $ADDU $c_3,$t_2,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_0) + mfhi ($t_2,$a_1,$b_0) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_0 # mul_add_c(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$b_0) # mul_add_c(a[2],b[0],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 $ST $c_2,$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_0) + mfhi ($t_2,$a_2,$b_0) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_1,$b_1 # mul_add_c(a[1],b[1],c3,c1,c2); + $MULTU ($a_1,$b_1) # mul_add_c(a[1],b[1],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_1) + mfhi ($t_2,$a_1,$b_1) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_2 # mul_add_c(a[0],b[2],c3,c1,c2); + $MULTU ($a_0,$b_2) # mul_add_c(a[0],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_2) + mfhi ($t_2,$a_0,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$b_3 # mul_add_c(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$b_3) # mul_add_c(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_0,$b_3) + mfhi ($t_2,$a_0,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_1,$b_2 # mul_add_c(a[1],b[2],c1,c2,c3); + $MULTU ($a_1,$b_2) # mul_add_c(a[1],b[2],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $c_3,$c_2,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_2) + mfhi ($t_2,$a_1,$b_2) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_2,$b_1 # mul_add_c(a[2],b[1],c1,c2,c3); + $MULTU ($a_2,$b_1) # mul_add_c(a[2],b[1],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_1) + mfhi ($t_2,$a_2,$b_1) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_0 # mul_add_c(a[3],b[0],c1,c2,c3); + $MULTU ($a_3,$b_0) # mul_add_c(a[3],b[0],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_0) + mfhi ($t_2,$a_3,$b_0) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_3,$b_1 # mul_add_c(a[3],b[1],c2,c3,c1); + $MULTU ($a_3,$b_1) # mul_add_c(a[3],b[1],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,3*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_1) + mfhi ($t_2,$a_3,$b_1) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_2 # mul_add_c(a[2],b[2],c2,c3,c1); + $MULTU ($a_2,$b_2) # mul_add_c(a[2],b[2],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $c_1,$c_3,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_2) + mfhi ($t_2,$a_2,$b_2) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_1,$b_3 # mul_add_c(a[1],b[3],c2,c3,c1); + $MULTU ($a_1,$b_3) # mul_add_c(a[1],b[3],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_1,$b_3) + mfhi ($t_2,$a_1,$b_3) $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$b_3 # mul_add_c(a[2],b[3],c3,c1,c2); + $MULTU ($a_2,$b_3) # mul_add_c(a[2],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_2,$b_3) + mfhi ($t_2,$a_2,$b_3) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_2 # mul_add_c(a[3],b[2],c3,c1,c2); + $MULTU ($a_3,$b_2) # mul_add_c(a[3],b[2],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $c_2,$c_1,$t_2 - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_2) + mfhi ($t_2,$a_3,$b_2) $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_3,$b_3 # mul_add_c(a[3],b[3],c1,c2,c3); + $MULTU ($a_3,$b_3) # mul_add_c(a[3],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,5*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 + mflo ($t_1,$a_3,$b_3) + mfhi ($t_2,$a_3,$b_3) $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 $ADDU $t_2,$at @@ -1888,11 +1904,9 @@ my ($hi,$lo,$c0,$c1,$c2, # commented as "forward multiplication" below]; )=@_; $code.=<<___; - mflo $lo - mfhi $hi $ADDU $c0,$lo sltu $at,$c0,$lo - $MULTU $an,$bn # forward multiplication + $MULTU ($an,$bn) # forward multiplication $ADDU $c0,$lo $ADDU $at,$hi sltu $lo,$c0,$lo @@ -1902,15 +1916,17 @@ ___ $code.=<<___ if (!$warm); sltu $c2,$c1,$at $ADDU $c1,$hi - sltu $hi,$c1,$hi - $ADDU $c2,$hi ___ $code.=<<___ if ($warm); sltu $at,$c1,$at $ADDU $c1,$hi $ADDU $c2,$at +___ +$code.=<<___; sltu $hi,$c1,$hi $ADDU $c2,$hi + mflo ($lo,$an,$bn) + mfhi ($hi,$an,$bn) ___ } @@ -1940,21 +1956,21 @@ $code.=<<___; $LD $a_2,2*$BNSZ($a1) $LD $a_3,3*$BNSZ($a1) - $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$a_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_4,4*$BNSZ($a1) $LD $a_5,5*$BNSZ($a1) $LD $a_6,6*$BNSZ($a1) $LD $a_7,7*$BNSZ($a1) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$a_0) + mfhi ($c_2,$a_0,$a_0) $ST $c_1,0($a0) - $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); - mflo $t_1 - mfhi $t_2 + $MULTU ($a_0,$a_1) # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo ($t_1,$a_0,$a_1) + mfhi ($t_2,$a_0,$a_1) slt $c_1,$t_2,$zero $SLL $t_2,1 - $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$a_0) # mul_add_c2(a[2],b[0],c3,c1,c2); slt $a2,$t_1,$zero $ADDU $t_2,$a2 $SLL $t_1,1 @@ -1962,20 +1978,22 @@ $code.=<<___; sltu $at,$c_2,$t_1 $ADDU $c_3,$t_2,$at $ST $c_2,$BNSZ($a0) + mflo ($t_1,$a_2,$a_0) + mfhi ($t_2,$a_2,$a_0) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$a_3) # mul_add_c2(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) + mflo ($t_1,$a_0,$a_3) + mfhi ($t_2,$a_0,$a_3) ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); @@ -1989,16 +2007,16 @@ ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_0,$a_5 # mul_add_c2(a[0],b[5],c3,c1,c2); + $MULTU ($a_0,$a_5) # mul_add_c2(a[0],b[5],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) + mflo ($t_1,$a_0,$a_5) + mfhi ($t_2,$a_0,$a_5) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); @@ -2016,16 +2034,16 @@ ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_0,$a_7 # mul_add_c2(a[0],b[7],c2,c3,c1); + $MULTU ($a_0,$a_7) # mul_add_c2(a[0],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,6*$BNSZ($a0) + mflo ($t_1,$a_0,$a_7) + mfhi ($t_2,$a_0,$a_7) ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); @@ -2045,16 +2063,16 @@ ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_2,$a_7 # mul_add_c2(a[2],b[7],c1,c2,c3); + $MULTU ($a_2,$a_7) # mul_add_c2(a[2],b[7],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,8*$BNSZ($a0) + mflo ($t_1,$a_2,$a_7) + mfhi ($t_2,$a_2,$a_7) ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); @@ -2070,16 +2088,16 @@ ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_4,$a_7 # mul_add_c2(a[4],b[7],c3,c1,c2); + $MULTU ($a_4,$a_7) # mul_add_c2(a[4],b[7],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,10*$BNSZ($a0) + mflo ($t_1,$a_4,$a_7) + mfhi ($t_2,$a_4,$a_7) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); @@ -2091,24 +2109,22 @@ ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 - $MULTU $a_6,$a_7 # mul_add_c2(a[6],b[7],c2,c3,c1); + $MULTU ($a_6,$a_7) # mul_add_c2(a[6],b[7],c2,c3,c1); $ADDU $t_2,$at $ADDU $c_2,$t_2 sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,12*$BNSZ($a0) + mflo ($t_1,$a_6,$a_7) + mfhi ($t_2,$a_6,$a_7) ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); $code.=<<___; $ST $c_2,13*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 $ADDU $t_2,$at @@ -2152,19 +2168,19 @@ $code.=<<___; .set reorder $LD $a_0,0($a1) $LD $a_1,$BNSZ($a1) - $MULTU $a_0,$a_0 # mul_add_c(a[0],b[0],c1,c2,c3); + $MULTU ($a_0,$a_0) # mul_add_c(a[0],b[0],c1,c2,c3); $LD $a_2,2*$BNSZ($a1) $LD $a_3,3*$BNSZ($a1) - mflo $c_1 - mfhi $c_2 + mflo ($c_1,$a_0,$a_0) + mfhi ($c_2,$a_0,$a_0) $ST $c_1,0($a0) - $MULTU $a_0,$a_1 # mul_add_c2(a[0],b[1],c2,c3,c1); - mflo $t_1 - mfhi $t_2 + $MULTU ($a_0,$a_1) # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo ($t_1,$a_0,$a_1) + mfhi ($t_2,$a_0,$a_1) slt $c_1,$t_2,$zero $SLL $t_2,1 - $MULTU $a_2,$a_0 # mul_add_c2(a[2],b[0],c3,c1,c2); + $MULTU ($a_2,$a_0) # mul_add_c2(a[2],b[0],c3,c1,c2); slt $a2,$t_1,$zero $ADDU $t_2,$a2 $SLL $t_1,1 @@ -2172,20 +2188,22 @@ $code.=<<___; sltu $at,$c_2,$t_1 $ADDU $c_3,$t_2,$at $ST $c_2,$BNSZ($a0) + mflo ($t_1,$a_2,$a_0) + mfhi ($t_2,$a_2,$a_0) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_3,$t_1 sltu $at,$c_3,$t_1 - $MULTU $a_0,$a_3 # mul_add_c2(a[0],b[3],c1,c2,c3); + $MULTU ($a_0,$a_3) # mul_add_c2(a[0],b[3],c1,c2,c3); $ADDU $t_2,$at $ADDU $c_1,$t_2 sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) + mflo ($t_1,$a_0,$a_3) + mfhi ($t_2,$a_0,$a_3) ___ &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); @@ -2197,24 +2215,22 @@ ___ &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); $code.=<<___; - mflo $t_1 - mfhi $t_2 $ADDU $c_2,$t_1 sltu $at,$c_2,$t_1 - $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); + $MULTU ($a_2,$a_3) # mul_add_c2(a[2],b[3],c3,c1,c2); $ADDU $t_2,$at $ADDU $c_3,$t_2 sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) + mflo ($t_1,$a_2,$a_3) + mfhi ($t_2,$a_2,$a_3) ___ &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); $code.=<<___; $ST $c_3,5*$BNSZ($a0) - mflo $t_1 - mfhi $t_2 $ADDU $c_1,$t_1 sltu $at,$c_1,$t_1 $ADDU $t_2,$at diff --git a/crypto/bn/build.info b/crypto/bn/build.info index c608ecce82..354410615b 100644 --- a/crypto/bn/build.info +++ b/crypto/bn/build.info @@ -34,8 +34,10 @@ INCLUDE[sparct4-mont.o]=.. GENERATE[sparcv9-gf2m.S]=asm/sparcv9-gf2m.pl $(PERLASM_SCHEME) INCLUDE[sparcv9-gf2m.o]=.. -GENERATE[bn-mips.s]=asm/mips.pl $(PERLASM_SCHEME) -GENERATE[mips-mont.s]=asm/mips-mont.pl $(PERLASM_SCHEME) +GENERATE[bn-mips.S]=asm/mips.pl $(PERLASM_SCHEME) +INCLUDE[bn-mips.o]=.. +GENERATE[mips-mont.S]=asm/mips-mont.pl $(PERLASM_SCHEME) +INCLUDE[mips-mont.o]=.. GENERATE[s390x-mont.S]=asm/s390x-mont.pl $(PERLASM_SCHEME) GENERATE[s390x-gf2m.s]=asm/s390x-gf2m.pl $(PERLASM_SCHEME) @@ -64,16 +66,8 @@ GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl $(PERLASM_SCHEME) INCLUDE[armv4-gf2m.o]=.. GENERATE[armv8-mont.S]=asm/armv8-mont.pl $(PERLASM_SCHEME) -OVERRIDES=bn-mips3.o pa-risc2W.o pa-risc2.c +OVERRIDES=pa-risc2W.o pa-risc2.c BEGINRAW[Makefile] -##### BN assembler implementations - -{- $builddir -}/bn-mips3.o: {- $sourcedir -}/asm/mips3.s - @if [ "$(CC)" = "gcc" ]; then \ - ABI=`expr "$(CFLAGS)" : ".*-mabi=\([n3264]*\)"` && \ - as -$$ABI -O -o $@ {- $sourcedir -}/asm/mips3.s; \ - else $(CC) -c $(CFLAGS) $(LIB_CFLAGS) -o $@ {- $sourcedir -}/asm/mips3.s; fi - # GNU assembler fails to compile PA-RISC2 modules, insist on calling # vendor assembler... {- $builddir -}/pa-risc2W.o: {- $sourcedir -}/asm/pa-risc2W.s -- cgit v1.2.3