about | summary | refs | log | tree | commit | diff | stats
path: root/crypto
diff options
context:
space:
mode:
author: Andy Polyakov <appro@openssl.org> 2006-08-09 15:40:30 +0000
committer: Andy Polyakov <appro@openssl.org> 2006-08-09 15:40:30 +0000
commit: 8ea975d07060eac1e20cd9ae5bb74a0a4fb2b1bb (patch)
tree: ae985a643e04a0dccfe7c86d4ffadf74de08b08b /crypto
parent: 6c69aa532eee8a2cd1228b7810a926f2b752b864 (diff)
download: openssl-8ea975d07060eac1e20cd9ae5bb74a0a4fb2b1bb.tar.gz
+20% tune-up for Power5.
Diffstat (limited to 'crypto')
-rw-r--r-- crypto/bn/asm/ppc-mont.pl 29
1 files changed, 11 insertions, 18 deletions
diff --git a/crypto/bn/asm/ppc-mont.pl b/crypto/bn/asm/ppc-mont.pl
index c345c1b30e..280d31244b 100644
--- a/crypto/bn/asm/ppc-mont.pl
+++ b/crypto/bn/asm/ppc-mont.pl
@@ -162,17 +162,16 @@ $code=<<___;
.align 4
L1st:
$LDX $aj,$ap,$j ; ap[j]
- $LDX $nj,$np,$j ; np[j]
addc $lo0,$alo,$hi0
+ $LDX $nj,$np,$j ; np[j]
addze $hi0,$ahi
$UMULL $alo,$aj,$m0 ; ap[j]*bp[0]
- $UMULH $ahi,$aj,$m0
-
addc $lo1,$nlo,$hi1
+ $UMULH $ahi,$aj,$m0
addze $hi1,$nhi
$UMULL $nlo,$nj,$m1 ; np[j]*m1
- $UMULH $nhi,$nj,$m1
addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
+ $UMULH $nhi,$nj,$m1
addze $hi1,$hi1
$ST $lo1,0($tp) ; tp[j-1]
@@ -206,20 +205,16 @@ Louter:
$LD $aj,$BNSZ($ap) ; ap[1]
$LD $nj,0($np) ; np[0]
addc $lo0,$lo0,$tj ; ap[0]*bp[i]+tp[0]
+ $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
addze $hi0,$hi0
-
$UMULL $m1,$lo0,$n0 ; tp[0]*n0
-
- $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
$UMULH $ahi,$aj,$m0
-
$UMULL $lo1,$nj,$m1 ; np[0]*m1
$UMULH $hi1,$nj,$m1
$LD $nj,$BNSZ($np) ; np[1]
addc $lo1,$lo1,$lo0
- addze $hi1,$hi1
-
$UMULL $nlo,$nj,$m1 ; np[1]*m1
+ addze $hi1,$hi1
$UMULH $nhi,$nj,$m1
mtctr $num
@@ -227,24 +222,22 @@ Louter:
.align 4
Linner:
$LDX $aj,$ap,$j ; ap[j]
- $LD $tj,$BNSZ($tp) ; tp[j]
addc $lo0,$alo,$hi0
+ $LD $tj,$BNSZ($tp) ; tp[j]
addze $hi0,$ahi
$LDX $nj,$np,$j ; np[j]
- addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
- addze $hi0,$hi0
- $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
- $UMULH $ahi,$aj,$m0
-
addc $lo1,$nlo,$hi1
+ $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
addze $hi1,$nhi
+ $UMULH $ahi,$aj,$m0
+ addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
$UMULL $nlo,$nj,$m1 ; np[j]*m1
+ addze $hi0,$hi0
$UMULH $nhi,$nj,$m1
addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
+ addi $j,$j,$BNSZ ; j++
addze $hi1,$hi1
$ST $lo1,0($tp) ; tp[j-1]
-
- addi $j,$j,$BNSZ ; j++
addi $tp,$tp,$BNSZ ; tp++
bdnz- Linner
;Linner