aboutsummaryrefslogtreecommitdiffstats
path: root/crypto/bn
diff options
context:
space:
mode:
authorRalf S. Engelschall <rse@openssl.org>1998-12-21 11:00:56 +0000
committerRalf S. Engelschall <rse@openssl.org>1998-12-21 11:00:56 +0000
commit31b8d8684441e6cd5138832bb1b2ddb10acd6ba6 (patch)
treebd2486e342980b19b92cff888ae6206d906a089d /crypto/bn
parent7dfb0b774e6592dcbfe47015168a0ac8b44e2a17 (diff)
parentdfeab0689f69c0b4bd3480ffd37a9cacc2f17d9c (diff)
downloadopenssl-31b8d8684441e6cd5138832bb1b2ddb10acd6ba6.tar.gz
This commit was generated by cvs2svn to track changes on a CVS vendor
branch.
Diffstat (limited to 'crypto/bn')
-rw-r--r--crypto/bn/DSA2
-rw-r--r--crypto/bn/Makefile.ssl50
-rw-r--r--crypto/bn/alpha.s0
-rw-r--r--crypto/bn/asm/a.outbin0 -> 5795 bytes
-rw-r--r--crypto/bn/asm/alpha.s1846
-rw-r--r--crypto/bn/asm/alpha.s.works533
-rw-r--r--crypto/bn/asm/alpha.works/add.pl119
-rw-r--r--crypto/bn/asm/alpha.works/div.pl144
-rw-r--r--crypto/bn/asm/alpha.works/mul.pl116
-rw-r--r--crypto/bn/asm/alpha.works/mul_add.pl120
-rw-r--r--crypto/bn/asm/alpha.works/mul_c4.pl213
-rw-r--r--crypto/bn/asm/alpha.works/mul_c4.works.pl98
-rw-r--r--crypto/bn/asm/alpha.works/mul_c8.pl177
-rw-r--r--crypto/bn/asm/alpha.works/sqr.pl113
-rw-r--r--crypto/bn/asm/alpha.works/sqr_c4.pl109
-rw-r--r--crypto/bn/asm/alpha.works/sqr_c8.pl132
-rw-r--r--crypto/bn/asm/alpha.works/sub.pl108
-rw-r--r--crypto/bn/asm/alpha/add.pl118
-rw-r--r--crypto/bn/asm/alpha/div.pl144
-rw-r--r--crypto/bn/asm/alpha/mul.pl104
-rw-r--r--crypto/bn/asm/alpha/mul_add.pl123
-rw-r--r--crypto/bn/asm/alpha/mul_c4.pl215
-rw-r--r--crypto/bn/asm/alpha/mul_c4.works.pl98
-rw-r--r--crypto/bn/asm/alpha/mul_c8.pl177
-rw-r--r--crypto/bn/asm/alpha/sqr.pl113
-rw-r--r--crypto/bn/asm/alpha/sqr_c4.pl109
-rw-r--r--crypto/bn/asm/alpha/sqr_c8.pl132
-rw-r--r--crypto/bn/asm/alpha/sub.pl108
-rw-r--r--crypto/bn/asm/bn-586.pl82
-rw-r--r--crypto/bn/asm/bn-alpha.pl571
-rw-r--r--crypto/bn/asm/bn-win32.asm1441
-rw-r--r--crypto/bn/asm/bn86unix.cpp1465
-rw-r--r--crypto/bn/asm/ca.pl33
-rw-r--r--crypto/bn/asm/co-586.pl286
-rw-r--r--crypto/bn/asm/co-alpha.pl116
-rw-r--r--crypto/bn/asm/co86unix.cpp1315
-rw-r--r--crypto/bn/asm/elf.s1269
-rw-r--r--crypto/bn/asm/f500
-rw-r--r--crypto/bn/asm/f.c8
-rw-r--r--crypto/bn/asm/f.elf2149
-rw-r--r--crypto/bn/asm/f.s1773
-rw-r--r--crypto/bn/asm/ff724
-rw-r--r--crypto/bn/asm/mips1.s539
-rw-r--r--crypto/bn/asm/mips3.s544
-rw-r--r--crypto/bn/asm/x86.pl28
-rw-r--r--crypto/bn/asm/x86/add.pl76
-rw-r--r--crypto/bn/asm/x86/comba.pl277
-rw-r--r--crypto/bn/asm/x86/div.pl15
-rw-r--r--crypto/bn/asm/x86/f3
-rw-r--r--crypto/bn/asm/x86/mul.pl77
-rw-r--r--crypto/bn/asm/x86/mul_add.pl87
-rw-r--r--crypto/bn/asm/x86/sqr.pl60
-rw-r--r--crypto/bn/asm/x86/sub.pl76
-rw-r--r--crypto/bn/asm/x86w16.asm6
-rw-r--r--crypto/bn/asm/x86w32.asm34
-rw-r--r--crypto/bn/bn.err17
-rw-r--r--crypto/bn/bn.h193
-rw-r--r--crypto/bn/bn.mul19
-rw-r--r--crypto/bn/bn.org193
-rw-r--r--crypto/bn/bn_add.c192
-rw-r--r--crypto/bn/bn_asm.c829
-rw-r--r--crypto/bn/bn_blind.c12
-rw-r--r--crypto/bn/bn_comba.c349
-rw-r--r--crypto/bn/bn_div.c68
-rw-r--r--crypto/bn/bn_err.c7
-rw-r--r--crypto/bn/bn_exp.c146
-rw-r--r--crypto/bn/bn_exp2.c202
-rw-r--r--crypto/bn/bn_gcd.c39
-rw-r--r--crypto/bn/bn_lcl.h77
-rw-r--r--crypto/bn/bn_lib.c330
-rw-r--r--crypto/bn/bn_mont.c411
-rw-r--r--crypto/bn/bn_mpi.c2
-rw-r--r--crypto/bn/bn_mul.c759
-rw-r--r--crypto/bn/bn_opts.c342
-rw-r--r--crypto/bn/bn_prime.c62
-rw-r--r--crypto/bn/bn_recp.c176
-rw-r--r--crypto/bn/bn_sqr.c189
-rw-r--r--crypto/bn/bn_word.c20
-rw-r--r--crypto/bn/bnspeed.c16
-rw-r--r--crypto/bn/bntest.c307
-rw-r--r--crypto/bn/comba.pl285
-rw-r--r--crypto/bn/d.c72
-rw-r--r--crypto/bn/exp.c60
-rw-r--r--crypto/bn/expspeed.c3
-rw-r--r--crypto/bn/exptest.c15
-rw-r--r--crypto/bn/m.pl32
-rw-r--r--crypto/bn/new23
-rw-r--r--crypto/bn/old/b_sqr.c205
-rw-r--r--crypto/bn/old/bn_com.c90
-rw-r--r--crypto/bn/old/bn_high.c137
-rw-r--r--crypto/bn/old/bn_ka.c578
-rw-r--r--crypto/bn/old/bn_low.c201
-rw-r--r--crypto/bn/old/bn_m.c142
-rw-r--r--crypto/bn/old/bn_mul.c.works219
-rw-r--r--crypto/bn/old/bn_wmul.c181
-rwxr-xr-xcrypto/bn/old/build3
-rw-r--r--crypto/bn/old/info22
-rw-r--r--crypto/bn/old/test.works205
-rw-r--r--crypto/bn/test.c252
-rw-r--r--crypto/bn/todo3
100 files changed, 25588 insertions, 972 deletions
diff --git a/crypto/bn/DSA b/crypto/bn/DSA
new file mode 100644
index 0000000000..83f257c84f
--- /dev/null
+++ b/crypto/bn/DSA
@@ -0,0 +1,2 @@
+DSA wants 64*32 to use word mont mul, but
+RSA wants to use full.
diff --git a/crypto/bn/Makefile.ssl b/crypto/bn/Makefile.ssl
index 9809d26cbc..0a365fca6a 100644
--- a/crypto/bn/Makefile.ssl
+++ b/crypto/bn/Makefile.ssl
@@ -13,9 +13,9 @@ MAKEDEPEND= makedepend -f Makefile.ssl
MAKEFILE= Makefile.ssl
AR= ar r
-BN_MULW= bn_mulw.o
+BN_ASM= bn_asm.o
# or use
-#BN_MULW= bn86-elf.o
+#BN_ASM= bn86-elf.o
CFLAGS= $(INCLUDES) $(CFLAG)
@@ -26,16 +26,15 @@ TEST=bntest.c exptest.c
APPS=
LIB=$(TOP)/libcrypto.a
-LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_mod.c bn_mul.c \
- bn_print.c bn_rand.c bn_shift.c bn_sub.c bn_word.c bn_blind.c \
- bn_gcd.c bn_prime.c $(ERRC).c bn_sqr.c bn_mulw.c bn_recp.c bn_mont.c \
- bn_mpi.c
-
-LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_mod.o bn_mul.o \
- bn_print.o bn_rand.o bn_shift.o bn_sub.o bn_word.o bn_blind.o \
- bn_gcd.o bn_prime.o $(ERRC).o bn_sqr.o $(BN_MULW) bn_recp.o bn_mont.o \
- bn_mpi.o
+LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_mul.c \
+ bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
+ bn_gcd.c bn_prime.c $(ERRC).c bn_sqr.c bn_asm.c bn_recp.c bn_mont.c \
+ bn_mpi.c bn_exp2.c
+LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_mul.o \
+ bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \
+ bn_gcd.o bn_prime.o $(ERRC).o bn_sqr.o $(BN_ASM) bn_recp.o bn_mont.o \
+ bn_mpi.o bn_exp2.o
SRC= $(LIBSRC)
@@ -65,23 +64,48 @@ lib: $(LIBOBJ)
asm/bn86-elf.o: asm/bn86unix.cpp
$(CPP) -DELF asm/bn86unix.cpp | as -o asm/bn86-elf.o
+asm/co86-elf.o: asm/co86unix.cpp
+ $(CPP) -DELF asm/co86unix.cpp | as -o asm/co86-elf.o
+
# solaris
asm/bn86-sol.o: asm/bn86unix.cpp
$(CC) -E -DSOL asm/bn86unix.cpp | sed 's/^#.*//' > asm/bn86-sol.s
as -o asm/bn86-sol.o asm/bn86-sol.s
rm -f asm/bn86-sol.s
+asm/co86-sol.o: asm/co86unix.cpp
+ $(CC) -E -DSOL asm/co86unix.cpp | sed 's/^#.*//' > asm/co86-sol.s
+ as -o asm/co86-sol.o asm/co86-sol.s
+ rm -f asm/co86-sol.s
+
# a.out
asm/bn86-out.o: asm/bn86unix.cpp
$(CPP) -DOUT asm/bn86unix.cpp | as -o asm/bn86-out.o
+asm/co86-out.o: asm/co86unix.cpp
+ $(CPP) -DOUT asm/co86unix.cpp | as -o asm/co86-out.o
+
# bsdi
asm/bn86bsdi.o: asm/bn86unix.cpp
- $(CPP) -DBSDI asm/bn86unix.cpp | as -o asm/bn86bsdi.o
+ $(CPP) -DBSDI asm/bn86unix.cpp | sed 's/ :/:/' | as -o asm/bn86bsdi.o
+
+asm/co86bsdi.o: asm/co86unix.cpp
+ $(CPP) -DBSDI asm/co86unix.cpp | sed 's/ :/:/' | as -o asm/co86bsdi.o
asm/bn86unix.cpp:
(cd asm; perl bn-586.pl cpp >bn86unix.cpp )
+asm/co86unix.cpp:
+ (cd asm; perl co-586.pl cpp >co86unix.cpp )
+
+# MIPS 64 bit assember
+asm/mips3.o: asm/mips3.s
+ /usr/bin/as -mips3 -O2 -o asm/mips3.o asm/mips3.s
+
+# MIPS 32 bit assember
+asm/mips1.o: asm/mips1.s
+ /usr/bin/as -O2 -o asm/mips1.o asm/mips1.s
+
files:
perl $(TOP)/util/files.pl Makefile.ssl >> $(TOP)/MINFO
@@ -123,7 +147,7 @@ dclean:
mv -f Makefile.new $(MAKEFILE)
clean:
- /bin/rm -f *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_mulw.s
+ /bin/rm -f *.o */*.o *.obj lib tags core .pure .nfs* *.old *.bak fluff bn_asm.s
errors:
perl $(TOP)/util/err-ins.pl $(ERR).err $(ERR).org # special case .org
diff --git a/crypto/bn/alpha.s b/crypto/bn/alpha.s
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/crypto/bn/alpha.s
diff --git a/crypto/bn/asm/a.out b/crypto/bn/asm/a.out
new file mode 100644
index 0000000000..cc5094ff45
--- /dev/null
+++ b/crypto/bn/asm/a.out
Binary files differ
diff --git a/crypto/bn/asm/alpha.s b/crypto/bn/asm/alpha.s
index 1d17b1d619..cf0b69cff9 100644
--- a/crypto/bn/asm/alpha.s
+++ b/crypto/bn/asm/alpha.s
@@ -2,7 +2,13 @@
# The bn_div64 is actually gcc output but the other parts are hand done.
# Thanks to tzeruch@ceddec.com for sending me the gcc output for
# bn_div64.
- .file 1 "bn_mulw.c"
+ # I've gone back and re-done most of routines.
+ # The key thing to remeber for the 164 CPU is that while a
+ # multiply operation takes 8 cycles, another one can only be issued
+ # after 4 cycles have elapsed. I've done modification to help
+ # improve this. Also, normally, a ld instruction will not be available
+ # for about 3 cycles.
+ .file 1 "bn_asm.c"
.set noat
gcc2_compiled.:
__gnu_compiled_c:
@@ -14,65 +20,91 @@ bn_mul_add_words:
bn_mul_add_words..ng:
.frame $30,0,$26,0
.prologue 0
- subq $18,2,$25 # num=-2
- bis $31,$31,$0
- blt $25,$42
.align 5
-$142:
- subq $18,2,$18 # num-=2
- subq $25,2,$25 # num-=2
-
- ldq $1,0($17) # a[0]
- ldq $2,8($17) # a[1]
-
- mulq $19,$1,$3 # a[0]*w low part r3
- umulh $19,$1,$1 # a[0]*w high part r1
- mulq $19,$2,$4 # a[1]*w low part r4
- umulh $19,$2,$2 # a[1]*w high part r2
-
- ldq $22,0($16) # r[0] r22
- ldq $23,8($16) # r[1] r23
-
- addq $3,$22,$3 # a0 low part + r[0]
- addq $4,$23,$4 # a1 low part + r[1]
- cmpult $3,$22,$5 # overflow?
- cmpult $4,$23,$6 # overflow?
- addq $5,$1,$1 # high part + overflow
- addq $6,$2,$2 # high part + overflow
-
- addq $3,$0,$3 # add c
- cmpult $3,$0,$5 # overflow?
- stq $3,0($16)
- addq $5,$1,$0 # c=high part + overflow
-
- addq $4,$0,$4 # add c
- cmpult $4,$0,$5 # overflow?
- stq $4,8($16)
- addq $5,$2,$0 # c=high part + overflow
+ subq $18,4,$18
+ bis $31,$31,$0
+ blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $20,0($17) # 1 1
+ ldq $1,0($16) # 1 1
+ .align 3
+$42:
+ mulq $20,$19,$5 # 1 2 1 ######
+ ldq $21,8($17) # 2 1
+ ldq $2,8($16) # 2 1
+ umulh $20,$19,$20 # 1 2 ######
+ ldq $27,16($17) # 3 1
+ ldq $3,16($16) # 3 1
+ mulq $21,$19,$6 # 2 2 1 ######
+ ldq $28,24($17) # 4 1
+ addq $1,$5,$1 # 1 2 2
+ ldq $4,24($16) # 4 1
+ umulh $21,$19,$21 # 2 2 ######
+ cmpult $1,$5,$22 # 1 2 3 1
+ addq $20,$22,$20 # 1 3 1
+ addq $1,$0,$1 # 1 2 3 1
+ mulq $27,$19,$7 # 3 2 1 ######
+ cmpult $1,$0,$0 # 1 2 3 2
+ addq $2,$6,$2 # 2 2 2
+ addq $20,$0,$0 # 1 3 2
+ cmpult $2,$6,$23 # 2 2 3 1
+ addq $21,$23,$21 # 2 3 1
+ umulh $27,$19,$27 # 3 2 ######
+ addq $2,$0,$2 # 2 2 3 1
+ cmpult $2,$0,$0 # 2 2 3 2
+ subq $18,4,$18
+ mulq $28,$19,$8 # 4 2 1 ######
+ addq $21,$0,$0 # 2 3 2
+ addq $3,$7,$3 # 3 2 2
+ addq $16,32,$16
+ cmpult $3,$7,$24 # 3 2 3 1
+ stq $1,-32($16) # 1 2 4
+ umulh $28,$19,$28 # 4 2 ######
+ addq $27,$24,$27 # 3 3 1
+ addq $3,$0,$3 # 3 2 3 1
+ stq $2,-24($16) # 2 2 4
+ cmpult $3,$0,$0 # 3 2 3 2
+ stq $3,-16($16) # 3 2 4
+ addq $4,$8,$4 # 4 2 2
+ addq $27,$0,$0 # 3 3 2
+ cmpult $4,$8,$25 # 4 2 3 1
+ addq $17,32,$17
+ addq $28,$25,$28 # 4 3 1
+ addq $4,$0,$4 # 4 2 3 1
+ cmpult $4,$0,$0 # 4 2 3 2
+ stq $4,-8($16) # 4 2 4
+ addq $28,$0,$0 # 4 3 2
+ blt $18,$43
- ble $18,$43
+ ldq $20,0($17) # 1 1
+ ldq $1,0($16) # 1 1
- addq $16,16,$16
- addq $17,16,$17
- blt $25,$42
+ br $42
- br $31,$142
-$42:
- ldq $1,0($17) # a[0]
- umulh $19,$1,$3 # a[0]*w high part
- mulq $19,$1,$1 # a[0]*w low part
- ldq $2,0($16) # r[0]
- addq $1,$2,$1 # low part + r[0]
- cmpult $1,$2,$4 # overflow?
- addq $4,$3,$3 # high part + overflow
- addq $1,$0,$1 # add c
- cmpult $1,$0,$4 # overflow?
- addq $4,$3,$0 # c=high part + overflow
- stq $1,0($16)
+ .align 4
+$45:
+ ldq $20,0($17) # 4 1
+ ldq $1,0($16) # 4 1
+ mulq $20,$19,$5 # 4 2 1
+ subq $18,1,$18
+ addq $16,8,$16
+ addq $17,8,$17
+ umulh $20,$19,$20 # 4 2
+ addq $1,$5,$1 # 4 2 2
+ cmpult $1,$5,$22 # 4 2 3 1
+ addq $20,$22,$20 # 4 3 1
+ addq $1,$0,$1 # 4 2 3 1
+ cmpult $1,$0,$0 # 4 2 3 2
+ addq $20,$0,$0 # 4 3 2
+ stq $1,-8($16) # 4 2 4
+ bgt $18,$45
+ ret $31,($26),1 # else exit
.align 4
$43:
- ret $31,($26),1
+ addq $18,4,$18
+ bgt $18,$45 # goto tail code
+ ret $31,($26),1 # else exit
+
.end bn_mul_add_words
.align 3
.globl bn_mul_words
@@ -81,49 +113,75 @@ bn_mul_words:
bn_mul_words..ng:
.frame $30,0,$26,0
.prologue 0
- subq $18,2,$25 # num=-2
- bis $31,$31,$0
- blt $25,$242
.align 5
-$342:
- subq $18,2,$18 # num-=2
- subq $25,2,$25 # num-=2
-
- ldq $1,0($17) # a[0]
- ldq $2,8($17) # a[1]
-
- mulq $19,$1,$3 # a[0]*w low part r3
- umulh $19,$1,$1 # a[0]*w high part r1
- mulq $19,$2,$4 # a[1]*w low part r4
- umulh $19,$2,$2 # a[1]*w high part r2
-
- addq $3,$0,$3 # add c
- cmpult $3,$0,$5 # overflow?
- stq $3,0($16)
- addq $5,$1,$0 # c=high part + overflow
-
- addq $4,$0,$4 # add c
- cmpult $4,$0,$5 # overflow?
- stq $4,8($16)
- addq $5,$2,$0 # c=high part + overflow
-
- ble $18,$243
-
- addq $16,16,$16
- addq $17,16,$17
- blt $25,$242
-
- br $31,$342
-$242:
- ldq $1,0($17) # a[0]
- umulh $19,$1,$3 # a[0]*w high part
- mulq $19,$1,$1 # a[0]*w low part
- addq $1,$0,$1 # add c
- cmpult $1,$0,$4 # overflow?
- addq $4,$3,$0 # c=high part + overflow
- stq $1,0($16)
-$243:
- ret $31,($26),1
+ subq $18,4,$18
+ bis $31,$31,$0
+ blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $20,0($17) # 1 1
+ .align 3
+$142:
+
+ mulq $20,$19,$5 # 1 2 1 #####
+ ldq $21,8($17) # 2 1
+ ldq $27,16($17) # 3 1
+ umulh $20,$19,$20 # 1 2 #####
+ ldq $28,24($17) # 4 1
+ mulq $21,$19,$6 # 2 2 1 #####
+ addq $5,$0,$5 # 1 2 3 1
+ subq $18,4,$18
+ cmpult $5,$0,$0 # 1 2 3 2
+ umulh $21,$19,$21 # 2 2 #####
+ addq $20,$0,$0 # 1 3 2
+ addq $17,32,$17
+ addq $6,$0,$6 # 2 2 3 1
+ mulq $27,$19,$7 # 3 2 1 #####
+ cmpult $6,$0,$0 # 2 2 3 2
+ addq $21,$0,$0 # 2 3 2
+ addq $16,32,$16
+ umulh $27,$19,$27 # 3 2 #####
+ stq $5,-32($16) # 1 2 4
+ mulq $28,$19,$8 # 4 2 1 #####
+ addq $7,$0,$7 # 3 2 3 1
+ stq $6,-24($16) # 2 2 4
+ cmpult $7,$0,$0 # 3 2 3 2
+ umulh $28,$19,$28 # 4 2 #####
+ addq $27,$0,$0 # 3 3 2
+ stq $7,-16($16) # 3 2 4
+ addq $8,$0,$8 # 4 2 3 1
+ cmpult $8,$0,$0 # 4 2 3 2
+
+ addq $28,$0,$0 # 4 3 2
+
+ stq $8,-8($16) # 4 2 4
+
+ blt $18,$143
+
+ ldq $20,0($17) # 1 1
+
+ br $142
+
+ .align 4
+$145:
+ ldq $20,0($17) # 4 1
+ mulq $20,$19,$5 # 4 2 1
+ subq $18,1,$18
+ umulh $20,$19,$20 # 4 2
+ addq $5,$0,$5 # 4 2 3 1
+ addq $16,8,$16
+ cmpult $5,$0,$0 # 4 2 3 2
+ addq $17,8,$17
+ addq $20,$0,$0 # 4 3 2
+ stq $5,-8($16) # 4 2 4
+
+ bgt $18,$145
+ ret $31,($26),1 # else exit
+
+ .align 4
+$143:
+ addq $18,4,$18
+ bgt $18,$145 # goto tail code
+ ret $31,($26),1 # else exit
+
.end bn_mul_words
.align 3
.globl bn_sqr_words
@@ -132,44 +190,58 @@ bn_sqr_words:
bn_sqr_words..ng:
.frame $30,0,$26,0
.prologue 0
-
- subq $18,2,$25 # num=-2
- blt $25,$442
- .align 5
-$542:
- subq $18,2,$18 # num-=2
- subq $25,2,$25 # num-=2
-
- ldq $1,0($17) # a[0]
- ldq $4,8($17) # a[1]
- mulq $1,$1,$2 # a[0]*w low part r2
- umulh $1,$1,$3 # a[0]*w high part r3
- mulq $4,$4,$5 # a[1]*w low part r5
- umulh $4,$4,$6 # a[1]*w high part r6
-
- stq $2,0($16) # r[0]
- stq $3,8($16) # r[1]
- stq $5,16($16) # r[3]
- stq $6,24($16) # r[4]
+ subq $18,4,$18
+ blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $20,0($17) # 1 1
+ .align 3
+$542:
+ mulq $20,$20,$5 ######
+ ldq $21,8($17) # 1 1
+ subq $18,4
+ umulh $20,$20,$1 ######
+ ldq $27,16($17) # 1 1
+ mulq $21,$21,$6 ######
+ ldq $28,24($17) # 1 1
+ stq $5,0($16) # r[0]
+ umulh $21,$21,$2 ######
+ stq $1,8($16) # r[1]
+ mulq $27,$27,$7 ######
+ stq $6,16($16) # r[0]
+ umulh $27,$27,$3 ######
+ stq $2,24($16) # r[1]
+ mulq $28,$28,$8 ######
+ stq $7,32($16) # r[0]
+ umulh $28,$28,$4 ######
+ stq $3,40($16) # r[1]
- ble $18,$443
+ addq $16,64,$16
+ addq $17,32,$17
+ stq $8,-16($16) # r[0]
+ stq $4,-8($16) # r[1]
- addq $16,32,$16
- addq $17,16,$17
- blt $25,$442
- br $31,$542
+ blt $18,$543
+ ldq $20,0($17) # 1 1
+ br $542
$442:
- ldq $1,0($17) # a[0]
- mulq $1,$1,$2 # a[0]*w low part r2
- umulh $1,$1,$3 # a[0]*w high part r3
- stq $2,0($16) # r[0]
- stq $3,8($16) # r[1]
+ ldq $20,0($17) # a[0]
+ mulq $20,$20,$5 # a[0]*w low part r2
+ addq $16,16,$16
+ addq $17,8,$17
+ subq $18,1,$18
+ umulh $20,$20,$1 # a[0]*w high part r3
+ stq $5,-16($16) # r[0]
+ stq $1,-8($16) # r[1]
+
+ bgt $18,$442
+ ret $31,($26),1 # else exit
.align 4
-$443:
- ret $31,($26),1
+$543:
+ addq $18,4,$18
+ bgt $18,$442 # goto tail code
+ ret $31,($26),1 # else exit
.end bn_sqr_words
.align 3
@@ -180,31 +252,74 @@ bn_add_words..ng:
.frame $30,0,$26,0
.prologue 0
- bis $31,$31,$8 # carry = 0
- ble $19,$900
+ subq $19,4,$19
+ bis $31,$31,$0 # carry = 0
+ blt $19,$900
+ ldq $5,0($17) # a[0]
+ ldq $1,0($18) # b[1]
+ .align 3
$901:
- ldq $0,0($17) # a[0]
- ldq $1,0($18) # a[1]
+ addq $1,$5,$1 # r=a+b;
+ ldq $6,8($17) # a[1]
+ cmpult $1,$5,$22 # did we overflow?
+ ldq $2,8($18) # b[1]
+ addq $1,$0,$1 # c+= overflow
+ ldq $7,16($17) # a[2]
+ cmpult $1,$0,$0 # overflow?
+ ldq $3,16($18) # b[2]
+ addq $0,$22,$0
+ ldq $8,24($17) # a[3]
+ addq $2,$6,$2 # r=a+b;
+ ldq $4,24($18) # b[3]
+ cmpult $2,$6,$23 # did we overflow?
+ addq $3,$7,$3 # r=a+b;
+ addq $2,$0,$2 # c+= overflow
+ cmpult $3,$7,$24 # did we overflow?
+ cmpult $2,$0,$0 # overflow?
+ addq $4,$8,$4 # r=a+b;
+ addq $0,$23,$0
+ cmpult $4,$8,$25 # did we overflow?
+ addq $3,$0,$3 # c+= overflow
+ stq $1,0($16) # r[0]=c
+ cmpult $3,$0,$0 # overflow?
+ stq $2,8($16) # r[1]=c
+ addq $0,$24,$0
+ stq $3,16($16) # r[2]=c
+ addq $4,$0,$4 # c+= overflow
+ subq $19,4,$19 # loop--
+ cmpult $4,$0,$0 # overflow?
+ addq $17,32,$17 # a++
+ addq $0,$25,$0
+ stq $4,24($16) # r[3]=c
+ addq $18,32,$18 # b++
+ addq $16,32,$16 # r++
- addq $0,$1,$3 # c=a+b;
+ blt $19,$900
+ ldq $5,0($17) # a[0]
+ ldq $1,0($18) # b[1]
+ br $901
+ .align 4
+$945:
+ ldq $5,0($17) # a[0]
+ ldq $1,0($18) # b[1]
+ addq $1,$5,$1 # r=a+b;
+ subq $19,1,$19 # loop--
+ addq $1,$0,$1 # c+= overflow
addq $17,8,$17 # a++
+ cmpult $1,$5,$22 # did we overflow?
+ cmpult $1,$0,$0 # overflow?
+ addq $18,8,$18 # b++
+ stq $1,0($16) # r[0]=c
+ addq $0,$22,$0
+ addq $16,8,$16 # r++
- cmpult $3,$1,$7 # did we overflow?
- addq $18,8,$18 # b++
-
- addq $8,$3,$3 # c+=carry
+ bgt $19,$945
+ ret $31,($26),1 # else exit
- cmpult $3,$8,$8 # did we overflow?
- stq $3,($16) # r[0]=c
-
- addq $7,$8,$8 # add into overflow
- subq $19,1,$19 # loop--
-
- addq $16,8,$16 # r++
- bgt $19,$901
$900:
- bis $8,$8,$0 # return carry
- ret $31,($26),1
+ addq $19,4,$19
+ bgt $19,$945 # goto tail code
+ ret $31,($26),1 # else exit
.end bn_add_words
#
@@ -339,6 +454,1445 @@ $136:
addq $30,48,$30
ret $31,($26),1
.end bn_div64
- .ident "GCC: (GNU) 2.7.2.1"
+ .set noat
+ .text
+ .align 3
+ .globl bn_sub_words
+ .ent bn_sub_words
+bn_sub_words:
+bn_sub_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $19, 4, $19
+ bis $31, $31, $0
+ blt $19, $100
+ ldq $1, 0($17)
+ ldq $2, 0($18)
+$101:
+ ldq $3, 8($17)
+ cmpult $1, $2, $4
+ ldq $5, 8($18)
+ subq $1, $2, $1
+ ldq $6, 16($17)
+ cmpult $1, $0, $2
+ ldq $7, 16($18)
+ subq $1, $0, $23
+ ldq $8, 24($17)
+ addq $2, $4, $0
+ cmpult $3, $5, $24
+ subq $3, $5, $3
+ ldq $22, 24($18)
+ cmpult $3, $0, $5
+ subq $3, $0, $25
+ addq $5, $24, $0
+ cmpult $6, $7, $27
+ subq $6, $7, $6
+ stq $23, 0($16)
+ cmpult $6, $0, $7
+ subq $6, $0, $28
+ addq $7, $27, $0
+ cmpult $8, $22, $21
+ subq $8, $22, $8
+ stq $25, 8($16)
+ cmpult $8, $0, $22
+ subq $8, $0, $20
+ addq $22, $21, $0
+ stq $28, 16($16)
+ subq $19, 4, $19
+ stq $20, 24($16)
+ addq $17, 32, $17
+ addq $18, 32, $18
+ addq $16, 32, $16
+ blt $19, $100
+ ldq $1, 0($17)
+ ldq $2, 0($18)
+ br $101
+$102:
+ ldq $1, 0($17)
+ ldq $2, 0($18)
+ cmpult $1, $2, $27
+ subq $1, $2, $1
+ cmpult $1, $0, $2
+ subq $1, $0, $1
+ stq $1, 0($16)
+ addq $2, $27, $0
+ addq $17, 8, $17
+ addq $18, 8, $18
+ addq $16, 8, $16
+ subq $19, 1, $19
+ bgt $19, $102
+ ret $31,($26),1
+$100:
+ addq $19, 4, $19
+ bgt $19, $102
+$103:
+ ret $31,($26),1
+ .end bn_sub_words
+ .text
+ .align 3
+ .globl bn_mul_comba4
+ .ent bn_mul_comba4
+bn_mul_comba4:
+bn_mul_comba4..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ ldq $0, 0($17)
+ ldq $1, 0($18)
+ ldq $2, 8($17)
+ ldq $3, 8($18)
+ ldq $4, 16($17)
+ ldq $5, 16($18)
+ ldq $6, 24($17)
+ ldq $7, 24($18)
+ bis $31, $31, $23
+ mulq $0, $1, $8
+ umulh $0, $1, $22
+ stq $8, 0($16)
+ bis $31, $31, $8
+ mulq $0, $3, $24
+ umulh $0, $3, $25
+ addq $22, $24, $22
+ cmpult $22, $24, $27
+ addq $27, $25, $25
+ addq $23, $25, $23
+ cmpult $23, $25, $28
+ addq $8, $28, $8
+ mulq $2, $1, $21
+ umulh $2, $1, $20
+ addq $22, $21, $22
+ cmpult $22, $21, $19
+ addq $19, $20, $20
+ addq $23, $20, $23
+ cmpult $23, $20, $17
+ addq $8, $17, $8
+ stq $22, 8($16)
+ bis $31, $31, $22
+ mulq $2, $3, $18
+ umulh $2, $3, $24
+ addq $23, $18, $23
+ cmpult $23, $18, $27
+ addq $27, $24, $24
+ addq $8, $24, $8
+ cmpult $8, $24, $25
+ addq $22, $25, $22
+ mulq $0, $5, $28
+ umulh $0, $5, $21
+ addq $23, $28, $23
+ cmpult $23, $28, $19
+ addq $19, $21, $21
+ addq $8, $21, $8
+ cmpult $8, $21, $20
+ addq $22, $20, $22
+ mulq $4, $1, $17
+ umulh $4, $1, $18
+ addq $23, $17, $23
+ cmpult $23, $17, $27
+ addq $27, $18, $18
+ addq $8, $18, $8
+ cmpult $8, $18, $24
+ addq $22, $24, $22
+ stq $23, 16($16)
+ bis $31, $31, $23
+ mulq $0, $7, $25
+ umulh $0, $7, $28
+ addq $8, $25, $8
+ cmpult $8, $25, $19
+ addq $19, $28, $28
+ addq $22, $28, $22
+ cmpult $22, $28, $21
+ addq $23, $21, $23
+ mulq $2, $5, $20
+ umulh $2, $5, $17
+ addq $8, $20, $8
+ cmpult $8, $20, $27
+ addq $27, $17, $17
+ addq $22, $17, $22
+ cmpult $22, $17, $18
+ addq $23, $18, $23
+ mulq $4, $3, $24
+ umulh $4, $3, $25
+ addq $8, $24, $8
+ cmpult $8, $24, $19
+ addq $19, $25, $25
+ addq $22, $25, $22
+ cmpult $22, $25, $28
+ addq $23, $28, $23
+ mulq $6, $1, $21
+ umulh $6, $1, $0
+ addq $8, $21, $8
+ cmpult $8, $21, $20
+ addq $20, $0, $0
+ addq $22, $0, $22
+ cmpult $22, $0, $27
+ addq $23, $27, $23
+ stq $8, 24($16)
+ bis $31, $31, $8
+ mulq $2, $7, $17
+ umulh $2, $7, $18
+ addq $22, $17, $22
+ cmpult $22, $17, $24
+ addq $24, $18, $18
+ addq $23, $18, $23
+ cmpult $23, $18, $19
+ addq $8, $19, $8
+ mulq $4, $5, $25
+ umulh $4, $5, $28
+ addq $22, $25, $22
+ cmpult $22, $25, $21
+ addq $21, $28, $28
+ addq $23, $28, $23
+ cmpult $23, $28, $20
+ addq $8, $20, $8
+ mulq $6, $3, $0
+ umulh $6, $3, $27
+ addq $22, $0, $22
+ cmpult $22, $0, $1
+ addq $1, $27, $27
+ addq $23, $27, $23
+ cmpult $23, $27, $17
+ addq $8, $17, $8
+ stq $22, 32($16)
+ bis $31, $31, $22
+ mulq $4, $7, $24
+ umulh $4, $7, $18
+ addq $23, $24, $23
+ cmpult $23, $24, $19
+ addq $19, $18, $18
+ addq $8, $18, $8
+ cmpult $8, $18, $2
+ addq $22, $2, $22
+ mulq $6, $5, $25
+ umulh $6, $5, $21
+ addq $23, $25, $23
+ cmpult $23, $25, $28
+ addq $28, $21, $21
+ addq $8, $21, $8
+ cmpult $8, $21, $20
+ addq $22, $20, $22
+ stq $23, 40($16)
+ bis $31, $31, $23
+ mulq $6, $7, $0
+ umulh $6, $7, $1
+ addq $8, $0, $8
+ cmpult $8, $0, $27
+ addq $27, $1, $1
+ addq $22, $1, $22
+ cmpult $22, $1, $17
+ addq $23, $17, $23
+ stq $8, 48($16)
+ stq $22, 56($16)
+ ret $31,($26),1
+ .end bn_mul_comba4
+ .text
+ .align 3
+ .globl bn_mul_comba8
+ .ent bn_mul_comba8
+bn_mul_comba8:
+bn_mul_comba8..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $30, 16, $30
+ ldq $0, 0($17)
+ ldq $1, 0($18)
+ stq $9, 0($30)
+ stq $10, 8($30)
+ ldq $2, 8($17)
+ ldq $3, 8($18)
+ ldq $4, 16($17)
+ ldq $5, 16($18)
+ ldq $6, 24($17)
+ ldq $7, 24($18)
+ ldq $8, 8($17)
+ ldq $22, 8($18)
+ ldq $23, 8($17)
+ ldq $24, 8($18)
+ ldq $25, 8($17)
+ ldq $27, 8($18)
+ ldq $28, 8($17)
+ ldq $21, 8($18)
+ bis $31, $31, $9
+ mulq $0, $1, $20
+ umulh $0, $1, $19
+ stq $20, 0($16)
+ bis $31, $31, $20
+ mulq $0, $3, $10
+ umulh $0, $3, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $10
+ addq $20, $10, $20
+ mulq $2, $1, $18
+ umulh $2, $1, $17
+ addq $19, $18, $19
+ cmpult $19, $18, $10
+ addq $10, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $18
+ addq $20, $18, $20
+ stq $19, 8($16)
+ bis $31, $31, $19
+ mulq $0, $5, $10
+ umulh $0, $5, $17
+ addq $9, $10, $9
+ cmpult $9, $10, $18
+ addq $18, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $19, $10, $19
+ mulq $2, $3, $18
+ umulh $2, $3, $17
+ addq $9, $18, $9
+ cmpult $9, $18, $10
+ addq $10, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $18
+ addq $19, $18, $19
+ mulq $4, $1, $10
+ umulh $4, $1, $17
+ addq $9, $10, $9
+ cmpult $9, $10, $18
+ addq $18, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $19, $10, $19
+ stq $9, 16($16)
+ bis $31, $31, $9
+ mulq $0, $7, $18
+ umulh $0, $7, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $10
+ addq $10, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $9, $18, $9
+ mulq $2, $5, $10
+ umulh $2, $5, $17
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $18, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $10
+ addq $9, $10, $9
+ mulq $4, $3, $18
+ umulh $4, $3, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $10
+ addq $10, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $9, $18, $9
+ mulq $6, $1, $10
+ umulh $6, $1, $17
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $18, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $10
+ addq $9, $10, $9
+ stq $20, 24($16)
+ bis $31, $31, $20
+ mulq $0, $22, $18
+ umulh $0, $22, $17
+ addq $19, $18, $19
+ cmpult $19, $18, $10
+ addq $10, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $18
+ addq $20, $18, $20
+ mulq $2, $7, $10
+ umulh $2, $7, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $10
+ addq $20, $10, $20
+ mulq $4, $5, $18
+ umulh $4, $5, $17
+ addq $19, $18, $19
+ cmpult $19, $18, $10
+ addq $10, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $18
+ addq $20, $18, $20
+ mulq $6, $3, $10
+ umulh $6, $3, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $10
+ addq $20, $10, $20
+ mulq $8, $1, $18
+ umulh $8, $1, $17
+ addq $19, $18, $19
+ cmpult $19, $18, $10
+ addq $10, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $18
+ addq $20, $18, $20
+ stq $19, 32($16)
+ bis $31, $31, $19
+ mulq $0, $24, $10
+ umulh $0, $24, $17
+ addq $9, $10, $9
+ cmpult $9, $10, $18
+ addq $18, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $19, $10, $19
+ mulq $2, $22, $18
+ umulh $2, $22, $17
+ addq $9, $18, $9
+ cmpult $9, $18, $10
+ addq $10, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $18
+ addq $19, $18, $19
+ mulq $4, $7, $10
+ umulh $4, $7, $17
+ addq $9, $10, $9
+ cmpult $9, $10, $18
+ addq $18, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $19, $10, $19
+ mulq $6, $5, $18
+ umulh $6, $5, $17
+ addq $9, $18, $9
+ cmpult $9, $18, $10
+ addq $10, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $18
+ addq $19, $18, $19
+ mulq $8, $3, $10
+ umulh $8, $3, $17
+ addq $9, $10, $9
+ cmpult $9, $10, $18
+ addq $18, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $19, $10, $19
+ mulq $23, $1, $18
+ umulh $23, $1, $17
+ addq $9, $18, $9
+ cmpult $9, $18, $10
+ addq $10, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $18
+ addq $19, $18, $19
+ stq $9, 40($16)
+ bis $31, $31, $9
+ mulq $0, $27, $10
+ umulh $0, $27, $17
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $18, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $10
+ addq $9, $10, $9
+ mulq $2, $24, $18
+ umulh $2, $24, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $10
+ addq $10, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $9, $18, $9
+ mulq $4, $22, $10
+ umulh $4, $22, $17
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $18, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $10
+ addq $9, $10, $9
+ mulq $6, $7, $18
+ umulh $6, $7, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $10
+ addq $10, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $9, $18, $9
+ mulq $8, $5, $10
+ umulh $8, $5, $17
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $18, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $10
+ addq $9, $10, $9
+ mulq $23, $3, $18
+ umulh $23, $3, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $10
+ addq $10, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $9, $18, $9
+ mulq $25, $1, $10
+ umulh $25, $1, $17
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $18, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $10
+ addq $9, $10, $9
+ stq $20, 48($16)
+ bis $31, $31, $20
+ mulq $0, $21, $18
+ umulh $0, $21, $17
+ addq $19, $18, $19
+ cmpult $19, $18, $10
+ addq $10, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $18
+ addq $20, $18, $20
+ mulq $2, $27, $10
+ umulh $2, $27, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $0
+ addq $20, $0, $20
+ mulq $4, $24, $10
+ umulh $4, $24, $18
+ addq $19, $10, $19
+ cmpult $19, $10, $17
+ addq $17, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $0
+ addq $20, $0, $20
+ mulq $6, $22, $10
+ umulh $6, $22, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $0
+ addq $20, $0, $20
+ mulq $8, $7, $10
+ umulh $8, $7, $18
+ addq $19, $10, $19
+ cmpult $19, $10, $17
+ addq $17, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $0
+ addq $20, $0, $20
+ mulq $23, $5, $10
+ umulh $23, $5, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $0
+ addq $20, $0, $20
+ mulq $25, $3, $10
+ umulh $25, $3, $18
+ addq $19, $10, $19
+ cmpult $19, $10, $17
+ addq $17, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $0
+ addq $20, $0, $20
+ mulq $28, $1, $10
+ umulh $28, $1, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $0
+ addq $20, $0, $20
+ stq $19, 56($16)
+ bis $31, $31, $19
+ mulq $2, $21, $10
+ umulh $2, $21, $18
+ addq $9, $10, $9
+ cmpult $9, $10, $17
+ addq $17, $18, $18
+ addq $20, $18, $20
+ cmpult $20, $18, $0
+ addq $19, $0, $19
+ mulq $4, $27, $1
+ umulh $4, $27, $10
+ addq $9, $1, $9
+ cmpult $9, $1, $17
+ addq $17, $10, $10
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $19, $18, $19
+ mulq $6, $24, $0
+ umulh $6, $24, $2
+ addq $9, $0, $9
+ cmpult $9, $0, $1
+ addq $1, $2, $2
+ addq $20, $2, $20
+ cmpult $20, $2, $17
+ addq $19, $17, $19
+ mulq $8, $22, $10
+ umulh $8, $22, $18
+ addq $9, $10, $9
+ cmpult $9, $10, $0
+ addq $0, $18, $18
+ addq $20, $18, $20
+ cmpult $20, $18, $1
+ addq $19, $1, $19
+ mulq $23, $7, $2
+ umulh $23, $7, $17
+ addq $9, $2, $9
+ cmpult $9, $2, $10
+ addq $10, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $0
+ addq $19, $0, $19
+ mulq $25, $5, $18
+ umulh $25, $5, $1
+ addq $9, $18, $9
+ cmpult $9, $18, $2
+ addq $2, $1, $1
+ addq $20, $1, $20
+ cmpult $20, $1, $10
+ addq $19, $10, $19
+ mulq $28, $3, $17
+ umulh $28, $3, $0
+ addq $9, $17, $9
+ cmpult $9, $17, $18
+ addq $18, $0, $0
+ addq $20, $0, $20
+ cmpult $20, $0, $2
+ addq $19, $2, $19
+ stq $9, 64($16)
+ bis $31, $31, $9
+ mulq $4, $21, $1
+ umulh $4, $21, $10
+ addq $20, $1, $20
+ cmpult $20, $1, $17
+ addq $17, $10, $10
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $9, $18, $9
+ mulq $6, $27, $0
+ umulh $6, $27, $2
+ addq $20, $0, $20
+ cmpult $20, $0, $3
+ addq $3, $2, $2
+ addq $19, $2, $19
+ cmpult $19, $2, $1
+ addq $9, $1, $9
+ mulq $8, $24, $17
+ umulh $8, $24, $10
+ addq $20, $17, $20
+ cmpult $20, $17, $18
+ addq $18, $10, $10
+ addq $19, $10, $19
+ cmpult $19, $10, $4
+ addq $9, $4, $9
+ mulq $23, $22, $0
+ umulh $23, $22, $3
+ addq $20, $0, $20
+ cmpult $20, $0, $2
+ addq $2, $3, $3
+ addq $19, $3, $19
+ cmpult $19, $3, $1
+ addq $9, $1, $9
+ mulq $25, $7, $17
+ umulh $25, $7, $18
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $10, $18, $18
+ addq $19, $18, $19
+ cmpult $19, $18, $4
+ addq $9, $4, $9
+ mulq $28, $5, $0
+ umulh $28, $5, $2
+ addq $20, $0, $20
+ cmpult $20, $0, $3
+ addq $3, $2, $2
+ addq $19, $2, $19
+ cmpult $19, $2, $1
+ addq $9, $1, $9
+ stq $20, 72($16)
+ bis $31, $31, $20
+ mulq $6, $21, $17
+ umulh $6, $21, $10
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $18, $10, $10
+ addq $9, $10, $9
+ cmpult $9, $10, $4
+ addq $20, $4, $20
+ mulq $8, $27, $0
+ umulh $8, $27, $3
+ addq $19, $0, $19
+ cmpult $19, $0, $2
+ addq $2, $3, $3
+ addq $9, $3, $9
+ cmpult $9, $3, $1
+ addq $20, $1, $20
+ mulq $23, $24, $5
+ umulh $23, $24, $17
+ addq $19, $5, $19
+ cmpult $19, $5, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $10
+ addq $20, $10, $20
+ mulq $25, $22, $4
+ umulh $25, $22, $6
+ addq $19, $4, $19
+ cmpult $19, $4, $0
+ addq $0, $6, $6
+ addq $9, $6, $9
+ cmpult $9, $6, $2
+ addq $20, $2, $20
+ mulq $28, $7, $3
+ umulh $28, $7, $1
+ addq $19, $3, $19
+ cmpult $19, $3, $5
+ addq $5, $1, $1
+ addq $9, $1, $9
+ cmpult $9, $1, $18
+ addq $20, $18, $20
+ stq $19, 80($16)
+ bis $31, $31, $19
+ mulq $8, $21, $17
+ umulh $8, $21, $10
+ addq $9, $17, $9
+ cmpult $9, $17, $4
+ addq $4, $10, $10
+ addq $20, $10, $20
+ cmpult $20, $10, $0
+ addq $19, $0, $19
+ mulq $23, $27, $6
+ umulh $23, $27, $2
+ addq $9, $6, $9
+ cmpult $9, $6, $3
+ addq $3, $2, $2
+ addq $20, $2, $20
+ cmpult $20, $2, $5
+ addq $19, $5, $19
+ mulq $25, $24, $1
+ umulh $25, $24, $18
+ addq $9, $1, $9
+ cmpult $9, $1, $7
+ addq $7, $18, $18
+ addq $20, $18, $20
+ cmpult $20, $18, $17
+ addq $19, $17, $19
+ mulq $28, $22, $4
+ umulh $28, $22, $10
+ addq $9, $4, $9
+ cmpult $9, $4, $0
+ addq $0, $10, $10
+ addq $20, $10, $20
+ cmpult $20, $10, $8
+ addq $19, $8, $19
+ stq $9, 88($16)
+ bis $31, $31, $9
+ mulq $23, $21, $6
+ umulh $23, $21, $3
+ addq $20, $6, $20
+ cmpult $20, $6, $2
+ addq $2, $3, $3
+ addq $19, $3, $19
+ cmpult $19, $3, $5
+ addq $9, $5, $9
+ mulq $25, $27, $1
+ umulh $25, $27, $7
+ addq $20, $1, $20
+ cmpult $20, $1, $18
+ addq $18, $7, $7
+ addq $19, $7, $19
+ cmpult $19, $7, $17
+ addq $9, $17, $9
+ mulq $28, $24, $4
+ umulh $28, $24, $0
+ addq $20, $4, $20
+ cmpult $20, $4, $10
+ addq $10, $0, $0
+ addq $19, $0, $19
+ cmpult $19, $0, $8
+ addq $9, $8, $9
+ stq $20, 96($16)
+ bis $31, $31, $20
+ mulq $25, $21, $22
+ umulh $25, $21, $6
+ addq $19, $22, $19
+ cmpult $19, $22, $2
+ addq $2, $6, $6
+ addq $9, $6, $9
+ cmpult $9, $6, $3
+ addq $20, $3, $20
+ mulq $28, $27, $5
+ umulh $28, $27, $23
+ addq $19, $5, $19
+ cmpult $19, $5, $1
+ addq $1, $23, $23
+ addq $9, $23, $9
+ cmpult $9, $23, $18
+ addq $20, $18, $20
+ stq $19, 104($16)
+ bis $31, $31, $19
+ mulq $28, $21, $7
+ umulh $28, $21, $17
+ addq $9, $7, $9
+ cmpult $9, $7, $4
+ addq $4, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $19, $10, $19
+ stq $9, 112($16)
+ stq $20, 120($16)
+ ldq $9, 0($30)
+ ldq $10, 8($30)
+ addq $30, 16, $30
+ ret $31,($26),1
+ .end bn_mul_comba8
+ .text
+ .align 3
+ .globl bn_sqr_comba4
+ .ent bn_sqr_comba4
+bn_sqr_comba4:
+bn_sqr_comba4..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ ldq $0, 0($17)
+ ldq $1, 8($17)
+ ldq $2, 16($17)
+ ldq $3, 24($17)
+ bis $31, $31, $6
+ mulq $0, $0, $4
+ umulh $0, $0, $5
+ stq $4, 0($16)
+ bis $31, $31, $4
+ mulq $0, $1, $7
+ umulh $0, $1, $8
+ cmplt $7, $31, $22
+ cmplt $8, $31, $23
+ addq $7, $7, $7
+ addq $8, $8, $8
+ addq $8, $22, $8
+ addq $4, $23, $4
+ addq $5, $7, $5
+ addq $6, $8, $6
+ cmpult $5, $7, $24
+ cmpult $6, $8, $25
+ addq $6, $24, $6
+ addq $4, $25, $4
+ stq $5, 8($16)
+ bis $31, $31, $5
+ mulq $1, $1, $27
+ umulh $1, $1, $28
+ addq $6, $27, $6
+ addq $4, $28, $4
+ cmpult $6, $27, $21
+ cmpult $4, $28, $20
+ addq $4, $21, $4
+ addq $5, $20, $5
+ mulq $2, $0, $19
+ umulh $2, $0, $18
+ cmplt $19, $31, $17
+ cmplt $18, $31, $22
+ addq $19, $19, $19
+ addq $18, $18, $18
+ addq $18, $17, $18
+ addq $5, $22, $5
+ addq $6, $19, $6
+ addq $4, $18, $4
+ cmpult $6, $19, $23
+ cmpult $4, $18, $7
+ addq $4, $23, $4
+ addq $5, $7, $5
+ stq $6, 16($16)
+ bis $31, $31, $6
+ mulq $3, $0, $8
+ umulh $3, $0, $24
+ cmplt $8, $31, $25
+ cmplt $24, $31, $27
+ addq $8, $8, $8
+ addq $24, $24, $24
+ addq $24, $25, $24
+ addq $6, $27, $6
+ addq $4, $8, $4
+ addq $5, $24, $5
+ cmpult $4, $8, $28
+ cmpult $5, $24, $21
+ addq $5, $28, $5
+ addq $6, $21, $6
+ mulq $2, $1, $20
+ umulh $2, $1, $17
+ cmplt $20, $31, $22
+ cmplt $17, $31, $19
+ addq $20, $20, $20
+ addq $17, $17, $17
+ addq $17, $22, $17
+ addq $6, $19, $6
+ addq $4, $20, $4
+ addq $5, $17, $5
+ cmpult $4, $20, $18
+ cmpult $5, $17, $23
+ addq $5, $18, $5
+ addq $6, $23, $6
+ stq $4, 24($16)
+ bis $31, $31, $4
+ mulq $2, $2, $7
+ umulh $2, $2, $25
+ addq $5, $7, $5
+ addq $6, $25, $6
+ cmpult $5, $7, $27
+ cmpult $6, $25, $8
+ addq $6, $27, $6
+ addq $4, $8, $4
+ mulq $3, $1, $24
+ umulh $3, $1, $28
+ cmplt $24, $31, $21
+ cmplt $28, $31, $22
+ addq $24, $24, $24
+ addq $28, $28, $28
+ addq $28, $21, $28
+ addq $4, $22, $4
+ addq $5, $24, $5
+ addq $6, $28, $6
+ cmpult $5, $24, $19
+ cmpult $6, $28, $20
+ addq $6, $19, $6
+ addq $4, $20, $4
+ stq $5, 32($16)
+ bis $31, $31, $5
+ mulq $3, $2, $17
+ umulh $3, $2, $18
+ cmplt $17, $31, $23
+ cmplt $18, $31, $7
+ addq $17, $17, $17
+ addq $18, $18, $18
+ addq $18, $23, $18
+ addq $5, $7, $5
+ addq $6, $17, $6
+ addq $4, $18, $4
+ cmpult $6, $17, $25
+ cmpult $4, $18, $27
+ addq $4, $25, $4
+ addq $5, $27, $5
+ stq $6, 40($16)
+ bis $31, $31, $6
+ mulq $3, $3, $8
+ umulh $3, $3, $21
+ addq $4, $8, $4
+ addq $5, $21, $5
+ cmpult $4, $8, $22
+ cmpult $5, $21, $24
+ addq $5, $22, $5
+ addq $6, $24, $6
+ stq $4, 48($16)
+ stq $5, 56($16)
+ ret $31,($26),1
+ .end bn_sqr_comba4
+ .text
+ .align 3
+ .globl bn_sqr_comba8
+ .ent bn_sqr_comba8
+bn_sqr_comba8:
+bn_sqr_comba8..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ ldq $0, 0($17)
+ ldq $1, 8($17)
+ ldq $2, 16($17)
+ ldq $3, 24($17)
+ ldq $4, 32($17)
+ ldq $5, 40($17)
+ ldq $6, 48($17)
+ ldq $7, 56($17)
+ bis $31, $31, $23
+ mulq $0, $0, $8
+ umulh $0, $0, $22
+ stq $8, 0($16)
+ bis $31, $31, $8
+ mulq $1, $0, $24
+ umulh $1, $0, $25
+ cmplt $24, $31, $27
+ cmplt $25, $31, $28
+ addq $24, $24, $24
+ addq $25, $25, $25
+ addq $25, $27, $25
+ addq $8, $28, $8
+ addq $22, $24, $22
+ addq $23, $25, $23
+ cmpult $22, $24, $21
+ cmpult $23, $25, $20
+ addq $23, $21, $23
+ addq $8, $20, $8
+ stq $22, 8($16)
+ bis $31, $31, $22
+ mulq $1, $1, $19
+ umulh $1, $1, $18
+ addq $23, $19, $23
+ addq $8, $18, $8
+ cmpult $23, $19, $17
+ cmpult $8, $18, $27
+ addq $8, $17, $8
+ addq $22, $27, $22
+ mulq $2, $0, $28
+ umulh $2, $0, $24
+ cmplt $28, $31, $25
+ cmplt $24, $31, $21
+ addq $28, $28, $28
+ addq $24, $24, $24
+ addq $24, $25, $24
+ addq $22, $21, $22
+ addq $23, $28, $23
+ addq $8, $24, $8
+ cmpult $23, $28, $20
+ cmpult $8, $24, $19
+ addq $8, $20, $8
+ addq $22, $19, $22
+ stq $23, 16($16)
+ bis $31, $31, $23
+ mulq $2, $1, $18
+ umulh $2, $1, $17
+ cmplt $18, $31, $27
+ cmplt $17, $31, $25
+ addq $18, $18, $18
+ addq $17, $17, $17
+ addq $17, $27, $17
+ addq $23, $25, $23
+ addq $8, $18, $8
+ addq $22, $17, $22
+ cmpult $8, $18, $21
+ cmpult $22, $17, $28
+ addq $22, $21, $22
+ addq $23, $28, $23
+ mulq $3, $0, $24
+ umulh $3, $0, $20
+ cmplt $24, $31, $19
+ cmplt $20, $31, $27
+ addq $24, $24, $24
+ addq $20, $20, $20
+ addq $20, $19, $20
+ addq $23, $27, $23
+ addq $8, $24, $8
+ addq $22, $20, $22
+ cmpult $8, $24, $25
+ cmpult $22, $20, $18
+ addq $22, $25, $22
+ addq $23, $18, $23
+ stq $8, 24($16)
+ bis $31, $31, $8
+ mulq $2, $2, $17
+ umulh $2, $2, $21
+ addq $22, $17, $22
+ addq $23, $21, $23
+ cmpult $22, $17, $28
+ cmpult $23, $21, $19
+ addq $23, $28, $23
+ addq $8, $19, $8
+ mulq $3, $1, $27
+ umulh $3, $1, $24
+ cmplt $27, $31, $20
+ cmplt $24, $31, $25
+ addq $27, $27, $27
+ addq $24, $24, $24
+ addq $24, $20, $24
+ addq $8, $25, $8
+ addq $22, $27, $22
+ addq $23, $24, $23
+ cmpult $22, $27, $18
+ cmpult $23, $24, $17
+ addq $23, $18, $23
+ addq $8, $17, $8
+ mulq $4, $0, $21
+ umulh $4, $0, $28
+ cmplt $21, $31, $19
+ cmplt $28, $31, $20
+ addq $21, $21, $21
+ addq $28, $28, $28
+ addq $28, $19, $28
+ addq $8, $20, $8
+ addq $22, $21, $22
+ addq $23, $28, $23
+ cmpult $22, $21, $25
+ cmpult $23, $28, $27
+ addq $23, $25, $23
+ addq $8, $27, $8
+ stq $22, 32($16)
+ bis $31, $31, $22
+ mulq $3, $2, $24
+ umulh $3, $2, $18
+ cmplt $24, $31, $17
+ cmplt $18, $31, $19
+ addq $24, $24, $24
+ addq $18, $18, $18
+ addq $18, $17, $18
+ addq $22, $19, $22
+ addq $23, $24, $23
+ addq $8, $18, $8
+ cmpult $23, $24, $20
+ cmpult $8, $18, $21
+ addq $8, $20, $8
+ addq $22, $21, $22
+ mulq $4, $1, $28
+ umulh $4, $1, $25
+ cmplt $28, $31, $27
+ cmplt $25, $31, $17
+ addq $28, $28, $28
+ addq $25, $25, $25
+ addq $25, $27, $25
+ addq $22, $17, $22
+ addq $23, $28, $23
+ addq $8, $25, $8
+ cmpult $23, $28, $19
+ cmpult $8, $25, $24
+ addq $8, $19, $8
+ addq $22, $24, $22
+ mulq $5, $0, $18
+ umulh $5, $0, $20
+ cmplt $18, $31, $21
+ cmplt $20, $31, $27
+ addq $18, $18, $18
+ addq $20, $20, $20
+ addq $20, $21, $20
+ addq $22, $27, $22
+ addq $23, $18, $23
+ addq $8, $20, $8
+ cmpult $23, $18, $17
+ cmpult $8, $20, $28
+ addq $8, $17, $8
+ addq $22, $28, $22
+ stq $23, 40($16)
+ bis $31, $31, $23
+ mulq $3, $3, $25
+ umulh $3, $3, $19
+ addq $8, $25, $8
+ addq $22, $19, $22
+ cmpult $8, $25, $24
+ cmpult $22, $19, $21
+ addq $22, $24, $22
+ addq $23, $21, $23
+ mulq $4, $2, $27
+ umulh $4, $2, $18
+ cmplt $27, $31, $20
+ cmplt $18, $31, $17
+ addq $27, $27, $27
+ addq $18, $18, $18
+ addq $18, $20, $18
+ addq $23, $17, $23
+ addq $8, $27, $8
+ addq $22, $18, $22
+ cmpult $8, $27, $28
+ cmpult $22, $18, $25
+ addq $22, $28, $22
+ addq $23, $25, $23
+ mulq $5, $1, $19
+ umulh $5, $1, $24
+ cmplt $19, $31, $21
+ cmplt $24, $31, $20
+ addq $19, $19, $19
+ addq $24, $24, $24
+ addq $24, $21, $24
+ addq $23, $20, $23
+ addq $8, $19, $8
+ addq $22, $24, $22
+ cmpult $8, $19, $17
+ cmpult $22, $24, $27
+ addq $22, $17, $22
+ addq $23, $27, $23
+ mulq $6, $0, $18
+ umulh $6, $0, $28
+ cmplt $18, $31, $25
+ cmplt $28, $31, $21
+ addq $18, $18, $18
+ addq $28, $28, $28
+ addq $28, $25, $28
+ addq $23, $21, $23
+ addq $8, $18, $8
+ addq $22, $28, $22
+ cmpult $8, $18, $20
+ cmpult $22, $28, $19
+ addq $22, $20, $22
+ addq $23, $19, $23
+ stq $8, 48($16)
+ bis $31, $31, $8
+ mulq $4, $3, $24
+ umulh $4, $3, $17
+ cmplt $24, $31, $27
+ cmplt $17, $31, $25
+ addq $24, $24, $24
+ addq $17, $17, $17
+ addq $17, $27, $17
+ addq $8, $25, $8
+ addq $22, $24, $22
+ addq $23, $17, $23
+ cmpult $22, $24, $21
+ cmpult $23, $17, $18
+ addq $23, $21, $23
+ addq $8, $18, $8
+ mulq $5, $2, $28
+ umulh $5, $2, $20
+ cmplt $28, $31, $19
+ cmplt $20, $31, $27
+ addq $28, $28, $28
+ addq $20, $20, $20
+ addq $20, $19, $20
+ addq $8, $27, $8
+ addq $22, $28, $22
+ addq $23, $20, $23
+ cmpult $22, $28, $25
+ cmpult $23, $20, $24
+ addq $23, $25, $23
+ addq $8, $24, $8
+ mulq $6, $1, $17
+ umulh $6, $1, $21
+ cmplt $17, $31, $18
+ cmplt $21, $31, $19
+ addq $17, $17, $17
+ addq $21, $21, $21
+ addq $21, $18, $21
+ addq $8, $19, $8
+ addq $22, $17, $22
+ addq $23, $21, $23
+ cmpult $22, $17, $27
+ cmpult $23, $21, $28
+ addq $23, $27, $23
+ addq $8, $28, $8
+ mulq $7, $0, $20
+ umulh $7, $0, $25
+ cmplt $20, $31, $24
+ cmplt $25, $31, $18
+ addq $20, $20, $20
+ addq $25, $25, $25
+ addq $25, $24, $25
+ addq $8, $18, $8
+ addq $22, $20, $22
+ addq $23, $25, $23
+ cmpult $22, $20, $19
+ cmpult $23, $25, $17
+ addq $23, $19, $23
+ addq $8, $17, $8
+ stq $22, 56($16)
+ bis $31, $31, $22
+ mulq $4, $4, $21
+ umulh $4, $4, $27
+ addq $23, $21, $23
+ addq $8, $27, $8
+ cmpult $23, $21, $28
+ cmpult $8, $27, $24
+ addq $8, $28, $8
+ addq $22, $24, $22
+ mulq $5, $3, $18
+ umulh $5, $3, $20
+ cmplt $18, $31, $25
+ cmplt $20, $31, $19
+ addq $18, $18, $18
+ addq $20, $20, $20
+ addq $20, $25, $20
+ addq $22, $19, $22
+ addq $23, $18, $23
+ addq $8, $20, $8
+ cmpult $23, $18, $17
+ cmpult $8, $20, $21
+ addq $8, $17, $8
+ addq $22, $21, $22
+ mulq $6, $2, $27
+ umulh $6, $2, $28
+ cmplt $27, $31, $24
+ cmplt $28, $31, $25
+ addq $27, $27, $27
+ addq $28, $28, $28
+ addq $28, $24, $28
+ addq $22, $25, $22
+ addq $23, $27, $23
+ addq $8, $28, $8
+ cmpult $23, $27, $19
+ cmpult $8, $28, $18
+ addq $8, $19, $8
+ addq $22, $18, $22
+ mulq $7, $1, $20
+ umulh $7, $1, $17
+ cmplt $20, $31, $21
+ cmplt $17, $31, $24
+ addq $20, $20, $20
+ addq $17, $17, $17
+ addq $17, $21, $17
+ addq $22, $24, $22
+ addq $23, $20, $23
+ addq $8, $17, $8
+ cmpult $23, $20, $25
+ cmpult $8, $17, $27
+ addq $8, $25, $8
+ addq $22, $27, $22
+ stq $23, 64($16)
+ bis $31, $31, $23
+ mulq $5, $4, $28
+ umulh $5, $4, $19
+ cmplt $28, $31, $18
+ cmplt $19, $31, $21
+ addq $28, $28, $28
+ addq $19, $19, $19
+ addq $19, $18, $19
+ addq $23, $21, $23
+ addq $8, $28, $8
+ addq $22, $19, $22
+ cmpult $8, $28, $24
+ cmpult $22, $19, $20
+ addq $22, $24, $22
+ addq $23, $20, $23
+ mulq $6, $3, $17
+ umulh $6, $3, $25
+ cmplt $17, $31, $27
+ cmplt $25, $31, $18
+ addq $17, $17, $17
+ addq $25, $25, $25
+ addq $25, $27, $25
+ addq $23, $18, $23
+ addq $8, $17, $8
+ addq $22, $25, $22
+ cmpult $8, $17, $21
+ cmpult $22, $25, $28
+ addq $22, $21, $22
+ addq $23, $28, $23
+ mulq $7, $2, $19
+ umulh $7, $2, $24
+ cmplt $19, $31, $20
+ cmplt $24, $31, $27
+ addq $19, $19, $19
+ addq $24, $24, $24
+ addq $24, $20, $24
+ addq $23, $27, $23
+ addq $8, $19, $8
+ addq $22, $24, $22
+ cmpult $8, $19, $18
+ cmpult $22, $24, $17
+ addq $22, $18, $22
+ addq $23, $17, $23
+ stq $8, 72($16)
+ bis $31, $31, $8
+ mulq $5, $5, $25
+ umulh $5, $5, $21
+ addq $22, $25, $22
+ addq $23, $21, $23
+ cmpult $22, $25, $28
+ cmpult $23, $21, $20
+ addq $23, $28, $23
+ addq $8, $20, $8
+ mulq $6, $4, $27
+ umulh $6, $4, $19
+ cmplt $27, $31, $24
+ cmplt $19, $31, $18
+ addq $27, $27, $27
+ addq $19, $19, $19
+ addq $19, $24, $19
+ addq $8, $18, $8
+ addq $22, $27, $22
+ addq $23, $19, $23
+ cmpult $22, $27, $17
+ cmpult $23, $19, $25
+ addq $23, $17, $23
+ addq $8, $25, $8
+ mulq $7, $3, $21
+ umulh $7, $3, $28
+ cmplt $21, $31, $20
+ cmplt $28, $31, $24
+ addq $21, $21, $21
+ addq $28, $28, $28
+ addq $28, $20, $28
+ addq $8, $24, $8
+ addq $22, $21, $22
+ addq $23, $28, $23
+ cmpult $22, $21, $18
+ cmpult $23, $28, $27
+ addq $23, $18, $23
+ addq $8, $27, $8
+ stq $22, 80($16)
+ bis $31, $31, $22
+ mulq $6, $5, $19
+ umulh $6, $5, $17
+ cmplt $19, $31, $25
+ cmplt $17, $31, $20
+ addq $19, $19, $19
+ addq $17, $17, $17
+ addq $17, $25, $17
+ addq $22, $20, $22
+ addq $23, $19, $23
+ addq $8, $17, $8
+ cmpult $23, $19, $24
+ cmpult $8, $17, $21
+ addq $8, $24, $8
+ addq $22, $21, $22
+ mulq $7, $4, $28
+ umulh $7, $4, $18
+ cmplt $28, $31, $27
+ cmplt $18, $31, $25
+ addq $28, $28, $28
+ addq $18, $18, $18
+ addq $18, $27, $18
+ addq $22, $25, $22
+ addq $23, $28, $23
+ addq $8, $18, $8
+ cmpult $23, $28, $20
+ cmpult $8, $18, $19
+ addq $8, $20, $8
+ addq $22, $19, $22
+ stq $23, 88($16)
+ bis $31, $31, $23
+ mulq $6, $6, $17
+ umulh $6, $6, $24
+ addq $8, $17, $8
+ addq $22, $24, $22
+ cmpult $8, $17, $21
+ cmpult $22, $24, $27
+ addq $22, $21, $22
+ addq $23, $27, $23
+ mulq $7, $5, $25
+ umulh $7, $5, $28
+ cmplt $25, $31, $18
+ cmplt $28, $31, $20
+ addq $25, $25, $25
+ addq $28, $28, $28
+ addq $28, $18, $28
+ addq $23, $20, $23
+ addq $8, $25, $8
+ addq $22, $28, $22
+ cmpult $8, $25, $19
+ cmpult $22, $28, $17
+ addq $22, $19, $22
+ addq $23, $17, $23
+ stq $8, 96($16)
+ bis $31, $31, $8
+ mulq $7, $6, $24
+ umulh $7, $6, $21
+ cmplt $24, $31, $27
+ cmplt $21, $31, $18
+ addq $24, $24, $24
+ addq $21, $21, $21
+ addq $21, $27, $21
+ addq $8, $18, $8
+ addq $22, $24, $22
+ addq $23, $21, $23
+ cmpult $22, $24, $20
+ cmpult $23, $21, $25
+ addq $23, $20, $23
+ addq $8, $25, $8
+ stq $22, 104($16)
+ bis $31, $31, $22
+ mulq $7, $7, $28
+ umulh $7, $7, $19
+ addq $23, $28, $23
+ addq $8, $19, $8
+ cmpult $23, $28, $17
+ cmpult $8, $19, $27
+ addq $8, $17, $8
+ addq $22, $27, $22
+ stq $23, 112($16)
+ stq $8, 120($16)
+ ret $31,($26),1
+ .end bn_sqr_comba8
diff --git a/crypto/bn/asm/alpha.s.works b/crypto/bn/asm/alpha.s.works
new file mode 100644
index 0000000000..ee6c587809
--- /dev/null
+++ b/crypto/bn/asm/alpha.s.works
@@ -0,0 +1,533 @@
+
+ # DEC Alpha assembler
+ # The bn_div64 is actually gcc output but the other parts are hand done.
+ # Thanks to tzeruch@ceddec.com for sending me the gcc output for
+ # bn_div64.
+ # I've gone back and re-done most of the routines.
+ # The key thing to remember for the 164 CPU is that while a
+ # multiply operation takes 8 cycles, another one can only be issued
+ # after 4 cycles have elapsed. I've done modifications to help
+ # improve this. Also, normally, a ld instruction will not be available
+ # for about 3 cycles.
+ .file 1 "bn_asm.c"
+ .set noat
+gcc2_compiled.:
+__gnu_compiled_c:
+ .text
+ .align 3
+ .globl bn_mul_add_words
+ .ent bn_mul_add_words
+bn_mul_add_words:
+bn_mul_add_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ .align 5
+ subq $18,4,$18
+ bis $31,$31,$0
+ blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $20,0($17) # 1 1
+ ldq $1,0($16) # 1 1
+ .align 3
+$42:
+ mulq $20,$19,$5 # 1 2 1 ######
+ ldq $21,8($17) # 2 1
+ ldq $2,8($16) # 2 1
+ umulh $20,$19,$20 # 1 2 ######
+ ldq $27,16($17) # 3 1
+ ldq $3,16($16) # 3 1
+ mulq $21,$19,$6 # 2 2 1 ######
+ ldq $28,24($17) # 4 1
+ addq $1,$5,$1 # 1 2 2
+ ldq $4,24($16) # 4 1
+ umulh $21,$19,$21 # 2 2 ######
+ cmpult $1,$5,$22 # 1 2 3 1
+ addq $20,$22,$20 # 1 3 1
+ addq $1,$0,$1 # 1 2 3 1
+ mulq $27,$19,$7 # 3 2 1 ######
+ cmpult $1,$0,$0 # 1 2 3 2
+ addq $2,$6,$2 # 2 2 2
+ addq $20,$0,$0 # 1 3 2
+ cmpult $2,$6,$23 # 2 2 3 1
+ addq $21,$23,$21 # 2 3 1
+ umulh $27,$19,$27 # 3 2 ######
+ addq $2,$0,$2 # 2 2 3 1
+ cmpult $2,$0,$0 # 2 2 3 2
+ subq $18,4,$18
+ mulq $28,$19,$8 # 4 2 1 ######
+ addq $21,$0,$0 # 2 3 2
+ addq $3,$7,$3 # 3 2 2
+ addq $16,32,$16
+ cmpult $3,$7,$24 # 3 2 3 1
+ stq $1,-32($16) # 1 2 4
+ umulh $28,$19,$28 # 4 2 ######
+ addq $27,$24,$27 # 3 3 1
+ addq $3,$0,$3 # 3 2 3 1
+ stq $2,-24($16) # 2 2 4
+ cmpult $3,$0,$0 # 3 2 3 2
+ stq $3,-16($16) # 3 2 4
+ addq $4,$8,$4 # 4 2 2
+ addq $27,$0,$0 # 3 3 2
+ cmpult $4,$8,$25 # 4 2 3 1
+ addq $17,32,$17
+ addq $28,$25,$28 # 4 3 1
+ addq $4,$0,$4 # 4 2 3 1
+ cmpult $4,$0,$0 # 4 2 3 2
+ stq $4,-8($16) # 4 2 4
+ addq $28,$0,$0 # 4 3 2
+ blt $18,$43
+
+ ldq $20,0($17) # 1 1
+ ldq $1,0($16) # 1 1
+
+ br $42
+
+ .align 4
+$45:
+ ldq $20,0($17) # 4 1
+ ldq $1,0($16) # 4 1
+ mulq $20,$19,$5 # 4 2 1
+ subq $18,1,$18
+ addq $16,8,$16
+ addq $17,8,$17
+ umulh $20,$19,$20 # 4 2
+ addq $1,$5,$1 # 4 2 2
+ cmpult $1,$5,$22 # 4 2 3 1
+ addq $20,$22,$20 # 4 3 1
+ addq $1,$0,$1 # 4 2 3 1
+ cmpult $1,$0,$0 # 4 2 3 2
+ addq $20,$0,$0 # 4 3 2
+ stq $1,-8($16) # 4 2 4
+ bgt $18,$45
+ ret $31,($26),1 # else exit
+
+ .align 4
+$43:
+ addq $18,4,$18
+ bgt $18,$45 # goto tail code
+ ret $31,($26),1 # else exit
+
+ .end bn_mul_add_words
+ .align 3
+ .globl bn_mul_words
+ .ent bn_mul_words
+bn_mul_words:
+bn_mul_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ .align 5
+ subq $18,4,$18
+ bis $31,$31,$0
+ blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $20,0($17) # 1 1
+ .align 3
+$142:
+
+ mulq $20,$19,$5 # 1 2 1 #####
+ ldq $21,8($17) # 2 1
+ ldq $27,16($17) # 3 1
+ umulh $20,$19,$20 # 1 2 #####
+ ldq $28,24($17) # 4 1
+ mulq $21,$19,$6 # 2 2 1 #####
+ addq $5,$0,$5 # 1 2 3 1
+ subq $18,4,$18
+ cmpult $5,$0,$0 # 1 2 3 2
+ umulh $21,$19,$21 # 2 2 #####
+ addq $20,$0,$0 # 1 3 2
+ addq $17,32,$17
+ addq $6,$0,$6 # 2 2 3 1
+ mulq $27,$19,$7 # 3 2 1 #####
+ cmpult $6,$0,$0 # 2 2 3 2
+ addq $21,$0,$0 # 2 3 2
+ addq $16,32,$16
+ umulh $27,$19,$27 # 3 2 #####
+ stq $5,-32($16) # 1 2 4
+ mulq $28,$19,$8 # 4 2 1 #####
+ addq $7,$0,$7 # 3 2 3 1
+ stq $6,-24($16) # 2 2 4
+ cmpult $7,$0,$0 # 3 2 3 2
+ umulh $28,$19,$28 # 4 2 #####
+ addq $27,$0,$0 # 3 3 2
+ stq $7,-16($16) # 3 2 4
+ addq $8,$0,$8 # 4 2 3 1
+ cmpult $8,$0,$0 # 4 2 3 2
+
+ addq $28,$0,$0 # 4 3 2
+
+ stq $8,-8($16) # 4 2 4
+
+ blt $18,$143
+
+ ldq $20,0($17) # 1 1
+
+ br $142
+
+ .align 4
+$145:
+ ldq $20,0($17) # 4 1
+ mulq $20,$19,$5 # 4 2 1
+ subq $18,1,$18
+ umulh $20,$19,$20 # 4 2
+ addq $5,$0,$5 # 4 2 3 1
+ addq $16,8,$16
+ cmpult $5,$0,$0 # 4 2 3 2
+ addq $17,8,$17
+ addq $20,$0,$0 # 4 3 2
+ stq $5,-8($16) # 4 2 4
+
+ bgt $18,$145
+ ret $31,($26),1 # else exit
+
+ .align 4
+$143:
+ addq $18,4,$18
+ bgt $18,$145 # goto tail code
+ ret $31,($26),1 # else exit
+
+ .end bn_mul_words
+ .align 3
+ .globl bn_sqr_words
+ .ent bn_sqr_words
+bn_sqr_words:
+bn_sqr_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $18,4,$18
+ blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $20,0($17) # 1 1
+ .align 3
+$542:
+ mulq $20,$20,$5 ######
+ ldq $21,8($17) # 1 1
+ subq $18,4
+ umulh $20,$20,$1 ######
+ ldq $27,16($17) # 1 1
+ mulq $21,$21,$6 ######
+ ldq $28,24($17) # 1 1
+ stq $5,0($16) # r[0]
+ umulh $21,$21,$2 ######
+ stq $1,8($16) # r[1]
+ mulq $27,$27,$7 ######
+ stq $6,16($16) # r[0]
+ umulh $27,$27,$3 ######
+ stq $2,24($16) # r[1]
+ mulq $28,$28,$8 ######
+ stq $7,32($16) # r[0]
+ umulh $28,$28,$4 ######
+ stq $3,40($16) # r[1]
+
+ addq $16,64,$16
+ addq $17,32,$17
+ stq $8,-16($16) # r[0]
+ stq $4,-8($16) # r[1]
+
+ blt $18,$543
+ ldq $20,0($17) # 1 1
+ br $542
+
+$442:
+ ldq $20,0($17) # a[0]
+ mulq $20,$20,$5 # a[0]*w low part r2
+ addq $16,16,$16
+ addq $17,8,$17
+ subq $18,1,$18
+ umulh $20,$20,$1 # a[0]*w high part r3
+ stq $5,-16($16) # r[0]
+ stq $1,-8($16) # r[1]
+
+ bgt $18,$442
+ ret $31,($26),1 # else exit
+
+ .align 4
+$543:
+ addq $18,4,$18
+ bgt $18,$442 # goto tail code
+ ret $31,($26),1 # else exit
+ .end bn_sqr_words
+
+ .align 3
+ .globl bn_add_words
+ .ent bn_add_words
+bn_add_words:
+bn_add_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $19,4,$19
+ bis $31,$31,$0 # carry = 0
+ blt $19,$900
+ ldq $5,0($17) # a[0]
+ ldq $1,0($18) # b[1]
+ .align 3
+$901:
+ addq $1,$5,$1 # r=a+b;
+ ldq $6,8($17) # a[1]
+ cmpult $1,$5,$22 # did we overflow?
+ ldq $2,8($18) # b[1]
+ addq $1,$0,$1 # c+= overflow
+ ldq $7,16($17) # a[2]
+ cmpult $1,$0,$0 # overflow?
+ ldq $3,16($18) # b[2]
+ addq $0,$22,$0
+ ldq $8,24($17) # a[3]
+ addq $2,$6,$2 # r=a+b;
+ ldq $4,24($18) # b[3]
+ cmpult $2,$6,$23 # did we overflow?
+ addq $3,$7,$3 # r=a+b;
+ addq $2,$0,$2 # c+= overflow
+ cmpult $3,$7,$24 # did we overflow?
+ cmpult $2,$0,$0 # overflow?
+ addq $4,$8,$4 # r=a+b;
+ addq $0,$23,$0
+ cmpult $4,$8,$25 # did we overflow?
+ addq $3,$0,$3 # c+= overflow
+ stq $1,0($16) # r[0]=c
+ cmpult $3,$0,$0 # overflow?
+ stq $2,8($16) # r[1]=c
+ addq $0,$24,$0
+ stq $3,16($16) # r[2]=c
+ addq $4,$0,$4 # c+= overflow
+ subq $19,4,$19 # loop--
+ cmpult $4,$0,$0 # overflow?
+ addq $17,32,$17 # a++
+ addq $0,$25,$0
+ stq $4,24($16) # r[3]=c
+ addq $18,32,$18 # b++
+ addq $16,32,$16 # r++
+
+ blt $19,$900
+ ldq $5,0($17) # a[0]
+ ldq $1,0($18) # b[1]
+ br $901
+ .align 4
+$945:
+ ldq $5,0($17) # a[0]
+ ldq $1,0($18) # b[1]
+ addq $1,$5,$1 # r=a+b;
+ subq $19,1,$19 # loop--
+ addq $1,$0,$1 # c+= overflow
+ addq $17,8,$17 # a++
+ cmpult $1,$5,$22 # did we overflow?
+ cmpult $1,$0,$0 # overflow?
+ addq $18,8,$18 # b++
+ stq $1,0($16) # r[0]=c
+ addq $0,$22,$0
+ addq $16,8,$16 # r++
+
+ bgt $19,$945
+ ret $31,($26),1 # else exit
+
+$900:
+ addq $19,4,$19
+ bgt $19,$945 # goto tail code
+ ret $31,($26),1 # else exit
+ .end bn_add_words
+
+ #
+ # What follows was taken directly from the C compiler with a few
+ # hacks to redo the labels.
+ #
+.text
+ .align 3
+ .globl bn_div64
+ .ent bn_div64
+bn_div64:
+ ldgp $29,0($27)
+bn_div64..ng:
+ lda $30,-48($30)
+ .frame $30,48,$26,0
+ stq $26,0($30)
+ stq $9,8($30)
+ stq $10,16($30)
+ stq $11,24($30)
+ stq $12,32($30)
+ stq $13,40($30)
+ .mask 0x4003e00,-48
+ .prologue 1
+ bis $16,$16,$9
+ bis $17,$17,$10
+ bis $18,$18,$11
+ bis $31,$31,$13
+ bis $31,2,$12
+ bne $11,$119
+ lda $0,-1
+ br $31,$136
+ .align 4
+$119:
+ bis $11,$11,$16
+ jsr $26,BN_num_bits_word
+ ldgp $29,0($26)
+ subq $0,64,$1
+ beq $1,$120
+ bis $31,1,$1
+ sll $1,$0,$1
+ cmpule $9,$1,$1
+ bne $1,$120
+ # lda $16,_IO_stderr_
+ # lda $17,$C32
+ # bis $0,$0,$18
+ # jsr $26,fprintf
+ # ldgp $29,0($26)
+ jsr $26,abort
+ ldgp $29,0($26)
+ .align 4
+$120:
+ bis $31,64,$3
+ cmpult $9,$11,$2
+ subq $3,$0,$1
+ addl $1,$31,$0
+ subq $9,$11,$1
+ cmoveq $2,$1,$9
+ beq $0,$122
+ zapnot $0,15,$2
+ subq $3,$0,$1
+ sll $11,$2,$11
+ sll $9,$2,$3
+ srl $10,$1,$1
+ sll $10,$2,$10
+ bis $3,$1,$9
+$122:
+ srl $11,32,$5
+ zapnot $11,15,$6
+ lda $7,-1
+ .align 5
+$123:
+ srl $9,32,$1
+ subq $1,$5,$1
+ bne $1,$126
+ zapnot $7,15,$27
+ br $31,$127
+ .align 4
+$126:
+ bis $9,$9,$24
+ bis $5,$5,$25
+ divqu $24,$25,$27
+$127:
+ srl $10,32,$4
+ .align 5
+$128:
+ mulq $27,$5,$1
+ subq $9,$1,$3
+ zapnot $3,240,$1
+ bne $1,$129
+ mulq $6,$27,$2
+ sll $3,32,$1
+ addq $1,$4,$1
+ cmpule $2,$1,$2
+ bne $2,$129
+ subq $27,1,$27
+ br $31,$128
+ .align 4
+$129:
+ mulq $27,$6,$1
+ mulq $27,$5,$4
+ srl $1,32,$3
+ sll $1,32,$1
+ addq $4,$3,$4
+ cmpult $10,$1,$2
+ subq $10,$1,$10
+ addq $2,$4,$2
+ cmpult $9,$2,$1
+ bis $2,$2,$4
+ beq $1,$134
+ addq $9,$11,$9
+ subq $27,1,$27
+$134:
+ subl $12,1,$12
+ subq $9,$4,$9
+ beq $12,$124
+ sll $27,32,$13
+ sll $9,32,$2
+ srl $10,32,$1
+ sll $10,32,$10
+ bis $2,$1,$9
+ br $31,$123
+ .align 4
+$124:
+ bis $13,$27,$0
+$136:
+ ldq $26,0($30)
+ ldq $9,8($30)
+ ldq $10,16($30)
+ ldq $11,24($30)
+ ldq $12,32($30)
+ ldq $13,40($30)
+ addq $30,48,$30
+ ret $31,($26),1
+ .end bn_div64
+
+ .set noat
+ .text
+ .align 3
+ .globl bn_sub_words
+ .ent bn_sub_words
+bn_sub_words:
+bn_sub_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $19, 4, $19
+ bis $31, $31, $0
+ blt $19, $100
+ ldq $1, 0($17)
+ ldq $2, 0($18)
+$101:
+ ldq $3, 8($17)
+ cmpult $1, $2, $4
+ ldq $5, 8($18)
+ subq $1, $2, $1
+ ldq $6, 16($17)
+ cmpult $1, $0, $2
+ ldq $7, 16($18)
+ subq $1, $0, $23
+ ldq $8, 24($17)
+ addq $2, $4, $0
+ cmpult $3, $5, $24
+ subq $3, $5, $3
+ ldq $22, 24($18)
+ cmpult $3, $0, $5
+ subq $3, $0, $25
+ addq $5, $24, $0
+ cmpult $6, $7, $27
+ subq $6, $7, $6
+ stq $23, 0($16)
+ cmpult $6, $0, $7
+ subq $6, $0, $28
+ addq $7, $27, $0
+ cmpult $8, $22, $21
+ subq $8, $22, $8
+ stq $25, 8($16)
+ cmpult $8, $0, $22
+ subq $8, $0, $20
+ addq $22, $21, $0
+ stq $28, 16($16)
+ subq $19, 4, $19
+ stq $20, 24($16)
+ addq $17, 32, $17
+ addq $18, 32, $18
+ addq $16, 32, $16
+ blt $19, $100
+ ldq $1, 0($17)
+ ldq $2, 0($18)
+ br $101
+$102:
+ ldq $1, 0($17)
+ ldq $2, 0($18)
+ cmpult $1, $2, $27
+ subq $1, $2, $1
+ cmpult $1, $0, $2
+ subq $1, $0, $1
+ stq $1, 0($16)
+ addq $2, $27, $0
+ addq $17, 8, $17
+ addq $18, 8, $18
+ addq $16, 8, $16
+ subq $19, 1, $19
+ bgt $19, $102
+ ret $31,($26),1
+$100:
+ addq $19, 4, $19
+ bgt $19, $102
+$103:
+ ret $31,($26),1
+ .end bn_sub_words
diff --git a/crypto/bn/asm/alpha.works/add.pl b/crypto/bn/asm/alpha.works/add.pl
new file mode 100644
index 0000000000..4dc76e6b69
--- /dev/null
+++ b/crypto/bn/asm/alpha.works/add.pl
@@ -0,0 +1,119 @@
+#!/usr/local/bin/perl
+# alpha assembler
+
+sub bn_add_words
+ {
+ local($name)=@_;
+ local($cc,$a,$b,$r);
+
+ &init_pool(4);
+ ($cc)=GR("r0");
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $bp=&wparam(2);
+ $count=&wparam(3);
+
+ &function_begin($name,"");
+
+ &comment("");
+ &sub($count,4,$count);
+ &mov("zero",$cc);
+ &br(&label("finish"));
+ &blt($count,&label("finish"));
+
+ ($a0,$b0)=&NR(2);
+ &ld($a0,&QWPw(0,$ap));
+ &ld($b0,&QWPw(0,$bp));
+
+##########################################################
+ &set_label("loop");
+
+ ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
+ ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
+ ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
+ ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
+ ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
+ ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
+
+ ($o0,$t0)=&NR(2);
+ &add($a0,$b0,$o0);
+ &cmpult($o0,$b0,$t0);
+ &add($o0,$cc,$o0);
+ &cmpult($o0,$cc,$cc);
+ &add($cc,$t0,$cc); &FR($t0);
+
+ ($t1,$o1)=&NR(2);
+
+ &add($a1,$b1,$o1); &FR($a1);
+ &cmpult($o1,$b1,$t1); &FR($b1);
+ &add($o1,$cc,$o1);
+ &cmpult($o1,$cc,$cc);
+ &add($cc,$t1,$cc); &FR($t1);
+
+ ($t2,$o2)=&NR(2);
+
+ &add($a2,$b2,$o2); &FR($a2);
+ &cmpult($o2,$b2,$t2); &FR($b2);
+ &add($o2,$cc,$o2);
+ &cmpult($o2,$cc,$cc);
+ &add($cc,$t2,$cc); &FR($t2);
+
+ ($t3,$o3)=&NR(2);
+
+ &add($a3,$b3,$o3); &FR($a3);
+ &cmpult($o3,$b3,$t3); &FR($b3);
+ &add($o3,$cc,$o3);
+ &cmpult($o3,$cc,$cc);
+ &add($cc,$t3,$cc); &FR($t3);
+
+ &st($o0,&QWPw(0,$rp)); &FR($o0);
+ &st($o1,&QWPw(0,$rp)); &FR($o1);
+ &st($o2,&QWPw(0,$rp)); &FR($o2);
+ &st($o3,&QWPw(0,$rp)); &FR($o3);
+
+ &sub($count,4,$count); # count-=4
+ &add($ap,4*$QWS,$ap); # count+=4
+ &add($bp,4*$QWS,$bp); # count+=4
+ &add($rp,4*$QWS,$rp); # count+=4
+
+ &blt($count,&label("finish"));
+ &ld($a0,&QWPw(0,$ap));
+ &ld($b0,&QWPw(0,$bp));
+ &br(&label("loop"));
+##################################################
+ # Do the last 0..3 words
+
+ ($t0,$o0)=&NR(2);
+ &set_label("last_loop");
+
+ &ld($a0,&QWPw(0,$ap)); # get a
+ &ld($b0,&QWPw(0,$bp)); # get b
+
+ &add($a0,$b0,$o0);
+ &cmpult($o0,$b0,$t0); # will we borrow?
+ &add($o0,$cc,$o0); # will we borrow?
+ &cmpult($o0,$cc,$cc); # will we borrow?
+ &add($cc,$t0,$cc); # add the borrows
+ &st($o0,&QWPw(0,$rp)); # save
+
+ &add($ap,$QWS,$ap);
+ &add($bp,$QWS,$bp);
+ &add($rp,$QWS,$rp);
+ &sub($count,1,$count);
+ &bgt($count,&label("last_loop"));
+ &function_end_A($name);
+
+######################################################
+ &set_label("finish");
+ &add($count,4,$count);
+ &bgt($count,&label("last_loop"));
+
+ &FR($o0,$t0,$a0,$b0);
+ &set_label("end");
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha.works/div.pl b/crypto/bn/asm/alpha.works/div.pl
new file mode 100644
index 0000000000..7ec144377f
--- /dev/null
+++ b/crypto/bn/asm/alpha.works/div.pl
@@ -0,0 +1,144 @@
+#!/usr/local/bin/perl
+
+sub bn_div64
+ {
+ local($data)=<<'EOF';
+ #
+ # What follows was taken directly from the C compiler with a few
+ # hacks to redo the lables.
+ #
+.text
+ .set noreorder
+ .set volatile
+ .align 3
+ .globl bn_div64
+ .ent bn_div64
+bn_div64:
+ ldgp $29,0($27)
+bn_div64..ng:
+ lda $30,-48($30)
+ .frame $30,48,$26,0
+ stq $26,0($30)
+ stq $9,8($30)
+ stq $10,16($30)
+ stq $11,24($30)
+ stq $12,32($30)
+ stq $13,40($30)
+ .mask 0x4003e00,-48
+ .prologue 1
+ bis $16,$16,$9
+ bis $17,$17,$10
+ bis $18,$18,$11
+ bis $31,$31,$13
+ bis $31,2,$12
+ bne $11,$9119
+ lda $0,-1
+ br $31,$9136
+ .align 4
+$9119:
+ bis $11,$11,$16
+ jsr $26,BN_num_bits_word
+ ldgp $29,0($26)
+ subq $0,64,$1
+ beq $1,$9120
+ bis $31,1,$1
+ sll $1,$0,$1
+ cmpule $9,$1,$1
+ bne $1,$9120
+ # lda $16,_IO_stderr_
+ # lda $17,$C32
+ # bis $0,$0,$18
+ # jsr $26,fprintf
+ # ldgp $29,0($26)
+ jsr $26,abort
+ ldgp $29,0($26)
+ .align 4
+$9120:
+ bis $31,64,$3
+ cmpult $9,$11,$2
+ subq $3,$0,$1
+ addl $1,$31,$0
+ subq $9,$11,$1
+ cmoveq $2,$1,$9
+ beq $0,$9122
+ zapnot $0,15,$2
+ subq $3,$0,$1
+ sll $11,$2,$11
+ sll $9,$2,$3
+ srl $10,$1,$1
+ sll $10,$2,$10
+ bis $3,$1,$9
+$9122:
+ srl $11,32,$5
+ zapnot $11,15,$6
+ lda $7,-1
+ .align 5
+$9123:
+ srl $9,32,$1
+ subq $1,$5,$1
+ bne $1,$9126
+ zapnot $7,15,$27
+ br $31,$9127
+ .align 4
+$9126:
+ bis $9,$9,$24
+ bis $5,$5,$25
+ divqu $24,$25,$27
+$9127:
+ srl $10,32,$4
+ .align 5
+$9128:
+ mulq $27,$5,$1
+ subq $9,$1,$3
+ zapnot $3,240,$1
+ bne $1,$9129
+ mulq $6,$27,$2
+ sll $3,32,$1
+ addq $1,$4,$1
+ cmpule $2,$1,$2
+ bne $2,$9129
+ subq $27,1,$27
+ br $31,$9128
+ .align 4
+$9129:
+ mulq $27,$6,$1
+ mulq $27,$5,$4
+ srl $1,32,$3
+ sll $1,32,$1
+ addq $4,$3,$4
+ cmpult $10,$1,$2
+ subq $10,$1,$10
+ addq $2,$4,$2
+ cmpult $9,$2,$1
+ bis $2,$2,$4
+ beq $1,$9134
+ addq $9,$11,$9
+ subq $27,1,$27
+$9134:
+ subl $12,1,$12
+ subq $9,$4,$9
+ beq $12,$9124
+ sll $27,32,$13
+ sll $9,32,$2
+ srl $10,32,$1
+ sll $10,32,$10
+ bis $2,$1,$9
+ br $31,$9123
+ .align 4
+$9124:
+ bis $13,$27,$0
+$9136:
+ ldq $26,0($30)
+ ldq $9,8($30)
+ ldq $10,16($30)
+ ldq $11,24($30)
+ ldq $12,32($30)
+ ldq $13,40($30)
+ addq $30,48,$30
+ ret $31,($26),1
+ .end bn_div64
+EOF
+ &asm_add($data);
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha.works/mul.pl b/crypto/bn/asm/alpha.works/mul.pl
new file mode 100644
index 0000000000..b182bae452
--- /dev/null
+++ b/crypto/bn/asm/alpha.works/mul.pl
@@ -0,0 +1,116 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_mul_words
+ {
+ local($name)=@_;
+ local($cc,$a,$b,$r,$couny);
+
+ &init_pool(4);
+ ($cc)=GR("r0");
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $count=&wparam(2);
+ $word=&wparam(3);
+
+ &function_begin($name,"");
+
+ &comment("");
+ &sub($count,4,$count);
+ &mov("zero",$cc);
+ &br(&label("finish"));
+ &blt($count,&label("finish"));
+
+ ($a0,$r0)=&NR(2);
+ &ld($a0,&QWPw(0,$ap));
+ &ld($r0,&QWPw(0,$rp));
+
+$a=<<'EOF';
+##########################################################
+ &set_label("loop");
+
+ ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
+ ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
+ ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
+ ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
+ ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
+ ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
+
+ ($o0,$t0)=&NR(2);
+ &add($a0,$b0,$o0);
+ &cmpult($o0,$b0,$t0);
+ &add($o0,$cc,$o0);
+ &cmpult($o0,$cc,$cc);
+ &add($cc,$t0,$cc); &FR($t0);
+
+ ($t1,$o1)=&NR(2);
+
+ &add($a1,$b1,$o1); &FR($a1);
+ &cmpult($o1,$b1,$t1); &FR($b1);
+ &add($o1,$cc,$o1);
+ &cmpult($o1,$cc,$cc);
+ &add($cc,$t1,$cc); &FR($t1);
+
+ ($t2,$o2)=&NR(2);
+
+ &add($a2,$b2,$o2); &FR($a2);
+ &cmpult($o2,$b2,$t2); &FR($b2);
+ &add($o2,$cc,$o2);
+ &cmpult($o2,$cc,$cc);
+ &add($cc,$t2,$cc); &FR($t2);
+
+ ($t3,$o3)=&NR(2);
+
+ &add($a3,$b3,$o3); &FR($a3);
+ &cmpult($o3,$b3,$t3); &FR($b3);
+ &add($o3,$cc,$o3);
+ &cmpult($o3,$cc,$cc);
+ &add($cc,$t3,$cc); &FR($t3);
+
+ &st($o0,&QWPw(0,$rp)); &FR($o0);
+ &st($o1,&QWPw(0,$rp)); &FR($o1);
+ &st($o2,&QWPw(0,$rp)); &FR($o2);
+ &st($o3,&QWPw(0,$rp)); &FR($o3);
+
+ &sub($count,4,$count); # count-=4
+ &add($ap,4*$QWS,$ap); # count+=4
+ &add($bp,4*$QWS,$bp); # count+=4
+ &add($rp,4*$QWS,$rp); # count+=4
+
+ &blt($count,&label("finish"));
+ &ld($a0,&QWPw(0,$ap));
+ &ld($b0,&QWPw(0,$bp));
+ &br(&label("loop"));
+EOF
+##################################################
+ # Do the last 0..3 words
+
+ &set_label("last_loop");
+
+ &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
+ &mul($a0,$word,($l0)=&NR(1));
+ &add($ap,$QWS,$ap);
+ &muh($a0,$word,($h0)=&NR(1)); &FR($a0);
+ &add($l0,$cc,$l0);
+ &add($rp,$QWS,$rp);
+ &sub($count,1,$count);
+ &cmpult($l0,$cc,$cc);
+ &st($l0,&QWPw(-1,$rp)); &FR($l0);
+ &add($h0,$cc,$cc); &FR($h0);
+
+ &bgt($count,&label("last_loop"));
+ &function_end_A($name);
+
+######################################################
+ &set_label("finish");
+ &add($count,4,$count);
+ &bgt($count,&label("last_loop"));
+
+ &set_label("end");
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha.works/mul_add.pl b/crypto/bn/asm/alpha.works/mul_add.pl
new file mode 100644
index 0000000000..e37f6315fb
--- /dev/null
+++ b/crypto/bn/asm/alpha.works/mul_add.pl
@@ -0,0 +1,120 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_mul_add_words
+ {
+ local($name)=@_;
+ local($cc,$a,$b,$r,$couny);
+
+ &init_pool(4);
+ ($cc)=GR("r0");
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $count=&wparam(2);
+ $word=&wparam(3);
+
+ &function_begin($name,"");
+
+ &comment("");
+ &sub($count,4,$count);
+ &mov("zero",$cc);
+ &br(&label("finish"));
+ &blt($count,&label("finish"));
+
+ ($a0,$r0)=&NR(2);
+ &ld($a0,&QWPw(0,$ap));
+ &ld($r0,&QWPw(0,$rp));
+
+$a=<<'EOF';
+##########################################################
+ &set_label("loop");
+
+ ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
+ ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
+ ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
+ ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
+ ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
+ ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
+
+ ($o0,$t0)=&NR(2);
+ &add($a0,$b0,$o0);
+ &cmpult($o0,$b0,$t0);
+ &add($o0,$cc,$o0);
+ &cmpult($o0,$cc,$cc);
+ &add($cc,$t0,$cc); &FR($t0);
+
+ ($t1,$o1)=&NR(2);
+
+ &add($a1,$b1,$o1); &FR($a1);
+ &cmpult($o1,$b1,$t1); &FR($b1);
+ &add($o1,$cc,$o1);
+ &cmpult($o1,$cc,$cc);
+ &add($cc,$t1,$cc); &FR($t1);
+
+ ($t2,$o2)=&NR(2);
+
+ &add($a2,$b2,$o2); &FR($a2);
+ &cmpult($o2,$b2,$t2); &FR($b2);
+ &add($o2,$cc,$o2);
+ &cmpult($o2,$cc,$cc);
+ &add($cc,$t2,$cc); &FR($t2);
+
+ ($t3,$o3)=&NR(2);
+
+ &add($a3,$b3,$o3); &FR($a3);
+ &cmpult($o3,$b3,$t3); &FR($b3);
+ &add($o3,$cc,$o3);
+ &cmpult($o3,$cc,$cc);
+ &add($cc,$t3,$cc); &FR($t3);
+
+ &st($o0,&QWPw(0,$rp)); &FR($o0);
+ &st($o1,&QWPw(0,$rp)); &FR($o1);
+ &st($o2,&QWPw(0,$rp)); &FR($o2);
+ &st($o3,&QWPw(0,$rp)); &FR($o3);
+
+ &sub($count,4,$count); # count-=4
+ &add($ap,4*$QWS,$ap); # count+=4
+ &add($bp,4*$QWS,$bp); # count+=4
+ &add($rp,4*$QWS,$rp); # count+=4
+
+ &blt($count,&label("finish"));
+ &ld($a0,&QWPw(0,$ap));
+ &ld($b0,&QWPw(0,$bp));
+ &br(&label("loop"));
+EOF
+##################################################
+ # Do the last 0..3 words
+
+ &set_label("last_loop");
+
+ &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
+ &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b
+ &mul($a0,$word,($l0)=&NR(1));
+ &sub($count,1,$count);
+ &add($ap,$QWS,$ap);
+ &muh($a0,$word,($h0)=&NR(1)); &FR($a0);
+ &add($r0,$l0,$r0);
+ &add($rp,$QWS,$rp);
+ &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0);
+ &add($r0,$cc,$r0);
+ &add($h0,$t0,$h0); &FR($t0);
+ &cmpult($r0,$cc,$cc);
+ &st($r0,&QWPw(-1,$rp)); &FR($r0);
+ &add($h0,$cc,$cc); &FR($h0);
+
+ &bgt($count,&label("last_loop"));
+ &function_end_A($name);
+
+######################################################
+ &set_label("finish");
+ &add($count,4,$count);
+ &bgt($count,&label("last_loop"));
+
+ &set_label("end");
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha.works/mul_c4.pl b/crypto/bn/asm/alpha.works/mul_c4.pl
new file mode 100644
index 0000000000..5efd201281
--- /dev/null
+++ b/crypto/bn/asm/alpha.works/mul_c4.pl
@@ -0,0 +1,213 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub mul_add_c
+ {
+ local($a,$b,$c0,$c1,$c2)=@_;
+ local($l1,$h1,$t1,$t2);
+
+ &mul($a,$b,($l1)=&NR(1));
+ &muh($a,$b,($h1)=&NR(1));
+ &add($c0,$l1,$c0);
+ &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
+ &add($t1,$h1,$h1); &FR($t1);
+ &add($c1,$h1,$c1);
+ &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
+ &add($c2,$t2,$c2); &FR($t2);
+ }
+
+sub bn_mul_comba4
+ {
+ local($name)=@_;
+ local(@a,@b,$r,$c0,$c1,$c2);
+
+ $cnt=1;
+ &init_pool(3);
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $bp=&wparam(2);
+
+ &function_begin($name,"");
+
+ &comment("");
+
+ &ld(($a[0])=&NR(1),&QWPw(0,$ap));
+ &ld(($b[0])=&NR(1),&QWPw(0,$bp));
+ &ld(($a[1])=&NR(1),&QWPw(1,$ap));
+ &ld(($b[1])=&NR(1),&QWPw(1,$bp));
+ &mul($a[0],$b[0],($r00)=&NR(1));
+ &ld(($a[2])=&NR(1),&QWPw(2,$ap));
+ &ld(($b[2])=&NR(1),&QWPw(2,$bp));
+ &muh($a[0],$b[0],($r01)=&NR(1));
+ &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
+ &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
+ &mul($a[0],$b[1],($r02)=&NR(1));
+
+ ($R,$H1,$H2)=&NR(3);
+
+ &st($r00,&QWPw(0,$rp)); &FR($r00);
+
+ &mov("zero",$R);
+ &mul($a[1],$b[0],($r03)=&NR(1));
+
+ &mov("zero",$H1);
+ &mov("zero",$H0);
+ &add($R,$r01,$R);
+ &muh($a[0],$b[1],($r04)=&NR(1));
+ &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01);
+ &add($R,$r02,$R);
+ &add($H1,$t01,$H1) &FR($t01);
+ &muh($a[1],$b[0],($r05)=&NR(1));
+ &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02);
+ &add($R,$r03,$R);
+ &add($H2,$t02,$H2) &FR($t02);
+ &mul($a[0],$b[2],($r06)=&NR(1));
+ &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03);
+ &add($H1,$t03,$H1) &FR($t03);
+ &st($R,&QWPw(1,$rp));
+ &add($H1,$H2,$R);
+
+ &mov("zero",$H1);
+ &add($R,$r04,$R);
+ &mov("zero",$H2);
+ &mul($a[1],$b[1],($r07)=&NR(1));
+ &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04);
+ &add($R,$r05,$R);
+ &add($H1,$t04,$H1) &FR($t04);
+ &mul($a[2],$b[0],($r08)=&NR(1));
+ &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05);
+ &add($R,$r01,$R);
+ &add($H2,$t05,$H2) &FR($t05);
+ &muh($a[0],$b[2],($r09)=&NR(1));
+ &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06);
+ &add($R,$r07,$R);
+ &add($H1,$t06,$H1) &FR($t06);
+ &muh($a[1],$b[1],($r10)=&NR(1));
+ &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07);
+ &add($R,$r08,$R);
+ &add($H2,$t07,$H2) &FR($t07);
+ &muh($a[2],$b[0],($r11)=&NR(1));
+ &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08);
+ &add($H1,$t08,$H1) &FR($t08);
+ &st($R,&QWPw(2,$rp));
+ &add($H1,$H2,$R);
+
+ &mov("zero",$H1);
+ &add($R,$r09,$R);
+ &mov("zero",$H2);
+ &mul($a[0],$b[3],($r12)=&NR(1));
+ &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09);
+ &add($R,$r10,$R);
+ &add($H1,$t09,$H1) &FR($t09);
+ &mul($a[1],$b[2],($r13)=&NR(1));
+ &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10);
+ &add($R,$r11,$R);
+ &add($H1,$t10,$H1) &FR($t10);
+ &mul($a[2],$b[1],($r14)=&NR(1));
+ &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11);
+ &add($R,$r12,$R);
+ &add($H1,$t11,$H1) &FR($t11);
+ &mul($a[3],$b[0],($r15)=&NR(1));
+ &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12);
+ &add($R,$r13,$R);
+ &add($H1,$t12,$H1) &FR($t12);
+ &muh($a[0],$b[3],($r16)=&NR(1));
+ &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13);
+ &add($R,$r14,$R);
+ &add($H1,$t13,$H1) &FR($t13);
+ &muh($a[1],$b[2],($r17)=&NR(1));
+ &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14);
+ &add($R,$r15,$R);
+ &add($H1,$t14,$H1) &FR($t14);
+ &muh($a[2],$b[1],($r18)=&NR(1));
+ &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15);
+ &add($H1,$t15,$H1) &FR($t15);
+ &st($R,&QWPw(3,$rp));
+ &add($H1,$H2,$R);
+
+ &mov("zero",$H1);
+ &add($R,$r16,$R);
+ &mov("zero",$H2);
+ &muh($a[3],$b[0],($r19)=&NR(1));
+ &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16);
+ &add($R,$r17,$R);
+ &add($H1,$t16,$H1) &FR($t16);
+ &mul($a[1],$b[3],($r20)=&NR(1));
+ &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17);
+ &add($R,$r18,$R);
+ &add($H1,$t17,$H1) &FR($t17);
+ &mul($a[2],$b[2],($r21)=&NR(1));
+ &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18);
+ &add($R,$r19,$R);
+ &add($H1,$t18,$H1) &FR($t18);
+ &mul($a[3],$b[1],($r22)=&NR(1));
+ &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19);
+ &add($R,$r20,$R);
+ &add($H1,$t19,$H1) &FR($t19);
+ &muh($a[1],$b[3],($r23)=&NR(1));
+ &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20);
+ &add($R,$r21,$R);
+ &add($H1,$t20,$H1) &FR($t20);
+ &muh($a[2],$b[2],($r24)=&NR(1));
+ &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21);
+ &add($R,$r22,$R);
+ &add($H1,$t21,$H1) &FR($t21);
+ &muh($a[3],$b[1],($r25)=&NR(1));
+ &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22);
+ &add($H1,$t22,$H1) &FR($t22);
+ &st($R,&QWPw(4,$rp));
+ &add($H1,$H2,$R);
+
+ &mov("zero",$H1);
+ &add($R,$r23,$R);
+ &mov("zero",$H2);
+ &mul($a[2],$b[3],($r26)=&NR(1));
+ &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23);
+ &add($R,$r24,$R);
+ &add($H1,$t23,$H1) &FR($t23);
+ &mul($a[3],$b[2],($r27)=&NR(1));
+ &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24);
+ &add($R,$r25,$R);
+ &add($H1,$t24,$H1) &FR($t24);
+ &muh($a[2],$b[3],($r28)=&NR(1));
+ &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25);
+ &add($R,$r26,$R);
+ &add($H1,$t25,$H1) &FR($t25);
+ &muh($a[3],$b[2],($r29)=&NR(1));
+ &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26);
+ &add($R,$r27,$R);
+ &add($H1,$t26,$H1) &FR($t26);
+ &mul($a[3],$b[3],($r30)=&NR(1));
+ &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27);
+ &add($H1,$t27,$H1) &FR($t27);
+ &st($R,&QWPw(5,$rp));
+ &add($H1,$H2,$R);
+
+ &mov("zero",$H1);
+ &add($R,$r28,$R);
+ &mov("zero",$H2);
+ &muh($a[3],$b[3],($r31)=&NR(1));
+ &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28);
+ &add($R,$r29,$R);
+ &add($H1,$t28,$H1) &FR($t28);
+ ############
+ &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29);
+ &add($R,$r30,$R);
+ &add($H1,$t29,$H1) &FR($t29);
+ ############
+ &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30);
+ &add($H1,$t30,$H1) &FR($t30);
+ &st($R,&QWPw(6,$rp));
+ &add($H1,$H2,$R);
+
+ &add($R,$r31,$R); &FR($r31);
+ &st($R,&QWPw(7,$rp));
+
+ &FR($R,$H1,$H2);
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha.works/mul_c4.works.pl b/crypto/bn/asm/alpha.works/mul_c4.works.pl
new file mode 100644
index 0000000000..79d86dd25c
--- /dev/null
+++ b/crypto/bn/asm/alpha.works/mul_c4.works.pl
@@ -0,0 +1,98 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub mul_add_c
+ {
+ local($a,$b,$c0,$c1,$c2)=@_;
+ local($l1,$h1,$t1,$t2);
+
+print STDERR "count=$cnt\n"; $cnt++;
+ &mul($a,$b,($l1)=&NR(1));
+ &muh($a,$b,($h1)=&NR(1));
+ &add($c0,$l1,$c0);
+ &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
+ &add($t1,$h1,$h1); &FR($t1);
+ &add($c1,$h1,$c1);
+ &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
+ &add($c2,$t2,$c2); &FR($t2);
+ }
+
+sub bn_mul_comba4
+ {
+ local($name)=@_;
+ local(@a,@b,$r,$c0,$c1,$c2);
+
+ $cnt=1;
+ &init_pool(3);
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $bp=&wparam(2);
+
+ &function_begin($name,"");
+
+ &comment("");
+
+ &ld(($a[0])=&NR(1),&QWPw(0,$ap));
+ &ld(($b[0])=&NR(1),&QWPw(0,$bp));
+ &ld(($a[1])=&NR(1),&QWPw(1,$ap));
+ &ld(($b[1])=&NR(1),&QWPw(1,$bp));
+ &ld(($a[2])=&NR(1),&QWPw(2,$ap));
+ &ld(($b[2])=&NR(1),&QWPw(2,$bp));
+ &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
+ &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp);
+
+ ($c0,$c1,$c2)=&NR(3);
+ &mov("zero",$c2);
+ &mul($a[0],$b[0],$c0);
+ &muh($a[0],$b[0],$c1);
+ &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]);
+ &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]);
+ &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]);
+ &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]);
+ &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]);
+ &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]);
+ &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]);
+ &st($c0,&QWPw(6,$rp));
+ &st($c1,&QWPw(7,$rp));
+
+ &FR($c0,$c1,$c2);
+
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha.works/mul_c8.pl b/crypto/bn/asm/alpha.works/mul_c8.pl
new file mode 100644
index 0000000000..525ca7494b
--- /dev/null
+++ b/crypto/bn/asm/alpha.works/mul_c8.pl
@@ -0,0 +1,177 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_mul_comba8
+ {
+ local($name)=@_;
+ local(@a,@b,$r,$c0,$c1,$c2);
+
+ $cnt=1;
+ &init_pool(3);
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $bp=&wparam(2);
+
+ &function_begin($name,"");
+
+ &comment("");
+
+ &stack_push(2);
+ &ld(($a[0])=&NR(1),&QWPw(0,$ap));
+ &ld(($b[0])=&NR(1),&QWPw(0,$bp));
+ &st($reg_s0,&swtmp(0)); &FR($reg_s0);
+ &st($reg_s1,&swtmp(1)); &FR($reg_s1);
+ &ld(($a[1])=&NR(1),&QWPw(1,$ap));
+ &ld(($b[1])=&NR(1),&QWPw(1,$bp));
+ &ld(($a[2])=&NR(1),&QWPw(2,$ap));
+ &ld(($b[2])=&NR(1),&QWPw(2,$bp));
+ &ld(($a[3])=&NR(1),&QWPw(3,$ap));
+ &ld(($b[3])=&NR(1),&QWPw(3,$bp));
+ &ld(($a[4])=&NR(1),&QWPw(1,$ap));
+ &ld(($b[4])=&NR(1),&QWPw(1,$bp));
+ &ld(($a[5])=&NR(1),&QWPw(1,$ap));
+ &ld(($b[5])=&NR(1),&QWPw(1,$bp));
+ &ld(($a[6])=&NR(1),&QWPw(1,$ap));
+ &ld(($b[6])=&NR(1),&QWPw(1,$bp));
+ &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap);
+ &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp);
+
+ ($c0,$c1,$c2)=&NR(3);
+ &mov("zero",$c2);
+ &mul($a[0],$b[0],$c0);
+ &muh($a[0],$b[0],$c1);
+ &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[4],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[4],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[5],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[4],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[5],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[6],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]);
+ &mul_add_c($a[1],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[4],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[5],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[6],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]);
+ &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]);
+ &mul_add_c($a[2],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[4],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[5],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[6],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]);
+ &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]);
+ &mul_add_c($a[3],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[4],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[5],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[6],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]);
+ &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]);
+ &mul_add_c($a[4],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[5],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[6],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]);
+ &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]);
+ &mul_add_c($a[5],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[6],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]);
+ &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]);
+ &mul_add_c($a[6],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]);
+ &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]);
+ &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]);
+ &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]);
+ &st($c0,&QWPw(14,$rp));
+ &st($c1,&QWPw(15,$rp));
+
+ &FR($c0,$c1,$c2);
+
+ &ld($reg_s0,&swtmp(0));
+ &ld($reg_s1,&swtmp(1));
+ &stack_pop(2);
+
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha.works/sqr.pl b/crypto/bn/asm/alpha.works/sqr.pl
new file mode 100644
index 0000000000..a55b696906
--- /dev/null
+++ b/crypto/bn/asm/alpha.works/sqr.pl
@@ -0,0 +1,113 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_sqr_words
+ {
+ local($name)=@_;
+ local($cc,$a,$b,$r,$couny);
+
+ &init_pool(3);
+ ($cc)=GR("r0");
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $count=&wparam(2);
+
+ &function_begin($name,"");
+
+ &comment("");
+ &sub($count,4,$count);
+ &mov("zero",$cc);
+ &br(&label("finish"));
+ &blt($count,&label("finish"));
+
+ ($a0,$r0)=&NR(2);
+ &ld($a0,&QWPw(0,$ap));
+ &ld($r0,&QWPw(0,$rp));
+
+$a=<<'EOF';
+##########################################################
+ &set_label("loop");
+
+ ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
+ ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
+ ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
+ ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
+ ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
+ ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
+
+ ($o0,$t0)=&NR(2);
+ &add($a0,$b0,$o0);
+ &cmpult($o0,$b0,$t0);
+ &add($o0,$cc,$o0);
+ &cmpult($o0,$cc,$cc);
+ &add($cc,$t0,$cc); &FR($t0);
+
+ ($t1,$o1)=&NR(2);
+
+ &add($a1,$b1,$o1); &FR($a1);
+ &cmpult($o1,$b1,$t1); &FR($b1);
+ &add($o1,$cc,$o1);
+ &cmpult($o1,$cc,$cc);
+ &add($cc,$t1,$cc); &FR($t1);
+
+ ($t2,$o2)=&NR(2);
+
+ &add($a2,$b2,$o2); &FR($a2);
+ &cmpult($o2,$b2,$t2); &FR($b2);
+ &add($o2,$cc,$o2);
+ &cmpult($o2,$cc,$cc);
+ &add($cc,$t2,$cc); &FR($t2);
+
+ ($t3,$o3)=&NR(2);
+
+ &add($a3,$b3,$o3); &FR($a3);
+ &cmpult($o3,$b3,$t3); &FR($b3);
+ &add($o3,$cc,$o3);
+ &cmpult($o3,$cc,$cc);
+ &add($cc,$t3,$cc); &FR($t3);
+
+ &st($o0,&QWPw(0,$rp)); &FR($o0);
+ &st($o1,&QWPw(0,$rp)); &FR($o1);
+ &st($o2,&QWPw(0,$rp)); &FR($o2);
+ &st($o3,&QWPw(0,$rp)); &FR($o3);
+
+ &sub($count,4,$count); # count-=4
+ &add($ap,4*$QWS,$ap); # count+=4
+ &add($bp,4*$QWS,$bp); # count+=4
+ &add($rp,4*$QWS,$rp); # count+=4
+
+ &blt($count,&label("finish"));
+ &ld($a0,&QWPw(0,$ap));
+ &ld($b0,&QWPw(0,$bp));
+ &br(&label("loop"));
+EOF
+##################################################
+ # Do the last 0..3 words
+
+ &set_label("last_loop");
+
+ &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
+ &mul($a0,$a0,($l0)=&NR(1));
+ &add($ap,$QWS,$ap);
+ &add($rp,2*$QWS,$rp);
+ &sub($count,1,$count);
+ &muh($a0,$a0,($h0)=&NR(1)); &FR($a0);
+ &st($l0,&QWPw(-2,$rp)); &FR($l0);
+ &st($h0,&QWPw(-1,$rp)); &FR($h0);
+
+ &bgt($count,&label("last_loop"));
+ &function_end_A($name);
+
+######################################################
+ &set_label("finish");
+ &add($count,4,$count);
+ &bgt($count,&label("last_loop"));
+
+ &set_label("end");
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha.works/sqr_c4.pl b/crypto/bn/asm/alpha.works/sqr_c4.pl
new file mode 100644
index 0000000000..bf33f5b503
--- /dev/null
+++ b/crypto/bn/asm/alpha.works/sqr_c4.pl
@@ -0,0 +1,109 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub sqr_add_c
+ {
+ local($a,$c0,$c1,$c2)=@_;
+ local($l1,$h1,$t1,$t2);
+
+ &mul($a,$a,($l1)=&NR(1));
+ &muh($a,$a,($h1)=&NR(1));
+ &add($c0,$l1,$c0);
+ &add($c1,$h1,$c1);
+ &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
+ &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
+ &add($c1,$t1,$c1); &FR($t1);
+ &add($c2,$t2,$c2); &FR($t2);
+ }
+
+sub sqr_add_c2
+ {
+ local($a,$b,$c0,$c1,$c2)=@_;
+ local($l1,$h1,$t1,$t2);
+
+ &mul($a,$b,($l1)=&NR(1));
+ &muh($a,$b,($h1)=&NR(1));
+ &cmplt($l1,"zero",($lc1)=&NR(1));
+ &cmplt($h1,"zero",($hc1)=&NR(1));
+ &add($l1,$l1,$l1);
+ &add($h1,$h1,$h1);
+ &add($h1,$lc1,$h1); &FR($lc1);
+ &add($c2,$hc1,$c2); &FR($hc1);
+
+ &add($c0,$l1,$c0);
+ &add($c1,$h1,$c1);
+ &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1);
+ &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1);
+
+ &add($c1,$lc1,$c1); &FR($lc1);
+ &add($c2,$hc1,$c2); &FR($hc1);
+ }
+
+
+sub bn_sqr_comba4
+ {
+ local($name)=@_;
+ local(@a,@b,$r,$c0,$c1,$c2);
+
+ $cnt=1;
+ &init_pool(2);
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+
+ &function_begin($name,"");
+
+ &comment("");
+
+ &ld(($a[0])=&NR(1),&QWPw(0,$ap));
+ &ld(($a[1])=&NR(1),&QWPw(1,$ap));
+ &ld(($a[2])=&NR(1),&QWPw(2,$ap));
+ &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
+
+ ($c0,$c1,$c2)=&NR(3);
+
+ &mov("zero",$c2);
+ &mul($a[0],$a[0],$c0);
+ &muh($a[0],$a[0],$c1);
+ &st($c0,&QWPw(0,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2);
+ &st($c0,&QWPw(1,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(2,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
+ &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
+ &st($c0,&QWPw(3,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[2],$c0,$c1,$c2);
+ &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
+ &st($c0,&QWPw(4,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
+ &st($c0,&QWPw(5,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[3],$c0,$c1,$c2);
+ &st($c0,&QWPw(6,$rp));
+ &st($c1,&QWPw(7,$rp));
+
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha.works/sqr_c8.pl b/crypto/bn/asm/alpha.works/sqr_c8.pl
new file mode 100644
index 0000000000..b4afe085f1
--- /dev/null
+++ b/crypto/bn/asm/alpha.works/sqr_c8.pl
@@ -0,0 +1,132 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_sqr_comba8
+ {
+ local($name)=@_;
+ local(@a,@b,$r,$c0,$c1,$c2);
+
+ $cnt=1;
+ &init_pool(2);
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+
+ &function_begin($name,"");
+
+ &comment("");
+
+ &ld(($a[0])=&NR(1),&QWPw(0,$ap));
+ &ld(($a[1])=&NR(1),&QWPw(1,$ap));
+ &ld(($a[2])=&NR(1),&QWPw(2,$ap));
+ &ld(($a[3])=&NR(1),&QWPw(3,$ap));
+ &ld(($a[4])=&NR(1),&QWPw(4,$ap));
+ &ld(($a[5])=&NR(1),&QWPw(5,$ap));
+ &ld(($a[6])=&NR(1),&QWPw(6,$ap));
+ &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap);
+
+ ($c0,$c1,$c2)=&NR(3);
+
+ &mov("zero",$c2);
+ &mul($a[0],$a[0],$c0);
+ &muh($a[0],$a[0],$c1);
+ &st($c0,&QWPw(0,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(1,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(2,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(3,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[2],$c0,$c1,$c2);
+ &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(4,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
+ &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(5,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[3],$c0,$c1,$c2);
+ &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2);
+ &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(6,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2);
+ &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2);
+ &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(7,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[4],$c0,$c1,$c2);
+ &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2);
+ &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2);
+ &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2);
+ &st($c0,&QWPw(8,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2);
+ &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2);
+ &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2);
+ &st($c0,&QWPw(9,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[5],$c0,$c1,$c2);
+ &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2);
+ &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2);
+ &st($c0,&QWPw(10,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2);
+ &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2);
+ &st($c0,&QWPw(11,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[6],$c0,$c1,$c2);
+ &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2);
+ &st($c0,&QWPw(12,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2);
+ &st($c0,&QWPw(13,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[7],$c0,$c1,$c2);
+ &st($c0,&QWPw(14,$rp));
+ &st($c1,&QWPw(15,$rp));
+
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha.works/sub.pl b/crypto/bn/asm/alpha.works/sub.pl
new file mode 100644
index 0000000000..d998da5c21
--- /dev/null
+++ b/crypto/bn/asm/alpha.works/sub.pl
@@ -0,0 +1,108 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_sub_words
+ {
+ local($name)=@_;
+ local($cc,$a,$b,$r);
+
+ &init_pool(4);
+ ($cc)=GR("r0");
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $bp=&wparam(2);
+ $count=&wparam(3);
+
+ &function_begin($name,"");
+
+ &comment("");
+ &sub($count,4,$count);
+ &mov("zero",$cc);
+ &blt($count,&label("finish"));
+
+ ($a0,$b0)=&NR(2);
+ &ld($a0,&QWPw(0,$ap));
+ &ld($b0,&QWPw(0,$bp));
+
+##########################################################
+ &set_label("loop");
+
+ ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8);
+ &ld($a1,&QWPw(1,$ap));
+ &cmpult($a0,$b0,$tmp); # will we borrow?
+ &ld($b1,&QWPw(1,$bp));
+ &sub($a0,$b0,$a0); # do the subtract
+ &ld($a2,&QWPw(2,$ap));
+ &cmpult($a0,$cc,$b0); # will we borrow?
+ &ld($b2,&QWPw(2,$bp));
+ &sub($a0,$cc,$o0); # will we borrow?
+ &ld($a3,&QWPw(3,$ap));
+ &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp);
+
+ &cmpult($a1,$b1,$t1); # will we borrow?
+ &sub($a1,$b1,$a1); # do the subtract
+ &ld($b3,&QWPw(3,$bp));
+ &cmpult($a1,$cc,$b1); # will we borrow?
+ &sub($a1,$cc,$o1); # will we borrow?
+ &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1);
+
+ &cmpult($a2,$b2,$tmp); # will we borrow?
+ &sub($a2,$b2,$a2); # do the subtract
+ &st($o0,&QWPw(0,$rp)); &FR($o0); # save
+ &cmpult($a2,$cc,$b2); # will we borrow?
+ &sub($a2,$cc,$o2); # will we borrow?
+ &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2);
+
+ &cmpult($a3,$b3,$t3); # will we borrow?
+ &sub($a3,$b3,$a3); # do the subtract
+ &st($o1,&QWPw(1,$rp)); &FR($o1);
+ &cmpult($a3,$cc,$b3); # will we borrow?
+ &sub($a3,$cc,$o3); # will we borrow?
+ &add($b3,$t3,$cc); &FR($t3,$a3,$b3);
+
+ &st($o2,&QWPw(2,$rp)); &FR($o2);
+ &sub($count,4,$count); # count-=4
+ &st($o3,&QWPw(3,$rp)); &FR($o3);
+ &add($ap,4*$QWS,$ap); # count+=4
+ &add($bp,4*$QWS,$bp); # count+=4
+ &add($rp,4*$QWS,$rp); # count+=4
+
+ &blt($count,&label("finish"));
+ &ld($a0,&QWPw(0,$ap));
+ &ld($b0,&QWPw(0,$bp));
+ &br(&label("loop"));
+##################################################
+ # Do the last 0..3 words
+
+ &set_label("last_loop");
+
+ &ld($a0,&QWPw(0,$ap)); # get a
+ &ld($b0,&QWPw(0,$bp)); # get b
+ &cmpult($a0,$b0,$tmp); # will we borrow?
+ &sub($a0,$b0,$a0); # do the subtract
+ &cmpult($a0,$cc,$b0); # will we borrow?
+ &sub($a0,$cc,$a0); # will we borrow?
+ &st($a0,&QWPw(0,$rp)); # save
+ &add($b0,$tmp,$cc); # add the borrows
+
+ &add($ap,$QWS,$ap);
+ &add($bp,$QWS,$bp);
+ &add($rp,$QWS,$rp);
+ &sub($count,1,$count);
+ &bgt($count,&label("last_loop"));
+ &function_end_A($name);
+
+######################################################
+ &set_label("finish");
+ &add($count,4,$count);
+ &bgt($count,&label("last_loop"));
+
+ &FR($a0,$b0);
+ &set_label("end");
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha/add.pl b/crypto/bn/asm/alpha/add.pl
new file mode 100644
index 0000000000..13bf516428
--- /dev/null
+++ b/crypto/bn/asm/alpha/add.pl
@@ -0,0 +1,118 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_add_words
+ {
+ local($name)=@_;
+ local($cc,$a,$b,$r);
+
+ &init_pool(4);
+ ($cc)=GR("r0");
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $bp=&wparam(2);
+ $count=&wparam(3);
+
+ &function_begin($name,"");
+
+ &comment("");
+ &sub($count,4,$count);
+ &mov("zero",$cc);
+ &blt($count,&label("finish"));
+
+ ($a0,$b0)=&NR(2);
+
+##########################################################
+ &set_label("loop");
+
+ &ld(($a0)=&NR(1),&QWPw(0,$ap));
+ &ld(($b0)=&NR(1),&QWPw(0,$bp));
+ &ld(($a1)=&NR(1),&QWPw(1,$ap));
+ &ld(($b1)=&NR(1),&QWPw(1,$bp));
+
+ ($o0,$t0)=&NR(2);
+ &add($a0,$b0,$o0);
+ &ld(($a2)=&NR(1),&QWPw(2,$ap));
+ &cmpult($o0,$b0,$t0);
+ &add($o0,$cc,$o0);
+ &cmpult($o0,$cc,$cc);
+ &ld(($b2)=&NR(1),&QWPw(2,$bp));
+ &add($cc,$t0,$cc); &FR($t0);
+
+ ($t1,$o1)=&NR(2);
+
+ &add($a1,$b1,$o1); &FR($a1);
+ &cmpult($o1,$b1,$t1); &FR($b1);
+ &add($o1,$cc,$o1);
+ &cmpult($o1,$cc,$cc);
+ &ld(($a3)=&NR(1),&QWPw(3,$ap));
+ &add($cc,$t1,$cc); &FR($t1);
+
+ ($t2,$o2)=&NR(2);
+
+ &add($a2,$b2,$o2); &FR($a2);
+ &cmpult($o2,$b2,$t2); &FR($b2);
+ &add($o2,$cc,$o2);
+ &cmpult($o2,$cc,$cc);
+ &ld(($b3)=&NR(1),&QWPw(3,$bp));
+ &st($o0,&QWPw(0,$rp)); &FR($o0);
+ &add($cc,$t2,$cc); &FR($t2);
+
+ ($t3,$o3)=&NR(2);
+
+ &st($o1,&QWPw(0,$rp)); &FR($o1);
+ &add($a3,$b3,$o3); &FR($a3);
+ &cmpult($o3,$b3,$t3); &FR($b3);
+ &add($o3,$cc,$o3);
+ &st($o2,&QWPw(0,$rp)); &FR($o2);
+ &cmpult($o3,$cc,$cc);
+ &st($o3,&QWPw(0,$rp)); &FR($o3);
+ &add($cc,$t3,$cc); &FR($t3);
+
+
+ &sub($count,4,$count); # count-=4
+ &add($ap,4*$QWS,$ap); # count+=4
+ &add($bp,4*$QWS,$bp); # count+=4
+ &add($rp,4*$QWS,$rp); # count+=4
+
+ ###
+ &bge($count,&label("loop"));
+ ###
+ &br(&label("finish"));
+##################################################
+ # Do the last 0..3 words
+
+ ($t0,$o0)=&NR(2);
+ &set_label("last_loop");
+
+ &ld($a0,&QWPw(0,$ap)); # get a
+ &ld($b0,&QWPw(0,$bp)); # get b
+ &add($ap,$QWS,$ap);
+ &add($bp,$QWS,$bp);
+ &add($a0,$b0,$o0);
+ &sub($count,1,$count);
+ &cmpult($o0,$b0,$t0); # will we borrow?
+ &add($o0,$cc,$o0); # will we borrow?
+ &cmpult($o0,$cc,$cc); # will we borrow?
+ &add($rp,$QWS,$rp);
+ &st($o0,&QWPw(-1,$rp)); # save
+ &add($cc,$t0,$cc); # add the borrows
+
+ ###
+ &bgt($count,&label("last_loop"));
+ &function_end_A($name);
+
+######################################################
+ &set_label("finish");
+ &add($count,4,$count);
+ &bgt($count,&label("last_loop"));
+
+ &FR($o0,$t0,$a0,$b0);
+ &set_label("end");
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha/div.pl b/crypto/bn/asm/alpha/div.pl
new file mode 100644
index 0000000000..e9e680897a
--- /dev/null
+++ b/crypto/bn/asm/alpha/div.pl
@@ -0,0 +1,144 @@
+#!/usr/local/bin/perl
+
+sub bn_div_words
+ {
+ local($data)=<<'EOF';
+ #
+ # What follows was taken directly from the C compiler with a few
+ # hacks to redo the lables.
+ #
+.text
+ .set noreorder
+ .set volatile
+ .align 3
+ .globl bn_div_words
+ .ent bn_div_words
+bn_div_words
+ ldgp $29,0($27)
+bn_div_words.ng:
+ lda $30,-48($30)
+ .frame $30,48,$26,0
+ stq $26,0($30)
+ stq $9,8($30)
+ stq $10,16($30)
+ stq $11,24($30)
+ stq $12,32($30)
+ stq $13,40($30)
+ .mask 0x4003e00,-48
+ .prologue 1
+ bis $16,$16,$9
+ bis $17,$17,$10
+ bis $18,$18,$11
+ bis $31,$31,$13
+ bis $31,2,$12
+ bne $11,$9119
+ lda $0,-1
+ br $31,$9136
+ .align 4
+$9119:
+ bis $11,$11,$16
+ jsr $26,BN_num_bits_word
+ ldgp $29,0($26)
+ subq $0,64,$1
+ beq $1,$9120
+ bis $31,1,$1
+ sll $1,$0,$1
+ cmpule $9,$1,$1
+ bne $1,$9120
+ # lda $16,_IO_stderr_
+ # lda $17,$C32
+ # bis $0,$0,$18
+ # jsr $26,fprintf
+ # ldgp $29,0($26)
+ jsr $26,abort
+ ldgp $29,0($26)
+ .align 4
+$9120:
+ bis $31,64,$3
+ cmpult $9,$11,$2
+ subq $3,$0,$1
+ addl $1,$31,$0
+ subq $9,$11,$1
+ cmoveq $2,$1,$9
+ beq $0,$9122
+ zapnot $0,15,$2
+ subq $3,$0,$1
+ sll $11,$2,$11
+ sll $9,$2,$3
+ srl $10,$1,$1
+ sll $10,$2,$10
+ bis $3,$1,$9
+$9122:
+ srl $11,32,$5
+ zapnot $11,15,$6
+ lda $7,-1
+ .align 5
+$9123:
+ srl $9,32,$1
+ subq $1,$5,$1
+ bne $1,$9126
+ zapnot $7,15,$27
+ br $31,$9127
+ .align 4
+$9126:
+ bis $9,$9,$24
+ bis $5,$5,$25
+ divqu $24,$25,$27
+$9127:
+ srl $10,32,$4
+ .align 5
+$9128:
+ mulq $27,$5,$1
+ subq $9,$1,$3
+ zapnot $3,240,$1
+ bne $1,$9129
+ mulq $6,$27,$2
+ sll $3,32,$1
+ addq $1,$4,$1
+ cmpule $2,$1,$2
+ bne $2,$9129
+ subq $27,1,$27
+ br $31,$9128
+ .align 4
+$9129:
+ mulq $27,$6,$1
+ mulq $27,$5,$4
+ srl $1,32,$3
+ sll $1,32,$1
+ addq $4,$3,$4
+ cmpult $10,$1,$2
+ subq $10,$1,$10
+ addq $2,$4,$2
+ cmpult $9,$2,$1
+ bis $2,$2,$4
+ beq $1,$9134
+ addq $9,$11,$9
+ subq $27,1,$27
+$9134:
+ subl $12,1,$12
+ subq $9,$4,$9
+ beq $12,$9124
+ sll $27,32,$13
+ sll $9,32,$2
+ srl $10,32,$1
+ sll $10,32,$10
+ bis $2,$1,$9
+ br $31,$9123
+ .align 4
+$9124:
+ bis $13,$27,$0
+$9136:
+ ldq $26,0($30)
+ ldq $9,8($30)
+ ldq $10,16($30)
+ ldq $11,24($30)
+ ldq $12,32($30)
+ ldq $13,40($30)
+ addq $30,48,$30
+ ret $31,($26),1
+ .end bn_div_words
+EOF
+ &asm_add($data);
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha/mul.pl b/crypto/bn/asm/alpha/mul.pl
new file mode 100644
index 0000000000..76c926566c
--- /dev/null
+++ b/crypto/bn/asm/alpha/mul.pl
@@ -0,0 +1,104 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_mul_words
+ {
+ local($name)=@_;
+ local($cc,$a,$b,$r,$couny);
+
+ &init_pool(4);
+ ($cc)=GR("r0");
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $count=&wparam(2);
+ $word=&wparam(3);
+
+ &function_begin($name,"");
+
+ &comment("");
+ &sub($count,4,$count);
+ &mov("zero",$cc);
+ ###
+ &blt($count,&label("finish"));
+
+ ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap));
+
+ &set_label("loop");
+
+ ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
+ ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
+
+ &muh($a0,$word,($h0)=&NR(1)); &FR($a0);
+ ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
+ ### wait 8
+ &mul($a0,$word,($l0)=&NR(1)); &FR($a0);
+ ### wait 8
+ &muh($a1,$word,($h1)=&NR(1)); &FR($a1);
+ &add($l0,$cc,$l0); ### wait 8
+ &mul($a1,$word,($l1)=&NR(1)); &FR($a1);
+ &cmpult($l0,$cc,$cc); ### wait 8
+ &muh($a2,$word,($h2)=&NR(1)); &FR($a2);
+ &add($h0,$cc,$cc); &FR($h0); ### wait 8
+ &mul($a2,$word,($l2)=&NR(1)); &FR($a2);
+ &add($l1,$cc,$l1); ### wait 8
+ &st($l0,&QWPw(0,$rp)); &FR($l0);
+ &cmpult($l1,$cc,$cc); ### wait 8
+ &muh($a3,$word,($h3)=&NR(1)); &FR($a3);
+ &add($h1,$cc,$cc); &FR($h1);
+ &mul($a3,$word,($l3)=&NR(1)); &FR($a3);
+ &add($l2,$cc,$l2);
+ &st($l1,&QWPw(1,$rp)); &FR($l1);
+ &cmpult($l2,$cc,$cc);
+ &add($h2,$cc,$cc); &FR($h2);
+ &sub($count,4,$count); # count-=4
+ &st($l2,&QWPw(2,$rp)); &FR($l2);
+ &add($l3,$cc,$l3);
+ &cmpult($l3,$cc,$cc);
+ &add($bp,4*$QWS,$bp); # count+=4
+ &add($h3,$cc,$cc); &FR($h3);
+ &add($ap,4*$QWS,$ap); # count+=4
+ &st($l3,&QWPw(3,$rp)); &FR($l3);
+ &add($rp,4*$QWS,$rp); # count+=4
+ ###
+ &blt($count,&label("finish"));
+ ($a0)=&NR(1); &ld($a0,&QWPw(0,$ap));
+ &br(&label("finish"));
+##################################################
+
+##################################################
+ # Do the last 0..3 words
+
+ &set_label("last_loop");
+
+ &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
+ ###
+ ###
+ ###
+ &muh($a0,$word,($h0)=&NR(1));
+ ### Wait 8 for next mul issue
+ &mul($a0,$word,($l0)=&NR(1)); &FR($a0)
+ &add($ap,$QWS,$ap);
+ ### Loose 12 until result is available
+ &add($rp,$QWS,$rp);
+ &sub($count,1,$count);
+ &add($l0,$cc,$l0);
+ ###
+ &st($l0,&QWPw(-1,$rp)); &FR($l0);
+ &cmpult($l0,$cc,$cc);
+ &add($h0,$cc,$cc); &FR($h0);
+ &bgt($count,&label("last_loop"));
+ &function_end_A($name);
+
+######################################################
+ &set_label("finish");
+ &add($count,4,$count);
+ &bgt($count,&label("last_loop"));
+
+ &set_label("end");
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha/mul_add.pl b/crypto/bn/asm/alpha/mul_add.pl
new file mode 100644
index 0000000000..0d6df69bc4
--- /dev/null
+++ b/crypto/bn/asm/alpha/mul_add.pl
@@ -0,0 +1,123 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_mul_add_words
+ {
+ local($name)=@_;
+ local($cc,$a,$b,$r,$couny);
+
+ &init_pool(4);
+ ($cc)=GR("r0");
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $count=&wparam(2);
+ $word=&wparam(3);
+
+ &function_begin($name,"");
+
+ &comment("");
+ &sub($count,4,$count);
+ &mov("zero",$cc);
+ ###
+ &blt($count,&label("finish"));
+
+ &ld(($a0)=&NR(1),&QWPw(0,$ap));
+
+$a=<<'EOF';
+##########################################################
+ &set_label("loop");
+
+ &ld(($r0)=&NR(1),&QWPw(0,$rp));
+ &ld(($a1)=&NR(1),&QWPw(1,$ap));
+ &muh($a0,$word,($h0)=&NR(1));
+ &ld(($r1)=&NR(1),&QWPw(1,$rp));
+ &ld(($a2)=&NR(1),&QWPw(2,$ap));
+ ###
+ &mul($a0,$word,($l0)=&NR(1)); &FR($a0);
+ &ld(($r2)=&NR(1),&QWPw(2,$rp));
+ &muh($a1,$word,($h1)=&NR(1));
+ &ld(($a3)=&NR(1),&QWPw(3,$ap));
+ &mul($a1,$word,($l1)=&NR(1)); &FR($a1);
+ &ld(($r3)=&NR(1),&QWPw(3,$rp));
+ &add($r0,$l0,$r0);
+ &add($r1,$l1,$r1);
+ &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0);
+ &cmpult($r1,$l1,($t1)=&NR(1)); &FR($l1);
+ &muh($a2,$word,($h2)=&NR(1));
+ &add($r0,$cc,$r0);
+ &add($h0,$t0,$h0); &FR($t0);
+ &cmpult($r0,$cc,$cc);
+ &add($h1,$t1,$h1); &FR($t1);
+ &add($h0,$cc,$cc); &FR($h0);
+ &mul($a2,$word,($l2)=&NR(1)); &FR($a2);
+ &add($r1,$cc,$r1);
+ &cmpult($r1,$cc,$cc);
+ &add($r2,$l2,$r2);
+ &add($h1,$cc,$cc); &FR($h1);
+ &cmpult($r2,$l2,($t2)=&NR(1)); &FR($l2);
+ &muh($a3,$word,($h3)=&NR(1));
+ &add($r2,$cc,$r2);
+ &st($r0,&QWPw(0,$rp)); &FR($r0);
+ &add($h2,$t2,$h2); &FR($t2);
+ &st($r1,&QWPw(1,$rp)); &FR($r1);
+ &cmpult($r2,$cc,$cc);
+ &mul($a3,$word,($l3)=&NR(1)); &FR($a3);
+ &add($h2,$cc,$cc); &FR($h2);
+ &st($r2,&QWPw(2,$rp)); &FR($r2);
+ &sub($count,4,$count); # count-=4
+ &add($rp,4*$QWS,$rp); # count+=4
+ &add($r3,$l3,$r3);
+ &add($ap,4*$QWS,$ap); # count+=4
+ &cmpult($r3,$l3,($t3)=&NR(1)); &FR($l3);
+ &add($r3,$cc,$r3);
+ &add($h3,$t3,$h3); &FR($t3);
+ &cmpult($r3,$cc,$cc);
+ &st($r3,&QWPw(-1,$rp)); &FR($r3);
+ &add($h3,$cc,$cc); &FR($h3);
+
+ ###
+ &blt($count,&label("finish"));
+ &ld(($a0)=&NR(1),&QWPw(0,$ap));
+ &br(&label("loop"));
+EOF
+##################################################
+ # Do the last 0..3 words
+
+ &set_label("last_loop");
+
+ &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
+ &ld(($r0)=&NR(1),&QWPw(0,$rp)); # get b
+ ###
+ ###
+ &muh($a0,$word,($h0)=&NR(1)); &FR($a0);
+ ### wait 8
+ &mul($a0,$word,($l0)=&NR(1)); &FR($a0);
+ &add($rp,$QWS,$rp);
+ &add($ap,$QWS,$ap);
+ &sub($count,1,$count);
+ ### wait 3 until l0 is available
+ &add($r0,$l0,$r0);
+ ###
+ &cmpult($r0,$l0,($t0)=&NR(1)); &FR($l0);
+ &add($r0,$cc,$r0);
+ &add($h0,$t0,$h0); &FR($t0);
+ &cmpult($r0,$cc,$cc);
+ &add($h0,$cc,$cc); &FR($h0);
+
+ &st($r0,&QWPw(-1,$rp)); &FR($r0);
+ &bgt($count,&label("last_loop"));
+ &function_end_A($name);
+
+######################################################
+ &set_label("finish");
+ &add($count,4,$count);
+ &bgt($count,&label("last_loop"));
+
+ &set_label("end");
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha/mul_c4.pl b/crypto/bn/asm/alpha/mul_c4.pl
new file mode 100644
index 0000000000..9cc876ded4
--- /dev/null
+++ b/crypto/bn/asm/alpha/mul_c4.pl
@@ -0,0 +1,215 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+# upto
+
+sub mul_add_c
+ {
+ local($a,$b,$c0,$c1,$c2)=@_;
+ local($l1,$h1,$t1,$t2);
+
+ &mul($a,$b,($l1)=&NR(1));
+ &muh($a,$b,($h1)=&NR(1));
+ &add($c0,$l1,$c0);
+ &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
+ &add($t1,$h1,$h1); &FR($t1);
+ &add($c1,$h1,$c1);
+ &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
+ &add($c2,$t2,$c2); &FR($t2);
+ }
+
+sub bn_mul_comba4
+ {
+ local($name)=@_;
+ local(@a,@b,$r,$c0,$c1,$c2);
+
+ $cnt=1;
+ &init_pool(3);
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $bp=&wparam(2);
+
+ &function_begin($name,"");
+
+ &comment("");
+
+ &ld(($a[0])=&NR(1),&QWPw(0,$ap));
+ &ld(($b[0])=&NR(1),&QWPw(0,$bp));
+ &ld(($a[1])=&NR(1),&QWPw(1,$ap));
+ &ld(($b[1])=&NR(1),&QWPw(1,$bp));
+ &mul($a[0],$b[0],($r00)=&NR(1));
+ &ld(($a[2])=&NR(1),&QWPw(2,$ap));
+ &ld(($b[2])=&NR(1),&QWPw(2,$bp));
+ &muh($a[0],$b[0],($r01)=&NR(1));
+ &FR($ap); &ld(($a[3])=&NR(1),&QWPw(3,$ap));
+ &FR($bp); &ld(($b[3])=&NR(1),&QWPw(3,$bp));
+ &mul($a[0],$b[1],($r02)=&NR(1));
+
+ ($R,$H1,$H2)=&NR(3);
+
+ &st($r00,&QWPw(0,$rp)); &FR($r00);
+
+ &mov("zero",$R);
+ &mul($a[1],$b[0],($r03)=&NR(1));
+
+ &mov("zero",$H1);
+ &mov("zero",$H0);
+ &add($R,$r01,$R);
+ &muh($a[0],$b[1],($r04)=&NR(1));
+ &cmpult($R,$r01,($t01)=&NR(1)); &FR($r01);
+ &add($R,$r02,$R);
+ &add($H1,$t01,$H1) &FR($t01);
+ &muh($a[1],$b[0],($r05)=&NR(1));
+ &cmpult($R,$r02,($t02)=&NR(1)); &FR($r02);
+ &add($R,$r03,$R);
+ &add($H2,$t02,$H2) &FR($t02);
+ &mul($a[0],$b[2],($r06)=&NR(1));
+ &cmpult($R,$r03,($t03)=&NR(1)); &FR($r03);
+ &add($H1,$t03,$H1) &FR($t03);
+ &st($R,&QWPw(1,$rp));
+ &add($H1,$H2,$R);
+
+ &mov("zero",$H1);
+ &add($R,$r04,$R);
+ &mov("zero",$H2);
+ &mul($a[1],$b[1],($r07)=&NR(1));
+ &cmpult($R,$r04,($t04)=&NR(1)); &FR($r04);
+ &add($R,$r05,$R);
+ &add($H1,$t04,$H1) &FR($t04);
+ &mul($a[2],$b[0],($r08)=&NR(1));
+ &cmpult($R,$r05,($t05)=&NR(1)); &FR($r05);
+ &add($R,$r01,$R);
+ &add($H2,$t05,$H2) &FR($t05);
+ &muh($a[0],$b[2],($r09)=&NR(1));
+ &cmpult($R,$r06,($t06)=&NR(1)); &FR($r06);
+ &add($R,$r07,$R);
+ &add($H1,$t06,$H1) &FR($t06);
+ &muh($a[1],$b[1],($r10)=&NR(1));
+ &cmpult($R,$r07,($t07)=&NR(1)); &FR($r07);
+ &add($R,$r08,$R);
+ &add($H2,$t07,$H2) &FR($t07);
+ &muh($a[2],$b[0],($r11)=&NR(1));
+ &cmpult($R,$r08,($t08)=&NR(1)); &FR($r08);
+ &add($H1,$t08,$H1) &FR($t08);
+ &st($R,&QWPw(2,$rp));
+ &add($H1,$H2,$R);
+
+ &mov("zero",$H1);
+ &add($R,$r09,$R);
+ &mov("zero",$H2);
+ &mul($a[0],$b[3],($r12)=&NR(1));
+ &cmpult($R,$r09,($t09)=&NR(1)); &FR($r09);
+ &add($R,$r10,$R);
+ &add($H1,$t09,$H1) &FR($t09);
+ &mul($a[1],$b[2],($r13)=&NR(1));
+ &cmpult($R,$r10,($t10)=&NR(1)); &FR($r10);
+ &add($R,$r11,$R);
+ &add($H1,$t10,$H1) &FR($t10);
+ &mul($a[2],$b[1],($r14)=&NR(1));
+ &cmpult($R,$r11,($t11)=&NR(1)); &FR($r11);
+ &add($R,$r12,$R);
+ &add($H1,$t11,$H1) &FR($t11);
+ &mul($a[3],$b[0],($r15)=&NR(1));
+ &cmpult($R,$r12,($t12)=&NR(1)); &FR($r12);
+ &add($R,$r13,$R);
+ &add($H1,$t12,$H1) &FR($t12);
+ &muh($a[0],$b[3],($r16)=&NR(1));
+ &cmpult($R,$r13,($t13)=&NR(1)); &FR($r13);
+ &add($R,$r14,$R);
+ &add($H1,$t13,$H1) &FR($t13);
+ &muh($a[1],$b[2],($r17)=&NR(1));
+ &cmpult($R,$r14,($t14)=&NR(1)); &FR($r14);
+ &add($R,$r15,$R);
+ &add($H1,$t14,$H1) &FR($t14);
+ &muh($a[2],$b[1],($r18)=&NR(1));
+ &cmpult($R,$r15,($t15)=&NR(1)); &FR($r15);
+ &add($H1,$t15,$H1) &FR($t15);
+ &st($R,&QWPw(3,$rp));
+ &add($H1,$H2,$R);
+
+ &mov("zero",$H1);
+ &add($R,$r16,$R);
+ &mov("zero",$H2);
+ &muh($a[3],$b[0],($r19)=&NR(1));
+ &cmpult($R,$r16,($t16)=&NR(1)); &FR($r16);
+ &add($R,$r17,$R);
+ &add($H1,$t16,$H1) &FR($t16);
+ &mul($a[1],$b[3],($r20)=&NR(1));
+ &cmpult($R,$r17,($t17)=&NR(1)); &FR($r17);
+ &add($R,$r18,$R);
+ &add($H1,$t17,$H1) &FR($t17);
+ &mul($a[2],$b[2],($r21)=&NR(1));
+ &cmpult($R,$r18,($t18)=&NR(1)); &FR($r18);
+ &add($R,$r19,$R);
+ &add($H1,$t18,$H1) &FR($t18);
+ &mul($a[3],$b[1],($r22)=&NR(1));
+ &cmpult($R,$r19,($t19)=&NR(1)); &FR($r19);
+ &add($R,$r20,$R);
+ &add($H1,$t19,$H1) &FR($t19);
+ &muh($a[1],$b[3],($r23)=&NR(1));
+ &cmpult($R,$r20,($t20)=&NR(1)); &FR($r20);
+ &add($R,$r21,$R);
+ &add($H1,$t20,$H1) &FR($t20);
+ &muh($a[2],$b[2],($r24)=&NR(1));
+ &cmpult($R,$r21,($t21)=&NR(1)); &FR($r21);
+ &add($R,$r22,$R);
+ &add($H1,$t21,$H1) &FR($t21);
+ &muh($a[3],$b[1],($r25)=&NR(1));
+ &cmpult($R,$r22,($t22)=&NR(1)); &FR($r22);
+ &add($H1,$t22,$H1) &FR($t22);
+ &st($R,&QWPw(4,$rp));
+ &add($H1,$H2,$R);
+
+ &mov("zero",$H1);
+ &add($R,$r23,$R);
+ &mov("zero",$H2);
+ &mul($a[2],$b[3],($r26)=&NR(1));
+ &cmpult($R,$r23,($t23)=&NR(1)); &FR($r23);
+ &add($R,$r24,$R);
+ &add($H1,$t23,$H1) &FR($t23);
+ &mul($a[3],$b[2],($r27)=&NR(1));
+ &cmpult($R,$r24,($t24)=&NR(1)); &FR($r24);
+ &add($R,$r25,$R);
+ &add($H1,$t24,$H1) &FR($t24);
+ &muh($a[2],$b[3],($r28)=&NR(1));
+ &cmpult($R,$r25,($t25)=&NR(1)); &FR($r25);
+ &add($R,$r26,$R);
+ &add($H1,$t25,$H1) &FR($t25);
+ &muh($a[3],$b[2],($r29)=&NR(1));
+ &cmpult($R,$r26,($t26)=&NR(1)); &FR($r26);
+ &add($R,$r27,$R);
+ &add($H1,$t26,$H1) &FR($t26);
+ &mul($a[3],$b[3],($r30)=&NR(1));
+ &cmpult($R,$r27,($t27)=&NR(1)); &FR($r27);
+ &add($H1,$t27,$H1) &FR($t27);
+ &st($R,&QWPw(5,$rp));
+ &add($H1,$H2,$R);
+
+ &mov("zero",$H1);
+ &add($R,$r28,$R);
+ &mov("zero",$H2);
+ &muh($a[3],$b[3],($r31)=&NR(1));
+ &cmpult($R,$r28,($t28)=&NR(1)); &FR($r28);
+ &add($R,$r29,$R);
+ &add($H1,$t28,$H1) &FR($t28);
+ ############
+ &cmpult($R,$r29,($t29)=&NR(1)); &FR($r29);
+ &add($R,$r30,$R);
+ &add($H1,$t29,$H1) &FR($t29);
+ ############
+ &cmpult($R,$r30,($t30)=&NR(1)); &FR($r30);
+ &add($H1,$t30,$H1) &FR($t30);
+ &st($R,&QWPw(6,$rp));
+ &add($H1,$H2,$R);
+
+ &add($R,$r31,$R); &FR($r31);
+ &st($R,&QWPw(7,$rp));
+
+ &FR($R,$H1,$H2);
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha/mul_c4.works.pl b/crypto/bn/asm/alpha/mul_c4.works.pl
new file mode 100644
index 0000000000..79d86dd25c
--- /dev/null
+++ b/crypto/bn/asm/alpha/mul_c4.works.pl
@@ -0,0 +1,98 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub mul_add_c
+ {
+ local($a,$b,$c0,$c1,$c2)=@_;
+ local($l1,$h1,$t1,$t2);
+
+print STDERR "count=$cnt\n"; $cnt++;
+ &mul($a,$b,($l1)=&NR(1));
+ &muh($a,$b,($h1)=&NR(1));
+ &add($c0,$l1,$c0);
+ &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
+ &add($t1,$h1,$h1); &FR($t1);
+ &add($c1,$h1,$c1);
+ &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
+ &add($c2,$t2,$c2); &FR($t2);
+ }
+
+sub bn_mul_comba4
+ {
+ local($name)=@_;
+ local(@a,@b,$r,$c0,$c1,$c2);
+
+ $cnt=1;
+ &init_pool(3);
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $bp=&wparam(2);
+
+ &function_begin($name,"");
+
+ &comment("");
+
+ &ld(($a[0])=&NR(1),&QWPw(0,$ap));
+ &ld(($b[0])=&NR(1),&QWPw(0,$bp));
+ &ld(($a[1])=&NR(1),&QWPw(1,$ap));
+ &ld(($b[1])=&NR(1),&QWPw(1,$bp));
+ &ld(($a[2])=&NR(1),&QWPw(2,$ap));
+ &ld(($b[2])=&NR(1),&QWPw(2,$bp));
+ &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
+ &ld(($b[3])=&NR(1),&QWPw(3,$bp)); &FR($bp);
+
+ ($c0,$c1,$c2)=&NR(3);
+ &mov("zero",$c2);
+ &mul($a[0],$b[0],$c0);
+ &muh($a[0],$b[0],$c1);
+ &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR($c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR($c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR($c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[3],$c0,$c1,$c2); &FR($a[0]);
+ &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[0],$c0,$c1,$c2); &FR($b[0]);
+ &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR($c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[1],$b[3],$c0,$c1,$c2); &FR($a[1]);
+ &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[1],$c0,$c1,$c2); &FR($b[1]);
+ &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR($c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[2],$b[3],$c0,$c1,$c2); &FR($a[2]);
+ &mul_add_c($a[3],$b[2],$c0,$c1,$c2); &FR($b[2]);
+ &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR($c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[3],$b[3],$c0,$c1,$c2); &FR($a[3],$b[3]);
+ &st($c0,&QWPw(6,$rp));
+ &st($c1,&QWPw(7,$rp));
+
+ &FR($c0,$c1,$c2);
+
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha/mul_c8.pl b/crypto/bn/asm/alpha/mul_c8.pl
new file mode 100644
index 0000000000..525ca7494b
--- /dev/null
+++ b/crypto/bn/asm/alpha/mul_c8.pl
@@ -0,0 +1,177 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_mul_comba8
+ {
+ local($name)=@_;
+ local(@a,@b,$r,$c0,$c1,$c2);
+
+ $cnt=1;
+ &init_pool(3);
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $bp=&wparam(2);
+
+ &function_begin($name,"");
+
+ &comment("");
+
+ &stack_push(2);
+ &ld(($a[0])=&NR(1),&QWPw(0,$ap));
+ &ld(($b[0])=&NR(1),&QWPw(0,$bp));
+ &st($reg_s0,&swtmp(0)); &FR($reg_s0);
+ &st($reg_s1,&swtmp(1)); &FR($reg_s1);
+ &ld(($a[1])=&NR(1),&QWPw(1,$ap));
+ &ld(($b[1])=&NR(1),&QWPw(1,$bp));
+ &ld(($a[2])=&NR(1),&QWPw(2,$ap));
+ &ld(($b[2])=&NR(1),&QWPw(2,$bp));
+ &ld(($a[3])=&NR(1),&QWPw(3,$ap));
+ &ld(($b[3])=&NR(1),&QWPw(3,$bp));
+ &ld(($a[4])=&NR(1),&QWPw(1,$ap));
+ &ld(($b[4])=&NR(1),&QWPw(1,$bp));
+ &ld(($a[5])=&NR(1),&QWPw(1,$ap));
+ &ld(($b[5])=&NR(1),&QWPw(1,$bp));
+ &ld(($a[6])=&NR(1),&QWPw(1,$ap));
+ &ld(($b[6])=&NR(1),&QWPw(1,$bp));
+ &ld(($a[7])=&NR(1),&QWPw(1,$ap)); &FR($ap);
+ &ld(($b[7])=&NR(1),&QWPw(1,$bp)); &FR($bp);
+
+ ($c0,$c1,$c2)=&NR(3);
+ &mov("zero",$c2);
+ &mul($a[0],$b[0],$c0);
+ &muh($a[0],$b[0],$c1);
+ &st($c0,&QWPw(0,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(1,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(2,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(3,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[4],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(4,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[4],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[5],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(5,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[1],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[4],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[5],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[6],$b[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(6,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[0],$b[7],$c0,$c1,$c2); &FR($a[0]);
+ &mul_add_c($a[1],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[2],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[4],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[5],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[6],$b[1],$c0,$c1,$c2);
+ &mul_add_c($a[7],$b[0],$c0,$c1,$c2); &FR($b[0]);
+ &st($c0,&QWPw(7,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[1],$b[7],$c0,$c1,$c2); &FR($a[1]);
+ &mul_add_c($a[2],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[3],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[4],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[5],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[6],$b[2],$c0,$c1,$c2);
+ &mul_add_c($a[7],$b[1],$c0,$c1,$c2); &FR($b[1]);
+ &st($c0,&QWPw(8,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[2],$b[7],$c0,$c1,$c2); &FR($a[2]);
+ &mul_add_c($a[3],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[4],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[5],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[6],$b[3],$c0,$c1,$c2);
+ &mul_add_c($a[7],$b[2],$c0,$c1,$c2); &FR($b[2]);
+ &st($c0,&QWPw(9,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[3],$b[7],$c0,$c1,$c2); &FR($a[3]);
+ &mul_add_c($a[4],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[5],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[6],$b[4],$c0,$c1,$c2);
+ &mul_add_c($a[7],$b[3],$c0,$c1,$c2); &FR($b[3]);
+ &st($c0,&QWPw(10,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[4],$b[7],$c0,$c1,$c2); &FR($a[4]);
+ &mul_add_c($a[5],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[6],$b[5],$c0,$c1,$c2);
+ &mul_add_c($a[7],$b[4],$c0,$c1,$c2); &FR($b[4]);
+ &st($c0,&QWPw(11,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[5],$b[7],$c0,$c1,$c2); &FR($a[5]);
+ &mul_add_c($a[6],$b[6],$c0,$c1,$c2);
+ &mul_add_c($a[7],$b[5],$c0,$c1,$c2); &FR($b[5]);
+ &st($c0,&QWPw(12,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[6],$b[7],$c0,$c1,$c2); &FR($a[6]);
+ &mul_add_c($a[7],$b[6],$c0,$c1,$c2); &FR($b[6]);
+ &st($c0,&QWPw(13,$rp)); &FR($c0); ($c0)=&NR(1);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &mul_add_c($a[7],$b[7],$c0,$c1,$c2); &FR($a[7],$b[7]);
+ &st($c0,&QWPw(14,$rp));
+ &st($c1,&QWPw(15,$rp));
+
+ &FR($c0,$c1,$c2);
+
+ &ld($reg_s0,&swtmp(0));
+ &ld($reg_s1,&swtmp(1));
+ &stack_pop(2);
+
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha/sqr.pl b/crypto/bn/asm/alpha/sqr.pl
new file mode 100644
index 0000000000..a55b696906
--- /dev/null
+++ b/crypto/bn/asm/alpha/sqr.pl
@@ -0,0 +1,113 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_sqr_words
+ {
+ local($name)=@_;
+ local($cc,$a,$b,$r,$couny);
+
+ &init_pool(3);
+ ($cc)=GR("r0");
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $count=&wparam(2);
+
+ &function_begin($name,"");
+
+ &comment("");
+ &sub($count,4,$count);
+ &mov("zero",$cc);
+ &br(&label("finish"));
+ &blt($count,&label("finish"));
+
+ ($a0,$r0)=&NR(2);
+ &ld($a0,&QWPw(0,$ap));
+ &ld($r0,&QWPw(0,$rp));
+
+$a=<<'EOF';
+##########################################################
+ &set_label("loop");
+
+ ($a1)=&NR(1); &ld($a1,&QWPw(1,$ap));
+ ($b1)=&NR(1); &ld($b1,&QWPw(1,$bp));
+ ($a2)=&NR(1); &ld($a2,&QWPw(2,$ap));
+ ($b2)=&NR(1); &ld($b2,&QWPw(2,$bp));
+ ($a3)=&NR(1); &ld($a3,&QWPw(3,$ap));
+ ($b3)=&NR(1); &ld($b3,&QWPw(3,$bp));
+
+ ($o0,$t0)=&NR(2);
+ &add($a0,$b0,$o0);
+ &cmpult($o0,$b0,$t0);
+ &add($o0,$cc,$o0);
+ &cmpult($o0,$cc,$cc);
+ &add($cc,$t0,$cc); &FR($t0);
+
+ ($t1,$o1)=&NR(2);
+
+ &add($a1,$b1,$o1); &FR($a1);
+ &cmpult($o1,$b1,$t1); &FR($b1);
+ &add($o1,$cc,$o1);
+ &cmpult($o1,$cc,$cc);
+ &add($cc,$t1,$cc); &FR($t1);
+
+ ($t2,$o2)=&NR(2);
+
+ &add($a2,$b2,$o2); &FR($a2);
+ &cmpult($o2,$b2,$t2); &FR($b2);
+ &add($o2,$cc,$o2);
+ &cmpult($o2,$cc,$cc);
+ &add($cc,$t2,$cc); &FR($t2);
+
+ ($t3,$o3)=&NR(2);
+
+ &add($a3,$b3,$o3); &FR($a3);
+ &cmpult($o3,$b3,$t3); &FR($b3);
+ &add($o3,$cc,$o3);
+ &cmpult($o3,$cc,$cc);
+ &add($cc,$t3,$cc); &FR($t3);
+
+ &st($o0,&QWPw(0,$rp)); &FR($o0);
+ &st($o1,&QWPw(0,$rp)); &FR($o1);
+ &st($o2,&QWPw(0,$rp)); &FR($o2);
+ &st($o3,&QWPw(0,$rp)); &FR($o3);
+
+ &sub($count,4,$count); # count-=4
+ &add($ap,4*$QWS,$ap); # count+=4
+ &add($bp,4*$QWS,$bp); # count+=4
+ &add($rp,4*$QWS,$rp); # count+=4
+
+ &blt($count,&label("finish"));
+ &ld($a0,&QWPw(0,$ap));
+ &ld($b0,&QWPw(0,$bp));
+ &br(&label("loop"));
+EOF
+##################################################
+ # Do the last 0..3 words
+
+ &set_label("last_loop");
+
+ &ld(($a0)=&NR(1),&QWPw(0,$ap)); # get a
+ &mul($a0,$a0,($l0)=&NR(1));
+ &add($ap,$QWS,$ap);
+ &add($rp,2*$QWS,$rp);
+ &sub($count,1,$count);
+ &muh($a0,$a0,($h0)=&NR(1)); &FR($a0);
+ &st($l0,&QWPw(-2,$rp)); &FR($l0);
+ &st($h0,&QWPw(-1,$rp)); &FR($h0);
+
+ &bgt($count,&label("last_loop"));
+ &function_end_A($name);
+
+######################################################
+ &set_label("finish");
+ &add($count,4,$count);
+ &bgt($count,&label("last_loop"));
+
+ &set_label("end");
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha/sqr_c4.pl b/crypto/bn/asm/alpha/sqr_c4.pl
new file mode 100644
index 0000000000..bf33f5b503
--- /dev/null
+++ b/crypto/bn/asm/alpha/sqr_c4.pl
@@ -0,0 +1,109 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub sqr_add_c
+ {
+ local($a,$c0,$c1,$c2)=@_;
+ local($l1,$h1,$t1,$t2);
+
+ &mul($a,$a,($l1)=&NR(1));
+ &muh($a,$a,($h1)=&NR(1));
+ &add($c0,$l1,$c0);
+ &add($c1,$h1,$c1);
+ &cmpult($c0,$l1,($t1)=&NR(1)); &FR($l1);
+ &cmpult($c1,$h1,($t2)=&NR(1)); &FR($h1);
+ &add($c1,$t1,$c1); &FR($t1);
+ &add($c2,$t2,$c2); &FR($t2);
+ }
+
+sub sqr_add_c2
+ {
+ local($a,$b,$c0,$c1,$c2)=@_;
+ local($l1,$h1,$t1,$t2);
+
+ &mul($a,$b,($l1)=&NR(1));
+ &muh($a,$b,($h1)=&NR(1));
+ &cmplt($l1,"zero",($lc1)=&NR(1));
+ &cmplt($h1,"zero",($hc1)=&NR(1));
+ &add($l1,$l1,$l1);
+ &add($h1,$h1,$h1);
+ &add($h1,$lc1,$h1); &FR($lc1);
+ &add($c2,$hc1,$c2); &FR($hc1);
+
+ &add($c0,$l1,$c0);
+ &add($c1,$h1,$c1);
+ &cmpult($c0,$l1,($lc1)=&NR(1)); &FR($l1);
+ &cmpult($c1,$h1,($hc1)=&NR(1)); &FR($h1);
+
+ &add($c1,$lc1,$c1); &FR($lc1);
+ &add($c2,$hc1,$c2); &FR($hc1);
+ }
+
+
+sub bn_sqr_comba4
+ {
+ local($name)=@_;
+ local(@a,@b,$r,$c0,$c1,$c2);
+
+ $cnt=1;
+ &init_pool(2);
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+
+ &function_begin($name,"");
+
+ &comment("");
+
+ &ld(($a[0])=&NR(1),&QWPw(0,$ap));
+ &ld(($a[1])=&NR(1),&QWPw(1,$ap));
+ &ld(($a[2])=&NR(1),&QWPw(2,$ap));
+ &ld(($a[3])=&NR(1),&QWPw(3,$ap)); &FR($ap);
+
+ ($c0,$c1,$c2)=&NR(3);
+
+ &mov("zero",$c2);
+ &mul($a[0],$a[0],$c0);
+ &muh($a[0],$a[0],$c1);
+ &st($c0,&QWPw(0,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[0],$a[1],$c0,$c1,$c2);
+ &st($c0,&QWPw(1,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(2,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
+ &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
+ &st($c0,&QWPw(3,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[2],$c0,$c1,$c2);
+ &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
+ &st($c0,&QWPw(4,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
+ &st($c0,&QWPw(5,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[3],$c0,$c1,$c2);
+ &st($c0,&QWPw(6,$rp));
+ &st($c1,&QWPw(7,$rp));
+
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha/sqr_c8.pl b/crypto/bn/asm/alpha/sqr_c8.pl
new file mode 100644
index 0000000000..b4afe085f1
--- /dev/null
+++ b/crypto/bn/asm/alpha/sqr_c8.pl
@@ -0,0 +1,132 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_sqr_comba8
+ {
+ local($name)=@_;
+ local(@a,@b,$r,$c0,$c1,$c2);
+
+ $cnt=1;
+ &init_pool(2);
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+
+ &function_begin($name,"");
+
+ &comment("");
+
+ &ld(($a[0])=&NR(1),&QWPw(0,$ap));
+ &ld(($a[1])=&NR(1),&QWPw(1,$ap));
+ &ld(($a[2])=&NR(1),&QWPw(2,$ap));
+ &ld(($a[3])=&NR(1),&QWPw(3,$ap));
+ &ld(($a[4])=&NR(1),&QWPw(4,$ap));
+ &ld(($a[5])=&NR(1),&QWPw(5,$ap));
+ &ld(($a[6])=&NR(1),&QWPw(6,$ap));
+ &ld(($a[7])=&NR(1),&QWPw(7,$ap)); &FR($ap);
+
+ ($c0,$c1,$c2)=&NR(3);
+
+ &mov("zero",$c2);
+ &mul($a[0],$a[0],$c0);
+ &muh($a[0],$a[0],$c1);
+ &st($c0,&QWPw(0,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[1],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(1,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[2],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(2,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[2],$a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[3],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(3,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[2],$c0,$c1,$c2);
+ &sqr_add_c2($a[3],$a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[4],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(4,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[3],$a[2],$c0,$c1,$c2);
+ &sqr_add_c2($a[4],$a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[5],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(5,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[3],$c0,$c1,$c2);
+ &sqr_add_c2($a[4],$a[2],$c0,$c1,$c2);
+ &sqr_add_c2($a[5],$a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[6],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(6,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[4],$a[3],$c0,$c1,$c2);
+ &sqr_add_c2($a[5],$a[2],$c0,$c1,$c2);
+ &sqr_add_c2($a[6],$a[1],$c0,$c1,$c2);
+ &sqr_add_c2($a[7],$a[0],$c0,$c1,$c2);
+ &st($c0,&QWPw(7,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[4],$c0,$c1,$c2);
+ &sqr_add_c2($a[5],$a[3],$c0,$c1,$c2);
+ &sqr_add_c2($a[6],$a[2],$c0,$c1,$c2);
+ &sqr_add_c2($a[7],$a[1],$c0,$c1,$c2);
+ &st($c0,&QWPw(8,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[5],$a[4],$c0,$c1,$c2);
+ &sqr_add_c2($a[6],$a[3],$c0,$c1,$c2);
+ &sqr_add_c2($a[7],$a[2],$c0,$c1,$c2);
+ &st($c0,&QWPw(9,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[5],$c0,$c1,$c2);
+ &sqr_add_c2($a[6],$a[4],$c0,$c1,$c2);
+ &sqr_add_c2($a[7],$a[3],$c0,$c1,$c2);
+ &st($c0,&QWPw(10,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[6],$a[5],$c0,$c1,$c2);
+ &sqr_add_c2($a[7],$a[4],$c0,$c1,$c2);
+ &st($c0,&QWPw(11,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[6],$c0,$c1,$c2);
+ &sqr_add_c2($a[7],$a[5],$c0,$c1,$c2);
+ &st($c0,&QWPw(12,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c2($a[7],$a[6],$c0,$c1,$c2);
+ &st($c0,&QWPw(13,$rp));
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ &mov("zero",$c2);
+
+ &sqr_add_c($a[7],$c0,$c1,$c2);
+ &st($c0,&QWPw(14,$rp));
+ &st($c1,&QWPw(15,$rp));
+
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/alpha/sub.pl b/crypto/bn/asm/alpha/sub.pl
new file mode 100644
index 0000000000..d998da5c21
--- /dev/null
+++ b/crypto/bn/asm/alpha/sub.pl
@@ -0,0 +1,108 @@
+#!/usr/local/bin/perl
+# alpha assember
+
+sub bn_sub_words
+ {
+ local($name)=@_;
+ local($cc,$a,$b,$r);
+
+ &init_pool(4);
+ ($cc)=GR("r0");
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $bp=&wparam(2);
+ $count=&wparam(3);
+
+ &function_begin($name,"");
+
+ &comment("");
+ &sub($count,4,$count);
+ &mov("zero",$cc);
+ &blt($count,&label("finish"));
+
+ ($a0,$b0)=&NR(2);
+ &ld($a0,&QWPw(0,$ap));
+ &ld($b0,&QWPw(0,$bp));
+
+##########################################################
+ &set_label("loop");
+
+ ($a1,$tmp,$b1,$a2,$b2,$a3,$b3,$o0)=&NR(8);
+ &ld($a1,&QWPw(1,$ap));
+ &cmpult($a0,$b0,$tmp); # will we borrow?
+ &ld($b1,&QWPw(1,$bp));
+ &sub($a0,$b0,$a0); # do the subtract
+ &ld($a2,&QWPw(2,$ap));
+ &cmpult($a0,$cc,$b0); # will we borrow?
+ &ld($b2,&QWPw(2,$bp));
+ &sub($a0,$cc,$o0); # will we borrow?
+ &ld($a3,&QWPw(3,$ap));
+ &add($b0,$tmp,$cc); ($t1,$o1)=&NR(2); &FR($tmp);
+
+ &cmpult($a1,$b1,$t1); # will we borrow?
+ &sub($a1,$b1,$a1); # do the subtract
+ &ld($b3,&QWPw(3,$bp));
+ &cmpult($a1,$cc,$b1); # will we borrow?
+ &sub($a1,$cc,$o1); # will we borrow?
+ &add($b1,$t1,$cc); ($tmp,$o2)=&NR(2); &FR($t1,$a1,$b1);
+
+ &cmpult($a2,$b2,$tmp); # will we borrow?
+ &sub($a2,$b2,$a2); # do the subtract
+ &st($o0,&QWPw(0,$rp)); &FR($o0); # save
+ &cmpult($a2,$cc,$b2); # will we borrow?
+ &sub($a2,$cc,$o2); # will we borrow?
+ &add($b2,$tmp,$cc); ($t3,$o3)=&NR(2); &FR($tmp,$a2,$b2);
+
+ &cmpult($a3,$b3,$t3); # will we borrow?
+ &sub($a3,$b3,$a3); # do the subtract
+ &st($o1,&QWPw(1,$rp)); &FR($o1);
+ &cmpult($a3,$cc,$b3); # will we borrow?
+ &sub($a3,$cc,$o3); # will we borrow?
+ &add($b3,$t3,$cc); &FR($t3,$a3,$b3);
+
+ &st($o2,&QWPw(2,$rp)); &FR($o2);
+ &sub($count,4,$count); # count-=4
+ &st($o3,&QWPw(3,$rp)); &FR($o3);
+ &add($ap,4*$QWS,$ap); # count+=4
+ &add($bp,4*$QWS,$bp); # count+=4
+ &add($rp,4*$QWS,$rp); # count+=4
+
+ &blt($count,&label("finish"));
+ &ld($a0,&QWPw(0,$ap));
+ &ld($b0,&QWPw(0,$bp));
+ &br(&label("loop"));
+##################################################
+ # Do the last 0..3 words
+
+ &set_label("last_loop");
+
+ &ld($a0,&QWPw(0,$ap)); # get a
+ &ld($b0,&QWPw(0,$bp)); # get b
+ &cmpult($a0,$b0,$tmp); # will we borrow?
+ &sub($a0,$b0,$a0); # do the subtract
+ &cmpult($a0,$cc,$b0); # will we borrow?
+ &sub($a0,$cc,$a0); # will we borrow?
+ &st($a0,&QWPw(0,$rp)); # save
+ &add($b0,$tmp,$cc); # add the borrows
+
+ &add($ap,$QWS,$ap);
+ &add($bp,$QWS,$bp);
+ &add($rp,$QWS,$rp);
+ &sub($count,1,$count);
+ &bgt($count,&label("last_loop"));
+ &function_end_A($name);
+
+######################################################
+ &set_label("finish");
+ &add($count,4,$count);
+ &bgt($count,&label("last_loop"));
+
+ &FR($a0,$b0);
+ &set_label("end");
+ &function_end($name);
+
+ &fin_pool;
+ }
+
+1;
diff --git a/crypto/bn/asm/bn-586.pl b/crypto/bn/asm/bn-586.pl
index 128f0f29d6..7a03c67b5b 100644
--- a/crypto/bn/asm/bn-586.pl
+++ b/crypto/bn/asm/bn-586.pl
@@ -1,7 +1,4 @@
#!/usr/local/bin/perl
-#
-
-#!/usr/local/bin/perl
push(@INC,"perlasm","../../perlasm");
require "x86asm.pl";
@@ -11,8 +8,9 @@ require "x86asm.pl";
&bn_mul_add_words("bn_mul_add_words");
&bn_mul_words("bn_mul_words");
&bn_sqr_words("bn_sqr_words");
-&bn_div64("bn_div64");
+&bn_div_words("bn_div_words");
&bn_add_words("bn_add_words");
+&bn_sub_words("bn_sub_words");
&asm_finish();
@@ -228,7 +226,7 @@ sub bn_sqr_words
&function_end($name);
}
-sub bn_div64
+sub bn_div_words
{
local($name)=@_;
@@ -307,7 +305,79 @@ sub bn_add_words
}
&set_label("aw_end",0);
- &mov("eax",$c);
+# &mov("eax",$c); # $c is "eax"
+
+ &function_end($name);
+ }
+
+sub bn_sub_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+
+ &comment("");
+ $a="esi";
+ $b="edi";
+ $c="eax";
+ $r="ebx";
+ $tmp1="ecx";
+ $tmp2="edx";
+ $num="ebp";
+
+ &mov($r,&wparam(0)); # get r
+ &mov($a,&wparam(1)); # get a
+ &mov($b,&wparam(2)); # get b
+ &mov($num,&wparam(3)); # get num
+ &xor($c,$c); # clear carry
+ &and($num,0xfffffff8); # num / 8
+
+ &jz(&label("aw_finish"));
+
+ &set_label("aw_loop",0);
+ for ($i=0; $i<8; $i++)
+ {
+ &comment("Round $i");
+
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
+ &sub($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &sub($tmp1,$tmp2);
+ &adc($c,0);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ }
+
+ &comment("");
+ &add($a,32);
+ &add($b,32);
+ &add($r,32);
+ &sub($num,8);
+ &jnz(&label("aw_loop"));
+
+ &set_label("aw_finish",0);
+ &mov($num,&wparam(3)); # get num
+ &and($num,7);
+ &jz(&label("aw_end"));
+
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("Tail Round $i");
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov($tmp2,&DWP($i*4,$b,"",0));# *b
+ &sub($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &sub($tmp1,$tmp2);
+ &adc($c,0);
+ &dec($num) if ($i != 6);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
+ &jz(&label("aw_end")) if ($i != 6);
+ }
+ &set_label("aw_end",0);
+
+# &mov("eax",$c); # $c is "eax"
&function_end($name);
}
diff --git a/crypto/bn/asm/bn-alpha.pl b/crypto/bn/asm/bn-alpha.pl
new file mode 100644
index 0000000000..302edf2376
--- /dev/null
+++ b/crypto/bn/asm/bn-alpha.pl
@@ -0,0 +1,571 @@
+#!/usr/local/bin/perl
+# I have this in perl so I can use more usefull register names and then convert
+# them into alpha registers.
+#
+
+$d=&data();
+$d =~ s/CC/0/g;
+$d =~ s/R1/1/g;
+$d =~ s/R2/2/g;
+$d =~ s/R3/3/g;
+$d =~ s/R4/4/g;
+$d =~ s/L1/5/g;
+$d =~ s/L2/6/g;
+$d =~ s/L3/7/g;
+$d =~ s/L4/8/g;
+$d =~ s/O1/22/g;
+$d =~ s/O2/23/g;
+$d =~ s/O3/24/g;
+$d =~ s/O4/25/g;
+$d =~ s/A1/20/g;
+$d =~ s/A2/21/g;
+$d =~ s/A3/27/g;
+$d =~ s/A4/28/g;
+if (0){
+}
+
+print $d;
+
+sub data
+ {
+ local($data)=<<'EOF';
+
+ # DEC Alpha assember
+ # The bn_div_words is actually gcc output but the other parts are hand done.
+ # Thanks to tzeruch@ceddec.com for sending me the gcc output for
+ # bn_div_words.
+ # I've gone back and re-done most of routines.
+ # The key thing to remeber for the 164 CPU is that while a
+ # multiply operation takes 8 cycles, another one can only be issued
+ # after 4 cycles have elapsed. I've done modification to help
+ # improve this. Also, normally, a ld instruction will not be available
+ # for about 3 cycles.
+ .file 1 "bn_asm.c"
+ .set noat
+gcc2_compiled.:
+__gnu_compiled_c:
+ .text
+ .align 3
+ .globl bn_mul_add_words
+ .ent bn_mul_add_words
+bn_mul_add_words:
+bn_mul_add_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ .align 5
+ subq $18,4,$18
+ bis $31,$31,$CC
+ blt $18,$43 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $A1,0($17) # 1 1
+ ldq $R1,0($16) # 1 1
+ .align 3
+$42:
+ mulq $A1,$19,$L1 # 1 2 1 ######
+ ldq $A2,8($17) # 2 1
+ ldq $R2,8($16) # 2 1
+ umulh $A1,$19,$A1 # 1 2 ######
+ ldq $A3,16($17) # 3 1
+ ldq $R3,16($16) # 3 1
+ mulq $A2,$19,$L2 # 2 2 1 ######
+ ldq $A4,24($17) # 4 1
+ addq $R1,$L1,$R1 # 1 2 2
+ ldq $R4,24($16) # 4 1
+ umulh $A2,$19,$A2 # 2 2 ######
+ cmpult $R1,$L1,$O1 # 1 2 3 1
+ addq $A1,$O1,$A1 # 1 3 1
+ addq $R1,$CC,$R1 # 1 2 3 1
+ mulq $A3,$19,$L3 # 3 2 1 ######
+ cmpult $R1,$CC,$CC # 1 2 3 2
+ addq $R2,$L2,$R2 # 2 2 2
+ addq $A1,$CC,$CC # 1 3 2
+ cmpult $R2,$L2,$O2 # 2 2 3 1
+ addq $A2,$O2,$A2 # 2 3 1
+ umulh $A3,$19,$A3 # 3 2 ######
+ addq $R2,$CC,$R2 # 2 2 3 1
+ cmpult $R2,$CC,$CC # 2 2 3 2
+ subq $18,4,$18
+ mulq $A4,$19,$L4 # 4 2 1 ######
+ addq $A2,$CC,$CC # 2 3 2
+ addq $R3,$L3,$R3 # 3 2 2
+ addq $16,32,$16
+ cmpult $R3,$L3,$O3 # 3 2 3 1
+ stq $R1,-32($16) # 1 2 4
+ umulh $A4,$19,$A4 # 4 2 ######
+ addq $A3,$O3,$A3 # 3 3 1
+ addq $R3,$CC,$R3 # 3 2 3 1
+ stq $R2,-24($16) # 2 2 4
+ cmpult $R3,$CC,$CC # 3 2 3 2
+ stq $R3,-16($16) # 3 2 4
+ addq $R4,$L4,$R4 # 4 2 2
+ addq $A3,$CC,$CC # 3 3 2
+ cmpult $R4,$L4,$O4 # 4 2 3 1
+ addq $17,32,$17
+ addq $A4,$O4,$A4 # 4 3 1
+ addq $R4,$CC,$R4 # 4 2 3 1
+ cmpult $R4,$CC,$CC # 4 2 3 2
+ stq $R4,-8($16) # 4 2 4
+ addq $A4,$CC,$CC # 4 3 2
+ blt $18,$43
+
+ ldq $A1,0($17) # 1 1
+ ldq $R1,0($16) # 1 1
+
+ br $42
+
+ .align 4
+$45:
+ ldq $A1,0($17) # 4 1
+ ldq $R1,0($16) # 4 1
+ mulq $A1,$19,$L1 # 4 2 1
+ subq $18,1,$18
+ addq $16,8,$16
+ addq $17,8,$17
+ umulh $A1,$19,$A1 # 4 2
+ addq $R1,$L1,$R1 # 4 2 2
+ cmpult $R1,$L1,$O1 # 4 2 3 1
+ addq $A1,$O1,$A1 # 4 3 1
+ addq $R1,$CC,$R1 # 4 2 3 1
+ cmpult $R1,$CC,$CC # 4 2 3 2
+ addq $A1,$CC,$CC # 4 3 2
+ stq $R1,-8($16) # 4 2 4
+ bgt $18,$45
+ ret $31,($26),1 # else exit
+
+ .align 4
+$43:
+ addq $18,4,$18
+ bgt $18,$45 # goto tail code
+ ret $31,($26),1 # else exit
+
+ .end bn_mul_add_words
+ .align 3
+ .globl bn_mul_words
+ .ent bn_mul_words
+bn_mul_words:
+bn_mul_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ .align 5
+ subq $18,4,$18
+ bis $31,$31,$CC
+ blt $18,$143 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $A1,0($17) # 1 1
+ .align 3
+$142:
+
+ mulq $A1,$19,$L1 # 1 2 1 #####
+ ldq $A2,8($17) # 2 1
+ ldq $A3,16($17) # 3 1
+ umulh $A1,$19,$A1 # 1 2 #####
+ ldq $A4,24($17) # 4 1
+ mulq $A2,$19,$L2 # 2 2 1 #####
+ addq $L1,$CC,$L1 # 1 2 3 1
+ subq $18,4,$18
+ cmpult $L1,$CC,$CC # 1 2 3 2
+ umulh $A2,$19,$A2 # 2 2 #####
+ addq $A1,$CC,$CC # 1 3 2
+ addq $17,32,$17
+ addq $L2,$CC,$L2 # 2 2 3 1
+ mulq $A3,$19,$L3 # 3 2 1 #####
+ cmpult $L2,$CC,$CC # 2 2 3 2
+ addq $A2,$CC,$CC # 2 3 2
+ addq $16,32,$16
+ umulh $A3,$19,$A3 # 3 2 #####
+ stq $L1,-32($16) # 1 2 4
+ mulq $A4,$19,$L4 # 4 2 1 #####
+ addq $L3,$CC,$L3 # 3 2 3 1
+ stq $L2,-24($16) # 2 2 4
+ cmpult $L3,$CC,$CC # 3 2 3 2
+ umulh $A4,$19,$A4 # 4 2 #####
+ addq $A3,$CC,$CC # 3 3 2
+ stq $L3,-16($16) # 3 2 4
+ addq $L4,$CC,$L4 # 4 2 3 1
+ cmpult $L4,$CC,$CC # 4 2 3 2
+
+ addq $A4,$CC,$CC # 4 3 2
+
+ stq $L4,-8($16) # 4 2 4
+
+ blt $18,$143
+
+ ldq $A1,0($17) # 1 1
+
+ br $142
+
+ .align 4
+$145:
+ ldq $A1,0($17) # 4 1
+ mulq $A1,$19,$L1 # 4 2 1
+ subq $18,1,$18
+ umulh $A1,$19,$A1 # 4 2
+ addq $L1,$CC,$L1 # 4 2 3 1
+ addq $16,8,$16
+ cmpult $L1,$CC,$CC # 4 2 3 2
+ addq $17,8,$17
+ addq $A1,$CC,$CC # 4 3 2
+ stq $L1,-8($16) # 4 2 4
+
+ bgt $18,$145
+ ret $31,($26),1 # else exit
+
+ .align 4
+$143:
+ addq $18,4,$18
+ bgt $18,$145 # goto tail code
+ ret $31,($26),1 # else exit
+
+ .end bn_mul_words
+ .align 3
+ .globl bn_sqr_words
+ .ent bn_sqr_words
+bn_sqr_words:
+bn_sqr_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $18,4,$18
+ blt $18,$543 # if we are -1, -2, -3 or -4 goto tail code
+ ldq $A1,0($17) # 1 1
+ .align 3
+$542:
+ mulq $A1,$A1,$L1 ######
+ ldq $A2,8($17) # 1 1
+ subq $18,4
+ umulh $A1,$A1,$R1 ######
+ ldq $A3,16($17) # 1 1
+ mulq $A2,$A2,$L2 ######
+ ldq $A4,24($17) # 1 1
+ stq $L1,0($16) # r[0]
+ umulh $A2,$A2,$R2 ######
+ stq $R1,8($16) # r[1]
+ mulq $A3,$A3,$L3 ######
+ stq $L2,16($16) # r[0]
+ umulh $A3,$A3,$R3 ######
+ stq $R2,24($16) # r[1]
+ mulq $A4,$A4,$L4 ######
+ stq $L3,32($16) # r[0]
+ umulh $A4,$A4,$R4 ######
+ stq $R3,40($16) # r[1]
+
+ addq $16,64,$16
+ addq $17,32,$17
+ stq $L4,-16($16) # r[0]
+ stq $R4,-8($16) # r[1]
+
+ blt $18,$543
+ ldq $A1,0($17) # 1 1
+ br $542
+
+$442:
+ ldq $A1,0($17) # a[0]
+ mulq $A1,$A1,$L1 # a[0]*w low part r2
+ addq $16,16,$16
+ addq $17,8,$17
+ subq $18,1,$18
+ umulh $A1,$A1,$R1 # a[0]*w high part r3
+ stq $L1,-16($16) # r[0]
+ stq $R1,-8($16) # r[1]
+
+ bgt $18,$442
+ ret $31,($26),1 # else exit
+
+ .align 4
+$543:
+ addq $18,4,$18
+ bgt $18,$442 # goto tail code
+ ret $31,($26),1 # else exit
+ .end bn_sqr_words
+
+ .align 3
+ .globl bn_add_words
+ .ent bn_add_words
+bn_add_words:
+bn_add_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $19,4,$19
+ bis $31,$31,$CC # carry = 0
+ blt $19,$900
+ ldq $L1,0($17) # a[0]
+ ldq $R1,0($18) # b[1]
+ .align 3
+$901:
+ addq $R1,$L1,$R1 # r=a+b;
+ ldq $L2,8($17) # a[1]
+ cmpult $R1,$L1,$O1 # did we overflow?
+ ldq $R2,8($18) # b[1]
+ addq $R1,$CC,$R1 # c+= overflow
+ ldq $L3,16($17) # a[2]
+ cmpult $R1,$CC,$CC # overflow?
+ ldq $R3,16($18) # b[2]
+ addq $CC,$O1,$CC
+ ldq $L4,24($17) # a[3]
+ addq $R2,$L2,$R2 # r=a+b;
+ ldq $R4,24($18) # b[3]
+ cmpult $R2,$L2,$O2 # did we overflow?
+ addq $R3,$L3,$R3 # r=a+b;
+ addq $R2,$CC,$R2 # c+= overflow
+ cmpult $R3,$L3,$O3 # did we overflow?
+ cmpult $R2,$CC,$CC # overflow?
+ addq $R4,$L4,$R4 # r=a+b;
+ addq $CC,$O2,$CC
+ cmpult $R4,$L4,$O4 # did we overflow?
+ addq $R3,$CC,$R3 # c+= overflow
+ stq $R1,0($16) # r[0]=c
+ cmpult $R3,$CC,$CC # overflow?
+ stq $R2,8($16) # r[1]=c
+ addq $CC,$O3,$CC
+ stq $R3,16($16) # r[2]=c
+ addq $R4,$CC,$R4 # c+= overflow
+ subq $19,4,$19 # loop--
+ cmpult $R4,$CC,$CC # overflow?
+ addq $17,32,$17 # a++
+ addq $CC,$O4,$CC
+ stq $R4,24($16) # r[3]=c
+ addq $18,32,$18 # b++
+ addq $16,32,$16 # r++
+
+ blt $19,$900
+ ldq $L1,0($17) # a[0]
+ ldq $R1,0($18) # b[1]
+ br $901
+ .align 4
+$945:
+ ldq $L1,0($17) # a[0]
+ ldq $R1,0($18) # b[1]
+ addq $R1,$L1,$R1 # r=a+b;
+ subq $19,1,$19 # loop--
+ addq $R1,$CC,$R1 # c+= overflow
+ addq $17,8,$17 # a++
+ cmpult $R1,$L1,$O1 # did we overflow?
+ cmpult $R1,$CC,$CC # overflow?
+ addq $18,8,$18 # b++
+ stq $R1,0($16) # r[0]=c
+ addq $CC,$O1,$CC
+ addq $16,8,$16 # r++
+
+ bgt $19,$945
+ ret $31,($26),1 # else exit
+
+$900:
+ addq $19,4,$19
+ bgt $19,$945 # goto tail code
+ ret $31,($26),1 # else exit
+ .end bn_add_words
+
+ .align 3
+ .globl bn_sub_words
+ .ent bn_sub_words
+bn_sub_words:
+bn_sub_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $19,4,$19
+ bis $31,$31,$CC # carry = 0
+ br $800
+ blt $19,$800
+ ldq $L1,0($17) # a[0]
+ ldq $R1,0($18) # b[1]
+ .align 3
+$801:
+ addq $R1,$L1,$R1 # r=a+b;
+ ldq $L2,8($17) # a[1]
+ cmpult $R1,$L1,$O1 # did we overflow?
+ ldq $R2,8($18) # b[1]
+ addq $R1,$CC,$R1 # c+= overflow
+ ldq $L3,16($17) # a[2]
+ cmpult $R1,$CC,$CC # overflow?
+ ldq $R3,16($18) # b[2]
+ addq $CC,$O1,$CC
+ ldq $L4,24($17) # a[3]
+ addq $R2,$L2,$R2 # r=a+b;
+ ldq $R4,24($18) # b[3]
+ cmpult $R2,$L2,$O2 # did we overflow?
+ addq $R3,$L3,$R3 # r=a+b;
+ addq $R2,$CC,$R2 # c+= overflow
+ cmpult $R3,$L3,$O3 # did we overflow?
+ cmpult $R2,$CC,$CC # overflow?
+ addq $R4,$L4,$R4 # r=a+b;
+ addq $CC,$O2,$CC
+ cmpult $R4,$L4,$O4 # did we overflow?
+ addq $R3,$CC,$R3 # c+= overflow
+ stq $R1,0($16) # r[0]=c
+ cmpult $R3,$CC,$CC # overflow?
+ stq $R2,8($16) # r[1]=c
+ addq $CC,$O3,$CC
+ stq $R3,16($16) # r[2]=c
+ addq $R4,$CC,$R4 # c+= overflow
+ subq $19,4,$19 # loop--
+ cmpult $R4,$CC,$CC # overflow?
+ addq $17,32,$17 # a++
+ addq $CC,$O4,$CC
+ stq $R4,24($16) # r[3]=c
+ addq $18,32,$18 # b++
+ addq $16,32,$16 # r++
+
+ blt $19,$800
+ ldq $L1,0($17) # a[0]
+ ldq $R1,0($18) # b[1]
+ br $801
+ .align 4
+$845:
+ ldq $L1,0($17) # a[0]
+ ldq $R1,0($18) # b[1]
+ cmpult $L1,$R1,$O1 # will we borrow?
+ subq $L1,$R1,$R1 # r=a-b;
+ subq $19,1,$19 # loop--
+ cmpult $R1,$CC,$O2 # will we borrow?
+ subq $R1,$CC,$R1 # c+= overflow
+ addq $17,8,$17 # a++
+ addq $18,8,$18 # b++
+ stq $R1,0($16) # r[0]=c
+ addq $O2,$O1,$CC
+ addq $16,8,$16 # r++
+
+ bgt $19,$845
+ ret $31,($26),1 # else exit
+
+$800:
+ addq $19,4,$19
+ bgt $19,$845 # goto tail code
+ ret $31,($26),1 # else exit
+ .end bn_sub_words
+
+ #
+ # What follows was taken directly from the C compiler with a few
+ # hacks to redo the lables.
+ #
+.text
+ .align 3
+ .globl bn_div_words
+ .ent bn_div_words
+bn_div_words:
+ ldgp $29,0($27)
+bn_div_words..ng:
+ lda $30,-48($30)
+ .frame $30,48,$26,0
+ stq $26,0($30)
+ stq $9,8($30)
+ stq $10,16($30)
+ stq $11,24($30)
+ stq $12,32($30)
+ stq $13,40($30)
+ .mask 0x4003e00,-48
+ .prologue 1
+ bis $16,$16,$9
+ bis $17,$17,$10
+ bis $18,$18,$11
+ bis $31,$31,$13
+ bis $31,2,$12
+ bne $11,$119
+ lda $0,-1
+ br $31,$136
+ .align 4
+$119:
+ bis $11,$11,$16
+ jsr $26,BN_num_bits_word
+ ldgp $29,0($26)
+ subq $0,64,$1
+ beq $1,$120
+ bis $31,1,$1
+ sll $1,$0,$1
+ cmpule $9,$1,$1
+ bne $1,$120
+ # lda $16,_IO_stderr_
+ # lda $17,$C32
+ # bis $0,$0,$18
+ # jsr $26,fprintf
+ # ldgp $29,0($26)
+ jsr $26,abort
+ ldgp $29,0($26)
+ .align 4
+$120:
+ bis $31,64,$3
+ cmpult $9,$11,$2
+ subq $3,$0,$1
+ addl $1,$31,$0
+ subq $9,$11,$1
+ cmoveq $2,$1,$9
+ beq $0,$122
+ zapnot $0,15,$2
+ subq $3,$0,$1
+ sll $11,$2,$11
+ sll $9,$2,$3
+ srl $10,$1,$1
+ sll $10,$2,$10
+ bis $3,$1,$9
+$122:
+ srl $11,32,$5
+ zapnot $11,15,$6
+ lda $7,-1
+ .align 5
+$123:
+ srl $9,32,$1
+ subq $1,$5,$1
+ bne $1,$126
+ zapnot $7,15,$27
+ br $31,$127
+ .align 4
+$126:
+ bis $9,$9,$24
+ bis $5,$5,$25
+ divqu $24,$25,$27
+$127:
+ srl $10,32,$4
+ .align 5
+$128:
+ mulq $27,$5,$1
+ subq $9,$1,$3
+ zapnot $3,240,$1
+ bne $1,$129
+ mulq $6,$27,$2
+ sll $3,32,$1
+ addq $1,$4,$1
+ cmpule $2,$1,$2
+ bne $2,$129
+ subq $27,1,$27
+ br $31,$128
+ .align 4
+$129:
+ mulq $27,$6,$1
+ mulq $27,$5,$4
+ srl $1,32,$3
+ sll $1,32,$1
+ addq $4,$3,$4
+ cmpult $10,$1,$2
+ subq $10,$1,$10
+ addq $2,$4,$2
+ cmpult $9,$2,$1
+ bis $2,$2,$4
+ beq $1,$134
+ addq $9,$11,$9
+ subq $27,1,$27
+$134:
+ subl $12,1,$12
+ subq $9,$4,$9
+ beq $12,$124
+ sll $27,32,$13
+ sll $9,32,$2
+ srl $10,32,$1
+ sll $10,32,$10
+ bis $2,$1,$9
+ br $31,$123
+ .align 4
+$124:
+ bis $13,$27,$0
+$136:
+ ldq $26,0($30)
+ ldq $9,8($30)
+ ldq $10,16($30)
+ ldq $11,24($30)
+ ldq $12,32($30)
+ ldq $13,40($30)
+ addq $30,48,$30
+ ret $31,($26),1
+ .end bn_div_words
+EOF
+ return($data);
+ }
+
diff --git a/crypto/bn/asm/bn-win32.asm b/crypto/bn/asm/bn-win32.asm
index 017ea462b0..871bd88d77 100644
--- a/crypto/bn/asm/bn-win32.asm
+++ b/crypto/bn/asm/bn-win32.asm
@@ -485,9 +485,9 @@ $L010sw_end:
_bn_sqr_words ENDP
_TEXT ENDS
_TEXT SEGMENT
-PUBLIC _bn_div64
+PUBLIC _bn_div_words
-_bn_div64 PROC NEAR
+_bn_div_words PROC NEAR
push ebp
push ebx
push esi
@@ -501,7 +501,7 @@ _bn_div64 PROC NEAR
pop ebx
pop ebp
ret
-_bn_div64 ENDP
+_bn_div_words ENDP
_TEXT ENDS
_TEXT SEGMENT
PUBLIC _bn_add_words
@@ -678,7 +678,6 @@ $L011aw_finish:
adc eax, 0
mov DWORD PTR 24[ebx],ecx
$L013aw_end:
- mov eax, eax
pop edi
pop esi
pop ebx
@@ -686,4 +685,1438 @@ $L013aw_end:
ret
_bn_add_words ENDP
_TEXT ENDS
+_TEXT SEGMENT
+PUBLIC _bn_sub_words
+
+_bn_sub_words PROC NEAR
+ push ebp
+ push ebx
+ push esi
+ push edi
+ ;
+ mov ebx, DWORD PTR 20[esp]
+ mov esi, DWORD PTR 24[esp]
+ mov edi, DWORD PTR 28[esp]
+ mov ebp, DWORD PTR 32[esp]
+ xor eax, eax
+ and ebp, 4294967288
+ jz $L014aw_finish
+L015aw_loop:
+ ; Round 0
+ mov ecx, DWORD PTR [esi]
+ mov edx, DWORD PTR [edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ mov DWORD PTR [ebx],ecx
+ ; Round 1
+ mov ecx, DWORD PTR 4[esi]
+ mov edx, DWORD PTR 4[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ mov DWORD PTR 4[ebx],ecx
+ ; Round 2
+ mov ecx, DWORD PTR 8[esi]
+ mov edx, DWORD PTR 8[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ mov DWORD PTR 8[ebx],ecx
+ ; Round 3
+ mov ecx, DWORD PTR 12[esi]
+ mov edx, DWORD PTR 12[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ mov DWORD PTR 12[ebx],ecx
+ ; Round 4
+ mov ecx, DWORD PTR 16[esi]
+ mov edx, DWORD PTR 16[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ mov DWORD PTR 16[ebx],ecx
+ ; Round 5
+ mov ecx, DWORD PTR 20[esi]
+ mov edx, DWORD PTR 20[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ mov DWORD PTR 20[ebx],ecx
+ ; Round 6
+ mov ecx, DWORD PTR 24[esi]
+ mov edx, DWORD PTR 24[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ mov DWORD PTR 24[ebx],ecx
+ ; Round 7
+ mov ecx, DWORD PTR 28[esi]
+ mov edx, DWORD PTR 28[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ mov DWORD PTR 28[ebx],ecx
+ ;
+ add esi, 32
+ add edi, 32
+ add ebx, 32
+ sub ebp, 8
+ jnz L015aw_loop
+$L014aw_finish:
+ mov ebp, DWORD PTR 32[esp]
+ and ebp, 7
+ jz $L016aw_end
+ ; Tail Round 0
+ mov ecx, DWORD PTR [esi]
+ mov edx, DWORD PTR [edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ dec ebp
+ mov DWORD PTR [ebx],ecx
+ jz $L016aw_end
+ ; Tail Round 1
+ mov ecx, DWORD PTR 4[esi]
+ mov edx, DWORD PTR 4[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ dec ebp
+ mov DWORD PTR 4[ebx],ecx
+ jz $L016aw_end
+ ; Tail Round 2
+ mov ecx, DWORD PTR 8[esi]
+ mov edx, DWORD PTR 8[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ dec ebp
+ mov DWORD PTR 8[ebx],ecx
+ jz $L016aw_end
+ ; Tail Round 3
+ mov ecx, DWORD PTR 12[esi]
+ mov edx, DWORD PTR 12[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ dec ebp
+ mov DWORD PTR 12[ebx],ecx
+ jz $L016aw_end
+ ; Tail Round 4
+ mov ecx, DWORD PTR 16[esi]
+ mov edx, DWORD PTR 16[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ dec ebp
+ mov DWORD PTR 16[ebx],ecx
+ jz $L016aw_end
+ ; Tail Round 5
+ mov ecx, DWORD PTR 20[esi]
+ mov edx, DWORD PTR 20[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ dec ebp
+ mov DWORD PTR 20[ebx],ecx
+ jz $L016aw_end
+ ; Tail Round 6
+ mov ecx, DWORD PTR 24[esi]
+ mov edx, DWORD PTR 24[edi]
+ sub ecx, eax
+ mov eax, 0
+ adc eax, eax
+ sub ecx, edx
+ adc eax, 0
+ mov DWORD PTR 24[ebx],ecx
+$L016aw_end:
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+_bn_sub_words ENDP
+_TEXT ENDS
+_TEXT SEGMENT
+PUBLIC _bn_mul_comba8
+
+_bn_mul_comba8 PROC NEAR
+ push esi
+ mov esi, DWORD PTR 12[esp]
+ push edi
+ mov edi, DWORD PTR 20[esp]
+ push ebp
+ push ebx
+ xor ebx, ebx
+ mov eax, DWORD PTR [esi]
+ xor ecx, ecx
+ mov edx, DWORD PTR [edi]
+ ; ################## Calculate word 0
+ xor ebp, ebp
+ ; mul a[0]*b[0]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ecx, edx
+ mov edx, DWORD PTR [edi]
+ adc ebp, 0
+ mov DWORD PTR [eax],ebx
+ mov eax, DWORD PTR 4[esi]
+ ; saved r[0]
+ ; ################## Calculate word 1
+ xor ebx, ebx
+ ; mul a[1]*b[0]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR [esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 4[edi]
+ adc ebx, 0
+ ; mul a[0]*b[1]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebp, edx
+ mov edx, DWORD PTR [edi]
+ adc ebx, 0
+ mov DWORD PTR 4[eax],ecx
+ mov eax, DWORD PTR 8[esi]
+ ; saved r[1]
+ ; ################## Calculate word 2
+ xor ecx, ecx
+ ; mul a[2]*b[0]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 4[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 4[edi]
+ adc ecx, 0
+ ; mul a[1]*b[1]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR [esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 8[edi]
+ adc ecx, 0
+ ; mul a[0]*b[2]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebx, edx
+ mov edx, DWORD PTR [edi]
+ adc ecx, 0
+ mov DWORD PTR 8[eax],ebp
+ mov eax, DWORD PTR 12[esi]
+ ; saved r[2]
+ ; ################## Calculate word 3
+ xor ebp, ebp
+ ; mul a[3]*b[0]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 8[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 4[edi]
+ adc ebp, 0
+ ; mul a[2]*b[1]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 4[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 8[edi]
+ adc ebp, 0
+ ; mul a[1]*b[2]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR [esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 12[edi]
+ adc ebp, 0
+ ; mul a[0]*b[3]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ecx, edx
+ mov edx, DWORD PTR [edi]
+ adc ebp, 0
+ mov DWORD PTR 12[eax],ebx
+ mov eax, DWORD PTR 16[esi]
+ ; saved r[3]
+ ; ################## Calculate word 4
+ xor ebx, ebx
+ ; mul a[4]*b[0]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 12[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 4[edi]
+ adc ebx, 0
+ ; mul a[3]*b[1]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 8[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 8[edi]
+ adc ebx, 0
+ ; mul a[2]*b[2]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 4[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 12[edi]
+ adc ebx, 0
+ ; mul a[1]*b[3]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR [esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 16[edi]
+ adc ebx, 0
+ ; mul a[0]*b[4]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebp, edx
+ mov edx, DWORD PTR [edi]
+ adc ebx, 0
+ mov DWORD PTR 16[eax],ecx
+ mov eax, DWORD PTR 20[esi]
+ ; saved r[4]
+ ; ################## Calculate word 5
+ xor ecx, ecx
+ ; mul a[5]*b[0]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 16[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 4[edi]
+ adc ecx, 0
+ ; mul a[4]*b[1]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 12[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 8[edi]
+ adc ecx, 0
+ ; mul a[3]*b[2]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 8[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 12[edi]
+ adc ecx, 0
+ ; mul a[2]*b[3]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 4[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 16[edi]
+ adc ecx, 0
+ ; mul a[1]*b[4]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR [esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 20[edi]
+ adc ecx, 0
+ ; mul a[0]*b[5]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebx, edx
+ mov edx, DWORD PTR [edi]
+ adc ecx, 0
+ mov DWORD PTR 20[eax],ebp
+ mov eax, DWORD PTR 24[esi]
+ ; saved r[5]
+ ; ################## Calculate word 6
+ xor ebp, ebp
+ ; mul a[6]*b[0]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 20[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 4[edi]
+ adc ebp, 0
+ ; mul a[5]*b[1]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 16[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 8[edi]
+ adc ebp, 0
+ ; mul a[4]*b[2]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 12[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 12[edi]
+ adc ebp, 0
+ ; mul a[3]*b[3]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 8[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 16[edi]
+ adc ebp, 0
+ ; mul a[2]*b[4]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 4[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 20[edi]
+ adc ebp, 0
+ ; mul a[1]*b[5]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR [esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 24[edi]
+ adc ebp, 0
+ ; mul a[0]*b[6]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ecx, edx
+ mov edx, DWORD PTR [edi]
+ adc ebp, 0
+ mov DWORD PTR 24[eax],ebx
+ mov eax, DWORD PTR 28[esi]
+ ; saved r[6]
+ ; ################## Calculate word 7
+ xor ebx, ebx
+ ; mul a[7]*b[0]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 24[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 4[edi]
+ adc ebx, 0
+ ; mul a[6]*b[1]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 20[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 8[edi]
+ adc ebx, 0
+ ; mul a[5]*b[2]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 16[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 12[edi]
+ adc ebx, 0
+ ; mul a[4]*b[3]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 12[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 16[edi]
+ adc ebx, 0
+ ; mul a[3]*b[4]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 8[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 20[edi]
+ adc ebx, 0
+ ; mul a[2]*b[5]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 4[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 24[edi]
+ adc ebx, 0
+ ; mul a[1]*b[6]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR [esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 28[edi]
+ adc ebx, 0
+ ; mul a[0]*b[7]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebp, edx
+ mov edx, DWORD PTR 4[edi]
+ adc ebx, 0
+ mov DWORD PTR 28[eax],ecx
+ mov eax, DWORD PTR 28[esi]
+ ; saved r[7]
+ ; ################## Calculate word 8
+ xor ecx, ecx
+ ; mul a[7]*b[1]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 24[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 8[edi]
+ adc ecx, 0
+ ; mul a[6]*b[2]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 20[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 12[edi]
+ adc ecx, 0
+ ; mul a[5]*b[3]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 16[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 16[edi]
+ adc ecx, 0
+ ; mul a[4]*b[4]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 12[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 20[edi]
+ adc ecx, 0
+ ; mul a[3]*b[5]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 8[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 24[edi]
+ adc ecx, 0
+ ; mul a[2]*b[6]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 4[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 28[edi]
+ adc ecx, 0
+ ; mul a[1]*b[7]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebx, edx
+ mov edx, DWORD PTR 8[edi]
+ adc ecx, 0
+ mov DWORD PTR 32[eax],ebp
+ mov eax, DWORD PTR 28[esi]
+ ; saved r[8]
+ ; ################## Calculate word 9
+ xor ebp, ebp
+ ; mul a[7]*b[2]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 24[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 12[edi]
+ adc ebp, 0
+ ; mul a[6]*b[3]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 20[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 16[edi]
+ adc ebp, 0
+ ; mul a[5]*b[4]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 16[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 20[edi]
+ adc ebp, 0
+ ; mul a[4]*b[5]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 12[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 24[edi]
+ adc ebp, 0
+ ; mul a[3]*b[6]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 8[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 28[edi]
+ adc ebp, 0
+ ; mul a[2]*b[7]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ecx, edx
+ mov edx, DWORD PTR 12[edi]
+ adc ebp, 0
+ mov DWORD PTR 36[eax],ebx
+ mov eax, DWORD PTR 28[esi]
+ ; saved r[9]
+ ; ################## Calculate word 10
+ xor ebx, ebx
+ ; mul a[7]*b[3]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 24[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 16[edi]
+ adc ebx, 0
+ ; mul a[6]*b[4]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 20[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 20[edi]
+ adc ebx, 0
+ ; mul a[5]*b[5]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 16[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 24[edi]
+ adc ebx, 0
+ ; mul a[4]*b[6]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 12[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 28[edi]
+ adc ebx, 0
+ ; mul a[3]*b[7]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebp, edx
+ mov edx, DWORD PTR 16[edi]
+ adc ebx, 0
+ mov DWORD PTR 40[eax],ecx
+ mov eax, DWORD PTR 28[esi]
+ ; saved r[10]
+ ; ################## Calculate word 11
+ xor ecx, ecx
+ ; mul a[7]*b[4]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 24[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 20[edi]
+ adc ecx, 0
+ ; mul a[6]*b[5]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 20[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 24[edi]
+ adc ecx, 0
+ ; mul a[5]*b[6]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 16[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 28[edi]
+ adc ecx, 0
+ ; mul a[4]*b[7]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebx, edx
+ mov edx, DWORD PTR 20[edi]
+ adc ecx, 0
+ mov DWORD PTR 44[eax],ebp
+ mov eax, DWORD PTR 28[esi]
+ ; saved r[11]
+ ; ################## Calculate word 12
+ xor ebp, ebp
+ ; mul a[7]*b[5]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 24[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 24[edi]
+ adc ebp, 0
+ ; mul a[6]*b[6]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 20[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 28[edi]
+ adc ebp, 0
+ ; mul a[5]*b[7]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ecx, edx
+ mov edx, DWORD PTR 24[edi]
+ adc ebp, 0
+ mov DWORD PTR 48[eax],ebx
+ mov eax, DWORD PTR 28[esi]
+ ; saved r[12]
+ ; ################## Calculate word 13
+ xor ebx, ebx
+ ; mul a[7]*b[6]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 24[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 28[edi]
+ adc ebx, 0
+ ; mul a[6]*b[7]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebp, edx
+ mov edx, DWORD PTR 28[edi]
+ adc ebx, 0
+ mov DWORD PTR 52[eax],ecx
+ mov eax, DWORD PTR 28[esi]
+ ; saved r[13]
+ ; ################## Calculate word 14
+ xor ecx, ecx
+ ; mul a[7]*b[7]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebx, edx
+ adc ecx, 0
+ mov DWORD PTR 56[eax],ebp
+ ; saved r[14]
+ ; save r[15]
+ mov DWORD PTR 60[eax],ebx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+_bn_mul_comba8 ENDP
+_TEXT ENDS
+_TEXT SEGMENT
+PUBLIC _bn_mul_comba4
+
+_bn_mul_comba4 PROC NEAR
+ push esi
+ mov esi, DWORD PTR 12[esp]
+ push edi
+ mov edi, DWORD PTR 20[esp]
+ push ebp
+ push ebx
+ xor ebx, ebx
+ mov eax, DWORD PTR [esi]
+ xor ecx, ecx
+ mov edx, DWORD PTR [edi]
+ ; ################## Calculate word 0
+ xor ebp, ebp
+ ; mul a[0]*b[0]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ecx, edx
+ mov edx, DWORD PTR [edi]
+ adc ebp, 0
+ mov DWORD PTR [eax],ebx
+ mov eax, DWORD PTR 4[esi]
+ ; saved r[0]
+ ; ################## Calculate word 1
+ xor ebx, ebx
+ ; mul a[1]*b[0]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR [esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 4[edi]
+ adc ebx, 0
+ ; mul a[0]*b[1]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebp, edx
+ mov edx, DWORD PTR [edi]
+ adc ebx, 0
+ mov DWORD PTR 4[eax],ecx
+ mov eax, DWORD PTR 8[esi]
+ ; saved r[1]
+ ; ################## Calculate word 2
+ xor ecx, ecx
+ ; mul a[2]*b[0]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 4[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 4[edi]
+ adc ecx, 0
+ ; mul a[1]*b[1]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR [esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 8[edi]
+ adc ecx, 0
+ ; mul a[0]*b[2]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebx, edx
+ mov edx, DWORD PTR [edi]
+ adc ecx, 0
+ mov DWORD PTR 8[eax],ebp
+ mov eax, DWORD PTR 12[esi]
+ ; saved r[2]
+ ; ################## Calculate word 3
+ xor ebp, ebp
+ ; mul a[3]*b[0]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 8[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 4[edi]
+ adc ebp, 0
+ ; mul a[2]*b[1]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 4[esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 8[edi]
+ adc ebp, 0
+ ; mul a[1]*b[2]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR [esi]
+ adc ecx, edx
+ mov edx, DWORD PTR 12[edi]
+ adc ebp, 0
+ ; mul a[0]*b[3]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ecx, edx
+ mov edx, DWORD PTR 4[edi]
+ adc ebp, 0
+ mov DWORD PTR 12[eax],ebx
+ mov eax, DWORD PTR 12[esi]
+ ; saved r[3]
+ ; ################## Calculate word 4
+ xor ebx, ebx
+ ; mul a[3]*b[1]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 8[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 8[edi]
+ adc ebx, 0
+ ; mul a[2]*b[2]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 4[esi]
+ adc ebp, edx
+ mov edx, DWORD PTR 12[edi]
+ adc ebx, 0
+ ; mul a[1]*b[3]
+ mul edx
+ add ecx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebp, edx
+ mov edx, DWORD PTR 8[edi]
+ adc ebx, 0
+ mov DWORD PTR 16[eax],ecx
+ mov eax, DWORD PTR 12[esi]
+ ; saved r[4]
+ ; ################## Calculate word 5
+ xor ecx, ecx
+ ; mul a[3]*b[2]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 8[esi]
+ adc ebx, edx
+ mov edx, DWORD PTR 12[edi]
+ adc ecx, 0
+ ; mul a[2]*b[3]
+ mul edx
+ add ebp, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ebx, edx
+ mov edx, DWORD PTR 12[edi]
+ adc ecx, 0
+ mov DWORD PTR 20[eax],ebp
+ mov eax, DWORD PTR 12[esi]
+ ; saved r[5]
+ ; ################## Calculate word 6
+ xor ebp, ebp
+ ; mul a[3]*b[3]
+ mul edx
+ add ebx, eax
+ mov eax, DWORD PTR 20[esp]
+ adc ecx, edx
+ adc ebp, 0
+ mov DWORD PTR 24[eax],ebx
+ ; saved r[6]
+ ; save r[7]
+ mov DWORD PTR 28[eax],ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+_bn_mul_comba4 ENDP
+_TEXT ENDS
+_TEXT SEGMENT
+PUBLIC _bn_sqr_comba8
+
+_bn_sqr_comba8 PROC NEAR
+ push esi
+ push edi
+ push ebp
+ push ebx
+ mov edi, DWORD PTR 20[esp]
+ mov esi, DWORD PTR 24[esp]
+ xor ebx, ebx
+ xor ecx, ecx
+ mov eax, DWORD PTR [esi]
+ ; ############### Calculate word 0
+ xor ebp, ebp
+ ; sqr a[0]*a[0]
+ mul eax
+ add ebx, eax
+ adc ecx, edx
+ mov edx, DWORD PTR [esi]
+ adc ebp, 0
+ mov DWORD PTR [edi],ebx
+ mov eax, DWORD PTR 4[esi]
+ ; saved r[0]
+ ; ############### Calculate word 1
+ xor ebx, ebx
+ ; sqr a[1]*a[0]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebx, 0
+ add ecx, eax
+ adc ebp, edx
+ mov eax, DWORD PTR 8[esi]
+ adc ebx, 0
+ mov DWORD PTR 4[edi],ecx
+ mov edx, DWORD PTR [esi]
+ ; saved r[1]
+ ; ############### Calculate word 2
+ xor ecx, ecx
+ ; sqr a[2]*a[0]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ecx, 0
+ add ebp, eax
+ adc ebx, edx
+ mov eax, DWORD PTR 4[esi]
+ adc ecx, 0
+ ; sqr a[1]*a[1]
+ mul eax
+ add ebp, eax
+ adc ebx, edx
+ mov edx, DWORD PTR [esi]
+ adc ecx, 0
+ mov DWORD PTR 8[edi],ebp
+ mov eax, DWORD PTR 12[esi]
+ ; saved r[2]
+ ; ############### Calculate word 3
+ xor ebp, ebp
+ ; sqr a[3]*a[0]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebp, 0
+ add ebx, eax
+ adc ecx, edx
+ mov eax, DWORD PTR 8[esi]
+ adc ebp, 0
+ mov edx, DWORD PTR 4[esi]
+ ; sqr a[2]*a[1]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebp, 0
+ add ebx, eax
+ adc ecx, edx
+ mov eax, DWORD PTR 16[esi]
+ adc ebp, 0
+ mov DWORD PTR 12[edi],ebx
+ mov edx, DWORD PTR [esi]
+ ; saved r[3]
+ ; ############### Calculate word 4
+ xor ebx, ebx
+ ; sqr a[4]*a[0]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebx, 0
+ add ecx, eax
+ adc ebp, edx
+ mov eax, DWORD PTR 12[esi]
+ adc ebx, 0
+ mov edx, DWORD PTR 4[esi]
+ ; sqr a[3]*a[1]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebx, 0
+ add ecx, eax
+ adc ebp, edx
+ mov eax, DWORD PTR 8[esi]
+ adc ebx, 0
+ ; sqr a[2]*a[2]
+ mul eax
+ add ecx, eax
+ adc ebp, edx
+ mov edx, DWORD PTR [esi]
+ adc ebx, 0
+ mov DWORD PTR 16[edi],ecx
+ mov eax, DWORD PTR 20[esi]
+ ; saved r[4]
+ ; ############### Calculate word 5
+ xor ecx, ecx
+ ; sqr a[5]*a[0]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ecx, 0
+ add ebp, eax
+ adc ebx, edx
+ mov eax, DWORD PTR 16[esi]
+ adc ecx, 0
+ mov edx, DWORD PTR 4[esi]
+ ; sqr a[4]*a[1]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ecx, 0
+ add ebp, eax
+ adc ebx, edx
+ mov eax, DWORD PTR 12[esi]
+ adc ecx, 0
+ mov edx, DWORD PTR 8[esi]
+ ; sqr a[3]*a[2]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ecx, 0
+ add ebp, eax
+ adc ebx, edx
+ mov eax, DWORD PTR 24[esi]
+ adc ecx, 0
+ mov DWORD PTR 20[edi],ebp
+ mov edx, DWORD PTR [esi]
+ ; saved r[5]
+ ; ############### Calculate word 6
+ xor ebp, ebp
+ ; sqr a[6]*a[0]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebp, 0
+ add ebx, eax
+ adc ecx, edx
+ mov eax, DWORD PTR 20[esi]
+ adc ebp, 0
+ mov edx, DWORD PTR 4[esi]
+ ; sqr a[5]*a[1]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebp, 0
+ add ebx, eax
+ adc ecx, edx
+ mov eax, DWORD PTR 16[esi]
+ adc ebp, 0
+ mov edx, DWORD PTR 8[esi]
+ ; sqr a[4]*a[2]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebp, 0
+ add ebx, eax
+ adc ecx, edx
+ mov eax, DWORD PTR 12[esi]
+ adc ebp, 0
+ ; sqr a[3]*a[3]
+ mul eax
+ add ebx, eax
+ adc ecx, edx
+ mov edx, DWORD PTR [esi]
+ adc ebp, 0
+ mov DWORD PTR 24[edi],ebx
+ mov eax, DWORD PTR 28[esi]
+ ; saved r[6]
+ ; ############### Calculate word 7
+ xor ebx, ebx
+ ; sqr a[7]*a[0]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebx, 0
+ add ecx, eax
+ adc ebp, edx
+ mov eax, DWORD PTR 24[esi]
+ adc ebx, 0
+ mov edx, DWORD PTR 4[esi]
+ ; sqr a[6]*a[1]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebx, 0
+ add ecx, eax
+ adc ebp, edx
+ mov eax, DWORD PTR 20[esi]
+ adc ebx, 0
+ mov edx, DWORD PTR 8[esi]
+ ; sqr a[5]*a[2]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebx, 0
+ add ecx, eax
+ adc ebp, edx
+ mov eax, DWORD PTR 16[esi]
+ adc ebx, 0
+ mov edx, DWORD PTR 12[esi]
+ ; sqr a[4]*a[3]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebx, 0
+ add ecx, eax
+ adc ebp, edx
+ mov eax, DWORD PTR 28[esi]
+ adc ebx, 0
+ mov DWORD PTR 28[edi],ecx
+ mov edx, DWORD PTR 4[esi]
+ ; saved r[7]
+ ; ############### Calculate word 8
+ xor ecx, ecx
+ ; sqr a[7]*a[1]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ecx, 0
+ add ebp, eax
+ adc ebx, edx
+ mov eax, DWORD PTR 24[esi]
+ adc ecx, 0
+ mov edx, DWORD PTR 8[esi]
+ ; sqr a[6]*a[2]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ecx, 0
+ add ebp, eax
+ adc ebx, edx
+ mov eax, DWORD PTR 20[esi]
+ adc ecx, 0
+ mov edx, DWORD PTR 12[esi]
+ ; sqr a[5]*a[3]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ecx, 0
+ add ebp, eax
+ adc ebx, edx
+ mov eax, DWORD PTR 16[esi]
+ adc ecx, 0
+ ; sqr a[4]*a[4]
+ mul eax
+ add ebp, eax
+ adc ebx, edx
+ mov edx, DWORD PTR 8[esi]
+ adc ecx, 0
+ mov DWORD PTR 32[edi],ebp
+ mov eax, DWORD PTR 28[esi]
+ ; saved r[8]
+ ; ############### Calculate word 9
+ xor ebp, ebp
+ ; sqr a[7]*a[2]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebp, 0
+ add ebx, eax
+ adc ecx, edx
+ mov eax, DWORD PTR 24[esi]
+ adc ebp, 0
+ mov edx, DWORD PTR 12[esi]
+ ; sqr a[6]*a[3]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebp, 0
+ add ebx, eax
+ adc ecx, edx
+ mov eax, DWORD PTR 20[esi]
+ adc ebp, 0
+ mov edx, DWORD PTR 16[esi]
+ ; sqr a[5]*a[4]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebp, 0
+ add ebx, eax
+ adc ecx, edx
+ mov eax, DWORD PTR 28[esi]
+ adc ebp, 0
+ mov DWORD PTR 36[edi],ebx
+ mov edx, DWORD PTR 12[esi]
+ ; saved r[9]
+ ; ############### Calculate word 10
+ xor ebx, ebx
+ ; sqr a[7]*a[3]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebx, 0
+ add ecx, eax
+ adc ebp, edx
+ mov eax, DWORD PTR 24[esi]
+ adc ebx, 0
+ mov edx, DWORD PTR 16[esi]
+ ; sqr a[6]*a[4]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebx, 0
+ add ecx, eax
+ adc ebp, edx
+ mov eax, DWORD PTR 20[esi]
+ adc ebx, 0
+ ; sqr a[5]*a[5]
+ mul eax
+ add ecx, eax
+ adc ebp, edx
+ mov edx, DWORD PTR 16[esi]
+ adc ebx, 0
+ mov DWORD PTR 40[edi],ecx
+ mov eax, DWORD PTR 28[esi]
+ ; saved r[10]
+ ; ############### Calculate word 11
+ xor ecx, ecx
+ ; sqr a[7]*a[4]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ecx, 0
+ add ebp, eax
+ adc ebx, edx
+ mov eax, DWORD PTR 24[esi]
+ adc ecx, 0
+ mov edx, DWORD PTR 20[esi]
+ ; sqr a[6]*a[5]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ecx, 0
+ add ebp, eax
+ adc ebx, edx
+ mov eax, DWORD PTR 28[esi]
+ adc ecx, 0
+ mov DWORD PTR 44[edi],ebp
+ mov edx, DWORD PTR 20[esi]
+ ; saved r[11]
+ ; ############### Calculate word 12
+ xor ebp, ebp
+ ; sqr a[7]*a[5]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebp, 0
+ add ebx, eax
+ adc ecx, edx
+ mov eax, DWORD PTR 24[esi]
+ adc ebp, 0
+ ; sqr a[6]*a[6]
+ mul eax
+ add ebx, eax
+ adc ecx, edx
+ mov edx, DWORD PTR 24[esi]
+ adc ebp, 0
+ mov DWORD PTR 48[edi],ebx
+ mov eax, DWORD PTR 28[esi]
+ ; saved r[12]
+ ; ############### Calculate word 13
+ xor ebx, ebx
+ ; sqr a[7]*a[6]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebx, 0
+ add ecx, eax
+ adc ebp, edx
+ mov eax, DWORD PTR 28[esi]
+ adc ebx, 0
+ mov DWORD PTR 52[edi],ecx
+ ; saved r[13]
+ ; ############### Calculate word 14
+ xor ecx, ecx
+ ; sqr a[7]*a[7]
+ mul eax
+ add ebp, eax
+ adc ebx, edx
+ adc ecx, 0
+ mov DWORD PTR 56[edi],ebp
+ ; saved r[14]
+ mov DWORD PTR 60[edi],ebx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+_bn_sqr_comba8 ENDP
+_TEXT ENDS
+_TEXT SEGMENT
+PUBLIC _bn_sqr_comba4
+
+_bn_sqr_comba4 PROC NEAR
+ push esi
+ push edi
+ push ebp
+ push ebx
+ mov edi, DWORD PTR 20[esp]
+ mov esi, DWORD PTR 24[esp]
+ xor ebx, ebx
+ xor ecx, ecx
+ mov eax, DWORD PTR [esi]
+ ; ############### Calculate word 0
+ xor ebp, ebp
+ ; sqr a[0]*a[0]
+ mul eax
+ add ebx, eax
+ adc ecx, edx
+ mov edx, DWORD PTR [esi]
+ adc ebp, 0
+ mov DWORD PTR [edi],ebx
+ mov eax, DWORD PTR 4[esi]
+ ; saved r[0]
+ ; ############### Calculate word 1
+ xor ebx, ebx
+ ; sqr a[1]*a[0]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebx, 0
+ add ecx, eax
+ adc ebp, edx
+ mov eax, DWORD PTR 8[esi]
+ adc ebx, 0
+ mov DWORD PTR 4[edi],ecx
+ mov edx, DWORD PTR [esi]
+ ; saved r[1]
+ ; ############### Calculate word 2
+ xor ecx, ecx
+ ; sqr a[2]*a[0]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ecx, 0
+ add ebp, eax
+ adc ebx, edx
+ mov eax, DWORD PTR 4[esi]
+ adc ecx, 0
+ ; sqr a[1]*a[1]
+ mul eax
+ add ebp, eax
+ adc ebx, edx
+ mov edx, DWORD PTR [esi]
+ adc ecx, 0
+ mov DWORD PTR 8[edi],ebp
+ mov eax, DWORD PTR 12[esi]
+ ; saved r[2]
+ ; ############### Calculate word 3
+ xor ebp, ebp
+ ; sqr a[3]*a[0]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebp, 0
+ add ebx, eax
+ adc ecx, edx
+ mov eax, DWORD PTR 8[esi]
+ adc ebp, 0
+ mov edx, DWORD PTR 4[esi]
+ ; sqr a[2]*a[1]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebp, 0
+ add ebx, eax
+ adc ecx, edx
+ mov eax, DWORD PTR 12[esi]
+ adc ebp, 0
+ mov DWORD PTR 12[edi],ebx
+ mov edx, DWORD PTR 4[esi]
+ ; saved r[3]
+ ; ############### Calculate word 4
+ xor ebx, ebx
+ ; sqr a[3]*a[1]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ebx, 0
+ add ecx, eax
+ adc ebp, edx
+ mov eax, DWORD PTR 8[esi]
+ adc ebx, 0
+ ; sqr a[2]*a[2]
+ mul eax
+ add ecx, eax
+ adc ebp, edx
+ mov edx, DWORD PTR 8[esi]
+ adc ebx, 0
+ mov DWORD PTR 16[edi],ecx
+ mov eax, DWORD PTR 12[esi]
+ ; saved r[4]
+ ; ############### Calculate word 5
+ xor ecx, ecx
+ ; sqr a[3]*a[2]
+ mul edx
+ add eax, eax
+ adc edx, edx
+ adc ecx, 0
+ add ebp, eax
+ adc ebx, edx
+ mov eax, DWORD PTR 12[esi]
+ adc ecx, 0
+ mov DWORD PTR 20[edi],ebp
+ ; saved r[5]
+ ; ############### Calculate word 6
+ xor ebp, ebp
+ ; sqr a[3]*a[3]
+ mul eax
+ add ebx, eax
+ adc ecx, edx
+ adc ebp, 0
+ mov DWORD PTR 24[edi],ebx
+ ; saved r[6]
+ mov DWORD PTR 28[edi],ecx
+ pop ebx
+ pop ebp
+ pop edi
+ pop esi
+ ret
+_bn_sqr_comba4 ENDP
+_TEXT ENDS
END
diff --git a/crypto/bn/asm/bn86unix.cpp b/crypto/bn/asm/bn86unix.cpp
index 64702201ea..639a3ac41c 100644
--- a/crypto/bn/asm/bn86unix.cpp
+++ b/crypto/bn/asm/bn86unix.cpp
@@ -12,8 +12,13 @@
#define bn_mul_add_words _bn_mul_add_words
#define bn_mul_words _bn_mul_words
#define bn_sqr_words _bn_sqr_words
-#define bn_div64 _bn_div64
+#define bn_div_words _bn_div_words
#define bn_add_words _bn_add_words
+#define bn_sub_words _bn_sub_words
+#define bn_mul_comba8 _bn_mul_comba8
+#define bn_mul_comba4 _bn_mul_comba4
+#define bn_sqr_comba8 _bn_sqr_comba8
+#define bn_sqr_comba4 _bn_sqr_comba4
#endif
@@ -544,9 +549,9 @@ bn_sqr_words:
.ident "bn_sqr_words"
.text
.align ALIGN
-.globl bn_div64
- TYPE(bn_div64,@function)
-bn_div64:
+.globl bn_div_words
+ TYPE(bn_div_words,@function)
+bn_div_words:
pushl %ebp
pushl %ebx
pushl %esi
@@ -561,9 +566,9 @@ bn_div64:
popl %ebx
popl %ebp
ret
-.bn_div64_end:
- SIZE(bn_div64,.bn_div64_end-bn_div64)
-.ident "bn_div64"
+.bn_div_words_end:
+ SIZE(bn_div_words,.bn_div_words_end-bn_div_words)
+.ident "bn_div_words"
.text
.align ALIGN
.globl bn_add_words
@@ -741,7 +746,6 @@ bn_add_words:
adcl $0, %eax
movl %ecx, 24(%ebx)
.L013aw_end:
- movl %eax, %eax
popl %edi
popl %esi
popl %ebx
@@ -750,3 +754,1448 @@ bn_add_words:
.bn_add_words_end:
SIZE(bn_add_words,.bn_add_words_end-bn_add_words)
.ident "bn_add_words"
+.text
+ .align ALIGN
+.globl bn_sub_words
+ TYPE(bn_sub_words,@function)
+bn_sub_words:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+
+
+ movl 20(%esp), %ebx
+ movl 24(%esp), %esi
+ movl 28(%esp), %edi
+ movl 32(%esp), %ebp
+ xorl %eax, %eax
+ andl $4294967288, %ebp
+ jz .L014aw_finish
+.L015aw_loop:
+ /* Round 0 */
+ movl (%esi), %ecx
+ movl (%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, (%ebx)
+ /* Round 1 */
+ movl 4(%esi), %ecx
+ movl 4(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 4(%ebx)
+ /* Round 2 */
+ movl 8(%esi), %ecx
+ movl 8(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 8(%ebx)
+ /* Round 3 */
+ movl 12(%esi), %ecx
+ movl 12(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 12(%ebx)
+ /* Round 4 */
+ movl 16(%esi), %ecx
+ movl 16(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 16(%ebx)
+ /* Round 5 */
+ movl 20(%esi), %ecx
+ movl 20(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 20(%ebx)
+ /* Round 6 */
+ movl 24(%esi), %ecx
+ movl 24(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 24(%ebx)
+ /* Round 7 */
+ movl 28(%esi), %ecx
+ movl 28(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 28(%ebx)
+
+ addl $32, %esi
+ addl $32, %edi
+ addl $32, %ebx
+ subl $8, %ebp
+ jnz .L015aw_loop
+.L014aw_finish:
+ movl 32(%esp), %ebp
+ andl $7, %ebp
+ jz .L016aw_end
+ /* Tail Round 0 */
+ movl (%esi), %ecx
+ movl (%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, (%ebx)
+ jz .L016aw_end
+ /* Tail Round 1 */
+ movl 4(%esi), %ecx
+ movl 4(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 4(%ebx)
+ jz .L016aw_end
+ /* Tail Round 2 */
+ movl 8(%esi), %ecx
+ movl 8(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 8(%ebx)
+ jz .L016aw_end
+ /* Tail Round 3 */
+ movl 12(%esi), %ecx
+ movl 12(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 12(%ebx)
+ jz .L016aw_end
+ /* Tail Round 4 */
+ movl 16(%esi), %ecx
+ movl 16(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 16(%ebx)
+ jz .L016aw_end
+ /* Tail Round 5 */
+ movl 20(%esi), %ecx
+ movl 20(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 20(%ebx)
+ jz .L016aw_end
+ /* Tail Round 6 */
+ movl 24(%esi), %ecx
+ movl 24(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 24(%ebx)
+.L016aw_end:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.bn_sub_words_end:
+ SIZE(bn_sub_words,.bn_sub_words_end-bn_sub_words)
+.ident "bn_sub_words"
+.text
+ .align ALIGN
+.globl bn_mul_comba8
+ TYPE(bn_mul_comba8,@function)
+bn_mul_comba8:
+ pushl %esi
+ movl 12(%esp), %esi
+ pushl %edi
+ movl 20(%esp), %edi
+ pushl %ebp
+ pushl %ebx
+ xorl %ebx, %ebx
+ movl (%esi), %eax
+ xorl %ecx, %ecx
+ movl (%edi), %edx
+ /* ################## Calculate word 0 */
+ xorl %ebp, %ebp
+ /* mul a[0]*b[0] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl (%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, (%eax)
+ movl 4(%esi), %eax
+ /* saved r[0] */
+ /* ################## Calculate word 1 */
+ xorl %ebx, %ebx
+ /* mul a[1]*b[0] */
+ mull %edx
+ addl %eax, %ecx
+ movl (%esi), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[0]*b[1] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl (%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 4(%eax)
+ movl 8(%esi), %eax
+ /* saved r[1] */
+ /* ################## Calculate word 2 */
+ xorl %ecx, %ecx
+ /* mul a[2]*b[0] */
+ mull %edx
+ addl %eax, %ebp
+ movl 4(%esi), %eax
+ adcl %edx, %ebx
+ movl 4(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[1]*b[1] */
+ mull %edx
+ addl %eax, %ebp
+ movl (%esi), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[0]*b[2] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl (%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 8(%eax)
+ movl 12(%esi), %eax
+ /* saved r[2] */
+ /* ################## Calculate word 3 */
+ xorl %ebp, %ebp
+ /* mul a[3]*b[0] */
+ mull %edx
+ addl %eax, %ebx
+ movl 8(%esi), %eax
+ adcl %edx, %ecx
+ movl 4(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[2]*b[1] */
+ mull %edx
+ addl %eax, %ebx
+ movl 4(%esi), %eax
+ adcl %edx, %ecx
+ movl 8(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[1]*b[2] */
+ mull %edx
+ addl %eax, %ebx
+ movl (%esi), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[0]*b[3] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl (%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 12(%eax)
+ movl 16(%esi), %eax
+ /* saved r[3] */
+ /* ################## Calculate word 4 */
+ xorl %ebx, %ebx
+ /* mul a[4]*b[0] */
+ mull %edx
+ addl %eax, %ecx
+ movl 12(%esi), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[3]*b[1] */
+ mull %edx
+ addl %eax, %ecx
+ movl 8(%esi), %eax
+ adcl %edx, %ebp
+ movl 8(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[2]*b[2] */
+ mull %edx
+ addl %eax, %ecx
+ movl 4(%esi), %eax
+ adcl %edx, %ebp
+ movl 12(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[1]*b[3] */
+ mull %edx
+ addl %eax, %ecx
+ movl (%esi), %eax
+ adcl %edx, %ebp
+ movl 16(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[0]*b[4] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl (%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 16(%eax)
+ movl 20(%esi), %eax
+ /* saved r[4] */
+ /* ################## Calculate word 5 */
+ xorl %ecx, %ecx
+ /* mul a[5]*b[0] */
+ mull %edx
+ addl %eax, %ebp
+ movl 16(%esi), %eax
+ adcl %edx, %ebx
+ movl 4(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[4]*b[1] */
+ mull %edx
+ addl %eax, %ebp
+ movl 12(%esi), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[3]*b[2] */
+ mull %edx
+ addl %eax, %ebp
+ movl 8(%esi), %eax
+ adcl %edx, %ebx
+ movl 12(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[2]*b[3] */
+ mull %edx
+ addl %eax, %ebp
+ movl 4(%esi), %eax
+ adcl %edx, %ebx
+ movl 16(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[1]*b[4] */
+ mull %edx
+ addl %eax, %ebp
+ movl (%esi), %eax
+ adcl %edx, %ebx
+ movl 20(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[0]*b[5] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl (%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 20(%eax)
+ movl 24(%esi), %eax
+ /* saved r[5] */
+ /* ################## Calculate word 6 */
+ xorl %ebp, %ebp
+ /* mul a[6]*b[0] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esi), %eax
+ adcl %edx, %ecx
+ movl 4(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[5]*b[1] */
+ mull %edx
+ addl %eax, %ebx
+ movl 16(%esi), %eax
+ adcl %edx, %ecx
+ movl 8(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[4]*b[2] */
+ mull %edx
+ addl %eax, %ebx
+ movl 12(%esi), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[3]*b[3] */
+ mull %edx
+ addl %eax, %ebx
+ movl 8(%esi), %eax
+ adcl %edx, %ecx
+ movl 16(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[2]*b[4] */
+ mull %edx
+ addl %eax, %ebx
+ movl 4(%esi), %eax
+ adcl %edx, %ecx
+ movl 20(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[1]*b[5] */
+ mull %edx
+ addl %eax, %ebx
+ movl (%esi), %eax
+ adcl %edx, %ecx
+ movl 24(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[0]*b[6] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl (%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 24(%eax)
+ movl 28(%esi), %eax
+ /* saved r[6] */
+ /* ################## Calculate word 7 */
+ xorl %ebx, %ebx
+ /* mul a[7]*b[0] */
+ mull %edx
+ addl %eax, %ecx
+ movl 24(%esi), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[6]*b[1] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esi), %eax
+ adcl %edx, %ebp
+ movl 8(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[5]*b[2] */
+ mull %edx
+ addl %eax, %ecx
+ movl 16(%esi), %eax
+ adcl %edx, %ebp
+ movl 12(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[4]*b[3] */
+ mull %edx
+ addl %eax, %ecx
+ movl 12(%esi), %eax
+ adcl %edx, %ebp
+ movl 16(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[3]*b[4] */
+ mull %edx
+ addl %eax, %ecx
+ movl 8(%esi), %eax
+ adcl %edx, %ebp
+ movl 20(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[2]*b[5] */
+ mull %edx
+ addl %eax, %ecx
+ movl 4(%esi), %eax
+ adcl %edx, %ebp
+ movl 24(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[1]*b[6] */
+ mull %edx
+ addl %eax, %ecx
+ movl (%esi), %eax
+ adcl %edx, %ebp
+ movl 28(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[0]*b[7] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 28(%eax)
+ movl 28(%esi), %eax
+ /* saved r[7] */
+ /* ################## Calculate word 8 */
+ xorl %ecx, %ecx
+ /* mul a[7]*b[1] */
+ mull %edx
+ addl %eax, %ebp
+ movl 24(%esi), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[6]*b[2] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esi), %eax
+ adcl %edx, %ebx
+ movl 12(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[5]*b[3] */
+ mull %edx
+ addl %eax, %ebp
+ movl 16(%esi), %eax
+ adcl %edx, %ebx
+ movl 16(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[4]*b[4] */
+ mull %edx
+ addl %eax, %ebp
+ movl 12(%esi), %eax
+ adcl %edx, %ebx
+ movl 20(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[3]*b[5] */
+ mull %edx
+ addl %eax, %ebp
+ movl 8(%esi), %eax
+ adcl %edx, %ebx
+ movl 24(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[2]*b[6] */
+ mull %edx
+ addl %eax, %ebp
+ movl 4(%esi), %eax
+ adcl %edx, %ebx
+ movl 28(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[1]*b[7] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 32(%eax)
+ movl 28(%esi), %eax
+ /* saved r[8] */
+ /* ################## Calculate word 9 */
+ xorl %ebp, %ebp
+ /* mul a[7]*b[2] */
+ mull %edx
+ addl %eax, %ebx
+ movl 24(%esi), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[6]*b[3] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esi), %eax
+ adcl %edx, %ecx
+ movl 16(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[5]*b[4] */
+ mull %edx
+ addl %eax, %ebx
+ movl 16(%esi), %eax
+ adcl %edx, %ecx
+ movl 20(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[4]*b[5] */
+ mull %edx
+ addl %eax, %ebx
+ movl 12(%esi), %eax
+ adcl %edx, %ecx
+ movl 24(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[3]*b[6] */
+ mull %edx
+ addl %eax, %ebx
+ movl 8(%esi), %eax
+ adcl %edx, %ecx
+ movl 28(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[2]*b[7] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 36(%eax)
+ movl 28(%esi), %eax
+ /* saved r[9] */
+ /* ################## Calculate word 10 */
+ xorl %ebx, %ebx
+ /* mul a[7]*b[3] */
+ mull %edx
+ addl %eax, %ecx
+ movl 24(%esi), %eax
+ adcl %edx, %ebp
+ movl 16(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[6]*b[4] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esi), %eax
+ adcl %edx, %ebp
+ movl 20(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[5]*b[5] */
+ mull %edx
+ addl %eax, %ecx
+ movl 16(%esi), %eax
+ adcl %edx, %ebp
+ movl 24(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[4]*b[6] */
+ mull %edx
+ addl %eax, %ecx
+ movl 12(%esi), %eax
+ adcl %edx, %ebp
+ movl 28(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[3]*b[7] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl 16(%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 40(%eax)
+ movl 28(%esi), %eax
+ /* saved r[10] */
+ /* ################## Calculate word 11 */
+ xorl %ecx, %ecx
+ /* mul a[7]*b[4] */
+ mull %edx
+ addl %eax, %ebp
+ movl 24(%esi), %eax
+ adcl %edx, %ebx
+ movl 20(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[6]*b[5] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esi), %eax
+ adcl %edx, %ebx
+ movl 24(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[5]*b[6] */
+ mull %edx
+ addl %eax, %ebp
+ movl 16(%esi), %eax
+ adcl %edx, %ebx
+ movl 28(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[4]*b[7] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl 20(%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 44(%eax)
+ movl 28(%esi), %eax
+ /* saved r[11] */
+ /* ################## Calculate word 12 */
+ xorl %ebp, %ebp
+ /* mul a[7]*b[5] */
+ mull %edx
+ addl %eax, %ebx
+ movl 24(%esi), %eax
+ adcl %edx, %ecx
+ movl 24(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[6]*b[6] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esi), %eax
+ adcl %edx, %ecx
+ movl 28(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[5]*b[7] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl 24(%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 48(%eax)
+ movl 28(%esi), %eax
+ /* saved r[12] */
+ /* ################## Calculate word 13 */
+ xorl %ebx, %ebx
+ /* mul a[7]*b[6] */
+ mull %edx
+ addl %eax, %ecx
+ movl 24(%esi), %eax
+ adcl %edx, %ebp
+ movl 28(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[6]*b[7] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl 28(%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 52(%eax)
+ movl 28(%esi), %eax
+ /* saved r[13] */
+ /* ################## Calculate word 14 */
+ xorl %ecx, %ecx
+ /* mul a[7]*b[7] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ adcl $0, %ecx
+ movl %ebp, 56(%eax)
+ /* saved r[14] */
+ /* save r[15] */
+ movl %ebx, 60(%eax)
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
+.bn_mul_comba8_end:
+ SIZE(bn_mul_comba8,.bn_mul_comba8_end-bn_mul_comba8)
+.ident "desasm.pl"
+.text
+ .align ALIGN
+.globl bn_mul_comba4
+ TYPE(bn_mul_comba4,@function)
+bn_mul_comba4:
+ pushl %esi
+ movl 12(%esp), %esi
+ pushl %edi
+ movl 20(%esp), %edi
+ pushl %ebp
+ pushl %ebx
+ xorl %ebx, %ebx
+ movl (%esi), %eax
+ xorl %ecx, %ecx
+ movl (%edi), %edx
+ /* ################## Calculate word 0 */
+ xorl %ebp, %ebp
+ /* mul a[0]*b[0] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl (%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, (%eax)
+ movl 4(%esi), %eax
+ /* saved r[0] */
+ /* ################## Calculate word 1 */
+ xorl %ebx, %ebx
+ /* mul a[1]*b[0] */
+ mull %edx
+ addl %eax, %ecx
+ movl (%esi), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[0]*b[1] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl (%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 4(%eax)
+ movl 8(%esi), %eax
+ /* saved r[1] */
+ /* ################## Calculate word 2 */
+ xorl %ecx, %ecx
+ /* mul a[2]*b[0] */
+ mull %edx
+ addl %eax, %ebp
+ movl 4(%esi), %eax
+ adcl %edx, %ebx
+ movl 4(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[1]*b[1] */
+ mull %edx
+ addl %eax, %ebp
+ movl (%esi), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[0]*b[2] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl (%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 8(%eax)
+ movl 12(%esi), %eax
+ /* saved r[2] */
+ /* ################## Calculate word 3 */
+ xorl %ebp, %ebp
+ /* mul a[3]*b[0] */
+ mull %edx
+ addl %eax, %ebx
+ movl 8(%esi), %eax
+ adcl %edx, %ecx
+ movl 4(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[2]*b[1] */
+ mull %edx
+ addl %eax, %ebx
+ movl 4(%esi), %eax
+ adcl %edx, %ecx
+ movl 8(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[1]*b[2] */
+ mull %edx
+ addl %eax, %ebx
+ movl (%esi), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[0]*b[3] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl 4(%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 12(%eax)
+ movl 12(%esi), %eax
+ /* saved r[3] */
+ /* ################## Calculate word 4 */
+ xorl %ebx, %ebx
+ /* mul a[3]*b[1] */
+ mull %edx
+ addl %eax, %ecx
+ movl 8(%esi), %eax
+ adcl %edx, %ebp
+ movl 8(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[2]*b[2] */
+ mull %edx
+ addl %eax, %ecx
+ movl 4(%esi), %eax
+ adcl %edx, %ebp
+ movl 12(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[1]*b[3] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl 8(%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 16(%eax)
+ movl 12(%esi), %eax
+ /* saved r[4] */
+ /* ################## Calculate word 5 */
+ xorl %ecx, %ecx
+ /* mul a[3]*b[2] */
+ mull %edx
+ addl %eax, %ebp
+ movl 8(%esi), %eax
+ adcl %edx, %ebx
+ movl 12(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[2]*b[3] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl 12(%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 20(%eax)
+ movl 12(%esi), %eax
+ /* saved r[5] */
+ /* ################## Calculate word 6 */
+ xorl %ebp, %ebp
+ /* mul a[3]*b[3] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ adcl $0, %ebp
+ movl %ebx, 24(%eax)
+ /* saved r[6] */
+ /* save r[7] */
+ movl %ecx, 28(%eax)
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
+.bn_mul_comba4_end:
+ SIZE(bn_mul_comba4,.bn_mul_comba4_end-bn_mul_comba4)
+.ident "desasm.pl"
+.text
+ .align ALIGN
+.globl bn_sqr_comba8
+ TYPE(bn_sqr_comba8,@function)
+bn_sqr_comba8:
+ pushl %esi
+ pushl %edi
+ pushl %ebp
+ pushl %ebx
+ movl 20(%esp), %edi
+ movl 24(%esp), %esi
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ movl (%esi), %eax
+ /* ############### Calculate word 0 */
+ xorl %ebp, %ebp
+ /* sqr a[0]*a[0] */
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl (%esi), %edx
+ adcl $0, %ebp
+ movl %ebx, (%edi)
+ movl 4(%esi), %eax
+ /* saved r[0] */
+ /* ############### Calculate word 1 */
+ xorl %ebx, %ebx
+ /* sqr a[1]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %eax
+ adcl $0, %ebx
+ movl %ecx, 4(%edi)
+ movl (%esi), %edx
+ /* saved r[1] */
+ /* ############### Calculate word 2 */
+ xorl %ecx, %ecx
+ /* sqr a[2]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 4(%esi), %eax
+ adcl $0, %ecx
+ /* sqr a[1]*a[1] */
+ mull %eax
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl (%esi), %edx
+ adcl $0, %ecx
+ movl %ebp, 8(%edi)
+ movl 12(%esi), %eax
+ /* saved r[2] */
+ /* ############### Calculate word 3 */
+ xorl %ebp, %ebp
+ /* sqr a[3]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 8(%esi), %eax
+ adcl $0, %ebp
+ movl 4(%esi), %edx
+ /* sqr a[2]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 16(%esi), %eax
+ adcl $0, %ebp
+ movl %ebx, 12(%edi)
+ movl (%esi), %edx
+ /* saved r[3] */
+ /* ############### Calculate word 4 */
+ xorl %ebx, %ebx
+ /* sqr a[4]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 12(%esi), %eax
+ adcl $0, %ebx
+ movl 4(%esi), %edx
+ /* sqr a[3]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %eax
+ adcl $0, %ebx
+ /* sqr a[2]*a[2] */
+ mull %eax
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl (%esi), %edx
+ adcl $0, %ebx
+ movl %ecx, 16(%edi)
+ movl 20(%esi), %eax
+ /* saved r[4] */
+ /* ############### Calculate word 5 */
+ xorl %ecx, %ecx
+ /* sqr a[5]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 16(%esi), %eax
+ adcl $0, %ecx
+ movl 4(%esi), %edx
+ /* sqr a[4]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 12(%esi), %eax
+ adcl $0, %ecx
+ movl 8(%esi), %edx
+ /* sqr a[3]*a[2] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 24(%esi), %eax
+ adcl $0, %ecx
+ movl %ebp, 20(%edi)
+ movl (%esi), %edx
+ /* saved r[5] */
+ /* ############### Calculate word 6 */
+ xorl %ebp, %ebp
+ /* sqr a[6]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 20(%esi), %eax
+ adcl $0, %ebp
+ movl 4(%esi), %edx
+ /* sqr a[5]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 16(%esi), %eax
+ adcl $0, %ebp
+ movl 8(%esi), %edx
+ /* sqr a[4]*a[2] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 12(%esi), %eax
+ adcl $0, %ebp
+ /* sqr a[3]*a[3] */
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl (%esi), %edx
+ adcl $0, %ebp
+ movl %ebx, 24(%edi)
+ movl 28(%esi), %eax
+ /* saved r[6] */
+ /* ############### Calculate word 7 */
+ xorl %ebx, %ebx
+ /* sqr a[7]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 24(%esi), %eax
+ adcl $0, %ebx
+ movl 4(%esi), %edx
+ /* sqr a[6]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 20(%esi), %eax
+ adcl $0, %ebx
+ movl 8(%esi), %edx
+ /* sqr a[5]*a[2] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 16(%esi), %eax
+ adcl $0, %ebx
+ movl 12(%esi), %edx
+ /* sqr a[4]*a[3] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 28(%esi), %eax
+ adcl $0, %ebx
+ movl %ecx, 28(%edi)
+ movl 4(%esi), %edx
+ /* saved r[7] */
+ /* ############### Calculate word 8 */
+ xorl %ecx, %ecx
+ /* sqr a[7]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 24(%esi), %eax
+ adcl $0, %ecx
+ movl 8(%esi), %edx
+ /* sqr a[6]*a[2] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 20(%esi), %eax
+ adcl $0, %ecx
+ movl 12(%esi), %edx
+ /* sqr a[5]*a[3] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 16(%esi), %eax
+ adcl $0, %ecx
+ /* sqr a[4]*a[4] */
+ mull %eax
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 8(%esi), %edx
+ adcl $0, %ecx
+ movl %ebp, 32(%edi)
+ movl 28(%esi), %eax
+ /* saved r[8] */
+ /* ############### Calculate word 9 */
+ xorl %ebp, %ebp
+ /* sqr a[7]*a[2] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 24(%esi), %eax
+ adcl $0, %ebp
+ movl 12(%esi), %edx
+ /* sqr a[6]*a[3] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 20(%esi), %eax
+ adcl $0, %ebp
+ movl 16(%esi), %edx
+ /* sqr a[5]*a[4] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 28(%esi), %eax
+ adcl $0, %ebp
+ movl %ebx, 36(%edi)
+ movl 12(%esi), %edx
+ /* saved r[9] */
+ /* ############### Calculate word 10 */
+ xorl %ebx, %ebx
+ /* sqr a[7]*a[3] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 24(%esi), %eax
+ adcl $0, %ebx
+ movl 16(%esi), %edx
+ /* sqr a[6]*a[4] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 20(%esi), %eax
+ adcl $0, %ebx
+ /* sqr a[5]*a[5] */
+ mull %eax
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 16(%esi), %edx
+ adcl $0, %ebx
+ movl %ecx, 40(%edi)
+ movl 28(%esi), %eax
+ /* saved r[10] */
+ /* ############### Calculate word 11 */
+ xorl %ecx, %ecx
+ /* sqr a[7]*a[4] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 24(%esi), %eax
+ adcl $0, %ecx
+ movl 20(%esi), %edx
+ /* sqr a[6]*a[5] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 28(%esi), %eax
+ adcl $0, %ecx
+ movl %ebp, 44(%edi)
+ movl 20(%esi), %edx
+ /* saved r[11] */
+ /* ############### Calculate word 12 */
+ xorl %ebp, %ebp
+ /* sqr a[7]*a[5] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 24(%esi), %eax
+ adcl $0, %ebp
+ /* sqr a[6]*a[6] */
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 24(%esi), %edx
+ adcl $0, %ebp
+ movl %ebx, 48(%edi)
+ movl 28(%esi), %eax
+ /* saved r[12] */
+ /* ############### Calculate word 13 */
+ xorl %ebx, %ebx
+ /* sqr a[7]*a[6] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 28(%esi), %eax
+ adcl $0, %ebx
+ movl %ecx, 52(%edi)
+ /* saved r[13] */
+ /* ############### Calculate word 14 */
+ xorl %ecx, %ecx
+ /* sqr a[7]*a[7] */
+ mull %eax
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ adcl $0, %ecx
+ movl %ebp, 56(%edi)
+ /* saved r[14] */
+ movl %ebx, 60(%edi)
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
+.bn_sqr_comba8_end:
+ SIZE(bn_sqr_comba8,.bn_sqr_comba8_end-bn_sqr_comba8)
+.ident "desasm.pl"
+.text
+ .align ALIGN
+.globl bn_sqr_comba4
+ TYPE(bn_sqr_comba4,@function)
+bn_sqr_comba4:
+ pushl %esi
+ pushl %edi
+ pushl %ebp
+ pushl %ebx
+ movl 20(%esp), %edi
+ movl 24(%esp), %esi
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ movl (%esi), %eax
+ /* ############### Calculate word 0 */
+ xorl %ebp, %ebp
+ /* sqr a[0]*a[0] */
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl (%esi), %edx
+ adcl $0, %ebp
+ movl %ebx, (%edi)
+ movl 4(%esi), %eax
+ /* saved r[0] */
+ /* ############### Calculate word 1 */
+ xorl %ebx, %ebx
+ /* sqr a[1]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %eax
+ adcl $0, %ebx
+ movl %ecx, 4(%edi)
+ movl (%esi), %edx
+ /* saved r[1] */
+ /* ############### Calculate word 2 */
+ xorl %ecx, %ecx
+ /* sqr a[2]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 4(%esi), %eax
+ adcl $0, %ecx
+ /* sqr a[1]*a[1] */
+ mull %eax
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl (%esi), %edx
+ adcl $0, %ecx
+ movl %ebp, 8(%edi)
+ movl 12(%esi), %eax
+ /* saved r[2] */
+ /* ############### Calculate word 3 */
+ xorl %ebp, %ebp
+ /* sqr a[3]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 8(%esi), %eax
+ adcl $0, %ebp
+ movl 4(%esi), %edx
+ /* sqr a[2]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 12(%esi), %eax
+ adcl $0, %ebp
+ movl %ebx, 12(%edi)
+ movl 4(%esi), %edx
+ /* saved r[3] */
+ /* ############### Calculate word 4 */
+ xorl %ebx, %ebx
+ /* sqr a[3]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %eax
+ adcl $0, %ebx
+ /* sqr a[2]*a[2] */
+ mull %eax
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %edx
+ adcl $0, %ebx
+ movl %ecx, 16(%edi)
+ movl 12(%esi), %eax
+ /* saved r[4] */
+ /* ############### Calculate word 5 */
+ xorl %ecx, %ecx
+ /* sqr a[3]*a[2] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 12(%esi), %eax
+ adcl $0, %ecx
+ movl %ebp, 20(%edi)
+ /* saved r[5] */
+ /* ############### Calculate word 6 */
+ xorl %ebp, %ebp
+ /* sqr a[3]*a[3] */
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ adcl $0, %ebp
+ movl %ebx, 24(%edi)
+ /* saved r[6] */
+ movl %ecx, 28(%edi)
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
+.bn_sqr_comba4_end:
+ SIZE(bn_sqr_comba4,.bn_sqr_comba4_end-bn_sqr_comba4)
+.ident "desasm.pl"
diff --git a/crypto/bn/asm/ca.pl b/crypto/bn/asm/ca.pl
new file mode 100644
index 0000000000..181d1f007e
--- /dev/null
+++ b/crypto/bn/asm/ca.pl
@@ -0,0 +1,33 @@
+#!/usr/local/bin/perl
+# I have this in perl so I can use more usefull register names and then convert
+# them into alpha registers.
+#
+
+push(@INC,"perlasm","../../perlasm");
+require "alpha.pl";
+require "alpha/mul_add.pl";
+require "alpha/mul.pl";
+require "alpha/sqr.pl";
+require "alpha/add.pl";
+require "alpha/sub.pl";
+require "alpha/mul_c8.pl";
+require "alpha/mul_c4.pl";
+require "alpha/sqr_c4.pl";
+require "alpha/sqr_c8.pl";
+require "alpha/div.pl";
+
+&asm_init($ARGV[0],"bn-586.pl");
+
+&bn_mul_words("bn_mul_words");
+&bn_sqr_words("bn_sqr_words");
+&bn_mul_add_words("bn_mul_add_words");
+&bn_add_words("bn_add_words");
+&bn_sub_words("bn_sub_words");
+&bn_div_words("bn_div_words");
+&bn_mul_comba8("bn_mul_comba8");
+&bn_mul_comba4("bn_mul_comba4");
+&bn_sqr_comba4("bn_sqr_comba4");
+&bn_sqr_comba8("bn_sqr_comba8");
+
+&asm_finish();
+
diff --git a/crypto/bn/asm/co-586.pl b/crypto/bn/asm/co-586.pl
new file mode 100644
index 0000000000..0bcb5a6d47
--- /dev/null
+++ b/crypto/bn/asm/co-586.pl
@@ -0,0 +1,286 @@
+#!/usr/local/bin/perl
+
+push(@INC,"perlasm","../../perlasm");
+require "x86asm.pl";
+
+&asm_init($ARGV[0],"bn-586.pl");
+
+&bn_mul_comba("bn_mul_comba8",8);
+&bn_mul_comba("bn_mul_comba4",4);
+&bn_sqr_comba("bn_sqr_comba8",8);
+&bn_sqr_comba("bn_sqr_comba4",4);
+
+&asm_finish();
+
+sub mul_add_c
+ {
+ local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
+
+ # pos == -1 if eax and edx are pre-loaded, 0 to load from next
+ # words, and 1 if load return value
+
+ &comment("mul a[$ai]*b[$bi]");
+
+ # "eax" and "edx" will always be pre-loaded.
+ # &mov("eax",&DWP($ai*4,$a,"",0)) ;
+ # &mov("edx",&DWP($bi*4,$b,"",0));
+
+ &mul("edx");
+ &add($c0,"eax");
+ &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a
+ &mov("eax",&wparam(0)) if $pos > 0; # load r[]
+ ###
+ &adc($c1,"edx");
+ &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b
+ &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b
+ ###
+ &adc($c2,0);
+ # is pos > 1, it means it is the last loop
+ &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
+ &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a
+ }
+
+sub sqr_add_c
+ {
+ local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
+
+ # pos == -1 if eax and edx are pre-loaded, 0 to load from next
+ # words, and 1 if load return value
+
+ &comment("sqr a[$ai]*a[$bi]");
+
+ # "eax" and "edx" will always be pre-loaded.
+ # &mov("eax",&DWP($ai*4,$a,"",0)) ;
+ # &mov("edx",&DWP($bi*4,$b,"",0));
+
+ if ($ai == $bi)
+ { &mul("eax");}
+ else
+ { &mul("edx");}
+ &add($c0,"eax");
+ &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
+ ###
+ &adc($c1,"edx");
+ &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
+ ###
+ &adc($c2,0);
+ # is pos > 1, it means it is the last loop
+ &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
+ &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
+ }
+
+sub sqr_add_c2
+ {
+ local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
+
+ # pos == -1 if eax and edx are pre-loaded, 0 to load from next
+ # words, and 1 if load return value
+
+ &comment("sqr a[$ai]*a[$bi]");
+
+ # "eax" and "edx" will always be pre-loaded.
+ # &mov("eax",&DWP($ai*4,$a,"",0)) ;
+ # &mov("edx",&DWP($bi*4,$a,"",0));
+
+ if ($ai == $bi)
+ { &mul("eax");}
+ else
+ { &mul("edx");}
+ &add("eax","eax");
+ ###
+ &adc("edx","edx");
+ ###
+ &adc($c2,0);
+ &add($c0,"eax");
+ &adc($c1,"edx");
+ &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
+ &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
+ &adc($c2,0);
+ &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
+ &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
+ ###
+ }
+
+sub bn_mul_comba
+ {
+ local($name,$num)=@_;
+ local($a,$b,$c0,$c1,$c2);
+ local($i,$as,$ae,$bs,$be,$ai,$bi);
+ local($tot,$end);
+
+ &function_begin_B($name,"");
+
+ $c0="ebx";
+ $c1="ecx";
+ $c2="ebp";
+ $a="esi";
+ $b="edi";
+
+ $as=0;
+ $ae=0;
+ $bs=0;
+ $be=0;
+ $tot=$num+$num-1;
+
+ &push("esi");
+ &mov($a,&wparam(1));
+ &push("edi");
+ &mov($b,&wparam(2));
+ &push("ebp");
+ &push("ebx");
+
+ &xor($c0,$c0);
+ &mov("eax",&DWP(0,$a,"",0)); # load the first word
+ &xor($c1,$c1);
+ &mov("edx",&DWP(0,$b,"",0)); # load the first second
+
+ for ($i=0; $i<$tot; $i++)
+ {
+ $ai=$as;
+ $bi=$bs;
+ $end=$be+1;
+
+ &comment("################## Calculate word $i");
+
+ for ($j=$bs; $j<$end; $j++)
+ {
+ &xor($c2,$c2) if ($j == $bs);
+ if (($j+1) == $end)
+ {
+ $v=1;
+ $v=2 if (($i+1) == $tot);
+ }
+ else
+ { $v=0; }
+ if (($j+1) != $end)
+ {
+ $na=($ai-1);
+ $nb=($bi+1);
+ }
+ else
+ {
+ $na=$as+($i < ($num-1));
+ $nb=$bs+($i >= ($num-1));
+ }
+#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
+ &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
+ if ($v)
+ {
+ &comment("saved r[$i]");
+ # &mov("eax",&wparam(0));
+ # &mov(&DWP($i*4,"eax","",0),$c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ }
+ $ai--;
+ $bi++;
+ }
+ $as++ if ($i < ($num-1));
+ $ae++ if ($i >= ($num-1));
+
+ $bs++ if ($i >= ($num-1));
+ $be++ if ($i < ($num-1));
+ }
+ &comment("save r[$i]");
+ # &mov("eax",&wparam(0));
+ &mov(&DWP($i*4,"eax","",0),$c0);
+
+ &pop("ebx");
+ &pop("ebp");
+ &pop("edi");
+ &pop("esi");
+ &ret();
+ &function_end_B($name);
+ }
+
+sub bn_sqr_comba
+ {
+ local($name,$num)=@_;
+ local($r,$a,$c0,$c1,$c2)=@_;
+ local($i,$as,$ae,$bs,$be,$ai,$bi);
+ local($b,$tot,$end,$half);
+
+ &function_begin_B($name,"");
+
+ $c0="ebx";
+ $c1="ecx";
+ $c2="ebp";
+ $a="esi";
+ $r="edi";
+
+ &push("esi");
+ &push("edi");
+ &push("ebp");
+ &push("ebx");
+ &mov($r,&wparam(0));
+ &mov($a,&wparam(1));
+ &xor($c0,$c0);
+ &xor($c1,$c1);
+ &mov("eax",&DWP(0,$a,"",0)); # load the first word
+
+ $as=0;
+ $ae=0;
+ $bs=0;
+ $be=0;
+ $tot=$num+$num-1;
+
+ for ($i=0; $i<$tot; $i++)
+ {
+ $ai=$as;
+ $bi=$bs;
+ $end=$be+1;
+
+ &comment("############### Calculate word $i");
+ for ($j=$bs; $j<$end; $j++)
+ {
+ &xor($c2,$c2) if ($j == $bs);
+ if (($ai-1) < ($bi+1))
+ {
+ $v=1;
+ $v=2 if ($i+1) == $tot;
+ }
+ else
+ { $v=0; }
+ if (!$v)
+ {
+ $na=$ai-1;
+ $nb=$bi+1;
+ }
+ else
+ {
+ $na=$as+($i < ($num-1));
+ $nb=$bs+($i >= ($num-1));
+ }
+ if ($ai == $bi)
+ {
+ &sqr_add_c($r,$a,$ai,$bi,
+ $c0,$c1,$c2,$v,$i,$na,$nb);
+ }
+ else
+ {
+ &sqr_add_c2($r,$a,$ai,$bi,
+ $c0,$c1,$c2,$v,$i,$na,$nb);
+ }
+ if ($v)
+ {
+ &comment("saved r[$i]");
+ #&mov(&DWP($i*4,$r,"",0),$c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ last;
+ }
+ $ai--;
+ $bi++;
+ }
+ $as++ if ($i < ($num-1));
+ $ae++ if ($i >= ($num-1));
+
+ $bs++ if ($i >= ($num-1));
+ $be++ if ($i < ($num-1));
+ }
+ &mov(&DWP($i*4,$r,"",0),$c0);
+ &pop("ebx");
+ &pop("ebp");
+ &pop("edi");
+ &pop("esi");
+ &ret();
+ &function_end_B($name);
+ }
diff --git a/crypto/bn/asm/co-alpha.pl b/crypto/bn/asm/co-alpha.pl
new file mode 100644
index 0000000000..23869a4ef5
--- /dev/null
+++ b/crypto/bn/asm/co-alpha.pl
@@ -0,0 +1,116 @@
+#!/usr/local/bin/perl
+# I have this in perl so I can use more usefull register names and then convert
+# them into alpha registers.
+#
+
+push(@INC,"perlasm","../../perlasm");
+require "alpha.pl";
+
+&asm_init($ARGV[0],"bn-586.pl");
+
+print &bn_sub_words("bn_sub_words");
+
+&asm_finish();
+
+sub bn_sub_words
+ {
+ local($name)=@_;
+ local($cc,$a,$b,$r);
+
+ $cc="r0";
+ $a0="r1"; $b0="r5"; $r0="r9"; $tmp="r13";
+ $a1="r2"; $b1="r6"; $r1="r10"; $t1="r14";
+ $a2="r3"; $b2="r7"; $r2="r11";
+ $a3="r4"; $b3="r8"; $r3="r12"; $t3="r15";
+
+ $rp=&wparam(0);
+ $ap=&wparam(1);
+ $bp=&wparam(2);
+ $count=&wparam(3);
+
+ &function_begin($name,"");
+
+ &comment("");
+ &sub($count,4,$count);
+ &mov("zero",$cc);
+ &blt($count,&label("finish"));
+
+ &ld($a0,&QWPw(0,$ap));
+ &ld($b0,&QWPw(0,$bp));
+
+##########################################################
+ &set_label("loop");
+
+ &ld($a1,&QWPw(1,$ap));
+ &cmpult($a0,$b0,$tmp); # will we borrow?
+ &ld($b1,&QWPw(1,$bp));
+ &sub($a0,$b0,$a0); # do the subtract
+ &ld($a2,&QWPw(2,$ap));
+ &cmpult($a0,$cc,$b0); # will we borrow?
+ &ld($b2,&QWPw(2,$bp));
+ &sub($a0,$cc,$a0); # will we borrow?
+ &ld($a3,&QWPw(3,$ap));
+ &add($b0,$tmp,$cc); # add the borrows
+
+ &cmpult($a1,$b1,$t1); # will we borrow?
+ &sub($a1,$b1,$a1); # do the subtract
+ &ld($b3,&QWPw(3,$bp));
+ &cmpult($a1,$cc,$b1); # will we borrow?
+ &sub($a1,$cc,$a1); # will we borrow?
+ &add($b1,$t1,$cc); # add the borrows
+
+ &cmpult($a2,$b2,$tmp); # will we borrow?
+ &sub($a2,$b2,$a2); # do the subtract
+ &st($a0,&QWPw(0,$rp)); # save
+ &cmpult($a2,$cc,$b2); # will we borrow?
+ &sub($a2,$cc,$a2); # will we borrow?
+ &add($b2,$tmp,$cc); # add the borrows
+
+ &cmpult($a3,$b3,$t3); # will we borrow?
+ &sub($a3,$b3,$a3); # do the subtract
+ &st($a1,&QWPw(1,$rp)); # save
+ &cmpult($a3,$cc,$b3); # will we borrow?
+ &sub($a3,$cc,$a3); # will we borrow?
+ &add($b3,$t3,$cc); # add the borrows
+
+ &st($a2,&QWPw(2,$rp)); # save
+ &sub($count,4,$count); # count-=4
+ &st($a3,&QWPw(3,$rp)); # save
+ &add($ap,4*$QWS,$ap); # count+=4
+ &add($bp,4*$QWS,$bp); # count+=4
+ &add($rp,4*$QWS,$rp); # count+=4
+
+ &blt($count,&label("finish"));
+ &ld($a0,&QWPw(0,$ap));
+ &ld($b0,&QWPw(0,$bp));
+ &br(&label("loop"));
+##################################################
+ # Do the last 0..3 words
+
+ &set_label("last_loop");
+
+ &ld($a0,&QWPw(0,$ap)); # get a
+ &ld($b0,&QWPw(0,$bp)); # get b
+ &cmpult($a0,$b0,$tmp); # will we borrow?
+ &sub($a0,$b0,$a0); # do the subtract
+ &cmpult($a0,$cc,$b0); # will we borrow?
+ &sub($a0,$cc,$a0); # will we borrow?
+ &st($a0,&QWPw(0,$rp)); # save
+ &add($b0,$tmp,$cc); # add the borrows
+
+ &add($ap,$QWS,$ap);
+ &add($bp,$QWS,$bp);
+ &add($rp,$QWS,$rp);
+ &sub($count,1,$count);
+ &bgt($count,&label("last_loop"));
+ &function_end_A($name);
+
+######################################################
+ &set_label("finish");
+ &add($count,4,$count);
+ &bgt($count,&label("last_loop"));
+
+ &set_label("end");
+ &function_end($name);
+ }
+
diff --git a/crypto/bn/asm/co86unix.cpp b/crypto/bn/asm/co86unix.cpp
new file mode 100644
index 0000000000..fa80b14046
--- /dev/null
+++ b/crypto/bn/asm/co86unix.cpp
@@ -0,0 +1,1315 @@
+/* Run the C pre-processor over this file with one of the following defined
+ * ELF - elf object files,
+ * OUT - a.out object files,
+ * BSDI - BSDI style a.out object files
+ * SOL - Solaris style elf
+ */
+
+#define TYPE(a,b) .type a,b
+#define SIZE(a,b) .size a,b
+
+#if defined(OUT) || defined(BSDI)
+#define bn_mul_comba8 _bn_mul_comba8
+#define bn_mul_comba4 _bn_mul_comba4
+#define bn_sqr_comba8 _bn_sqr_comba8
+#define bn_sqr_comba4 _bn_sqr_comba4
+
+#endif
+
+#ifdef OUT
+#define OK 1
+#define ALIGN 4
+#endif
+
+#ifdef BSDI
+#define OK 1
+#define ALIGN 4
+#undef SIZE
+#undef TYPE
+#define SIZE(a,b)
+#define TYPE(a,b)
+#endif
+
+#if defined(ELF) || defined(SOL)
+#define OK 1
+#define ALIGN 16
+#endif
+
+#ifndef OK
+You need to define one of
+ELF - elf systems - linux-elf, NetBSD and DG-UX
+OUT - a.out systems - linux-a.out and FreeBSD
+SOL - solaris systems, which are elf with strange comment lines
+BSDI - a.out with a very primative version of as.
+#endif
+
+/* Let the Assembler begin :-) */
+ /* Don't even think of reading this code */
+ /* It was automatically generated by bn-586.pl */
+ /* Which is a perl program used to generate the x86 assember for */
+ /* any of elf, a.out, BSDI,Win32, or Solaris */
+ /* eric <eay@cryptsoft.com> */
+
+ .file "bn-586.s"
+ .version "01.01"
+gcc2_compiled.:
+.text
+ .align ALIGN
+.globl bn_mul_comba8
+ TYPE(bn_mul_comba8,@function)
+bn_mul_comba8:
+ pushl %esi
+ movl 12(%esp), %esi
+ pushl %edi
+ movl 20(%esp), %edi
+ pushl %ebp
+ pushl %ebx
+ xorl %ebx, %ebx
+ movl (%esi), %eax
+ xorl %ecx, %ecx
+ movl (%edi), %edx
+ /* ################## Calculate word 0 */
+ xorl %ebp, %ebp
+ /* mul a[0]*b[0] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl (%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, (%eax)
+ movl 4(%esi), %eax
+ /* saved r[0] */
+ /* ################## Calculate word 1 */
+ xorl %ebx, %ebx
+ /* mul a[1]*b[0] */
+ mull %edx
+ addl %eax, %ecx
+ movl (%esi), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[0]*b[1] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl (%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 4(%eax)
+ movl 8(%esi), %eax
+ /* saved r[1] */
+ /* ################## Calculate word 2 */
+ xorl %ecx, %ecx
+ /* mul a[2]*b[0] */
+ mull %edx
+ addl %eax, %ebp
+ movl 4(%esi), %eax
+ adcl %edx, %ebx
+ movl 4(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[1]*b[1] */
+ mull %edx
+ addl %eax, %ebp
+ movl (%esi), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[0]*b[2] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl (%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 8(%eax)
+ movl 12(%esi), %eax
+ /* saved r[2] */
+ /* ################## Calculate word 3 */
+ xorl %ebp, %ebp
+ /* mul a[3]*b[0] */
+ mull %edx
+ addl %eax, %ebx
+ movl 8(%esi), %eax
+ adcl %edx, %ecx
+ movl 4(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[2]*b[1] */
+ mull %edx
+ addl %eax, %ebx
+ movl 4(%esi), %eax
+ adcl %edx, %ecx
+ movl 8(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[1]*b[2] */
+ mull %edx
+ addl %eax, %ebx
+ movl (%esi), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[0]*b[3] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl (%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 12(%eax)
+ movl 16(%esi), %eax
+ /* saved r[3] */
+ /* ################## Calculate word 4 */
+ xorl %ebx, %ebx
+ /* mul a[4]*b[0] */
+ mull %edx
+ addl %eax, %ecx
+ movl 12(%esi), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[3]*b[1] */
+ mull %edx
+ addl %eax, %ecx
+ movl 8(%esi), %eax
+ adcl %edx, %ebp
+ movl 8(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[2]*b[2] */
+ mull %edx
+ addl %eax, %ecx
+ movl 4(%esi), %eax
+ adcl %edx, %ebp
+ movl 12(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[1]*b[3] */
+ mull %edx
+ addl %eax, %ecx
+ movl (%esi), %eax
+ adcl %edx, %ebp
+ movl 16(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[0]*b[4] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl (%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 16(%eax)
+ movl 20(%esi), %eax
+ /* saved r[4] */
+ /* ################## Calculate word 5 */
+ xorl %ecx, %ecx
+ /* mul a[5]*b[0] */
+ mull %edx
+ addl %eax, %ebp
+ movl 16(%esi), %eax
+ adcl %edx, %ebx
+ movl 4(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[4]*b[1] */
+ mull %edx
+ addl %eax, %ebp
+ movl 12(%esi), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[3]*b[2] */
+ mull %edx
+ addl %eax, %ebp
+ movl 8(%esi), %eax
+ adcl %edx, %ebx
+ movl 12(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[2]*b[3] */
+ mull %edx
+ addl %eax, %ebp
+ movl 4(%esi), %eax
+ adcl %edx, %ebx
+ movl 16(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[1]*b[4] */
+ mull %edx
+ addl %eax, %ebp
+ movl (%esi), %eax
+ adcl %edx, %ebx
+ movl 20(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[0]*b[5] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl (%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 20(%eax)
+ movl 24(%esi), %eax
+ /* saved r[5] */
+ /* ################## Calculate word 6 */
+ xorl %ebp, %ebp
+ /* mul a[6]*b[0] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esi), %eax
+ adcl %edx, %ecx
+ movl 4(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[5]*b[1] */
+ mull %edx
+ addl %eax, %ebx
+ movl 16(%esi), %eax
+ adcl %edx, %ecx
+ movl 8(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[4]*b[2] */
+ mull %edx
+ addl %eax, %ebx
+ movl 12(%esi), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[3]*b[3] */
+ mull %edx
+ addl %eax, %ebx
+ movl 8(%esi), %eax
+ adcl %edx, %ecx
+ movl 16(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[2]*b[4] */
+ mull %edx
+ addl %eax, %ebx
+ movl 4(%esi), %eax
+ adcl %edx, %ecx
+ movl 20(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[1]*b[5] */
+ mull %edx
+ addl %eax, %ebx
+ movl (%esi), %eax
+ adcl %edx, %ecx
+ movl 24(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[0]*b[6] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl (%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 24(%eax)
+ movl 28(%esi), %eax
+ /* saved r[6] */
+ /* ################## Calculate word 7 */
+ xorl %ebx, %ebx
+ /* mul a[7]*b[0] */
+ mull %edx
+ addl %eax, %ecx
+ movl 24(%esi), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[6]*b[1] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esi), %eax
+ adcl %edx, %ebp
+ movl 8(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[5]*b[2] */
+ mull %edx
+ addl %eax, %ecx
+ movl 16(%esi), %eax
+ adcl %edx, %ebp
+ movl 12(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[4]*b[3] */
+ mull %edx
+ addl %eax, %ecx
+ movl 12(%esi), %eax
+ adcl %edx, %ebp
+ movl 16(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[3]*b[4] */
+ mull %edx
+ addl %eax, %ecx
+ movl 8(%esi), %eax
+ adcl %edx, %ebp
+ movl 20(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[2]*b[5] */
+ mull %edx
+ addl %eax, %ecx
+ movl 4(%esi), %eax
+ adcl %edx, %ebp
+ movl 24(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[1]*b[6] */
+ mull %edx
+ addl %eax, %ecx
+ movl (%esi), %eax
+ adcl %edx, %ebp
+ movl 28(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[0]*b[7] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 28(%eax)
+ movl 28(%esi), %eax
+ /* saved r[7] */
+ /* ################## Calculate word 8 */
+ xorl %ecx, %ecx
+ /* mul a[7]*b[1] */
+ mull %edx
+ addl %eax, %ebp
+ movl 24(%esi), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[6]*b[2] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esi), %eax
+ adcl %edx, %ebx
+ movl 12(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[5]*b[3] */
+ mull %edx
+ addl %eax, %ebp
+ movl 16(%esi), %eax
+ adcl %edx, %ebx
+ movl 16(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[4]*b[4] */
+ mull %edx
+ addl %eax, %ebp
+ movl 12(%esi), %eax
+ adcl %edx, %ebx
+ movl 20(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[3]*b[5] */
+ mull %edx
+ addl %eax, %ebp
+ movl 8(%esi), %eax
+ adcl %edx, %ebx
+ movl 24(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[2]*b[6] */
+ mull %edx
+ addl %eax, %ebp
+ movl 4(%esi), %eax
+ adcl %edx, %ebx
+ movl 28(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[1]*b[7] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 32(%eax)
+ movl 28(%esi), %eax
+ /* saved r[8] */
+ /* ################## Calculate word 9 */
+ xorl %ebp, %ebp
+ /* mul a[7]*b[2] */
+ mull %edx
+ addl %eax, %ebx
+ movl 24(%esi), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[6]*b[3] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esi), %eax
+ adcl %edx, %ecx
+ movl 16(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[5]*b[4] */
+ mull %edx
+ addl %eax, %ebx
+ movl 16(%esi), %eax
+ adcl %edx, %ecx
+ movl 20(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[4]*b[5] */
+ mull %edx
+ addl %eax, %ebx
+ movl 12(%esi), %eax
+ adcl %edx, %ecx
+ movl 24(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[3]*b[6] */
+ mull %edx
+ addl %eax, %ebx
+ movl 8(%esi), %eax
+ adcl %edx, %ecx
+ movl 28(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[2]*b[7] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 36(%eax)
+ movl 28(%esi), %eax
+ /* saved r[9] */
+ /* ################## Calculate word 10 */
+ xorl %ebx, %ebx
+ /* mul a[7]*b[3] */
+ mull %edx
+ addl %eax, %ecx
+ movl 24(%esi), %eax
+ adcl %edx, %ebp
+ movl 16(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[6]*b[4] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esi), %eax
+ adcl %edx, %ebp
+ movl 20(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[5]*b[5] */
+ mull %edx
+ addl %eax, %ecx
+ movl 16(%esi), %eax
+ adcl %edx, %ebp
+ movl 24(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[4]*b[6] */
+ mull %edx
+ addl %eax, %ecx
+ movl 12(%esi), %eax
+ adcl %edx, %ebp
+ movl 28(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[3]*b[7] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl 16(%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 40(%eax)
+ movl 28(%esi), %eax
+ /* saved r[10] */
+ /* ################## Calculate word 11 */
+ xorl %ecx, %ecx
+ /* mul a[7]*b[4] */
+ mull %edx
+ addl %eax, %ebp
+ movl 24(%esi), %eax
+ adcl %edx, %ebx
+ movl 20(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[6]*b[5] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esi), %eax
+ adcl %edx, %ebx
+ movl 24(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[5]*b[6] */
+ mull %edx
+ addl %eax, %ebp
+ movl 16(%esi), %eax
+ adcl %edx, %ebx
+ movl 28(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[4]*b[7] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl 20(%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 44(%eax)
+ movl 28(%esi), %eax
+ /* saved r[11] */
+ /* ################## Calculate word 12 */
+ xorl %ebp, %ebp
+ /* mul a[7]*b[5] */
+ mull %edx
+ addl %eax, %ebx
+ movl 24(%esi), %eax
+ adcl %edx, %ecx
+ movl 24(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[6]*b[6] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esi), %eax
+ adcl %edx, %ecx
+ movl 28(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[5]*b[7] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl 24(%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 48(%eax)
+ movl 28(%esi), %eax
+ /* saved r[12] */
+ /* ################## Calculate word 13 */
+ xorl %ebx, %ebx
+ /* mul a[7]*b[6] */
+ mull %edx
+ addl %eax, %ecx
+ movl 24(%esi), %eax
+ adcl %edx, %ebp
+ movl 28(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[6]*b[7] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl 28(%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 52(%eax)
+ movl 28(%esi), %eax
+ /* saved r[13] */
+ /* ################## Calculate word 14 */
+ xorl %ecx, %ecx
+ /* mul a[7]*b[7] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ adcl $0, %ecx
+ movl %ebp, 56(%eax)
+ /* saved r[14] */
+ /* save r[15] */
+ movl %ebx, 60(%eax)
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
+.bn_mul_comba8_end:
+ SIZE(bn_mul_comba8,.bn_mul_comba8_end-bn_mul_comba8)
+.ident "desasm.pl"
+.text
+ .align ALIGN
+.globl bn_mul_comba4
+ TYPE(bn_mul_comba4,@function)
+bn_mul_comba4:
+ pushl %esi
+ movl 12(%esp), %esi
+ pushl %edi
+ movl 20(%esp), %edi
+ pushl %ebp
+ pushl %ebx
+ xorl %ebx, %ebx
+ movl (%esi), %eax
+ xorl %ecx, %ecx
+ movl (%edi), %edx
+ /* ################## Calculate word 0 */
+ xorl %ebp, %ebp
+ /* mul a[0]*b[0] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl (%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, (%eax)
+ movl 4(%esi), %eax
+ /* saved r[0] */
+ /* ################## Calculate word 1 */
+ xorl %ebx, %ebx
+ /* mul a[1]*b[0] */
+ mull %edx
+ addl %eax, %ecx
+ movl (%esi), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[0]*b[1] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl (%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 4(%eax)
+ movl 8(%esi), %eax
+ /* saved r[1] */
+ /* ################## Calculate word 2 */
+ xorl %ecx, %ecx
+ /* mul a[2]*b[0] */
+ mull %edx
+ addl %eax, %ebp
+ movl 4(%esi), %eax
+ adcl %edx, %ebx
+ movl 4(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[1]*b[1] */
+ mull %edx
+ addl %eax, %ebp
+ movl (%esi), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[0]*b[2] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl (%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 8(%eax)
+ movl 12(%esi), %eax
+ /* saved r[2] */
+ /* ################## Calculate word 3 */
+ xorl %ebp, %ebp
+ /* mul a[3]*b[0] */
+ mull %edx
+ addl %eax, %ebx
+ movl 8(%esi), %eax
+ adcl %edx, %ecx
+ movl 4(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[2]*b[1] */
+ mull %edx
+ addl %eax, %ebx
+ movl 4(%esi), %eax
+ adcl %edx, %ecx
+ movl 8(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[1]*b[2] */
+ mull %edx
+ addl %eax, %ebx
+ movl (%esi), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ /* mul a[0]*b[3] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl 4(%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 12(%eax)
+ movl 12(%esi), %eax
+ /* saved r[3] */
+ /* ################## Calculate word 4 */
+ xorl %ebx, %ebx
+ /* mul a[3]*b[1] */
+ mull %edx
+ addl %eax, %ecx
+ movl 8(%esi), %eax
+ adcl %edx, %ebp
+ movl 8(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[2]*b[2] */
+ mull %edx
+ addl %eax, %ecx
+ movl 4(%esi), %eax
+ adcl %edx, %ebp
+ movl 12(%edi), %edx
+ adcl $0, %ebx
+ /* mul a[1]*b[3] */
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl 8(%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 16(%eax)
+ movl 12(%esi), %eax
+ /* saved r[4] */
+ /* ################## Calculate word 5 */
+ xorl %ecx, %ecx
+ /* mul a[3]*b[2] */
+ mull %edx
+ addl %eax, %ebp
+ movl 8(%esi), %eax
+ adcl %edx, %ebx
+ movl 12(%edi), %edx
+ adcl $0, %ecx
+ /* mul a[2]*b[3] */
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl 12(%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 20(%eax)
+ movl 12(%esi), %eax
+ /* saved r[5] */
+ /* ################## Calculate word 6 */
+ xorl %ebp, %ebp
+ /* mul a[3]*b[3] */
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ adcl $0, %ebp
+ movl %ebx, 24(%eax)
+ /* saved r[6] */
+ /* save r[7] */
+ movl %ecx, 28(%eax)
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
+.bn_mul_comba4_end:
+ SIZE(bn_mul_comba4,.bn_mul_comba4_end-bn_mul_comba4)
+.ident "desasm.pl"
+.text
+ .align ALIGN
+.globl bn_sqr_comba8
+ TYPE(bn_sqr_comba8,@function)
+bn_sqr_comba8:
+ pushl %esi
+ pushl %edi
+ pushl %ebp
+ pushl %ebx
+ movl 20(%esp), %edi
+ movl 24(%esp), %esi
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ movl (%esi), %eax
+ /* ############### Calculate word 0 */
+ xorl %ebp, %ebp
+ /* sqr a[0]*a[0] */
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl (%esi), %edx
+ adcl $0, %ebp
+ movl %ebx, (%edi)
+ movl 4(%esi), %eax
+ /* saved r[0] */
+ /* ############### Calculate word 1 */
+ xorl %ebx, %ebx
+ /* sqr a[1]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %eax
+ adcl $0, %ebx
+ movl %ecx, 4(%edi)
+ movl (%esi), %edx
+ /* saved r[1] */
+ /* ############### Calculate word 2 */
+ xorl %ecx, %ecx
+ /* sqr a[2]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 4(%esi), %eax
+ adcl $0, %ecx
+ /* sqr a[1]*a[1] */
+ mull %eax
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl (%esi), %edx
+ adcl $0, %ecx
+ movl %ebp, 8(%edi)
+ movl 12(%esi), %eax
+ /* saved r[2] */
+ /* ############### Calculate word 3 */
+ xorl %ebp, %ebp
+ /* sqr a[3]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 8(%esi), %eax
+ adcl $0, %ebp
+ movl 4(%esi), %edx
+ /* sqr a[2]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 16(%esi), %eax
+ adcl $0, %ebp
+ movl %ebx, 12(%edi)
+ movl (%esi), %edx
+ /* saved r[3] */
+ /* ############### Calculate word 4 */
+ xorl %ebx, %ebx
+ /* sqr a[4]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 12(%esi), %eax
+ adcl $0, %ebx
+ movl 4(%esi), %edx
+ /* sqr a[3]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %eax
+ adcl $0, %ebx
+ /* sqr a[2]*a[2] */
+ mull %eax
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl (%esi), %edx
+ adcl $0, %ebx
+ movl %ecx, 16(%edi)
+ movl 20(%esi), %eax
+ /* saved r[4] */
+ /* ############### Calculate word 5 */
+ xorl %ecx, %ecx
+ /* sqr a[5]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 16(%esi), %eax
+ adcl $0, %ecx
+ movl 4(%esi), %edx
+ /* sqr a[4]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 12(%esi), %eax
+ adcl $0, %ecx
+ movl 8(%esi), %edx
+ /* sqr a[3]*a[2] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 24(%esi), %eax
+ adcl $0, %ecx
+ movl %ebp, 20(%edi)
+ movl (%esi), %edx
+ /* saved r[5] */
+ /* ############### Calculate word 6 */
+ xorl %ebp, %ebp
+ /* sqr a[6]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 20(%esi), %eax
+ adcl $0, %ebp
+ movl 4(%esi), %edx
+ /* sqr a[5]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 16(%esi), %eax
+ adcl $0, %ebp
+ movl 8(%esi), %edx
+ /* sqr a[4]*a[2] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 12(%esi), %eax
+ adcl $0, %ebp
+ /* sqr a[3]*a[3] */
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl (%esi), %edx
+ adcl $0, %ebp
+ movl %ebx, 24(%edi)
+ movl 28(%esi), %eax
+ /* saved r[6] */
+ /* ############### Calculate word 7 */
+ xorl %ebx, %ebx
+ /* sqr a[7]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 24(%esi), %eax
+ adcl $0, %ebx
+ movl 4(%esi), %edx
+ /* sqr a[6]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 20(%esi), %eax
+ adcl $0, %ebx
+ movl 8(%esi), %edx
+ /* sqr a[5]*a[2] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 16(%esi), %eax
+ adcl $0, %ebx
+ movl 12(%esi), %edx
+ /* sqr a[4]*a[3] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 28(%esi), %eax
+ adcl $0, %ebx
+ movl %ecx, 28(%edi)
+ movl 4(%esi), %edx
+ /* saved r[7] */
+ /* ############### Calculate word 8 */
+ xorl %ecx, %ecx
+ /* sqr a[7]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 24(%esi), %eax
+ adcl $0, %ecx
+ movl 8(%esi), %edx
+ /* sqr a[6]*a[2] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 20(%esi), %eax
+ adcl $0, %ecx
+ movl 12(%esi), %edx
+ /* sqr a[5]*a[3] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 16(%esi), %eax
+ adcl $0, %ecx
+ /* sqr a[4]*a[4] */
+ mull %eax
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 8(%esi), %edx
+ adcl $0, %ecx
+ movl %ebp, 32(%edi)
+ movl 28(%esi), %eax
+ /* saved r[8] */
+ /* ############### Calculate word 9 */
+ xorl %ebp, %ebp
+ /* sqr a[7]*a[2] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 24(%esi), %eax
+ adcl $0, %ebp
+ movl 12(%esi), %edx
+ /* sqr a[6]*a[3] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 20(%esi), %eax
+ adcl $0, %ebp
+ movl 16(%esi), %edx
+ /* sqr a[5]*a[4] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 28(%esi), %eax
+ adcl $0, %ebp
+ movl %ebx, 36(%edi)
+ movl 12(%esi), %edx
+ /* saved r[9] */
+ /* ############### Calculate word 10 */
+ xorl %ebx, %ebx
+ /* sqr a[7]*a[3] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 24(%esi), %eax
+ adcl $0, %ebx
+ movl 16(%esi), %edx
+ /* sqr a[6]*a[4] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 20(%esi), %eax
+ adcl $0, %ebx
+ /* sqr a[5]*a[5] */
+ mull %eax
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 16(%esi), %edx
+ adcl $0, %ebx
+ movl %ecx, 40(%edi)
+ movl 28(%esi), %eax
+ /* saved r[10] */
+ /* ############### Calculate word 11 */
+ xorl %ecx, %ecx
+ /* sqr a[7]*a[4] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 24(%esi), %eax
+ adcl $0, %ecx
+ movl 20(%esi), %edx
+ /* sqr a[6]*a[5] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 28(%esi), %eax
+ adcl $0, %ecx
+ movl %ebp, 44(%edi)
+ movl 20(%esi), %edx
+ /* saved r[11] */
+ /* ############### Calculate word 12 */
+ xorl %ebp, %ebp
+ /* sqr a[7]*a[5] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 24(%esi), %eax
+ adcl $0, %ebp
+ /* sqr a[6]*a[6] */
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 24(%esi), %edx
+ adcl $0, %ebp
+ movl %ebx, 48(%edi)
+ movl 28(%esi), %eax
+ /* saved r[12] */
+ /* ############### Calculate word 13 */
+ xorl %ebx, %ebx
+ /* sqr a[7]*a[6] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 28(%esi), %eax
+ adcl $0, %ebx
+ movl %ecx, 52(%edi)
+ /* saved r[13] */
+ /* ############### Calculate word 14 */
+ xorl %ecx, %ecx
+ /* sqr a[7]*a[7] */
+ mull %eax
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ adcl $0, %ecx
+ movl %ebp, 56(%edi)
+ /* saved r[14] */
+ movl %ebx, 60(%edi)
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
+.bn_sqr_comba8_end:
+ SIZE(bn_sqr_comba8,.bn_sqr_comba8_end-bn_sqr_comba8)
+.ident "desasm.pl"
+.text
+ .align ALIGN
+.globl bn_sqr_comba4
+ TYPE(bn_sqr_comba4,@function)
+bn_sqr_comba4:
+ pushl %esi
+ pushl %edi
+ pushl %ebp
+ pushl %ebx
+ movl 20(%esp), %edi
+ movl 24(%esp), %esi
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ movl (%esi), %eax
+ /* ############### Calculate word 0 */
+ xorl %ebp, %ebp
+ /* sqr a[0]*a[0] */
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl (%esi), %edx
+ adcl $0, %ebp
+ movl %ebx, (%edi)
+ movl 4(%esi), %eax
+ /* saved r[0] */
+ /* ############### Calculate word 1 */
+ xorl %ebx, %ebx
+ /* sqr a[1]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %eax
+ adcl $0, %ebx
+ movl %ecx, 4(%edi)
+ movl (%esi), %edx
+ /* saved r[1] */
+ /* ############### Calculate word 2 */
+ xorl %ecx, %ecx
+ /* sqr a[2]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 4(%esi), %eax
+ adcl $0, %ecx
+ /* sqr a[1]*a[1] */
+ mull %eax
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl (%esi), %edx
+ adcl $0, %ecx
+ movl %ebp, 8(%edi)
+ movl 12(%esi), %eax
+ /* saved r[2] */
+ /* ############### Calculate word 3 */
+ xorl %ebp, %ebp
+ /* sqr a[3]*a[0] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 8(%esi), %eax
+ adcl $0, %ebp
+ movl 4(%esi), %edx
+ /* sqr a[2]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 12(%esi), %eax
+ adcl $0, %ebp
+ movl %ebx, 12(%edi)
+ movl 4(%esi), %edx
+ /* saved r[3] */
+ /* ############### Calculate word 4 */
+ xorl %ebx, %ebx
+ /* sqr a[3]*a[1] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %eax
+ adcl $0, %ebx
+ /* sqr a[2]*a[2] */
+ mull %eax
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %edx
+ adcl $0, %ebx
+ movl %ecx, 16(%edi)
+ movl 12(%esi), %eax
+ /* saved r[4] */
+ /* ############### Calculate word 5 */
+ xorl %ecx, %ecx
+ /* sqr a[3]*a[2] */
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 12(%esi), %eax
+ adcl $0, %ecx
+ movl %ebp, 20(%edi)
+ /* saved r[5] */
+ /* ############### Calculate word 6 */
+ xorl %ebp, %ebp
+ /* sqr a[3]*a[3] */
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ adcl $0, %ebp
+ movl %ebx, 24(%edi)
+ /* saved r[6] */
+ movl %ecx, 28(%edi)
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
+.bn_sqr_comba4_end:
+ SIZE(bn_sqr_comba4,.bn_sqr_comba4_end-bn_sqr_comba4)
+.ident "desasm.pl"
diff --git a/crypto/bn/asm/elf.s b/crypto/bn/asm/elf.s
new file mode 100644
index 0000000000..97ad1264db
--- /dev/null
+++ b/crypto/bn/asm/elf.s
@@ -0,0 +1,1269 @@
+ # Don't even think of reading this code
+ # It was automatically generated by bn-586.pl
+ # Which is a perl program used to generate the x86 assember for
+ # any of elf, a.out, BSDI,Win32, or Solaris
+ # eric <eay@cryptsoft.com>
+
+ .file "bn-586.s"
+ .version "01.01"
+gcc2_compiled.:
+.text
+ .align 16
+.globl bn_mul_comba8
+ .type bn_mul_comba8,@function
+bn_mul_comba8:
+ pushl %esi
+ movl 12(%esp), %esi
+ pushl %edi
+ movl 20(%esp), %edi
+ pushl %ebp
+ pushl %ebx
+ xorl %ebx, %ebx
+ movl (%esi), %eax
+ xorl %ecx, %ecx
+ movl (%edi), %edx
+ # ################## Calculate word 0
+ xorl %ebp, %ebp
+ # mul a[0]*b[0]
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl (%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, (%eax)
+ movl 4(%esi), %eax
+ # saved r[0]
+ # ################## Calculate word 1
+ xorl %ebx, %ebx
+ # mul a[1]*b[0]
+ mull %edx
+ addl %eax, %ecx
+ movl (%esi), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ # mul a[0]*b[1]
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl (%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 4(%eax)
+ movl 8(%esi), %eax
+ # saved r[1]
+ # ################## Calculate word 2
+ xorl %ecx, %ecx
+ # mul a[2]*b[0]
+ mull %edx
+ addl %eax, %ebp
+ movl 4(%esi), %eax
+ adcl %edx, %ebx
+ movl 4(%edi), %edx
+ adcl $0, %ecx
+ # mul a[1]*b[1]
+ mull %edx
+ addl %eax, %ebp
+ movl (%esi), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ # mul a[0]*b[2]
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl (%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 8(%eax)
+ movl 12(%esi), %eax
+ # saved r[2]
+ # ################## Calculate word 3
+ xorl %ebp, %ebp
+ # mul a[3]*b[0]
+ mull %edx
+ addl %eax, %ebx
+ movl 8(%esi), %eax
+ adcl %edx, %ecx
+ movl 4(%edi), %edx
+ adcl $0, %ebp
+ # mul a[2]*b[1]
+ mull %edx
+ addl %eax, %ebx
+ movl 4(%esi), %eax
+ adcl %edx, %ecx
+ movl 8(%edi), %edx
+ adcl $0, %ebp
+ # mul a[1]*b[2]
+ mull %edx
+ addl %eax, %ebx
+ movl (%esi), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ # mul a[0]*b[3]
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl (%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 12(%eax)
+ movl 16(%esi), %eax
+ # saved r[3]
+ # ################## Calculate word 4
+ xorl %ebx, %ebx
+ # mul a[4]*b[0]
+ mull %edx
+ addl %eax, %ecx
+ movl 12(%esi), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ # mul a[3]*b[1]
+ mull %edx
+ addl %eax, %ecx
+ movl 8(%esi), %eax
+ adcl %edx, %ebp
+ movl 8(%edi), %edx
+ adcl $0, %ebx
+ # mul a[2]*b[2]
+ mull %edx
+ addl %eax, %ecx
+ movl 4(%esi), %eax
+ adcl %edx, %ebp
+ movl 12(%edi), %edx
+ adcl $0, %ebx
+ # mul a[1]*b[3]
+ mull %edx
+ addl %eax, %ecx
+ movl (%esi), %eax
+ adcl %edx, %ebp
+ movl 16(%edi), %edx
+ adcl $0, %ebx
+ # mul a[0]*b[4]
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl (%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 16(%eax)
+ movl 20(%esi), %eax
+ # saved r[4]
+ # ################## Calculate word 5
+ xorl %ecx, %ecx
+ # mul a[5]*b[0]
+ mull %edx
+ addl %eax, %ebp
+ movl 16(%esi), %eax
+ adcl %edx, %ebx
+ movl 4(%edi), %edx
+ adcl $0, %ecx
+ # mul a[4]*b[1]
+ mull %edx
+ addl %eax, %ebp
+ movl 12(%esi), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ # mul a[3]*b[2]
+ mull %edx
+ addl %eax, %ebp
+ movl 8(%esi), %eax
+ adcl %edx, %ebx
+ movl 12(%edi), %edx
+ adcl $0, %ecx
+ # mul a[2]*b[3]
+ mull %edx
+ addl %eax, %ebp
+ movl 4(%esi), %eax
+ adcl %edx, %ebx
+ movl 16(%edi), %edx
+ adcl $0, %ecx
+ # mul a[1]*b[4]
+ mull %edx
+ addl %eax, %ebp
+ movl (%esi), %eax
+ adcl %edx, %ebx
+ movl 20(%edi), %edx
+ adcl $0, %ecx
+ # mul a[0]*b[5]
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl (%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 20(%eax)
+ movl 24(%esi), %eax
+ # saved r[5]
+ # ################## Calculate word 6
+ xorl %ebp, %ebp
+ # mul a[6]*b[0]
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esi), %eax
+ adcl %edx, %ecx
+ movl 4(%edi), %edx
+ adcl $0, %ebp
+ # mul a[5]*b[1]
+ mull %edx
+ addl %eax, %ebx
+ movl 16(%esi), %eax
+ adcl %edx, %ecx
+ movl 8(%edi), %edx
+ adcl $0, %ebp
+ # mul a[4]*b[2]
+ mull %edx
+ addl %eax, %ebx
+ movl 12(%esi), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ # mul a[3]*b[3]
+ mull %edx
+ addl %eax, %ebx
+ movl 8(%esi), %eax
+ adcl %edx, %ecx
+ movl 16(%edi), %edx
+ adcl $0, %ebp
+ # mul a[2]*b[4]
+ mull %edx
+ addl %eax, %ebx
+ movl 4(%esi), %eax
+ adcl %edx, %ecx
+ movl 20(%edi), %edx
+ adcl $0, %ebp
+ # mul a[1]*b[5]
+ mull %edx
+ addl %eax, %ebx
+ movl (%esi), %eax
+ adcl %edx, %ecx
+ movl 24(%edi), %edx
+ adcl $0, %ebp
+ # mul a[0]*b[6]
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl (%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 24(%eax)
+ movl 28(%esi), %eax
+ # saved r[6]
+ # ################## Calculate word 7
+ xorl %ebx, %ebx
+ # mul a[7]*b[0]
+ mull %edx
+ addl %eax, %ecx
+ movl 24(%esi), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ # mul a[6]*b[1]
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esi), %eax
+ adcl %edx, %ebp
+ movl 8(%edi), %edx
+ adcl $0, %ebx
+ # mul a[5]*b[2]
+ mull %edx
+ addl %eax, %ecx
+ movl 16(%esi), %eax
+ adcl %edx, %ebp
+ movl 12(%edi), %edx
+ adcl $0, %ebx
+ # mul a[4]*b[3]
+ mull %edx
+ addl %eax, %ecx
+ movl 12(%esi), %eax
+ adcl %edx, %ebp
+ movl 16(%edi), %edx
+ adcl $0, %ebx
+ # mul a[3]*b[4]
+ mull %edx
+ addl %eax, %ecx
+ movl 8(%esi), %eax
+ adcl %edx, %ebp
+ movl 20(%edi), %edx
+ adcl $0, %ebx
+ # mul a[2]*b[5]
+ mull %edx
+ addl %eax, %ecx
+ movl 4(%esi), %eax
+ adcl %edx, %ebp
+ movl 24(%edi), %edx
+ adcl $0, %ebx
+ # mul a[1]*b[6]
+ mull %edx
+ addl %eax, %ecx
+ movl (%esi), %eax
+ adcl %edx, %ebp
+ movl 28(%edi), %edx
+ adcl $0, %ebx
+ # mul a[0]*b[7]
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 28(%eax)
+ movl 28(%esi), %eax
+ # saved r[7]
+ # ################## Calculate word 8
+ xorl %ecx, %ecx
+ # mul a[7]*b[1]
+ mull %edx
+ addl %eax, %ebp
+ movl 24(%esi), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ # mul a[6]*b[2]
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esi), %eax
+ adcl %edx, %ebx
+ movl 12(%edi), %edx
+ adcl $0, %ecx
+ # mul a[5]*b[3]
+ mull %edx
+ addl %eax, %ebp
+ movl 16(%esi), %eax
+ adcl %edx, %ebx
+ movl 16(%edi), %edx
+ adcl $0, %ecx
+ # mul a[4]*b[4]
+ mull %edx
+ addl %eax, %ebp
+ movl 12(%esi), %eax
+ adcl %edx, %ebx
+ movl 20(%edi), %edx
+ adcl $0, %ecx
+ # mul a[3]*b[5]
+ mull %edx
+ addl %eax, %ebp
+ movl 8(%esi), %eax
+ adcl %edx, %ebx
+ movl 24(%edi), %edx
+ adcl $0, %ecx
+ # mul a[2]*b[6]
+ mull %edx
+ addl %eax, %ebp
+ movl 4(%esi), %eax
+ adcl %edx, %ebx
+ movl 28(%edi), %edx
+ adcl $0, %ecx
+ # mul a[1]*b[7]
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 32(%eax)
+ movl 28(%esi), %eax
+ # saved r[8]
+ # ################## Calculate word 9
+ xorl %ebp, %ebp
+ # mul a[7]*b[2]
+ mull %edx
+ addl %eax, %ebx
+ movl 24(%esi), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ # mul a[6]*b[3]
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esi), %eax
+ adcl %edx, %ecx
+ movl 16(%edi), %edx
+ adcl $0, %ebp
+ # mul a[5]*b[4]
+ mull %edx
+ addl %eax, %ebx
+ movl 16(%esi), %eax
+ adcl %edx, %ecx
+ movl 20(%edi), %edx
+ adcl $0, %ebp
+ # mul a[4]*b[5]
+ mull %edx
+ addl %eax, %ebx
+ movl 12(%esi), %eax
+ adcl %edx, %ecx
+ movl 24(%edi), %edx
+ adcl $0, %ebp
+ # mul a[3]*b[6]
+ mull %edx
+ addl %eax, %ebx
+ movl 8(%esi), %eax
+ adcl %edx, %ecx
+ movl 28(%edi), %edx
+ adcl $0, %ebp
+ # mul a[2]*b[7]
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 36(%eax)
+ movl 28(%esi), %eax
+ # saved r[9]
+ # ################## Calculate word 10
+ xorl %ebx, %ebx
+ # mul a[7]*b[3]
+ mull %edx
+ addl %eax, %ecx
+ movl 24(%esi), %eax
+ adcl %edx, %ebp
+ movl 16(%edi), %edx
+ adcl $0, %ebx
+ # mul a[6]*b[4]
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esi), %eax
+ adcl %edx, %ebp
+ movl 20(%edi), %edx
+ adcl $0, %ebx
+ # mul a[5]*b[5]
+ mull %edx
+ addl %eax, %ecx
+ movl 16(%esi), %eax
+ adcl %edx, %ebp
+ movl 24(%edi), %edx
+ adcl $0, %ebx
+ # mul a[4]*b[6]
+ mull %edx
+ addl %eax, %ecx
+ movl 12(%esi), %eax
+ adcl %edx, %ebp
+ movl 28(%edi), %edx
+ adcl $0, %ebx
+ # mul a[3]*b[7]
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl 16(%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 40(%eax)
+ movl 28(%esi), %eax
+ # saved r[10]
+ # ################## Calculate word 11
+ xorl %ecx, %ecx
+ # mul a[7]*b[4]
+ mull %edx
+ addl %eax, %ebp
+ movl 24(%esi), %eax
+ adcl %edx, %ebx
+ movl 20(%edi), %edx
+ adcl $0, %ecx
+ # mul a[6]*b[5]
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esi), %eax
+ adcl %edx, %ebx
+ movl 24(%edi), %edx
+ adcl $0, %ecx
+ # mul a[5]*b[6]
+ mull %edx
+ addl %eax, %ebp
+ movl 16(%esi), %eax
+ adcl %edx, %ebx
+ movl 28(%edi), %edx
+ adcl $0, %ecx
+ # mul a[4]*b[7]
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl 20(%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 44(%eax)
+ movl 28(%esi), %eax
+ # saved r[11]
+ # ################## Calculate word 12
+ xorl %ebp, %ebp
+ # mul a[7]*b[5]
+ mull %edx
+ addl %eax, %ebx
+ movl 24(%esi), %eax
+ adcl %edx, %ecx
+ movl 24(%edi), %edx
+ adcl $0, %ebp
+ # mul a[6]*b[6]
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esi), %eax
+ adcl %edx, %ecx
+ movl 28(%edi), %edx
+ adcl $0, %ebp
+ # mul a[5]*b[7]
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl 24(%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 48(%eax)
+ movl 28(%esi), %eax
+ # saved r[12]
+ # ################## Calculate word 13
+ xorl %ebx, %ebx
+ # mul a[7]*b[6]
+ mull %edx
+ addl %eax, %ecx
+ movl 24(%esi), %eax
+ adcl %edx, %ebp
+ movl 28(%edi), %edx
+ adcl $0, %ebx
+ # mul a[6]*b[7]
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl 28(%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 52(%eax)
+ movl 28(%esi), %eax
+ # saved r[13]
+ # ################## Calculate word 14
+ xorl %ecx, %ecx
+ # mul a[7]*b[7]
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ adcl $0, %ecx
+ movl %ebp, 56(%eax)
+ # saved r[14]
+ # save r[15]
+ movl %ebx, 60(%eax)
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
+.bn_mul_comba8_end:
+ .size bn_mul_comba8,.bn_mul_comba8_end-bn_mul_comba8
+.ident "desasm.pl"
+.text
+ .align 16
+.globl bn_mul_comba4
+ .type bn_mul_comba4,@function
+bn_mul_comba4:
+ pushl %esi
+ movl 12(%esp), %esi
+ pushl %edi
+ movl 20(%esp), %edi
+ pushl %ebp
+ pushl %ebx
+ xorl %ebx, %ebx
+ movl (%esi), %eax
+ xorl %ecx, %ecx
+ movl (%edi), %edx
+ # ################## Calculate word 0
+ xorl %ebp, %ebp
+ # mul a[0]*b[0]
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl (%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, (%eax)
+ movl 4(%esi), %eax
+ # saved r[0]
+ # ################## Calculate word 1
+ xorl %ebx, %ebx
+ # mul a[1]*b[0]
+ mull %edx
+ addl %eax, %ecx
+ movl (%esi), %eax
+ adcl %edx, %ebp
+ movl 4(%edi), %edx
+ adcl $0, %ebx
+ # mul a[0]*b[1]
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl (%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 4(%eax)
+ movl 8(%esi), %eax
+ # saved r[1]
+ # ################## Calculate word 2
+ xorl %ecx, %ecx
+ # mul a[2]*b[0]
+ mull %edx
+ addl %eax, %ebp
+ movl 4(%esi), %eax
+ adcl %edx, %ebx
+ movl 4(%edi), %edx
+ adcl $0, %ecx
+ # mul a[1]*b[1]
+ mull %edx
+ addl %eax, %ebp
+ movl (%esi), %eax
+ adcl %edx, %ebx
+ movl 8(%edi), %edx
+ adcl $0, %ecx
+ # mul a[0]*b[2]
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl (%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 8(%eax)
+ movl 12(%esi), %eax
+ # saved r[2]
+ # ################## Calculate word 3
+ xorl %ebp, %ebp
+ # mul a[3]*b[0]
+ mull %edx
+ addl %eax, %ebx
+ movl 8(%esi), %eax
+ adcl %edx, %ecx
+ movl 4(%edi), %edx
+ adcl $0, %ebp
+ # mul a[2]*b[1]
+ mull %edx
+ addl %eax, %ebx
+ movl 4(%esi), %eax
+ adcl %edx, %ecx
+ movl 8(%edi), %edx
+ adcl $0, %ebp
+ # mul a[1]*b[2]
+ mull %edx
+ addl %eax, %ebx
+ movl (%esi), %eax
+ adcl %edx, %ecx
+ movl 12(%edi), %edx
+ adcl $0, %ebp
+ # mul a[0]*b[3]
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ movl 4(%edi), %edx
+ adcl $0, %ebp
+ movl %ebx, 12(%eax)
+ movl 12(%esi), %eax
+ # saved r[3]
+ # ################## Calculate word 4
+ xorl %ebx, %ebx
+ # mul a[3]*b[1]
+ mull %edx
+ addl %eax, %ecx
+ movl 8(%esi), %eax
+ adcl %edx, %ebp
+ movl 8(%edi), %edx
+ adcl $0, %ebx
+ # mul a[2]*b[2]
+ mull %edx
+ addl %eax, %ecx
+ movl 4(%esi), %eax
+ adcl %edx, %ebp
+ movl 12(%edi), %edx
+ adcl $0, %ebx
+ # mul a[1]*b[3]
+ mull %edx
+ addl %eax, %ecx
+ movl 20(%esp), %eax
+ adcl %edx, %ebp
+ movl 8(%edi), %edx
+ adcl $0, %ebx
+ movl %ecx, 16(%eax)
+ movl 12(%esi), %eax
+ # saved r[4]
+ # ################## Calculate word 5
+ xorl %ecx, %ecx
+ # mul a[3]*b[2]
+ mull %edx
+ addl %eax, %ebp
+ movl 8(%esi), %eax
+ adcl %edx, %ebx
+ movl 12(%edi), %edx
+ adcl $0, %ecx
+ # mul a[2]*b[3]
+ mull %edx
+ addl %eax, %ebp
+ movl 20(%esp), %eax
+ adcl %edx, %ebx
+ movl 12(%edi), %edx
+ adcl $0, %ecx
+ movl %ebp, 20(%eax)
+ movl 12(%esi), %eax
+ # saved r[5]
+ # ################## Calculate word 6
+ xorl %ebp, %ebp
+ # mul a[3]*b[3]
+ mull %edx
+ addl %eax, %ebx
+ movl 20(%esp), %eax
+ adcl %edx, %ecx
+ adcl $0, %ebp
+ movl %ebx, 24(%eax)
+ # saved r[6]
+ # save r[7]
+ movl %ecx, 28(%eax)
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
+.bn_mul_comba4_end:
+ .size bn_mul_comba4,.bn_mul_comba4_end-bn_mul_comba4
+.ident "desasm.pl"
+.text
+ .align 16
+.globl bn_sqr_comba8
+ .type bn_sqr_comba8,@function
+bn_sqr_comba8:
+ pushl %esi
+ pushl %edi
+ pushl %ebp
+ pushl %ebx
+ movl 20(%esp), %edi
+ movl 24(%esp), %esi
+ xorl %ebx, %ebx
+ xorl %ecx, %ecx
+ movl (%esi), %eax
+ # ############### Calculate word 0
+ xorl %ebp, %ebp
+ # sqr a[0]*a[0]
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl (%esi), %edx
+ adcl $0, %ebp
+ movl %ebx, (%edi)
+ movl 4(%esi), %eax
+ # saved r[0]
+ # ############### Calculate word 1
+ xorl %ebx, %ebx
+ # sqr a[1]*a[0]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %eax
+ adcl $0, %ebx
+ movl %ecx, 4(%edi)
+ movl (%esi), %edx
+ # saved r[1]
+ # ############### Calculate word 2
+ xorl %ecx, %ecx
+ # sqr a[2]*a[0]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 4(%esi), %eax
+ adcl $0, %ecx
+ # sqr a[1]*a[1]
+ mull %eax
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl (%esi), %edx
+ adcl $0, %ecx
+ movl %ebp, 8(%edi)
+ movl 12(%esi), %eax
+ # saved r[2]
+ # ############### Calculate word 3
+ xorl %ebp, %ebp
+ # sqr a[3]*a[0]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 8(%esi), %eax
+ adcl $0, %ebp
+ movl 4(%esi), %edx
+ # sqr a[2]*a[1]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 16(%esi), %eax
+ adcl $0, %ebp
+ movl %ebx, 12(%edi)
+ movl (%esi), %edx
+ # saved r[3]
+ # ############### Calculate word 4
+ xorl %ebx, %ebx
+ # sqr a[4]*a[0]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 12(%esi), %eax
+ adcl $0, %ebx
+ movl 4(%esi), %edx
+ # sqr a[3]*a[1]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %eax
+ adcl $0, %ebx
+ # sqr a[2]*a[2]
+ mull %eax
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl (%esi), %edx
+ adcl $0, %ebx
+ movl %ecx, 16(%edi)
+ movl 20(%esi), %eax
+ # saved r[4]
+ # ############### Calculate word 5
+ xorl %ecx, %ecx
+ # sqr a[5]*a[0]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 16(%esi), %eax
+ adcl $0, %ecx
+ movl 4(%esi), %edx
+ # sqr a[4]*a[1]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 12(%esi), %eax
+ adcl $0, %ecx
+ movl 8(%esi), %edx
+ # sqr a[3]*a[2]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 24(%esi), %eax
+ adcl $0, %ecx
+ movl %ebp, 20(%edi)
+ movl (%esi), %edx
+ # saved r[5]
+ # ############### Calculate word 6
+ xorl %ebp, %ebp
+ # sqr a[6]*a[0]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 20(%esi), %eax
+ adcl $0, %ebp
+ movl 4(%esi), %edx
+ # sqr a[5]*a[1]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 16(%esi), %eax
+ adcl $0, %ebp
+ movl 8(%esi), %edx
+ # sqr a[4]*a[2]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 12(%esi), %eax
+ adcl $0, %ebp
+ # sqr a[3]*a[3]
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl (%esi), %edx
+ adcl $0, %ebp
+ movl %ebx, 24(%edi)
+ movl 28(%esi), %eax
+ # saved r[6]
+ # ############### Calculate word 7
+ xorl %ebx, %ebx
+ # sqr a[7]*a[0]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 24(%esi), %eax
+ adcl $0, %ebx
+ movl 4(%esi), %edx
+ # sqr a[6]*a[1]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 20(%esi), %eax
+ adcl $0, %ebx
+ movl 8(%esi), %edx
+ # sqr a[5]*a[2]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 16(%esi), %eax
+ adcl $0, %ebx
+ movl 12(%esi), %edx
+ # sqr a[4]*a[3]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 28(%esi), %eax
+ adcl $0, %ebx
+ movl %ecx, 28(%edi)
+ movl 4(%esi), %edx
+ # saved r[7]
+ # ############### Calculate word 8
+ xorl %ecx, %ecx
+ # sqr a[7]*a[1]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 24(%esi), %eax
+ adcl $0, %ecx
+ movl 8(%esi), %edx
+ # sqr a[6]*a[2]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 20(%esi), %eax
+ adcl $0, %ecx
+ movl 12(%esi), %edx
+ # sqr a[5]*a[3]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 16(%esi), %eax
+ adcl $0, %ecx
+ # sqr a[4]*a[4]
+ mull %eax
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 8(%esi), %edx
+ adcl $0, %ecx
+ movl %ebp, 32(%edi)
+ movl 28(%esi), %eax
+ # saved r[8]
+ # ############### Calculate word 9
+ xorl %ebp, %ebp
+ # sqr a[7]*a[2]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 24(%esi), %eax
+ adcl $0, %ebp
+ movl 12(%esi), %edx
+ # sqr a[6]*a[3]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 20(%esi), %eax
+ adcl $0, %ebp
+ movl 16(%esi), %edx
+ # sqr a[5]*a[4]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 28(%esi), %eax
+ adcl $0, %ebp
+ movl %ebx, 36(%edi)
+ movl 12(%esi), %edx
+ # saved r[9]
+ # ############### Calculate word 10
+ xorl %ebx, %ebx
+ # sqr a[7]*a[3]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 24(%esi), %eax
+ adcl $0, %ebx
+ movl 16(%esi), %edx
+ # sqr a[6]*a[4]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 20(%esi), %eax
+ adcl $0, %ebx
+ # sqr a[5]*a[5]
+ mull %eax
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 16(%esi), %edx
+ adcl $0, %ebx
+ movl %ecx, 40(%edi)
+ movl 28(%esi), %eax
+ # saved r[10]
+ # ############### Calculate word 11
+ xorl %ecx, %ecx
+ # sqr a[7]*a[4]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 24(%esi), %eax
+ adcl $0, %ecx
+ movl 20(%esi), %edx
+ # sqr a[6]*a[5]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 28(%esi), %eax
+ adcl $0, %ecx
+ movl %ebp, 44(%edi)
+ movl 20(%esi), %edx
+ # saved r[11]
+ # ############### Calculate word 12
+ xorl %ebp, %ebp
+ # sqr a[7]*a[5]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 24(%esi), %eax
+ adcl $0, %ebp
+ # sqr a[6]*a[6]
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 24(%esi), %edx
+ adcl $0, %ebp
+ movl %ebx, 48(%edi)
+ movl 28(%esi), %eax
+ # saved r[12]
+ # ############### Calculate word 13
+ xorl %ebx, %ebx
+ # sqr a[7]*a[6]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 28(%esi), %eax
+ adcl $0, %ebx
+ movl %ecx, 52(%edi)
+ # saved r[13]
+ # ############### Calculate word 14
+ xorl %ecx, %ecx
+ # sqr a[7]*a[7]
+ mull %eax
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ adcl $0, %ecx
+ movl %ebp, 56(%edi)
+ # saved r[14]
+ movl %ebx, 60(%edi)
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
+.bn_sqr_comba8_end:
+ .size bn_sqr_comba8,.bn_sqr_comba8_end-bn_sqr_comba8
+.ident "desasm.pl"
+.text
+ .align 16
+.globl bn_sqr_comba4
+ .type bn_sqr_comba4,@function
+bn_sqr_comba4:	# bn_sqr_comba4(r, a): r[0..7] = square of the 4-word number a[0..3], fully unrolled comba
+ pushl %esi
+ pushl %edi
+ pushl %ebp
+ pushl %ebx
+ movl 20(%esp), %edi	# edi = r (result, 8 words)
+ movl 24(%esp), %esi	# esi = a (input, 4 words)
+ xorl %ebx, %ebx	# ebx/ecx/ebp rotate roles as the 3-word column accumulator (c0/c1/c2)
+ xorl %ecx, %ecx
+ movl (%esi), %eax
+ # ############### Calculate word 0
+ xorl %ebp, %ebp
+ # sqr a[0]*a[0]
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl (%esi), %edx
+ adcl $0, %ebp
+ movl %ebx, (%edi)
+ movl 4(%esi), %eax
+ # saved r[0]
+ # ############### Calculate word 1
+ xorl %ebx, %ebx
+ # sqr a[1]*a[0]
+ mull %edx
+ addl %eax, %eax	# double the cross product: a[1]*a[0] occurs twice in the square
+ adcl %edx, %edx
+ adcl $0, %ebx	# catch the carry shifted out by the doubling
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %eax
+ adcl $0, %ebx
+ movl %ecx, 4(%edi)
+ movl (%esi), %edx
+ # saved r[1]
+ # ############### Calculate word 2
+ xorl %ecx, %ecx
+ # sqr a[2]*a[0]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 4(%esi), %eax
+ adcl $0, %ecx
+ # sqr a[1]*a[1]
+ mull %eax	# diagonal term a[1]^2 appears once, so it is not doubled
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl (%esi), %edx
+ adcl $0, %ecx
+ movl %ebp, 8(%edi)
+ movl 12(%esi), %eax
+ # saved r[2]
+ # ############### Calculate word 3
+ xorl %ebp, %ebp
+ # sqr a[3]*a[0]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 8(%esi), %eax
+ adcl $0, %ebp
+ movl 4(%esi), %edx
+ # sqr a[2]*a[1]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebp
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ movl 12(%esi), %eax
+ adcl $0, %ebp
+ movl %ebx, 12(%edi)
+ movl 4(%esi), %edx
+ # saved r[3]
+ # ############### Calculate word 4
+ xorl %ebx, %ebx
+ # sqr a[3]*a[1]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ebx
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %eax
+ adcl $0, %ebx
+ # sqr a[2]*a[2]
+ mull %eax
+ addl %eax, %ecx
+ adcl %edx, %ebp
+ movl 8(%esi), %edx
+ adcl $0, %ebx
+ movl %ecx, 16(%edi)
+ movl 12(%esi), %eax
+ # saved r[4]
+ # ############### Calculate word 5
+ xorl %ecx, %ecx
+ # sqr a[3]*a[2]
+ mull %edx
+ addl %eax, %eax
+ adcl %edx, %edx
+ adcl $0, %ecx
+ addl %eax, %ebp
+ adcl %edx, %ebx
+ movl 12(%esi), %eax
+ adcl $0, %ecx
+ movl %ebp, 20(%edi)
+ # saved r[5]
+ # ############### Calculate word 6
+ xorl %ebp, %ebp
+ # sqr a[3]*a[3]
+ mull %eax
+ addl %eax, %ebx
+ adcl %edx, %ecx
+ adcl $0, %ebp
+ movl %ebx, 24(%edi)
+ # saved r[6]
+ movl %ecx, 28(%edi)	# top word r[7]; no carry word needed beyond it
+ popl %ebx
+ popl %ebp
+ popl %edi
+ popl %esi
+ ret
+.bn_sqr_comba4_end:
+ .size bn_sqr_comba4,.bn_sqr_comba4_end-bn_sqr_comba4
+.ident "desasm.pl"
diff --git a/crypto/bn/asm/f b/crypto/bn/asm/f
new file mode 100644
index 0000000000..a23fa159b2
--- /dev/null
+++ b/crypto/bn/asm/f
@@ -0,0 +1,500 @@
+ .text
+ .align 3
+ .globl bn_sqr_comba8
+ .ent bn_sqr_comba8
+bn_sqr_comba8:
+bn_sqr_comba8..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ ldq $0, 0($17)
+ ldq $1, 8($17)
+ ldq $2, 16($17)
+ ldq $3, 24($17)
+ ldq $4, 32($17)
+ ldq $5, 40($17)
+ ldq $6, 48($17)
+ ldq $7, 56($17)
+ bis $31, $31, $23
+ mulq $0, $0, $8
+ umulh $0, $0, $22
+ stq $8, 0($16)
+ bis $31, $31, $8
+ mulq $1, $0, $24
+ umulh $1, $0, $25
+ cmplt $24, $31, $27
+ cmplt $25, $31, $28
+ addq $24, $24, $24
+ addq $25, $25, $25
+ addq $25, $27, $25
+ addq $8, $28, $8
+ addq $22, $24, $22
+ addq $23, $25, $23
+ cmpult $22, $24, $21
+ cmpult $23, $25, $20
+ addq $23, $21, $23
+ addq $8, $20, $8
+ stq $22, 8($16)
+ bis $31, $31, $22
+ mulq $1, $1, $19
+ umulh $1, $1, $18
+ addq $23, $19, $23
+ addq $8, $18, $8
+ cmpult $23, $19, $17
+ cmpult $8, $18, $27
+ addq $8, $17, $8
+ addq $22, $27, $22
+ mulq $2, $0, $28
+ umulh $2, $0, $24
+ cmplt $28, $31, $25
+ cmplt $24, $31, $21
+ addq $28, $28, $28
+ addq $24, $24, $24
+ addq $24, $25, $24
+ addq $22, $21, $22
+ addq $23, $28, $23
+ addq $8, $24, $8
+ cmpult $23, $28, $20
+ cmpult $8, $24, $19
+ addq $8, $20, $8
+ addq $22, $19, $22
+ stq $23, 16($16)
+ bis $31, $31, $23
+ mulq $2, $1, $18
+ umulh $2, $1, $17
+ cmplt $18, $31, $27
+ cmplt $17, $31, $25
+ addq $18, $18, $18
+ addq $17, $17, $17
+ addq $17, $27, $17
+ addq $23, $25, $23
+ addq $8, $18, $8
+ addq $22, $17, $22
+ cmpult $8, $18, $21
+ cmpult $22, $17, $28
+ addq $22, $21, $22
+ addq $23, $28, $23
+ mulq $3, $0, $24
+ umulh $3, $0, $20
+ cmplt $24, $31, $19
+ cmplt $20, $31, $27
+ addq $24, $24, $24
+ addq $20, $20, $20
+ addq $20, $19, $20
+ addq $23, $27, $23
+ addq $8, $24, $8
+ addq $22, $20, $22
+ cmpult $8, $24, $25
+ cmpult $22, $20, $18
+ addq $22, $25, $22
+ addq $23, $18, $23
+ stq $8, 24($16)
+ bis $31, $31, $8
+ mulq $2, $2, $17
+ umulh $2, $2, $21
+ addq $22, $17, $22
+ addq $23, $21, $23
+ cmpult $22, $17, $28
+ cmpult $23, $21, $19
+ addq $23, $28, $23
+ addq $8, $19, $8
+ mulq $3, $1, $27
+ umulh $3, $1, $24
+ cmplt $27, $31, $20
+ cmplt $24, $31, $25
+ addq $27, $27, $27
+ addq $24, $24, $24
+ addq $24, $20, $24
+ addq $8, $25, $8
+ addq $22, $27, $22
+ addq $23, $24, $23
+ cmpult $22, $27, $18
+ cmpult $23, $24, $17
+ addq $23, $18, $23
+ addq $8, $17, $8
+ mulq $4, $0, $21
+ umulh $4, $0, $28
+ cmplt $21, $31, $19
+ cmplt $28, $31, $20
+ addq $21, $21, $21
+ addq $28, $28, $28
+ addq $28, $19, $28
+ addq $8, $20, $8
+ addq $22, $21, $22
+ addq $23, $28, $23
+ cmpult $22, $21, $25
+ cmpult $23, $28, $27
+ addq $23, $25, $23
+ addq $8, $27, $8
+ stq $22, 32($16)
+ bis $31, $31, $22
+ mulq $3, $2, $24
+ umulh $3, $2, $18
+ cmplt $24, $31, $17
+ cmplt $18, $31, $19
+ addq $24, $24, $24
+ addq $18, $18, $18
+ addq $18, $17, $18
+ addq $22, $19, $22
+ addq $23, $24, $23
+ addq $8, $18, $8
+ cmpult $23, $24, $20
+ cmpult $8, $18, $21
+ addq $8, $20, $8
+ addq $22, $21, $22
+ mulq $4, $1, $28
+ umulh $4, $1, $25
+ cmplt $28, $31, $27
+ cmplt $25, $31, $17
+ addq $28, $28, $28
+ addq $25, $25, $25
+ addq $25, $27, $25
+ addq $22, $17, $22
+ addq $23, $28, $23
+ addq $8, $25, $8
+ cmpult $23, $28, $19
+ cmpult $8, $25, $24
+ addq $8, $19, $8
+ addq $22, $24, $22
+ mulq $5, $0, $18
+ umulh $5, $0, $20
+ cmplt $18, $31, $21
+ cmplt $20, $31, $27
+ addq $18, $18, $18
+ addq $20, $20, $20
+ addq $20, $21, $20
+ addq $22, $27, $22
+ addq $23, $18, $23
+ addq $8, $20, $8
+ cmpult $23, $18, $17
+ cmpult $8, $20, $28
+ addq $8, $17, $8
+ addq $22, $28, $22
+ stq $23, 40($16)
+ bis $31, $31, $23
+ mulq $3, $3, $25
+ umulh $3, $3, $19
+ addq $8, $25, $8
+ addq $22, $19, $22
+ cmpult $8, $25, $24
+ cmpult $22, $19, $21
+ addq $22, $24, $22
+ addq $23, $21, $23
+ mulq $4, $2, $27
+ umulh $4, $2, $18
+ cmplt $27, $31, $20
+ cmplt $18, $31, $17
+ addq $27, $27, $27
+ addq $18, $18, $18
+ addq $18, $20, $18
+ addq $23, $17, $23
+ addq $8, $27, $8
+ addq $22, $18, $22
+ cmpult $8, $27, $28
+ cmpult $22, $18, $25
+ addq $22, $28, $22
+ addq $23, $25, $23
+ mulq $5, $1, $19
+ umulh $5, $1, $24
+ cmplt $19, $31, $21
+ cmplt $24, $31, $20
+ addq $19, $19, $19
+ addq $24, $24, $24
+ addq $24, $21, $24
+ addq $23, $20, $23
+ addq $8, $19, $8
+ addq $22, $24, $22
+ cmpult $8, $19, $17
+ cmpult $22, $24, $27
+ addq $22, $17, $22
+ addq $23, $27, $23
+ mulq $6, $0, $18
+ umulh $6, $0, $28
+ cmplt $18, $31, $25
+ cmplt $28, $31, $21
+ addq $18, $18, $18
+ addq $28, $28, $28
+ addq $28, $25, $28
+ addq $23, $21, $23
+ addq $8, $18, $8
+ addq $22, $28, $22
+ cmpult $8, $18, $20
+ cmpult $22, $28, $19
+ addq $22, $20, $22
+ addq $23, $19, $23
+ stq $8, 48($16)
+ bis $31, $31, $8
+ mulq $4, $3, $24
+ umulh $4, $3, $17
+ cmplt $24, $31, $27
+ cmplt $17, $31, $25
+ addq $24, $24, $24
+ addq $17, $17, $17
+ addq $17, $27, $17
+ addq $8, $25, $8
+ addq $22, $24, $22
+ addq $23, $17, $23
+ cmpult $22, $24, $21
+ cmpult $23, $17, $18
+ addq $23, $21, $23
+ addq $8, $18, $8
+ mulq $5, $2, $28
+ umulh $5, $2, $20
+ cmplt $28, $31, $19
+ cmplt $20, $31, $27
+ addq $28, $28, $28
+ addq $20, $20, $20
+ addq $20, $19, $20
+ addq $8, $27, $8
+ addq $22, $28, $22
+ addq $23, $20, $23
+ cmpult $22, $28, $25
+ cmpult $23, $20, $24
+ addq $23, $25, $23
+ addq $8, $24, $8
+ mulq $6, $1, $17
+ umulh $6, $1, $21
+ cmplt $17, $31, $18
+ cmplt $21, $31, $19
+ addq $17, $17, $17
+ addq $21, $21, $21
+ addq $21, $18, $21
+ addq $8, $19, $8
+ addq $22, $17, $22
+ addq $23, $21, $23
+ cmpult $22, $17, $27
+ cmpult $23, $21, $28
+ addq $23, $27, $23
+ addq $8, $28, $8
+ mulq $7, $0, $20
+ umulh $7, $0, $25
+ cmplt $20, $31, $24
+ cmplt $25, $31, $18
+ addq $20, $20, $20
+ addq $25, $25, $25
+ addq $25, $24, $25
+ addq $8, $18, $8
+ addq $22, $20, $22
+ addq $23, $25, $23
+ cmpult $22, $20, $19
+ cmpult $23, $25, $17
+ addq $23, $19, $23
+ addq $8, $17, $8
+ stq $22, 56($16)
+ bis $31, $31, $22
+ mulq $4, $4, $21
+ umulh $4, $4, $27
+ addq $23, $21, $23
+ addq $8, $27, $8
+ cmpult $23, $21, $28
+ cmpult $8, $27, $24
+ addq $8, $28, $8
+ addq $22, $24, $22
+ mulq $5, $3, $18
+ umulh $5, $3, $20
+ cmplt $18, $31, $25
+ cmplt $20, $31, $19
+ addq $18, $18, $18
+ addq $20, $20, $20
+ addq $20, $25, $20
+ addq $22, $19, $22
+ addq $23, $18, $23
+ addq $8, $20, $8
+ cmpult $23, $18, $17
+ cmpult $8, $20, $21
+ addq $8, $17, $8
+ addq $22, $21, $22
+ mulq $6, $2, $27
+ umulh $6, $2, $28
+ cmplt $27, $31, $24
+ cmplt $28, $31, $25
+ addq $27, $27, $27
+ addq $28, $28, $28
+ addq $28, $24, $28
+ addq $22, $25, $22
+ addq $23, $27, $23
+ addq $8, $28, $8
+ cmpult $23, $27, $19
+ cmpult $8, $28, $18
+ addq $8, $19, $8
+ addq $22, $18, $22
+ mulq $7, $1, $20
+ umulh $7, $1, $17
+ cmplt $20, $31, $21
+ cmplt $17, $31, $24
+ addq $20, $20, $20
+ addq $17, $17, $17
+ addq $17, $21, $17
+ addq $22, $24, $22
+ addq $23, $20, $23
+ addq $8, $17, $8
+ cmpult $23, $20, $25
+ cmpult $8, $17, $27
+ addq $8, $25, $8
+ addq $22, $27, $22
+ stq $23, 64($16)
+ bis $31, $31, $23
+ mulq $5, $4, $28
+ umulh $5, $4, $19
+ cmplt $28, $31, $18
+ cmplt $19, $31, $21
+ addq $28, $28, $28
+ addq $19, $19, $19
+ addq $19, $18, $19
+ addq $23, $21, $23
+ addq $8, $28, $8
+ addq $22, $19, $22
+ cmpult $8, $28, $24
+ cmpult $22, $19, $20
+ addq $22, $24, $22
+ addq $23, $20, $23
+ mulq $6, $3, $17
+ umulh $6, $3, $25
+ cmplt $17, $31, $27
+ cmplt $25, $31, $18
+ addq $17, $17, $17
+ addq $25, $25, $25
+ addq $25, $27, $25
+ addq $23, $18, $23
+ addq $8, $17, $8
+ addq $22, $25, $22
+ cmpult $8, $17, $21
+ cmpult $22, $25, $28
+ addq $22, $21, $22
+ addq $23, $28, $23
+ mulq $7, $2, $19
+ umulh $7, $2, $24
+ cmplt $19, $31, $20
+ cmplt $24, $31, $27
+ addq $19, $19, $19
+ addq $24, $24, $24
+ addq $24, $20, $24
+ addq $23, $27, $23
+ addq $8, $19, $8
+ addq $22, $24, $22
+ cmpult $8, $19, $18
+ cmpult $22, $24, $17
+ addq $22, $18, $22
+ addq $23, $17, $23
+ stq $8, 72($16)
+ bis $31, $31, $8
+ mulq $5, $5, $25
+ umulh $5, $5, $21
+ addq $22, $25, $22
+ addq $23, $21, $23
+ cmpult $22, $25, $28
+ cmpult $23, $21, $20
+ addq $23, $28, $23
+ addq $8, $20, $8
+ mulq $6, $4, $27
+ umulh $6, $4, $19
+ cmplt $27, $31, $24
+ cmplt $19, $31, $18
+ addq $27, $27, $27
+ addq $19, $19, $19
+ addq $19, $24, $19
+ addq $8, $18, $8
+ addq $22, $27, $22
+ addq $23, $19, $23
+ cmpult $22, $27, $17
+ cmpult $23, $19, $25
+ addq $23, $17, $23
+ addq $8, $25, $8
+ mulq $7, $3, $21
+ umulh $7, $3, $28
+ cmplt $21, $31, $20
+ cmplt $28, $31, $24
+ addq $21, $21, $21
+ addq $28, $28, $28
+ addq $28, $20, $28
+ addq $8, $24, $8
+ addq $22, $21, $22
+ addq $23, $28, $23
+ cmpult $22, $21, $18
+ cmpult $23, $28, $27
+ addq $23, $18, $23
+ addq $8, $27, $8
+ stq $22, 80($16)
+ bis $31, $31, $22
+ mulq $6, $5, $19
+ umulh $6, $5, $17
+ cmplt $19, $31, $25
+ cmplt $17, $31, $20
+ addq $19, $19, $19
+ addq $17, $17, $17
+ addq $17, $25, $17
+ addq $22, $20, $22
+ addq $23, $19, $23
+ addq $8, $17, $8
+ cmpult $23, $19, $24
+ cmpult $8, $17, $21
+ addq $8, $24, $8
+ addq $22, $21, $22
+ mulq $7, $4, $28
+ umulh $7, $4, $18
+ cmplt $28, $31, $27
+ cmplt $18, $31, $25
+ addq $28, $28, $28
+ addq $18, $18, $18
+ addq $18, $27, $18
+ addq $22, $25, $22
+ addq $23, $28, $23
+ addq $8, $18, $8
+ cmpult $23, $28, $20
+ cmpult $8, $18, $19
+ addq $8, $20, $8
+ addq $22, $19, $22
+ stq $23, 88($16)
+ bis $31, $31, $23
+ mulq $6, $6, $17
+ umulh $6, $6, $24
+ addq $8, $17, $8
+ addq $22, $24, $22
+ cmpult $8, $17, $21
+ cmpult $22, $24, $27
+ addq $22, $21, $22
+ addq $23, $27, $23
+ mulq $7, $5, $25
+ umulh $7, $5, $28
+ cmplt $25, $31, $18
+ cmplt $28, $31, $20
+ addq $25, $25, $25
+ addq $28, $28, $28
+ addq $28, $18, $28
+ addq $23, $20, $23
+ addq $8, $25, $8
+ addq $22, $28, $22
+ cmpult $8, $25, $19
+ cmpult $22, $28, $17
+ addq $22, $19, $22
+ addq $23, $17, $23
+ stq $8, 96($16)
+ bis $31, $31, $8
+ mulq $7, $6, $24
+ umulh $7, $6, $21
+ cmplt $24, $31, $27
+ cmplt $21, $31, $18
+ addq $24, $24, $24
+ addq $21, $21, $21
+ addq $21, $27, $21
+ addq $8, $18, $8
+ addq $22, $24, $22
+ addq $23, $21, $23
+ cmpult $22, $24, $20
+ cmpult $23, $21, $25
+ addq $23, $20, $23
+ addq $8, $25, $8
+ stq $22, 104($16)
+ bis $31, $31, $22
+ mulq $7, $7, $28
+ umulh $7, $7, $19
+ addq $23, $28, $23
+ addq $8, $19, $8
+ cmpult $23, $28, $17
+ cmpult $8, $19, $27
+ addq $8, $17, $8
+ addq $22, $27, $22
+ stq $23, 112($16)
+ stq $8, 120($16)
+ ret $31,($26),1
+ .end bn_sqr_comba8
diff --git a/crypto/bn/asm/f.c b/crypto/bn/asm/f.c
new file mode 100644
index 0000000000..bfdccae4a0
--- /dev/null
+++ b/crypto/bn/asm/f.c
@@ -0,0 +1,8 @@
+int abc(a,b,c,d,e,f,g,h,i,j)	/* scratch/test function (K&R style); declared int but never returns a value */
+unsigned long a,b,c,d,e,f,g,h,i,j;
+ {
+ gg(g);	/* gg() is implicitly declared; presumably a probe/trace helper - not defined in this file */
+ if (g)
+ gg(h);	/* only reached when g is nonzero */
+ gg(i);
+ }
diff --git a/crypto/bn/asm/f.elf b/crypto/bn/asm/f.elf
new file mode 100644
index 0000000000..39d07b79e1
--- /dev/null
+++ b/crypto/bn/asm/f.elf
@@ -0,0 +1,2149 @@
+ # Don't even think of reading this code
+ # It was automatically generated by bn-586.pl
+ # Which is a perl program used to generate the x86 assembler for
+ # any of elf, a.out, BSDI, Win32, or Solaris
+ # eric <eay@cryptsoft.com>
+
+ .file "bn-586.s"
+ .version "01.01"
+gcc2_compiled.:
+.text
+ .align 16
+.globl bn_mul_add_words
+ .type bn_mul_add_words,@function
+bn_mul_add_words:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+
+
+ xorl %esi, %esi
+ movl 20(%esp), %edi
+ movl 28(%esp), %ecx
+ movl 24(%esp), %ebx
+ andl $4294967288, %ecx
+ movl 32(%esp), %ebp
+ pushl %ecx
+ jz .L000maw_finish
+.L001maw_loop:
+ movl %ecx, (%esp)
+ # Round 0
+ movl (%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl (%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, (%edi)
+ movl %edx, %esi
+ # Round 4
+ movl 4(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 4(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 4(%edi)
+ movl %edx, %esi
+ # Round 8
+ movl 8(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 8(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 8(%edi)
+ movl %edx, %esi
+ # Round 12
+ movl 12(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 12(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 12(%edi)
+ movl %edx, %esi
+ # Round 16
+ movl 16(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 16(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 16(%edi)
+ movl %edx, %esi
+ # Round 20
+ movl 20(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 20(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 20(%edi)
+ movl %edx, %esi
+ # Round 24
+ movl 24(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 24(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 24(%edi)
+ movl %edx, %esi
+ # Round 28
+ movl 28(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 28(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 28(%edi)
+ movl %edx, %esi
+
+ movl (%esp), %ecx
+ addl $32, %ebx
+ addl $32, %edi
+ subl $8, %ecx
+ jnz .L001maw_loop
+.L000maw_finish:
+ movl 32(%esp), %ecx
+ andl $7, %ecx
+ jnz .L002maw_finish2
+ jmp .L003maw_end
+.align 16
+.L002maw_finish2:
+ # Tail Round 0
+ movl (%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl (%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ decl %ecx
+ movl %eax, (%edi)
+ movl %edx, %esi
+ jz .L003maw_end
+ # Tail Round 1
+ movl 4(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 4(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ decl %ecx
+ movl %eax, 4(%edi)
+ movl %edx, %esi
+ jz .L003maw_end
+ # Tail Round 2
+ movl 8(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 8(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ decl %ecx
+ movl %eax, 8(%edi)
+ movl %edx, %esi
+ jz .L003maw_end
+ # Tail Round 3
+ movl 12(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 12(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ decl %ecx
+ movl %eax, 12(%edi)
+ movl %edx, %esi
+ jz .L003maw_end
+ # Tail Round 4
+ movl 16(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 16(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ decl %ecx
+ movl %eax, 16(%edi)
+ movl %edx, %esi
+ jz .L003maw_end
+ # Tail Round 5
+ movl 20(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 20(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ decl %ecx
+ movl %eax, 20(%edi)
+ movl %edx, %esi
+ jz .L003maw_end
+ # Tail Round 6
+ movl 24(%ebx), %eax
+ mull %ebp
+ addl %esi, %eax
+ movl 24(%edi), %esi
+ adcl $0, %edx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 24(%edi)
+ movl %edx, %esi
+.L003maw_end:
+ movl %esi, %eax
+ popl %ecx
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.bn_mul_add_words_end:
+ .size bn_mul_add_words,.bn_mul_add_words_end-bn_mul_add_words
+.ident "bn_mul_add_words"
+.text
+ .align 16
+.globl bn_mul_words
+ .type bn_mul_words,@function
+bn_mul_words:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+
+
+ xorl %esi, %esi
+ movl 20(%esp), %edi
+ movl 24(%esp), %ebx
+ movl 28(%esp), %ebp
+ movl 32(%esp), %ecx
+ andl $4294967288, %ebp
+ jz .L004mw_finish
+.L005mw_loop:
+ # Round 0
+ movl (%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, (%edi)
+ movl %edx, %esi
+ # Round 4
+ movl 4(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 4(%edi)
+ movl %edx, %esi
+ # Round 8
+ movl 8(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 8(%edi)
+ movl %edx, %esi
+ # Round 12
+ movl 12(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 12(%edi)
+ movl %edx, %esi
+ # Round 16
+ movl 16(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 16(%edi)
+ movl %edx, %esi
+ # Round 20
+ movl 20(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 20(%edi)
+ movl %edx, %esi
+ # Round 24
+ movl 24(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 24(%edi)
+ movl %edx, %esi
+ # Round 28
+ movl 28(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 28(%edi)
+ movl %edx, %esi
+
+ addl $32, %ebx
+ addl $32, %edi
+ subl $8, %ebp
+ jz .L004mw_finish
+ jmp .L005mw_loop
+.L004mw_finish:
+ movl 28(%esp), %ebp
+ andl $7, %ebp
+ jnz .L006mw_finish2
+ jmp .L007mw_end
+.align 16
+.L006mw_finish2:
+ # Tail Round 0
+ movl (%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, (%edi)
+ movl %edx, %esi
+ decl %ebp
+ jz .L007mw_end
+ # Tail Round 1
+ movl 4(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 4(%edi)
+ movl %edx, %esi
+ decl %ebp
+ jz .L007mw_end
+ # Tail Round 2
+ movl 8(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 8(%edi)
+ movl %edx, %esi
+ decl %ebp
+ jz .L007mw_end
+ # Tail Round 3
+ movl 12(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 12(%edi)
+ movl %edx, %esi
+ decl %ebp
+ jz .L007mw_end
+ # Tail Round 4
+ movl 16(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 16(%edi)
+ movl %edx, %esi
+ decl %ebp
+ jz .L007mw_end
+ # Tail Round 5
+ movl 20(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 20(%edi)
+ movl %edx, %esi
+ decl %ebp
+ jz .L007mw_end
+ # Tail Round 6
+ movl 24(%ebx), %eax
+ mull %ecx
+ addl %esi, %eax
+ adcl $0, %edx
+ movl %eax, 24(%edi)
+ movl %edx, %esi
+.L007mw_end:
+ movl %esi, %eax
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.bn_mul_words_end:
+ .size bn_mul_words,.bn_mul_words_end-bn_mul_words
+.ident "bn_mul_words"
+.text
+ .align 16
+.globl bn_sqr_words
+ .type bn_sqr_words,@function
+bn_sqr_words:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+
+
+ movl 20(%esp), %esi
+ movl 24(%esp), %edi
+ movl 28(%esp), %ebx
+ andl $4294967288, %ebx
+ jz .L008sw_finish
+.L009sw_loop:
+ # Round 0
+ movl (%edi), %eax
+ mull %eax
+ movl %eax, (%esi)
+ movl %edx, 4(%esi)
+ # Round 4
+ movl 4(%edi), %eax
+ mull %eax
+ movl %eax, 8(%esi)
+ movl %edx, 12(%esi)
+ # Round 8
+ movl 8(%edi), %eax
+ mull %eax
+ movl %eax, 16(%esi)
+ movl %edx, 20(%esi)
+ # Round 12
+ movl 12(%edi), %eax
+ mull %eax
+ movl %eax, 24(%esi)
+ movl %edx, 28(%esi)
+ # Round 16
+ movl 16(%edi), %eax
+ mull %eax
+ movl %eax, 32(%esi)
+ movl %edx, 36(%esi)
+ # Round 20
+ movl 20(%edi), %eax
+ mull %eax
+ movl %eax, 40(%esi)
+ movl %edx, 44(%esi)
+ # Round 24
+ movl 24(%edi), %eax
+ mull %eax
+ movl %eax, 48(%esi)
+ movl %edx, 52(%esi)
+ # Round 28
+ movl 28(%edi), %eax
+ mull %eax
+ movl %eax, 56(%esi)
+ movl %edx, 60(%esi)
+
+ addl $32, %edi
+ addl $64, %esi
+ subl $8, %ebx
+ jnz .L009sw_loop
+.L008sw_finish:
+ movl 28(%esp), %ebx
+ andl $7, %ebx
+ jz .L010sw_end
+ # Tail Round 0
+ movl (%edi), %eax
+ mull %eax
+ movl %eax, (%esi)
+ decl %ebx
+ movl %edx, 4(%esi)
+ jz .L010sw_end
+ # Tail Round 1
+ movl 4(%edi), %eax
+ mull %eax
+ movl %eax, 8(%esi)
+ decl %ebx
+ movl %edx, 12(%esi)
+ jz .L010sw_end
+ # Tail Round 2
+ movl 8(%edi), %eax
+ mull %eax
+ movl %eax, 16(%esi)
+ decl %ebx
+ movl %edx, 20(%esi)
+ jz .L010sw_end
+ # Tail Round 3
+ movl 12(%edi), %eax
+ mull %eax
+ movl %eax, 24(%esi)
+ decl %ebx
+ movl %edx, 28(%esi)
+ jz .L010sw_end
+ # Tail Round 4
+ movl 16(%edi), %eax
+ mull %eax
+ movl %eax, 32(%esi)
+ decl %ebx
+ movl %edx, 36(%esi)
+ jz .L010sw_end
+ # Tail Round 5
+ movl 20(%edi), %eax
+ mull %eax
+ movl %eax, 40(%esi)
+ decl %ebx
+ movl %edx, 44(%esi)
+ jz .L010sw_end
+ # Tail Round 6
+ movl 24(%edi), %eax
+ mull %eax
+ movl %eax, 48(%esi)
+ movl %edx, 52(%esi)
+.L010sw_end:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.bn_sqr_words_end:
+ .size bn_sqr_words,.bn_sqr_words_end-bn_sqr_words
+.ident "bn_sqr_words"
+.text
+ .align 16
+.globl bn_div64
+ .type bn_div64,@function
+bn_div64:	# bn_div64(h, l, d): 64-bit (h:l) divided by 32-bit d; quotient returned in eax
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+
+ movl 20(%esp), %edx	# edx = h, high 32 bits of the dividend
+ movl 24(%esp), %eax	# eax = l, low 32 bits of the dividend
+ movl 28(%esp), %ebx	# ebx = d, the divisor
+ divl %ebx	# edx:eax / ebx -> quotient in eax; faults (#DE) if d==0 or quotient > 32 bits, so callers must ensure h < d
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.bn_div64_end:
+ .size bn_div64,.bn_div64_end-bn_div64
+.ident "bn_div64"
+.text
+ .align 16
+.globl bn_add_words
+ .type bn_add_words,@function
+bn_add_words:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+
+
+ movl 20(%esp), %ebx
+ movl 24(%esp), %esi
+ movl 28(%esp), %edi
+ movl 32(%esp), %ebp
+ xorl %eax, %eax
+ andl $4294967288, %ebp
+ jz .L011aw_finish
+.L012aw_loop:
+ # Round 0
+ movl (%esi), %ecx
+ movl (%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, (%ebx)
+ # Round 1
+ movl 4(%esi), %ecx
+ movl 4(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 4(%ebx)
+ # Round 2
+ movl 8(%esi), %ecx
+ movl 8(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 8(%ebx)
+ # Round 3
+ movl 12(%esi), %ecx
+ movl 12(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 12(%ebx)
+ # Round 4
+ movl 16(%esi), %ecx
+ movl 16(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 16(%ebx)
+ # Round 5
+ movl 20(%esi), %ecx
+ movl 20(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 20(%ebx)
+ # Round 6
+ movl 24(%esi), %ecx
+ movl 24(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 24(%ebx)
+ # Round 7
+ movl 28(%esi), %ecx
+ movl 28(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 28(%ebx)
+
+ addl $32, %esi
+ addl $32, %edi
+ addl $32, %ebx
+ subl $8, %ebp
+ jnz .L012aw_loop
+.L011aw_finish:
+ movl 32(%esp), %ebp
+ andl $7, %ebp
+ jz .L013aw_end
+ # Tail Round 0
+ movl (%esi), %ecx
+ movl (%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, (%ebx)
+ jz .L013aw_end
+ # Tail Round 1
+ movl 4(%esi), %ecx
+ movl 4(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 4(%ebx)
+ jz .L013aw_end
+ # Tail Round 2
+ movl 8(%esi), %ecx
+ movl 8(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 8(%ebx)
+ jz .L013aw_end
+ # Tail Round 3
+ movl 12(%esi), %ecx
+ movl 12(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 12(%ebx)
+ jz .L013aw_end
+ # Tail Round 4
+ movl 16(%esi), %ecx
+ movl 16(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 16(%ebx)
+ jz .L013aw_end
+ # Tail Round 5
+ movl 20(%esi), %ecx
+ movl 20(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 20(%ebx)
+ jz .L013aw_end
+ # Tail Round 6
+ movl 24(%esi), %ecx
+ movl 24(%edi), %edx
+ addl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ addl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 24(%ebx)
+.L013aw_end:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.bn_add_words_end:
+ .size bn_add_words,.bn_add_words_end-bn_add_words
+.ident "bn_add_words"
+.text
+ .align 16
+.globl bn_sub_words
+ .type bn_sub_words,@function
+bn_sub_words:
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
+
+
+ movl 20(%esp), %ebx
+ movl 24(%esp), %esi
+ movl 28(%esp), %edi
+ movl 32(%esp), %ebp
+ xorl %eax, %eax
+ andl $4294967288, %ebp
+ jz .L014aw_finish
+.L015aw_loop:
+ # Round 0
+ movl (%esi), %ecx
+ movl (%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, (%ebx)
+ # Round 1
+ movl 4(%esi), %ecx
+ movl 4(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 4(%ebx)
+ # Round 2
+ movl 8(%esi), %ecx
+ movl 8(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 8(%ebx)
+ # Round 3
+ movl 12(%esi), %ecx
+ movl 12(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 12(%ebx)
+ # Round 4
+ movl 16(%esi), %ecx
+ movl 16(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 16(%ebx)
+ # Round 5
+ movl 20(%esi), %ecx
+ movl 20(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 20(%ebx)
+ # Round 6
+ movl 24(%esi), %ecx
+ movl 24(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 24(%ebx)
+ # Round 7
+ movl 28(%esi), %ecx
+ movl 28(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 28(%ebx)
+
+ addl $32, %esi
+ addl $32, %edi
+ addl $32, %ebx
+ subl $8, %ebp
+ jnz .L015aw_loop
+.L014aw_finish:
+ movl 32(%esp), %ebp
+ andl $7, %ebp
+ jz .L016aw_end
+ # Tail Round 0
+ movl (%esi), %ecx
+ movl (%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, (%ebx)
+ jz .L016aw_end
+ # Tail Round 1
+ movl 4(%esi), %ecx
+ movl 4(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 4(%ebx)
+ jz .L016aw_end
+ # Tail Round 2
+ movl 8(%esi), %ecx
+ movl 8(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 8(%ebx)
+ jz .L016aw_end
+ # Tail Round 3
+ movl 12(%esi), %ecx
+ movl 12(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 12(%ebx)
+ jz .L016aw_end
+ # Tail Round 4
+ movl 16(%esi), %ecx
+ movl 16(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 16(%ebx)
+ jz .L016aw_end
+ # Tail Round 5
+ movl 20(%esi), %ecx
+ movl 20(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ decl %ebp
+ movl %ecx, 20(%ebx)
+ jz .L016aw_end
+ # Tail Round 6
+ movl 24(%esi), %ecx
+ movl 24(%edi), %edx
+ subl %eax, %ecx
+ movl $0, %eax
+ adcl %eax, %eax
+ subl %edx, %ecx
+ adcl $0, %eax
+ movl %ecx, 24(%ebx)
+.L016aw_end:
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+.bn_sub_words_end:
+ .size bn_sub_words,.bn_sub_words_end-bn_sub_words
+.ident "bn_sub_words"
+.text
+	.align 16
+.globl bn_mul_comba8
+	.type bn_mul_comba8,@function
+	# bn_mul_comba8(r, a, b) -- x86 (AT&T/gas) comba multiply.
+	# cdecl args: 4(%esp)=r, 8(%esp)=a, 12(%esp)=b before the pushes;
+	# after the pushes below, a is held in %esi, b in %edi, and r is
+	# reloaded into %eax from 20(%esp) each time a word is stored.
+	# Computes the full 16-word product r[0..15] = a[0..7] * b[0..7],
+	# one output word per column, rotating the three accumulator
+	# registers (%ebx/%ecx/%ebp) between columns.
+	# Generated code -- statement order carries the carry chain; do not
+	# hand-edit, regenerate from the perl script instead.
+bn_mul_comba8:
+	pushl %esi
+	movl 12(%esp), %esi
+	pushl %edi
+	movl 20(%esp), %edi
+	pushl %ebp
+	pushl %ebx
+	xorl %ebx, %ebx
+	movl (%esi), %eax
+	xorl %ecx, %ecx
+	movl (%edi), %edx
+	# ################## Calculate word 0
+	xorl %ebp, %ebp
+	# mul a[0]*b[0]
+	mull %edx
+	addl %eax, %ebx
+	movl 20(%esp), %eax
+	adcl %edx, %ecx
+	movl (%edi), %edx
+	adcl $0, %ebp
+	movl %ebx, (%eax)
+	movl 4(%esi), %eax
+	# saved r[0]
+	# ################## Calculate word 1
+	xorl %ebx, %ebx
+	# mul a[1]*b[0]
+	mull %edx
+	addl %eax, %ecx
+	movl (%esi), %eax
+	adcl %edx, %ebp
+	movl 4(%edi), %edx
+	adcl $0, %ebx
+	# mul a[0]*b[1]
+	mull %edx
+	addl %eax, %ecx
+	movl 20(%esp), %eax
+	adcl %edx, %ebp
+	movl (%edi), %edx
+	adcl $0, %ebx
+	movl %ecx, 4(%eax)
+	movl 8(%esi), %eax
+	# saved r[1]
+	# ################## Calculate word 2
+	xorl %ecx, %ecx
+	# mul a[2]*b[0]
+	mull %edx
+	addl %eax, %ebp
+	movl 4(%esi), %eax
+	adcl %edx, %ebx
+	movl 4(%edi), %edx
+	adcl $0, %ecx
+	# mul a[1]*b[1]
+	mull %edx
+	addl %eax, %ebp
+	movl (%esi), %eax
+	adcl %edx, %ebx
+	movl 8(%edi), %edx
+	adcl $0, %ecx
+	# mul a[0]*b[2]
+	mull %edx
+	addl %eax, %ebp
+	movl 20(%esp), %eax
+	adcl %edx, %ebx
+	movl (%edi), %edx
+	adcl $0, %ecx
+	movl %ebp, 8(%eax)
+	movl 12(%esi), %eax
+	# saved r[2]
+	# ################## Calculate word 3
+	xorl %ebp, %ebp
+	# mul a[3]*b[0]
+	mull %edx
+	addl %eax, %ebx
+	movl 8(%esi), %eax
+	adcl %edx, %ecx
+	movl 4(%edi), %edx
+	adcl $0, %ebp
+	# mul a[2]*b[1]
+	mull %edx
+	addl %eax, %ebx
+	movl 4(%esi), %eax
+	adcl %edx, %ecx
+	movl 8(%edi), %edx
+	adcl $0, %ebp
+	# mul a[1]*b[2]
+	mull %edx
+	addl %eax, %ebx
+	movl (%esi), %eax
+	adcl %edx, %ecx
+	movl 12(%edi), %edx
+	adcl $0, %ebp
+	# mul a[0]*b[3]
+	mull %edx
+	addl %eax, %ebx
+	movl 20(%esp), %eax
+	adcl %edx, %ecx
+	movl (%edi), %edx
+	adcl $0, %ebp
+	movl %ebx, 12(%eax)
+	movl 16(%esi), %eax
+	# saved r[3]
+	# ################## Calculate word 4
+	xorl %ebx, %ebx
+	# mul a[4]*b[0]
+	mull %edx
+	addl %eax, %ecx
+	movl 12(%esi), %eax
+	adcl %edx, %ebp
+	movl 4(%edi), %edx
+	adcl $0, %ebx
+	# mul a[3]*b[1]
+	mull %edx
+	addl %eax, %ecx
+	movl 8(%esi), %eax
+	adcl %edx, %ebp
+	movl 8(%edi), %edx
+	adcl $0, %ebx
+	# mul a[2]*b[2]
+	mull %edx
+	addl %eax, %ecx
+	movl 4(%esi), %eax
+	adcl %edx, %ebp
+	movl 12(%edi), %edx
+	adcl $0, %ebx
+	# mul a[1]*b[3]
+	mull %edx
+	addl %eax, %ecx
+	movl (%esi), %eax
+	adcl %edx, %ebp
+	movl 16(%edi), %edx
+	adcl $0, %ebx
+	# mul a[0]*b[4]
+	mull %edx
+	addl %eax, %ecx
+	movl 20(%esp), %eax
+	adcl %edx, %ebp
+	movl (%edi), %edx
+	adcl $0, %ebx
+	movl %ecx, 16(%eax)
+	movl 20(%esi), %eax
+	# saved r[4]
+	# ################## Calculate word 5
+	xorl %ecx, %ecx
+	# mul a[5]*b[0]
+	mull %edx
+	addl %eax, %ebp
+	movl 16(%esi), %eax
+	adcl %edx, %ebx
+	movl 4(%edi), %edx
+	adcl $0, %ecx
+	# mul a[4]*b[1]
+	mull %edx
+	addl %eax, %ebp
+	movl 12(%esi), %eax
+	adcl %edx, %ebx
+	movl 8(%edi), %edx
+	adcl $0, %ecx
+	# mul a[3]*b[2]
+	mull %edx
+	addl %eax, %ebp
+	movl 8(%esi), %eax
+	adcl %edx, %ebx
+	movl 12(%edi), %edx
+	adcl $0, %ecx
+	# mul a[2]*b[3]
+	mull %edx
+	addl %eax, %ebp
+	movl 4(%esi), %eax
+	adcl %edx, %ebx
+	movl 16(%edi), %edx
+	adcl $0, %ecx
+	# mul a[1]*b[4]
+	mull %edx
+	addl %eax, %ebp
+	movl (%esi), %eax
+	adcl %edx, %ebx
+	movl 20(%edi), %edx
+	adcl $0, %ecx
+	# mul a[0]*b[5]
+	mull %edx
+	addl %eax, %ebp
+	movl 20(%esp), %eax
+	adcl %edx, %ebx
+	movl (%edi), %edx
+	adcl $0, %ecx
+	movl %ebp, 20(%eax)
+	movl 24(%esi), %eax
+	# saved r[5]
+	# ################## Calculate word 6
+	xorl %ebp, %ebp
+	# mul a[6]*b[0]
+	mull %edx
+	addl %eax, %ebx
+	movl 20(%esi), %eax
+	adcl %edx, %ecx
+	movl 4(%edi), %edx
+	adcl $0, %ebp
+	# mul a[5]*b[1]
+	mull %edx
+	addl %eax, %ebx
+	movl 16(%esi), %eax
+	adcl %edx, %ecx
+	movl 8(%edi), %edx
+	adcl $0, %ebp
+	# mul a[4]*b[2]
+	mull %edx
+	addl %eax, %ebx
+	movl 12(%esi), %eax
+	adcl %edx, %ecx
+	movl 12(%edi), %edx
+	adcl $0, %ebp
+	# mul a[3]*b[3]
+	mull %edx
+	addl %eax, %ebx
+	movl 8(%esi), %eax
+	adcl %edx, %ecx
+	movl 16(%edi), %edx
+	adcl $0, %ebp
+	# mul a[2]*b[4]
+	mull %edx
+	addl %eax, %ebx
+	movl 4(%esi), %eax
+	adcl %edx, %ecx
+	movl 20(%edi), %edx
+	adcl $0, %ebp
+	# mul a[1]*b[5]
+	mull %edx
+	addl %eax, %ebx
+	movl (%esi), %eax
+	adcl %edx, %ecx
+	movl 24(%edi), %edx
+	adcl $0, %ebp
+	# mul a[0]*b[6]
+	mull %edx
+	addl %eax, %ebx
+	movl 20(%esp), %eax
+	adcl %edx, %ecx
+	movl (%edi), %edx
+	adcl $0, %ebp
+	movl %ebx, 24(%eax)
+	movl 28(%esi), %eax
+	# saved r[6]
+	# ################## Calculate word 7
+	xorl %ebx, %ebx
+	# mul a[7]*b[0]
+	mull %edx
+	addl %eax, %ecx
+	movl 24(%esi), %eax
+	adcl %edx, %ebp
+	movl 4(%edi), %edx
+	adcl $0, %ebx
+	# mul a[6]*b[1]
+	mull %edx
+	addl %eax, %ecx
+	movl 20(%esi), %eax
+	adcl %edx, %ebp
+	movl 8(%edi), %edx
+	adcl $0, %ebx
+	# mul a[5]*b[2]
+	mull %edx
+	addl %eax, %ecx
+	movl 16(%esi), %eax
+	adcl %edx, %ebp
+	movl 12(%edi), %edx
+	adcl $0, %ebx
+	# mul a[4]*b[3]
+	mull %edx
+	addl %eax, %ecx
+	movl 12(%esi), %eax
+	adcl %edx, %ebp
+	movl 16(%edi), %edx
+	adcl $0, %ebx
+	# mul a[3]*b[4]
+	mull %edx
+	addl %eax, %ecx
+	movl 8(%esi), %eax
+	adcl %edx, %ebp
+	movl 20(%edi), %edx
+	adcl $0, %ebx
+	# mul a[2]*b[5]
+	mull %edx
+	addl %eax, %ecx
+	movl 4(%esi), %eax
+	adcl %edx, %ebp
+	movl 24(%edi), %edx
+	adcl $0, %ebx
+	# mul a[1]*b[6]
+	mull %edx
+	addl %eax, %ecx
+	movl (%esi), %eax
+	adcl %edx, %ebp
+	movl 28(%edi), %edx
+	adcl $0, %ebx
+	# mul a[0]*b[7]
+	mull %edx
+	addl %eax, %ecx
+	movl 20(%esp), %eax
+	adcl %edx, %ebp
+	movl 4(%edi), %edx
+	adcl $0, %ebx
+	movl %ecx, 28(%eax)
+	movl 28(%esi), %eax
+	# saved r[7]
+	# ################## Calculate word 8
+	xorl %ecx, %ecx
+	# mul a[7]*b[1]
+	mull %edx
+	addl %eax, %ebp
+	movl 24(%esi), %eax
+	adcl %edx, %ebx
+	movl 8(%edi), %edx
+	adcl $0, %ecx
+	# mul a[6]*b[2]
+	mull %edx
+	addl %eax, %ebp
+	movl 20(%esi), %eax
+	adcl %edx, %ebx
+	movl 12(%edi), %edx
+	adcl $0, %ecx
+	# mul a[5]*b[3]
+	mull %edx
+	addl %eax, %ebp
+	movl 16(%esi), %eax
+	adcl %edx, %ebx
+	movl 16(%edi), %edx
+	adcl $0, %ecx
+	# mul a[4]*b[4]
+	mull %edx
+	addl %eax, %ebp
+	movl 12(%esi), %eax
+	adcl %edx, %ebx
+	movl 20(%edi), %edx
+	adcl $0, %ecx
+	# mul a[3]*b[5]
+	mull %edx
+	addl %eax, %ebp
+	movl 8(%esi), %eax
+	adcl %edx, %ebx
+	movl 24(%edi), %edx
+	adcl $0, %ecx
+	# mul a[2]*b[6]
+	mull %edx
+	addl %eax, %ebp
+	movl 4(%esi), %eax
+	adcl %edx, %ebx
+	movl 28(%edi), %edx
+	adcl $0, %ecx
+	# mul a[1]*b[7]
+	mull %edx
+	addl %eax, %ebp
+	movl 20(%esp), %eax
+	adcl %edx, %ebx
+	movl 8(%edi), %edx
+	adcl $0, %ecx
+	movl %ebp, 32(%eax)
+	movl 28(%esi), %eax
+	# saved r[8]
+	# ################## Calculate word 9
+	xorl %ebp, %ebp
+	# mul a[7]*b[2]
+	mull %edx
+	addl %eax, %ebx
+	movl 24(%esi), %eax
+	adcl %edx, %ecx
+	movl 12(%edi), %edx
+	adcl $0, %ebp
+	# mul a[6]*b[3]
+	mull %edx
+	addl %eax, %ebx
+	movl 20(%esi), %eax
+	adcl %edx, %ecx
+	movl 16(%edi), %edx
+	adcl $0, %ebp
+	# mul a[5]*b[4]
+	mull %edx
+	addl %eax, %ebx
+	movl 16(%esi), %eax
+	adcl %edx, %ecx
+	movl 20(%edi), %edx
+	adcl $0, %ebp
+	# mul a[4]*b[5]
+	mull %edx
+	addl %eax, %ebx
+	movl 12(%esi), %eax
+	adcl %edx, %ecx
+	movl 24(%edi), %edx
+	adcl $0, %ebp
+	# mul a[3]*b[6]
+	mull %edx
+	addl %eax, %ebx
+	movl 8(%esi), %eax
+	adcl %edx, %ecx
+	movl 28(%edi), %edx
+	adcl $0, %ebp
+	# mul a[2]*b[7]
+	mull %edx
+	addl %eax, %ebx
+	movl 20(%esp), %eax
+	adcl %edx, %ecx
+	movl 12(%edi), %edx
+	adcl $0, %ebp
+	movl %ebx, 36(%eax)
+	movl 28(%esi), %eax
+	# saved r[9]
+	# ################## Calculate word 10
+	xorl %ebx, %ebx
+	# mul a[7]*b[3]
+	mull %edx
+	addl %eax, %ecx
+	movl 24(%esi), %eax
+	adcl %edx, %ebp
+	movl 16(%edi), %edx
+	adcl $0, %ebx
+	# mul a[6]*b[4]
+	mull %edx
+	addl %eax, %ecx
+	movl 20(%esi), %eax
+	adcl %edx, %ebp
+	movl 20(%edi), %edx
+	adcl $0, %ebx
+	# mul a[5]*b[5]
+	mull %edx
+	addl %eax, %ecx
+	movl 16(%esi), %eax
+	adcl %edx, %ebp
+	movl 24(%edi), %edx
+	adcl $0, %ebx
+	# mul a[4]*b[6]
+	mull %edx
+	addl %eax, %ecx
+	movl 12(%esi), %eax
+	adcl %edx, %ebp
+	movl 28(%edi), %edx
+	adcl $0, %ebx
+	# mul a[3]*b[7]
+	mull %edx
+	addl %eax, %ecx
+	movl 20(%esp), %eax
+	adcl %edx, %ebp
+	movl 16(%edi), %edx
+	adcl $0, %ebx
+	movl %ecx, 40(%eax)
+	movl 28(%esi), %eax
+	# saved r[10]
+	# ################## Calculate word 11
+	xorl %ecx, %ecx
+	# mul a[7]*b[4]
+	mull %edx
+	addl %eax, %ebp
+	movl 24(%esi), %eax
+	adcl %edx, %ebx
+	movl 20(%edi), %edx
+	adcl $0, %ecx
+	# mul a[6]*b[5]
+	mull %edx
+	addl %eax, %ebp
+	movl 20(%esi), %eax
+	adcl %edx, %ebx
+	movl 24(%edi), %edx
+	adcl $0, %ecx
+	# mul a[5]*b[6]
+	mull %edx
+	addl %eax, %ebp
+	movl 16(%esi), %eax
+	adcl %edx, %ebx
+	movl 28(%edi), %edx
+	adcl $0, %ecx
+	# mul a[4]*b[7]
+	mull %edx
+	addl %eax, %ebp
+	movl 20(%esp), %eax
+	adcl %edx, %ebx
+	movl 20(%edi), %edx
+	adcl $0, %ecx
+	movl %ebp, 44(%eax)
+	movl 28(%esi), %eax
+	# saved r[11]
+	# ################## Calculate word 12
+	xorl %ebp, %ebp
+	# mul a[7]*b[5]
+	mull %edx
+	addl %eax, %ebx
+	movl 24(%esi), %eax
+	adcl %edx, %ecx
+	movl 24(%edi), %edx
+	adcl $0, %ebp
+	# mul a[6]*b[6]
+	mull %edx
+	addl %eax, %ebx
+	movl 20(%esi), %eax
+	adcl %edx, %ecx
+	movl 28(%edi), %edx
+	adcl $0, %ebp
+	# mul a[5]*b[7]
+	mull %edx
+	addl %eax, %ebx
+	movl 20(%esp), %eax
+	adcl %edx, %ecx
+	movl 24(%edi), %edx
+	adcl $0, %ebp
+	movl %ebx, 48(%eax)
+	movl 28(%esi), %eax
+	# saved r[12]
+	# ################## Calculate word 13
+	xorl %ebx, %ebx
+	# mul a[7]*b[6]
+	mull %edx
+	addl %eax, %ecx
+	movl 24(%esi), %eax
+	adcl %edx, %ebp
+	movl 28(%edi), %edx
+	adcl $0, %ebx
+	# mul a[6]*b[7]
+	mull %edx
+	addl %eax, %ecx
+	movl 20(%esp), %eax
+	adcl %edx, %ebp
+	movl 28(%edi), %edx
+	adcl $0, %ebx
+	movl %ecx, 52(%eax)
+	movl 28(%esi), %eax
+	# saved r[13]
+	# ################## Calculate word 14
+	xorl %ecx, %ecx
+	# mul a[7]*b[7]
+	mull %edx
+	addl %eax, %ebp
+	movl 20(%esp), %eax
+	adcl %edx, %ebx
+	adcl $0, %ecx
+	movl %ebp, 56(%eax)
+	# saved r[14]
+	# save r[15]
+	movl %ebx, 60(%eax)
+	popl %ebx
+	popl %ebp
+	popl %edi
+	popl %esi
+	ret
+.bn_mul_comba8_end:
+	.size bn_mul_comba8,.bn_mul_comba8_end-bn_mul_comba8
+.ident "desasm.pl"
+.text
+	.align 16
+.globl bn_mul_comba4
+	.type bn_mul_comba4,@function
+	# bn_mul_comba4(r, a, b) -- x86 comba multiply, 4-word operands.
+	# After the register pushes: %esi = a, %edi = b; the result pointer
+	# r is reloaded into %eax from 20(%esp) before each store.
+	# Computes r[0..7] = a[0..3] * b[0..3], one column per output word,
+	# rotating %ebx/%ecx/%ebp as the three-word accumulator.
+	# Generated code -- do not hand-edit; regenerate from the perl script.
+bn_mul_comba4:
+	pushl %esi
+	movl 12(%esp), %esi
+	pushl %edi
+	movl 20(%esp), %edi
+	pushl %ebp
+	pushl %ebx
+	xorl %ebx, %ebx
+	movl (%esi), %eax
+	xorl %ecx, %ecx
+	movl (%edi), %edx
+	# ################## Calculate word 0
+	xorl %ebp, %ebp
+	# mul a[0]*b[0]
+	mull %edx
+	addl %eax, %ebx
+	movl 20(%esp), %eax
+	adcl %edx, %ecx
+	movl (%edi), %edx
+	adcl $0, %ebp
+	movl %ebx, (%eax)
+	movl 4(%esi), %eax
+	# saved r[0]
+	# ################## Calculate word 1
+	xorl %ebx, %ebx
+	# mul a[1]*b[0]
+	mull %edx
+	addl %eax, %ecx
+	movl (%esi), %eax
+	adcl %edx, %ebp
+	movl 4(%edi), %edx
+	adcl $0, %ebx
+	# mul a[0]*b[1]
+	mull %edx
+	addl %eax, %ecx
+	movl 20(%esp), %eax
+	adcl %edx, %ebp
+	movl (%edi), %edx
+	adcl $0, %ebx
+	movl %ecx, 4(%eax)
+	movl 8(%esi), %eax
+	# saved r[1]
+	# ################## Calculate word 2
+	xorl %ecx, %ecx
+	# mul a[2]*b[0]
+	mull %edx
+	addl %eax, %ebp
+	movl 4(%esi), %eax
+	adcl %edx, %ebx
+	movl 4(%edi), %edx
+	adcl $0, %ecx
+	# mul a[1]*b[1]
+	mull %edx
+	addl %eax, %ebp
+	movl (%esi), %eax
+	adcl %edx, %ebx
+	movl 8(%edi), %edx
+	adcl $0, %ecx
+	# mul a[0]*b[2]
+	mull %edx
+	addl %eax, %ebp
+	movl 20(%esp), %eax
+	adcl %edx, %ebx
+	movl (%edi), %edx
+	adcl $0, %ecx
+	movl %ebp, 8(%eax)
+	movl 12(%esi), %eax
+	# saved r[2]
+	# ################## Calculate word 3
+	xorl %ebp, %ebp
+	# mul a[3]*b[0]
+	mull %edx
+	addl %eax, %ebx
+	movl 8(%esi), %eax
+	adcl %edx, %ecx
+	movl 4(%edi), %edx
+	adcl $0, %ebp
+	# mul a[2]*b[1]
+	mull %edx
+	addl %eax, %ebx
+	movl 4(%esi), %eax
+	adcl %edx, %ecx
+	movl 8(%edi), %edx
+	adcl $0, %ebp
+	# mul a[1]*b[2]
+	mull %edx
+	addl %eax, %ebx
+	movl (%esi), %eax
+	adcl %edx, %ecx
+	movl 12(%edi), %edx
+	adcl $0, %ebp
+	# mul a[0]*b[3]
+	mull %edx
+	addl %eax, %ebx
+	movl 20(%esp), %eax
+	adcl %edx, %ecx
+	movl 4(%edi), %edx
+	adcl $0, %ebp
+	movl %ebx, 12(%eax)
+	movl 12(%esi), %eax
+	# saved r[3]
+	# ################## Calculate word 4
+	xorl %ebx, %ebx
+	# mul a[3]*b[1]
+	mull %edx
+	addl %eax, %ecx
+	movl 8(%esi), %eax
+	adcl %edx, %ebp
+	movl 8(%edi), %edx
+	adcl $0, %ebx
+	# mul a[2]*b[2]
+	mull %edx
+	addl %eax, %ecx
+	movl 4(%esi), %eax
+	adcl %edx, %ebp
+	movl 12(%edi), %edx
+	adcl $0, %ebx
+	# mul a[1]*b[3]
+	mull %edx
+	addl %eax, %ecx
+	movl 20(%esp), %eax
+	adcl %edx, %ebp
+	movl 8(%edi), %edx
+	adcl $0, %ebx
+	movl %ecx, 16(%eax)
+	movl 12(%esi), %eax
+	# saved r[4]
+	# ################## Calculate word 5
+	xorl %ecx, %ecx
+	# mul a[3]*b[2]
+	mull %edx
+	addl %eax, %ebp
+	movl 8(%esi), %eax
+	adcl %edx, %ebx
+	movl 12(%edi), %edx
+	adcl $0, %ecx
+	# mul a[2]*b[3]
+	mull %edx
+	addl %eax, %ebp
+	movl 20(%esp), %eax
+	adcl %edx, %ebx
+	movl 12(%edi), %edx
+	adcl $0, %ecx
+	movl %ebp, 20(%eax)
+	movl 12(%esi), %eax
+	# saved r[5]
+	# ################## Calculate word 6
+	xorl %ebp, %ebp
+	# mul a[3]*b[3]
+	mull %edx
+	addl %eax, %ebx
+	movl 20(%esp), %eax
+	adcl %edx, %ecx
+	adcl $0, %ebp
+	movl %ebx, 24(%eax)
+	# saved r[6]
+	# save r[7]
+	movl %ecx, 28(%eax)
+	popl %ebx
+	popl %ebp
+	popl %edi
+	popl %esi
+	ret
+.bn_mul_comba4_end:
+	.size bn_mul_comba4,.bn_mul_comba4_end-bn_mul_comba4
+.ident "desasm.pl"
+.text
+	.align 16
+.globl bn_sqr_comba8
+	.type bn_sqr_comba8,@function
+	# bn_sqr_comba8(r, a) -- x86 comba squaring, 8-word operand.
+	# After the pushes: %edi = r (20(%esp) = first arg),
+	# %esi = a (24(%esp) = second arg).
+	# Computes r[0..15] = a[0..7]^2. Cross products a[i]*a[j] (i != j)
+	# are doubled in-register (addl %eax,%eax / adcl %edx,%edx) before
+	# being folded into the %ebx/%ecx/%ebp accumulator; squares
+	# a[i]*a[i] are added once.
+	# Generated code -- do not hand-edit; regenerate from the perl script.
+bn_sqr_comba8:
+	pushl %esi
+	pushl %edi
+	pushl %ebp
+	pushl %ebx
+	movl 20(%esp), %edi
+	movl 24(%esp), %esi
+	xorl %ebx, %ebx
+	xorl %ecx, %ecx
+	movl (%esi), %eax
+	# ############### Calculate word 0
+	xorl %ebp, %ebp
+	# sqr a[0]*a[0]
+	mull %eax
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl (%esi), %edx
+	adcl $0, %ebp
+	movl %ebx, (%edi)
+	movl 4(%esi), %eax
+	# saved r[0]
+	# ############### Calculate word 1
+	xorl %ebx, %ebx
+	# sqr a[1]*a[0]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebx
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 8(%esi), %eax
+	adcl $0, %ebx
+	movl %ecx, 4(%edi)
+	movl (%esi), %edx
+	# saved r[1]
+	# ############### Calculate word 2
+	xorl %ecx, %ecx
+	# sqr a[2]*a[0]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ecx
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl 4(%esi), %eax
+	adcl $0, %ecx
+	# sqr a[1]*a[1]
+	mull %eax
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl (%esi), %edx
+	adcl $0, %ecx
+	movl %ebp, 8(%edi)
+	movl 12(%esi), %eax
+	# saved r[2]
+	# ############### Calculate word 3
+	xorl %ebp, %ebp
+	# sqr a[3]*a[0]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebp
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl 8(%esi), %eax
+	adcl $0, %ebp
+	movl 4(%esi), %edx
+	# sqr a[2]*a[1]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebp
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl 16(%esi), %eax
+	adcl $0, %ebp
+	movl %ebx, 12(%edi)
+	movl (%esi), %edx
+	# saved r[3]
+	# ############### Calculate word 4
+	xorl %ebx, %ebx
+	# sqr a[4]*a[0]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebx
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 12(%esi), %eax
+	adcl $0, %ebx
+	movl 4(%esi), %edx
+	# sqr a[3]*a[1]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebx
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 8(%esi), %eax
+	adcl $0, %ebx
+	# sqr a[2]*a[2]
+	mull %eax
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl (%esi), %edx
+	adcl $0, %ebx
+	movl %ecx, 16(%edi)
+	movl 20(%esi), %eax
+	# saved r[4]
+	# ############### Calculate word 5
+	xorl %ecx, %ecx
+	# sqr a[5]*a[0]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ecx
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl 16(%esi), %eax
+	adcl $0, %ecx
+	movl 4(%esi), %edx
+	# sqr a[4]*a[1]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ecx
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl 12(%esi), %eax
+	adcl $0, %ecx
+	movl 8(%esi), %edx
+	# sqr a[3]*a[2]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ecx
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl 24(%esi), %eax
+	adcl $0, %ecx
+	movl %ebp, 20(%edi)
+	movl (%esi), %edx
+	# saved r[5]
+	# ############### Calculate word 6
+	xorl %ebp, %ebp
+	# sqr a[6]*a[0]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebp
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl 20(%esi), %eax
+	adcl $0, %ebp
+	movl 4(%esi), %edx
+	# sqr a[5]*a[1]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebp
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl 16(%esi), %eax
+	adcl $0, %ebp
+	movl 8(%esi), %edx
+	# sqr a[4]*a[2]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebp
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl 12(%esi), %eax
+	adcl $0, %ebp
+	# sqr a[3]*a[3]
+	mull %eax
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl (%esi), %edx
+	adcl $0, %ebp
+	movl %ebx, 24(%edi)
+	movl 28(%esi), %eax
+	# saved r[6]
+	# ############### Calculate word 7
+	xorl %ebx, %ebx
+	# sqr a[7]*a[0]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebx
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 24(%esi), %eax
+	adcl $0, %ebx
+	movl 4(%esi), %edx
+	# sqr a[6]*a[1]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebx
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 20(%esi), %eax
+	adcl $0, %ebx
+	movl 8(%esi), %edx
+	# sqr a[5]*a[2]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebx
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 16(%esi), %eax
+	adcl $0, %ebx
+	movl 12(%esi), %edx
+	# sqr a[4]*a[3]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebx
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 28(%esi), %eax
+	adcl $0, %ebx
+	movl %ecx, 28(%edi)
+	movl 4(%esi), %edx
+	# saved r[7]
+	# ############### Calculate word 8
+	xorl %ecx, %ecx
+	# sqr a[7]*a[1]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ecx
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl 24(%esi), %eax
+	adcl $0, %ecx
+	movl 8(%esi), %edx
+	# sqr a[6]*a[2]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ecx
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl 20(%esi), %eax
+	adcl $0, %ecx
+	movl 12(%esi), %edx
+	# sqr a[5]*a[3]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ecx
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl 16(%esi), %eax
+	adcl $0, %ecx
+	# sqr a[4]*a[4]
+	mull %eax
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl 8(%esi), %edx
+	adcl $0, %ecx
+	movl %ebp, 32(%edi)
+	movl 28(%esi), %eax
+	# saved r[8]
+	# ############### Calculate word 9
+	xorl %ebp, %ebp
+	# sqr a[7]*a[2]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebp
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl 24(%esi), %eax
+	adcl $0, %ebp
+	movl 12(%esi), %edx
+	# sqr a[6]*a[3]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebp
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl 20(%esi), %eax
+	adcl $0, %ebp
+	movl 16(%esi), %edx
+	# sqr a[5]*a[4]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebp
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl 28(%esi), %eax
+	adcl $0, %ebp
+	movl %ebx, 36(%edi)
+	movl 12(%esi), %edx
+	# saved r[9]
+	# ############### Calculate word 10
+	xorl %ebx, %ebx
+	# sqr a[7]*a[3]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebx
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 24(%esi), %eax
+	adcl $0, %ebx
+	movl 16(%esi), %edx
+	# sqr a[6]*a[4]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebx
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 20(%esi), %eax
+	adcl $0, %ebx
+	# sqr a[5]*a[5]
+	mull %eax
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 16(%esi), %edx
+	adcl $0, %ebx
+	movl %ecx, 40(%edi)
+	movl 28(%esi), %eax
+	# saved r[10]
+	# ############### Calculate word 11
+	xorl %ecx, %ecx
+	# sqr a[7]*a[4]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ecx
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl 24(%esi), %eax
+	adcl $0, %ecx
+	movl 20(%esi), %edx
+	# sqr a[6]*a[5]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ecx
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl 28(%esi), %eax
+	adcl $0, %ecx
+	movl %ebp, 44(%edi)
+	movl 20(%esi), %edx
+	# saved r[11]
+	# ############### Calculate word 12
+	xorl %ebp, %ebp
+	# sqr a[7]*a[5]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebp
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl 24(%esi), %eax
+	adcl $0, %ebp
+	# sqr a[6]*a[6]
+	mull %eax
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl 24(%esi), %edx
+	adcl $0, %ebp
+	movl %ebx, 48(%edi)
+	movl 28(%esi), %eax
+	# saved r[12]
+	# ############### Calculate word 13
+	xorl %ebx, %ebx
+	# sqr a[7]*a[6]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebx
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 28(%esi), %eax
+	adcl $0, %ebx
+	movl %ecx, 52(%edi)
+	# saved r[13]
+	# ############### Calculate word 14
+	xorl %ecx, %ecx
+	# sqr a[7]*a[7]
+	mull %eax
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	adcl $0, %ecx
+	movl %ebp, 56(%edi)
+	# saved r[14]
+	movl %ebx, 60(%edi)
+	popl %ebx
+	popl %ebp
+	popl %edi
+	popl %esi
+	ret
+.bn_sqr_comba8_end:
+	.size bn_sqr_comba8,.bn_sqr_comba8_end-bn_sqr_comba8
+.ident "desasm.pl"
+.text
+	.align 16
+.globl bn_sqr_comba4
+	.type bn_sqr_comba4,@function
+	# bn_sqr_comba4(r, a) -- x86 comba squaring, 4-word operand.
+	# After the pushes: %edi = r (20(%esp)), %esi = a (24(%esp)).
+	# Computes r[0..7] = a[0..3]^2; cross products are doubled
+	# in-register before accumulation, squares are added once.
+	# Generated code -- do not hand-edit; regenerate from the perl script.
+bn_sqr_comba4:
+	pushl %esi
+	pushl %edi
+	pushl %ebp
+	pushl %ebx
+	movl 20(%esp), %edi
+	movl 24(%esp), %esi
+	xorl %ebx, %ebx
+	xorl %ecx, %ecx
+	movl (%esi), %eax
+	# ############### Calculate word 0
+	xorl %ebp, %ebp
+	# sqr a[0]*a[0]
+	mull %eax
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl (%esi), %edx
+	adcl $0, %ebp
+	movl %ebx, (%edi)
+	movl 4(%esi), %eax
+	# saved r[0]
+	# ############### Calculate word 1
+	xorl %ebx, %ebx
+	# sqr a[1]*a[0]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebx
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 8(%esi), %eax
+	adcl $0, %ebx
+	movl %ecx, 4(%edi)
+	movl (%esi), %edx
+	# saved r[1]
+	# ############### Calculate word 2
+	xorl %ecx, %ecx
+	# sqr a[2]*a[0]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ecx
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl 4(%esi), %eax
+	adcl $0, %ecx
+	# sqr a[1]*a[1]
+	mull %eax
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl (%esi), %edx
+	adcl $0, %ecx
+	movl %ebp, 8(%edi)
+	movl 12(%esi), %eax
+	# saved r[2]
+	# ############### Calculate word 3
+	xorl %ebp, %ebp
+	# sqr a[3]*a[0]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebp
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl 8(%esi), %eax
+	adcl $0, %ebp
+	movl 4(%esi), %edx
+	# sqr a[2]*a[1]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebp
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	movl 12(%esi), %eax
+	adcl $0, %ebp
+	movl %ebx, 12(%edi)
+	movl 4(%esi), %edx
+	# saved r[3]
+	# ############### Calculate word 4
+	xorl %ebx, %ebx
+	# sqr a[3]*a[1]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ebx
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 8(%esi), %eax
+	adcl $0, %ebx
+	# sqr a[2]*a[2]
+	mull %eax
+	addl %eax, %ecx
+	adcl %edx, %ebp
+	movl 8(%esi), %edx
+	adcl $0, %ebx
+	movl %ecx, 16(%edi)
+	movl 12(%esi), %eax
+	# saved r[4]
+	# ############### Calculate word 5
+	xorl %ecx, %ecx
+	# sqr a[3]*a[2]
+	mull %edx
+	addl %eax, %eax
+	adcl %edx, %edx
+	adcl $0, %ecx
+	addl %eax, %ebp
+	adcl %edx, %ebx
+	movl 12(%esi), %eax
+	adcl $0, %ecx
+	movl %ebp, 20(%edi)
+	# saved r[5]
+	# ############### Calculate word 6
+	xorl %ebp, %ebp
+	# sqr a[3]*a[3]
+	mull %eax
+	addl %eax, %ebx
+	adcl %edx, %ecx
+	adcl $0, %ebp
+	movl %ebx, 24(%edi)
+	# saved r[6]
+	movl %ecx, 28(%edi)
+	popl %ebx
+	popl %ebp
+	popl %edi
+	popl %esi
+	ret
+.bn_sqr_comba4_end:
+	.size bn_sqr_comba4,.bn_sqr_comba4_end-bn_sqr_comba4
+.ident "desasm.pl"
diff --git a/crypto/bn/asm/f.s b/crypto/bn/asm/f.s
new file mode 100644
index 0000000000..2f8f63c690
--- /dev/null
+++ b/crypto/bn/asm/f.s
@@ -0,0 +1,1773 @@
+	# Don't even think of reading this code
+	# It was automatically generated by bn-586.pl
+	# Which is a perl program used to generate the alpha assembler.
+	# eric <eay@cryptsoft.com>
+
+	# DEC Alpha assembler
+	# Generated from perl scripts contained in SSLeay
+ .file 1 "bn-586.s"
+ .set noat
+	.text
+	.align 3
+	.globl bn_mul_words
+	.ent bn_mul_words
+	# bn_mul_words(rp=$16, ap=$17, num=$18, w=$19):
+	# rp[i] = ap[i] * w (low 64 bits), returning the running high-word
+	# carry in $0. One word per iteration of $101.
+	# NOTE(review): the unconditional "br $100" below skips the 4-ahead
+	# preload path; $100 restores num and enters the plain loop, so the
+	# preloaded $1/$2 are never used.
+bn_mul_words:
+bn_mul_words..ng:
+	.frame $30,0,$26,0
+	.prologue 0
+
+	subq $18, 4, $18
+	bis $31, $31, $0
+	br $100
+	blt $18, $100
+	ldq $1, 0($17)
+	ldq $2, 0($16)
+$101:
+	ldq $3, 0($17)
+	mulq $3, $19, $4
+	addq $17, 8, $17
+	umulh $3, $19, $5
+	addq $4, $0, $4
+	addq $16, 8, $16
+	subq $18, 1, $18
+	# carry out of the low-word add feeds into next word's high part
+	cmpult $4, $0, $0
+	stq $4, -8($16)
+	addq $5, $0, $0
+	bgt $18, $101
+	ret $31,($26),1
+$100:
+	addq $18, 4, $18
+	bgt $18, $101
+$102:
+	ret $31,($26),1
+	.end bn_mul_words
+	.text
+	.align 3
+	.globl bn_sqr_words
+	.ent bn_sqr_words
+	# bn_sqr_words(rp=$16, ap=$17, num=$18):
+	# rp[2i] = low(ap[i]^2), rp[2i+1] = high(ap[i]^2); rp advances two
+	# words per source word. One word per iteration of $104.
+	# NOTE(review): as in bn_mul_words, "br $103" bypasses the preload
+	# path, so only the plain loop runs.
+bn_sqr_words:
+bn_sqr_words..ng:
+	.frame $30,0,$26,0
+	.prologue 0
+
+	subq $18, 4, $18
+	bis $31, $31, $0
+	br $103
+	blt $18, $103
+	ldq $1, 0($17)
+	ldq $2, 0($16)
+$104:
+	ldq $3, 0($17)
+	mulq $3, $3, $4
+	addq $17, 8, $17
+	addq $16, 16, $16
+	subq $18, 1, $18
+	umulh $3, $3, $5
+	stq $4, -16($16)
+	stq $5, -8($16)
+	bgt $18, $104
+	ret $31,($26),1
+$103:
+	addq $18, 4, $18
+	bgt $18, $104
+$105:
+	ret $31,($26),1
+	.end bn_sqr_words
+	.text
+	.align 3
+	.globl bn_mul_add_words
+	.ent bn_mul_add_words
+	# bn_mul_add_words(rp=$16, ap=$17, num=$18, w=$19):
+	# rp[i] += ap[i] * w, returning the final high-word carry in $0.
+	# Two cmpult results ($7 from rp[i]+=prod, then the carry-in add)
+	# are folded into the next word's high part.
+	# NOTE(review): "br $106" bypasses the preload path, as above.
+bn_mul_add_words:
+bn_mul_add_words..ng:
+	.frame $30,0,$26,0
+	.prologue 0
+
+	subq $18, 4, $18
+	bis $31, $31, $0
+	br $106
+	blt $18, $106
+	ldq $1, 0($17)
+	ldq $2, 0($16)
+$107:
+	ldq $3, 0($17)
+	ldq $4, 0($16)
+	mulq $3, $19, $5
+	subq $18, 1, $18
+	addq $17, 8, $17
+	umulh $3, $19, $6
+	addq $4, $5, $4
+	addq $16, 8, $16
+	cmpult $4, $5, $7
+	addq $4, $0, $4
+	addq $6, $7, $6
+	cmpult $4, $0, $0
+	stq $4, -8($16)
+	addq $6, $0, $0
+	bgt $18, $107
+	ret $31,($26),1
+$106:
+	addq $18, 4, $18
+	bgt $18, $107
+$108:
+	ret $31,($26),1
+	.end bn_mul_add_words
+	.text
+	.align 3
+	.globl bn_add_words
+	.ent bn_add_words
+	# bn_add_words(rp=$16, ap=$17, bp=$18, num=$19):
+	# rp[i] = ap[i] + bp[i], returning the final carry (0 or 1) in $0.
+	# The 4-word unrolled loop is at $110, the 1-word remainder loop at
+	# $111.
+	# NOTE(review): the unconditional "br $109" below still bypasses the
+	# unrolled loop (only $111 ever runs); it is left in place to
+	# preserve behaviour.
+	# BUG FIX: the generated unrolled loop stored all four result words
+	# at 0($16); the stores now use offsets 0/8/16/24, matching the
+	# pattern used by bn_sub_words.
+bn_add_words:
+bn_add_words..ng:
+	.frame $30,0,$26,0
+	.prologue 0
+
+	subq $19, 4, $19
+	bis $31, $31, $0
+	br $109
+	blt $19, $109
+	ldq $1, 0($17)
+	ldq $2, 0($18)
+$110:
+	ldq $3, 8($17)
+	ldq $4, 8($18)
+	ldq $5, 16($17)
+	ldq $6, 16($18)
+	ldq $7, 24($17)
+	ldq $8, 24($18)
+	# word 0: sum, carry from the add, then add carry-in
+	addq $1, $2, $22
+	cmpult $22, $2, $23
+	addq $22, $0, $22
+	cmpult $22, $0, $0
+	addq $0, $23, $0
+	# word 1
+	addq $3, $4, $25
+	cmpult $25, $4, $24
+	addq $25, $0, $25
+	cmpult $25, $0, $0
+	addq $0, $24, $0
+	# word 2
+	addq $5, $6, $28
+	cmpult $28, $6, $27
+	addq $28, $0, $28
+	cmpult $28, $0, $0
+	addq $0, $27, $0
+	# word 3
+	addq $7, $8, $20
+	cmpult $20, $8, $21
+	addq $20, $0, $20
+	cmpult $20, $0, $0
+	addq $0, $21, $0
+	stq $22, 0($16)
+	stq $25, 8($16)
+	stq $28, 16($16)
+	stq $20, 24($16)
+	subq $19, 4, $19
+	addq $17, 32, $17
+	addq $18, 32, $18
+	addq $16, 32, $16
+	blt $19, $109
+	ldq $1, 0($17)
+	ldq $2, 0($18)
+	br $110
+$111:
+	# one word per iteration: sum plus incoming carry
+	ldq $1, 0($17)
+	ldq $2, 0($18)
+	addq $1, $2, $3
+	cmpult $3, $2, $23
+	addq $3, $0, $3
+	cmpult $3, $0, $0
+	addq $0, $23, $0
+	stq $3, 0($16)
+	addq $17, 8, $17
+	addq $18, 8, $18
+	addq $16, 8, $16
+	subq $19, 1, $19
+	bgt $19, $111
+	ret $31,($26),1
+$109:
+	addq $19, 4, $19
+	bgt $19, $111
+$112:
+	ret $31,($26),1
+	.end bn_add_words
+	.text
+	.align 3
+	.globl bn_sub_words
+	.ent bn_sub_words
+	# bn_sub_words(rp=$16, ap=$17, bp=$18, num=$19):
+	# rp[i] = ap[i] - bp[i], returning the final borrow (0 or 1) in $0.
+	# Unlike the add/mul routines above, this one has no "br" bypass:
+	# the 4-word unrolled loop at $114 is live, with the remainder
+	# handled one word at a time at $115. Per word, the borrow from the
+	# subtraction and the borrow from subtracting the incoming borrow
+	# are summed into $0 (they cannot both be 1).
+bn_sub_words:
+bn_sub_words..ng:
+	.frame $30,0,$26,0
+	.prologue 0
+
+	subq $19, 4, $19
+	bis $31, $31, $0
+	blt $19, $113
+	ldq $1, 0($17)
+	ldq $2, 0($18)
+$114:
+	ldq $3, 8($17)
+	cmpult $1, $2, $4
+	ldq $5, 8($18)
+	subq $1, $2, $1
+	ldq $6, 16($17)
+	cmpult $1, $0, $2
+	ldq $7, 16($18)
+	subq $1, $0, $23
+	ldq $8, 24($17)
+	addq $2, $4, $0
+	cmpult $3, $5, $24
+	subq $3, $5, $3
+	ldq $22, 24($18)
+	cmpult $3, $0, $5
+	subq $3, $0, $25
+	addq $5, $24, $0
+	cmpult $6, $7, $27
+	subq $6, $7, $6
+	stq $23, 0($16)
+	cmpult $6, $0, $7
+	subq $6, $0, $28
+	addq $7, $27, $0
+	cmpult $8, $22, $21
+	subq $8, $22, $8
+	stq $25, 8($16)
+	cmpult $8, $0, $22
+	subq $8, $0, $20
+	addq $22, $21, $0
+	stq $28, 16($16)
+	subq $19, 4, $19
+	stq $20, 24($16)
+	addq $17, 32, $17
+	addq $18, 32, $18
+	addq $16, 32, $16
+	blt $19, $113
+	ldq $1, 0($17)
+	ldq $2, 0($18)
+	br $114
+$115:
+	ldq $1, 0($17)
+	ldq $2, 0($18)
+	cmpult $1, $2, $27
+	subq $1, $2, $1
+	cmpult $1, $0, $2
+	subq $1, $0, $1
+	stq $1, 0($16)
+	addq $2, $27, $0
+	addq $17, 8, $17
+	addq $18, 8, $18
+	addq $16, 8, $16
+	subq $19, 1, $19
+	bgt $19, $115
+	ret $31,($26),1
+$113:
+	addq $19, 4, $19
+	bgt $19, $115
+$116:
+	ret $31,($26),1
+	.end bn_sub_words
+ #
+ # What follows was taken directly from the C compiler with a few
+ # hacks to redo the lables.
+ #
+.text
+	.align 3
+	.globl bn_div64
+	.ent bn_div64
+	# bn_div64(h=$16, l=$17, d=$18): compiler-generated long division.
+	# Calls BN_num_bits_word to normalise the divisor; returns the
+	# quotient in $0, or -1 (all-ones) when d is zero. Aborts if h is
+	# not reduced modulo d after normalisation (the fprintf of the
+	# diagnostic has been commented out, leaving only the abort).
+	# Callee-saved $9-$13 are spilled to a 48-byte frame.
+bn_div64:
+	ldgp $29,0($27)
+bn_div64..ng:
+	lda $30,-48($30)
+	.frame $30,48,$26,0
+	stq $26,0($30)
+	stq $9,8($30)
+	stq $10,16($30)
+	stq $11,24($30)
+	stq $12,32($30)
+	stq $13,40($30)
+	.mask 0x4003e00,-48
+	.prologue 1
+	bis $16,$16,$9
+	bis $17,$17,$10
+	bis $18,$18,$11
+	bis $31,$31,$13
+	bis $31,2,$12
+	bne $11,$9119
+	# divisor is zero: return -1
+	lda $0,-1
+	br $31,$9136
+	.align 4
+$9119:
+	bis $11,$11,$16
+	jsr $26,BN_num_bits_word
+	ldgp $29,0($26)
+	subq $0,64,$1
+	beq $1,$9120
+	bis $31,1,$1
+	sll $1,$0,$1
+	cmpule $9,$1,$1
+	bne $1,$9120
+	# lda $16,_IO_stderr_
+	# lda $17,$C32
+	# bis $0,$0,$18
+	# jsr $26,fprintf
+	# ldgp $29,0($26)
+	jsr $26,abort
+	ldgp $29,0($26)
+	.align 4
+$9120:
+	# normalise: shift divisor (and h:l) left so d's top bit is set
+	bis $31,64,$3
+	cmpult $9,$11,$2
+	subq $3,$0,$1
+	addl $1,$31,$0
+	subq $9,$11,$1
+	cmoveq $2,$1,$9
+	beq $0,$9122
+	zapnot $0,15,$2
+	subq $3,$0,$1
+	sll $11,$2,$11
+	sll $9,$2,$3
+	srl $10,$1,$1
+	sll $10,$2,$10
+	bis $3,$1,$9
+$9122:
+	srl $11,32,$5
+	zapnot $11,15,$6
+	lda $7,-1
+	.align 5
+$9123:
+	# estimate one 32-bit quotient digit
+	srl $9,32,$1
+	subq $1,$5,$1
+	bne $1,$9126
+	zapnot $7,15,$27
+	br $31,$9127
+	.align 4
+$9126:
+	bis $9,$9,$24
+	bis $5,$5,$25
+	divqu $24,$25,$27
+$9127:
+	srl $10,32,$4
+	.align 5
+$9128:
+	# correct the estimate downwards while it is too large
+	mulq $27,$5,$1
+	subq $9,$1,$3
+	zapnot $3,240,$1
+	bne $1,$9129
+	mulq $6,$27,$2
+	sll $3,32,$1
+	addq $1,$4,$1
+	cmpule $2,$1,$2
+	bne $2,$9129
+	subq $27,1,$27
+	br $31,$9128
+	.align 4
+$9129:
+	# subtract digit*d from the running remainder
+	mulq $27,$6,$1
+	mulq $27,$5,$4
+	srl $1,32,$3
+	sll $1,32,$1
+	addq $4,$3,$4
+	cmpult $10,$1,$2
+	subq $10,$1,$10
+	addq $2,$4,$2
+	cmpult $9,$2,$1
+	bis $2,$2,$4
+	beq $1,$9134
+	addq $9,$11,$9
+	subq $27,1,$27
+$9134:
+	subl $12,1,$12
+	subq $9,$4,$9
+	beq $12,$9124
+	# first digit done: save it and shift in the next 32 bits
+	sll $27,32,$13
+	sll $9,32,$2
+	srl $10,32,$1
+	sll $10,32,$10
+	bis $2,$1,$9
+	br $31,$9123
+	.align 4
+$9124:
+	bis $13,$27,$0
+$9136:
+	ldq $26,0($30)
+	ldq $9,8($30)
+	ldq $10,16($30)
+	ldq $11,24($30)
+	ldq $12,32($30)
+	ldq $13,40($30)
+	addq $30,48,$30
+	ret $31,($26),1
+	.end bn_div64
+ .text
+ .align 3
+ .globl bn_mul_comba8
+ .ent bn_mul_comba8
+bn_mul_comba8:
+bn_mul_comba8..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $30, 16, $30
+ ldq $0, 0($17)
+ ldq $1, 0($18)
+ stq $9, 0($30)
+ stq $10, 8($30)
+ ldq $2, 8($17)
+ ldq $3, 8($18)
+ ldq $4, 16($17)
+ ldq $5, 16($18)
+ ldq $6, 24($17)
+ ldq $7, 24($18)
+ ldq $8, 8($17)
+ ldq $22, 8($18)
+ ldq $23, 8($17)
+ ldq $24, 8($18)
+ ldq $25, 8($17)
+ ldq $27, 8($18)
+ ldq $28, 8($17)
+ ldq $21, 8($18)
+ bis $31, $31, $9
+ mulq $0, $1, $20
+ umulh $0, $1, $19
+ stq $20, 0($16)
+ bis $31, $31, $10
+ mulq $0, $3, $17
+ umulh $0, $3, $18
+ addq $19, $17, $19
+ cmpult $19, $17, $20
+ addq $20, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $17
+ addq $10, $17, $10
+ mulq $2, $1, $20
+ umulh $2, $1, $18
+ addq $19, $20, $19
+ cmpult $19, $20, $17
+ addq $17, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $20
+ addq $10, $20, $10
+ stq $19, 8($16)
+ bis $31, $31, $17
+ mulq $0, $5, $18
+ umulh $0, $5, $20
+ addq $9, $18, $9
+ cmpult $9, $18, $19
+ addq $19, $20, $20
+ addq $10, $20, $10
+ cmpult $10, $20, $18
+ addq $17, $18, $17
+ mulq $2, $3, $19
+ umulh $2, $3, $20
+ addq $9, $19, $9
+ cmpult $9, $19, $18
+ addq $18, $20, $20
+ addq $10, $20, $10
+ cmpult $10, $20, $19
+ addq $17, $19, $17
+ mulq $4, $1, $18
+ umulh $4, $1, $20
+ addq $9, $18, $9
+ cmpult $9, $18, $19
+ addq $19, $20, $20
+ addq $10, $20, $10
+ cmpult $10, $20, $18
+ addq $17, $18, $17
+ stq $9, 16($16)
+ bis $31, $31, $19
+ mulq $0, $7, $20
+ umulh $0, $7, $18
+ addq $10, $20, $10
+ cmpult $10, $20, $9
+ addq $9, $18, $18
+ addq $17, $18, $17
+ cmpult $17, $18, $20
+ addq $19, $20, $19
+ mulq $2, $5, $9
+ umulh $2, $5, $18
+ addq $10, $9, $10
+ cmpult $10, $9, $20
+ addq $20, $18, $18
+ addq $17, $18, $17
+ cmpult $17, $18, $9
+ addq $19, $9, $19
+ mulq $4, $3, $20
+ umulh $4, $3, $18
+ addq $10, $20, $10
+ cmpult $10, $20, $9
+ addq $9, $18, $18
+ addq $17, $18, $17
+ cmpult $17, $18, $20
+ addq $19, $20, $19
+ mulq $6, $1, $9
+ umulh $6, $1, $18
+ addq $10, $9, $10
+ cmpult $10, $9, $20
+ addq $20, $18, $18
+ addq $17, $18, $17
+ cmpult $17, $18, $9
+ addq $19, $9, $19
+ stq $10, 24($16)
+ bis $31, $31, $20
+ mulq $0, $22, $18
+ umulh $0, $22, $9
+ addq $17, $18, $17
+ cmpult $17, $18, $10
+ addq $10, $9, $9
+ addq $19, $9, $19
+ cmpult $19, $9, $18
+ addq $20, $18, $20
+ mulq $2, $7, $10
+ umulh $2, $7, $9
+ addq $17, $10, $17
+ cmpult $17, $10, $18
+ addq $18, $9, $9
+ addq $19, $9, $19
+ cmpult $19, $9, $10
+ addq $20, $10, $20
+ mulq $4, $5, $18
+ umulh $4, $5, $9
+ addq $17, $18, $17
+ cmpult $17, $18, $10
+ addq $10, $9, $9
+ addq $19, $9, $19
+ cmpult $19, $9, $18
+ addq $20, $18, $20
+ mulq $6, $3, $10
+ umulh $6, $3, $9
+ addq $17, $10, $17
+ cmpult $17, $10, $18
+ addq $18, $9, $9
+ addq $19, $9, $19
+ cmpult $19, $9, $10
+ addq $20, $10, $20
+ mulq $8, $1, $18
+ umulh $8, $1, $9
+ addq $17, $18, $17
+ cmpult $17, $18, $10
+ addq $10, $9, $9
+ addq $19, $9, $19
+ cmpult $19, $9, $18
+ addq $20, $18, $20
+ stq $17, 32($16)
+ bis $31, $31, $10
+ mulq $0, $24, $9
+ umulh $0, $24, $18
+ addq $19, $9, $19
+ cmpult $19, $9, $17
+ addq $17, $18, $18
+ addq $20, $18, $20
+ cmpult $20, $18, $9
+ addq $10, $9, $10
+ mulq $2, $22, $17
+ umulh $2, $22, $18
+ addq $19, $17, $19
+ cmpult $19, $17, $9
+ addq $9, $18, $18
+ addq $20, $18, $20
+ cmpult $20, $18, $17
+ addq $10, $17, $10
+ mulq $4, $7, $9
+ umulh $4, $7, $18
+ addq $19, $9, $19
+ cmpult $19, $9, $17
+ addq $17, $18, $18
+ addq $20, $18, $20
+ cmpult $20, $18, $9
+ addq $10, $9, $10
+ mulq $6, $5, $17
+ umulh $6, $5, $18
+ addq $19, $17, $19
+ cmpult $19, $17, $9
+ addq $9, $18, $18
+ addq $20, $18, $20
+ cmpult $20, $18, $17
+ addq $10, $17, $10
+ mulq $8, $3, $9
+ umulh $8, $3, $18
+ addq $19, $9, $19
+ cmpult $19, $9, $17
+ addq $17, $18, $18
+ addq $20, $18, $20
+ cmpult $20, $18, $9
+ addq $10, $9, $10
+ mulq $23, $1, $17
+ umulh $23, $1, $18
+ addq $19, $17, $19
+ cmpult $19, $17, $9
+ addq $9, $18, $18
+ addq $20, $18, $20
+ cmpult $20, $18, $17
+ addq $10, $17, $10
+ stq $19, 40($16)
+ bis $31, $31, $9
+ mulq $0, $27, $18
+ umulh $0, $27, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $19
+ addq $19, $17, $17
+ addq $10, $17, $10
+ cmpult $10, $17, $18
+ addq $9, $18, $9
+ mulq $2, $24, $19
+ umulh $2, $24, $17
+ addq $20, $19, $20
+ cmpult $20, $19, $18
+ addq $18, $17, $17
+ addq $10, $17, $10
+ cmpult $10, $17, $19
+ addq $9, $19, $9
+ mulq $4, $22, $18
+ umulh $4, $22, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $19
+ addq $19, $17, $17
+ addq $10, $17, $10
+ cmpult $10, $17, $18
+ addq $9, $18, $9
+ mulq $6, $7, $19
+ umulh $6, $7, $17
+ addq $20, $19, $20
+ cmpult $20, $19, $18
+ addq $18, $17, $17
+ addq $10, $17, $10
+ cmpult $10, $17, $19
+ addq $9, $19, $9
+ mulq $8, $5, $18
+ umulh $8, $5, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $19
+ addq $19, $17, $17
+ addq $10, $17, $10
+ cmpult $10, $17, $18
+ addq $9, $18, $9
+ mulq $23, $3, $19
+ umulh $23, $3, $17
+ addq $20, $19, $20
+ cmpult $20, $19, $18
+ addq $18, $17, $17
+ addq $10, $17, $10
+ cmpult $10, $17, $19
+ addq $9, $19, $9
+ mulq $25, $1, $18
+ umulh $25, $1, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $19
+ addq $19, $17, $17
+ addq $10, $17, $10
+ cmpult $10, $17, $18
+ addq $9, $18, $9
+ stq $20, 48($16)
+ bis $31, $31, $19
+ mulq $0, $21, $17
+ umulh $0, $21, $18
+ addq $10, $17, $10
+ cmpult $10, $17, $20
+ addq $20, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $17
+ addq $19, $17, $19
+ mulq $2, $27, $20
+ umulh $2, $27, $18
+ addq $10, $20, $10
+ cmpult $10, $20, $17
+ addq $17, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $0
+ addq $19, $0, $19
+ mulq $4, $24, $20
+ umulh $4, $24, $17
+ addq $10, $20, $10
+ cmpult $10, $20, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $0
+ addq $19, $0, $19
+ mulq $6, $22, $20
+ umulh $6, $22, $18
+ addq $10, $20, $10
+ cmpult $10, $20, $17
+ addq $17, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $0
+ addq $19, $0, $19
+ mulq $8, $7, $20
+ umulh $8, $7, $17
+ addq $10, $20, $10
+ cmpult $10, $20, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $0
+ addq $19, $0, $19
+ mulq $23, $5, $20
+ umulh $23, $5, $18
+ addq $10, $20, $10
+ cmpult $10, $20, $17
+ addq $17, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $0
+ addq $19, $0, $19
+ mulq $25, $3, $20
+ umulh $25, $3, $17
+ addq $10, $20, $10
+ cmpult $10, $20, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $0
+ addq $19, $0, $19
+ mulq $28, $1, $20
+ umulh $28, $1, $18
+ addq $10, $20, $10
+ cmpult $10, $20, $17
+ addq $17, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $0
+ addq $19, $0, $19
+ stq $10, 56($16)
+ bis $31, $31, $20
+ mulq $2, $21, $17
+ umulh $2, $21, $18
+ addq $9, $17, $9
+ cmpult $9, $17, $0
+ addq $0, $18, $18
+ addq $19, $18, $19
+ cmpult $19, $18, $1
+ addq $20, $1, $20
+ mulq $4, $27, $10
+ umulh $4, $27, $17
+ addq $9, $10, $9
+ cmpult $9, $10, $0
+ addq $0, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $20, $18, $20
+ mulq $6, $24, $1
+ umulh $6, $24, $2
+ addq $9, $1, $9
+ cmpult $9, $1, $10
+ addq $10, $2, $2
+ addq $19, $2, $19
+ cmpult $19, $2, $0
+ addq $20, $0, $20
+ mulq $8, $22, $17
+ umulh $8, $22, $18
+ addq $9, $17, $9
+ cmpult $9, $17, $1
+ addq $1, $18, $18
+ addq $19, $18, $19
+ cmpult $19, $18, $10
+ addq $20, $10, $20
+ mulq $23, $7, $2
+ umulh $23, $7, $0
+ addq $9, $2, $9
+ cmpult $9, $2, $17
+ addq $17, $0, $0
+ addq $19, $0, $19
+ cmpult $19, $0, $1
+ addq $20, $1, $20
+ mulq $25, $5, $18
+ umulh $25, $5, $10
+ addq $9, $18, $9
+ cmpult $9, $18, $2
+ addq $2, $10, $10
+ addq $19, $10, $19
+ cmpult $19, $10, $17
+ addq $20, $17, $20
+ mulq $28, $3, $0
+ umulh $28, $3, $1
+ addq $9, $0, $9
+ cmpult $9, $0, $18
+ addq $18, $1, $1
+ addq $19, $1, $19
+ cmpult $19, $1, $2
+ addq $20, $2, $20
+ stq $9, 64($16)
+ bis $31, $31, $10
+ mulq $4, $21, $17
+ umulh $4, $21, $0
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $18, $0, $0
+ addq $20, $0, $20
+ cmpult $20, $0, $1
+ addq $10, $1, $10
+ mulq $6, $27, $2
+ umulh $6, $27, $3
+ addq $19, $2, $19
+ cmpult $19, $2, $9
+ addq $9, $3, $3
+ addq $20, $3, $20
+ cmpult $20, $3, $17
+ addq $10, $17, $10
+ mulq $8, $24, $18
+ umulh $8, $24, $0
+ addq $19, $18, $19
+ cmpult $19, $18, $1
+ addq $1, $0, $0
+ addq $20, $0, $20
+ cmpult $20, $0, $4
+ addq $10, $4, $10
+ mulq $23, $22, $2
+ umulh $23, $22, $9
+ addq $19, $2, $19
+ cmpult $19, $2, $3
+ addq $3, $9, $9
+ addq $20, $9, $20
+ cmpult $20, $9, $17
+ addq $10, $17, $10
+ mulq $25, $7, $18
+ umulh $25, $7, $1
+ addq $19, $18, $19
+ cmpult $19, $18, $0
+ addq $0, $1, $1
+ addq $20, $1, $20
+ cmpult $20, $1, $4
+ addq $10, $4, $10
+ mulq $28, $5, $2
+ umulh $28, $5, $3
+ addq $19, $2, $19
+ cmpult $19, $2, $9
+ addq $9, $3, $3
+ addq $20, $3, $20
+ cmpult $20, $3, $17
+ addq $10, $17, $10
+ stq $19, 72($16)
+ bis $31, $31, $18
+ mulq $6, $21, $0
+ umulh $6, $21, $1
+ addq $20, $0, $20
+ cmpult $20, $0, $4
+ addq $4, $1, $1
+ addq $10, $1, $10
+ cmpult $10, $1, $2
+ addq $18, $2, $18
+ mulq $8, $27, $9
+ umulh $8, $27, $3
+ addq $20, $9, $20
+ cmpult $20, $9, $17
+ addq $17, $3, $3
+ addq $10, $3, $10
+ cmpult $10, $3, $5
+ addq $18, $5, $18
+ mulq $23, $24, $19
+ umulh $23, $24, $0
+ addq $20, $19, $20
+ cmpult $20, $19, $4
+ addq $4, $0, $0
+ addq $10, $0, $10
+ cmpult $10, $0, $1
+ addq $18, $1, $18
+ mulq $25, $22, $2
+ umulh $25, $22, $6
+ addq $20, $2, $20
+ cmpult $20, $2, $9
+ addq $9, $6, $6
+ addq $10, $6, $10
+ cmpult $10, $6, $17
+ addq $18, $17, $18
+ mulq $28, $7, $3
+ umulh $28, $7, $5
+ addq $20, $3, $20
+ cmpult $20, $3, $19
+ addq $19, $5, $5
+ addq $10, $5, $10
+ cmpult $10, $5, $4
+ addq $18, $4, $18
+ stq $20, 80($16)
+ bis $31, $31, $0
+ mulq $8, $21, $1
+ umulh $8, $21, $2
+ addq $10, $1, $10
+ cmpult $10, $1, $9
+ addq $9, $2, $2
+ addq $18, $2, $18
+ cmpult $18, $2, $6
+ addq $0, $6, $0
+ mulq $23, $27, $17
+ umulh $23, $27, $3
+ addq $10, $17, $10
+ cmpult $10, $17, $19
+ addq $19, $3, $3
+ addq $18, $3, $18
+ cmpult $18, $3, $5
+ addq $0, $5, $0
+ mulq $25, $24, $4
+ umulh $25, $24, $7
+ addq $10, $4, $10
+ cmpult $10, $4, $20
+ addq $20, $7, $7
+ addq $18, $7, $18
+ cmpult $18, $7, $1
+ addq $0, $1, $0
+ mulq $28, $22, $9
+ umulh $28, $22, $2
+ addq $10, $9, $10
+ cmpult $10, $9, $6
+ addq $6, $2, $2
+ addq $18, $2, $18
+ cmpult $18, $2, $8
+ addq $0, $8, $0
+ stq $10, 88($16)
+ bis $31, $31, $17
+ mulq $23, $21, $19
+ umulh $23, $21, $3
+ addq $18, $19, $18
+ cmpult $18, $19, $5
+ addq $5, $3, $3
+ addq $0, $3, $0
+ cmpult $0, $3, $4
+ addq $17, $4, $17
+ mulq $25, $27, $20
+ umulh $25, $27, $7
+ addq $18, $20, $18
+ cmpult $18, $20, $1
+ addq $1, $7, $7
+ addq $0, $7, $0
+ cmpult $0, $7, $9
+ addq $17, $9, $17
+ mulq $28, $24, $6
+ umulh $28, $24, $2
+ addq $18, $6, $18
+ cmpult $18, $6, $8
+ addq $8, $2, $2
+ addq $0, $2, $0
+ cmpult $0, $2, $22
+ addq $17, $22, $17
+ stq $18, 96($16)
+ bis $31, $31, $10
+ mulq $25, $21, $19
+ umulh $25, $21, $5
+ addq $0, $19, $0
+ cmpult $0, $19, $3
+ addq $3, $5, $5
+ addq $17, $5, $17
+ cmpult $17, $5, $4
+ addq $10, $4, $10
+ mulq $28, $27, $23
+ umulh $28, $27, $20
+ addq $0, $23, $0
+ cmpult $0, $23, $1
+ addq $1, $20, $20
+ addq $17, $20, $17
+ cmpult $17, $20, $7
+ addq $10, $7, $10
+ stq $0, 104($16)
+ bis $31, $31, $9
+ mulq $28, $21, $6
+ umulh $28, $21, $8
+ addq $17, $6, $17
+ cmpult $17, $6, $2
+ addq $2, $8, $8
+ addq $10, $8, $10
+ cmpult $10, $8, $22
+ addq $9, $22, $9
+ stq $17, 112($16)
+ stq $10, 120($16)
+ ldq $9, 0($30)
+ ldq $10, 8($30)
+ addq $30, 16, $30
+ ret $31,($26),1
+ .end bn_mul_comba8
+ .text
+ .align 3
+ .globl bn_mul_comba4
+ .ent bn_mul_comba4
+bn_mul_comba4:
+bn_mul_comba4..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ ldq $0, 0($17)
+ ldq $1, 0($18)
+ ldq $2, 8($17)
+ ldq $3, 8($18)
+ mulq $0, $1, $4
+ ldq $5, 16($17)
+ ldq $6, 16($18)
+ umulh $0, $1, $7
+ ldq $8, 24($17)
+ ldq $22, 24($18)
+ mulq $0, $3, $23
+ stq $4, 0($16)
+ bis $31, $31, $24
+ mulq $2, $1, $28
+ bis $31, $31, $25
+ bis $31, $31,
+ addq $24, $7, $24
+ umulh $0, $3, $21
+ cmpult $24, $7, $20
+ addq $24, $23, $24
+ addq $25, $20, $25
+ umulh $2, $1, $19
+ cmpult $24, $23, $17
+ addq $24, $28, $24
+ addq $27, $17, $27
+ mulq $0, $6, $18
+ cmpult $24, $28, $4
+ addq $25, $4, $25
+ stq $24, 8($16)
+ addq $25, $27, $24
+ bis $31, $31, $25
+ addq $24, $21, $24
+ bis $31, $31, $27
+ mulq $2, $3, $7
+ cmpult $24, $21, $20
+ addq $24, $19, $24
+ addq $25, $20, $25
+ mulq $5, $1, $23
+ cmpult $24, $19, $17
+ addq $24, $7, $24
+ addq $27, $17, $27
+ umulh $0, $6, $28
+ cmpult $24, $18, $4
+ addq $24, $7, $24
+ addq $25, $4, $25
+ umulh $2, $3, $21
+ cmpult $24, $7, $20
+ addq $24, $23, $24
+ addq $27, $20, $27
+ umulh $5, $1, $19
+ cmpult $24, $23, $17
+ addq $25, $17, $25
+ stq $24, 16($16)
+ addq $25, $27, $24
+ bis $31, $31, $25
+ addq $24, $28, $24
+ bis $31, $31, $27
+ mulq $0, $22, $18
+ cmpult $24, $28, $4
+ addq $24, $21, $24
+ addq $25, $4, $25
+ mulq $2, $6, $7
+ cmpult $24, $21, $20
+ addq $24, $19, $24
+ addq $25, $20, $25
+ mulq $5, $3, $23
+ cmpult $24, $19, $17
+ addq $24, $18, $24
+ addq $25, $17, $25
+ mulq $8, $1, $28
+ cmpult $24, $18, $4
+ addq $24, $7, $24
+ addq $25, $4, $25
+ umulh $0, $22, $21
+ cmpult $24, $7, $20
+ addq $24, $23, $24
+ addq $25, $20, $25
+ umulh $2, $6, $19
+ cmpult $24, $23, $17
+ addq $24, $28, $24
+ addq $25, $17, $25
+ umulh $5, $3, $18
+ cmpult $24, $28, $4
+ addq $25, $4, $25
+ stq $24, 24($16)
+ addq $25, $27, $24
+ bis $31, $31, $25
+ addq $24, $21, $24
+ bis $31, $31, $27
+ umulh $8, $1, $7
+ cmpult $24, $21, $20
+ addq $24, $19, $24
+ addq $25, $20, $25
+ mulq $2, $22, $23
+ cmpult $24, $19, $17
+ addq $24, $18, $24
+ addq $25, $17, $25
+ mulq $5, $6, $28
+ cmpult $24, $18, $4
+ addq $24, $7, $24
+ addq $25, $4, $25
+ mulq $8, $3, $21
+ cmpult $24, $7, $20
+ addq $24, $23, $24
+ addq $25, $20, $25
+ umulh $2, $22, $19
+ cmpult $24, $23, $17
+ addq $24, $28, $24
+ addq $25, $17, $25
+ umulh $5, $6, $18
+ cmpult $24, $28, $4
+ addq $24, $21, $24
+ addq $25, $4, $25
+ umulh $8, $3, $7
+ cmpult $24, $21, $20
+ addq $25, $20, $25
+ stq $24, 32($16)
+ addq $25, $27, $24
+ bis $31, $31, $25
+ addq $24, $19, $24
+ bis $31, $31, $27
+ mulq $5, $22, $23
+ cmpult $24, $19, $17
+ addq $24, $18, $24
+ addq $25, $17, $25
+ mulq $8, $6, $28
+ cmpult $24, $18, $4
+ addq $24, $7, $24
+ addq $25, $4, $25
+ umulh $5, $22, $21
+ cmpult $24, $7, $20
+ addq $24, $23, $24
+ addq $25, $20, $25
+ umulh $8, $6, $19
+ cmpult $24, $23, $17
+ addq $24, $28, $24
+ addq $25, $17, $25
+ mulq $8, $22, $18
+ cmpult $24, $28, $4
+ addq $25, $4, $25
+ stq $24, 40($16)
+ addq $25, $27, $24
+ bis $31, $31, $25
+ addq $24, $21, $24
+ bis $31, $31, $27
+ umulh $8, $22, $7
+ cmpult $24, $21, $20
+ addq $24, $19, $24
+ addq $25, $20, $25
+ cmpult $24, $19, $23
+ addq $24, $18, $24
+ addq $25, $23, $25
+ cmpult $24, $18, $17
+ addq $25, $17, $25
+ stq $24, 48($16)
+ addq $25, $27, $24
+ addq $24, $7, $24
+ stq $24, 56($16)
+ ret $31,($26),1
+ .end bn_mul_comba4
+ .text
+ .align 3
+ .globl bn_sqr_comba4
+ .ent bn_sqr_comba4
+bn_sqr_comba4:
+bn_sqr_comba4..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ ldq $0, 0($17)
+ ldq $1, 8($17)
+ ldq $2, 16($17)
+ ldq $3, 24($17)
+ bis $31, $31, $6
+ mulq $0, $0, $4
+ umulh $0, $0, $5
+ stq $4, 0($16)
+ bis $31, $31, $4
+ mulq $0, $1, $7
+ umulh $0, $1, $8
+ cmplt $7, $31, $22
+ cmplt $8, $31, $23
+ addq $7, $7, $7
+ addq $8, $8, $8
+ addq $8, $22, $8
+ addq $4, $23, $4
+ addq $5, $7, $5
+ addq $6, $8, $6
+ cmpult $5, $7, $24
+ cmpult $6, $8, $25
+ addq $6, $24, $6
+ addq $4, $25, $4
+ stq $5, 8($16)
+ bis $31, $31, $5
+ mulq $1, $1, $27
+ umulh $1, $1, $28
+ addq $6, $27, $6
+ addq $4, $28, $4
+ cmpult $6, $27, $21
+ cmpult $4, $28, $20
+ addq $4, $21, $4
+ addq $5, $20, $5
+ mulq $2, $0, $19
+ umulh $2, $0, $18
+ cmplt $19, $31, $17
+ cmplt $18, $31, $22
+ addq $19, $19, $19
+ addq $18, $18, $18
+ addq $18, $17, $18
+ addq $5, $22, $5
+ addq $6, $19, $6
+ addq $4, $18, $4
+ cmpult $6, $19, $23
+ cmpult $4, $18, $7
+ addq $4, $23, $4
+ addq $5, $7, $5
+ stq $6, 16($16)
+ bis $31, $31, $6
+ mulq $3, $0, $8
+ umulh $3, $0, $24
+ cmplt $8, $31, $25
+ cmplt $24, $31, $27
+ addq $8, $8, $8
+ addq $24, $24, $24
+ addq $24, $25, $24
+ addq $6, $27, $6
+ addq $4, $8, $4
+ addq $5, $24, $5
+ cmpult $4, $8, $28
+ cmpult $5, $24, $21
+ addq $5, $28, $5
+ addq $6, $21, $6
+ mulq $2, $1, $20
+ umulh $2, $1, $17
+ cmplt $20, $31, $22
+ cmplt $17, $31, $19
+ addq $20, $20, $20
+ addq $17, $17, $17
+ addq $17, $22, $17
+ addq $6, $19, $6
+ addq $4, $20, $4
+ addq $5, $17, $5
+ cmpult $4, $20, $18
+ cmpult $5, $17, $23
+ addq $5, $18, $5
+ addq $6, $23, $6
+ stq $4, 24($16)
+ bis $31, $31, $4
+ mulq $2, $2, $7
+ umulh $2, $2, $25
+ addq $5, $7, $5
+ addq $6, $25, $6
+ cmpult $5, $7, $27
+ cmpult $6, $25, $8
+ addq $6, $27, $6
+ addq $4, $8, $4
+ mulq $3, $1, $24
+ umulh $3, $1, $28
+ cmplt $24, $31, $21
+ cmplt $28, $31, $22
+ addq $24, $24, $24
+ addq $28, $28, $28
+ addq $28, $21, $28
+ addq $4, $22, $4
+ addq $5, $24, $5
+ addq $6, $28, $6
+ cmpult $5, $24, $19
+ cmpult $6, $28, $20
+ addq $6, $19, $6
+ addq $4, $20, $4
+ stq $5, 32($16)
+ bis $31, $31, $5
+ mulq $3, $2, $17
+ umulh $3, $2, $18
+ cmplt $17, $31, $23
+ cmplt $18, $31, $7
+ addq $17, $17, $17
+ addq $18, $18, $18
+ addq $18, $23, $18
+ addq $5, $7, $5
+ addq $6, $17, $6
+ addq $4, $18, $4
+ cmpult $6, $17, $25
+ cmpult $4, $18, $27
+ addq $4, $25, $4
+ addq $5, $27, $5
+ stq $6, 40($16)
+ bis $31, $31, $6
+ mulq $3, $3, $8
+ umulh $3, $3, $21
+ addq $4, $8, $4
+ addq $5, $21, $5
+ cmpult $4, $8, $22
+ cmpult $5, $21, $24
+ addq $5, $22, $5
+ addq $6, $24, $6
+ stq $4, 48($16)
+ stq $5, 56($16)
+ ret $31,($26),1
+ .end bn_sqr_comba4
+ .text
+ .align 3
+ .globl bn_sqr_comba8
+ .ent bn_sqr_comba8
+bn_sqr_comba8:
+bn_sqr_comba8..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ ldq $0, 0($17)
+ ldq $1, 8($17)
+ ldq $2, 16($17)
+ ldq $3, 24($17)
+ ldq $4, 32($17)
+ ldq $5, 40($17)
+ ldq $6, 48($17)
+ ldq $7, 56($17)
+ bis $31, $31, $23
+ mulq $0, $0, $8
+ umulh $0, $0, $22
+ stq $8, 0($16)
+ bis $31, $31, $8
+ mulq $1, $0, $24
+ umulh $1, $0, $25
+ cmplt $24, $31, $27
+ cmplt $25, $31, $28
+ addq $24, $24, $24
+ addq $25, $25, $25
+ addq $25, $27, $25
+ addq $8, $28, $8
+ addq $22, $24, $22
+ addq $23, $25, $23
+ cmpult $22, $24, $21
+ cmpult $23, $25, $20
+ addq $23, $21, $23
+ addq $8, $20, $8
+ stq $22, 8($16)
+ bis $31, $31, $22
+ mulq $1, $1, $19
+ umulh $1, $1, $18
+ addq $23, $19, $23
+ addq $8, $18, $8
+ cmpult $23, $19, $17
+ cmpult $8, $18, $27
+ addq $8, $17, $8
+ addq $22, $27, $22
+ mulq $2, $0, $28
+ umulh $2, $0, $24
+ cmplt $28, $31, $25
+ cmplt $24, $31, $21
+ addq $28, $28, $28
+ addq $24, $24, $24
+ addq $24, $25, $24
+ addq $22, $21, $22
+ addq $23, $28, $23
+ addq $8, $24, $8
+ cmpult $23, $28, $20
+ cmpult $8, $24, $19
+ addq $8, $20, $8
+ addq $22, $19, $22
+ stq $23, 16($16)
+ bis $31, $31, $23
+ mulq $2, $1, $18
+ umulh $2, $1, $17
+ cmplt $18, $31, $27
+ cmplt $17, $31, $25
+ addq $18, $18, $18
+ addq $17, $17, $17
+ addq $17, $27, $17
+ addq $23, $25, $23
+ addq $8, $18, $8
+ addq $22, $17, $22
+ cmpult $8, $18, $21
+ cmpult $22, $17, $28
+ addq $22, $21, $22
+ addq $23, $28, $23
+ mulq $3, $0, $24
+ umulh $3, $0, $20
+ cmplt $24, $31, $19
+ cmplt $20, $31, $27
+ addq $24, $24, $24
+ addq $20, $20, $20
+ addq $20, $19, $20
+ addq $23, $27, $23
+ addq $8, $24, $8
+ addq $22, $20, $22
+ cmpult $8, $24, $25
+ cmpult $22, $20, $18
+ addq $22, $25, $22
+ addq $23, $18, $23
+ stq $8, 24($16)
+ bis $31, $31, $8
+ mulq $2, $2, $17
+ umulh $2, $2, $21
+ addq $22, $17, $22
+ addq $23, $21, $23
+ cmpult $22, $17, $28
+ cmpult $23, $21, $19
+ addq $23, $28, $23
+ addq $8, $19, $8
+ mulq $3, $1, $27
+ umulh $3, $1, $24
+ cmplt $27, $31, $20
+ cmplt $24, $31, $25
+ addq $27, $27, $27
+ addq $24, $24, $24
+ addq $24, $20, $24
+ addq $8, $25, $8
+ addq $22, $27, $22
+ addq $23, $24, $23
+ cmpult $22, $27, $18
+ cmpult $23, $24, $17
+ addq $23, $18, $23
+ addq $8, $17, $8
+ mulq $4, $0, $21
+ umulh $4, $0, $28
+ cmplt $21, $31, $19
+ cmplt $28, $31, $20
+ addq $21, $21, $21
+ addq $28, $28, $28
+ addq $28, $19, $28
+ addq $8, $20, $8
+ addq $22, $21, $22
+ addq $23, $28, $23
+ cmpult $22, $21, $25
+ cmpult $23, $28, $27
+ addq $23, $25, $23
+ addq $8, $27, $8
+ stq $22, 32($16)
+ bis $31, $31, $22
+ mulq $3, $2, $24
+ umulh $3, $2, $18
+ cmplt $24, $31, $17
+ cmplt $18, $31, $19
+ addq $24, $24, $24
+ addq $18, $18, $18
+ addq $18, $17, $18
+ addq $22, $19, $22
+ addq $23, $24, $23
+ addq $8, $18, $8
+ cmpult $23, $24, $20
+ cmpult $8, $18, $21
+ addq $8, $20, $8
+ addq $22, $21, $22
+ mulq $4, $1, $28
+ umulh $4, $1, $25
+ cmplt $28, $31, $27
+ cmplt $25, $31, $17
+ addq $28, $28, $28
+ addq $25, $25, $25
+ addq $25, $27, $25
+ addq $22, $17, $22
+ addq $23, $28, $23
+ addq $8, $25, $8
+ cmpult $23, $28, $19
+ cmpult $8, $25, $24
+ addq $8, $19, $8
+ addq $22, $24, $22
+ mulq $5, $0, $18
+ umulh $5, $0, $20
+ cmplt $18, $31, $21
+ cmplt $20, $31, $27
+ addq $18, $18, $18
+ addq $20, $20, $20
+ addq $20, $21, $20
+ addq $22, $27, $22
+ addq $23, $18, $23
+ addq $8, $20, $8
+ cmpult $23, $18, $17
+ cmpult $8, $20, $28
+ addq $8, $17, $8
+ addq $22, $28, $22
+ stq $23, 40($16)
+ bis $31, $31, $23
+ mulq $3, $3, $25
+ umulh $3, $3, $19
+ addq $8, $25, $8
+ addq $22, $19, $22
+ cmpult $8, $25, $24
+ cmpult $22, $19, $21
+ addq $22, $24, $22
+ addq $23, $21, $23
+ mulq $4, $2, $27
+ umulh $4, $2, $18
+ cmplt $27, $31, $20
+ cmplt $18, $31, $17
+ addq $27, $27, $27
+ addq $18, $18, $18
+ addq $18, $20, $18
+ addq $23, $17, $23
+ addq $8, $27, $8
+ addq $22, $18, $22
+ cmpult $8, $27, $28
+ cmpult $22, $18, $25
+ addq $22, $28, $22
+ addq $23, $25, $23
+ mulq $5, $1, $19
+ umulh $5, $1, $24
+ cmplt $19, $31, $21
+ cmplt $24, $31, $20
+ addq $19, $19, $19
+ addq $24, $24, $24
+ addq $24, $21, $24
+ addq $23, $20, $23
+ addq $8, $19, $8
+ addq $22, $24, $22
+ cmpult $8, $19, $17
+ cmpult $22, $24, $27
+ addq $22, $17, $22
+ addq $23, $27, $23
+ mulq $6, $0, $18
+ umulh $6, $0, $28
+ cmplt $18, $31, $25
+ cmplt $28, $31, $21
+ addq $18, $18, $18
+ addq $28, $28, $28
+ addq $28, $25, $28
+ addq $23, $21, $23
+ addq $8, $18, $8
+ addq $22, $28, $22
+ cmpult $8, $18, $20
+ cmpult $22, $28, $19
+ addq $22, $20, $22
+ addq $23, $19, $23
+ stq $8, 48($16)
+ bis $31, $31, $8
+ mulq $4, $3, $24
+ umulh $4, $3, $17
+ cmplt $24, $31, $27
+ cmplt $17, $31, $25
+ addq $24, $24, $24
+ addq $17, $17, $17
+ addq $17, $27, $17
+ addq $8, $25, $8
+ addq $22, $24, $22
+ addq $23, $17, $23
+ cmpult $22, $24, $21
+ cmpult $23, $17, $18
+ addq $23, $21, $23
+ addq $8, $18, $8
+ mulq $5, $2, $28
+ umulh $5, $2, $20
+ cmplt $28, $31, $19
+ cmplt $20, $31, $27
+ addq $28, $28, $28
+ addq $20, $20, $20
+ addq $20, $19, $20
+ addq $8, $27, $8
+ addq $22, $28, $22
+ addq $23, $20, $23
+ cmpult $22, $28, $25
+ cmpult $23, $20, $24
+ addq $23, $25, $23
+ addq $8, $24, $8
+ mulq $6, $1, $17
+ umulh $6, $1, $21
+ cmplt $17, $31, $18
+ cmplt $21, $31, $19
+ addq $17, $17, $17
+ addq $21, $21, $21
+ addq $21, $18, $21
+ addq $8, $19, $8
+ addq $22, $17, $22
+ addq $23, $21, $23
+ cmpult $22, $17, $27
+ cmpult $23, $21, $28
+ addq $23, $27, $23
+ addq $8, $28, $8
+ mulq $7, $0, $20
+ umulh $7, $0, $25
+ cmplt $20, $31, $24
+ cmplt $25, $31, $18
+ addq $20, $20, $20
+ addq $25, $25, $25
+ addq $25, $24, $25
+ addq $8, $18, $8
+ addq $22, $20, $22
+ addq $23, $25, $23
+ cmpult $22, $20, $19
+ cmpult $23, $25, $17
+ addq $23, $19, $23
+ addq $8, $17, $8
+ stq $22, 56($16)
+ bis $31, $31, $22
+ mulq $4, $4, $21
+ umulh $4, $4, $27
+ addq $23, $21, $23
+ addq $8, $27, $8
+ cmpult $23, $21, $28
+ cmpult $8, $27, $24
+ addq $8, $28, $8
+ addq $22, $24, $22
+ mulq $5, $3, $18
+ umulh $5, $3, $20
+ cmplt $18, $31, $25
+ cmplt $20, $31, $19
+ addq $18, $18, $18
+ addq $20, $20, $20
+ addq $20, $25, $20
+ addq $22, $19, $22
+ addq $23, $18, $23
+ addq $8, $20, $8
+ cmpult $23, $18, $17
+ cmpult $8, $20, $21
+ addq $8, $17, $8
+ addq $22, $21, $22
+ mulq $6, $2, $27
+ umulh $6, $2, $28
+ cmplt $27, $31, $24
+ cmplt $28, $31, $25
+ addq $27, $27, $27
+ addq $28, $28, $28
+ addq $28, $24, $28
+ addq $22, $25, $22
+ addq $23, $27, $23
+ addq $8, $28, $8
+ cmpult $23, $27, $19
+ cmpult $8, $28, $18
+ addq $8, $19, $8
+ addq $22, $18, $22
+ mulq $7, $1, $20
+ umulh $7, $1, $17
+ cmplt $20, $31, $21
+ cmplt $17, $31, $24
+ addq $20, $20, $20
+ addq $17, $17, $17
+ addq $17, $21, $17
+ addq $22, $24, $22
+ addq $23, $20, $23
+ addq $8, $17, $8
+ cmpult $23, $20, $25
+ cmpult $8, $17, $27
+ addq $8, $25, $8
+ addq $22, $27, $22
+ stq $23, 64($16)
+ bis $31, $31, $23
+ mulq $5, $4, $28
+ umulh $5, $4, $19
+ cmplt $28, $31, $18
+ cmplt $19, $31, $21
+ addq $28, $28, $28
+ addq $19, $19, $19
+ addq $19, $18, $19
+ addq $23, $21, $23
+ addq $8, $28, $8
+ addq $22, $19, $22
+ cmpult $8, $28, $24
+ cmpult $22, $19, $20
+ addq $22, $24, $22
+ addq $23, $20, $23
+ mulq $6, $3, $17
+ umulh $6, $3, $25
+ cmplt $17, $31, $27
+ cmplt $25, $31, $18
+ addq $17, $17, $17
+ addq $25, $25, $25
+ addq $25, $27, $25
+ addq $23, $18, $23
+ addq $8, $17, $8
+ addq $22, $25, $22
+ cmpult $8, $17, $21
+ cmpult $22, $25, $28
+ addq $22, $21, $22
+ addq $23, $28, $23
+ mulq $7, $2, $19
+ umulh $7, $2, $24
+ cmplt $19, $31, $20
+ cmplt $24, $31, $27
+ addq $19, $19, $19
+ addq $24, $24, $24
+ addq $24, $20, $24
+ addq $23, $27, $23
+ addq $8, $19, $8
+ addq $22, $24, $22
+ cmpult $8, $19, $18
+ cmpult $22, $24, $17
+ addq $22, $18, $22
+ addq $23, $17, $23
+ stq $8, 72($16)
+ bis $31, $31, $8
+ mulq $5, $5, $25
+ umulh $5, $5, $21
+ addq $22, $25, $22
+ addq $23, $21, $23
+ cmpult $22, $25, $28
+ cmpult $23, $21, $20
+ addq $23, $28, $23
+ addq $8, $20, $8
+ mulq $6, $4, $27
+ umulh $6, $4, $19
+ cmplt $27, $31, $24
+ cmplt $19, $31, $18
+ addq $27, $27, $27
+ addq $19, $19, $19
+ addq $19, $24, $19
+ addq $8, $18, $8
+ addq $22, $27, $22
+ addq $23, $19, $23
+ cmpult $22, $27, $17
+ cmpult $23, $19, $25
+ addq $23, $17, $23
+ addq $8, $25, $8
+ mulq $7, $3, $21
+ umulh $7, $3, $28
+ cmplt $21, $31, $20
+ cmplt $28, $31, $24
+ addq $21, $21, $21
+ addq $28, $28, $28
+ addq $28, $20, $28
+ addq $8, $24, $8
+ addq $22, $21, $22
+ addq $23, $28, $23
+ cmpult $22, $21, $18
+ cmpult $23, $28, $27
+ addq $23, $18, $23
+ addq $8, $27, $8
+ stq $22, 80($16)
+ bis $31, $31, $22
+ mulq $6, $5, $19
+ umulh $6, $5, $17
+ cmplt $19, $31, $25
+ cmplt $17, $31, $20
+ addq $19, $19, $19
+ addq $17, $17, $17
+ addq $17, $25, $17
+ addq $22, $20, $22
+ addq $23, $19, $23
+ addq $8, $17, $8
+ cmpult $23, $19, $24
+ cmpult $8, $17, $21
+ addq $8, $24, $8
+ addq $22, $21, $22
+ mulq $7, $4, $28
+ umulh $7, $4, $18
+ cmplt $28, $31, $27
+ cmplt $18, $31, $25
+ addq $28, $28, $28
+ addq $18, $18, $18
+ addq $18, $27, $18
+ addq $22, $25, $22
+ addq $23, $28, $23
+ addq $8, $18, $8
+ cmpult $23, $28, $20
+ cmpult $8, $18, $19
+ addq $8, $20, $8
+ addq $22, $19, $22
+ stq $23, 88($16)
+ bis $31, $31, $23
+ mulq $6, $6, $17
+ umulh $6, $6, $24
+ addq $8, $17, $8
+ addq $22, $24, $22
+ cmpult $8, $17, $21
+ cmpult $22, $24, $27
+ addq $22, $21, $22
+ addq $23, $27, $23
+ mulq $7, $5, $25
+ umulh $7, $5, $28
+ cmplt $25, $31, $18
+ cmplt $28, $31, $20
+ addq $25, $25, $25
+ addq $28, $28, $28
+ addq $28, $18, $28
+ addq $23, $20, $23
+ addq $8, $25, $8
+ addq $22, $28, $22
+ cmpult $8, $25, $19
+ cmpult $22, $28, $17
+ addq $22, $19, $22
+ addq $23, $17, $23
+ stq $8, 96($16)
+ bis $31, $31, $8
+ mulq $7, $6, $24
+ umulh $7, $6, $21
+ cmplt $24, $31, $27
+ cmplt $21, $31, $18
+ addq $24, $24, $24
+ addq $21, $21, $21
+ addq $21, $27, $21
+ addq $8, $18, $8
+ addq $22, $24, $22
+ addq $23, $21, $23
+ cmpult $22, $24, $20
+ cmpult $23, $21, $25
+ addq $23, $20, $23
+ addq $8, $25, $8
+ stq $22, 104($16)
+ bis $31, $31, $22
+ mulq $7, $7, $28
+ umulh $7, $7, $19
+ addq $23, $28, $23
+ addq $8, $19, $8
+ cmpult $23, $28, $17
+ cmpult $8, $19, $27
+ addq $8, $17, $8
+ addq $22, $27, $22
+ stq $23, 112($16)
+ stq $8, 120($16)
+ ret $31,($26),1
+ .end bn_sqr_comba8
diff --git a/crypto/bn/asm/ff b/crypto/bn/asm/ff
new file mode 100644
index 0000000000..4af216889d
--- /dev/null
+++ b/crypto/bn/asm/ff
@@ -0,0 +1,724 @@
+ .text
+ .align 3
+ .globl bn_mul_comba4
+ .ent bn_mul_comba4
+bn_mul_comba4:
+bn_mul_comba4..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ ldq $0, 0($17)
+ ldq $1, 0($18)
+ ldq $2, 8($17)
+ ldq $3, 8($18)
+ ldq $4, 16($17)
+ ldq $5, 16($18)
+ ldq $6, 24($17)
+ ldq $7, 24($18)
+ bis $31, $31, $23
+ mulq $0, $1, $8
+ umulh $0, $1, $22
+ stq $8, 0($16)
+ bis $31, $31, $8
+ mulq $0, $3, $24
+ umulh $0, $3, $25
+ addq $22, $24, $22
+ cmpult $22, $24, $27
+ addq $27, $25, $25
+ addq $23, $25, $23
+ cmpult $23, $25, $28
+ addq $8, $28, $8
+ mulq $2, $1, $21
+ umulh $2, $1, $20
+ addq $22, $21, $22
+ cmpult $22, $21, $19
+ addq $19, $20, $20
+ addq $23, $20, $23
+ cmpult $23, $20, $17
+ addq $8, $17, $8
+ stq $22, 8($16)
+ bis $31, $31, $22
+ mulq $2, $3, $18
+ umulh $2, $3, $24
+ addq $23, $18, $23
+ cmpult $23, $18, $27
+ addq $27, $24, $24
+ addq $8, $24, $8
+ cmpult $8, $24, $25
+ addq $22, $25, $22
+ mulq $0, $5, $28
+ umulh $0, $5, $21
+ addq $23, $28, $23
+ cmpult $23, $28, $19
+ addq $19, $21, $21
+ addq $8, $21, $8
+ cmpult $8, $21, $20
+ addq $22, $20, $22
+ mulq $4, $1, $17
+ umulh $4, $1, $18
+ addq $23, $17, $23
+ cmpult $23, $17, $27
+ addq $27, $18, $18
+ addq $8, $18, $8
+ cmpult $8, $18, $24
+ addq $22, $24, $22
+ stq $23, 16($16)
+ bis $31, $31, $23
+ mulq $0, $7, $25
+ umulh $0, $7, $28
+ addq $8, $25, $8
+ cmpult $8, $25, $19
+ addq $19, $28, $28
+ addq $22, $28, $22
+ cmpult $22, $28, $21
+ addq $23, $21, $23
+ mulq $2, $5, $20
+ umulh $2, $5, $17
+ addq $8, $20, $8
+ cmpult $8, $20, $27
+ addq $27, $17, $17
+ addq $22, $17, $22
+ cmpult $22, $17, $18
+ addq $23, $18, $23
+ mulq $4, $3, $24
+ umulh $4, $3, $25
+ addq $8, $24, $8
+ cmpult $8, $24, $19
+ addq $19, $25, $25
+ addq $22, $25, $22
+ cmpult $22, $25, $28
+ addq $23, $28, $23
+ mulq $6, $1, $21
+ umulh $6, $1, $0
+ addq $8, $21, $8
+ cmpult $8, $21, $20
+ addq $20, $0, $0
+ addq $22, $0, $22
+ cmpult $22, $0, $27
+ addq $23, $27, $23
+ stq $8, 24($16)
+ bis $31, $31, $8
+ mulq $2, $7, $17
+ umulh $2, $7, $18
+ addq $22, $17, $22
+ cmpult $22, $17, $24
+ addq $24, $18, $18
+ addq $23, $18, $23
+ cmpult $23, $18, $19
+ addq $8, $19, $8
+ mulq $4, $5, $25
+ umulh $4, $5, $28
+ addq $22, $25, $22
+ cmpult $22, $25, $21
+ addq $21, $28, $28
+ addq $23, $28, $23
+ cmpult $23, $28, $20
+ addq $8, $20, $8
+ mulq $6, $3, $0
+ umulh $6, $3, $27
+ addq $22, $0, $22
+ cmpult $22, $0, $1
+ addq $1, $27, $27
+ addq $23, $27, $23
+ cmpult $23, $27, $17
+ addq $8, $17, $8
+ stq $22, 32($16)
+ bis $31, $31, $22
+ mulq $4, $7, $24
+ umulh $4, $7, $18
+ addq $23, $24, $23
+ cmpult $23, $24, $19
+ addq $19, $18, $18
+ addq $8, $18, $8
+ cmpult $8, $18, $2
+ addq $22, $2, $22
+ mulq $6, $5, $25
+ umulh $6, $5, $21
+ addq $23, $25, $23
+ cmpult $23, $25, $28
+ addq $28, $21, $21
+ addq $8, $21, $8
+ cmpult $8, $21, $20
+ addq $22, $20, $22
+ stq $23, 40($16)
+ bis $31, $31, $23
+ mulq $6, $7, $0
+ umulh $6, $7, $1
+ addq $8, $0, $8
+ cmpult $8, $0, $27
+ addq $27, $1, $1
+ addq $22, $1, $22
+ cmpult $22, $1, $17
+ addq $23, $17, $23
+ stq $8, 48($16)
+ stq $22, 56($16)
+ ret $31,($26),1
+ .end bn_mul_comba4
+ .text
+ .align 3
+ .globl bn_mul_comba8
+ .ent bn_mul_comba8
+bn_mul_comba8:
+bn_mul_comba8..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ stq $9, 8($30)
+ stq $10, 16($30)
+ ldq $0, 0($17)
+ ldq $1, 0($18)
+ ldq $2, 8($17)
+ ldq $3, 8($18)
+ ldq $4, 16($17)
+ ldq $5, 16($18)
+ ldq $6, 24($17)
+ ldq $7, 24($18)
+ ldq $8, 8($17)
+ ldq $22, 8($18)
+ ldq $23, 8($17)
+ ldq $24, 8($18)
+ ldq $25, 8($17)
+ ldq $27, 8($18)
+ ldq $28, 8($17)
+ ldq $21, 8($18)
+ bis $31, $31, $9
+ mulq $0, $1, $20
+ umulh $0, $1, $19
+ stq $20, 0($16)
+ bis $31, $31, $20
+ mulq $0, $3, $10
+ umulh $0, $3, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $10
+ addq $20, $10, $20
+ mulq $2, $1, $18
+ umulh $2, $1, $17
+ addq $19, $18, $19
+ cmpult $19, $18, $10
+ addq $10, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $18
+ addq $20, $18, $20
+ stq $19, 8($16)
+ bis $31, $31, $19
+ mulq $0, $5, $10
+ umulh $0, $5, $17
+ addq $9, $10, $9
+ cmpult $9, $10, $18
+ addq $18, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $19, $10, $19
+ mulq $2, $3, $18
+ umulh $2, $3, $17
+ addq $9, $18, $9
+ cmpult $9, $18, $10
+ addq $10, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $18
+ addq $19, $18, $19
+ mulq $4, $1, $10
+ umulh $4, $1, $17
+ addq $9, $10, $9
+ cmpult $9, $10, $18
+ addq $18, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $19, $10, $19
+ stq $9, 16($16)
+ bis $31, $31, $9
+ mulq $0, $7, $18
+ umulh $0, $7, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $10
+ addq $10, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $9, $18, $9
+ mulq $2, $5, $10
+ umulh $2, $5, $17
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $18, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $10
+ addq $9, $10, $9
+ mulq $4, $3, $18
+ umulh $4, $3, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $10
+ addq $10, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $9, $18, $9
+ mulq $6, $1, $10
+ umulh $6, $1, $17
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $18, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $10
+ addq $9, $10, $9
+ stq $20, 24($16)
+ bis $31, $31, $20
+ mulq $0, $22, $18
+ umulh $0, $22, $17
+ addq $19, $18, $19
+ cmpult $19, $18, $10
+ addq $10, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $18
+ addq $20, $18, $20
+ mulq $2, $7, $10
+ umulh $2, $7, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $10
+ addq $20, $10, $20
+ mulq $4, $5, $18
+ umulh $4, $5, $17
+ addq $19, $18, $19
+ cmpult $19, $18, $10
+ addq $10, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $18
+ addq $20, $18, $20
+ mulq $6, $3, $10
+ umulh $6, $3, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $10
+ addq $20, $10, $20
+ mulq $8, $1, $18
+ umulh $8, $1, $17
+ addq $19, $18, $19
+ cmpult $19, $18, $10
+ addq $10, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $18
+ addq $20, $18, $20
+ stq $19, 32($16)
+ bis $31, $31, $19
+ mulq $0, $24, $10
+ umulh $0, $24, $17
+ addq $9, $10, $9
+ cmpult $9, $10, $18
+ addq $18, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $19, $10, $19
+ mulq $2, $22, $18
+ umulh $2, $22, $17
+ addq $9, $18, $9
+ cmpult $9, $18, $10
+ addq $10, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $18
+ addq $19, $18, $19
+ mulq $4, $7, $10
+ umulh $4, $7, $17
+ addq $9, $10, $9
+ cmpult $9, $10, $18
+ addq $18, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $19, $10, $19
+ mulq $6, $5, $18
+ umulh $6, $5, $17
+ addq $9, $18, $9
+ cmpult $9, $18, $10
+ addq $10, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $18
+ addq $19, $18, $19
+ mulq $8, $3, $10
+ umulh $8, $3, $17
+ addq $9, $10, $9
+ cmpult $9, $10, $18
+ addq $18, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $19, $10, $19
+ mulq $23, $1, $18
+ umulh $23, $1, $17
+ addq $9, $18, $9
+ cmpult $9, $18, $10
+ addq $10, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $18
+ addq $19, $18, $19
+ stq $9, 40($16)
+ bis $31, $31, $9
+ mulq $0, $27, $10
+ umulh $0, $27, $17
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $18, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $10
+ addq $9, $10, $9
+ mulq $2, $24, $18
+ umulh $2, $24, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $10
+ addq $10, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $9, $18, $9
+ mulq $4, $22, $10
+ umulh $4, $22, $17
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $18, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $10
+ addq $9, $10, $9
+ mulq $6, $7, $18
+ umulh $6, $7, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $10
+ addq $10, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $9, $18, $9
+ mulq $8, $5, $10
+ umulh $8, $5, $17
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $18, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $10
+ addq $9, $10, $9
+ mulq $23, $3, $18
+ umulh $23, $3, $17
+ addq $20, $18, $20
+ cmpult $20, $18, $10
+ addq $10, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $9, $18, $9
+ mulq $25, $1, $10
+ umulh $25, $1, $17
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $18, $17, $17
+ addq $19, $17, $19
+ cmpult $19, $17, $10
+ addq $9, $10, $9
+ stq $20, 48($16)
+ bis $31, $31, $20
+ mulq $0, $21, $18
+ umulh $0, $21, $17
+ addq $19, $18, $19
+ cmpult $19, $18, $10
+ addq $10, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $18
+ addq $20, $18, $20
+ mulq $2, $27, $10
+ umulh $2, $27, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $0
+ addq $20, $0, $20
+ mulq $4, $24, $10
+ umulh $4, $24, $18
+ addq $19, $10, $19
+ cmpult $19, $10, $17
+ addq $17, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $0
+ addq $20, $0, $20
+ mulq $6, $22, $10
+ umulh $6, $22, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $0
+ addq $20, $0, $20
+ mulq $8, $7, $10
+ umulh $8, $7, $18
+ addq $19, $10, $19
+ cmpult $19, $10, $17
+ addq $17, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $0
+ addq $20, $0, $20
+ mulq $23, $5, $10
+ umulh $23, $5, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $0
+ addq $20, $0, $20
+ mulq $25, $3, $10
+ umulh $25, $3, $18
+ addq $19, $10, $19
+ cmpult $19, $10, $17
+ addq $17, $18, $18
+ addq $9, $18, $9
+ cmpult $9, $18, $0
+ addq $20, $0, $20
+ mulq $28, $1, $10
+ umulh $28, $1, $17
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $0
+ addq $20, $0, $20
+ stq $19, 56($16)
+ bis $31, $31, $19
+ mulq $2, $21, $10
+ umulh $2, $21, $18
+ addq $9, $10, $9
+ cmpult $9, $10, $17
+ addq $17, $18, $18
+ addq $20, $18, $20
+ cmpult $20, $18, $0
+ addq $19, $0, $19
+ mulq $4, $27, $1
+ umulh $4, $27, $10
+ addq $9, $1, $9
+ cmpult $9, $1, $17
+ addq $17, $10, $10
+ addq $20, $10, $20
+ cmpult $20, $10, $18
+ addq $19, $18, $19
+ mulq $6, $24, $0
+ umulh $6, $24, $2
+ addq $9, $0, $9
+ cmpult $9, $0, $1
+ addq $1, $2, $2
+ addq $20, $2, $20
+ cmpult $20, $2, $17
+ addq $19, $17, $19
+ mulq $8, $22, $10
+ umulh $8, $22, $18
+ addq $9, $10, $9
+ cmpult $9, $10, $0
+ addq $0, $18, $18
+ addq $20, $18, $20
+ cmpult $20, $18, $1
+ addq $19, $1, $19
+ mulq $23, $7, $2
+ umulh $23, $7, $17
+ addq $9, $2, $9
+ cmpult $9, $2, $10
+ addq $10, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $0
+ addq $19, $0, $19
+ mulq $25, $5, $18
+ umulh $25, $5, $1
+ addq $9, $18, $9
+ cmpult $9, $18, $2
+ addq $2, $1, $1
+ addq $20, $1, $20
+ cmpult $20, $1, $10
+ addq $19, $10, $19
+ mulq $28, $3, $17
+ umulh $28, $3, $0
+ addq $9, $17, $9
+ cmpult $9, $17, $18
+ addq $18, $0, $0
+ addq $20, $0, $20
+ cmpult $20, $0, $2
+ addq $19, $2, $19
+ stq $9, 64($16)
+ bis $31, $31, $9
+ mulq $4, $21, $1
+ umulh $4, $21, $10
+ addq $20, $1, $20
+ cmpult $20, $1, $17
+ addq $17, $10, $10
+ addq $19, $10, $19
+ cmpult $19, $10, $18
+ addq $9, $18, $9
+ mulq $6, $27, $0
+ umulh $6, $27, $2
+ addq $20, $0, $20
+ cmpult $20, $0, $3
+ addq $3, $2, $2
+ addq $19, $2, $19
+ cmpult $19, $2, $1
+ addq $9, $1, $9
+ mulq $8, $24, $17
+ umulh $8, $24, $10
+ addq $20, $17, $20
+ cmpult $20, $17, $18
+ addq $18, $10, $10
+ addq $19, $10, $19
+ cmpult $19, $10, $4
+ addq $9, $4, $9
+ mulq $23, $22, $0
+ umulh $23, $22, $3
+ addq $20, $0, $20
+ cmpult $20, $0, $2
+ addq $2, $3, $3
+ addq $19, $3, $19
+ cmpult $19, $3, $1
+ addq $9, $1, $9
+ mulq $25, $7, $17
+ umulh $25, $7, $18
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $10, $18, $18
+ addq $19, $18, $19
+ cmpult $19, $18, $4
+ addq $9, $4, $9
+ mulq $28, $5, $0
+ umulh $28, $5, $2
+ addq $20, $0, $20
+ cmpult $20, $0, $3
+ addq $3, $2, $2
+ addq $19, $2, $19
+ cmpult $19, $2, $1
+ addq $9, $1, $9
+ stq $20, 72($16)
+ bis $31, $31, $20
+ mulq $6, $21, $17
+ umulh $6, $21, $10
+ addq $19, $17, $19
+ cmpult $19, $17, $18
+ addq $18, $10, $10
+ addq $9, $10, $9
+ cmpult $9, $10, $4
+ addq $20, $4, $20
+ mulq $8, $27, $0
+ umulh $8, $27, $3
+ addq $19, $0, $19
+ cmpult $19, $0, $2
+ addq $2, $3, $3
+ addq $9, $3, $9
+ cmpult $9, $3, $1
+ addq $20, $1, $20
+ mulq $23, $24, $5
+ umulh $23, $24, $17
+ addq $19, $5, $19
+ cmpult $19, $5, $18
+ addq $18, $17, $17
+ addq $9, $17, $9
+ cmpult $9, $17, $10
+ addq $20, $10, $20
+ mulq $25, $22, $4
+ umulh $25, $22, $6
+ addq $19, $4, $19
+ cmpult $19, $4, $0
+ addq $0, $6, $6
+ addq $9, $6, $9
+ cmpult $9, $6, $2
+ addq $20, $2, $20
+ mulq $28, $7, $3
+ umulh $28, $7, $1
+ addq $19, $3, $19
+ cmpult $19, $3, $5
+ addq $5, $1, $1
+ addq $9, $1, $9
+ cmpult $9, $1, $18
+ addq $20, $18, $20
+ stq $19, 80($16)
+ bis $31, $31, $19
+ mulq $8, $21, $17
+ umulh $8, $21, $10
+ addq $9, $17, $9
+ cmpult $9, $17, $4
+ addq $4, $10, $10
+ addq $20, $10, $20
+ cmpult $20, $10, $0
+ addq $19, $0, $19
+ mulq $23, $27, $6
+ umulh $23, $27, $2
+ addq $9, $6, $9
+ cmpult $9, $6, $3
+ addq $3, $2, $2
+ addq $20, $2, $20
+ cmpult $20, $2, $5
+ addq $19, $5, $19
+ mulq $25, $24, $1
+ umulh $25, $24, $18
+ addq $9, $1, $9
+ cmpult $9, $1, $7
+ addq $7, $18, $18
+ addq $20, $18, $20
+ cmpult $20, $18, $17
+ addq $19, $17, $19
+ mulq $28, $22, $4
+ umulh $28, $22, $10
+ addq $9, $4, $9
+ cmpult $9, $4, $0
+ addq $0, $10, $10
+ addq $20, $10, $20
+ cmpult $20, $10, $8
+ addq $19, $8, $19
+ stq $9, 88($16)
+ bis $31, $31, $9
+ mulq $23, $21, $6
+ umulh $23, $21, $3
+ addq $20, $6, $20
+ cmpult $20, $6, $2
+ addq $2, $3, $3
+ addq $19, $3, $19
+ cmpult $19, $3, $5
+ addq $9, $5, $9
+ mulq $25, $27, $1
+ umulh $25, $27, $7
+ addq $20, $1, $20
+ cmpult $20, $1, $18
+ addq $18, $7, $7
+ addq $19, $7, $19
+ cmpult $19, $7, $17
+ addq $9, $17, $9
+ mulq $28, $24, $4
+ umulh $28, $24, $0
+ addq $20, $4, $20
+ cmpult $20, $4, $10
+ addq $10, $0, $0
+ addq $19, $0, $19
+ cmpult $19, $0, $8
+ addq $9, $8, $9
+ stq $20, 96($16)
+ bis $31, $31, $20
+ mulq $25, $21, $22
+ umulh $25, $21, $6
+ addq $19, $22, $19
+ cmpult $19, $22, $2
+ addq $2, $6, $6
+ addq $9, $6, $9
+ cmpult $9, $6, $3
+ addq $20, $3, $20
+ mulq $28, $27, $5
+ umulh $28, $27, $23
+ addq $19, $5, $19
+ cmpult $19, $5, $1
+ addq $1, $23, $23
+ addq $9, $23, $9
+ cmpult $9, $23, $18
+ addq $20, $18, $20
+ stq $19, 104($16)
+ bis $31, $31, $19
+ mulq $28, $21, $7
+ umulh $28, $21, $17
+ addq $9, $7, $9
+ cmpult $9, $7, $4
+ addq $4, $17, $17
+ addq $20, $17, $20
+ cmpult $20, $17, $10
+ addq $19, $10, $19
+ stq $9, 112($16)
+ stq $20, 120($16)
+ ldq $9, 8($30)
+ ldq $10, 16($30)
+ ret $31,($26),1
+ .end bn_mul_comba8
diff --git a/crypto/bn/asm/mips1.s b/crypto/bn/asm/mips1.s
new file mode 100644
index 0000000000..44fa1254c7
--- /dev/null
+++ b/crypto/bn/asm/mips1.s
@@ -0,0 +1,539 @@
+/* This assember is for R2000/R3000 machines, or higher ones that do
+ * no want to do any 64 bit arithmatic.
+ * Make sure that the SSLeay bignum library is compiled with
+ * THIRTY_TWO_BIT set.
+ * This must either be compiled with the system CC, or, if you use GNU gas,
+ * cc -E mips1.s|gas -o mips1.o
+ */
+ .set reorder
+ .set noat
+
+#define R1 $1
+#define CC $2
+#define R2 $3
+#define R3 $8
+#define R4 $9
+#define L1 $10
+#define L2 $11
+#define L3 $12
+#define L4 $13
+#define H1 $14
+#define H2 $15
+#define H3 $24
+#define H4 $25
+
+#define P1 $4
+#define P2 $5
+#define P3 $6
+#define P4 $7
+
+ .align 2
+ .ent bn_mul_add_words
+ .globl bn_mul_add_words
+.text
+bn_mul_add_words:
+ .frame $sp,0,$31
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+
+ #blt P3,4,$lab34
+
+ subu R1,P3,4
+ move CC,$0
+ bltz R1,$lab34
+$lab2:
+ lw R1,0(P1)
+ lw L1,0(P2)
+ lw R2,4(P1)
+ lw L2,4(P2)
+ lw R3,8(P1)
+ lw L3,8(P2)
+ lw R4,12(P1)
+ lw L4,12(P2)
+ multu L1,P4
+ addu R1,R1,CC
+ mflo L1
+ sltu CC,R1,CC
+ addu R1,R1,L1
+ mfhi H1
+ sltu L1,R1,L1
+ sw R1,0(P1)
+ addu CC,CC,L1
+ multu L2,P4
+ addu CC,H1,CC
+ mflo L2
+ addu R2,R2,CC
+ sltu CC,R2,CC
+ mfhi H2
+ addu R2,R2,L2
+ addu P2,P2,16
+ sltu L2,R2,L2
+ sw R2,4(P1)
+ addu CC,CC,L2
+ multu L3,P4
+ addu CC,H2,CC
+ mflo L3
+ addu R3,R3,CC
+ sltu CC,R3,CC
+ mfhi H3
+ addu R3,R3,L3
+ addu P1,P1,16
+ sltu L3,R3,L3
+ sw R3,-8(P1)
+ addu CC,CC,L3
+ multu L4,P4
+ addu CC,H3,CC
+ mflo L4
+ addu R4,R4,CC
+ sltu CC,R4,CC
+ mfhi H4
+ addu R4,R4,L4
+ subu P3,P3,4
+ sltu L4,R4,L4
+ addu CC,CC,L4
+ addu CC,H4,CC
+
+ subu R1,P3,4
+ sw R4,-4(P1) # delay slot
+ bgez R1,$lab2
+
+ bleu P3,0,$lab3
+ .align 2
+$lab33:
+ lw L1,0(P2)
+ lw R1,0(P1)
+ multu L1,P4
+ addu R1,R1,CC
+ sltu CC,R1,CC
+ addu P1,P1,4
+ mflo L1
+ mfhi H1
+ addu R1,R1,L1
+ addu P2,P2,4
+ sltu L1,R1,L1
+ subu P3,P3,1
+ addu CC,CC,L1
+ sw R1,-4(P1)
+ addu CC,H1,CC
+ bgtz P3,$lab33
+ j $31
+ .align 2
+$lab3:
+ j $31
+ .align 2
+$lab34:
+ bgt P3,0,$lab33
+ j $31
+ .end bn_mul_add_words
+
+ .align 2
+ # Program Unit: bn_mul_words
+ .ent bn_mul_words
+ .globl bn_mul_words
+.text
+bn_mul_words:
+ .frame $sp,0,$31
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+
+ subu P3,P3,4
+ move CC,$0
+ bltz P3,$lab45
+$lab44:
+ lw L1,0(P2)
+ lw L2,4(P2)
+ lw L3,8(P2)
+ lw L4,12(P2)
+ multu L1,P4
+ subu P3,P3,4
+ mflo L1
+ mfhi H1
+ addu L1,L1,CC
+ multu L2,P4
+ sltu CC,L1,CC
+ sw L1,0(P1)
+ addu CC,H1,CC
+ mflo L2
+ mfhi H2
+ addu L2,L2,CC
+ multu L3,P4
+ sltu CC,L2,CC
+ sw L2,4(P1)
+ addu CC,H2,CC
+ mflo L3
+ mfhi H3
+ addu L3,L3,CC
+ multu L4,P4
+ sltu CC,L3,CC
+ sw L3,8(P1)
+ addu CC,H3,CC
+ mflo L4
+ mfhi H4
+ addu L4,L4,CC
+ addu P1,P1,16
+ sltu CC,L4,CC
+ addu P2,P2,16
+ addu CC,H4,CC
+ sw L4,-4(P1)
+
+ bgez P3,$lab44
+ b $lab45
+$lab46:
+ lw L1,0(P2)
+ addu P1,P1,4
+ multu L1,P4
+ addu P2,P2,4
+ mflo L1
+ mfhi H1
+ addu L1,L1,CC
+ subu P3,P3,1
+ sltu CC,L1,CC
+ sw L1,-4(P1)
+ addu CC,H1,CC
+ bgtz P3,$lab46
+ j $31
+$lab45:
+ addu P3,P3,4
+ bgtz P3,$lab46
+ j $31
+ .align 2
+ .end bn_mul_words
+
+ # Program Unit: bn_sqr_words
+ .ent bn_sqr_words
+ .globl bn_sqr_words
+.text
+bn_sqr_words:
+ .frame $sp,0,$31
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+
+ subu P3,P3,4
+ bltz P3,$lab55
+$lab54:
+ lw L1,0(P2)
+ lw L2,4(P2)
+ lw L3,8(P2)
+ lw L4,12(P2)
+
+ multu L1,L1
+ subu P3,P3,4
+ mflo L1
+ mfhi H1
+ sw L1,0(P1)
+ sw H1,4(P1)
+
+ multu L2,L2
+ addu P1,P1,32
+ mflo L2
+ mfhi H2
+ sw L2,-24(P1)
+ sw H2,-20(P1)
+
+ multu L3,L3
+ addu P2,P2,16
+ mflo L3
+ mfhi H3
+ sw L3,-16(P1)
+ sw H3,-12(P1)
+
+ multu L4,L4
+
+ mflo L4
+ mfhi H4
+ sw L4,-8(P1)
+ sw H4,-4(P1)
+
+ bgtz P3,$lab54
+ b $lab55
+$lab56:
+ lw L1,0(P2)
+ addu P1,P1,8
+ multu L1,L1
+ addu P2,P2,4
+ subu P3,P3,1
+ mflo L1
+ mfhi H1
+ sw L1,-8(P1)
+ sw H1,-4(P1)
+
+ bgtz P3,$lab56
+ j $31
+$lab55:
+ addu P3,P3,4
+ bgtz P3,$lab56
+ j $31
+ .align 2
+ .end bn_sqr_words
+
+ # Program Unit: bn_add_words
+ .ent bn_add_words
+ .globl bn_add_words
+.text
+bn_add_words: # 0x590
+ .frame $sp,0,$31
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+
+ subu P4,P4,4
+ move CC,$0
+ bltz P4,$lab65
+$lab64:
+ lw L1,0(P2)
+ lw R1,0(P3)
+ lw L2,4(P2)
+ lw R2,4(P3)
+
+ addu L1,L1,CC
+ lw L3,8(P2)
+ sltu CC,L1,CC
+ addu L1,L1,R1
+ sltu R1,L1,R1
+ lw R3,8(P3)
+ addu CC,CC,R1
+ lw L4,12(P2)
+
+ addu L2,L2,CC
+ lw R4,12(P3)
+ sltu CC,L2,CC
+ addu L2,L2,R2
+ sltu R2,L2,R2
+ sw L1,0(P1)
+ addu CC,CC,R2
+ addu P1,P1,16
+ addu L3,L3,CC
+ sw L2,-12(P1)
+
+ sltu CC,L3,CC
+ addu L3,L3,R3
+ sltu R3,L3,R3
+ addu P2,P2,16
+ addu CC,CC,R3
+
+ addu L4,L4,CC
+ addu P3,P3,16
+ sltu CC,L4,CC
+ addu L4,L4,R4
+ subu P4,P4,4
+ sltu R4,L4,R4
+ sw L3,-8(P1)
+ addu CC,CC,R4
+ sw L4,-4(P1)
+
+ bgtz P4,$lab64
+ b $lab65
+$lab66:
+ lw L1,0(P2)
+ lw R1,0(P3)
+ addu L1,L1,CC
+ addu P1,P1,4
+ sltu CC,L1,CC
+ addu P2,P2,4
+ addu P3,P3,4
+ addu L1,L1,R1
+ subu P4,P4,1
+ sltu R1,L1,R1
+ sw L1,-4(P1)
+ addu CC,CC,R1
+
+ bgtz P4,$lab66
+ j $31
+$lab65:
+ addu P4,P4,4
+ bgtz P4,$lab66
+ j $31
+ .end bn_add_words
+
+ # Program Unit: bn_div64
+ .set at
+ .set reorder
+ .text
+ .align 2
+ .globl bn_div64
+ # 321 {
+ .ent bn_div64 2
+bn_div64:
+ subu $sp, 64
+ sw $31, 56($sp)
+ sw $16, 48($sp)
+ .mask 0x80010000, -56
+ .frame $sp, 64, $31
+ move $9, $4
+ move $12, $5
+ move $16, $6
+ # 322 BN_ULONG dh,dl,q,ret=0,th,tl,t;
+ move $31, $0
+ # 323 int i,count=2;
+ li $13, 2
+ # 324
+ # 325 if (d == 0) return(BN_MASK2);
+ bne $16, 0, $80
+ li $2, -1
+ b $93
+$80:
+ # 326
+ # 327 i=BN_num_bits_word(d);
+ move $4, $16
+ sw $31, 16($sp)
+ sw $9, 24($sp)
+ sw $12, 32($sp)
+ sw $13, 40($sp)
+ .livereg 0x800ff0e,0xfff
+ jal BN_num_bits_word
+ li $4, 32
+ lw $31, 16($sp)
+ lw $9, 24($sp)
+ lw $12, 32($sp)
+ lw $13, 40($sp)
+ move $3, $2
+ # 328 if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
+ beq $2, $4, $81
+ li $14, 1
+ sll $15, $14, $2
+ bleu $9, $15, $81
+ # 329 {
+ # 330 #if !defined(NO_STDIO) && !defined(WIN16)
+ # 331 fprintf(stderr,"Division would overflow (%d)\n",i);
+ # 332 #endif
+ # 333 abort();
+ sw $3, 8($sp)
+ sw $9, 24($sp)
+ sw $12, 32($sp)
+ sw $13, 40($sp)
+ sw $31, 26($sp)
+ .livereg 0xff0e,0xfff
+ jal abort
+ lw $3, 8($sp)
+ li $4, 32
+ lw $9, 24($sp)
+ lw $12, 32($sp)
+ lw $13, 40($sp)
+ lw $31, 26($sp)
+ # 334 }
+$81:
+ # 335 i=BN_BITS2-i;
+ subu $3, $4, $3
+ # 336 if (h >= d) h-=d;
+ bltu $9, $16, $82
+ subu $9, $9, $16
+$82:
+ # 337
+ # 338 if (i)
+ beq $3, 0, $83
+ # 339 {
+ # 340 d<<=i;
+ sll $16, $16, $3
+ # 341 h=(h<<i)|(l>>(BN_BITS2-i));
+ sll $24, $9, $3
+ subu $25, $4, $3
+ srl $14, $12, $25
+ or $9, $24, $14
+ # 342 l<<=i;
+ sll $12, $12, $3
+ # 343 }
+$83:
+ # 344 dh=(d&BN_MASK2h)>>BN_BITS4;
+ # 345 dl=(d&BN_MASK2l);
+ and $8, $16, -65536
+ srl $8, $8, 16
+ and $10, $16, 65535
+ li $6, -65536
+$84:
+ # 346 for (;;)
+ # 347 {
+ # 348 if ((h>>BN_BITS4) == dh)
+ srl $15, $9, 16
+ bne $8, $15, $85
+ # 349 q=BN_MASK2l;
+ li $5, 65535
+ b $86
+$85:
+ # 350 else
+ # 351 q=h/dh;
+ divu $5, $9, $8
+$86:
+ # 352
+ # 353 for (;;)
+ # 354 {
+ # 355 t=(h-q*dh);
+ mul $4, $5, $8
+ subu $2, $9, $4
+ move $3, $2
+ # 356 if ((t&BN_MASK2h) ||
+ # 357 ((dl*q) <= (
+ # 358 (t<<BN_BITS4)+
+ # 359 ((l&BN_MASK2h)>>BN_BITS4))))
+ and $25, $2, $6
+ bne $25, $0, $87
+ mul $24, $10, $5
+ sll $14, $3, 16
+ and $15, $12, $6
+ srl $25, $15, 16
+ addu $15, $14, $25
+ bgtu $24, $15, $88
+$87:
+ # 360 break;
+ mul $3, $10, $5
+ b $89
+$88:
+ # 361 q--;
+ addu $5, $5, -1
+ # 362 }
+ b $86
+$89:
+ # 363 th=q*dh;
+ # 364 tl=q*dl;
+ # 365 t=(tl>>BN_BITS4);
+ # 366 tl=(tl<<BN_BITS4)&BN_MASK2h;
+ sll $14, $3, 16
+ and $2, $14, $6
+ move $11, $2
+ # 367 th+=t;
+ srl $25, $3, 16
+ addu $7, $4, $25
+ # 368
+ # 369 if (l < tl) th++;
+ bgeu $12, $2, $90
+ addu $7, $7, 1
+$90:
+ # 370 l-=tl;
+ subu $12, $12, $11
+ # 371 if (h < th)
+ bgeu $9, $7, $91
+ # 372 {
+ # 373 h+=d;
+ addu $9, $9, $16
+ # 374 q--;
+ addu $5, $5, -1
+ # 375 }
+$91:
+ # 376 h-=th;
+ subu $9, $9, $7
+ # 377
+ # 378 if (--count == 0) break;
+ addu $13, $13, -1
+ beq $13, 0, $92
+ # 379
+ # 380 ret=q<<BN_BITS4;
+ sll $31, $5, 16
+ # 381 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
+ sll $24, $9, 16
+ srl $15, $12, 16
+ or $9, $24, $15
+ # 382 l=(l&BN_MASK2l)<<BN_BITS4;
+ and $12, $12, 65535
+ sll $12, $12, 16
+ # 383 }
+ b $84
+$92:
+ # 384 ret|=q;
+ or $31, $31, $5
+ # 385 return(ret);
+ move $2, $31
+$93:
+ lw $16, 48($sp)
+ lw $31, 56($sp)
+ addu $sp, 64
+ j $31
+ .end bn_div64
+
diff --git a/crypto/bn/asm/mips3.s b/crypto/bn/asm/mips3.s
new file mode 100644
index 0000000000..e8fdd50d16
--- /dev/null
+++ b/crypto/bn/asm/mips3.s
@@ -0,0 +1,544 @@
+/* This assember is for R4000 and above machines. It takes advantage
+ * of the 64 bit registers present on these CPUs.
+ * Make sure that the SSLeay bignum library is compiled with
+ * SIXTY_FOUR_BIT set and BN_LLONG undefined.
+ * This must either be compiled with the system CC, or, if you use GNU gas,
+ * cc -E mips3.s|gas -o mips3.o
+ */
+ .set reorder
+ .set noat
+
+#define R1 $1
+#define CC $2
+#define R2 $3
+#define R3 $8
+#define R4 $9
+#define L1 $10
+#define L2 $11
+#define L3 $12
+#define L4 $13
+#define H1 $14
+#define H2 $15
+#define H3 $24
+#define H4 $25
+
+#define P1 $4
+#define P2 $5
+#define P3 $6
+#define P4 $7
+
+ .align 2
+ .ent bn_mul_add_words
+ .globl bn_mul_add_words
+.text
+bn_mul_add_words:
+ .frame $sp,0,$31
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+
+ #blt P3,4,$lab34
+
+ subu R1,P3,4
+ move CC,$0
+ bltz R1,$lab34
+$lab2:
+ ld R1,0(P1)
+ ld L1,0(P2)
+ ld R2,8(P1)
+ ld L2,8(P2)
+ ld R3,16(P1)
+ ld L3,16(P2)
+ ld R4,24(P1)
+ ld L4,24(P2)
+ dmultu L1,P4
+ daddu R1,R1,CC
+ mflo L1
+ sltu CC,R1,CC
+ daddu R1,R1,L1
+ mfhi H1
+ sltu L1,R1,L1
+ sd R1,0(P1)
+ daddu CC,CC,L1
+ dmultu L2,P4
+ daddu CC,H1,CC
+ mflo L2
+ daddu R2,R2,CC
+ sltu CC,R2,CC
+ mfhi H2
+ daddu R2,R2,L2
+ daddu P2,P2,32
+ sltu L2,R2,L2
+ sd R2,8(P1)
+ daddu CC,CC,L2
+ dmultu L3,P4
+ daddu CC,H2,CC
+ mflo L3
+ daddu R3,R3,CC
+ sltu CC,R3,CC
+ mfhi H3
+ daddu R3,R3,L3
+ daddu P1,P1,32
+ sltu L3,R3,L3
+ sd R3,-16(P1)
+ daddu CC,CC,L3
+ dmultu L4,P4
+ daddu CC,H3,CC
+ mflo L4
+ daddu R4,R4,CC
+ sltu CC,R4,CC
+ mfhi H4
+ daddu R4,R4,L4
+ subu P3,P3,4
+ sltu L4,R4,L4
+ daddu CC,CC,L4
+ daddu CC,H4,CC
+
+ subu R1,P3,4
+ sd R4,-8(P1) # delay slot
+ bgez R1,$lab2
+
+ bleu P3,0,$lab3
+ .align 2
+$lab33:
+ ld L1,0(P2)
+ ld R1,0(P1)
+ dmultu L1,P4
+ daddu R1,R1,CC
+ sltu CC,R1,CC
+ daddu P1,P1,8
+ mflo L1
+ mfhi H1
+ daddu R1,R1,L1
+ daddu P2,P2,8
+ sltu L1,R1,L1
+ subu P3,P3,1
+ daddu CC,CC,L1
+ sd R1,-8(P1)
+ daddu CC,H1,CC
+ bgtz P3,$lab33
+ j $31
+ .align 2
+$lab3:
+ j $31
+ .align 2
+$lab34:
+ bgt P3,0,$lab33
+ j $31
+ .end bn_mul_add_words
+
+ .align 2
+ # Program Unit: bn_mul_words
+ .ent bn_mul_words
+ .globl bn_mul_words
+.text
+bn_mul_words:
+ .frame $sp,0,$31
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+
+ subu P3,P3,4
+ move CC,$0
+ bltz P3,$lab45
+$lab44:
+ ld L1,0(P2)
+ ld L2,8(P2)
+ ld L3,16(P2)
+ ld L4,24(P2)
+ dmultu L1,P4
+ subu P3,P3,4
+ mflo L1
+ mfhi H1
+ daddu L1,L1,CC
+ dmultu L2,P4
+ sltu CC,L1,CC
+ sd L1,0(P1)
+ daddu CC,H1,CC
+ mflo L2
+ mfhi H2
+ daddu L2,L2,CC
+ dmultu L3,P4
+ sltu CC,L2,CC
+ sd L2,8(P1)
+ daddu CC,H2,CC
+ mflo L3
+ mfhi H3
+ daddu L3,L3,CC
+ dmultu L4,P4
+ sltu CC,L3,CC
+ sd L3,16(P1)
+ daddu CC,H3,CC
+ mflo L4
+ mfhi H4
+ daddu L4,L4,CC
+ daddu P1,P1,32
+ sltu CC,L4,CC
+ daddu P2,P2,32
+ daddu CC,H4,CC
+ sd L4,-8(P1)
+
+ bgez P3,$lab44
+ b $lab45
+$lab46:
+ ld L1,0(P2)
+ daddu P1,P1,8
+ dmultu L1,P4
+ daddu P2,P2,8
+ mflo L1
+ mfhi H1
+ daddu L1,L1,CC
+ subu P3,P3,1
+ sltu CC,L1,CC
+ sd L1,-8(P1)
+ daddu CC,H1,CC
+ bgtz P3,$lab46
+ j $31
+$lab45:
+ addu P3,P3,4
+ bgtz P3,$lab46
+ j $31
+ .align 2
+ .end bn_mul_words
+
+ # Program Unit: bn_sqr_words
+ .ent bn_sqr_words
+ .globl bn_sqr_words
+.text
+bn_sqr_words:
+ .frame $sp,0,$31
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+
+ subu P3,P3,4
+ b $lab55
+ bltz P3,$lab55
+$lab54:
+ ld L1,0(P2)
+ ld L2,8(P2)
+ ld L3,16(P2)
+ ld L4,24(P2)
+
+ dmultu L1,L1
+ subu P3,P3,4
+ mflo L1
+ mfhi H1
+ sd L1,0(P1)
+ sd H1,8(P1)
+
+ dmultu L2,L2
+ daddu P1,P1,32
+ mflo L2
+ mfhi H2
+ sd L2,-48(P1)
+ sd H2,-40(P1)
+
+ dmultu L3,L3
+ daddu P2,P2,32
+ mflo L3
+ mfhi H3
+ sd L3,-32(P1)
+ sd H3,-24(P1)
+
+ dmultu L4,L4
+
+ mflo L4
+ mfhi H4
+ sd L4,-16(P1)
+ sd H4,-8(P1)
+
+ bgtz P3,$lab54
+ b $lab55
+$lab56:
+ ld L1,0(P2)
+ daddu P1,P1,16
+ dmultu L1,L1
+ daddu P2,P2,8
+ subu P3,P3,1
+ mflo L1
+ mfhi H1
+ sd L1,-16(P1)
+ sd H1,-8(P1)
+
+ bgtz P3,$lab56
+ j $31
+$lab55:
+ daddu P3,P3,4
+ bgtz P3,$lab56
+ j $31
+ .align 2
+ .end bn_sqr_words
+
+ # Program Unit: bn_add_words
+ .ent bn_add_words
+ .globl bn_add_words
+.text
+bn_add_words: # 0x590
+ .frame $sp,0,$31
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+
+ subu P4,P4,4
+ move CC,$0
+ bltz P4,$lab65
+$lab64:
+ ld L1,0(P2)
+ ld R1,0(P3)
+ ld L2,8(P2)
+ ld R2,8(P3)
+
+ daddu L1,L1,CC
+ ld L3,16(P2)
+ sltu CC,L1,CC
+ daddu L1,L1,R1
+ sltu R1,L1,R1
+ ld R3,16(P3)
+ daddu CC,CC,R1
+ ld L4,24(P2)
+
+ daddu L2,L2,CC
+ ld R4,24(P3)
+ sltu CC,L2,CC
+ daddu L2,L2,R2
+ sltu R2,L2,R2
+ sd L1,0(P1)
+ daddu CC,CC,R2
+ daddu P1,P1,32
+ daddu L3,L3,CC
+ sd L2,-24(P1)
+
+ sltu CC,L3,CC
+ daddu L3,L3,R3
+ sltu R3,L3,R3
+ daddu P2,P2,32
+ daddu CC,CC,R3
+
+ daddu L4,L4,CC
+ daddu P3,P3,32
+ sltu CC,L4,CC
+ daddu L4,L4,R4
+ sltu R4,L4,R4
+ subu P4,P4,4
+ sd L3,-16(P1)
+ daddu CC,CC,R4
+ sd L4,-8(P1)
+
+ bgtz P4,$lab64
+ b $lab65
+$lab66:
+ ld L1,0(P2)
+ ld R1,0(P3)
+ daddu L1,L1,CC
+ daddu P1,P1,8
+ sltu CC,L1,CC
+ daddu P2,P2,8
+ daddu P3,P3,8
+ daddu L1,L1,R1
+ subu P4,P4,1
+ sltu R1,L1,R1
+ sd L1,-8(P1)
+ daddu CC,CC,R1
+
+ bgtz P4,$lab66
+ j $31
+$lab65:
+ addu P4,P4,4
+ bgtz P4,$lab66
+ j $31
+ .end bn_add_words
+
+#if 1
+ # Program Unit: bn_div64
+ .set at
+ .set reorder
+ .text
+ .align 2
+ .globl bn_div64
+ # 321 {
+ .ent bn_div64
+bn_div64:
+ dsubu $sp, 64
+ sd $31, 56($sp)
+ sd $16, 48($sp)
+ .mask 0x80010000, -56
+ .frame $sp, 64, $31
+ move $9, $4
+ move $12, $5
+ move $16, $6
+ # 322 BN_ULONG dh,dl,q,ret=0,th,tl,t;
+ move $31, $0
+ # 323 int i,count=2;
+ li $13, 2
+ # 324
+ # 325 if (d == 0) return(BN_MASK2);
+ bne $16, 0, $80
+ dli $2, -1
+ b $93
+$80:
+ # 326
+ # 327 i=BN_num_bits_word(d);
+ move $4, $16
+ sd $31, 16($sp)
+ sd $9, 24($sp)
+ sd $12, 32($sp)
+ sd $13, 40($sp)
+ .livereg 0x800ff0e,0xfff
+ jal BN_num_bits_word
+ dli $4, 64
+ ld $31, 16($sp)
+ ld $9, 24($sp)
+ ld $12, 32($sp)
+ ld $13, 40($sp)
+ move $3, $2
+ # 328 if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
+ beq $2, $4, $81
+ dli $14, 1
+ dsll $15, $14, $2
+ bleu $9, $15, $81
+ # 329 {
+ # 330 #if !defined(NO_STDIO) && !defined(WIN16)
+ # 331 fprintf(stderr,"Division would overflow (%d)\n",i);
+ # 332 #endif
+ # 333 abort();
+ sd $3, 8($sp)
+ sd $31, 16($sp)
+ sd $9, 24($sp)
+ sd $12, 32($sp)
+ sd $13, 40($sp)
+ .livereg 0xff0e,0xfff
+ jal abort
+ dli $4, 64
+ ld $3, 8($sp)
+ ld $31, 16($sp)
+ ld $9, 24($sp)
+ ld $12, 32($sp)
+ ld $13, 40($sp)
+ # 334 }
+$81:
+ # 335 i=BN_BITS2-i;
+ dsubu $3, $4, $3
+ # 336 if (h >= d) h-=d;
+ bltu $9, $16, $82
+ dsubu $9, $9, $16
+$82:
+ # 337
+ # 338 if (i)
+ beq $3, 0, $83
+ # 339 {
+ # 340 d<<=i;
+ dsll $16, $16, $3
+ # 341 h=(h<<i)|(l>>(BN_BITS2-i));
+ dsll $24, $9, $3
+ dsubu $25, $4, $3
+ dsrl $14, $12, $25
+ or $9, $24, $14
+ # 342 l<<=i;
+ dsll $12, $12, $3
+ # 343 }
+$83:
+ # 344 dh=(d&BN_MASK2h)>>BN_BITS4;
+ # 345 dl=(d&BN_MASK2l);
+ and $8, $16,0xFFFFFFFF00000000
+ dsrl $8, $8, 32
+ # dli $10,0xFFFFFFFF # Is this needed?
+ # and $10, $16, $10
+ dsll $10, $16, 32
+ dsrl $10, $10, 32
+ dli $6,0xFFFFFFFF00000000
+$84:
+ # 346 for (;;)
+ # 347 {
+ # 348 if ((h>>BN_BITS4) == dh)
+ dsrl $15, $9, 32
+ bne $8, $15, $85
+ # 349 q=BN_MASK2l;
+ dli $5, 0xFFFFFFFF
+ b $86
+$85:
+ # 350 else
+ # 351 q=h/dh;
+ ddivu $5, $9, $8
+$86:
+ # 352
+ # 353 for (;;)
+ # 354 {
+ # 355 t=(h-q*dh);
+ dmul $4, $5, $8
+ dsubu $2, $9, $4
+ move $3, $2
+ # 356 if ((t&BN_MASK2h) ||
+ # 357 ((dl*q) <= (
+ # 358 (t<<BN_BITS4)+
+ # 359 ((l&BN_MASK2h)>>BN_BITS4))))
+ and $25, $2, $6
+ bne $25, $0, $87
+ dmul $24, $10, $5
+ dsll $14, $3, 32
+ and $15, $12, $6
+ dsrl $25, $15, 32
+ daddu $15, $14, $25
+ bgtu $24, $15, $88
+$87:
+ # 360 break;
+ dmul $3, $10, $5
+ b $89
+$88:
+ # 361 q--;
+ daddu $5, $5, -1
+ # 362 }
+ b $86
+$89:
+ # 363 th=q*dh;
+ # 364 tl=q*dl;
+ # 365 t=(tl>>BN_BITS4);
+ # 366 tl=(tl<<BN_BITS4)&BN_MASK2h;
+ dsll $14, $3, 32
+ and $2, $14, $6
+ move $11, $2
+ # 367 th+=t;
+ dsrl $25, $3, 32
+ daddu $7, $4, $25
+ # 368
+ # 369 if (l < tl) th++;
+ bgeu $12, $2, $90
+ daddu $7, $7, 1
+$90:
+ # 370 l-=tl;
+ dsubu $12, $12, $11
+ # 371 if (h < th)
+ bgeu $9, $7, $91
+ # 372 {
+ # 373 h+=d;
+ daddu $9, $9, $16
+ # 374 q--;
+ daddu $5, $5, -1
+ # 375 }
+$91:
+ # 376 h-=th;
+ dsubu $9, $9, $7
+ # 377
+ # 378 if (--count == 0) break;
+ addu $13, $13, -1
+ beq $13, 0, $92
+ # 379
+ # 380 ret=q<<BN_BITS4;
+ dsll $31, $5, 32
+ # 381 h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
+ dsll $24, $9, 32
+ dsrl $15, $12, 32
+ or $9, $24, $15
+ # 382 l=(l&BN_MASK2l)<<BN_BITS4;
+ and $12, $12, 0xFFFFFFFF
+ dsll $12, $12, 32
+ # 383 }
+ b $84
+$92:
+ # 384 ret|=q;
+ or $31, $31, $5
+ # 385 return(ret);
+ move $2, $31
+$93:
+ ld $16, 48($sp)
+ ld $31, 56($sp)
+ daddu $sp, 64
+ j $31
+ .end bn_div64
+#endif
diff --git a/crypto/bn/asm/x86.pl b/crypto/bn/asm/x86.pl
new file mode 100644
index 0000000000..bf869fd0ee
--- /dev/null
+++ b/crypto/bn/asm/x86.pl
@@ -0,0 +1,28 @@
+#!/usr/local/bin/perl
+
+push(@INC,"perlasm","../../perlasm");
+require "x86asm.pl";
+
+require("x86/mul_add.pl");
+require("x86/mul.pl");
+require("x86/sqr.pl");
+require("x86/div.pl");
+require("x86/add.pl");
+require("x86/sub.pl");
+require("x86/comba.pl");
+
+&asm_init($ARGV[0],"bn-586.pl");
+
+&bn_mul_add_words("bn_mul_add_words");
+&bn_mul_words("bn_mul_words");
+&bn_sqr_words("bn_sqr_words");
+&bn_div_words("bn_div_words");
+&bn_add_words("bn_add_words");
+&bn_sub_words("bn_sub_words");
+&bn_mul_comba("bn_mul_comba8",8);
+&bn_mul_comba("bn_mul_comba4",4);
+&bn_sqr_comba("bn_sqr_comba8",8);
+&bn_sqr_comba("bn_sqr_comba4",4);
+
+&asm_finish();
+
diff --git a/crypto/bn/asm/x86/add.pl b/crypto/bn/asm/x86/add.pl
new file mode 100644
index 0000000000..0b5cf583e3
--- /dev/null
+++ b/crypto/bn/asm/x86/add.pl
@@ -0,0 +1,76 @@
+#!/usr/local/bin/perl
+# x86 assember
+
+sub bn_add_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+
+ &comment("");
+ $a="esi";
+ $b="edi";
+ $c="eax";
+ $r="ebx";
+ $tmp1="ecx";
+ $tmp2="edx";
+ $num="ebp";
+
+ &mov($r,&wparam(0)); # get r
+ &mov($a,&wparam(1)); # get a
+ &mov($b,&wparam(2)); # get b
+ &mov($num,&wparam(3)); # get num
+ &xor($c,$c); # clear carry
+ &and($num,0xfffffff8); # num / 8
+
+ &jz(&label("aw_finish"));
+
+ &set_label("aw_loop",0);
+ for ($i=0; $i<8; $i++)
+ {
+ &comment("Round $i");
+
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
+ &add($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &add($tmp1,$tmp2);
+ &adc($c,0);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ }
+
+ &comment("");
+ &add($a,32);
+ &add($b,32);
+ &add($r,32);
+ &sub($num,8);
+ &jnz(&label("aw_loop"));
+
+ &set_label("aw_finish",0);
+ &mov($num,&wparam(3)); # get num
+ &and($num,7);
+ &jz(&label("aw_end"));
+
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("Tail Round $i");
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov($tmp2,&DWP($i*4,$b,"",0));# *b
+ &add($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &add($tmp1,$tmp2);
+ &adc($c,0);
+ &dec($num) if ($i != 6);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
+ &jz(&label("aw_end")) if ($i != 6);
+ }
+ &set_label("aw_end",0);
+
+# &mov("eax",$c); # $c is "eax"
+
+ &function_end($name);
+ }
+
+1;
diff --git a/crypto/bn/asm/x86/comba.pl b/crypto/bn/asm/x86/comba.pl
new file mode 100644
index 0000000000..2291253629
--- /dev/null
+++ b/crypto/bn/asm/x86/comba.pl
@@ -0,0 +1,277 @@
+#!/usr/local/bin/perl
+# x86 assember
+
+sub mul_add_c
+ {
+ local($a,$ai,$b,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
+
+ # pos == -1 if eax and edx are pre-loaded, 0 to load from next
+ # words, and 1 if load return value
+
+ &comment("mul a[$ai]*b[$bi]");
+
+ # "eax" and "edx" will always be pre-loaded.
+ # &mov("eax",&DWP($ai*4,$a,"",0)) ;
+ # &mov("edx",&DWP($bi*4,$b,"",0));
+
+ &mul("edx");
+ &add($c0,"eax");
+ &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # laod next a
+ &mov("eax",&wparam(0)) if $pos > 0; # load r[]
+ ###
+ &adc($c1,"edx");
+ &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 0; # laod next b
+ &mov("edx",&DWP(($nb)*4,$b,"",0)) if $pos == 1; # laod next b
+ ###
+ &adc($c2,0);
+ # is pos > 1, it means it is the last loop
+ &mov(&DWP($i*4,"eax","",0),$c0) if $pos > 0; # save r[];
+ &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # laod next a
+ }
+
+sub sqr_add_c
+ {
+ local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
+
+ # pos == -1 if eax and edx are pre-loaded, 0 to load from next
+ # words, and 1 if load return value
+
+ &comment("sqr a[$ai]*a[$bi]");
+
+ # "eax" and "edx" will always be pre-loaded.
+ # &mov("eax",&DWP($ai*4,$a,"",0)) ;
+ # &mov("edx",&DWP($bi*4,$b,"",0));
+
+ if ($ai == $bi)
+ { &mul("eax");}
+ else
+ { &mul("edx");}
+ &add($c0,"eax");
+ &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
+ ###
+ &adc($c1,"edx");
+ &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos == 1) && ($na != $nb);
+ ###
+ &adc($c2,0);
+ # is pos > 1, it means it is the last loop
+ &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
+ &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
+ }
+
+sub sqr_add_c2
+ {
+ local($r,$a,$ai,$bi,$c0,$c1,$c2,$pos,$i,$na,$nb)=@_;
+
+ # pos == -1 if eax and edx are pre-loaded, 0 to load from next
+ # words, and 1 if load return value
+
+ &comment("sqr a[$ai]*a[$bi]");
+
+ # "eax" and "edx" will always be pre-loaded.
+ # &mov("eax",&DWP($ai*4,$a,"",0)) ;
+ # &mov("edx",&DWP($bi*4,$a,"",0));
+
+ if ($ai == $bi)
+ { &mul("eax");}
+ else
+ { &mul("edx");}
+ &add("eax","eax");
+ ###
+ &adc("edx","edx");
+ ###
+ &adc($c2,0);
+ &add($c0,"eax");
+ &adc($c1,"edx");
+ &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 0; # load next a
+ &mov("eax",&DWP(($na)*4,$a,"",0)) if $pos == 1; # load next b
+ &adc($c2,0);
+ &mov(&DWP($i*4,$r,"",0),$c0) if $pos > 0; # save r[];
+ &mov("edx",&DWP(($nb)*4,$a,"",0)) if ($pos <= 1) && ($na != $nb);
+ ###
+ }
+
+sub bn_mul_comba
+ {
+ local($name,$num)=@_;
+ local($a,$b,$c0,$c1,$c2);
+ local($i,$as,$ae,$bs,$be,$ai,$bi);
+ local($tot,$end);
+
+ &function_begin_B($name,"");
+
+ $c0="ebx";
+ $c1="ecx";
+ $c2="ebp";
+ $a="esi";
+ $b="edi";
+
+ $as=0;
+ $ae=0;
+ $bs=0;
+ $be=0;
+ $tot=$num+$num-1;
+
+ &push("esi");
+ &mov($a,&wparam(1));
+ &push("edi");
+ &mov($b,&wparam(2));
+ &push("ebp");
+ &push("ebx");
+
+ &xor($c0,$c0);
+ &mov("eax",&DWP(0,$a,"",0)); # load the first word
+ &xor($c1,$c1);
+ &mov("edx",&DWP(0,$b,"",0)); # load the first second
+
+ for ($i=0; $i<$tot; $i++)
+ {
+ $ai=$as;
+ $bi=$bs;
+ $end=$be+1;
+
+ &comment("################## Calculate word $i");
+
+ for ($j=$bs; $j<$end; $j++)
+ {
+ &xor($c2,$c2) if ($j == $bs);
+ if (($j+1) == $end)
+ {
+ $v=1;
+ $v=2 if (($i+1) == $tot);
+ }
+ else
+ { $v=0; }
+ if (($j+1) != $end)
+ {
+ $na=($ai-1);
+ $nb=($bi+1);
+ }
+ else
+ {
+ $na=$as+($i < ($num-1));
+ $nb=$bs+($i >= ($num-1));
+ }
+#printf STDERR "[$ai,$bi] -> [$na,$nb]\n";
+ &mul_add_c($a,$ai,$b,$bi,$c0,$c1,$c2,$v,$i,$na,$nb);
+ if ($v)
+ {
+ &comment("saved r[$i]");
+ # &mov("eax",&wparam(0));
+ # &mov(&DWP($i*4,"eax","",0),$c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ }
+ $ai--;
+ $bi++;
+ }
+ $as++ if ($i < ($num-1));
+ $ae++ if ($i >= ($num-1));
+
+ $bs++ if ($i >= ($num-1));
+ $be++ if ($i < ($num-1));
+ }
+ &comment("save r[$i]");
+ # &mov("eax",&wparam(0));
+ &mov(&DWP($i*4,"eax","",0),$c0);
+
+ &pop("ebx");
+ &pop("ebp");
+ &pop("edi");
+ &pop("esi");
+ &ret();
+ &function_end_B($name);
+ }
+
+sub bn_sqr_comba
+ {
+ local($name,$num)=@_;
+ local($r,$a,$c0,$c1,$c2)=@_;
+ local($i,$as,$ae,$bs,$be,$ai,$bi);
+ local($b,$tot,$end,$half);
+
+ &function_begin_B($name,"");
+
+ $c0="ebx";
+ $c1="ecx";
+ $c2="ebp";
+ $a="esi";
+ $r="edi";
+
+ &push("esi");
+ &push("edi");
+ &push("ebp");
+ &push("ebx");
+ &mov($r,&wparam(0));
+ &mov($a,&wparam(1));
+ &xor($c0,$c0);
+ &xor($c1,$c1);
+ &mov("eax",&DWP(0,$a,"",0)); # load the first word
+
+ $as=0;
+ $ae=0;
+ $bs=0;
+ $be=0;
+ $tot=$num+$num-1;
+
+ for ($i=0; $i<$tot; $i++)
+ {
+ $ai=$as;
+ $bi=$bs;
+ $end=$be+1;
+
+ &comment("############### Calculate word $i");
+ for ($j=$bs; $j<$end; $j++)
+ {
+ &xor($c2,$c2) if ($j == $bs);
+ if (($ai-1) < ($bi+1))
+ {
+ $v=1;
+ $v=2 if ($i+1) == $tot;
+ }
+ else
+ { $v=0; }
+ if (!$v)
+ {
+ $na=$ai-1;
+ $nb=$bi+1;
+ }
+ else
+ {
+ $na=$as+($i < ($num-1));
+ $nb=$bs+($i >= ($num-1));
+ }
+ if ($ai == $bi)
+ {
+ &sqr_add_c($r,$a,$ai,$bi,
+ $c0,$c1,$c2,$v,$i,$na,$nb);
+ }
+ else
+ {
+ &sqr_add_c2($r,$a,$ai,$bi,
+ $c0,$c1,$c2,$v,$i,$na,$nb);
+ }
+ if ($v)
+ {
+ &comment("saved r[$i]");
+ #&mov(&DWP($i*4,$r,"",0),$c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+ last;
+ }
+ $ai--;
+ $bi++;
+ }
+ $as++ if ($i < ($num-1));
+ $ae++ if ($i >= ($num-1));
+
+ $bs++ if ($i >= ($num-1));
+ $be++ if ($i < ($num-1));
+ }
+ &mov(&DWP($i*4,$r,"",0),$c0);
+ &pop("ebx");
+ &pop("ebp");
+ &pop("edi");
+ &pop("esi");
+ &ret();
+ &function_end_B($name);
+ }
+
+1;
diff --git a/crypto/bn/asm/x86/div.pl b/crypto/bn/asm/x86/div.pl
new file mode 100644
index 0000000000..0e90152caa
--- /dev/null
+++ b/crypto/bn/asm/x86/div.pl
@@ -0,0 +1,15 @@
+#!/usr/local/bin/perl
+# x86 assembler
+
+sub bn_div_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+ &mov("edx",&wparam(0)); #
+ &mov("eax",&wparam(1)); #
+ &mov("ebx",&wparam(2)); #
+ &div("ebx");
+ &function_end($name);
+ }
+1;
diff --git a/crypto/bn/asm/x86/f b/crypto/bn/asm/x86/f
new file mode 100644
index 0000000000..22e4112224
--- /dev/null
+++ b/crypto/bn/asm/x86/f
@@ -0,0 +1,3 @@
+#!/usr/local/bin/perl
+# x86 assembler
+
diff --git a/crypto/bn/asm/x86/mul.pl b/crypto/bn/asm/x86/mul.pl
new file mode 100644
index 0000000000..674cb9b055
--- /dev/null
+++ b/crypto/bn/asm/x86/mul.pl
@@ -0,0 +1,77 @@
+#!/usr/local/bin/perl
+# x86 assembler
+
+sub bn_mul_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+
+ &comment("");
+ $Low="eax";
+ $High="edx";
+ $a="ebx";
+ $w="ecx";
+ $r="edi";
+ $c="esi";
+ $num="ebp";
+
+ &xor($c,$c); # clear carry
+ &mov($r,&wparam(0)); #
+ &mov($a,&wparam(1)); #
+ &mov($num,&wparam(2)); #
+ &mov($w,&wparam(3)); #
+
+ &and($num,0xfffffff8); # num / 8
+ &jz(&label("mw_finish"));
+
+ &set_label("mw_loop",0);
+ for ($i=0; $i<32; $i+=4)
+ {
+ &comment("Round $i");
+
+ &mov("eax",&DWP($i,$a,"",0)); # *a
+ &mul($w); # *a * w
+ &add("eax",$c); # L(t)+=c
+ # XXX
+
+ &adc("edx",0); # H(t)+=carry
+ &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
+
+ &mov($c,"edx"); # c= H(t);
+ }
+
+ &comment("");
+ &add($a,32);
+ &add($r,32);
+ &sub($num,8);
+ &jz(&label("mw_finish"));
+ &jmp(&label("mw_loop"));
+
+ &set_label("mw_finish",0);
+ &mov($num,&wparam(2)); # get num
+ &and($num,7);
+ &jnz(&label("mw_finish2"));
+ &jmp(&label("mw_end"));
+
+ &set_label("mw_finish2",1);
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("Tail Round $i");
+ &mov("eax",&DWP($i*4,$a,"",0));# *a
+ &mul($w); # *a * w
+ &add("eax",$c); # L(t)+=c
+ # XXX
+ &adc("edx",0); # H(t)+=carry
+ &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t);
+ &mov($c,"edx"); # c= H(t);
+ &dec($num) if ($i != 7-1);
+ &jz(&label("mw_end")) if ($i != 7-1);
+ }
+ &set_label("mw_end",0);
+ &mov("eax",$c);
+
+ &function_end($name);
+ }
+
+1;
diff --git a/crypto/bn/asm/x86/mul_add.pl b/crypto/bn/asm/x86/mul_add.pl
new file mode 100644
index 0000000000..61830d3a90
--- /dev/null
+++ b/crypto/bn/asm/x86/mul_add.pl
@@ -0,0 +1,87 @@
+#!/usr/local/bin/perl
+# x86 assembler
+
+sub bn_mul_add_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+
+ &comment("");
+ $Low="eax";
+ $High="edx";
+ $a="ebx";
+ $w="ebp";
+ $r="edi";
+ $c="esi";
+
+ &xor($c,$c); # clear carry
+ &mov($r,&wparam(0)); #
+
+ &mov("ecx",&wparam(2)); #
+ &mov($a,&wparam(1)); #
+
+ &and("ecx",0xfffffff8); # num / 8
+ &mov($w,&wparam(3)); #
+
+ &push("ecx"); # Up the stack for a tmp variable
+
+ &jz(&label("maw_finish"));
+
+ &set_label("maw_loop",0);
+
+ &mov(&swtmp(0),"ecx"); #
+
+ for ($i=0; $i<32; $i+=4)
+ {
+ &comment("Round $i");
+
+ &mov("eax",&DWP($i,$a,"",0)); # *a
+ &mul($w); # *a * w
+ &add("eax",$c); # L(t)+= *r
+ &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r
+ &adc("edx",0); # H(t)+=carry
+ &add("eax",$c); # L(t)+=c
+ &adc("edx",0); # H(t)+=carry
+ &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t);
+ &mov($c,"edx"); # c= H(t);
+ }
+
+ &comment("");
+ &mov("ecx",&swtmp(0)); #
+ &add($a,32);
+ &add($r,32);
+ &sub("ecx",8);
+ &jnz(&label("maw_loop"));
+
+ &set_label("maw_finish",0);
+ &mov("ecx",&wparam(2)); # get num
+ &and("ecx",7);
+ &jnz(&label("maw_finish2")); # helps branch prediction
+ &jmp(&label("maw_end"));
+
+ &set_label("maw_finish2",1);
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("Tail Round $i");
+ &mov("eax",&DWP($i*4,$a,"",0));# *a
+ &mul($w); # *a * w
+ &add("eax",$c); # L(t)+=c
+ &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r
+ &adc("edx",0); # H(t)+=carry
+ &add("eax",$c);
+ &adc("edx",0); # H(t)+=carry
+ &dec("ecx") if ($i != 7-1);
+ &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t);
+ &mov($c,"edx"); # c= H(t);
+ &jz(&label("maw_end")) if ($i != 7-1);
+ }
+ &set_label("maw_end",0);
+ &mov("eax",$c);
+
+ &pop("ecx"); # clear variable from
+
+ &function_end($name);
+ }
+
+1;
diff --git a/crypto/bn/asm/x86/sqr.pl b/crypto/bn/asm/x86/sqr.pl
new file mode 100644
index 0000000000..1f90993cf6
--- /dev/null
+++ b/crypto/bn/asm/x86/sqr.pl
@@ -0,0 +1,60 @@
+#!/usr/local/bin/perl
+# x86 assembler
+
+sub bn_sqr_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+
+ &comment("");
+ $r="esi";
+ $a="edi";
+ $num="ebx";
+
+ &mov($r,&wparam(0)); #
+ &mov($a,&wparam(1)); #
+ &mov($num,&wparam(2)); #
+
+ &and($num,0xfffffff8); # num / 8
+ &jz(&label("sw_finish"));
+
+ &set_label("sw_loop",0);
+ for ($i=0; $i<32; $i+=4)
+ {
+ &comment("Round $i");
+ &mov("eax",&DWP($i,$a,"",0)); # *a
+ # XXX
+ &mul("eax"); # *a * *a
+ &mov(&DWP($i*2,$r,"",0),"eax"); #
+ &mov(&DWP($i*2+4,$r,"",0),"edx");#
+ }
+
+ &comment("");
+ &add($a,32);
+ &add($r,64);
+ &sub($num,8);
+ &jnz(&label("sw_loop"));
+
+ &set_label("sw_finish",0);
+ &mov($num,&wparam(2)); # get num
+ &and($num,7);
+ &jz(&label("sw_end"));
+
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("Tail Round $i");
+ &mov("eax",&DWP($i*4,$a,"",0)); # *a
+ # XXX
+ &mul("eax"); # *a * *a
+ &mov(&DWP($i*8,$r,"",0),"eax"); #
+ &dec($num) if ($i != 7-1);
+ &mov(&DWP($i*8+4,$r,"",0),"edx");
+ &jz(&label("sw_end")) if ($i != 7-1);
+ }
+ &set_label("sw_end",0);
+
+ &function_end($name);
+ }
+
+1;
diff --git a/crypto/bn/asm/x86/sub.pl b/crypto/bn/asm/x86/sub.pl
new file mode 100644
index 0000000000..837b0e1b07
--- /dev/null
+++ b/crypto/bn/asm/x86/sub.pl
@@ -0,0 +1,76 @@
+#!/usr/local/bin/perl
+# x86 assembler
+
+sub bn_sub_words
+ {
+ local($name)=@_;
+
+ &function_begin($name,"");
+
+ &comment("");
+ $a="esi";
+ $b="edi";
+ $c="eax";
+ $r="ebx";
+ $tmp1="ecx";
+ $tmp2="edx";
+ $num="ebp";
+
+ &mov($r,&wparam(0)); # get r
+ &mov($a,&wparam(1)); # get a
+ &mov($b,&wparam(2)); # get b
+ &mov($num,&wparam(3)); # get num
+ &xor($c,$c); # clear carry
+ &and($num,0xfffffff8); # num / 8
+
+ &jz(&label("aw_finish"));
+
+ &set_label("aw_loop",0);
+ for ($i=0; $i<8; $i++)
+ {
+ &comment("Round $i");
+
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov($tmp2,&DWP($i*4,$b,"",0)); # *b
+ &sub($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &sub($tmp1,$tmp2);
+ &adc($c,0);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *r
+ }
+
+ &comment("");
+ &add($a,32);
+ &add($b,32);
+ &add($r,32);
+ &sub($num,8);
+ &jnz(&label("aw_loop"));
+
+ &set_label("aw_finish",0);
+ &mov($num,&wparam(3)); # get num
+ &and($num,7);
+ &jz(&label("aw_end"));
+
+ for ($i=0; $i<7; $i++)
+ {
+ &comment("Tail Round $i");
+ &mov($tmp1,&DWP($i*4,$a,"",0)); # *a
+ &mov($tmp2,&DWP($i*4,$b,"",0));# *b
+ &sub($tmp1,$c);
+ &mov($c,0);
+ &adc($c,$c);
+ &sub($tmp1,$tmp2);
+ &adc($c,0);
+ &dec($num) if ($i != 6);
+ &mov(&DWP($i*4,$r,"",0),$tmp1); # *a
+ &jz(&label("aw_end")) if ($i != 6);
+ }
+ &set_label("aw_end",0);
+
+# &mov("eax",$c); # $c is "eax"
+
+ &function_end($name);
+ }
+
+1;
diff --git a/crypto/bn/asm/x86w16.asm b/crypto/bn/asm/x86w16.asm
index 74a933a8cd..80a9ed6eef 100644
--- a/crypto/bn/asm/x86w16.asm
+++ b/crypto/bn/asm/x86w16.asm
@@ -6,11 +6,11 @@ F_TEXT SEGMENT WORD PUBLIC 'CODE'
F_TEXT ENDS
_DATA SEGMENT WORD PUBLIC 'DATA'
_DATA ENDS
-CONST SEGMENT WORD PUBLIC 'CONST'
-CONST ENDS
+_CONST SEGMENT WORD PUBLIC 'CONST'
+_CONST ENDS
_BSS SEGMENT WORD PUBLIC 'BSS'
_BSS ENDS
-DGROUP GROUP CONST, _BSS, _DATA
+DGROUP GROUP _CONST, _BSS, _DATA
ASSUME DS: DGROUP, SS: DGROUP
F_TEXT SEGMENT
ASSUME CS: F_TEXT
diff --git a/crypto/bn/asm/x86w32.asm b/crypto/bn/asm/x86w32.asm
index fc6f917714..957d71e3b1 100644
--- a/crypto/bn/asm/x86w32.asm
+++ b/crypto/bn/asm/x86w32.asm
@@ -6,11 +6,11 @@ F_TEXT SEGMENT WORD USE16 PUBLIC 'CODE'
F_TEXT ENDS
_DATA SEGMENT WORD USE16 PUBLIC 'DATA'
_DATA ENDS
-CONST SEGMENT WORD USE16 PUBLIC 'CONST'
-CONST ENDS
+_CONST SEGMENT WORD USE16 PUBLIC 'CONST'
+_CONST ENDS
_BSS SEGMENT WORD USE16 PUBLIC 'BSS'
_BSS ENDS
-DGROUP GROUP CONST, _BSS, _DATA
+DGROUP GROUP _CONST, _BSS, _DATA
ASSUME DS: DGROUP, SS: DGROUP
F_TEXT SEGMENT
ASSUME CS: F_TEXT
@@ -89,7 +89,7 @@ $L555:
mov bp,WORD PTR [bp+26] ; load num
and bp,3
dec bp
- js $L547
+ js $L547m
mov eax,ecx
mul DWORD PTR es:[bx] ; w* *a
@@ -100,7 +100,7 @@ $L555:
mov DWORD PTR ds:[di],eax
mov esi,edx
dec bp
- js $L547 ; Note that we are now testing for -1
+ js $L547m ; Note that we are now testing for -1
;
mov eax,ecx
mul DWORD PTR es:[bx+4] ; w* *a
@@ -111,7 +111,7 @@ $L555:
mov DWORD PTR ds:[di+4],eax
mov esi,edx
dec bp
- js $L547
+ js $L547m
;
mov eax,ecx
mul DWORD PTR es:[bx+8] ; w* *a
@@ -121,7 +121,7 @@ $L555:
adc edx,0
mov DWORD PTR ds:[di+8],eax
mov esi,edx
-$L547:
+$L547m:
mov eax,esi
mov edx,esi
shr edx,16
@@ -315,37 +315,35 @@ _bn_add_words PROC FAR
; ap = 22
; rp = 18
xor esi,esi ;c=0;
+ mov bx,WORD PTR [bp+18] ; load low r
mov si,WORD PTR [bp+22] ; load a
mov es,WORD PTR [bp+24] ; load a
mov di,WORD PTR [bp+26] ; load b
mov ds,WORD PTR [bp+28] ; load b
mov dx,WORD PTR [bp+30] ; load num
- dec dx
- js $L547
xor ecx,ecx
+ dec dx
+ js $L547a
$L5477:
- xor ebx,ebx
mov eax,DWORD PTR es:[si] ; *a
add eax,ecx
- adc ebx,0
+ mov ecx,0
+ adc ecx,0
add si,4 ; a++
add eax,DWORD PTR ds:[di] ; + *b
- mov ecx,ebx
adc ecx,0
- add di,4
- mov bx,WORD PTR [bp+18]
mov ds,WORD PTR [bp+20]
+ add di,4
mov DWORD PTR ds:[bx],eax
- add bx,4
mov ds,WORD PTR [bp+28]
- mov WORD PTR [bp+18],bx
+ add bx,4
dec dx
- js $L547 ; Note that we are now testing for -1
+ js $L547a ; Note that we are now testing for -1
jmp $L5477
;
-$L547:
+$L547a:
mov eax,ecx
mov edx,ecx
shr edx,16
diff --git a/crypto/bn/bn.err b/crypto/bn/bn.err
index 7ccc247c41..ba5c9bc97e 100644
--- a/crypto/bn/bn.err
+++ b/crypto/bn/bn.err
@@ -16,12 +16,15 @@
#define BN_F_BN_MPI2BN 112
#define BN_F_BN_NEW 113
#define BN_F_BN_RAND 114
+#define BN_F_BN_USUB 115
/* Reason codes. */
-#define BN_R_BAD_RECIPROCAL 100
-#define BN_R_CALLED_WITH_EVEN_MODULUS 101
-#define BN_R_DIV_BY_ZERO 102
-#define BN_R_ENCODING_ERROR 103
-#define BN_R_INVALID_LENGTH 104
-#define BN_R_NOT_INITALISED 105
-#define BN_R_NO_INVERSE 106
+#define BN_R_ARG2_LT_ARG3 100
+#define BN_R_BAD_RECIPROCAL 101
+#define BN_R_CALLED_WITH_EVEN_MODULUS 102
+#define BN_R_DIV_BY_ZERO 103
+#define BN_R_ENCODING_ERROR 104
+#define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105
+#define BN_R_INVALID_LENGTH 106
+#define BN_R_NOT_INITALISED 107
+#define BN_R_NO_INVERSE 108
diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h
index 66dde285d6..2c14a1d582 100644
--- a/crypto/bn/bn.h
+++ b/crypto/bn/bn.h
@@ -77,6 +77,9 @@ extern "C" {
#define BN_LLONG /* This comment stops Configure mutilating things */
#endif
+#define BN_MUL_COMBA
+#define BN_SQR_COMBA
+#undef BN_RECURSION
#define RECP_MUL_MOD
#define MONT_MUL_MOD
@@ -105,6 +108,7 @@ extern "C" {
#undef SIXTEEN_BIT
#undef EIGHT_BIT
+
/* assuming long is 64bit - this is the DEC Alpha
* unsigned long long is only 64 bits :-(, don't define
* BN_LLONG for the DEC Alpha */
@@ -116,17 +120,23 @@ extern "C" {
#define BN_BYTES 8
#define BN_BITS2 64
#define BN_BITS4 32
+#define BN_MASK (0xffffffffffffffffffffffffffffffffLL)
#define BN_MASK2 (0xffffffffffffffffL)
#define BN_MASK2l (0xffffffffL)
#define BN_MASK2h (0xffffffff00000000L)
#define BN_MASK2h1 (0xffffffff80000000L)
#define BN_TBIT (0x8000000000000000L)
-#define BN_DEC_CONV (10000000000000000000L)
+#define BN_DEC_CONV (10000000000000000000UL)
#define BN_DEC_FMT1 "%lu"
#define BN_DEC_FMT2 "%019lu"
#define BN_DEC_NUM 19
#endif
+/* This is where the long long data type is 64 bits, but long is 32.
+ * For machines where there are 64bit registers, this is the mode to use.
+ * IRIX, on R4000 and above should use this mode, along with the relevant
+ * assembler code :-). Do NOT define BN_ULLONG.
+ */
#ifdef SIXTY_FOUR_BIT
#undef BN_LLONG
/* #define BN_ULLONG unsigned long long */
@@ -141,9 +151,9 @@ extern "C" {
#define BN_MASK2h (0xffffffff00000000LL)
#define BN_MASK2h1 (0xffffffff80000000LL)
#define BN_TBIT (0x8000000000000000LL)
-#define BN_DEC_CONV (10000000000000000000L)
-#define BN_DEC_FMT1 "%lu"
-#define BN_DEC_FMT2 "%019lu"
+#define BN_DEC_CONV (10000000000000000000LL)
+#define BN_DEC_FMT1 "%llu"
+#define BN_DEC_FMT2 "%019llu"
#define BN_DEC_NUM 19
#endif
@@ -159,6 +169,7 @@ extern "C" {
#define BN_BYTES 4
#define BN_BITS2 32
#define BN_BITS4 16
+#define BN_MASK (0xffffffffffffffffLL)
#define BN_MASK2 (0xffffffffL)
#define BN_MASK2l (0xffff)
#define BN_MASK2h1 (0xffff8000L)
@@ -181,6 +192,7 @@ extern "C" {
#define BN_BYTES 2
#define BN_BITS2 16
#define BN_BITS4 8
+#define BN_MASK (0xffffffff)
#define BN_MASK2 (0xffff)
#define BN_MASK2l (0xff)
#define BN_MASK2h1 (0xff80)
@@ -203,6 +215,7 @@ extern "C" {
#define BN_BYTES 1
#define BN_BITS2 8
#define BN_BITS4 4
+#define BN_MASK (0xffff)
#define BN_MASK2 (0xff)
#define BN_MASK2l (0xf)
#define BN_MASK2h1 (0xf8)
@@ -220,6 +233,12 @@ extern "C" {
#undef BIGNUM
#endif
+#define BN_FLG_MALLOCED 0x01
+#define BN_FLG_STATIC_DATA 0x02
+#define BN_FLG_FREE		0x8000	/* used for debugging */
+#define BN_set_flags(b,n) ((b)->flags|=(n))
+#define BN_get_flags(b,n) ((b)->flags&(n))
+
typedef struct bignum_st
{
BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */
@@ -227,6 +246,7 @@ typedef struct bignum_st
/* The next are internal book keeping for bn_expand. */
int max; /* Size of the d array. */
int neg; /* one if the number is negative */
+ int flags;
} BIGNUM;
/* Used for temp variables */
@@ -234,7 +254,8 @@ typedef struct bignum_st
typedef struct bignum_ctx
{
int tos;
- BIGNUM *bn[BN_CTX_NUM+1];
+ BIGNUM bn[BN_CTX_NUM+1];
+ int flags;
} BN_CTX;
typedef struct bn_blinding_st
@@ -248,51 +269,69 @@ typedef struct bn_blinding_st
/* Used for montgomery multiplication */
typedef struct bn_mont_ctx_st
{
+ int use_word; /* 0 for word form, 1 for long form */
int ri; /* number of bits in R */
- BIGNUM *RR; /* used to convert to montgomery form */
- BIGNUM *N; /* The modulus */
- BIGNUM *Ni; /* The inverse of N */
+ BIGNUM RR; /* used to convert to montgomery form */
+ BIGNUM N; /* The modulus */
+ BIGNUM Ni; /* The inverse of N */
BN_ULONG n0; /* word form of inverse, normally only one of
* Ni or n0 is defined */
+ int flags;
} BN_MONT_CTX;
+/* Used for reciprocal division/mod functions
+ * It cannot be shared between threads
+ */
+typedef struct bn_recp_ctx_st
+ {
+ BIGNUM N; /* the divisor */
+ BIGNUM Nr; /* the reciprocal */
+ int num_bits;
+ int shift;
+ int flags;
+ } BN_RECP_CTX;
+
#define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\
- r,a,(mont)->RR,(mont),ctx)
+ r,a,&((mont)->RR),(mont),ctx)
#define BN_prime_checks (5)
#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8)
#define BN_is_word(a,w) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w)))
-#define BN_is_zero(a) (((a)->top <= 1) && ((a)->d[0] == (BN_ULONG)0))
+#define BN_is_zero(a) (((a)->top == 0) || BN_is_word(a,0))
#define BN_is_one(a) (BN_is_word((a),1))
-#define BN_is_odd(a) ((a)->d[0] & 1)
+#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1))
#define BN_one(a) (BN_set_word((a),1))
#define BN_zero(a) (BN_set_word((a),0))
-#define BN_ascii2bn(a) BN_hex2bn(a)
-#define BN_bn2ascii(a) BN_bn2hex(a)
-
-#define bn_fix_top(a) \
- { \
- BN_ULONG *fix_top_l; \
- for (fix_top_l= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
- if (*(fix_top_l--)) break; \
- }
+/*#define BN_ascii2bn(a) BN_hex2bn(a) */
+/*#define BN_bn2ascii(a) BN_bn2hex(a) */
-#define bn_expand(n,b) ((((b)/BN_BITS2) <= (n)->max)?\
- (n):bn_expand2((n),(b)/BN_BITS2))
+#define bn_expand(n,b) ((((((b+BN_BITS2-1))/BN_BITS2)) <= (n)->max)?\
+ (n):bn_expand2((n),(b)/BN_BITS2+1))
#define bn_wexpand(n,b) (((b) <= (n)->max)?(n):bn_expand2((n),(b)))
+#define bn_fix_top(a) \
+ { \
+ BN_ULONG *ftl; \
+ if ((a)->top > 0) \
+ { \
+ for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
+ if (*(ftl--)) break; \
+ } \
+ }
#ifndef NOPROTO
BIGNUM *BN_value_one(void);
char * BN_options(void);
BN_CTX *BN_CTX_new(void);
+void BN_CTX_init(BN_CTX *c);
void BN_CTX_free(BN_CTX *c);
int BN_rand(BIGNUM *rnd, int bits, int top,int bottom);
int BN_num_bits(BIGNUM *a);
int BN_num_bits_word(BN_ULONG);
BIGNUM *BN_new(void);
+void BN_init(BIGNUM *);
void BN_clear_free(BIGNUM *a);
BIGNUM *BN_copy(BIGNUM *a, BIGNUM *b);
BIGNUM *BN_bin2bn(unsigned char *s,int len,BIGNUM *ret);
@@ -300,20 +339,20 @@ int BN_bn2bin(BIGNUM *a, unsigned char *to);
BIGNUM *BN_mpi2bn(unsigned char *s,int len,BIGNUM *ret);
int BN_bn2mpi(BIGNUM *a, unsigned char *to);
int BN_sub(BIGNUM *r, BIGNUM *a, BIGNUM *b);
-void bn_qsub(BIGNUM *r, BIGNUM *a, BIGNUM *b);
-void bn_qadd(BIGNUM *r, BIGNUM *a, BIGNUM *b);
+int BN_usub(BIGNUM *r, BIGNUM *a, BIGNUM *b);
+int BN_uadd(BIGNUM *r, BIGNUM *a, BIGNUM *b);
int BN_add(BIGNUM *r, BIGNUM *a, BIGNUM *b);
int BN_mod(BIGNUM *rem, BIGNUM *m, BIGNUM *d, BN_CTX *ctx);
int BN_div(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, BIGNUM *d, BN_CTX *ctx);
-int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b);
+int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b,BN_CTX *ctx);
int BN_sqr(BIGNUM *r, BIGNUM *a,BN_CTX *ctx);
-BN_ULONG BN_mod_word(BIGNUM *a, unsigned long w);
-BN_ULONG BN_div_word(BIGNUM *a, unsigned long w);
-int BN_mul_word(BIGNUM *a, unsigned long w);
-int BN_add_word(BIGNUM *a, unsigned long w);
-int BN_sub_word(BIGNUM *a, unsigned long w);
-int BN_set_word(BIGNUM *a, unsigned long w);
-unsigned long BN_get_word(BIGNUM *a);
+BN_ULONG BN_mod_word(BIGNUM *a, BN_ULONG w);
+BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w);
+int BN_mul_word(BIGNUM *a, BN_ULONG w);
+int BN_add_word(BIGNUM *a, BN_ULONG w);
+int BN_sub_word(BIGNUM *a, BN_ULONG w);
+int BN_set_word(BIGNUM *a, BN_ULONG w);
+BN_ULONG BN_get_word(BIGNUM *a);
int BN_cmp(BIGNUM *a, BIGNUM *b);
void BN_free(BIGNUM *a);
int BN_is_bit_set(BIGNUM *a, int n);
@@ -323,12 +362,11 @@ int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p,BN_CTX *ctx);
int BN_mod_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,BN_CTX *ctx);
int BN_mod_exp_mont(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,BN_CTX *ctx,
BN_MONT_CTX *m_ctx);
-int BN_mod_exp_recp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,BN_CTX *ctx);
+int BN_mod_exp2_mont(BIGNUM *r, BIGNUM *a1, BIGNUM *p1,BIGNUM *a2,
+ BIGNUM *p2,BIGNUM *m,BN_CTX *ctx,BN_MONT_CTX *m_ctx);
int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p,
BIGNUM *m,BN_CTX *ctx);
int BN_mask_bits(BIGNUM *a,int n);
-int BN_mod_mul_reciprocal(BIGNUM *r, BIGNUM *x, BIGNUM *y, BIGNUM *m,
- BIGNUM *i, int nb, BN_CTX *ctx);
int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *m,
BN_CTX *ctx);
#ifndef WIN16
@@ -339,7 +377,7 @@ int BN_print(BIO *fp, BIGNUM *a);
#else
int BN_print(char *fp, BIGNUM *a);
#endif
-int BN_reciprocal(BIGNUM *r, BIGNUM *m, BN_CTX *ctx);
+int BN_reciprocal(BIGNUM *r, BIGNUM *m, int len, BN_CTX *ctx);
int BN_rshift(BIGNUM *r, BIGNUM *a, int n);
int BN_rshift1(BIGNUM *r, BIGNUM *a);
void BN_clear(BIGNUM *a);
@@ -353,8 +391,8 @@ char * BN_bn2dec(BIGNUM *a);
int BN_hex2bn(BIGNUM **a,char *str);
int BN_dec2bn(BIGNUM **a,char *str);
int BN_gcd(BIGNUM *r,BIGNUM *in_a,BIGNUM *in_b,BN_CTX *ctx);
-BIGNUM *BN_mod_inverse(BIGNUM *a, BIGNUM *n,BN_CTX *ctx);
-BIGNUM *BN_generate_prime(int bits,int strong,BIGNUM *add,
+BIGNUM *BN_mod_inverse(BIGNUM *ret,BIGNUM *a, BIGNUM *n,BN_CTX *ctx);
+BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int strong,BIGNUM *add,
BIGNUM *rem,void (*callback)(int,int,char *),char *cb_arg);
int BN_is_prime(BIGNUM *p,int nchecks,void (*callback)(int,int,char *),
BN_CTX *ctx,char *cb_arg);
@@ -363,15 +401,18 @@ void ERR_load_BN_strings(void );
BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w);
BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w);
void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num);
-BN_ULONG bn_div64(BN_ULONG h, BN_ULONG l, BN_ULONG d);
+BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num);
+BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num);
BN_MONT_CTX *BN_MONT_CTX_new(void );
+void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
int BN_mod_mul_montgomery(BIGNUM *r,BIGNUM *a,BIGNUM *b,BN_MONT_CTX *mont,
BN_CTX *ctx);
int BN_from_montgomery(BIGNUM *r,BIGNUM *a,BN_MONT_CTX *mont,BN_CTX *ctx);
void BN_MONT_CTX_free(BN_MONT_CTX *mont);
int BN_MONT_CTX_set(BN_MONT_CTX *mont,BIGNUM *modulus,BN_CTX *ctx);
+BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to,BN_MONT_CTX *from);
BN_BLINDING *BN_BLINDING_new(BIGNUM *A,BIGNUM *Ai,BIGNUM *mod);
void BN_BLINDING_free(BN_BLINDING *b);
@@ -379,16 +420,45 @@ int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx);
int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *r, BN_CTX *ctx);
int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
+void BN_set_params(int mul,int high,int low,int mont);
+int BN_get_params(int which); /* 0, mul, 1 high, 2 low, 3 mont */
+
+void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb);
+void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
+void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
+void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp);
+void bn_sqr_comba8(BN_ULONG *r,BN_ULONG *a);
+void bn_sqr_comba4(BN_ULONG *r,BN_ULONG *a);
+int bn_cmp_words(BN_ULONG *a,BN_ULONG *b,int n);
+void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,BN_ULONG *t);
+void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,
+ int tn, int n,BN_ULONG *t);
+void bn_sqr_recursive(BN_ULONG *r,BN_ULONG *a, int n2, BN_ULONG *t);
+void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n);
+
+void BN_RECP_CTX_init(BN_RECP_CTX *recp);
+BN_RECP_CTX *BN_RECP_CTX_new(void);
+void BN_RECP_CTX_free(BN_RECP_CTX *recp);
+int BN_RECP_CTX_set(BN_RECP_CTX *recp,BIGNUM *rdiv,BN_CTX *ctx);
+int BN_mod_mul_reciprocal(BIGNUM *r, BIGNUM *x, BIGNUM *y,
+ BN_RECP_CTX *recp,BN_CTX *ctx);
+int BN_mod_exp_recp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,BN_CTX *ctx);
+int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m,
+ BN_RECP_CTX *recp, BN_CTX *ctx);
+
+
#else
BIGNUM *BN_value_one();
char * BN_options();
BN_CTX *BN_CTX_new();
+void BN_CTX_init();
void BN_CTX_free();
int BN_rand();
int BN_num_bits();
int BN_num_bits_word();
BIGNUM *BN_new();
+void BN_init();
void BN_clear_free();
BIGNUM *BN_copy();
BIGNUM *BN_bin2bn();
@@ -396,8 +466,8 @@ int BN_bn2bin();
BIGNUM *BN_mpi2bn();
int BN_bn2mpi();
int BN_sub();
-void bn_qsub();
-void bn_qadd();
+int BN_usub();
+int BN_uadd();
int BN_add();
int BN_mod();
int BN_div();
@@ -449,12 +519,14 @@ void ERR_load_BN_strings();
BN_ULONG bn_mul_add_words();
BN_ULONG bn_mul_words();
void bn_sqr_words();
-BN_ULONG bn_div64();
+BN_ULONG bn_div_words();
BN_ULONG bn_add_words();
+BN_ULONG bn_sub_words();
int BN_mod_mul_montgomery();
int BN_from_montgomery();
BN_MONT_CTX *BN_MONT_CTX_new();
+void BN_MONT_CTX_init();
void BN_MONT_CTX_free();
int BN_MONT_CTX_set();
@@ -464,6 +536,26 @@ int BN_BLINDING_update();
int BN_BLINDING_convert();
int BN_BLINDING_invert();
+void bn_mul_normal();
+void bn_mul_comba8();
+void bn_mul_comba4();
+void bn_sqr_normal();
+void bn_sqr_comba8();
+void bn_sqr_comba4();
+int bn_cmp_words();
+void bn_mul_recursive();
+void bn_mul_part_recursive();
+void bn_sqr_recursive();
+void bn_mul_low_normal();
+
+void BN_RECP_CTX_init();
+BN_RECP_CTX *BN_RECP_CTX_new();
+void BN_RECP_CTX_free();
+int BN_RECP_CTX_set();
+int BN_mod_mul_reciprocal();
+int BN_mod_exp_recp();
+int BN_div_recp();
+
#endif
/* BEGIN ERROR CODES */
@@ -485,15 +577,18 @@ int BN_BLINDING_invert();
#define BN_F_BN_MPI2BN 112
#define BN_F_BN_NEW 113
#define BN_F_BN_RAND 114
+#define BN_F_BN_USUB 115
/* Reason codes. */
-#define BN_R_BAD_RECIPROCAL 100
-#define BN_R_CALLED_WITH_EVEN_MODULUS 101
-#define BN_R_DIV_BY_ZERO 102
-#define BN_R_ENCODING_ERROR 103
-#define BN_R_INVALID_LENGTH 104
-#define BN_R_NOT_INITALISED 105
-#define BN_R_NO_INVERSE 106
+#define BN_R_ARG2_LT_ARG3 100
+#define BN_R_BAD_RECIPROCAL 101
+#define BN_R_CALLED_WITH_EVEN_MODULUS 102
+#define BN_R_DIV_BY_ZERO 103
+#define BN_R_ENCODING_ERROR 104
+#define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105
+#define BN_R_INVALID_LENGTH 106
+#define BN_R_NOT_INITALISED 107
+#define BN_R_NO_INVERSE 108
#ifdef __cplusplus
}
diff --git a/crypto/bn/bn.mul b/crypto/bn/bn.mul
new file mode 100644
index 0000000000..9728870d38
--- /dev/null
+++ b/crypto/bn/bn.mul
@@ -0,0 +1,19 @@
+We need
+
+* bn_mul_comba8
+* bn_mul_comba4
+* bn_mul_normal
+* bn_mul_recursive
+
+* bn_sqr_comba8
+* bn_sqr_comba4
+bn_sqr_normal -> BN_sqr
+* bn_sqr_recursive
+
+* bn_mul_low_recursive
+* bn_mul_low_normal
+* bn_mul_high
+
+* bn_mul_part_recursive	# symmetric but not power of 2
+
+bn_mul_asymetric_recursive # uneven, but do the chop up.
diff --git a/crypto/bn/bn.org b/crypto/bn/bn.org
index 66dde285d6..d8904d7efa 100644
--- a/crypto/bn/bn.org
+++ b/crypto/bn/bn.org
@@ -77,6 +77,9 @@ extern "C" {
#define BN_LLONG /* This comment stops Configure mutilating things */
#endif
+#define BN_MUL_COMBA
+#define BN_SQR_COMBA
+#define BN_RECURSION
#define RECP_MUL_MOD
#define MONT_MUL_MOD
@@ -105,6 +108,7 @@ extern "C" {
#undef SIXTEEN_BIT
#undef EIGHT_BIT
+
/* assuming long is 64bit - this is the DEC Alpha
* unsigned long long is only 64 bits :-(, don't define
* BN_LLONG for the DEC Alpha */
@@ -116,17 +120,23 @@ extern "C" {
#define BN_BYTES 8
#define BN_BITS2 64
#define BN_BITS4 32
+#define BN_MASK (0xffffffffffffffffffffffffffffffffLL)
#define BN_MASK2 (0xffffffffffffffffL)
#define BN_MASK2l (0xffffffffL)
#define BN_MASK2h (0xffffffff00000000L)
#define BN_MASK2h1 (0xffffffff80000000L)
#define BN_TBIT (0x8000000000000000L)
-#define BN_DEC_CONV (10000000000000000000L)
+#define BN_DEC_CONV (10000000000000000000UL)
#define BN_DEC_FMT1 "%lu"
#define BN_DEC_FMT2 "%019lu"
#define BN_DEC_NUM 19
#endif
+/* This is where the long long data type is 64 bits, but long is 32.
+ * For machines where there are 64bit registers, this is the mode to use.
+ * IRIX, on R4000 and above should use this mode, along with the relevant
+ * assembler code :-). Do NOT define BN_ULLONG.
+ */
#ifdef SIXTY_FOUR_BIT
#undef BN_LLONG
/* #define BN_ULLONG unsigned long long */
@@ -141,9 +151,9 @@ extern "C" {
#define BN_MASK2h (0xffffffff00000000LL)
#define BN_MASK2h1 (0xffffffff80000000LL)
#define BN_TBIT (0x8000000000000000LL)
-#define BN_DEC_CONV (10000000000000000000L)
-#define BN_DEC_FMT1 "%lu"
-#define BN_DEC_FMT2 "%019lu"
+#define BN_DEC_CONV (10000000000000000000LL)
+#define BN_DEC_FMT1 "%llu"
+#define BN_DEC_FMT2 "%019llu"
#define BN_DEC_NUM 19
#endif
@@ -159,6 +169,7 @@ extern "C" {
#define BN_BYTES 4
#define BN_BITS2 32
#define BN_BITS4 16
+#define BN_MASK (0xffffffffffffffffLL)
#define BN_MASK2 (0xffffffffL)
#define BN_MASK2l (0xffff)
#define BN_MASK2h1 (0xffff8000L)
@@ -181,6 +192,7 @@ extern "C" {
#define BN_BYTES 2
#define BN_BITS2 16
#define BN_BITS4 8
+#define BN_MASK (0xffffffff)
#define BN_MASK2 (0xffff)
#define BN_MASK2l (0xff)
#define BN_MASK2h1 (0xff80)
@@ -203,6 +215,7 @@ extern "C" {
#define BN_BYTES 1
#define BN_BITS2 8
#define BN_BITS4 4
+#define BN_MASK (0xffff)
#define BN_MASK2 (0xff)
#define BN_MASK2l (0xf)
#define BN_MASK2h1 (0xf8)
@@ -220,6 +233,12 @@ extern "C" {
#undef BIGNUM
#endif
+#define BN_FLG_MALLOCED 0x01
+#define BN_FLG_STATIC_DATA 0x02
+#define BN_FLG_FREE		0x8000	/* used for debugging */
+#define BN_set_flags(b,n) ((b)->flags|=(n))
+#define BN_get_flags(b,n) ((b)->flags&(n))
+
typedef struct bignum_st
{
BN_ULONG *d; /* Pointer to an array of 'BN_BITS2' bit chunks. */
@@ -227,6 +246,7 @@ typedef struct bignum_st
/* The next are internal book keeping for bn_expand. */
int max; /* Size of the d array. */
int neg; /* one if the number is negative */
+ int flags;
} BIGNUM;
/* Used for temp variables */
@@ -234,7 +254,8 @@ typedef struct bignum_st
typedef struct bignum_ctx
{
int tos;
- BIGNUM *bn[BN_CTX_NUM+1];
+ BIGNUM bn[BN_CTX_NUM+1];
+ int flags;
} BN_CTX;
typedef struct bn_blinding_st
@@ -248,51 +269,69 @@ typedef struct bn_blinding_st
/* Used for montgomery multiplication */
typedef struct bn_mont_ctx_st
{
+ int use_word; /* 0 for word form, 1 for long form */
int ri; /* number of bits in R */
- BIGNUM *RR; /* used to convert to montgomery form */
- BIGNUM *N; /* The modulus */
- BIGNUM *Ni; /* The inverse of N */
+ BIGNUM RR; /* used to convert to montgomery form */
+ BIGNUM N; /* The modulus */
+ BIGNUM Ni; /* The inverse of N */
BN_ULONG n0; /* word form of inverse, normally only one of
* Ni or n0 is defined */
+ int flags;
} BN_MONT_CTX;
+/* Used for reciprocal division/mod functions
+ * It cannot be shared between threads
+ */
+typedef struct bn_recp_ctx_st
+ {
+ BIGNUM N; /* the divisor */
+ BIGNUM Nr; /* the reciprocal */
+ int num_bits;
+ int shift;
+ int flags;
+ } BN_RECP_CTX;
+
#define BN_to_montgomery(r,a,mont,ctx) BN_mod_mul_montgomery(\
- r,a,(mont)->RR,(mont),ctx)
+ r,a,&((mont)->RR),(mont),ctx)
#define BN_prime_checks (5)
#define BN_num_bytes(a) ((BN_num_bits(a)+7)/8)
#define BN_is_word(a,w) (((a)->top == 1) && ((a)->d[0] == (BN_ULONG)(w)))
-#define BN_is_zero(a) (((a)->top <= 1) && ((a)->d[0] == (BN_ULONG)0))
+#define BN_is_zero(a) (((a)->top == 0) || BN_is_word(a,0))
#define BN_is_one(a) (BN_is_word((a),1))
-#define BN_is_odd(a) ((a)->d[0] & 1)
+#define BN_is_odd(a) (((a)->top > 0) && ((a)->d[0] & 1))
#define BN_one(a) (BN_set_word((a),1))
#define BN_zero(a) (BN_set_word((a),0))
-#define BN_ascii2bn(a) BN_hex2bn(a)
-#define BN_bn2ascii(a) BN_bn2hex(a)
-
-#define bn_fix_top(a) \
- { \
- BN_ULONG *fix_top_l; \
- for (fix_top_l= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
- if (*(fix_top_l--)) break; \
- }
+/*#define BN_ascii2bn(a) BN_hex2bn(a) */
+/*#define BN_bn2ascii(a) BN_bn2hex(a) */
-#define bn_expand(n,b) ((((b)/BN_BITS2) <= (n)->max)?\
- (n):bn_expand2((n),(b)/BN_BITS2))
+#define bn_expand(n,b) ((((((b+BN_BITS2-1))/BN_BITS2)) <= (n)->max)?\
+ (n):bn_expand2((n),(b)/BN_BITS2+1))
#define bn_wexpand(n,b) (((b) <= (n)->max)?(n):bn_expand2((n),(b)))
+#define bn_fix_top(a) \
+ { \
+ BN_ULONG *ftl; \
+ if ((a)->top > 0) \
+ { \
+ for (ftl= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
+ if (*(ftl--)) break; \
+ } \
+ }
#ifndef NOPROTO
BIGNUM *BN_value_one(void);
char * BN_options(void);
BN_CTX *BN_CTX_new(void);
+void BN_CTX_init(BN_CTX *c);
void BN_CTX_free(BN_CTX *c);
int BN_rand(BIGNUM *rnd, int bits, int top,int bottom);
int BN_num_bits(BIGNUM *a);
int BN_num_bits_word(BN_ULONG);
BIGNUM *BN_new(void);
+void BN_init(BIGNUM *);
void BN_clear_free(BIGNUM *a);
BIGNUM *BN_copy(BIGNUM *a, BIGNUM *b);
BIGNUM *BN_bin2bn(unsigned char *s,int len,BIGNUM *ret);
@@ -300,20 +339,20 @@ int BN_bn2bin(BIGNUM *a, unsigned char *to);
BIGNUM *BN_mpi2bn(unsigned char *s,int len,BIGNUM *ret);
int BN_bn2mpi(BIGNUM *a, unsigned char *to);
int BN_sub(BIGNUM *r, BIGNUM *a, BIGNUM *b);
-void bn_qsub(BIGNUM *r, BIGNUM *a, BIGNUM *b);
-void bn_qadd(BIGNUM *r, BIGNUM *a, BIGNUM *b);
+int BN_usub(BIGNUM *r, BIGNUM *a, BIGNUM *b);
+int BN_uadd(BIGNUM *r, BIGNUM *a, BIGNUM *b);
int BN_add(BIGNUM *r, BIGNUM *a, BIGNUM *b);
int BN_mod(BIGNUM *rem, BIGNUM *m, BIGNUM *d, BN_CTX *ctx);
int BN_div(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, BIGNUM *d, BN_CTX *ctx);
-int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b);
+int BN_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b,BN_CTX *ctx);
int BN_sqr(BIGNUM *r, BIGNUM *a,BN_CTX *ctx);
-BN_ULONG BN_mod_word(BIGNUM *a, unsigned long w);
-BN_ULONG BN_div_word(BIGNUM *a, unsigned long w);
-int BN_mul_word(BIGNUM *a, unsigned long w);
-int BN_add_word(BIGNUM *a, unsigned long w);
-int BN_sub_word(BIGNUM *a, unsigned long w);
-int BN_set_word(BIGNUM *a, unsigned long w);
-unsigned long BN_get_word(BIGNUM *a);
+BN_ULONG BN_mod_word(BIGNUM *a, BN_ULONG w);
+BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w);
+int BN_mul_word(BIGNUM *a, BN_ULONG w);
+int BN_add_word(BIGNUM *a, BN_ULONG w);
+int BN_sub_word(BIGNUM *a, BN_ULONG w);
+int BN_set_word(BIGNUM *a, BN_ULONG w);
+BN_ULONG BN_get_word(BIGNUM *a);
int BN_cmp(BIGNUM *a, BIGNUM *b);
void BN_free(BIGNUM *a);
int BN_is_bit_set(BIGNUM *a, int n);
@@ -323,12 +362,11 @@ int BN_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p,BN_CTX *ctx);
int BN_mod_exp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,BN_CTX *ctx);
int BN_mod_exp_mont(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,BN_CTX *ctx,
BN_MONT_CTX *m_ctx);
-int BN_mod_exp_recp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,BN_CTX *ctx);
+int BN_mod_exp2_mont(BIGNUM *r, BIGNUM *a1, BIGNUM *p1,BIGNUM *a2,
+ BIGNUM *p2,BIGNUM *m,BN_CTX *ctx,BN_MONT_CTX *m_ctx);
int BN_mod_exp_simple(BIGNUM *r, BIGNUM *a, BIGNUM *p,
BIGNUM *m,BN_CTX *ctx);
int BN_mask_bits(BIGNUM *a,int n);
-int BN_mod_mul_reciprocal(BIGNUM *r, BIGNUM *x, BIGNUM *y, BIGNUM *m,
- BIGNUM *i, int nb, BN_CTX *ctx);
int BN_mod_mul(BIGNUM *ret, BIGNUM *a, BIGNUM *b, BIGNUM *m,
BN_CTX *ctx);
#ifndef WIN16
@@ -339,7 +377,7 @@ int BN_print(BIO *fp, BIGNUM *a);
#else
int BN_print(char *fp, BIGNUM *a);
#endif
-int BN_reciprocal(BIGNUM *r, BIGNUM *m, BN_CTX *ctx);
+int BN_reciprocal(BIGNUM *r, BIGNUM *m, int len, BN_CTX *ctx);
int BN_rshift(BIGNUM *r, BIGNUM *a, int n);
int BN_rshift1(BIGNUM *r, BIGNUM *a);
void BN_clear(BIGNUM *a);
@@ -353,8 +391,8 @@ char * BN_bn2dec(BIGNUM *a);
int BN_hex2bn(BIGNUM **a,char *str);
int BN_dec2bn(BIGNUM **a,char *str);
int BN_gcd(BIGNUM *r,BIGNUM *in_a,BIGNUM *in_b,BN_CTX *ctx);
-BIGNUM *BN_mod_inverse(BIGNUM *a, BIGNUM *n,BN_CTX *ctx);
-BIGNUM *BN_generate_prime(int bits,int strong,BIGNUM *add,
+BIGNUM *BN_mod_inverse(BIGNUM *ret,BIGNUM *a, BIGNUM *n,BN_CTX *ctx);
+BIGNUM *BN_generate_prime(BIGNUM *ret,int bits,int strong,BIGNUM *add,
BIGNUM *rem,void (*callback)(int,int,char *),char *cb_arg);
int BN_is_prime(BIGNUM *p,int nchecks,void (*callback)(int,int,char *),
BN_CTX *ctx,char *cb_arg);
@@ -363,15 +401,18 @@ void ERR_load_BN_strings(void );
BN_ULONG bn_mul_add_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w);
BN_ULONG bn_mul_words(BN_ULONG *rp, BN_ULONG *ap, int num, BN_ULONG w);
void bn_sqr_words(BN_ULONG *rp, BN_ULONG *ap, int num);
-BN_ULONG bn_div64(BN_ULONG h, BN_ULONG l, BN_ULONG d);
+BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d);
BN_ULONG bn_add_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num);
+BN_ULONG bn_sub_words(BN_ULONG *rp, BN_ULONG *ap, BN_ULONG *bp,int num);
BN_MONT_CTX *BN_MONT_CTX_new(void );
+void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
int BN_mod_mul_montgomery(BIGNUM *r,BIGNUM *a,BIGNUM *b,BN_MONT_CTX *mont,
BN_CTX *ctx);
int BN_from_montgomery(BIGNUM *r,BIGNUM *a,BN_MONT_CTX *mont,BN_CTX *ctx);
void BN_MONT_CTX_free(BN_MONT_CTX *mont);
int BN_MONT_CTX_set(BN_MONT_CTX *mont,BIGNUM *modulus,BN_CTX *ctx);
+BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to,BN_MONT_CTX *from);
BN_BLINDING *BN_BLINDING_new(BIGNUM *A,BIGNUM *Ai,BIGNUM *mod);
void BN_BLINDING_free(BN_BLINDING *b);
@@ -379,16 +420,45 @@ int BN_BLINDING_update(BN_BLINDING *b,BN_CTX *ctx);
int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *r, BN_CTX *ctx);
int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx);
+void BN_set_params(int mul,int high,int low,int mont);
+int BN_get_params(int which); /* 0, mul, 1 high, 2 low, 3 mont */
+
+void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb);
+void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
+void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
+void bn_sqr_normal(BN_ULONG *r, BN_ULONG *a, int n, BN_ULONG *tmp);
+void bn_sqr_comba8(BN_ULONG *r,BN_ULONG *a);
+void bn_sqr_comba4(BN_ULONG *r,BN_ULONG *a);
+int bn_cmp_words(BN_ULONG *a,BN_ULONG *b,int n);
+void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,BN_ULONG *t);
+void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,
+ int tn, int n,BN_ULONG *t);
+void bn_sqr_recursive(BN_ULONG *r,BN_ULONG *a, int n2, BN_ULONG *t);
+void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n);
+
+void BN_RECP_CTX_init(BN_RECP_CTX *recp);
+BN_RECP_CTX *BN_RECP_CTX_new(void);
+void BN_RECP_CTX_free(BN_RECP_CTX *recp);
+int BN_RECP_CTX_set(BN_RECP_CTX *recp,BIGNUM *rdiv,BN_CTX *ctx);
+int BN_mod_mul_reciprocal(BIGNUM *r, BIGNUM *x, BIGNUM *y,
+ BN_RECP_CTX *recp,BN_CTX *ctx);
+int BN_mod_exp_recp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,BN_CTX *ctx);
+int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m,
+ BN_RECP_CTX *recp, BN_CTX *ctx);
+
+
#else
BIGNUM *BN_value_one();
char * BN_options();
BN_CTX *BN_CTX_new();
+void BN_CTX_init();
void BN_CTX_free();
int BN_rand();
int BN_num_bits();
int BN_num_bits_word();
BIGNUM *BN_new();
+void BN_init();
void BN_clear_free();
BIGNUM *BN_copy();
BIGNUM *BN_bin2bn();
@@ -396,8 +466,8 @@ int BN_bn2bin();
BIGNUM *BN_mpi2bn();
int BN_bn2mpi();
int BN_sub();
-void bn_qsub();
-void bn_qadd();
+int BN_usub();
+int BN_uadd();
int BN_add();
int BN_mod();
int BN_div();
@@ -449,12 +519,14 @@ void ERR_load_BN_strings();
BN_ULONG bn_mul_add_words();
BN_ULONG bn_mul_words();
void bn_sqr_words();
-BN_ULONG bn_div64();
+BN_ULONG bn_div_words();
BN_ULONG bn_add_words();
+BN_ULONG bn_sub_words();
int BN_mod_mul_montgomery();
int BN_from_montgomery();
BN_MONT_CTX *BN_MONT_CTX_new();
+void BN_MONT_CTX_init();
void BN_MONT_CTX_free();
int BN_MONT_CTX_set();
@@ -464,6 +536,26 @@ int BN_BLINDING_update();
int BN_BLINDING_convert();
int BN_BLINDING_invert();
+void bn_mul_normal();
+void bn_mul_comba8();
+void bn_mul_comba4();
+void bn_sqr_normal();
+void bn_sqr_comba8();
+void bn_sqr_comba4();
+int bn_cmp_words();
+void bn_mul_recursive();
+void bn_mul_part_recursive();
+void bn_sqr_recursive();
+void bn_mul_low_normal();
+
+void BN_RECP_CTX_init();
+BN_RECP_CTX *BN_RECP_CTX_new();
+void BN_RECP_CTX_free();
+int BN_RECP_CTX_set();
+int BN_mod_mul_reciprocal();
+int BN_mod_exp_recp();
+int BN_div_recp();
+
#endif
/* BEGIN ERROR CODES */
@@ -485,15 +577,18 @@ int BN_BLINDING_invert();
#define BN_F_BN_MPI2BN 112
#define BN_F_BN_NEW 113
#define BN_F_BN_RAND 114
+#define BN_F_BN_USUB 115
/* Reason codes. */
-#define BN_R_BAD_RECIPROCAL 100
-#define BN_R_CALLED_WITH_EVEN_MODULUS 101
-#define BN_R_DIV_BY_ZERO 102
-#define BN_R_ENCODING_ERROR 103
-#define BN_R_INVALID_LENGTH 104
-#define BN_R_NOT_INITALISED 105
-#define BN_R_NO_INVERSE 106
+#define BN_R_ARG2_LT_ARG3 100
+#define BN_R_BAD_RECIPROCAL 101
+#define BN_R_CALLED_WITH_EVEN_MODULUS 102
+#define BN_R_DIV_BY_ZERO 103
+#define BN_R_ENCODING_ERROR 104
+#define BN_R_EXPAND_ON_STATIC_BIGNUM_DATA 105
+#define BN_R_INVALID_LENGTH 106
+#define BN_R_NOT_INITALISED 107
+#define BN_R_NO_INVERSE 108
#ifdef __cplusplus
}
diff --git a/crypto/bn/bn_add.c b/crypto/bn/bn_add.c
index efb2e312e8..27b781a367 100644
--- a/crypto/bn/bn_add.c
+++ b/crypto/bn/bn_add.c
@@ -66,9 +66,11 @@ BIGNUM *r;
BIGNUM *a;
BIGNUM *b;
{
- int i;
BIGNUM *tmp;
+ bn_check_top(a);
+ bn_check_top(b);
+
/* a + b a+b
* a + -b a-b
* -a + b b-a
@@ -84,14 +86,12 @@ BIGNUM *b;
if (BN_ucmp(a,b) < 0)
{
- if (bn_wexpand(r,b->top) == NULL) return(0);
- bn_qsub(r,b,a);
+ if (!BN_usub(r,b,a)) return(0);
r->neg=1;
}
else
{
- if (bn_wexpand(r,a->top) == NULL) return(0);
- bn_qsub(r,a,b);
+ if (!BN_usub(r,a,b)) return(0);
r->neg=0;
}
return(1);
@@ -102,23 +102,12 @@ BIGNUM *b;
else
r->neg=0;
- i=(a->top > b->top);
-
- if (i)
- {
- if (bn_wexpand(r,a->top+1) == NULL) return(0);
- bn_qadd(r,a,b);
- }
- else
- {
- if (bn_wexpand(r,b->top+1) == NULL) return(0);
- bn_qadd(r,b,a);
- }
+ if (!BN_uadd(r,a,b)) return(0);
return(1);
}
/* unsigned add of b to a, r must be large enough */
-void bn_qadd(r,a,b)
+int BN_uadd(r,a,b)
BIGNUM *r;
BIGNUM *a;
BIGNUM *b;
@@ -126,11 +115,22 @@ BIGNUM *b;
register int i;
int max,min;
BN_ULONG *ap,*bp,*rp,carry,t1;
+ BIGNUM *tmp;
+
+ bn_check_top(a);
+ bn_check_top(b);
+ if (a->top < b->top)
+ { tmp=a; a=b; b=tmp; }
max=a->top;
min=b->top;
+
+ if (bn_wexpand(r,max+1) == NULL)
+ return(0);
+
r->top=max;
+
ap=a->d;
bp=b->d;
rp=r->d;
@@ -160,8 +160,160 @@ BIGNUM *b;
r->top++;
}
}
- for (; i<max; i++)
- *(rp++)= *(ap++);
+ if (rp != ap)
+ {
+ for (; i<max; i++)
+ *(rp++)= *(ap++);
+ }
/* memcpy(rp,ap,sizeof(*ap)*(max-i));*/
+ return(1);
+ }
+
+/* unsigned subtraction of b from a, a must be larger than b. */
+int BN_usub(r, a, b)
+BIGNUM *r;
+BIGNUM *a;
+BIGNUM *b;
+ {
+ int max,min,ret=1;
+ register BN_ULONG t1,t2,*ap,*bp,*rp;
+ int i,carry;
+#if defined(IRIX_CC_BUG) && !defined(LINT)
+ int dummy;
+#endif
+
+ bn_check_top(a);
+ bn_check_top(b);
+
+ if (a->top < b->top) /* hmm... should not be happening */
+ {
+ BNerr(BN_F_BN_USUB,BN_R_ARG2_LT_ARG3);
+ return(0);
+ }
+
+ max=a->top;
+ min=b->top;
+ if (bn_wexpand(r,max) == NULL) return(0);
+
+ ap=a->d;
+ bp=b->d;
+ rp=r->d;
+
+#if 1
+ carry=0;
+ for (i=0; i<min; i++)
+ {
+ t1= *(ap++);
+ t2= *(bp++);
+ if (carry)
+ {
+ carry=(t1 <= t2);
+ t1=(t1-t2-1)&BN_MASK2;
+ }
+ else
+ {
+ carry=(t1 < t2);
+ t1=(t1-t2)&BN_MASK2;
+ }
+#if defined(IRIX_CC_BUG) && !defined(LINT)
+ dummy=t1;
+#endif
+ *(rp++)=t1&BN_MASK2;
+ }
+#else
+ carry=bn_sub_words(rp,ap,bp,min);
+ ap+=min;
+ bp+=min;
+ rp+=min;
+ i=min;
+#endif
+ if (carry) /* subtracted */
+ {
+ while (i < max)
+ {
+ i++;
+ t1= *(ap++);
+ t2=(t1-1)&BN_MASK2;
+ *(rp++)=t2;
+ if (t1 > t2) break;
+ }
+ }
+#if 0
+ memcpy(rp,ap,sizeof(*rp)*(max-i));
+#else
+ if (rp != ap)
+ {
+ for (;;)
+ {
+ if (i++ >= max) break;
+ rp[0]=ap[0];
+ if (i++ >= max) break;
+ rp[1]=ap[1];
+ if (i++ >= max) break;
+ rp[2]=ap[2];
+ if (i++ >= max) break;
+ rp[3]=ap[3];
+ rp+=4;
+ ap+=4;
+ }
+ }
+#endif
+
+ r->top=max;
+ bn_fix_top(r);
+ return(1);
+ }
+
+int BN_sub(r, a, b)
+BIGNUM *r;
+BIGNUM *a;
+BIGNUM *b;
+ {
+ int max;
+ int add=0,neg=0;
+ BIGNUM *tmp;
+
+ bn_check_top(a);
+ bn_check_top(b);
+
+ /* a - b a-b
+ * a - -b a+b
+ * -a - b -(a+b)
+ * -a - -b b-a
+ */
+ if (a->neg)
+ {
+ if (b->neg)
+ { tmp=a; a=b; b=tmp; }
+ else
+ { add=1; neg=1; }
+ }
+ else
+ {
+ if (b->neg) { add=1; neg=0; }
+ }
+
+ if (add)
+ {
+ if (!BN_uadd(r,a,b)) return(0);
+ r->neg=neg;
+ return(1);
+ }
+
+ /* We are actually doing a - b :-) */
+
+ max=(a->top > b->top)?a->top:b->top;
+ if (bn_wexpand(r,max) == NULL) return(0);
+ if (BN_ucmp(a,b) < 0)
+ {
+ if (!BN_usub(r,b,a)) return(0);
+ r->neg=1;
+ }
+ else
+ {
+ if (!BN_usub(r,a,b)) return(0);
+ r->neg=0;
+ }
+ return(1);
}
diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c
new file mode 100644
index 0000000000..c9eb0e9d05
--- /dev/null
+++ b/crypto/bn/bn_asm.c
@@ -0,0 +1,829 @@
+/* crypto/bn/bn_asm.c */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn_lcl.h"
+
+#ifdef BN_LLONG
+
+BN_ULONG bn_mul_add_words(rp,ap,num,w)
+BN_ULONG *rp,*ap;
+int num;
+BN_ULONG w;
+ {
+ BN_ULONG c1=0;
+
+ bn_check_num(num);
+ if (num <= 0) return(c1);
+
+ for (;;)
+ {
+ mul_add(rp[0],ap[0],w,c1);
+ if (--num == 0) break;
+ mul_add(rp[1],ap[1],w,c1);
+ if (--num == 0) break;
+ mul_add(rp[2],ap[2],w,c1);
+ if (--num == 0) break;
+ mul_add(rp[3],ap[3],w,c1);
+ if (--num == 0) break;
+ ap+=4;
+ rp+=4;
+ }
+
+ return(c1);
+ }
+
+BN_ULONG bn_mul_words(rp,ap,num,w)
+BN_ULONG *rp,*ap;
+int num;
+BN_ULONG w;
+ {
+ BN_ULONG c1=0;
+
+ bn_check_num(num);
+ if (num <= 0) return(c1);
+
+ for (;;)
+ {
+ mul(rp[0],ap[0],w,c1);
+ if (--num == 0) break;
+ mul(rp[1],ap[1],w,c1);
+ if (--num == 0) break;
+ mul(rp[2],ap[2],w,c1);
+ if (--num == 0) break;
+ mul(rp[3],ap[3],w,c1);
+ if (--num == 0) break;
+ ap+=4;
+ rp+=4;
+ }
+ return(c1);
+ }
+
+void bn_sqr_words(r,a,n)
+BN_ULONG *r,*a;
+int n;
+ {
+ bn_check_num(n);
+ if (n <= 0) return;
+ for (;;)
+ {
+ BN_ULLONG t;
+
+ t=(BN_ULLONG)(a[0])*(a[0]);
+ r[0]=Lw(t); r[1]=Hw(t);
+ if (--n == 0) break;
+
+ t=(BN_ULLONG)(a[1])*(a[1]);
+ r[2]=Lw(t); r[3]=Hw(t);
+ if (--n == 0) break;
+
+ t=(BN_ULLONG)(a[2])*(a[2]);
+ r[4]=Lw(t); r[5]=Hw(t);
+ if (--n == 0) break;
+
+ t=(BN_ULLONG)(a[3])*(a[3]);
+ r[6]=Lw(t); r[7]=Hw(t);
+ if (--n == 0) break;
+
+ a+=4;
+ r+=8;
+ }
+ }
+
+#else
+
+BN_ULONG bn_mul_add_words(rp,ap,num,w)
+BN_ULONG *rp,*ap;
+int num;
+BN_ULONG w;
+ {
+ BN_ULONG c=0;
+ BN_ULONG bl,bh;
+
+ bn_check_num(num);
+ if (num <= 0) return((BN_ULONG)0);
+
+ bl=LBITS(w);
+ bh=HBITS(w);
+
+ for (;;)
+ {
+ mul_add(rp[0],ap[0],bl,bh,c);
+ if (--num == 0) break;
+ mul_add(rp[1],ap[1],bl,bh,c);
+ if (--num == 0) break;
+ mul_add(rp[2],ap[2],bl,bh,c);
+ if (--num == 0) break;
+ mul_add(rp[3],ap[3],bl,bh,c);
+ if (--num == 0) break;
+ ap+=4;
+ rp+=4;
+ }
+ return(c);
+ }
+
+BN_ULONG bn_mul_words(rp,ap,num,w)
+BN_ULONG *rp,*ap;
+int num;
+BN_ULONG w;
+ {
+ BN_ULONG carry=0;
+ BN_ULONG bl,bh;
+
+ bn_check_num(num);
+ if (num <= 0) return((BN_ULONG)0);
+
+ bl=LBITS(w);
+ bh=HBITS(w);
+
+ for (;;)
+ {
+ mul(rp[0],ap[0],bl,bh,carry);
+ if (--num == 0) break;
+ mul(rp[1],ap[1],bl,bh,carry);
+ if (--num == 0) break;
+ mul(rp[2],ap[2],bl,bh,carry);
+ if (--num == 0) break;
+ mul(rp[3],ap[3],bl,bh,carry);
+ if (--num == 0) break;
+ ap+=4;
+ rp+=4;
+ }
+ return(carry);
+ }
+
+void bn_sqr_words(r,a,n)
+BN_ULONG *r,*a;
+int n;
+ {
+ bn_check_num(n);
+ if (n <= 0) return;
+ for (;;)
+ {
+ sqr64(r[0],r[1],a[0]);
+ if (--n == 0) break;
+
+ sqr64(r[2],r[3],a[1]);
+ if (--n == 0) break;
+
+ sqr64(r[4],r[5],a[2]);
+ if (--n == 0) break;
+
+ sqr64(r[6],r[7],a[3]);
+ if (--n == 0) break;
+
+ a+=4;
+ r+=8;
+ }
+ }
+
+#endif
+
+#if defined(BN_LLONG) && defined(BN_DIV2W)
+
+BN_ULONG bn_div_words(h,l,d)
+BN_ULONG h,l,d;
+ {
+ return((BN_ULONG)(((((BN_ULLONG)h)<<BN_BITS2)|l)/(BN_ULLONG)d));
+ }
+
+#else
+
+/* Divide h-l by d and return the result. */
+/* I need to test this some more :-( */
+BN_ULONG bn_div_words(h,l,d)
+BN_ULONG h,l,d;
+ {
+ BN_ULONG dh,dl,q,ret=0,th,tl,t;
+ int i,count=2;
+
+ if (d == 0) return(BN_MASK2);
+
+ i=BN_num_bits_word(d);
+ if ((i != BN_BITS2) && (h > (BN_ULONG)1<<i))
+ {
+#if !defined(NO_STDIO) && !defined(WIN16)
+ fprintf(stderr,"Division would overflow (%d)\n",i);
+#endif
+ abort();
+ }
+ i=BN_BITS2-i;
+ if (h >= d) h-=d;
+
+ if (i)
+ {
+ d<<=i;
+ h=(h<<i)|(l>>(BN_BITS2-i));
+ l<<=i;
+ }
+ dh=(d&BN_MASK2h)>>BN_BITS4;
+ dl=(d&BN_MASK2l);
+ for (;;)
+ {
+ if ((h>>BN_BITS4) == dh)
+ q=BN_MASK2l;
+ else
+ q=h/dh;
+
+ for (;;)
+ {
+ t=(h-q*dh);
+ if ((t&BN_MASK2h) ||
+ ((dl*q) <= (
+ (t<<BN_BITS4)+
+ ((l&BN_MASK2h)>>BN_BITS4))))
+ break;
+ q--;
+ }
+ th=q*dh;
+ tl=q*dl;
+ t=(tl>>BN_BITS4);
+ tl=(tl<<BN_BITS4)&BN_MASK2h;
+ th+=t;
+
+ if (l < tl) th++;
+ l-=tl;
+ if (h < th)
+ {
+ h+=d;
+ q--;
+ }
+ h-=th;
+
+ if (--count == 0) break;
+
+ ret=q<<BN_BITS4;
+ h=((h<<BN_BITS4)|(l>>BN_BITS4))&BN_MASK2;
+ l=(l&BN_MASK2l)<<BN_BITS4;
+ }
+ ret|=q;
+ return(ret);
+ }
+#endif
+
+#ifdef BN_LLONG
+BN_ULONG bn_add_words(r,a,b,n)
+BN_ULONG *r,*a,*b;
+int n;
+ {
+ BN_ULLONG ll=0;
+
+ bn_check_num(n);
+ if (n <= 0) return((BN_ULONG)0);
+
+ for (;;)
+ {
+ ll+=(BN_ULLONG)a[0]+b[0];
+ r[0]=(BN_ULONG)ll&BN_MASK2;
+ ll>>=BN_BITS2;
+ if (--n <= 0) break;
+
+ ll+=(BN_ULLONG)a[1]+b[1];
+ r[1]=(BN_ULONG)ll&BN_MASK2;
+ ll>>=BN_BITS2;
+ if (--n <= 0) break;
+
+ ll+=(BN_ULLONG)a[2]+b[2];
+ r[2]=(BN_ULONG)ll&BN_MASK2;
+ ll>>=BN_BITS2;
+ if (--n <= 0) break;
+
+ ll+=(BN_ULLONG)a[3]+b[3];
+ r[3]=(BN_ULONG)ll&BN_MASK2;
+ ll>>=BN_BITS2;
+ if (--n <= 0) break;
+
+ a+=4;
+ b+=4;
+ r+=4;
+ }
+ return((BN_ULONG)ll);
+ }
+#else
+BN_ULONG bn_add_words(r,a,b,n)
+BN_ULONG *r,*a,*b;
+int n;
+ {
+ BN_ULONG c,l,t;
+
+ bn_check_num(n);
+ if (n <= 0) return((BN_ULONG)0);
+
+ c=0;
+ for (;;)
+ {
+ t=a[0];
+ t=(t+c)&BN_MASK2;
+ c=(t < c);
+ l=(t+b[0])&BN_MASK2;
+ c+=(l < t);
+ r[0]=l;
+ if (--n <= 0) break;
+
+ t=a[1];
+ t=(t+c)&BN_MASK2;
+ c=(t < c);
+ l=(t+b[1])&BN_MASK2;
+ c+=(l < t);
+ r[1]=l;
+ if (--n <= 0) break;
+
+ t=a[2];
+ t=(t+c)&BN_MASK2;
+ c=(t < c);
+ l=(t+b[2])&BN_MASK2;
+ c+=(l < t);
+ r[2]=l;
+ if (--n <= 0) break;
+
+ t=a[3];
+ t=(t+c)&BN_MASK2;
+ c=(t < c);
+ l=(t+b[3])&BN_MASK2;
+ c+=(l < t);
+ r[3]=l;
+ if (--n <= 0) break;
+
+ a+=4;
+ b+=4;
+ r+=4;
+ }
+ return((BN_ULONG)c);
+ }
+#endif
+
+BN_ULONG bn_sub_words(r,a,b,n)
+BN_ULONG *r,*a,*b;
+int n;
+ {
+ BN_ULONG t1,t2;
+ int c=0;
+
+ bn_check_num(n);
+ if (n <= 0) return((BN_ULONG)0);
+
+ for (;;)
+ {
+ t1=a[0]; t2=b[0];
+ r[0]=(t1-t2-c)&BN_MASK2;
+ if (t1 != t2) c=(t1 < t2);
+ if (--n <= 0) break;
+
+ t1=a[1]; t2=b[1];
+ r[1]=(t1-t2-c)&BN_MASK2;
+ if (t1 != t2) c=(t1 < t2);
+ if (--n <= 0) break;
+
+ t1=a[2]; t2=b[2];
+ r[2]=(t1-t2-c)&BN_MASK2;
+ if (t1 != t2) c=(t1 < t2);
+ if (--n <= 0) break;
+
+ t1=a[3]; t2=b[3];
+ r[3]=(t1-t2-c)&BN_MASK2;
+ if (t1 != t2) c=(t1 < t2);
+ if (--n <= 0) break;
+
+ a+=4;
+ b+=4;
+ r+=4;
+ }
+ return(c);
+ }
+
+#ifdef BN_COMBA
+
+#undef bn_mul_comba8
+#undef bn_mul_comba4
+#undef bn_sqr_comba8
+#undef bn_sqr_comba4
+
+#ifdef BN_LLONG
+#define mul_add_c(a,b,c0,c1,c2) \
+ t=(BN_ULLONG)a*b; \
+ t1=(BN_ULONG)Lw(t); \
+ t2=(BN_ULONG)Hw(t); \
+ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define mul_add_c2(a,b,c0,c1,c2) \
+ t=(BN_ULLONG)a*b; \
+ tt=(t+t)&BN_MASK; \
+ if (tt < t) c2++; \
+ t1=(BN_ULONG)Lw(tt); \
+ t2=(BN_ULONG)Hw(tt); \
+ c0=(c0+t1)&BN_MASK2; \
+ if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define sqr_add_c(a,i,c0,c1,c2) \
+ t=(BN_ULLONG)a[i]*a[i]; \
+ t1=(BN_ULONG)Lw(t); \
+ t2=(BN_ULONG)Hw(t); \
+ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define sqr_add_c2(a,i,j,c0,c1,c2) \
+ mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+#else
+#define mul_add_c(a,b,c0,c1,c2) \
+ t1=LBITS(a); t2=HBITS(a); \
+ bl=LBITS(b); bh=HBITS(b); \
+ mul64(t1,t2,bl,bh); \
+ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define mul_add_c2(a,b,c0,c1,c2) \
+ t1=LBITS(a); t2=HBITS(a); \
+ bl=LBITS(b); bh=HBITS(b); \
+ mul64(t1,t2,bl,bh); \
+ if (t2 & BN_TBIT) c2++; \
+ t2=(t2+t2)&BN_MASK2; \
+ if (t1 & BN_TBIT) t2++; \
+ t1=(t1+t1)&BN_MASK2; \
+ c0=(c0+t1)&BN_MASK2; \
+ if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define sqr_add_c(a,i,c0,c1,c2) \
+ sqr64(t1,t2,(a)[i]); \
+ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define sqr_add_c2(a,i,j,c0,c1,c2) \
+ mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+#endif
+
+void bn_mul_comba8(r,a,b)
+BN_ULONG *r,*a,*b;
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t;
+#else
+ BN_ULONG bl,bh;
+#endif
+ BN_ULONG t1,t2;
+ BN_ULONG c1,c2,c3;
+
+ c1=0;
+ c2=0;
+ c3=0;
+ mul_add_c(a[0],b[0],c1,c2,c3);
+ r[0]=c1;
+ c1=0;
+ mul_add_c(a[0],b[1],c2,c3,c1);
+ mul_add_c(a[1],b[0],c2,c3,c1);
+ r[1]=c2;
+ c2=0;
+ mul_add_c(a[2],b[0],c3,c1,c2);
+ mul_add_c(a[1],b[1],c3,c1,c2);
+ mul_add_c(a[0],b[2],c3,c1,c2);
+ r[2]=c3;
+ c3=0;
+ mul_add_c(a[0],b[3],c1,c2,c3);
+ mul_add_c(a[1],b[2],c1,c2,c3);
+ mul_add_c(a[2],b[1],c1,c2,c3);
+ mul_add_c(a[3],b[0],c1,c2,c3);
+ r[3]=c1;
+ c1=0;
+ mul_add_c(a[4],b[0],c2,c3,c1);
+ mul_add_c(a[3],b[1],c2,c3,c1);
+ mul_add_c(a[2],b[2],c2,c3,c1);
+ mul_add_c(a[1],b[3],c2,c3,c1);
+ mul_add_c(a[0],b[4],c2,c3,c1);
+ r[4]=c2;
+ c2=0;
+ mul_add_c(a[0],b[5],c3,c1,c2);
+ mul_add_c(a[1],b[4],c3,c1,c2);
+ mul_add_c(a[2],b[3],c3,c1,c2);
+ mul_add_c(a[3],b[2],c3,c1,c2);
+ mul_add_c(a[4],b[1],c3,c1,c2);
+ mul_add_c(a[5],b[0],c3,c1,c2);
+ r[5]=c3;
+ c3=0;
+ mul_add_c(a[6],b[0],c1,c2,c3);
+ mul_add_c(a[5],b[1],c1,c2,c3);
+ mul_add_c(a[4],b[2],c1,c2,c3);
+ mul_add_c(a[3],b[3],c1,c2,c3);
+ mul_add_c(a[2],b[4],c1,c2,c3);
+ mul_add_c(a[1],b[5],c1,c2,c3);
+ mul_add_c(a[0],b[6],c1,c2,c3);
+ r[6]=c1;
+ c1=0;
+ mul_add_c(a[0],b[7],c2,c3,c1);
+ mul_add_c(a[1],b[6],c2,c3,c1);
+ mul_add_c(a[2],b[5],c2,c3,c1);
+ mul_add_c(a[3],b[4],c2,c3,c1);
+ mul_add_c(a[4],b[3],c2,c3,c1);
+ mul_add_c(a[5],b[2],c2,c3,c1);
+ mul_add_c(a[6],b[1],c2,c3,c1);
+ mul_add_c(a[7],b[0],c2,c3,c1);
+ r[7]=c2;
+ c2=0;
+ mul_add_c(a[7],b[1],c3,c1,c2);
+ mul_add_c(a[6],b[2],c3,c1,c2);
+ mul_add_c(a[5],b[3],c3,c1,c2);
+ mul_add_c(a[4],b[4],c3,c1,c2);
+ mul_add_c(a[3],b[5],c3,c1,c2);
+ mul_add_c(a[2],b[6],c3,c1,c2);
+ mul_add_c(a[1],b[7],c3,c1,c2);
+ r[8]=c3;
+ c3=0;
+ mul_add_c(a[2],b[7],c1,c2,c3);
+ mul_add_c(a[3],b[6],c1,c2,c3);
+ mul_add_c(a[4],b[5],c1,c2,c3);
+ mul_add_c(a[5],b[4],c1,c2,c3);
+ mul_add_c(a[6],b[3],c1,c2,c3);
+ mul_add_c(a[7],b[2],c1,c2,c3);
+ r[9]=c1;
+ c1=0;
+ mul_add_c(a[7],b[3],c2,c3,c1);
+ mul_add_c(a[6],b[4],c2,c3,c1);
+ mul_add_c(a[5],b[5],c2,c3,c1);
+ mul_add_c(a[4],b[6],c2,c3,c1);
+ mul_add_c(a[3],b[7],c2,c3,c1);
+ r[10]=c2;
+ c2=0;
+ mul_add_c(a[4],b[7],c3,c1,c2);
+ mul_add_c(a[5],b[6],c3,c1,c2);
+ mul_add_c(a[6],b[5],c3,c1,c2);
+ mul_add_c(a[7],b[4],c3,c1,c2);
+ r[11]=c3;
+ c3=0;
+ mul_add_c(a[7],b[5],c1,c2,c3);
+ mul_add_c(a[6],b[6],c1,c2,c3);
+ mul_add_c(a[5],b[7],c1,c2,c3);
+ r[12]=c1;
+ c1=0;
+ mul_add_c(a[6],b[7],c2,c3,c1);
+ mul_add_c(a[7],b[6],c2,c3,c1);
+ r[13]=c2;
+ c2=0;
+ mul_add_c(a[7],b[7],c3,c1,c2);
+ r[14]=c3;
+ r[15]=c1;
+ }
+
+void bn_mul_comba4(r,a,b)
+BN_ULONG *r,*a,*b;
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t;
+#else
+ BN_ULONG bl,bh;
+#endif
+ BN_ULONG t1,t2;
+ BN_ULONG c1,c2,c3;
+
+ c1=0;
+ c2=0;
+ c3=0;
+ mul_add_c(a[0],b[0],c1,c2,c3);
+ r[0]=c1;
+ c1=0;
+ mul_add_c(a[0],b[1],c2,c3,c1);
+ mul_add_c(a[1],b[0],c2,c3,c1);
+ r[1]=c2;
+ c2=0;
+ mul_add_c(a[2],b[0],c3,c1,c2);
+ mul_add_c(a[1],b[1],c3,c1,c2);
+ mul_add_c(a[0],b[2],c3,c1,c2);
+ r[2]=c3;
+ c3=0;
+ mul_add_c(a[0],b[3],c1,c2,c3);
+ mul_add_c(a[1],b[2],c1,c2,c3);
+ mul_add_c(a[2],b[1],c1,c2,c3);
+ mul_add_c(a[3],b[0],c1,c2,c3);
+ r[3]=c1;
+ c1=0;
+ mul_add_c(a[3],b[1],c2,c3,c1);
+ mul_add_c(a[2],b[2],c2,c3,c1);
+ mul_add_c(a[1],b[3],c2,c3,c1);
+ r[4]=c2;
+ c2=0;
+ mul_add_c(a[2],b[3],c3,c1,c2);
+ mul_add_c(a[3],b[2],c3,c1,c2);
+ r[5]=c3;
+ c3=0;
+ mul_add_c(a[3],b[3],c1,c2,c3);
+ r[6]=c1;
+ r[7]=c2;
+ }
+
+void bn_sqr_comba8(r,a)
+BN_ULONG *r,*a;
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t,tt;
+#else
+ BN_ULONG bl,bh;
+#endif
+ BN_ULONG t1,t2;
+ BN_ULONG c1,c2,c3;
+
+ c1=0;
+ c2=0;
+ c3=0;
+ sqr_add_c(a,0,c1,c2,c3);
+ r[0]=c1;
+ c1=0;
+ sqr_add_c2(a,1,0,c2,c3,c1);
+ r[1]=c2;
+ c2=0;
+ sqr_add_c(a,1,c3,c1,c2);
+ sqr_add_c2(a,2,0,c3,c1,c2);
+ r[2]=c3;
+ c3=0;
+ sqr_add_c2(a,3,0,c1,c2,c3);
+ sqr_add_c2(a,2,1,c1,c2,c3);
+ r[3]=c1;
+ c1=0;
+ sqr_add_c(a,2,c2,c3,c1);
+ sqr_add_c2(a,3,1,c2,c3,c1);
+ sqr_add_c2(a,4,0,c2,c3,c1);
+ r[4]=c2;
+ c2=0;
+ sqr_add_c2(a,5,0,c3,c1,c2);
+ sqr_add_c2(a,4,1,c3,c1,c2);
+ sqr_add_c2(a,3,2,c3,c1,c2);
+ r[5]=c3;
+ c3=0;
+ sqr_add_c(a,3,c1,c2,c3);
+ sqr_add_c2(a,4,2,c1,c2,c3);
+ sqr_add_c2(a,5,1,c1,c2,c3);
+ sqr_add_c2(a,6,0,c1,c2,c3);
+ r[6]=c1;
+ c1=0;
+ sqr_add_c2(a,7,0,c2,c3,c1);
+ sqr_add_c2(a,6,1,c2,c3,c1);
+ sqr_add_c2(a,5,2,c2,c3,c1);
+ sqr_add_c2(a,4,3,c2,c3,c1);
+ r[7]=c2;
+ c2=0;
+ sqr_add_c(a,4,c3,c1,c2);
+ sqr_add_c2(a,5,3,c3,c1,c2);
+ sqr_add_c2(a,6,2,c3,c1,c2);
+ sqr_add_c2(a,7,1,c3,c1,c2);
+ r[8]=c3;
+ c3=0;
+ sqr_add_c2(a,7,2,c1,c2,c3);
+ sqr_add_c2(a,6,3,c1,c2,c3);
+ sqr_add_c2(a,5,4,c1,c2,c3);
+ r[9]=c1;
+ c1=0;
+ sqr_add_c(a,5,c2,c3,c1);
+ sqr_add_c2(a,6,4,c2,c3,c1);
+ sqr_add_c2(a,7,3,c2,c3,c1);
+ r[10]=c2;
+ c2=0;
+ sqr_add_c2(a,7,4,c3,c1,c2);
+ sqr_add_c2(a,6,5,c3,c1,c2);
+ r[11]=c3;
+ c3=0;
+ sqr_add_c(a,6,c1,c2,c3);
+ sqr_add_c2(a,7,5,c1,c2,c3);
+ r[12]=c1;
+ c1=0;
+ sqr_add_c2(a,7,6,c2,c3,c1);
+ r[13]=c2;
+ c2=0;
+ sqr_add_c(a,7,c3,c1,c2);
+ r[14]=c3;
+ r[15]=c1;
+ }
+
+void bn_sqr_comba4(r,a)
+BN_ULONG *r,*a;
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t,tt;
+#else
+ BN_ULONG bl,bh;
+#endif
+ BN_ULONG t1,t2;
+ BN_ULONG c1,c2,c3;
+
+ c1=0;
+ c2=0;
+ c3=0;
+ sqr_add_c(a,0,c1,c2,c3);
+ r[0]=c1;
+ c1=0;
+ sqr_add_c2(a,1,0,c2,c3,c1);
+ r[1]=c2;
+ c2=0;
+ sqr_add_c(a,1,c3,c1,c2);
+ sqr_add_c2(a,2,0,c3,c1,c2);
+ r[2]=c3;
+ c3=0;
+ sqr_add_c2(a,3,0,c1,c2,c3);
+ sqr_add_c2(a,2,1,c1,c2,c3);
+ r[3]=c1;
+ c1=0;
+ sqr_add_c(a,2,c2,c3,c1);
+ sqr_add_c2(a,3,1,c2,c3,c1);
+ r[4]=c2;
+ c2=0;
+ sqr_add_c2(a,3,2,c3,c1,c2);
+ r[5]=c3;
+ c3=0;
+ sqr_add_c(a,3,c1,c2,c3);
+ r[6]=c1;
+ r[7]=c2;
+ }
+#else
+
+/* hmm... is it faster just to do a multiply? */
+void bn_sqr_comba4(r,a)
+BN_ULONG *r,*a;
+ {
+ BN_ULONG t[8];
+ bn_sqr_normal(r,a,4,t);
+ }
+
+void bn_sqr_comba8(r,a)
+BN_ULONG *r,*a;
+ {
+ BN_ULONG t[16];
+ bn_sqr_normal(r,a,8,t);
+ }
+
+void bn_mul_comba4(r,a,b)
+BN_ULONG *r,*a,*b;
+ {
+ r[4]=bn_mul_words( &(r[0]),a,4,b[0]);
+ r[5]=bn_mul_add_words(&(r[1]),a,4,b[1]);
+ r[6]=bn_mul_add_words(&(r[2]),a,4,b[2]);
+ r[7]=bn_mul_add_words(&(r[3]),a,4,b[3]);
+ }
+
+void bn_mul_comba8(r,a,b)
+BN_ULONG *r,*a,*b;
+ {
+ r[ 8]=bn_mul_words( &(r[0]),a,8,b[0]);
+ r[ 9]=bn_mul_add_words(&(r[1]),a,8,b[1]);
+ r[10]=bn_mul_add_words(&(r[2]),a,8,b[2]);
+ r[11]=bn_mul_add_words(&(r[3]),a,8,b[3]);
+ r[12]=bn_mul_add_words(&(r[4]),a,8,b[4]);
+ r[13]=bn_mul_add_words(&(r[5]),a,8,b[5]);
+ r[14]=bn_mul_add_words(&(r[6]),a,8,b[6]);
+ r[15]=bn_mul_add_words(&(r[7]),a,8,b[7]);
+ }
+
+#endif /* BN_COMBA */
diff --git a/crypto/bn/bn_blind.c b/crypto/bn/bn_blind.c
index a7b34f0bf0..35be32b99a 100644
--- a/crypto/bn/bn_blind.c
+++ b/crypto/bn/bn_blind.c
@@ -67,8 +67,14 @@ BIGNUM *mod;
{
BN_BLINDING *ret=NULL;
+ bn_check_top(Ai);
+ bn_check_top(mod);
+
if ((ret=(BN_BLINDING *)Malloc(sizeof(BN_BLINDING))) == NULL)
+ {
BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE);
+ return(NULL);
+ }
memset(ret,0,sizeof(BN_BLINDING));
if ((ret->A=BN_new()) == NULL) goto err;
if ((ret->Ai=BN_new()) == NULL) goto err;
@@ -78,7 +84,7 @@ BIGNUM *mod;
return(ret);
err:
if (ret != NULL) BN_BLINDING_free(ret);
- return(ret);
+ return(NULL);
}
void BN_BLINDING_free(r)
@@ -114,6 +120,8 @@ BIGNUM *n;
BN_BLINDING *b;
BN_CTX *ctx;
{
+ bn_check_top(n);
+
if ((b->A == NULL) || (b->Ai == NULL))
{
BNerr(BN_F_BN_BLINDING_CONVERT,BN_R_NOT_INITALISED);
@@ -128,6 +136,8 @@ BN_BLINDING *b;
BN_CTX *ctx;
{
int ret;
+
+ bn_check_top(n);
if ((b->A == NULL) || (b->Ai == NULL))
{
BNerr(BN_F_BN_BLINDING_INVERT,BN_R_NOT_INITALISED);
diff --git a/crypto/bn/bn_comba.c b/crypto/bn/bn_comba.c
new file mode 100644
index 0000000000..30357cf5fb
--- /dev/null
+++ b/crypto/bn/bn_comba.c
@@ -0,0 +1,349 @@
+/* crypto/bn/bn_comba.c */
+#include <stdio.h>
+#include "bn_lcl.h"
+/* Auto generated from crypto/bn/comba.pl
+ */
+
+#undef bn_mul_comba8
+#undef bn_mul_comba4
+#undef bn_sqr_comba8
+#undef bn_sqr_comba4
+
+#ifdef BN_LLONG
+#define mul_add_c(a,b,c0,c1,c2) \
+ t=(BN_ULLONG)a*b; \
+ t1=(BN_ULONG)Lw(t); \
+ t2=(BN_ULONG)Hw(t); \
+ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define mul_add_c2(a,b,c0,c1,c2) \
+ t=(BN_ULLONG)a*b; \
+ tt=(t+t)&BN_MASK; \
+ if (tt < t) c2++; \
+ t1=(BN_ULONG)Lw(tt); \
+ t2=(BN_ULONG)Hw(tt); \
+ c0=(c0+t1)&BN_MASK2; \
+ if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define sqr_add_c(a,i,c0,c1,c2) \
+ t=(BN_ULLONG)a[i]*a[i]; \
+ t1=(BN_ULONG)Lw(t); \
+ t2=(BN_ULONG)Hw(t); \
+ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define sqr_add_c2(a,i,j,c0,c1,c2) \
+ mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+#else
+#define mul_add_c(a,b,c0,c1,c2) \
+ t1=LBITS(a); t2=HBITS(a); \
+ bl=LBITS(b); bh=HBITS(b); \
+ mul64(t1,t2,bl,bh); \
+ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define mul_add_c2(a,b,c0,c1,c2) \
+ t1=LBITS(a); t2=HBITS(a); \
+ bl=LBITS(b); bh=HBITS(b); \
+ mul64(t1,t2,bl,bh); \
+ if (t2 & BN_TBIT) c2++; \
+ t2=(t2+t2)&BN_MASK2; \
+ if (t1 & BN_TBIT) t2++; \
+ t1=(t1+t1)&BN_MASK2; \
+ c0=(c0+t1)&BN_MASK2; \
+ if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define sqr_add_c(a,i,c0,c1,c2) \
+ sqr64(t1,t2,(a)[i]); \
+ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define sqr_add_c2(a,i,j,c0,c1,c2) \
+ mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+#endif
+
+void bn_mul_comba88(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
+void bn_mul_comba44(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
+void bn_sqr_comba88(BN_ULONG *r,BN_ULONG *a);
+void bn_sqr_comba44(BN_ULONG *r,BN_ULONG *a);
+
+void bn_mul_comba88(r,a,b)
+BN_ULONG *r,*a,*b;
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t;
+#else
+ BN_ULONG bl,bh;
+#endif
+ BN_ULONG t1,t2;
+ BN_ULONG c1,c2,c3;
+
+ c1=0;
+ c2=0;
+ c3=0;
+ mul_add_c(a[0],b[0],c1,c2,c3);
+ r[0]=c1;
+ c1=0;
+ mul_add_c(a[0],b[1],c2,c3,c1);
+ mul_add_c(a[1],b[0],c2,c3,c1);
+ r[1]=c2;
+ c2=0;
+ mul_add_c(a[2],b[0],c3,c1,c2);
+ mul_add_c(a[1],b[1],c3,c1,c2);
+ mul_add_c(a[0],b[2],c3,c1,c2);
+ r[2]=c3;
+ c3=0;
+ mul_add_c(a[0],b[3],c1,c2,c3);
+ mul_add_c(a[1],b[2],c1,c2,c3);
+ mul_add_c(a[2],b[1],c1,c2,c3);
+ mul_add_c(a[3],b[0],c1,c2,c3);
+ r[3]=c1;
+ c1=0;
+ mul_add_c(a[4],b[0],c2,c3,c1);
+ mul_add_c(a[3],b[1],c2,c3,c1);
+ mul_add_c(a[2],b[2],c2,c3,c1);
+ mul_add_c(a[1],b[3],c2,c3,c1);
+ mul_add_c(a[0],b[4],c2,c3,c1);
+ r[4]=c2;
+ c2=0;
+ mul_add_c(a[0],b[5],c3,c1,c2);
+ mul_add_c(a[1],b[4],c3,c1,c2);
+ mul_add_c(a[2],b[3],c3,c1,c2);
+ mul_add_c(a[3],b[2],c3,c1,c2);
+ mul_add_c(a[4],b[1],c3,c1,c2);
+ mul_add_c(a[5],b[0],c3,c1,c2);
+ r[5]=c3;
+ c3=0;
+ mul_add_c(a[6],b[0],c1,c2,c3);
+ mul_add_c(a[5],b[1],c1,c2,c3);
+ mul_add_c(a[4],b[2],c1,c2,c3);
+ mul_add_c(a[3],b[3],c1,c2,c3);
+ mul_add_c(a[2],b[4],c1,c2,c3);
+ mul_add_c(a[1],b[5],c1,c2,c3);
+ mul_add_c(a[0],b[6],c1,c2,c3);
+ r[6]=c1;
+ c1=0;
+ mul_add_c(a[0],b[7],c2,c3,c1);
+ mul_add_c(a[1],b[6],c2,c3,c1);
+ mul_add_c(a[2],b[5],c2,c3,c1);
+ mul_add_c(a[3],b[4],c2,c3,c1);
+ mul_add_c(a[4],b[3],c2,c3,c1);
+ mul_add_c(a[5],b[2],c2,c3,c1);
+ mul_add_c(a[6],b[1],c2,c3,c1);
+ mul_add_c(a[7],b[0],c2,c3,c1);
+ r[7]=c2;
+ c2=0;
+ mul_add_c(a[7],b[1],c3,c1,c2);
+ mul_add_c(a[6],b[2],c3,c1,c2);
+ mul_add_c(a[5],b[3],c3,c1,c2);
+ mul_add_c(a[4],b[4],c3,c1,c2);
+ mul_add_c(a[3],b[5],c3,c1,c2);
+ mul_add_c(a[2],b[6],c3,c1,c2);
+ mul_add_c(a[1],b[7],c3,c1,c2);
+ r[8]=c3;
+ c3=0;
+ mul_add_c(a[2],b[7],c1,c2,c3);
+ mul_add_c(a[3],b[6],c1,c2,c3);
+ mul_add_c(a[4],b[5],c1,c2,c3);
+ mul_add_c(a[5],b[4],c1,c2,c3);
+ mul_add_c(a[6],b[3],c1,c2,c3);
+ mul_add_c(a[7],b[2],c1,c2,c3);
+ r[9]=c1;
+ c1=0;
+ mul_add_c(a[7],b[3],c2,c3,c1);
+ mul_add_c(a[6],b[4],c2,c3,c1);
+ mul_add_c(a[5],b[5],c2,c3,c1);
+ mul_add_c(a[4],b[6],c2,c3,c1);
+ mul_add_c(a[3],b[7],c2,c3,c1);
+ r[10]=c2;
+ c2=0;
+ mul_add_c(a[4],b[7],c3,c1,c2);
+ mul_add_c(a[5],b[6],c3,c1,c2);
+ mul_add_c(a[6],b[5],c3,c1,c2);
+ mul_add_c(a[7],b[4],c3,c1,c2);
+ r[11]=c3;
+ c3=0;
+ mul_add_c(a[7],b[5],c1,c2,c3);
+ mul_add_c(a[6],b[6],c1,c2,c3);
+ mul_add_c(a[5],b[7],c1,c2,c3);
+ r[12]=c1;
+ c1=0;
+ mul_add_c(a[6],b[7],c2,c3,c1);
+ mul_add_c(a[7],b[6],c2,c3,c1);
+ r[13]=c2;
+ c2=0;
+ mul_add_c(a[7],b[7],c3,c1,c2);
+ r[14]=c3;
+ r[15]=c1;
+ }
+
+void bn_mul_comba44(r,a,b)
+BN_ULONG *r,*a,*b;
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t;
+#else
+ BN_ULONG bl,bh;
+#endif
+ BN_ULONG t1,t2;
+ BN_ULONG c1,c2,c3;
+
+ c1=0;
+ c2=0;
+ c3=0;
+ mul_add_c(a[0],b[0],c1,c2,c3);
+ r[0]=c1;
+ c1=0;
+ mul_add_c(a[0],b[1],c2,c3,c1);
+ mul_add_c(a[1],b[0],c2,c3,c1);
+ r[1]=c2;
+ c2=0;
+ mul_add_c(a[2],b[0],c3,c1,c2);
+ mul_add_c(a[1],b[1],c3,c1,c2);
+ mul_add_c(a[0],b[2],c3,c1,c2);
+ r[2]=c3;
+ c3=0;
+ mul_add_c(a[0],b[3],c1,c2,c3);
+ mul_add_c(a[1],b[2],c1,c2,c3);
+ mul_add_c(a[2],b[1],c1,c2,c3);
+ mul_add_c(a[3],b[0],c1,c2,c3);
+ r[3]=c1;
+ c1=0;
+ mul_add_c(a[3],b[1],c2,c3,c1);
+ mul_add_c(a[2],b[2],c2,c3,c1);
+ mul_add_c(a[1],b[3],c2,c3,c1);
+ r[4]=c2;
+ c2=0;
+ mul_add_c(a[2],b[3],c3,c1,c2);
+ mul_add_c(a[3],b[2],c3,c1,c2);
+ r[5]=c3;
+ c3=0;
+ mul_add_c(a[3],b[3],c1,c2,c3);
+ r[6]=c1;
+ r[7]=c2;
+ }
+
+void bn_sqr_comba88(r,a)
+BN_ULONG *r,*a;
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t,tt;
+#else
+ BN_ULONG bl,bh;
+#endif
+ BN_ULONG t1,t2;
+ BN_ULONG c1,c2,c3;
+
+ c1=0;
+ c2=0;
+ c3=0;
+ sqr_add_c(a,0,c1,c2,c3);
+ r[0]=c1;
+ c1=0;
+ sqr_add_c2(a,1,0,c2,c3,c1);
+ r[1]=c2;
+ c2=0;
+ sqr_add_c(a,1,c3,c1,c2);
+ sqr_add_c2(a,2,0,c3,c1,c2);
+ r[2]=c3;
+ c3=0;
+ sqr_add_c2(a,3,0,c1,c2,c3);
+ sqr_add_c2(a,2,1,c1,c2,c3);
+ r[3]=c1;
+ c1=0;
+ sqr_add_c(a,2,c2,c3,c1);
+ sqr_add_c2(a,3,1,c2,c3,c1);
+ sqr_add_c2(a,4,0,c2,c3,c1);
+ r[4]=c2;
+ c2=0;
+ sqr_add_c2(a,5,0,c3,c1,c2);
+ sqr_add_c2(a,4,1,c3,c1,c2);
+ sqr_add_c2(a,3,2,c3,c1,c2);
+ r[5]=c3;
+ c3=0;
+ sqr_add_c(a,3,c1,c2,c3);
+ sqr_add_c2(a,4,2,c1,c2,c3);
+ sqr_add_c2(a,5,1,c1,c2,c3);
+ sqr_add_c2(a,6,0,c1,c2,c3);
+ r[6]=c1;
+ c1=0;
+ sqr_add_c2(a,7,0,c2,c3,c1);
+ sqr_add_c2(a,6,1,c2,c3,c1);
+ sqr_add_c2(a,5,2,c2,c3,c1);
+ sqr_add_c2(a,4,3,c2,c3,c1);
+ r[7]=c2;
+ c2=0;
+ sqr_add_c(a,4,c3,c1,c2);
+ sqr_add_c2(a,5,3,c3,c1,c2);
+ sqr_add_c2(a,6,2,c3,c1,c2);
+ sqr_add_c2(a,7,1,c3,c1,c2);
+ r[8]=c3;
+ c3=0;
+ sqr_add_c2(a,7,2,c1,c2,c3);
+ sqr_add_c2(a,6,3,c1,c2,c3);
+ sqr_add_c2(a,5,4,c1,c2,c3);
+ r[9]=c1;
+ c1=0;
+ sqr_add_c(a,5,c2,c3,c1);
+ sqr_add_c2(a,6,4,c2,c3,c1);
+ sqr_add_c2(a,7,3,c2,c3,c1);
+ r[10]=c2;
+ c2=0;
+ sqr_add_c2(a,7,4,c3,c1,c2);
+ sqr_add_c2(a,6,5,c3,c1,c2);
+ r[11]=c3;
+ c3=0;
+ sqr_add_c(a,6,c1,c2,c3);
+ sqr_add_c2(a,7,5,c1,c2,c3);
+ r[12]=c1;
+ c1=0;
+ sqr_add_c2(a,7,6,c2,c3,c1);
+ r[13]=c2;
+ c2=0;
+ sqr_add_c(a,7,c3,c1,c2);
+ r[14]=c3;
+ r[15]=c1;
+ }
+
+void bn_sqr_comba44(r,a)
+BN_ULONG *r,*a;
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t,tt;
+#else
+ BN_ULONG bl,bh;
+#endif
+ BN_ULONG t1,t2;
+ BN_ULONG c1,c2,c3;
+
+ c1=0;
+ c2=0;
+ c3=0;
+ sqr_add_c(a,0,c1,c2,c3);
+ r[0]=c1;
+ c1=0;
+ sqr_add_c2(a,1,0,c2,c3,c1);
+ r[1]=c2;
+ c2=0;
+ sqr_add_c(a,1,c3,c1,c2);
+ sqr_add_c2(a,2,0,c3,c1,c2);
+ r[2]=c3;
+ c3=0;
+ sqr_add_c2(a,3,0,c1,c2,c3);
+ sqr_add_c2(a,2,1,c1,c2,c3);
+ r[3]=c1;
+ c1=0;
+ sqr_add_c(a,2,c2,c3,c1);
+ sqr_add_c2(a,3,1,c2,c3,c1);
+ r[4]=c2;
+ c2=0;
+ sqr_add_c2(a,3,2,c3,c1,c2);
+ r[5]=c3;
+ c3=0;
+ sqr_add_c(a,3,c1,c2,c3);
+ r[6]=c1;
+ r[7]=c2;
+ }
diff --git a/crypto/bn/bn_div.c b/crypto/bn/bn_div.c
index 2263bdc7da..c7bc04d0b4 100644
--- a/crypto/bn/bn_div.c
+++ b/crypto/bn/bn_div.c
@@ -72,6 +72,8 @@ BN_CTX *ctx;
int i,nm,nd;
BIGNUM *D;
+ bn_check_top(m);
+ bn_check_top(d);
if (BN_is_zero(d))
{
BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
@@ -86,9 +88,9 @@ BN_CTX *ctx;
return(1);
}
- D=ctx->bn[ctx->tos];
- if (dv == NULL) dv=ctx->bn[ctx->tos+1];
- if (rem == NULL) rem=ctx->bn[ctx->tos+2];
+ D= &(ctx->bn[ctx->tos]);
+ if (dv == NULL) dv= &(ctx->bn[ctx->tos+1]);
+ if (rem == NULL) rem= &(ctx->bn[ctx->tos+2]);
nd=BN_num_bits(d);
nm=BN_num_bits(m);
@@ -98,6 +100,7 @@ BN_CTX *ctx;
/* The next 2 are needed so we can do a dv->d[0]|=1 later
* since BN_lshift1 will only work once there is a value :-) */
BN_zero(dv);
+ bn_wexpand(dv,1);
dv->top=1;
if (!BN_lshift(D,D,nm-nd)) return(0);
@@ -107,7 +110,7 @@ BN_CTX *ctx;
if (BN_ucmp(rem,D) >= 0)
{
dv->d[0]|=1;
- bn_qsub(rem,rem,D);
+ if (!BN_usub(rem,rem,D)) return(0);
}
/* CAN IMPROVE (and have now :=) */
if (!BN_rshift1(D,D)) return(0);
@@ -132,6 +135,9 @@ BN_CTX *ctx;
BN_ULONG d0,d1;
int num_n,div_n;
+ bn_check_top(num);
+ bn_check_top(divisor);
+
if (BN_is_zero(divisor))
{
BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
@@ -146,12 +152,12 @@ BN_CTX *ctx;
return(1);
}
- tmp=ctx->bn[ctx->tos];
+ tmp= &(ctx->bn[ctx->tos]);
tmp->neg=0;
- snum=ctx->bn[ctx->tos+1];
- sdiv=ctx->bn[ctx->tos+2];
+ snum= &(ctx->bn[ctx->tos+1]);
+ sdiv= &(ctx->bn[ctx->tos+2]);
if (dv == NULL)
- res=ctx->bn[ctx->tos+3];
+ res= &(ctx->bn[ctx->tos+3]);
else res=dv;
/* First we normalise the numbers */
@@ -168,10 +174,10 @@ BN_CTX *ctx;
/* Lets setup a 'window' into snum
* This is the part that corresponds to the current
* 'area' being divided */
+ BN_init(&wnum);
wnum.d= &(snum->d[loop]);
wnum.top= div_n;
- wnum.max= snum->max; /* a bit of a lie */
- wnum.neg= 0;
+ wnum.max= snum->max+1; /* a bit of a lie */
/* Get the top 2 words of sdiv */
/* i=sdiv->top; */
@@ -183,8 +189,8 @@ BN_CTX *ctx;
/* Setup to 'res' */
res->neg= (num->neg^divisor->neg);
- res->top=loop;
if (!bn_wexpand(res,(loop+1))) goto err;
+ res->top=loop;
resp= &(res->d[loop-1]);
/* space for temp */
@@ -192,7 +198,7 @@ BN_CTX *ctx;
if (BN_ucmp(&wnum,sdiv) >= 0)
{
- bn_qsub(&wnum,&wnum,sdiv);
+ if (!BN_usub(&wnum,&wnum,sdiv)) goto err;
*resp=1;
res->d[res->top-1]=1;
}
@@ -211,7 +217,7 @@ BN_CTX *ctx;
if (n0 == d0)
q=BN_MASK2;
else
- q=bn_div64(n0,n1,d0);
+ q=bn_div_words(n0,n1,d0);
{
#ifdef BN_LLONG
BN_ULLONG t1,t2,rem;
@@ -284,3 +290,39 @@ err:
}
#endif
+
+/* rem != m */
+int BN_mod(rem, m, d,ctx)
+BIGNUM *rem;
+BIGNUM *m;
+BIGNUM *d;
+BN_CTX *ctx;
+ {
+#if 0 /* The old slow way */
+ int i,nm,nd;
+ BIGNUM *dv;
+
+ if (BN_ucmp(m,d) < 0)
+ return((BN_copy(rem,m) == NULL)?0:1);
+
+ dv= &(ctx->bn[ctx->tos]);
+
+ if (!BN_copy(rem,m)) return(0);
+
+ nm=BN_num_bits(rem);
+ nd=BN_num_bits(d);
+ if (!BN_lshift(dv,d,nm-nd)) return(0);
+ for (i=nm-nd; i>=0; i--)
+ {
+ if (BN_cmp(rem,dv) >= 0)
+ {
+ if (!BN_sub(rem,rem,dv)) return(0);
+ }
+ if (!BN_rshift1(dv,dv)) return(0);
+ }
+ return(1);
+#else
+ return(BN_div(NULL,rem,m,d,ctx));
+#endif
+ }
+
diff --git a/crypto/bn/bn_err.c b/crypto/bn/bn_err.c
index 029ae810d5..4c29c1ac55 100644
--- a/crypto/bn/bn_err.c
+++ b/crypto/bn/bn_err.c
@@ -78,15 +78,18 @@ static ERR_STRING_DATA BN_str_functs[]=
{ERR_PACK(0,BN_F_BN_MPI2BN,0), "BN_mpi2bn"},
{ERR_PACK(0,BN_F_BN_NEW,0), "BN_new"},
{ERR_PACK(0,BN_F_BN_RAND,0), "BN_rand"},
+{ERR_PACK(0,BN_F_BN_USUB,0), "BN_usub"},
{0,NULL},
};
static ERR_STRING_DATA BN_str_reasons[]=
{
+{BN_R_ARG2_LT_ARG3 ,"arg2 lt arg3"},
{BN_R_BAD_RECIPROCAL ,"bad reciprocal"},
{BN_R_CALLED_WITH_EVEN_MODULUS ,"called with even modulus"},
{BN_R_DIV_BY_ZERO ,"div by zero"},
{BN_R_ENCODING_ERROR ,"encoding error"},
+{BN_R_EXPAND_ON_STATIC_BIGNUM_DATA ,"expand on static bignum data"},
{BN_R_INVALID_LENGTH ,"invalid length"},
{BN_R_NOT_INITALISED ,"not initalised"},
{BN_R_NO_INVERSE ,"no inverse"},
@@ -99,8 +102,8 @@ void ERR_load_BN_strings()
{
static int init=1;
- if (init);
- {;
+ if (init)
+ {
init=0;
#ifndef NO_ERR
ERR_load_strings(ERR_LIB_BN,BN_str_functs);
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index c056a5083f..44f47e7eb2 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ -60,6 +60,8 @@
#include "cryptlib.h"
#include "bn_lcl.h"
+#define TABLE_SIZE 16
+
/* slow but works */
int BN_mod_mul(ret, a, b, m, ctx)
BIGNUM *ret;
@@ -71,11 +73,15 @@ BN_CTX *ctx;
BIGNUM *t;
int r=0;
- t=ctx->bn[ctx->tos++];
+ bn_check_top(a);
+ bn_check_top(b);
+ bn_check_top(m);
+
+ t= &(ctx->bn[ctx->tos++]);
if (a == b)
{ if (!BN_sqr(t,a,ctx)) goto err; }
else
- { if (!BN_mul(t,a,b)) goto err; }
+ { if (!BN_mul(t,a,b,ctx)) goto err; }
if (!BN_mod(ret,t,m,ctx)) goto err;
r=1;
err:
@@ -92,8 +98,8 @@ BN_CTX *ctx;
int i,bits,ret=0;
BIGNUM *v,*tmp;
- v=ctx->bn[ctx->tos++];
- tmp=ctx->bn[ctx->tos++];
+ v= &(ctx->bn[ctx->tos++]);
+ tmp= &(ctx->bn[ctx->tos++]);
if (BN_copy(v,a) == NULL) goto err;
bits=BN_num_bits(p);
@@ -108,7 +114,7 @@ BN_CTX *ctx;
if (!BN_mod(v,tmp,m,ctx)) goto err;
if (BN_is_bit_set(p,i))
{
- if (!BN_mul(tmp,r,v)) goto err;
+ if (!BN_mul(tmp,r,v,ctx)) goto err;
if (!BN_mod(r,tmp,m,ctx)) goto err;
}
}
@@ -128,8 +134,8 @@ BN_CTX *ctx;
int i,bits,ret=0;
BIGNUM *v,*tmp;
- v=ctx->bn[ctx->tos++];
- tmp=ctx->bn[ctx->tos++];
+ v= &(ctx->bn[ctx->tos++]);
+ tmp= &(ctx->bn[ctx->tos++]);
if (BN_copy(v,a) == NULL) goto err;
bits=BN_num_bits(p);
@@ -143,7 +149,7 @@ BN_CTX *ctx;
if (!BN_sqr(tmp,v,ctx)) goto err;
if (BN_is_bit_set(p,i))
{
- if (!BN_mul(tmp,r,v)) goto err;
+ if (!BN_mul(tmp,r,v,ctx)) goto err;
}
}
ret=1;
@@ -161,6 +167,10 @@ BN_CTX *ctx;
{
int ret;
+ bn_check_top(a);
+ bn_check_top(p);
+ bn_check_top(m);
+
#ifdef MONT_MUL_MOD
/* I have finally been able to take out this pre-condition of
* the top bit being set. It was caused by an error in BN_div
@@ -189,13 +199,13 @@ BIGNUM *p;
BIGNUM *m;
BN_CTX *ctx;
{
- int nb,i,j,bits,ret=0,wstart,wend,window,wvalue;
- int start=1;
- BIGNUM *d,*aa;
- BIGNUM *val[16];
+ int i,j,bits,ret=0,wstart,wend,window,wvalue;
+ int start=1,ts=0;
+ BIGNUM *aa;
+ BIGNUM val[TABLE_SIZE];
+ BN_RECP_CTX recp;
- d=ctx->bn[ctx->tos++];
- aa=ctx->bn[ctx->tos++];
+ aa= &(ctx->bn[ctx->tos++]);
bits=BN_num_bits(p);
if (bits == 0)
@@ -203,12 +213,14 @@ BN_CTX *ctx;
BN_one(r);
return(1);
}
- nb=BN_reciprocal(d,m,ctx);
- if (nb == -1) goto err;
+ BN_RECP_CTX_init(&recp);
+ if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
+
+ BN_init(&(val[0]));
+ ts=1;
- val[0]=BN_new();
- if (!BN_mod(val[0],a,m,ctx)) goto err; /* 1 */
- if (!BN_mod_mul_reciprocal(aa,val[0],val[0],m,d,nb,ctx))
+ if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */
+ if (!BN_mod_mul_reciprocal(aa,&(val[0]),&(val[0]),&recp,ctx))
goto err; /* 2 */
if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */
@@ -223,12 +235,11 @@ BN_CTX *ctx;
j=1<<(window-1);
for (i=1; i<j; i++)
{
- val[i]=BN_new();
- if (!BN_mod_mul_reciprocal(val[i],val[i-1],aa,m,d,nb,ctx))
+ BN_init(&val[i]);
+ if (!BN_mod_mul_reciprocal(&(val[i]),&(val[i-1]),aa,&recp,ctx))
goto err;
}
- for (; i<16; i++)
- val[i]=NULL;
+ ts=i;
start=1; /* This is used to avoid multiplication etc
* when there is only the value '1' in the
@@ -244,7 +255,7 @@ BN_CTX *ctx;
if (BN_is_bit_set(p,wstart) == 0)
{
if (!start)
- if (!BN_mod_mul_reciprocal(r,r,r,m,d,nb,ctx))
+ if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
goto err;
if (wstart == 0) break;
wstart--;
@@ -274,12 +285,12 @@ BN_CTX *ctx;
if (!start)
for (i=0; i<j; i++)
{
- if (!BN_mod_mul_reciprocal(r,r,r,m,d,nb,ctx))
+ if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
goto err;
}
/* wvalue will be an odd number < 2^window */
- if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],m,d,nb,ctx))
+ if (!BN_mod_mul_reciprocal(r,r,&(val[wvalue>>1]),&recp,ctx))
goto err;
/* move the 'window' down further */
@@ -290,35 +301,40 @@ BN_CTX *ctx;
}
ret=1;
err:
- ctx->tos-=2;
- for (i=0; i<16; i++)
- if (val[i] != NULL) BN_clear_free(val[i]);
+ ctx->tos--;
+ for (i=0; i<ts; i++)
+ BN_clear_free(&(val[i]));
+ BN_RECP_CTX_free(&recp);
return(ret);
}
/* #endif */
/* #ifdef MONT_MUL_MOD */
-int BN_mod_exp_mont(r,a,p,m,ctx,in_mont)
-BIGNUM *r;
+int BN_mod_exp_mont(rr,a,p,m,ctx,in_mont)
+BIGNUM *rr;
BIGNUM *a;
BIGNUM *p;
BIGNUM *m;
BN_CTX *ctx;
BN_MONT_CTX *in_mont;
{
-#define TABLE_SIZE 16
int i,j,bits,ret=0,wstart,wend,window,wvalue;
- int start=1;
- BIGNUM *d,*aa;
- BIGNUM *val[TABLE_SIZE];
+ int start=1,ts=0;
+ BIGNUM *d,*aa,*r;
+ BIGNUM val[TABLE_SIZE];
BN_MONT_CTX *mont=NULL;
+ bn_check_top(a);
+ bn_check_top(p);
+ bn_check_top(m);
+
if (!(m->d[0] & 1))
{
BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
return(0);
}
- d=ctx->bn[ctx->tos++];
+ d= &(ctx->bn[ctx->tos++]);
+ r= &(ctx->bn[ctx->tos++]);
bits=BN_num_bits(p);
if (bits == 0)
{
@@ -339,22 +355,23 @@ BN_MONT_CTX *in_mont;
if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
}
- val[0]=BN_new();
+ BN_init(&val[0]);
+ ts=1;
if (BN_ucmp(a,m) >= 0)
{
- BN_mod(val[0],a,m,ctx);
- aa=val[0];
+ BN_mod(&(val[0]),a,m,ctx);
+ aa= &(val[0]);
}
else
aa=a;
- if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */
- if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
+ if (!BN_to_montgomery(&(val[0]),aa,mont,ctx)) goto err; /* 1 */
+ if (!BN_mod_mul_montgomery(d,&(val[0]),&(val[0]),mont,ctx)) goto err; /* 2 */
if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */
window=1;
- else if (bits > 250)
+ else if (bits >= 256)
window=5; /* max size of window */
- else if (bits >= 120)
+ else if (bits >= 128)
window=4;
else
window=3;
@@ -362,12 +379,11 @@ BN_MONT_CTX *in_mont;
j=1<<(window-1);
for (i=1; i<j; i++)
{
- val[i]=BN_new();
- if (!BN_mod_mul_montgomery(val[i],val[i-1],d,mont,ctx))
+ BN_init(&(val[i]));
+ if (!BN_mod_mul_montgomery(&(val[i]),&(val[i-1]),d,mont,ctx))
goto err;
}
- for (; i<TABLE_SIZE; i++)
- val[i]=NULL;
+ ts=i;
start=1; /* This is used to avoid multiplication etc
* when there is only the value '1' in the
@@ -419,7 +435,7 @@ BN_MONT_CTX *in_mont;
}
/* wvalue will be an odd number < 2^window */
- if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
+ if (!BN_mod_mul_montgomery(r,r,&(val[wvalue>>1]),mont,ctx))
goto err;
/* move the 'window' down further */
@@ -428,13 +444,13 @@ BN_MONT_CTX *in_mont;
start=0;
if (wstart < 0) break;
}
- BN_from_montgomery(r,r,mont,ctx);
+ BN_from_montgomery(rr,r,mont,ctx);
ret=1;
err:
if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
- ctx->tos--;
- for (i=0; i<TABLE_SIZE; i++)
- if (val[i] != NULL) BN_clear_free(val[i]);
+ ctx->tos-=2;
+ for (i=0; i<ts; i++)
+ BN_clear_free(&(val[i]));
return(ret);
}
/* #endif */
@@ -447,12 +463,12 @@ BIGNUM *p;
BIGNUM *m;
BN_CTX *ctx;
{
- int i,j,bits,ret=0,wstart,wend,window,wvalue;
+ int i,j,bits,ret=0,wstart,wend,window,wvalue,ts=0;
int start=1;
BIGNUM *d;
- BIGNUM *val[16];
+ BIGNUM val[TABLE_SIZE];
- d=ctx->bn[ctx->tos++];
+ d= &(ctx->bn[ctx->tos++]);
bits=BN_num_bits(p);
if (bits == 0)
@@ -461,9 +477,10 @@ BN_CTX *ctx;
return(1);
}
- val[0]=BN_new();
- if (!BN_mod(val[0],a,m,ctx)) goto err; /* 1 */
- if (!BN_mod_mul(d,val[0],val[0],m,ctx))
+ BN_init(&(val[0]));
+ ts=1;
+ if (!BN_mod(&(val[0]),a,m,ctx)) goto err; /* 1 */
+ if (!BN_mod_mul(d,&(val[0]),&(val[0]),m,ctx))
goto err; /* 2 */
if (bits <= 17) /* This is probably 3 or 0x10001, so just do singles */
@@ -478,12 +495,11 @@ BN_CTX *ctx;
j=1<<(window-1);
for (i=1; i<j; i++)
{
- val[i]=BN_new();
- if (!BN_mod_mul(val[i],val[i-1],d,m,ctx))
+ BN_init(&(val[i]));
+ if (!BN_mod_mul(&(val[i]),&(val[i-1]),d,m,ctx))
goto err;
}
- for (; i<16; i++)
- val[i]=NULL;
+ ts=i;
start=1; /* This is used to avoid multiplication etc
* when there is only the value '1' in the
@@ -534,7 +550,7 @@ BN_CTX *ctx;
}
/* wvalue will be an odd number < 2^window */
- if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx))
+ if (!BN_mod_mul(r,r,&(val[wvalue>>1]),m,ctx))
goto err;
/* move the 'window' down further */
@@ -546,8 +562,8 @@ BN_CTX *ctx;
ret=1;
err:
ctx->tos--;
- for (i=0; i<16; i++)
- if (val[i] != NULL) BN_clear_free(val[i]);
+ for (i=0; i<ts; i++)
+ BN_clear_free(&(val[i]));
return(ret);
}
diff --git a/crypto/bn/bn_exp2.c b/crypto/bn/bn_exp2.c
new file mode 100644
index 0000000000..eface739b3
--- /dev/null
+++ b/crypto/bn/bn_exp2.c
@@ -0,0 +1,202 @@
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn_lcl.h"
+
+/* I've done some timing with different table sizes.
+ * The main hassle is that even with bits set at 3, this requires
+ * 63 BIGNUMs to store the pre-calculated values.
+ * 512 1024
+ * bits=1 75.4% 79.4%
+ * bits=2 61.2% 62.4%
+ * bits=3 61.3% 59.3%
+ * The lack of speed improvment is also a function of the pre-calculation
+ * which could be removed.
+ */
+#define EXP2_TABLE_BITS 2 /* 1 2 3 4 5 */
+#define EXP2_TABLE_SIZE 4 /* 2 4 8 16 32 */
+
+int BN_mod_exp2_mont(rr,a1,p1,a2,p2,m,ctx,in_mont)
+BIGNUM *rr;
+BIGNUM *a1;
+BIGNUM *p1;
+BIGNUM *a2;
+BIGNUM *p2;
+BIGNUM *m;
+BN_CTX *ctx;
+BN_MONT_CTX *in_mont;
+ {
+ int i,j,k,bits,bits1,bits2,ret=0,wstart,wend,window,xvalue,yvalue;
+ int start=1,ts=0,x,y;
+ BIGNUM *d,*aa1,*aa2,*r;
+ BIGNUM val[EXP2_TABLE_SIZE][EXP2_TABLE_SIZE];
+ BN_MONT_CTX *mont=NULL;
+
+ bn_check_top(a1);
+ bn_check_top(p1);
+ bn_check_top(a2);
+ bn_check_top(p2);
+ bn_check_top(m);
+
+ if (!(m->d[0] & 1))
+ {
+ BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
+ return(0);
+ }
+ d= &(ctx->bn[ctx->tos++]);
+ r= &(ctx->bn[ctx->tos++]);
+ bits1=BN_num_bits(p1);
+ bits2=BN_num_bits(p2);
+ if ((bits1 == 0) && (bits2 == 0))
+ {
+ BN_one(r);
+ return(1);
+ }
+ bits=(bits1 > bits2)?bits1:bits2;
+
+ /* If this is not done, things will break in the montgomery
+ * part */
+
+ if (in_mont != NULL)
+ mont=in_mont;
+ else
+ {
+ if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
+ if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
+ }
+
+ BN_init(&(val[0][0]));
+ BN_init(&(val[1][1]));
+ BN_init(&(val[0][1]));
+ BN_init(&(val[1][0]));
+ ts=1;
+ if (BN_ucmp(a1,m) >= 0)
+ {
+ BN_mod(&(val[1][0]),a1,m,ctx);
+ aa1= &(val[1][0]);
+ }
+ else
+ aa1=a1;
+ if (BN_ucmp(a2,m) >= 0)
+ {
+ BN_mod(&(val[0][1]),a2,m,ctx);
+ aa2= &(val[0][1]);
+ }
+ else
+ aa2=a2;
+ if (!BN_to_montgomery(&(val[1][0]),aa1,mont,ctx)) goto err;
+ if (!BN_to_montgomery(&(val[0][1]),aa2,mont,ctx)) goto err;
+ if (!BN_mod_mul_montgomery(&(val[1][1]),
+ &(val[1][0]),&(val[0][1]),mont,ctx))
+ goto err;
+
+#if 0
+ if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */
+ window=1;
+ else if (bits > 250)
+ window=5; /* max size of window */
+ else if (bits >= 120)
+ window=4;
+ else
+ window=3;
+#else
+ window=EXP2_TABLE_BITS;
+#endif
+
+ k=1<<window;
+ for (x=0; x<k; x++)
+ {
+ if (x >= 2)
+ {
+ BN_init(&(val[x][0]));
+ BN_init(&(val[x][1]));
+ if (!BN_mod_mul_montgomery(&(val[x][0]),
+ &(val[1][0]),&(val[x-1][0]),mont,ctx)) goto err;
+ if (!BN_mod_mul_montgomery(&(val[x][1]),
+ &(val[1][0]),&(val[x-1][1]),mont,ctx)) goto err;
+ }
+ for (y=2; y<k; y++)
+ {
+ BN_init(&(val[x][y]));
+ if (!BN_mod_mul_montgomery(&(val[x][y]),
+ &(val[x][y-1]),&(val[0][1]),mont,ctx))
+ goto err;
+ }
+ }
+ ts=k;
+
+ start=1; /* This is used to avoid multiplication etc
+ * when there is only the value '1' in the
+ * buffer. */
+ xvalue=0; /* The 'x value' of the window */
+ yvalue=0; /* The 'y value' of the window */
+ wstart=bits-1; /* The top bit of the window */
+ wend=0; /* The bottom bit of the window */
+
+ if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
+ for (;;)
+ {
+ xvalue=BN_is_bit_set(p1,wstart);
+ yvalue=BN_is_bit_set(p2,wstart);
+ if (!(xvalue || yvalue))
+ {
+ if (!start)
+ {
+ if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
+ goto err;
+ }
+ wstart--;
+ if (wstart < 0) break;
+ continue;
+ }
+ /* We now have wstart on a 'set' bit, we now need to work out
+ * how bit a window to do. To do this we need to scan
+ * forward until the last set bit before the end of the
+ * window */
+ j=wstart;
+ /* xvalue=BN_is_bit_set(p1,wstart); already set */
+ /* yvalue=BN_is_bit_set(p1,wstart); already set */
+ wend=0;
+ for (i=1; i<window; i++)
+ {
+ if (wstart-i < 0) break;
+ xvalue+=xvalue;
+ xvalue|=BN_is_bit_set(p1,wstart-i);
+ yvalue+=yvalue;
+ yvalue|=BN_is_bit_set(p2,wstart-i);
+ }
+
+ /* i is the size of the current window */
+ /* add the 'bytes above' */
+ if (!start)
+ for (j=0; j<i; j++)
+ {
+ if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
+ goto err;
+ }
+
+ /* wvalue will be an odd number < 2^window */
+ if (xvalue || yvalue)
+ {
+ if (!BN_mod_mul_montgomery(r,r,&(val[xvalue][yvalue]),
+ mont,ctx)) goto err;
+ }
+
+ /* move the 'window' down further */
+ wstart-=i;
+ start=0;
+ if (wstart < 0) break;
+ }
+ BN_from_montgomery(rr,r,mont,ctx);
+ ret=1;
+err:
+ if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
+ ctx->tos-=2;
+ for (i=0; i<ts; i++)
+ {
+ for (j=0; j<ts; j++)
+ {
+ BN_clear_free(&(val[i][j]));
+ }
+ }
+ return(ret);
+ }
diff --git a/crypto/bn/bn_gcd.c b/crypto/bn/bn_gcd.c
index 071bba3b4b..c80cecdc8d 100644
--- a/crypto/bn/bn_gcd.c
+++ b/crypto/bn/bn_gcd.c
@@ -73,8 +73,11 @@ BN_CTX *ctx;
BIGNUM *a,*b,*t;
int ret=0;
- a=ctx->bn[ctx->tos];
- b=ctx->bn[ctx->tos+1];
+ bn_check_top(in_a);
+ bn_check_top(in_b);
+
+ a= &(ctx->bn[ctx->tos]);
+ b= &(ctx->bn[ctx->tos+1]);
if (BN_copy(a,in_a) == NULL) goto err;
if (BN_copy(b,in_b) == NULL) goto err;
@@ -95,6 +98,9 @@ BIGNUM *a,*b;
BIGNUM *t;
int shifts=0;
+ bn_check_top(a);
+ bn_check_top(b);
+
for (;;)
{
if (BN_is_zero(b))
@@ -142,23 +148,30 @@ err:
}
/* solves ax == 1 (mod n) */
-BIGNUM *BN_mod_inverse(a, n, ctx)
+BIGNUM *BN_mod_inverse(in, a, n, ctx)
+BIGNUM *in;
BIGNUM *a;
BIGNUM *n;
BN_CTX *ctx;
{
BIGNUM *A,*B,*X,*Y,*M,*D,*R;
- BIGNUM *ret=NULL,*T;
+ BIGNUM *T,*ret=NULL;
int sign;
- A=ctx->bn[ctx->tos];
- B=ctx->bn[ctx->tos+1];
- X=ctx->bn[ctx->tos+2];
- D=ctx->bn[ctx->tos+3];
- M=ctx->bn[ctx->tos+4];
- Y=ctx->bn[ctx->tos+5];
+ bn_check_top(a);
+ bn_check_top(n);
+
+ A= &(ctx->bn[ctx->tos]);
+ B= &(ctx->bn[ctx->tos+1]);
+ X= &(ctx->bn[ctx->tos+2]);
+ D= &(ctx->bn[ctx->tos+3]);
+ M= &(ctx->bn[ctx->tos+4]);
+ Y= &(ctx->bn[ctx->tos+5]);
ctx->tos+=6;
- R=BN_new();
+ if (in == NULL)
+ R=BN_new();
+ else
+ R=in;
if (R == NULL) goto err;
BN_zero(X);
@@ -175,7 +188,7 @@ BN_CTX *ctx;
B=M;
/* T has a struct, M does not */
- if (!BN_mul(T,D,X)) goto err;
+ if (!BN_mul(T,D,X,ctx)) goto err;
if (!BN_add(T,T,Y)) goto err;
M=Y;
Y=X;
@@ -196,7 +209,7 @@ BN_CTX *ctx;
}
ret=R;
err:
- if ((ret == NULL) && (R != NULL)) BN_free(R);
+ if ((ret == NULL) && (in == NULL)) BN_free(R);
ctx->tos-=6;
return(ret);
}
diff --git a/crypto/bn/bn_lcl.h b/crypto/bn/bn_lcl.h
index edfd788338..70b0787d8f 100644
--- a/crypto/bn/bn_lcl.h
+++ b/crypto/bn/bn_lcl.h
@@ -65,17 +65,68 @@
extern "C" {
#endif
+/* Pentium pro 16,16,16,32,64 */
+/* Alpha 16,16,16,16.64 */
+#define BN_MULL_SIZE_NORMAL (16) // 32
+#define BN_MUL_RECURSIVE_SIZE_NORMAL (16) // 32 /* less than */
+#define BN_SQR_RECURSIVE_SIZE_NORMAL (16) // 32
+#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) // 32
+#define BN_MONT_CTX_SET_SIZE_WORD (64) // 32
+
+#ifndef BN_MUL_COMBA
+#define bn_mul_comba8(r,a,b) bn_mul_normal(r,a,8,b,8)
+#define bn_mul_comba4(r,a,b) bn_mul_normal(r,a,4,b,4)
+/* This is probably faster than using the C code - I need to check */
+#define bn_sqr_comba8(r,a) bn_mul_normal(r,a,8,a,8)
+#define bn_sqr_comba4(r,a) bn_mul_normal(r,a,4,a,4)
+#endif
+
/*************************************************************
* Using the long long type
*/
#define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
#define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
-#define bn_fix_top(a) \
- { \
- BN_ULONG *fix_top_l; \
- for (fix_top_l= &((a)->d[(a)->top-1]); (a)->top > 0; (a)->top--) \
- if (*(fix_top_l--)) break; \
+/* These are used for internal error checking and are not normally used */
+#ifdef BN_DEBUG
+#define bn_check_top(a) \
+ { if (((a)->top < 0) || ((a)->top > (a)->max)) \
+ { char *nullp=NULL; *nullp='z'; } }
+#define bn_check_num(a) if ((a) < 0) { char *nullp=NULL; *nullp='z'; }
+#else
+#define bn_check_top(a)
+#define bn_check_num(a)
+#endif
+
+/* This macro is to add extra stuff for development checking */
+#ifdef BN_DEBUG
+#define bn_set_max(r) ((r)->max=(r)->top,BN_set_flags((r),BN_FLG_STATIC_DATA))
+#else
+#define bn_set_max(r)
+#endif
+
+/* These macros are used to 'take' a section of a bignum for read only use */
+#define bn_set_low(r,a,n) \
+ { \
+ (r)->top=((a)->top > (n))?(n):(a)->top; \
+ (r)->d=(a)->d; \
+ (r)->neg=(a)->neg; \
+ (r)->flags|=BN_FLG_STATIC_DATA; \
+ bn_set_max(r); \
+ }
+
+#define bn_set_high(r,a,n) \
+ { \
+ if ((a)->top > (n)) \
+ { \
+ (r)->top=(a)->top-n; \
+ (r)->d= &((a)->d[n]); \
+ } \
+ else \
+ (r)->top=0; \
+ (r)->neg=(a)->neg; \
+ (r)->flags|=BN_FLG_STATIC_DATA; \
+ bn_set_max(r); \
}
/* #define bn_expand(n,b) ((((b)/BN_BITS2) <= (n)->max)?(n):bn_expand2((n),(b))) */
@@ -175,6 +226,17 @@ extern "C" {
#endif
+extern int bn_limit_bits;
+extern int bn_limit_num; /* (1<<bn_limit_bits) */
+/* Recursive 'low' limit */
+extern int bn_limit_bits_low;
+extern int bn_limit_num_low; /* (1<<bn_limit_bits_low) */
+/* Do modified 'high' part calculation' */
+extern int bn_limit_bits_high;
+extern int bn_limit_num_high; /* (1<<bn_limit_bits_high) */
+extern int bn_limit_bits_mont;
+extern int bn_limit_num_mont; /* (1<<bn_limit_bits_mont) */
+
#ifndef NOPROTO
BIGNUM *bn_expand2(BIGNUM *b, int bits);
@@ -197,3 +259,8 @@ BN_ULONG bn_add_words();
#endif
#endif
+
+void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,BN_ULONG *t);
+void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2, BN_ULONG *t);
+
+
diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c
index bfe7628ad4..7ea216f919 100644
--- a/crypto/bn/bn_lib.c
+++ b/crypto/bn/bn_lib.c
@@ -60,7 +60,68 @@
#include "cryptlib.h"
#include "bn_lcl.h"
-char *BN_version="Big Number part of SSLeay 0.9.0b 29-Jun-1998";
+char *BN_version="Big Number part of SSLeay 0.9.1a 06-Jul-1998";
+
+/* For a 32 bit machine
+ * 2 - 4 == 128
+ * 3 - 8 == 256
+ * 4 - 16 == 512
+ * 5 - 32 == 1024
+ * 6 - 64 == 2048
+ * 7 - 128 == 4096
+ * 8 - 256 == 8192
+ */
+int bn_limit_bits=0;
+int bn_limit_num=8; /* (1<<bn_limit_bits) */
+int bn_limit_bits_low=0;
+int bn_limit_num_low=8; /* (1<<bn_limit_bits_low) */
+int bn_limit_bits_high=0;
+int bn_limit_num_high=8; /* (1<<bn_limit_bits_high) */
+int bn_limit_bits_mont=0;
+int bn_limit_num_mont=8; /* (1<<bn_limit_bits_mont) */
+
+void BN_set_params(mult,high,low,mont)
+int mult,high,low,mont;
+ {
+ if (mult >= 0)
+ {
+ if (mult > (sizeof(int)*8)-1)
+ mult=sizeof(int)*8-1;
+ bn_limit_bits=mult;
+ bn_limit_num=1<<mult;
+ }
+ if (high >= 0)
+ {
+ if (high > (sizeof(int)*8)-1)
+ high=sizeof(int)*8-1;
+ bn_limit_bits_high=high;
+ bn_limit_num_high=1<<high;
+ }
+ if (low >= 0)
+ {
+ if (low > (sizeof(int)*8)-1)
+ low=sizeof(int)*8-1;
+ bn_limit_bits_low=low;
+ bn_limit_num_low=1<<low;
+ }
+ if (mont >= 0)
+ {
+ if (mont > (sizeof(int)*8)-1)
+ mont=sizeof(int)*8-1;
+ bn_limit_bits_mont=mont;
+ bn_limit_num_mont=1<<mont;
+ }
+ }
+
+int BN_get_params(which)
+int which;
+ {
+ if (which == 0) return(bn_limit_bits);
+ else if (which == 1) return(bn_limit_bits_high);
+ else if (which == 2) return(bn_limit_bits_low);
+ else if (which == 3) return(bn_limit_bits_mont);
+ else return(0);
+ }
BIGNUM *BN_value_one()
{
@@ -111,24 +172,24 @@ BN_ULONG l;
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
};
-#ifdef SIXTY_FOUR_BIT_LONG
+#if defined(SIXTY_FOUR_BIT_LONG)
if (l & 0xffffffff00000000L)
{
if (l & 0xffff000000000000L)
{
if (l & 0xff00000000000000L)
{
- return(bits[l>>56]+56);
+ return(bits[(int)(l>>56)]+56);
}
- else return(bits[l>>48]+48);
+ else return(bits[(int)(l>>48)]+48);
}
else
{
if (l & 0x0000ff0000000000L)
{
- return(bits[l>>40]+40);
+ return(bits[(int)(l>>40)]+40);
}
- else return(bits[l>>32]+32);
+ else return(bits[(int)(l>>32)]+32);
}
}
else
@@ -140,17 +201,17 @@ BN_ULONG l;
{
if (l & 0xff00000000000000LL)
{
- return(bits[l>>56]+56);
+ return(bits[(int)(l>>56)]+56);
}
- else return(bits[l>>48]+48);
+ else return(bits[(int)(l>>48)]+48);
}
else
{
if (l & 0x0000ff0000000000LL)
{
- return(bits[l>>40]+40);
+ return(bits[(int)(l>>40)]+40);
}
- else return(bits[l>>32]+32);
+ else return(bits[(int)(l>>32)]+32);
}
}
else
@@ -161,18 +222,18 @@ BN_ULONG l;
if (l & 0xffff0000L)
{
if (l & 0xff000000L)
- return(bits[l>>24L]+24);
- else return(bits[l>>16L]+16);
+ return(bits[(int)(l>>24L)]+24);
+ else return(bits[(int)(l>>16L)]+16);
}
else
#endif
{
#if defined(SIXTEEN_BIT) || defined(THIRTY_TWO_BIT) || defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG)
if (l & 0xff00L)
- return(bits[l>>8]+8);
+ return(bits[(int)(l>>8)]+8);
else
#endif
- return(bits[l ] );
+ return(bits[(int)(l )] );
}
}
}
@@ -183,6 +244,8 @@ BIGNUM *a;
BN_ULONG l;
int i;
+ bn_check_top(a);
+
if (a->top == 0) return(0);
l=a->d[a->top-1];
i=(a->top-1)*BN_BITS2;
@@ -199,74 +262,78 @@ BIGNUM *a;
void BN_clear_free(a)
BIGNUM *a;
{
+ int i;
+
if (a == NULL) return;
if (a->d != NULL)
{
memset(a->d,0,a->max*sizeof(a->d[0]));
- Free(a->d);
+ if (!(BN_get_flags(a,BN_FLG_STATIC_DATA)))
+ Free(a->d);
}
+ i=BN_get_flags(a,BN_FLG_MALLOCED);
memset(a,0,sizeof(BIGNUM));
- Free(a);
+ if (i)
+ Free(a);
}
void BN_free(a)
BIGNUM *a;
{
if (a == NULL) return;
- if (a->d != NULL) Free(a->d);
- Free(a);
+ if ((a->d != NULL) && !(BN_get_flags(a,BN_FLG_STATIC_DATA)))
+ Free(a->d);
+ a->flags|=BN_FLG_FREE; /* REMOVE? */
+ if (a->flags & BN_FLG_MALLOCED)
+ Free(a);
+ }
+
+void BN_init(a)
+BIGNUM *a;
+ {
+ memset(a,0,sizeof(BIGNUM));
}
BIGNUM *BN_new()
{
BIGNUM *ret;
- BN_ULONG *p;
- ret=(BIGNUM *)Malloc(sizeof(BIGNUM));
- if (ret == NULL) goto err;
+ if ((ret=(BIGNUM *)Malloc(sizeof(BIGNUM))) == NULL)
+ {
+ BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE);
+ return(NULL);
+ }
+ ret->flags=BN_FLG_MALLOCED;
ret->top=0;
ret->neg=0;
- ret->max=(BN_DEFAULT_BITS/BN_BITS2);
- p=(BN_ULONG *)Malloc(sizeof(BN_ULONG)*(ret->max+1));
- if (p == NULL) goto err;
- ret->d=p;
-
- memset(p,0,(ret->max+1)*sizeof(p[0]));
+ ret->max=0;
+ ret->d=NULL;
return(ret);
-err:
- BNerr(BN_F_BN_NEW,ERR_R_MALLOC_FAILURE);
- return(NULL);
}
+
BN_CTX *BN_CTX_new()
{
BN_CTX *ret;
- BIGNUM *n;
- int i,j;
ret=(BN_CTX *)Malloc(sizeof(BN_CTX));
- if (ret == NULL) goto err2;
-
- for (i=0; i<BN_CTX_NUM; i++)
+ if (ret == NULL)
{
- n=BN_new();
- if (n == NULL) goto err;
- ret->bn[i]=n;
+ BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE);
+ return(NULL);
}
- /* There is actually an extra one, this is for debugging my
- * stuff */
- ret->bn[BN_CTX_NUM]=NULL;
-
- ret->tos=0;
+ BN_CTX_init(ret);
+ ret->flags=BN_FLG_MALLOCED;
return(ret);
-err:
- for (j=0; j<i; j++)
- BN_free(ret->bn[j]);
- Free(ret);
-err2:
- BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE);
- return(NULL);
+ }
+
+void BN_CTX_init(ctx)
+BN_CTX *ctx;
+ {
+ memset(ctx,0,sizeof(BN_CTX));
+ ctx->tos=0;
+ ctx->flags=0;
}
void BN_CTX_free(c)
@@ -275,26 +342,98 @@ BN_CTX *c;
int i;
for (i=0; i<BN_CTX_NUM; i++)
- BN_clear_free(c->bn[i]);
- Free(c);
+ BN_clear_free(&(c->bn[i]));
+ if (c->flags & BN_FLG_MALLOCED)
+ Free(c);
}
BIGNUM *bn_expand2(b, words)
BIGNUM *b;
int words;
{
- BN_ULONG *p;
+ BN_ULONG *A,*B,*a;
+ int i,j;
+
+ bn_check_top(b);
if (words > b->max)
{
- p=(BN_ULONG *)Realloc(b->d,sizeof(BN_ULONG)*(words+1));
- if (p == NULL)
+ bn_check_top(b);
+ if (BN_get_flags(b,BN_FLG_STATIC_DATA))
+ {
+ BNerr(BN_F_BN_EXPAND2,BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
+ return(NULL);
+ }
+ a=A=(BN_ULONG *)Malloc(sizeof(BN_ULONG)*(words+1));
+ if (A == NULL)
{
BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE);
return(NULL);
}
- b->d=p;
- memset(&(p[b->max]),0,((words+1)-b->max)*sizeof(BN_ULONG));
+memset(A,0x5c,sizeof(BN_ULONG)*(words+1));
+#if 1
+ B=b->d;
+ if (B != NULL)
+ {
+ for (i=b->top&(~7); i>0; i-=8)
+ {
+ A[0]=B[0]; A[1]=B[1]; A[2]=B[2]; A[3]=B[3];
+ A[4]=B[4]; A[5]=B[5]; A[6]=B[6]; A[7]=B[7];
+ A+=8;
+ B+=8;
+ }
+ switch (b->top&7)
+ {
+ case 7:
+ A[6]=B[6];
+ case 6:
+ A[5]=B[5];
+ case 5:
+ A[4]=B[4];
+ case 4:
+ A[3]=B[3];
+ case 3:
+ A[2]=B[2];
+ case 2:
+ A[1]=B[1];
+ case 1:
+ A[0]=B[0];
+ case 0:
+ /* I need the 'case 0' entry for utrix cc.
+ * If the optimiser is turned on, it does the
+ * switch table by doing
+ * a=top&7
+ * a--;
+ * goto jump_table[a];
+ * If top is 0, this makes us jump to 0xffffffc
+ * which is rather bad :-(.
+ * eric 23-Apr-1998
+ */
+ ;
+ }
+ B= &(b->d[b->top]);
+ j=b->max-8;
+ for (i=b->top; i<j; i+=8)
+ {
+ B[0]=0; B[1]=0; B[2]=0; B[3]=0;
+ B[4]=0; B[5]=0; B[6]=0; B[7]=0;
+ B+=8;
+ }
+ for (j+=8; i<j; i++)
+ {
+ B[0]=0;
+ B++;
+ }
+#else
+ memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
+#endif
+
+/* memset(&(p[b->max]),0,((words+1)-b->max)*sizeof(BN_ULONG)); */
+/* { int i; for (i=b->max; i<words+1; i++) p[i]=i;} */
+ Free(b->d);
+ }
+
+ b->d=a;
b->max=words;
}
return(b);
@@ -305,6 +444,8 @@ BIGNUM *a;
{
BIGNUM *r;
+ bn_check_top(a);
+
r=BN_new();
if (r == NULL) return(NULL);
return((BIGNUM *)BN_copy(r,a));
@@ -317,6 +458,8 @@ BIGNUM *b;
int i;
BN_ULONG *A,*B;
+ bn_check_top(b);
+
if (a == b) return(a);
if (bn_wexpand(a,b->top) == NULL) return(NULL);
@@ -352,6 +495,18 @@ BIGNUM *b;
A[1]=B[1];
case 1:
A[0]=B[0];
+ case 0:
+ /* I need the 'case 0' entry for utrix cc.
+ * If the optimiser is turned on, it does the
+ * switch table by doing
+ * a=top&7
+ * a--;
+ * goto jump_table[a];
+ * If top is 0, this makes us jump to 0xffffffc which is
+ * rather bad :-(.
+ * eric 23-Apr-1998
+ */
+ ;
}
#else
memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
@@ -359,7 +514,7 @@ BIGNUM *b;
/* memset(&(a->d[b->top]),0,sizeof(a->d[0])*(a->max-b->top));*/
a->top=b->top;
- if (a->top == 0)
+ if ((a->top == 0) && (a->d != NULL))
a->d[0]=0;
a->neg=b->neg;
return(a);
@@ -368,24 +523,21 @@ BIGNUM *b;
void BN_clear(a)
BIGNUM *a;
{
- memset(a->d,0,a->max*sizeof(a->d[0]));
+ if (a->d != NULL)
+ memset(a->d,0,a->max*sizeof(a->d[0]));
a->top=0;
a->neg=0;
}
-unsigned long BN_get_word(a)
+BN_ULONG BN_get_word(a)
BIGNUM *a;
{
int i,n;
- unsigned long ret=0;
+ BN_ULONG ret=0;
n=BN_num_bytes(a);
- if (n > sizeof(unsigned long))
-#ifdef SIXTY_FOUR_BIT_LONG
+ if (n > sizeof(BN_ULONG))
return(BN_MASK2);
-#else
- return(0xFFFFFFFFL);
-#endif
for (i=a->top-1; i>=0; i--)
{
#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
@@ -399,12 +551,12 @@ BIGNUM *a;
int BN_set_word(a,w)
BIGNUM *a;
-unsigned long w;
+BN_ULONG w;
{
int i,n;
- if (bn_expand(a,sizeof(unsigned long)*8) == NULL) return(0);
+ if (bn_expand(a,sizeof(BN_ULONG)*8) == NULL) return(0);
- n=sizeof(unsigned long)/BN_BYTES;
+ n=sizeof(BN_ULONG)/BN_BYTES;
a->neg=0;
a->top=0;
a->d[0]=(BN_ULONG)w&BN_MASK2;
@@ -488,6 +640,9 @@ BIGNUM *b;
int i;
BN_ULONG t1,t2,*ap,*bp;
+ bn_check_top(a);
+ bn_check_top(b);
+
i=a->top-b->top;
if (i != 0) return(i);
ap=a->d;
@@ -519,6 +674,10 @@ BIGNUM *b;
else
return(0);
}
+
+ bn_check_top(a);
+ bn_check_top(b);
+
if (a->neg != b->neg)
{
if (a->neg)
@@ -545,13 +704,15 @@ int BN_set_bit(a, n)
BIGNUM *a;
int n;
{
- int i,j;
+ int i,j,k;
i=n/BN_BITS2;
j=n%BN_BITS2;
if (a->top <= i)
{
- if (bn_expand(a,n) == NULL) return(0);
+ if (bn_wexpand(a,i+1) == NULL) return(0);
+ for(k=a->top; k<i+1; k++)
+ a->d[k]=0;
a->top=i+1;
}
@@ -570,6 +731,7 @@ int n;
if (a->top <= i) return(0);
a->d[i]&=(~(1L<<j));
+ bn_fix_top(a);
return(1);
}
@@ -601,11 +763,27 @@ int n;
{
a->top=w+1;
a->d[w]&= ~(BN_MASK2<<b);
- while ((w >= 0) && (a->d[w] == 0))
- {
- a->top--;
- w--;
- }
}
+ bn_fix_top(a);
return(1);
}
+
+int bn_cmp_words(a,b,n)
+BN_ULONG *a,*b;
+int n;
+ {
+ int i;
+ BN_ULONG aa,bb;
+
+ aa=a[n-1];
+ bb=b[n-1];
+ if (aa != bb) return((aa > bb)?1:-1);
+ for (i=n-2; i>=0; i--)
+ {
+ aa=a[i];
+ bb=b[i];
+ if (aa != bb) return((aa > bb)?1:-1);
+ }
+ return(0);
+ }
+
diff --git a/crypto/bn/bn_mont.c b/crypto/bn/bn_mont.c
index e435df61f8..e0aa3c769d 100644
--- a/crypto/bn/bn_mont.c
+++ b/crypto/bn/bn_mont.c
@@ -60,161 +60,208 @@
#include "cryptlib.h"
#include "bn_lcl.h"
+#define MONT_WORD
+
int BN_mod_mul_montgomery(r,a,b,mont,ctx)
BIGNUM *r,*a,*b;
BN_MONT_CTX *mont;
BN_CTX *ctx;
{
- BIGNUM *tmp;
+ BIGNUM *tmp,*tmp2;
+
+ tmp= &(ctx->bn[ctx->tos]);
+ tmp2= &(ctx->bn[ctx->tos]);
+ ctx->tos+=2;
- tmp=ctx->bn[ctx->tos++];
+ bn_check_top(tmp);
+ bn_check_top(tmp2);
if (a == b)
{
+#if 0
+ bn_wexpand(tmp,a->top*2);
+ bn_wexpand(tmp2,a->top*4);
+ bn_sqr_recursive(tmp->d,a->d,a->top,tmp2->d);
+ tmp->top=a->top*2;
+ if (tmp->d[tmp->top-1] == 0)
+ tmp->top--;
+#else
if (!BN_sqr(tmp,a,ctx)) goto err;
+#endif
}
else
{
- if (!BN_mul(tmp,a,b)) goto err;
+ if (!BN_mul(tmp,a,b,ctx)) goto err;
}
/* reduce from aRR to aR */
if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
- ctx->tos--;
+ ctx->tos-=2;
return(1);
err:
return(0);
}
-#define MONT_WORD
-
-#ifdef MONT_WORD
int BN_from_montgomery(ret,a,mont,ctx)
BIGNUM *ret;
BIGNUM *a;
BN_MONT_CTX *mont;
BN_CTX *ctx;
{
- BIGNUM *n,*t1,*r;
- BN_ULONG *ap,*np,*rp,n0,v;
- int al,nl,max,i,x,ri;
- int retn=0;
+#ifdef BN_RECURSION
+ if (mont->use_word)
+#endif
+ {
+ BIGNUM *n,*r;
+ BN_ULONG *ap,*np,*rp,n0,v,*nrp;
+ int al,nl,max,i,x,ri;
+ int retn=0;
- t1=ctx->bn[ctx->tos];
- r=ctx->bn[ctx->tos+1];
+ r= &(ctx->bn[ctx->tos]);
- if (!BN_copy(r,a)) goto err;
- n=mont->N;
+ if (!BN_copy(r,a)) goto err1;
+ n= &(mont->N);
- ap=a->d;
- /* mont->ri is the size of mont->N in bits/words */
- al=ri=mont->ri/BN_BITS2;
+ ap=a->d;
+ /* mont->ri is the size of mont->N in bits/words */
+ al=ri=mont->ri/BN_BITS2;
- nl=n->top;
- if ((al == 0) || (nl == 0)) { r->top=0; return(1); }
+ nl=n->top;
+ if ((al == 0) || (nl == 0)) { r->top=0; return(1); }
- max=(nl+al+1); /* allow for overflow (no?) XXX */
- if (bn_wexpand(r,max) == NULL) goto err;
- if (bn_wexpand(ret,max) == NULL) goto err;
+ max=(nl+al+1); /* allow for overflow (no?) XXX */
+ if (bn_wexpand(r,max) == NULL) goto err1;
+ if (bn_wexpand(ret,max) == NULL) goto err1;
- r->neg=a->neg^n->neg;
- np=n->d;
- rp=r->d;
+ r->neg=a->neg^n->neg;
+ np=n->d;
+ rp=r->d;
+ nrp= &(r->d[nl]);
- /* clear the top words of T */
+ /* clear the top words of T */
#if 1
- for (i=r->top; i<max; i++) /* memset? XXX */
- r->d[i]=0;
+ for (i=r->top; i<max; i++) /* memset? XXX */
+ r->d[i]=0;
#else
- memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
+ memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
#endif
- r->top=max;
- n0=mont->n0;
+ r->top=max;
+ n0=mont->n0;
- for (i=0; i<nl; i++)
- {
-#if 0
- int x1,x2;
-
- if (i+4 > nl)
+#ifdef BN_COUNT
+printf("word BN_from_montgomery %d * %d\n",nl,nl);
+#endif
+ for (i=0; i<nl; i++)
{
- x2=nl;
- x1=0;
+ v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
+ nrp++;
+ rp++;
+ if (((nrp[-1]+=v)&BN_MASK2) >= v)
+ continue;
+ else
+ {
+ if (((++nrp[0])&BN_MASK2) != 0) continue;
+ if (((++nrp[1])&BN_MASK2) != 0) continue;
+ for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
+ }
}
+ bn_fix_top(r);
+
+ /* mont->ri will be a multiple of the word size */
+#if 0
+ BN_rshift(ret,r,mont->ri);
+#else
+ x=ri;
+ rp=ret->d;
+ ap= &(r->d[x]);
+ if (r->top < x)
+ al=0;
else
+ al=r->top-x;
+ ret->top=al;
+ al-=4;
+ for (i=0; i<al; i+=4)
{
- x2=i+4;
- x1=nl-x2;
+ BN_ULONG t1,t2,t3,t4;
+
+ t1=ap[i+0];
+ t2=ap[i+1];
+ t3=ap[i+2];
+ t4=ap[i+3];
+ rp[i+0]=t1;
+ rp[i+1]=t2;
+ rp[i+2]=t3;
+ rp[i+3]=t4;
}
- v=bn_mul_add_words(&(rp[x1]),&(np[x1]),x2,(rp[x1]*n0)&BN_MASK2);
-#else
- v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
+ al+=4;
+ for (; i<al; i++)
+ rp[i]=ap[i];
#endif
- if (((rp[nl]+=v)&BN_MASK2) < v)
+ if (BN_ucmp(ret, &(mont->N)) >= 0)
{
- for (x=(nl+1); (((++rp[x])&BN_MASK2) == 0); x++)
- ;
+ BN_usub(ret,ret,&(mont->N)); /* XXX */
}
- rp++;
+ retn=1;
+err1:
+ return(retn);
}
- while (r->d[r->top-1] == 0)
- r->top--;
-
- /* mont->ri will be a multiple of the word size */
-#if 0
- BN_rshift(ret,r,mont->ri);
-#else
- ap=r->d;
- rp=ret->d;
- x=ri;
- al=r->top-x;
- for (i=0; i<al; i++)
+#ifdef BN_RECURSION
+ else /* bignum version */
{
- rp[i]=ap[i+x];
- }
- ret->top=al;
+ BIGNUM *t1,*t2,*t3;
+ int j,i;
+
+#ifdef BN_COUNT
+printf("number BN_from_montgomery\n");
#endif
- if (BN_ucmp(ret,mont->N) >= 0)
- {
- bn_qsub(ret,ret,mont->N); /* XXX */
- }
- retn=1;
-err:
- return(retn);
- }
-#else
-int BN_from_montgomery(r,a,mont,ctx)
-BIGNUM *r;
-BIGNUM *a;
-BN_MONT_CTX *mont;
-BN_CTX *ctx;
- {
- BIGNUM *t1,*t2;
+ t1= &(ctx->bn[ctx->tos]);
+ t2= &(ctx->bn[ctx->tos+1]);
+ t3= &(ctx->bn[ctx->tos+2]);
- t1=ctx->bn[ctx->tos];
- t2=ctx->bn[ctx->tos+1];
+ i=mont->Ni.top;
+ bn_wexpand(ret,i); /* perhaps only i*2 */
+ bn_wexpand(t1,i*4); /* perhaps only i*2 */
+ bn_wexpand(t2,i*2); /* perhaps only i */
- if (!BN_copy(t1,a)) goto err;
- /* can cheat */
- BN_mask_bits(t1,mont->ri);
+ bn_mul_low_recursive(t2->d,a->d,mont->Ni.d,i,t1->d);
- if (!BN_mul(t2,t1,mont->Ni)) goto err;
- BN_mask_bits(t2,mont->ri);
+ BN_zero(t3);
+ BN_set_bit(t3,mont->N.top*BN_BITS2);
+ bn_sub_words(t3->d,t3->d,a->d,i);
+ bn_mul_high(ret->d,t2->d,mont->N.d,t3->d,i,t1->d);
- if (!BN_mul(t1,t2,mont->N)) goto err;
- if (!BN_add(t2,a,t1)) goto err;
- BN_rshift(r,t2,mont->ri);
+ /* hmm... if a is between i and 2*i, things are bad */
+ if (a->top > i)
+ {
+ j=bn_add_words(ret->d,ret->d,&(a->d[i]),i);
+ if (j) /* overflow */
+ bn_sub_words(ret->d,ret->d,mont->N.d,i);
+ }
+ ret->top=i;
+ bn_fix_top(ret);
+ if (a->d[0])
+ BN_add_word(ret,1); /* Always? */
+ else /* Very very rare */
+ {
+ for (i=1; i<mont->N.top-1; i++)
+ {
+ if (a->d[i])
+ {
+ BN_add_word(ret,1); /* Always? */
+ break;
+ }
+ }
+ }
- if (BN_ucmp(r,mont->N) >= 0)
- bn_qsub(r,r,mont->N);
+ if (BN_ucmp(ret,&(mont->N)) >= 0)
+ BN_usub(ret,ret,&(mont->N));
- return(1);
-err:
- return(0);
- }
+ return(1);
+ }
#endif
+ }
BN_MONT_CTX *BN_MONT_CTX_new()
{
@@ -222,25 +269,31 @@ BN_MONT_CTX *BN_MONT_CTX_new()
if ((ret=(BN_MONT_CTX *)Malloc(sizeof(BN_MONT_CTX))) == NULL)
return(NULL);
- ret->ri=0;
- ret->RR=BN_new();
- ret->N=BN_new();
- ret->Ni=NULL;
- if ((ret->RR == NULL) || (ret->N == NULL))
- {
- BN_MONT_CTX_free(ret);
- return(NULL);
- }
+
+ BN_MONT_CTX_init(ret);
+ ret->flags=BN_FLG_MALLOCED;
return(ret);
}
+void BN_MONT_CTX_init(ctx)
+BN_MONT_CTX *ctx;
+ {
+ ctx->use_word=0;
+ ctx->ri=0;
+ BN_init(&(ctx->RR));
+ BN_init(&(ctx->N));
+ BN_init(&(ctx->Ni));
+ ctx->flags=0;
+ }
+
void BN_MONT_CTX_free(mont)
BN_MONT_CTX *mont;
{
- if (mont->RR != NULL) BN_free(mont->RR);
- if (mont->N != NULL) BN_free(mont->N);
- if (mont->Ni != NULL) BN_free(mont->Ni);
- Free(mont);
+ BN_free(&(mont->RR));
+ BN_free(&(mont->N));
+ BN_free(&(mont->Ni));
+ if (mont->flags & BN_FLG_MALLOCED)
+ Free(mont);
}
int BN_MONT_CTX_set(mont,mod,ctx)
@@ -248,59 +301,109 @@ BN_MONT_CTX *mont;
BIGNUM *mod;
BN_CTX *ctx;
{
- BIGNUM *Ri=NULL,*R=NULL;
-
- if (mont->RR == NULL) mont->RR=BN_new();
- if (mont->N == NULL) mont->N=BN_new();
-
- R=mont->RR; /* grab RR as a temp */
- BN_copy(mont->N,mod); /* Set N */
-
-#ifdef MONT_WORD
-{
- BIGNUM tmod;
- BN_ULONG buf[2];
- /* int z; */
-
- mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
- BN_lshift(R,BN_value_one(),BN_BITS2); /* R */
- /* I was bad, this modification of a passed variable was
- * breaking the multithreaded stuff :-(
- * z=mod->top;
- * mod->top=1; */
-
- buf[0]=mod->d[0];
- buf[1]=0;
- tmod.d=buf;
- tmod.top=1;
- tmod.max=mod->max;
- tmod.neg=mod->neg;
-
- if ((Ri=BN_mod_inverse(R,&tmod,ctx)) == NULL) goto err; /* Ri */
- BN_lshift(Ri,Ri,BN_BITS2); /* R*Ri */
- bn_qsub(Ri,Ri,BN_value_one()); /* R*Ri - 1 */
- BN_div(Ri,NULL,Ri,&tmod,ctx);
- mont->n0=Ri->d[0];
- BN_free(Ri);
- /* mod->top=z; */
-}
+ BIGNUM Ri,*R;
+
+ BN_init(&Ri);
+ R= &(mont->RR); /* grab RR as a temp */
+ BN_copy(&(mont->N),mod); /* Set N */
+
+#ifdef BN_RECURSION
+ if (mont->N.top < BN_MONT_CTX_SET_SIZE_WORD)
+#endif
+ {
+ BIGNUM tmod;
+ BN_ULONG buf[2];
+
+ mont->use_word=1;
+
+ mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
+ BN_zero(R);
+ BN_set_bit(R,BN_BITS2);
+ /* I was bad, this modification of a passed variable was
+ * breaking the multithreaded stuff :-(
+ * z=mod->top;
+ * mod->top=1; */
+
+ buf[0]=mod->d[0];
+ buf[1]=0;
+ tmod.d=buf;
+ tmod.top=1;
+ tmod.max=mod->max;
+ tmod.neg=mod->neg;
+
+ if ((BN_mod_inverse(&Ri,R,&tmod,ctx)) == NULL)
+ goto err;
+ BN_lshift(&Ri,&Ri,BN_BITS2); /* R*Ri */
+ if (!BN_is_zero(&Ri))
+ {
+#if 1
+ BN_sub_word(&Ri,1);
+#else
+ BN_usub(&Ri,&Ri,BN_value_one()); /* R*Ri - 1 */
+#endif
+ }
+ else
+ {
+ /* This is not common..., 1 in BN_MASK2,
+ * It happens when buf[0] was == 1. So for 8 bit,
+ * this is 1/256, 16bit, 1 in 2^16 etc.
+ */
+ BN_set_word(&Ri,BN_MASK2);
+ }
+ BN_div(&Ri,NULL,&Ri,&tmod,ctx);
+ mont->n0=Ri.d[0];
+ BN_free(&Ri);
+ /* mod->top=z; */
+ }
+#ifdef BN_RECURSION
+ else
+ {
+ mont->use_word=0;
+ mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
+#if 1
+ BN_zero(R);
+ BN_set_bit(R,mont->ri);
#else
- mont->ri=BN_num_bits(mod);
- BN_lshift(R,BN_value_one(),mont->ri); /* R */
- if ((Ri=BN_mod_inverse(R,mod,ctx)) == NULL) goto err; /* Ri */
- BN_lshift(Ri,Ri,mont->ri); /* R*Ri */
- bn_qsub(Ri,Ri,BN_value_one()); /* R*Ri - 1 */
- BN_div(Ri,NULL,Ri,mod,ctx);
- if (mont->Ni != NULL) BN_free(mont->Ni);
- mont->Ni=Ri; /* Ni=(R*Ri-1)/N */
+ BN_lshift(R,BN_value_one(),mont->ri); /* R */
+#endif
+ if ((BN_mod_inverse(&Ri,R,mod,ctx)) == NULL)
+ goto err;
+ BN_lshift(&Ri,&Ri,mont->ri); /* R*Ri */
+#if 1
+ BN_sub_word(&Ri,1);
+#else
+ BN_usub(&Ri,&Ri,BN_value_one()); /* R*Ri - 1 */
+#endif
+ BN_div(&(mont->Ni),NULL,&Ri,mod,ctx);
+ BN_free(&Ri);
+ }
#endif
/* setup RR for conversions */
+#if 1
+ BN_zero(&(mont->RR));
+ BN_set_bit(&(mont->RR),mont->ri*2);
+#else
BN_lshift(mont->RR,BN_value_one(),mont->ri*2);
- BN_mod(mont->RR,mont->RR,mont->N,ctx);
+#endif
+ BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx);
return(1);
err:
return(0);
}
+BN_MONT_CTX *BN_MONT_CTX_copy(to, from)
+BN_MONT_CTX *to, *from;
+ {
+ if (to == from) return(to);
+
+ BN_copy(&(to->RR),&(from->RR));
+ BN_copy(&(to->N),&(from->N));
+ BN_copy(&(to->Ni),&(from->Ni));
+ to->use_word=from->use_word;
+ to->ri=from->ri;
+ to->n0=from->n0;
+ return(to);
+ }
+
diff --git a/crypto/bn/bn_mpi.c b/crypto/bn/bn_mpi.c
index 53945c1057..84b0317081 100644
--- a/crypto/bn/bn_mpi.c
+++ b/crypto/bn/bn_mpi.c
@@ -103,7 +103,7 @@ BIGNUM *a;
BNerr(BN_F_BN_MPI2BN,BN_R_INVALID_LENGTH);
return(NULL);
}
- len=(d[0]<<24)|(d[1]<<16)|(d[2]<<8)|d[3];
+ len=((long)d[0]<<24)|((long)d[1]<<16)|((int)d[2]<<8)|(int)d[3];
if ((len+4) != n)
{
BNerr(BN_F_BN_MPI2BN,BN_R_ENCODING_ERROR);
diff --git a/crypto/bn/bn_mul.c b/crypto/bn/bn_mul.c
index d0c04e1d4b..fc7bf974fd 100644
--- a/crypto/bn/bn_mul.c
+++ b/crypto/bn/bn_mul.c
@@ -60,150 +60,703 @@
#include "cryptlib.h"
#include "bn_lcl.h"
-/* r must be different to a and b */
-/* int BN_mmul(r, a, b) */
-int BN_mul(r, a, b)
-BIGNUM *r;
-BIGNUM *a;
-BIGNUM *b;
+#ifdef BN_RECURSION
+/* r is 2*n2 words in size,
+ * a and b are both n2 words in size.
+ * n2 must be a power of 2.
+ * We multiply and return the result.
+ * t must be 2*n2 words in size
+ * We calulate
+ * a[0]*b[0]
+ * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
+ * a[1]*b[1]
+ */
+void bn_mul_recursive(r,a,b,n2,t)
+BN_ULONG *r,*a,*b;
+int n2;
+BN_ULONG *t;
{
- int i;
- int max,al,bl;
- BN_ULONG *ap,*bp,*rp;
+ int n=n2/2,c1,c2;
+ unsigned int neg,zero;
+ BN_ULONG ln,lo,*p;
- al=a->top;
- bl=b->top;
- if ((al == 0) || (bl == 0))
+#ifdef BN_COUNT
+printf(" bn_mul_recursive %d * %d\n",n2,n2);
+#endif
+#ifdef BN_MUL_COMBA
+/* if (n2 == 4)
{
- r->top=0;
- return(1);
+ bn_mul_comba4(r,a,b);
+ return;
+ }
+ else */ if (n2 == 8)
+ {
+ bn_mul_comba8(r,a,b);
+ return;
+ }
+#endif
+ if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
+ {
+ /* This should not happen */
+ bn_mul_normal(r,a,n2,b,n2);
+ return;
+ }
+ /* r=(a[0]-a[1])*(b[1]-b[0]) */
+ c1=bn_cmp_words(a,&(a[n]),n);
+ c2=bn_cmp_words(&(b[n]),b,n);
+ zero=neg=0;
+ switch (c1*3+c2)
+ {
+ case -4:
+ bn_sub_words(t, &(a[n]),a, n); /* - */
+ bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
+ break;
+ case -3:
+ zero=1;
+ break;
+ case -2:
+ bn_sub_words(t, &(a[n]),a, n); /* - */
+ bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */
+ neg=1;
+ break;
+ case -1:
+ case 0:
+ case 1:
+ zero=1;
+ break;
+ case 2:
+ bn_sub_words(t, a, &(a[n]),n); /* + */
+ bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
+ neg=1;
+ break;
+ case 3:
+ zero=1;
+ break;
+ case 4:
+ bn_sub_words(t, a, &(a[n]),n);
+ bn_sub_words(&(t[n]),&(b[n]),b, n);
+ break;
}
- max=(al+bl);
- if (bn_wexpand(r,max) == NULL) return(0);
- r->top=max;
- r->neg=a->neg^b->neg;
- ap=a->d;
- bp=b->d;
- rp=r->d;
+#ifdef BN_MUL_COMBA
+ if (n == 4)
+ {
+ if (!zero)
+ bn_mul_comba4(&(t[n2]),t,&(t[n]));
+ else
+ memset(&(t[n2]),0,8*sizeof(BN_ULONG));
+
+ bn_mul_comba4(r,a,b);
+ bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
+ }
+ else if (n == 8)
+ {
+ if (!zero)
+ bn_mul_comba8(&(t[n2]),t,&(t[n]));
+ else
+ memset(&(t[n2]),0,16*sizeof(BN_ULONG));
+
+ bn_mul_comba8(r,a,b);
+ bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
+ }
+ else
+#endif
+ {
+ p= &(t[n2*2]);
+ if (!zero)
+ bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
+ else
+ memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
+ bn_mul_recursive(r,a,b,n,p);
+ bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p);
+ }
- rp[al]=bn_mul_words(rp,ap,al,*(bp++));
- rp++;
- for (i=1; i<bl; i++)
+ /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
+ * r[10] holds (a[0]*b[0])
+ * r[32] holds (b[1]*b[1])
+ */
+
+ c1=bn_add_words(t,r,&(r[n2]),n2);
+
+ if (neg) /* if t[32] is negative */
{
- rp[al]=bn_mul_add_words(rp,ap,al,*(bp++));
- rp++;
+ c1-=bn_sub_words(&(t[n2]),t,&(t[n2]),n2);
+ }
+ else
+ {
+ /* Might have a carry */
+ c1+=bn_add_words(&(t[n2]),&(t[n2]),t,n2);
}
- if (r->d[max-1] == 0) r->top--;
- return(1);
- }
-#if 0
-#include "stack.h"
+ /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
+ * r[10] holds (a[0]*b[0])
+ * r[32] holds (b[1]*b[1])
+ * c1 holds the carry bits
+ */
+ c1+=bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2);
+ if (c1)
+ {
+ p= &(r[n+n2]);
+ lo= *p;
+ ln=(lo+c1)&BN_MASK2;
+ *p=ln;
-int limit=16;
+ /* The overflow will stop before we over write
+ * words we should not overwrite */
+ if (ln < (BN_ULONG)c1)
+ {
+ do {
+ p++;
+ lo= *p;
+ ln=(lo+1)&BN_MASK2;
+ *p=ln;
+ } while (ln == 0);
+ }
+ }
+ }
-typedef struct bn_pool_st
+/* n+tn is the word length
+ * t needs to be n*4 is size, as does r */
+void bn_mul_part_recursive(r,a,b,tn,n,t)
+BN_ULONG *r,*a,*b;
+int tn,n;
+BN_ULONG *t;
{
- int used;
- int tos;
- STACK *sk;
- } BN_POOL;
+ int i,j,n2=n*2;
+ unsigned int c1;
+ BN_ULONG ln,lo,*p;
-BIGNUM *BN_POOL_push(bp)
-BN_POOL *bp;
- {
- BIGNUM *ret;
+#ifdef BN_COUNT
+printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n);
+#endif
+ if (n < 8)
+ {
+ i=tn+n;
+ bn_mul_normal(r,a,i,b,i);
+ return;
+ }
+
+ /* r=(a[0]-a[1])*(b[1]-b[0]) */
+ bn_sub_words(t, a, &(a[n]),n); /* + */
+ bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
- if (bp->used >= bp->tos)
+/* if (n == 4)
+ {
+ bn_mul_comba4(&(t[n2]),t,&(t[n]));
+ bn_mul_comba4(r,a,b);
+ bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
+ memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
+ }
+ else */ if (n == 8)
{
- ret=BN_new();
- sk_push(bp->sk,(char *)ret);
- bp->tos++;
- bp->used++;
+ bn_mul_comba8(&(t[n2]),t,&(t[n]));
+ bn_mul_comba8(r,a,b);
+ bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
+ memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
}
else
{
- ret=(BIGNUM *)sk_value(bp->sk,bp->used);
- bp->used++;
+ p= &(t[n2*2]);
+ bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
+ bn_mul_recursive(r,a,b,n,p);
+ i=n/2;
+ /* If there is only a bottom half to the number,
+ * just do it */
+ j=tn-i;
+ if (j == 0)
+ {
+ bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p);
+ memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
+ }
+ else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
+ {
+ bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
+ j,i,p);
+ memset(&(r[n2+tn*2]),0,
+ sizeof(BN_ULONG)*(n2-tn*2));
+ }
+ else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
+ {
+ memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
+ if (tn < BN_MUL_RECURSIVE_SIZE_NORMAL)
+ {
+ bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
+ }
+ else
+ {
+ for (;;)
+ {
+ i/=2;
+ if (i < tn)
+ {
+ bn_mul_part_recursive(&(r[n2]),
+ &(a[n]),&(b[n]),
+ tn-i,i,p);
+ break;
+ }
+ else if (i == tn)
+ {
+ bn_mul_recursive(&(r[n2]),
+ &(a[n]),&(b[n]),
+ i,p);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
+ * r[10] holds (a[0]*b[0])
+ * r[32] holds (b[1]*b[1])
+ */
+
+ c1=bn_add_words(t,r,&(r[n2]),n2);
+ c1-=bn_sub_words(&(t[n2]),t,&(t[n2]),n2);
+
+ /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
+ * r[10] holds (a[0]*b[0])
+ * r[32] holds (b[1]*b[1])
+ * c1 holds the carry bits
+ */
+ c1+=bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2);
+ if (c1)
+ {
+ p= &(r[n+n2]);
+ lo= *p;
+ ln=(lo+c1)&BN_MASK2;
+ *p=ln;
+
+ /* The overflow will stop before we over write
+ * words we should not overwrite */
+ if (ln < c1)
+ {
+ do {
+ p++;
+ lo= *p;
+ ln=(lo+1)&BN_MASK2;
+ *p=ln;
+ } while (ln == 0);
+ }
}
- return(ret);
}
-void BN_POOL_pop(bp,num)
-BN_POOL *bp;
-int num;
+/* a and b must be the same size, which is n2.
+ * r needs to be n2 words and t needs to be n2*2
+ */
+void bn_mul_low_recursive(r,a,b,n2,t)
+BN_ULONG *r,*a,*b;
+int n2;
+BN_ULONG *t;
{
- bp->used-=num;
+ int n=n2/2;
+
+#ifdef BN_COUNT
+printf(" bn_mul_low_recursive %d * %d\n",n2,n2);
+#endif
+
+ bn_mul_recursive(r,a,b,n,&(t[0]));
+ if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
+ {
+ bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
+ bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
+ bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
+ bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
+ }
+ else
+ {
+ bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
+ bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
+ bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
+ bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
+ }
}
-int BN_mul(r,a,b)
-BIGNUM *r,*a,*b;
+/* a and b must be the same size, which is n2.
+ * r needs to be n2 words and t needs to be n2*2
+ * l is the low words of the output.
+ * t needs to be n2*3
+ */
+void bn_mul_high(r,a,b,l,n2,t)
+BN_ULONG *r,*a,*b,*l;
+int n2;
+BN_ULONG *t;
{
- static BN_POOL bp;
- static init=1;
+ int i,n;
+ int c1,c2;
+ int neg,oneg,zero;
+ BN_ULONG ll,lc,*lp,*mp;
+
+#ifdef BN_COUNT
+printf(" bn_mul_high %d * %d\n",n2,n2);
+#endif
+ n=(n2+1)/2;
+
+ /* Calculate (al-ah)*(bh-bl) */
+ neg=zero=0;
+ c1=bn_cmp_words(&(a[0]),&(a[n]),n);
+ c2=bn_cmp_words(&(b[n]),&(b[0]),n);
+ switch (c1*3+c2)
+ {
+ case -4:
+ bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
+ bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
+ break;
+ case -3:
+ zero=1;
+ break;
+ case -2:
+ bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
+ bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
+ neg=1;
+ break;
+ case -1:
+ case 0:
+ case 1:
+ zero=1;
+ break;
+ case 2:
+ bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
+ bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
+ neg=1;
+ break;
+ case 3:
+ zero=1;
+ break;
+ case 4:
+ bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
+ bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
+ break;
+ }
+
+ oneg=neg;
+ /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
+ /* r[10] = (a[1]*b[1]) */
+#ifdef BN_MUL_COMBA
+ if (n == 8)
+ {
+ bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
+ bn_mul_comba8(r,&(a[n]),&(b[n]));
+ }
+ else
+#endif
+ {
+ bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2]));
+ bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2]));
+ }
- if (init)
+ /* s0 == low(al*bl)
+ * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
+ * We know s0 and s1 so the only unknown is high(al*bl)
+ * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
+ * high(al*bl) == s1 - (r[0]+l[0]+t[0])
+ */
+ if (l != NULL)
{
- bp.used=0;
- bp.tos=0;
- bp.sk=sk_new_null();
- init=0;
+ lp= &(t[n2+n]);
+ c1=bn_add_words(lp,&(r[0]),&(l[0]),n);
+ }
+ else
+ {
+ c1=0;
+ lp= &(r[0]);
+ }
+
+ if (neg)
+ neg=bn_sub_words(&(t[n2]),lp,&(t[0]),n);
+ else
+ {
+ bn_add_words(&(t[n2]),lp,&(t[0]),n);
+ neg=0;
+ }
+
+ if (l != NULL)
+ {
+ bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
+ }
+ else
+ {
+ lp= &(t[n2+n]);
+ mp= &(t[n2]);
+ for (i=0; i<n; i++)
+ lp[i]=((~mp[i])+1)&BN_MASK2;
+ }
+
+ /* s[0] = low(al*bl)
+ * t[3] = high(al*bl)
+ * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
+ * r[10] = (a[1]*b[1])
+ */
+ /* R[10] = al*bl
+ * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
+ * R[32] = ah*bh
+ */
+ /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
+ * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
+ * R[3]=r[1]+(carry/borrow)
+ */
+ if (l != NULL)
+ {
+ lp= &(t[n2]);
+ c1= bn_add_words(lp,&(t[n2+n]),&(l[0]),n);
+ }
+ else
+ {
+ lp= &(t[n2+n]);
+ c1=0;
+ }
+ c1+=bn_add_words(&(t[n2]),lp, &(r[0]),n);
+ if (oneg)
+ c1-=bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n);
+ else
+ c1+=bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n);
+
+ c2 =bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n);
+ c2+=bn_add_words(&(r[0]),&(r[0]),&(r[n]),n);
+ if (oneg)
+ c2-=bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n);
+ else
+ c2+=bn_add_words(&(r[0]),&(r[0]),&(t[n]),n);
+
+ if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
+ {
+ i=0;
+ if (c1 > 0)
+ {
+ lc=c1;
+ do {
+ ll=(r[i]+lc)&BN_MASK2;
+ r[i++]=ll;
+ lc=(lc > ll);
+ } while (lc);
+ }
+ else
+ {
+ lc= -c1;
+ do {
+ ll=r[i];
+ r[i++]=(ll-lc)&BN_MASK2;
+ lc=(lc > ll);
+ } while (lc);
+ }
+ }
+ if (c2 != 0) /* Add starting at r[1] */
+ {
+ i=n;
+ if (c2 > 0)
+ {
+ lc=c2;
+ do {
+ ll=(r[i]+lc)&BN_MASK2;
+ r[i++]=ll;
+ lc=(lc > ll);
+ } while (lc);
+ }
+ else
+ {
+ lc= -c2;
+ do {
+ ll=r[i];
+ r[i++]=(ll-lc)&BN_MASK2;
+ lc=(lc > ll);
+ } while (lc);
+ }
}
- return(BN_mm(r,a,b,&bp));
}
+#endif
-/* r must be different to a and b */
-int BN_mm(m, A, B, bp)
-BIGNUM *m,*A,*B;
-BN_POOL *bp;
+int BN_mul(r,a,b,ctx)
+BIGNUM *r,*a,*b;
+BN_CTX *ctx;
{
- int i,num;
- int an,bn;
- BIGNUM *a,*b,*c,*d,*ac,*bd;
+ int top,i,j,k,al,bl;
+ BIGNUM *t;
+
+ t=NULL;
+ i=j=k=0;
+
+#ifdef BN_COUNT
+printf("BN_mul %d * %d\n",a->top,b->top);
+#endif
+
+ bn_check_top(a);
+ bn_check_top(b);
+ bn_check_top(r);
- an=A->top;
- bn=B->top;
- if ((an <= limit) || (bn <= limit))
+ al=a->top;
+ bl=b->top;
+ r->neg=a->neg^b->neg;
+
+ if ((al == 0) || (bl == 0))
{
- return(BN_mmul(m,A,B));
+ BN_zero(r);
+ return(1);
}
+ top=al+bl;
+#if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
+ if (al == bl)
+ {
+# ifdef BN_MUL_COMBA
+/* if (al == 4)
+ {
+ if (bn_wexpand(r,8) == NULL) return(0);
+ r->top=8;
+ bn_mul_comba4(r->d,a->d,b->d);
+ goto end;
+ }
+ else */ if (al == 8)
+ {
+ if (bn_wexpand(r,16) == NULL) return(0);
+ r->top=16;
+ bn_mul_comba8(r->d,a->d,b->d);
+ goto end;
+ }
+ else
+# endif
+#ifdef BN_RECURSION
+ if (al < BN_MULL_SIZE_NORMAL)
+#endif
+ {
+ if (bn_wexpand(r,top) == NULL) return(0);
+ r->top=top;
+ bn_mul_normal(r->d,a->d,al,b->d,bl);
+ goto end;
+ }
+# ifdef BN_RECURSION
+ goto symetric;
+# endif
+ }
+#endif
+#ifdef BN_RECURSION
+ else if ((al < BN_MULL_SIZE_NORMAL) || (bl < BN_MULL_SIZE_NORMAL))
+ {
+ if (bn_wexpand(r,top) == NULL) return(0);
+ r->top=top;
+ bn_mul_normal(r->d,a->d,al,b->d,bl);
+ goto end;
+ }
+ else
+ {
+ i=(al-bl);
+ if ((i == 1) && !BN_get_flags(b,BN_FLG_STATIC_DATA))
+ {
+ bn_wexpand(b,al);
+ b->d[bl]=0;
+ bl++;
+ goto symetric;
+ }
+ else if ((i == -1) && !BN_get_flags(a,BN_FLG_STATIC_DATA))
+ {
+ bn_wexpand(a,bl);
+ a->d[al]=0;
+ al++;
+ goto symetric;
+ }
+ }
+#endif
- a=BN_POOL_push(bp);
- b=BN_POOL_push(bp);
- c=BN_POOL_push(bp);
- d=BN_POOL_push(bp);
- ac=BN_POOL_push(bp);
- bd=BN_POOL_push(bp);
+ /* asymetric and >= 4 */
+ if (bn_wexpand(r,top) == NULL) return(0);
+ r->top=top;
+ bn_mul_normal(r->d,a->d,al,b->d,bl);
- num=(an <= bn)?an:bn;
- num=1<<(BN_num_bits_word(num-1)-1);
+#ifdef BN_RECURSION
+ if (0)
+ {
+symetric:
+ /* symetric and > 4 */
+ /* 16 or larger */
+ j=BN_num_bits_word((BN_ULONG)al);
+ j=1<<(j-1);
+ k=j+j;
+ t= &(ctx->bn[ctx->tos]);
+ if (al == j) /* exact multiple */
+ {
+ bn_wexpand(t,k*2);
+ bn_wexpand(r,k*2);
+ bn_mul_recursive(r->d,a->d,b->d,al,t->d);
+ }
+ else
+ {
+ bn_wexpand(a,k);
+ bn_wexpand(b,k);
+ bn_wexpand(t,k*4);
+ bn_wexpand(r,k*4);
+ for (i=a->top; i<k; i++)
+ a->d[i]=0;
+ for (i=b->top; i<k; i++)
+ b->d[i]=0;
+ bn_mul_part_recursive(r->d,a->d,b->d,al-j,j,t->d);
+ }
+ r->top=top;
+ }
+#endif
+end:
+ bn_fix_top(r);
+ return(1);
+ }
- /* Are going to now chop things into 'num' word chunks. */
- num*=BN_BITS2;
+void bn_mul_normal(r,a,na,b,nb)
+BN_ULONG *r,*a;
+int na;
+BN_ULONG *b;
+int nb;
+ {
+ BN_ULONG *rr;
- BN_copy(a,A);
- BN_mask_bits(a,num);
- BN_rshift(b,A,num);
+#ifdef BN_COUNT
+printf(" bn_mul_normal %d * %d\n",na,nb);
+#endif
- BN_copy(c,B);
- BN_mask_bits(c,num);
- BN_rshift(d,B,num);
+ if (na < nb)
+ {
+ int itmp;
+ BN_ULONG *ltmp;
- BN_sub(ac ,b,a);
- BN_sub(bd,c,d);
- BN_mm(m,ac,bd,bp);
- BN_mm(ac,a,c,bp);
- BN_mm(bd,b,d,bp);
+ itmp=na; na=nb; nb=itmp;
+ ltmp=a; a=b; b=ltmp;
- BN_add(m,m,ac);
- BN_add(m,m,bd);
- BN_lshift(m,m,num);
- BN_lshift(bd,bd,num*2);
+ }
+ rr= &(r[na]);
+ rr[0]=bn_mul_words(r,a,na,b[0]);
- BN_add(m,m,ac);
- BN_add(m,m,bd);
- BN_POOL_pop(bp,6);
- return(1);
+ for (;;)
+ {
+ if (--nb <= 0) return;
+ rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
+ if (--nb <= 0) return;
+ rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
+ if (--nb <= 0) return;
+ rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
+ if (--nb <= 0) return;
+ rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
+ rr+=4;
+ r+=4;
+ b+=4;
+ }
}
+
+void bn_mul_low_normal(r,a,b,n)
+BN_ULONG *r,*a,*b;
+int n;
+ {
+#ifdef BN_COUNT
+printf(" bn_mul_low_normal %d * %d\n",n,n);
#endif
+ bn_mul_words(r,a,n,b[0]);
+
+ for (;;)
+ {
+ if (--n <= 0) return;
+ bn_mul_add_words(&(r[1]),a,n,b[1]);
+ if (--n <= 0) return;
+ bn_mul_add_words(&(r[2]),a,n,b[2]);
+ if (--n <= 0) return;
+ bn_mul_add_words(&(r[3]),a,n,b[3]);
+ if (--n <= 0) return;
+ bn_mul_add_words(&(r[4]),a,n,b[4]);
+ r+=4;
+ b+=4;
+ }
+ }
+
diff --git a/crypto/bn/bn_opts.c b/crypto/bn/bn_opts.c
new file mode 100644
index 0000000000..86a03e2423
--- /dev/null
+++ b/crypto/bn/bn_opts.c
@@ -0,0 +1,342 @@
+/* crypto/bn/expspeed.c */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+/* most of this code has been pilfered from my libdes speed.c program */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include "crypto.h"
+#include "tmdiff.h"
+#include "bn.h"
+#include "err.h"
+
+#define DEFAULT_SIZE 512
+#define DEFAULT_TIME 3
+
+int verbose=1;
+
+typedef struct parms_st
+ {
+ char *name;
+ void (*func)();
+ BIGNUM r;
+ BIGNUM a;
+ BIGNUM b;
+ BIGNUM c;
+ BIGNUM low;
+ BN_CTX *ctx;
+ BN_MONT_CTX *mont;
+ int w;
+ } PARMS;
+
+void do_mul_exp(int num,PARMS *p);
+void do_mul(int num,PARMS *p);
+void do_sqr(int num,PARMS *p);
+void do_mul_low(int num,PARMS *p);
+void do_mul_high(int num,PARMS *p);
+void do_from_montgomery(int num,PARMS *p);
+int time_it(int sec, PARMS *p);
+void do_it(int sec, PARMS *p);
+
+#define P_EXP 1
+#define P_MUL 2
+#define P_SQR 3
+#define P_MULL 4
+#define P_MULH 5
+#define P_MRED 6
+
+int main(argc,argv)
+int argc;
+char **argv;
+ {
+ PARMS p;
+ BN_MONT_CTX *mont;
+ int size=0,num;
+ char *name;
+ int type=P_EXP;
+
+ mont=BN_MONT_CTX_new();
+ p.mont=NULL;
+ p.ctx=BN_CTX_new();
+ BN_init(&p.r);
+ BN_init(&p.a);
+ BN_init(&p.b);
+ BN_init(&p.c);
+ BN_init(&p.low);
+ p.w=0;
+
+ for (;;)
+ {
+ if (argc > 1)
+ {
+ if (argv[1][0] == '-')
+ {
+ switch(argv[1][1])
+ {
+ case 'e': type=P_EXP; break;
+ case 'm': type=P_MUL; break;
+ case 's': type=P_SQR; break;
+ case 'l': type=P_MULL; break;
+ case 'h': type=P_MULH; break;
+ case 'r': type=P_MRED; break;
+ default:
+ fprintf(stderr,"options: -[emslhr]\n");
+ exit(1);
+ }
+ }
+ else
+ {
+ size=atoi(argv[1]);
+ }
+ argc--;
+ argv++;
+ }
+ else
+ break;
+ }
+ if (size == 0)
+ size=DEFAULT_SIZE;
+
+ printf("bit size:%5d\n",size);
+
+ BN_rand(&p.a,size,1,0);
+ BN_rand(&p.b,size,1,0);
+ BN_rand(&p.c,size,1,1);
+ BN_mod(&p.a,&p.a,&p.c,p.ctx);
+ BN_mod(&p.b,&p.b,&p.c,p.ctx);
+ p.w=(p.a.top+1)/2;
+
+ BN_mul(&p.low,&p.a,&p.b,p.ctx);
+ p.low.top=p.a.top;
+
+ switch(type)
+ {
+ case P_EXP:
+ p.name="r=a^b%c";
+ p.func=do_mul_exp;
+ p.mont=mont;
+ break;
+ case P_MUL:
+ p.name="r=a*b";
+ p.func=do_mul;
+ break;
+ case P_SQR:
+ p.name="r=a*a";
+ p.func=do_sqr;
+ break;
+ case P_MULL:
+ p.name="r=low(a*b)";
+ p.func=do_mul_low;
+ break;
+ case P_MULH:
+ p.name="r=high(a*b)";
+ p.func=do_mul_high;
+ break;
+ case P_MRED:
+ p.name="r=montgomery_reduction(a)";
+ p.func=do_from_montgomery;
+ p.mont=mont;
+ break;
+ default:
+ fprintf(stderr,"options: -[emslhr]\n");
+ exit(1);
+ }
+
+ num=time_it(DEFAULT_TIME,&p);
+ do_it(num,&p);
+ }
+
+void do_it(num,p)
+int num;
+PARMS *p;
+ {
+ char *start,*end;
+ int i,j,number;
+ double d;
+
+ start=ms_time_new();
+ end=ms_time_new();
+
+ number=BN_num_bits_word((BN_ULONG)BN_num_bits(&(p->c)))-
+ BN_num_bits_word(BN_BITS2)+2;
+ for (i=number-1; i >=0; i--)
+ {
+ if (i == 1) continue;
+ BN_set_params(i,i,i,1);
+ if (p->mont != NULL)
+ BN_MONT_CTX_set(p->mont,&(p->c),p->ctx);
+
+ printf("Timing %5d (%2d bit) %2d %2d %2d %2d :",
+ (1<<i)*BN_BITS2,i,
+ BN_get_params(0),
+ BN_get_params(1),
+ BN_get_params(2),
+ BN_get_params(3));
+ fflush(stdout);
+
+ ms_time_get(start);
+ p->func(num,p);
+ ms_time_get(end);
+ d=ms_time_diff(start,end);
+ printf("%6.6f sec, or %d in %.4f seconds\n",
+ (double)d/num,num,d);
+ }
+ }
+
+int time_it(sec,p)
+int sec;
+PARMS *p;
+ {
+ char *start,*end;
+ int i,j;
+ double d;
+
+ if (p->mont != NULL)
+ BN_MONT_CTX_set(p->mont,&(p->c),p->ctx);
+
+ start=ms_time_new();
+ end=ms_time_new();
+
+ i=1;
+ for (;;)
+ {
+ if (verbose)
+ printf("timing %s for %d interations\n",p->name,i);
+
+ ms_time_get(start);
+ p->func(i,p);
+ ms_time_get(end);
+ d=ms_time_diff(start,end);
+
+ if (d < 0.01) i*=100;
+ else if (d < 0.1 ) i*=10;
+ else if (d > (double)sec) break;
+ else
+ {
+ i=(int)(1.0*i*sec/d);
+ break;
+ }
+ }
+ if (verbose)
+ printf("using %d interations\n",i);
+ return(i);
+ }
+
+void do_mul_exp(num,p)
+int num;
+PARMS *p;
+ {
+ int i;
+
+ for (i=0; i<num; i++)
+ BN_mod_exp_mont(&(p->r),&(p->a),&(p->b),&(p->c),
+ p->ctx,p->mont);
+ }
+
+void do_mul(num,p)
+int num;
+PARMS *p;
+ {
+ int i;
+
+ for (i=0; i<num; i++)
+ BN_mul(&(p->r),&(p->a),&(p->b),p->ctx);
+ }
+
+void do_sqr(num,p)
+int num;
+PARMS *p;
+ {
+ int i;
+
+ for (i=0; i<num; i++)
+ BN_sqr(&(p->r),&(p->a),p->ctx);
+ }
+
+void do_mul_low(num,p)
+int num;
+PARMS *p;
+ {
+ int i;
+
+ for (i=0; i<num; i++)
+ BN_mul_low(&(p->r),&(p->a),&(p->b),p->w,p->ctx);
+ }
+
+void do_mul_high(num,p)
+int num;
+PARMS *p;
+ {
+ int i;
+
+ for (i=0; i<num; i++)
+ BN_mul_low(&(p->r),&(p->a),&(p->b),&(p->low),p->w,p->ctx);
+ }
+
+void do_from_montgomery(num,p)
+int num;
+PARMS *p;
+ {
+ int i;
+
+ for (i=0; i<num; i++)
+ BN_from_montgomery(&(p->r),&(p->a),p->mont,p->ctx);
+ }
+
diff --git a/crypto/bn/bn_prime.c b/crypto/bn/bn_prime.c
index 0c85f70b59..c4fb58ef9a 100644
--- a/crypto/bn/bn_prime.c
+++ b/crypto/bn/bn_prime.c
@@ -83,7 +83,8 @@ static int probable_prime_dh();
static int probable_prime_dh_strong();
#endif
-BIGNUM *BN_generate_prime(bits,strong,add,rem,callback,cb_arg)
+BIGNUM *BN_generate_prime(ret,bits,strong,add,rem,callback,cb_arg)
+BIGNUM *ret;
int bits;
int strong;
BIGNUM *add;
@@ -92,16 +93,19 @@ void (*callback)(P_I_I_P);
char *cb_arg;
{
BIGNUM *rnd=NULL;
- BIGNUM *ret=NULL;
- BIGNUM *t=NULL;
+ BIGNUM t;
int i,j,c1=0;
BN_CTX *ctx;
ctx=BN_CTX_new();
if (ctx == NULL) goto err;
- if ((rnd=BN_new()) == NULL) goto err;
- if (strong)
- if ((t=BN_new()) == NULL) goto err;
+ if (ret == NULL)
+ {
+ if ((rnd=BN_new()) == NULL) goto err;
+ }
+ else
+ rnd=ret;
+ BN_init(&t);
loop:
/* make a random number and set the top and bottom bits */
if (add == NULL)
@@ -136,7 +140,7 @@ loop:
* check that (p-1)/2 is prime.
* Since a prime is odd, We just
* need to divide by 2 */
- if (!BN_rshift1(t,rnd)) goto err;
+ if (!BN_rshift1(&t,rnd)) goto err;
for (i=0; i<BN_prime_checks; i++)
{
@@ -144,7 +148,7 @@ loop:
if (j == -1) goto err;
if (j == 0) goto loop;
- j=BN_is_prime(t,1,callback,ctx,cb_arg);
+ j=BN_is_prime(&t,1,callback,ctx,cb_arg);
if (j == -1) goto err;
if (j == 0) goto loop;
@@ -156,7 +160,7 @@ loop:
ret=rnd;
err:
if ((ret == NULL) && (rnd != NULL)) BN_free(rnd);
- if (t != NULL) BN_free(t);
+ BN_free(&t);
if (ctx != NULL) BN_CTX_free(ctx);
return(ret);
}
@@ -183,7 +187,7 @@ char *cb_arg;
if ((ctx2=BN_CTX_new()) == NULL) goto err;
if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
- check=ctx->bn[ctx->tos++];
+ check= &(ctx->bn[ctx->tos++]);
/* Setup the montgomery structure */
if (!BN_MONT_CTX_set(mont,a,ctx2)) goto err;
@@ -224,14 +228,14 @@ BN_MONT_CTX *mont;
BIGNUM *d,*dd,*tmp,*d1,*d2,*n1;
BIGNUM *mont_one,*mont_n1,*mont_a;
- d1=ctx->bn[ctx->tos];
- d2=ctx->bn[ctx->tos+1];
- n1=ctx->bn[ctx->tos+2];
+ d1= &(ctx->bn[ctx->tos]);
+ d2= &(ctx->bn[ctx->tos+1]);
+ n1= &(ctx->bn[ctx->tos+2]);
ctx->tos+=3;
- mont_one=ctx2->bn[ctx2->tos];
- mont_n1=ctx2->bn[ctx2->tos+1];
- mont_a=ctx2->bn[ctx2->tos+2];
+ mont_one= &(ctx2->bn[ctx2->tos]);
+ mont_n1= &(ctx2->bn[ctx2->tos+1]);
+ mont_a= &(ctx2->bn[ctx2->tos+2]);
ctx2->tos+=3;
d=d1;
@@ -287,8 +291,9 @@ int bits;
{
int i;
MS_STATIC BN_ULONG mods[NUMPRIMES];
- BN_ULONG delta;
+ BN_ULONG delta,d;
+again:
if (!BN_rand(rnd,bits,1,1)) return(0);
/* we now have a random number 'rand' to test. */
for (i=1; i<NUMPRIMES; i++)
@@ -300,9 +305,12 @@ int bits;
* that gcd(rnd-1,primes) == 1 (except for 2) */
if (((mods[i]+delta)%primes[i]) <= 1)
{
+ d=delta;
delta+=2;
/* perhaps need to check for overflow of
- * delta (but delta can be upto 2^32) */
+			 * delta (but delta can be up to 2^32)
+ * 21-May-98 eay - added overflow check */
+ if (delta < d) goto again;
goto loop;
}
}
@@ -320,7 +328,7 @@ BN_CTX *ctx;
int i,ret=0;
BIGNUM *t1;
- t1=ctx->bn[ctx->tos++];
+ t1= &(ctx->bn[ctx->tos++]);
if (!BN_rand(rnd,bits,0,1)) goto err;
@@ -361,9 +369,9 @@ BN_CTX *ctx;
BIGNUM *t1,*qadd=NULL,*q=NULL;
bits--;
- t1=ctx->bn[ctx->tos++];
- q=ctx->bn[ctx->tos++];
- qadd=ctx->bn[ctx->tos++];
+ t1= &(ctx->bn[ctx->tos++]);
+ q= &(ctx->bn[ctx->tos++]);
+ qadd= &(ctx->bn[ctx->tos++]);
if (!BN_rshift1(qadd,padd)) goto err;
@@ -413,11 +421,11 @@ BN_CTX *ctx;
BIGNUM *d,*dd,*tmp;
BIGNUM *d1,*d2,*x,*n1,*inv;
- d1=ctx->bn[ctx->tos];
- d2=ctx->bn[ctx->tos+1];
- x=ctx->bn[ctx->tos+2];
- n1=ctx->bn[ctx->tos+3];
- inv=ctx->bn[ctx->tos+4];
+ d1= &(ctx->bn[ctx->tos]);
+ d2= &(ctx->bn[ctx->tos+1]);
+ x= &(ctx->bn[ctx->tos+2]);
+ n1= &(ctx->bn[ctx->tos+3]);
+ inv=&(ctx->bn[ctx->tos+4]);
ctx->tos+=5;
d=d1;
diff --git a/crypto/bn/bn_recp.c b/crypto/bn/bn_recp.c
index 72cd69d3fc..97ca857ed1 100644
--- a/crypto/bn/bn_recp.c
+++ b/crypto/bn/bn_recp.c
@@ -60,66 +60,182 @@
#include "cryptlib.h"
#include "bn_lcl.h"
-int BN_mod_mul_reciprocal(r, x, y, m, i, nb, ctx)
+void BN_RECP_CTX_init(recp)
+BN_RECP_CTX *recp;
+ {
+ BN_init(&(recp->N));
+ BN_init(&(recp->Nr));
+ recp->num_bits=0;
+ recp->flags=0;
+ }
+
+BN_RECP_CTX *BN_RECP_CTX_new()
+ {
+ BN_RECP_CTX *ret;
+
+ if ((ret=(BN_RECP_CTX *)Malloc(sizeof(BN_RECP_CTX))) == NULL)
+ return(NULL);
+
+ BN_RECP_CTX_init(ret);
+ ret->flags=BN_FLG_MALLOCED;
+ return(ret);
+ }
+
+void BN_RECP_CTX_free(recp)
+BN_RECP_CTX *recp;
+ {
+ BN_free(&(recp->N));
+ BN_free(&(recp->Nr));
+ if (recp->flags & BN_FLG_MALLOCED)
+ Free(recp);
+ }
+
+int BN_RECP_CTX_set(recp,d,ctx)
+BN_RECP_CTX *recp;
+BIGNUM *d;
+BN_CTX *ctx;
+ {
+ BN_copy(&(recp->N),d);
+ BN_zero(&(recp->Nr));
+ recp->num_bits=BN_num_bits(d);
+ recp->shift=0;
+ return(1);
+ }
+
+int BN_mod_mul_reciprocal(r, x, y, recp, ctx)
BIGNUM *r;
BIGNUM *x;
BIGNUM *y;
+BN_RECP_CTX *recp;
+BN_CTX *ctx;
+ {
+ int ret=0;
+ BIGNUM *a;
+
+ a= &(ctx->bn[ctx->tos++]);
+ if (y != NULL)
+ {
+ if (x == y)
+ { if (!BN_sqr(a,x,ctx)) goto err; }
+ else
+ { if (!BN_mul(a,x,y,ctx)) goto err; }
+ }
+ else
+ a=x; /* Just do the mod */
+
+ BN_div_recp(NULL,r,a,recp,ctx);
+ ret=1;
+err:
+ ctx->tos--;
+ return(ret);
+ }
+
+int BN_div_recp(dv,rem,m,recp,ctx)
+BIGNUM *dv;
+BIGNUM *rem;
BIGNUM *m;
-BIGNUM *i;
-int nb;
+BN_RECP_CTX *recp;
BN_CTX *ctx;
{
- int ret=0,j;
- BIGNUM *a,*b,*c,*d;
+ int i,j,tos,ret=0,ex;
+ BIGNUM *a,*b,*d,*r;
+
+ tos=ctx->tos;
+ a= &(ctx->bn[ctx->tos++]);
+ b= &(ctx->bn[ctx->tos++]);
+ if (dv != NULL)
+ d=dv;
+ else
+ d= &(ctx->bn[ctx->tos++]);
+ if (rem != NULL)
+ r=rem;
+ else
+ r= &(ctx->bn[ctx->tos++]);
+
+ if (BN_ucmp(m,&(recp->N)) < 0)
+ {
+ BN_zero(d);
+ BN_copy(r,m);
+ ctx->tos=tos;
+ return(1);
+ }
- a=ctx->bn[ctx->tos++];
- b=ctx->bn[ctx->tos++];
- c=ctx->bn[ctx->tos++];
- d=ctx->bn[ctx->tos++];
+ /* We want the remainder
+ * Given input of ABCDEF / ab
+	 * we need to multiply ABCDEF by 3 digits of the reciprocal of ab
+ *
+ */
+ i=BN_num_bits(m);
- if (x == y)
- { if (!BN_sqr(a,x,ctx)) goto err; }
+ j=recp->num_bits*2;
+ if (j > i)
+ {
+ i=j;
+ ex=0;
+ }
else
- { if (!BN_mul(a,x,y)) goto err; }
- if (!BN_rshift(d,a,nb)) goto err;
- if (!BN_mul(b,d,i)) goto err;
- if (!BN_rshift(c,b,nb)) goto err;
- if (!BN_mul(b,m,c)) goto err;
- if (!BN_sub(r,a,b)) goto err;
+ {
+ ex=(i-j)/2;
+ }
+
+ j=i/2;
+
+ if (i != recp->shift)
+ recp->shift=BN_reciprocal(&(recp->Nr),&(recp->N),
+ i,ctx);
+
+ if (!BN_rshift(a,m,j-ex)) goto err;
+ if (!BN_mul(b,a,&(recp->Nr),ctx)) goto err;
+ if (!BN_rshift(d,b,j+ex)) goto err;
+ d->neg=0;
+ if (!BN_mul(b,&(recp->N),d,ctx)) goto err;
+ if (!BN_usub(r,m,b)) goto err;
+ r->neg=0;
+
j=0;
- while (BN_cmp(r,m) >= 0)
+#if 1
+ while (BN_ucmp(r,&(recp->N)) >= 0)
{
if (j++ > 2)
{
BNerr(BN_F_BN_MOD_MUL_RECIPROCAL,BN_R_BAD_RECIPROCAL);
goto err;
}
- if (!BN_sub(r,r,m)) goto err;
+ if (!BN_usub(r,r,&(recp->N))) goto err;
+ if (!BN_add_word(d,1)) goto err;
}
+#endif
+ r->neg=BN_is_zero(r)?0:m->neg;
+ d->neg=m->neg^recp->N.neg;
ret=1;
err:
- ctx->tos-=4;
+ ctx->tos=tos;
return(ret);
- }
+ }
-int BN_reciprocal(r, m,ctx)
+/* len is the expected size of the result
+ * We actually calculate with an extra word of precision, so
+ * we can do faster division if the remainder is not required.
+ */
+int BN_reciprocal(r,m,len,ctx)
BIGNUM *r;
BIGNUM *m;
+int len;
BN_CTX *ctx;
{
- int nm,ret= -1;
- BIGNUM *t;
+ int ret= -1;
+ BIGNUM t;
- t=ctx->bn[ctx->tos++];
+ BN_init(&t);
- nm=BN_num_bits(m);
- if (!BN_lshift(t,BN_value_one(),nm*2)) goto err;
+ BN_zero(&t);
+ if (!BN_set_bit(&t,len)) goto err;
- if (!BN_div(r,NULL,t,m,ctx)) goto err;
- ret=nm;
+ if (!BN_div(r,NULL,&t,m,ctx)) goto err;
+ ret=len;
err:
- ctx->tos--;
+ BN_free(&t);
return(ret);
}
diff --git a/crypto/bn/bn_sqr.c b/crypto/bn/bn_sqr.c
index a8464610e5..19ec0ddf84 100644
--- a/crypto/bn/bn_sqr.c
+++ b/crypto/bn/bn_sqr.c
@@ -67,30 +67,84 @@ BIGNUM *r;
BIGNUM *a;
BN_CTX *ctx;
{
- int i,j,max,al;
+ int max,al;
BIGNUM *tmp;
- BN_ULONG *ap,*rp;
- tmp=ctx->bn[ctx->tos];
+#ifdef BN_COUNT
+printf("BN_sqr %d * %d\n",a->top,a->top);
+#endif
+ bn_check_top(a);
+ tmp= &(ctx->bn[ctx->tos]);
al=a->top;
- if (al == 0)
+ if (al <= 0)
{
r->top=0;
return(1);
}
- max=(al*2);
- if (bn_wexpand(r,1+max) == NULL) return(0);
- if (bn_wexpand(tmp,1+max) == NULL) return(0);
+ max=(al+al);
+ if (bn_wexpand(r,max+1) == NULL) return(0);
r->neg=0;
+ if (al == 4)
+ {
+#ifndef BN_SQR_COMBA
+ BN_ULONG t[8];
+ bn_sqr_normal(r->d,a->d,4,t);
+#else
+ bn_sqr_comba4(r->d,a->d);
+#endif
+ }
+ else if (al == 8)
+ {
+#ifndef BN_SQR_COMBA
+ BN_ULONG t[16];
+ bn_sqr_normal(r->d,a->d,8,t);
+#else
+ bn_sqr_comba8(r->d,a->d);
+#endif
+ }
+ else
+ {
+#if defined(BN_RECURSION)
+ if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
+ {
+ BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2];
+ bn_sqr_normal(r->d,a->d,al,t);
+ }
+ else
+ {
+ if (bn_wexpand(tmp,2*max+1) == NULL) return(0);
+ bn_sqr_recursive(r->d,a->d,al,tmp->d);
+ }
+#else
+ if (bn_wexpand(tmp,max) == NULL) return(0);
+ bn_sqr_normal(r->d,a->d,al,tmp->d);
+#endif
+ }
+
+ r->top=max;
+ if ((max > 0) && (r->d[max-1] == 0)) r->top--;
+ return(1);
+ }
+
+/* tmp must have 2*n words */
+void bn_sqr_normal(r, a, n, tmp)
+BN_ULONG *r;
+BN_ULONG *a;
+int n;
+BN_ULONG *tmp;
+ {
+ int i,j,max;
+ BN_ULONG *ap,*rp;
- ap=a->d;
- rp=r->d;
+ max=n*2;
+ ap=a;
+ rp=r;
rp[0]=rp[max-1]=0;
rp++;
- j=al;
+ j=n;
if (--j > 0)
{
@@ -99,7 +153,7 @@ BN_CTX *ctx;
rp+=2;
}
- for (i=2; i<al; i++)
+ for (i=n-2; i>0; i--)
{
j--;
ap++;
@@ -107,16 +161,115 @@ BN_CTX *ctx;
rp+=2;
}
- bn_add_words(r->d,r->d,r->d,max);
+ bn_add_words(r,r,r,max);
/* There will not be a carry */
- bn_sqr_words(tmp->d,a->d,al);
+ bn_sqr_words(tmp,a,n);
- bn_add_words(r->d,r->d,tmp->d,max);
-
- r->top=max;
- if (r->d[max-1] == 0) r->top--;
- return(1);
+ bn_add_words(r,r,tmp,max);
}
+#ifdef BN_RECURSION
+/* r is 2*n words in size,
+ * a is n words in size.  (This routine squares a; there is no b.)
+ * n must be a power of 2.
+ * We square and return the result.
+ * t must be 2*n words in size.
+ * We calculate
+ * a[0]*a[0]
+ * a[0]*a[0]+a[1]*a[1]+(a[0]-a[1])*(a[1]-a[0])
+ * a[1]*a[1]
+ */
+void bn_sqr_recursive(r,a,n2,t)
+BN_ULONG *r,*a;
+int n2;
+BN_ULONG *t;
+ {
+ int n=n2/2;
+ int zero,c1;
+ BN_ULONG ln,lo,*p;
+
+#ifdef BN_COUNT
+printf(" bn_sqr_recursive %d * %d\n",n2,n2);
+#endif
+ if (n2 == 4)
+ {
+#ifndef BN_SQR_COMBA
+ bn_sqr_normal(r,a,4,t);
+#else
+ bn_sqr_comba4(r,a);
+#endif
+ return;
+ }
+ else if (n2 == 8)
+ {
+#ifndef BN_SQR_COMBA
+ bn_sqr_normal(r,a,8,t);
+#else
+ bn_sqr_comba8(r,a);
+#endif
+ return;
+ }
+ if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
+ {
+ bn_sqr_normal(r,a,n2,t);
+ return;
+ }
+ /* r=(a[0]-a[1])*(a[1]-a[0]) */
+ c1=bn_cmp_words(a,&(a[n]),n);
+ zero=0;
+ if (c1 > 0)
+ bn_sub_words(t,a,&(a[n]),n);
+ else if (c1 < 0)
+ bn_sub_words(t,&(a[n]),a,n);
+ else
+ zero=1;
+
+ /* The result will always be negative unless it is zero */
+ p= &(t[n2*2]);
+
+ if (!zero)
+ bn_sqr_recursive(&(t[n2]),t,n,p);
+ else
+ memset(&(t[n2]),0,n*sizeof(BN_ULONG));
+ bn_sqr_recursive(r,a,n,p);
+ bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
+
+	/* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
+	 * r[10] holds (a[0]*a[0])
+	 * r[32] holds (a[1]*a[1])
+	 */
+
+ c1=bn_add_words(t,r,&(r[n2]),n2);
+
+ /* t[32] is negative */
+ c1-=bn_sub_words(&(t[n2]),t,&(t[n2]),n2);
+
+ /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
+ * r[10] holds (a[0]*a[0])
+ * r[32] holds (a[1]*a[1])
+ * c1 holds the carry bits
+ */
+ c1+=bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2);
+ if (c1)
+ {
+ p= &(r[n+n2]);
+ lo= *p;
+ ln=(lo+c1)&BN_MASK2;
+ *p=ln;
+
+		/* The overflow will stop before we overwrite
+		 * words we should not overwrite */
+ if (ln < (BN_ULONG)c1)
+ {
+ do {
+ p++;
+ lo= *p;
+ ln=(lo+1)&BN_MASK2;
+ *p=ln;
+ } while (ln == 0);
+ }
+ }
+ }
+#endif
diff --git a/crypto/bn/bn_word.c b/crypto/bn/bn_word.c
index 4b3d0f011d..9c168e4f48 100644
--- a/crypto/bn/bn_word.c
+++ b/crypto/bn/bn_word.c
@@ -62,7 +62,7 @@
BN_ULONG BN_mod_word(a, w)
BIGNUM *a;
-unsigned long w;
+BN_ULONG w;
{
#ifndef BN_LLONG
BN_ULONG ret=0;
@@ -75,8 +75,8 @@ unsigned long w;
for (i=a->top-1; i>=0; i--)
{
#ifndef BN_LLONG
- ret=((ret<<BN_BITS4)|((a->d[i]>>BN_BITS4)&BN_MASK2l))%(unsigned long)w;
- ret=((ret<<BN_BITS4)|(a->d[i]&BN_MASK2l))%(unsigned long)w;
+ ret=((ret<<BN_BITS4)|((a->d[i]>>BN_BITS4)&BN_MASK2l))%w;
+ ret=((ret<<BN_BITS4)|(a->d[i]&BN_MASK2l))%w;
#else
ret=(BN_ULLONG)(((ret<<(BN_ULLONG)BN_BITS2)|a->d[i])%
(BN_ULLONG)w);
@@ -87,7 +87,7 @@ unsigned long w;
BN_ULONG BN_div_word(a, w)
BIGNUM *a;
-unsigned long w;
+BN_ULONG w;
{
BN_ULONG ret;
int i;
@@ -100,18 +100,18 @@ unsigned long w;
BN_ULONG l,d;
l=a->d[i];
- d=bn_div64(ret,l,w);
+ d=bn_div_words(ret,l,w);
ret=(l-((d*w)&BN_MASK2))&BN_MASK2;
a->d[i]=d;
}
- if (a->d[a->top-1] == 0)
+ if ((a->top > 0) && (a->d[a->top-1] == 0))
a->top--;
return(ret);
}
int BN_add_word(a, w)
BIGNUM *a;
-unsigned long w;
+BN_ULONG w;
{
BN_ULONG l;
int i;
@@ -144,7 +144,7 @@ unsigned long w;
int BN_sub_word(a, w)
BIGNUM *a;
-unsigned long w;
+BN_ULONG w;
{
int i;
@@ -185,7 +185,7 @@ unsigned long w;
int BN_mul_word(a,w)
BIGNUM *a;
-unsigned long w;
+BN_ULONG w;
{
BN_ULONG ll;
@@ -199,6 +199,6 @@ unsigned long w;
a->d[a->top++]=ll;
}
}
- return(0);
+ return(1);
}
diff --git a/crypto/bn/bnspeed.c b/crypto/bn/bnspeed.c
index f7c2790fff..777212c1ba 100644
--- a/crypto/bn/bnspeed.c
+++ b/crypto/bn/bnspeed.c
@@ -94,7 +94,8 @@ struct tms {
#include <sys/timeb.h>
#endif
-#ifdef sun
+#if defined(sun) || defined(__ultrix)
+#define _POSIX_SOURCE
#include <limits.h>
#include <sys/param.h>
#endif
@@ -180,15 +181,14 @@ int argc;
char **argv;
{
BN_CTX *ctx;
- BIGNUM *a,*b,*c,*r;
+ BIGNUM a,b,c;
ctx=BN_CTX_new();
- a=BN_new();
- b=BN_new();
- c=BN_new();
- r=BN_new();
+ BN_init(&a);
+ BN_init(&b);
+ BN_init(&c);
- do_mul(a,b,c,ctx);
+ do_mul(&a,&b,&c,ctx);
}
void do_mul(r,a,b,ctx)
@@ -211,7 +211,7 @@ BN_CTX *ctx;
BN_rand(b,sizes[j],1,0);
Time_F(START);
for (k=0; k<num; k++)
- BN_mul(r,b,a);
+ BN_mul(r,b,a,ctx);
tm=Time_F(STOP);
printf("mul %4d x %4d -> %8.3fms\n",sizes[i],sizes[j],tm*1000.0/num);
}
diff --git a/crypto/bn/bntest.c b/crypto/bn/bntest.c
index 9ebd68b429..ec48bad738 100644
--- a/crypto/bn/bntest.c
+++ b/crypto/bn/bntest.c
@@ -71,19 +71,20 @@
#endif
#ifndef NOPROTO
-int test_add (BIO *bp);
-int test_sub (BIO *bp);
-int test_lshift1 (BIO *bp);
-int test_lshift (BIO *bp);
-int test_rshift1 (BIO *bp);
-int test_rshift (BIO *bp);
-int test_div (BIO *bp,BN_CTX *ctx);
-int test_mul (BIO *bp);
-int test_sqr (BIO *bp,BN_CTX *ctx);
-int test_mont (BIO *bp,BN_CTX *ctx);
-int test_mod (BIO *bp,BN_CTX *ctx);
-int test_mod_mul (BIO *bp,BN_CTX *ctx);
-int test_mod_exp (BIO *bp,BN_CTX *ctx);
+int test_add(BIO *bp);
+int test_sub(BIO *bp);
+int test_lshift1(BIO *bp);
+int test_lshift(BIO *bp);
+int test_rshift1(BIO *bp);
+int test_rshift(BIO *bp);
+int test_div(BIO *bp,BN_CTX *ctx);
+int test_div_recp(BIO *bp,BN_CTX *ctx);
+int test_mul(BIO *bp);
+int test_sqr(BIO *bp,BN_CTX *ctx);
+int test_mont(BIO *bp,BN_CTX *ctx);
+int test_mod(BIO *bp,BN_CTX *ctx);
+int test_mod_mul(BIO *bp,BN_CTX *ctx);
+int test_mod_exp(BIO *bp,BN_CTX *ctx);
int rand_neg(void);
#else
int test_add ();
@@ -192,6 +193,10 @@ char *argv[];
if (!test_div(out,ctx)) goto err;
fflush(stdout);
+ fprintf(stderr,"test BN_div_recp\n");
+ if (!test_div_recp(out,ctx)) goto err;
+ fflush(stdout);
+
fprintf(stderr,"test BN_mod\n");
if (!test_mod(out,ctx)) goto err;
fflush(stdout);
@@ -221,80 +226,80 @@ err:
int test_add(bp)
BIO *bp;
{
- BIGNUM *a,*b,*c;
+ BIGNUM a,b,c;
int i;
int j;
- a=BN_new();
- b=BN_new();
- c=BN_new();
+ BN_init(&a);
+ BN_init(&b);
+ BN_init(&c);
- BN_rand(a,512,0,0);
+ BN_rand(&a,512,0,0);
for (i=0; i<100; i++)
{
- BN_rand(b,450+i,0,0);
- a->neg=rand_neg();
- b->neg=rand_neg();
+ BN_rand(&b,450+i,0,0);
+ a.neg=rand_neg();
+ b.neg=rand_neg();
if (bp == NULL)
for (j=0; j<10000; j++)
- BN_add(c,a,b);
- BN_add(c,a,b);
+ BN_add(&c,&a,&b);
+ BN_add(&c,&a,&b);
if (bp != NULL)
{
if (!results)
{
- BN_print(bp,a);
+ BN_print(bp,&a);
BIO_puts(bp," + ");
- BN_print(bp,b);
+ BN_print(bp,&b);
BIO_puts(bp," - ");
}
- BN_print(bp,c);
+ BN_print(bp,&c);
BIO_puts(bp,"\n");
}
}
- BN_free(a);
- BN_free(b);
- BN_free(c);
+ BN_free(&a);
+ BN_free(&b);
+ BN_free(&c);
return(1);
}
int test_sub(bp)
BIO *bp;
{
- BIGNUM *a,*b,*c;
+ BIGNUM a,b,c;
int i;
int j;
- a=BN_new();
- b=BN_new();
- c=BN_new();
+ BN_init(&a);
+ BN_init(&b);
+ BN_init(&c);
- BN_rand(a,512,0,0);
+ BN_rand(&a,512,0,0);
for (i=0; i<100; i++)
{
- BN_rand(b,400+i,0,0);
- a->neg=rand_neg();
- b->neg=rand_neg();
+ BN_rand(&b,400+i,0,0);
+ a.neg=rand_neg();
+ b.neg=rand_neg();
if (bp == NULL)
for (j=0; j<10000; j++)
- BN_sub(c,a,b);
- BN_sub(c,a,b);
+ BN_sub(&c,&a,&b);
+ BN_sub(&c,&a,&b);
if (bp != NULL)
{
if (!results)
{
- BN_print(bp,a);
+ BN_print(bp,&a);
BIO_puts(bp," - ");
- BN_print(bp,b);
+ BN_print(bp,&b);
BIO_puts(bp," - ");
}
- BN_print(bp,c);
+ BN_print(bp,&c);
BIO_puts(bp,"\n");
}
}
- BN_free(a);
- BN_free(b);
- BN_free(c);
+ BN_free(&a);
+ BN_free(&b);
+ BN_free(&c);
return(1);
}
@@ -302,92 +307,154 @@ int test_div(bp,ctx)
BIO *bp;
BN_CTX *ctx;
{
- BIGNUM *a,*b,*c,*d;
+ BIGNUM a,b,c,d;
int i;
int j;
- a=BN_new();
- b=BN_new();
- c=BN_new();
- d=BN_new();
+ BN_init(&a);
+ BN_init(&b);
+ BN_init(&c);
+ BN_init(&d);
- BN_rand(a,400,0,0);
+ BN_rand(&a,400,0,0);
for (i=0; i<100; i++)
{
- BN_rand(b,50+i,0,0);
- a->neg=rand_neg();
- b->neg=rand_neg();
+ BN_rand(&b,50+i,0,0);
+ a.neg=rand_neg();
+ b.neg=rand_neg();
if (bp == NULL)
for (j=0; j<100; j++)
- BN_div(d,c,a,b,ctx);
- BN_div(d,c,a,b,ctx);
+ BN_div(&d,&c,&a,&b,ctx);
+ BN_div(&d,&c,&a,&b,ctx);
if (bp != NULL)
{
if (!results)
{
- BN_print(bp,a);
+ BN_print(bp,&a);
BIO_puts(bp," / ");
- BN_print(bp,b);
+ BN_print(bp,&b);
BIO_puts(bp," - ");
}
- BN_print(bp,d);
+ BN_print(bp,&d);
BIO_puts(bp,"\n");
if (!results)
{
- BN_print(bp,a);
+ BN_print(bp,&a);
BIO_puts(bp," % ");
- BN_print(bp,b);
+ BN_print(bp,&b);
BIO_puts(bp," - ");
}
- BN_print(bp,c);
+ BN_print(bp,&c);
BIO_puts(bp,"\n");
}
}
- BN_free(a);
- BN_free(b);
- BN_free(c);
- BN_free(d);
+ BN_free(&a);
+ BN_free(&b);
+ BN_free(&c);
+ BN_free(&d);
+ return(1);
+ }
+
+int test_div_recp(bp,ctx)
+BIO *bp;
+BN_CTX *ctx;
+ {
+ BIGNUM a,b,c,d;
+ BN_RECP_CTX recp;
+ int i;
+ int j;
+
+ BN_RECP_CTX_init(&recp);
+ BN_init(&a);
+ BN_init(&b);
+ BN_init(&c);
+ BN_init(&d);
+
+ BN_rand(&a,400,0,0);
+ for (i=0; i<100; i++)
+ {
+ BN_rand(&b,50+i,0,0);
+ a.neg=rand_neg();
+ b.neg=rand_neg();
+ BN_RECP_CTX_set(&recp,&b,ctx);
+ if (bp == NULL)
+ for (j=0; j<100; j++)
+ BN_div_recp(&d,&c,&a,&recp,ctx);
+ BN_div_recp(&d,&c,&a,&recp,ctx);
+ if (bp != NULL)
+ {
+ if (!results)
+ {
+ BN_print(bp,&a);
+ BIO_puts(bp," / ");
+ BN_print(bp,&b);
+ BIO_puts(bp," - ");
+ }
+ BN_print(bp,&d);
+ BIO_puts(bp,"\n");
+
+ if (!results)
+ {
+ BN_print(bp,&a);
+ BIO_puts(bp," % ");
+ BN_print(bp,&b);
+ BIO_puts(bp," - ");
+ }
+ BN_print(bp,&c);
+ BIO_puts(bp,"\n");
+ }
+ }
+ BN_free(&a);
+ BN_free(&b);
+ BN_free(&c);
+ BN_free(&d);
+ BN_RECP_CTX_free(&recp);
return(1);
}
int test_mul(bp)
BIO *bp;
{
- BIGNUM *a,*b,*c;
+ BIGNUM a,b,c;
int i;
int j;
+ BN_CTX ctx;
- a=BN_new();
- b=BN_new();
- c=BN_new();
+ BN_CTX_init(&ctx);
+ BN_init(&a);
+ BN_init(&b);
+ BN_init(&c);
- BN_rand(a,200,0,0);
+ BN_rand(&a,200,0,0);
for (i=0; i<100; i++)
{
- BN_rand(b,250+i,0,0);
- a->neg=rand_neg();
- b->neg=rand_neg();
+ BN_rand(&b,250+i,0,0);
+ BN_rand(&b,200,0,0);
+ a.neg=rand_neg();
+ b.neg=rand_neg();
if (bp == NULL)
for (j=0; j<100; j++)
- BN_mul(c,a,b);
- BN_mul(c,a,b);
+ BN_mul(&c,&a,&b,&ctx);
+ BN_mul(&c,&a,&b,&ctx);
+/*bn_do(&c,&a,&b,ctx); */
if (bp != NULL)
{
if (!results)
{
- BN_print(bp,a);
+ BN_print(bp,&a);
BIO_puts(bp," * ");
- BN_print(bp,b);
+ BN_print(bp,&b);
BIO_puts(bp," - ");
}
- BN_print(bp,c);
+ BN_print(bp,&c);
BIO_puts(bp,"\n");
}
}
- BN_free(a);
- BN_free(b);
- BN_free(c);
+ BN_free(&a);
+ BN_free(&b);
+ BN_free(&c);
+ BN_CTX_free(&ctx);
return(1);
}
@@ -395,36 +462,36 @@ int test_sqr(bp,ctx)
BIO *bp;
BN_CTX *ctx;
{
- BIGNUM *a,*c;
+ BIGNUM a,c;
int i;
int j;
- a=BN_new();
- c=BN_new();
+ BN_init(&a);
+ BN_init(&c);
for (i=0; i<40; i++)
{
- BN_rand(a,40+i*10,0,0);
- a->neg=rand_neg();
+ BN_rand(&a,40+i*10,0,0);
+ a.neg=rand_neg();
if (bp == NULL)
for (j=0; j<100; j++)
- BN_sqr(c,a,ctx);
- BN_sqr(c,a,ctx);
+ BN_sqr(&c,&a,ctx);
+ BN_sqr(&c,&a,ctx);
if (bp != NULL)
{
if (!results)
{
- BN_print(bp,a);
+ BN_print(bp,&a);
BIO_puts(bp," * ");
- BN_print(bp,a);
+ BN_print(bp,&a);
BIO_puts(bp," - ");
}
- BN_print(bp,c);
+ BN_print(bp,&c);
BIO_puts(bp,"\n");
}
}
- BN_free(a);
- BN_free(c);
+ BN_free(&a);
+ BN_free(&c);
return(1);
}
@@ -432,61 +499,61 @@ int test_mont(bp,ctx)
BIO *bp;
BN_CTX *ctx;
{
- BIGNUM *a,*b,*c,*A,*B;
- BIGNUM *n;
+ BIGNUM a,b,c,A,B;
+ BIGNUM n;
int i;
int j;
BN_MONT_CTX *mont;
- a=BN_new();
- b=BN_new();
- c=BN_new();
- A=BN_new();
- B=BN_new();
- n=BN_new();
+ BN_init(&a);
+ BN_init(&b);
+ BN_init(&c);
+ BN_init(&A);
+ BN_init(&B);
+ BN_init(&n);
mont=BN_MONT_CTX_new();
- BN_rand(a,100,0,0); /**/
- BN_rand(b,100,0,0); /**/
+ BN_rand(&a,100,0,0); /**/
+ BN_rand(&b,100,0,0); /**/
for (i=0; i<10; i++)
{
- BN_rand(n,(100%BN_BITS2+1)*BN_BITS2*i*BN_BITS2,0,1); /**/
- BN_MONT_CTX_set(mont,n,ctx);
+ BN_rand(&n,(100%BN_BITS2+1)*BN_BITS2*i*BN_BITS2,0,1); /**/
+ BN_MONT_CTX_set(mont,&n,ctx);
- BN_to_montgomery(A,a,mont,ctx);
- BN_to_montgomery(B,b,mont,ctx);
+ BN_to_montgomery(&A,&a,mont,ctx);
+ BN_to_montgomery(&B,&b,mont,ctx);
if (bp == NULL)
for (j=0; j<100; j++)
- BN_mod_mul_montgomery(c,A,B,mont,ctx);/**/
- BN_mod_mul_montgomery(c,A,B,mont,ctx);/**/
- BN_from_montgomery(A,c,mont,ctx);/**/
+ BN_mod_mul_montgomery(&c,&A,&B,mont,ctx);/**/
+ BN_mod_mul_montgomery(&c,&A,&B,mont,ctx);/**/
+ BN_from_montgomery(&A,&c,mont,ctx);/**/
if (bp != NULL)
{
if (!results)
{
#ifdef undef
fprintf(stderr,"%d * %d %% %d\n",
-BN_num_bits(a),
-BN_num_bits(b),
+BN_num_bits(&a),
+BN_num_bits(&b),
BN_num_bits(mont->N));
#endif
- BN_print(bp,a);
+ BN_print(bp,&a);
BIO_puts(bp," * ");
- BN_print(bp,b);
+ BN_print(bp,&b);
BIO_puts(bp," % ");
- BN_print(bp,mont->N);
+ BN_print(bp,&(mont->N));
BIO_puts(bp," - ");
}
- BN_print(bp,A);
+ BN_print(bp,&A);
BIO_puts(bp,"\n");
}
}
BN_MONT_CTX_free(mont);
- BN_free(a);
- BN_free(b);
- BN_free(c);
+ BN_free(&a);
+ BN_free(&b);
+ BN_free(&c);
return(1);
}
diff --git a/crypto/bn/comba.pl b/crypto/bn/comba.pl
new file mode 100644
index 0000000000..211a8b45c7
--- /dev/null
+++ b/crypto/bn/comba.pl
@@ -0,0 +1,285 @@
+#!/usr/local/bin/perl
+
+$num=8;
+$num2=8/2;
+
+print <<"EOF";
+/* crypto/bn/bn_comba.c */
+#include <stdio.h>
+#include "bn_lcl.h"
+/* Auto generated from crypto/bn/comba.pl
+ */
+
+#undef bn_mul_comba8
+#undef bn_mul_comba4
+#undef bn_sqr_comba8
+#undef bn_sqr_comba4
+
+#ifdef BN_LLONG
+#define mul_add_c(a,b,c0,c1,c2) \\
+ t=(BN_ULLONG)a*b; \\
+ t1=(BN_ULONG)Lw(t); \\
+ t2=(BN_ULONG)Hw(t); \\
+ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \\
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define mul_add_c2(a,b,c0,c1,c2) \\
+ t=(BN_ULLONG)a*b; \\
+ tt=(t+t)&BN_MASK; \\
+ if (tt < t) c2++; \\
+ t1=(BN_ULONG)Lw(tt); \\
+ t2=(BN_ULONG)Hw(tt); \\
+ c0=(c0+t1)&BN_MASK2; \\
+ if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \\
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define sqr_add_c(a,i,c0,c1,c2) \\
+ t=(BN_ULLONG)a[i]*a[i]; \\
+ t1=(BN_ULONG)Lw(t); \\
+ t2=(BN_ULONG)Hw(t); \\
+ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \\
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define sqr_add_c2(a,i,j,c0,c1,c2) \\
+ mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+#else
+#define mul_add_c(a,b,c0,c1,c2) \\
+ t1=LBITS(a); t2=HBITS(a); \\
+ bl=LBITS(b); bh=HBITS(b); \\
+ mul64(t1,t2,bl,bh); \\
+ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \\
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define mul_add_c2(a,b,c0,c1,c2) \\
+ t1=LBITS(a); t2=HBITS(a); \\
+ bl=LBITS(b); bh=HBITS(b); \\
+ mul64(t1,t2,bl,bh); \\
+ if (t2 & BN_TBIT) c2++; \\
+ t2=(t2+t2)&BN_MASK2; \\
+ if (t1 & BN_TBIT) t2++; \\
+ t1=(t1+t1)&BN_MASK2; \\
+ c0=(c0+t1)&BN_MASK2; \\
+ if ((c0 < t1) && (((++t2)&BN_MASK2) == 0)) c2++; \\
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define sqr_add_c(a,i,c0,c1,c2) \\
+ sqr64(t1,t2,(a)[i]); \\
+ c0=(c0+t1)&BN_MASK2; if ((c0) < t1) t2++; \\
+ c1=(c1+t2)&BN_MASK2; if ((c1) < t2) c2++;
+
+#define sqr_add_c2(a,i,j,c0,c1,c2) \\
+ mul_add_c2((a)[i],(a)[j],c0,c1,c2)
+#endif
+
+void bn_mul_comba${num}(r,a,b)
+BN_ULONG *r,*a,*b;
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t;
+#else
+ BN_ULONG bl,bh;
+#endif
+ BN_ULONG t1,t2;
+ BN_ULONG c1,c2,c3;
+
+EOF
+$ret=&combas_mul("r","a","b",$num,"c1","c2","c3");
+printf <<"EOF";
+ }
+
+void bn_mul_comba${num2}(r,a,b)
+BN_ULONG *r,*a,*b;
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t;
+#else
+ BN_ULONG bl,bh;
+#endif
+ BN_ULONG t1,t2;
+ BN_ULONG c1,c2,c3;
+
+EOF
+$ret=&combas_mul("r","a","b",$num2,"c1","c2","c3");
+printf <<"EOF";
+ }
+
+void bn_sqr_comba${num}(r,a)
+BN_ULONG *r,*a;
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t,tt;
+#else
+ BN_ULONG bl,bh;
+#endif
+ BN_ULONG t1,t2;
+ BN_ULONG c1,c2,c3;
+
+EOF
+$ret=&combas_sqr("r","a",$num,"c1","c2","c3");
+printf <<"EOF";
+ }
+
+void bn_sqr_comba${num2}(r,a)
+BN_ULONG *r,*a;
+ {
+#ifdef BN_LLONG
+ BN_ULLONG t,tt;
+#else
+ BN_ULONG bl,bh;
+#endif
+ BN_ULONG t1,t2;
+ BN_ULONG c1,c2,c3;
+
+EOF
+$ret=&combas_sqr("r","a",$num2,"c1","c2","c3");
+printf <<"EOF";
+ }
+EOF
+
+sub bn_str
+ {
+ local($var,$val)=@_;
+ print "\t$var=$val;\n";
+ }
+
+sub bn_ary
+ {
+ local($var,$idx)=@_;
+ return("${var}[$idx]");
+ }
+
+sub bn_clr
+ {
+ local($var)=@_;
+
+ print "\t$var=0;\n";
+ }
+
+sub bn_mad
+ {
+ local($a,$b,$c0,$c1,$c2,$num)=@_;
+
+ if ($num == 2)
+ { printf("\tmul_add_c2($a,$b,$c0,$c1,$c2);\n"); }
+ else
+ { printf("\tmul_add_c($a,$b,$c0,$c1,$c2);\n"); }
+ }
+
+sub bn_sad
+ {
+ local($a,$i,$j,$c0,$c1,$c2,$num)=@_;
+
+ if ($num == 2)
+ { printf("\tsqr_add_c2($a,$i,$j,$c0,$c1,$c2);\n"); }
+ else
+ { printf("\tsqr_add_c($a,$i,$c0,$c1,$c2);\n"); }
+ }
+
+sub combas_mul
+ {
+ local($r,$a,$b,$num,$c0,$c1,$c2)=@_;
+ local($i,$as,$ae,$bs,$be,$ai,$bi);
+ local($tot,$end);
+
+ $as=0;
+ $ae=0;
+ $bs=0;
+ $be=0;
+ $tot=$num+$num-1;
+ &bn_clr($c0);
+ &bn_clr($c1);
+ for ($i=0; $i<$tot; $i++)
+ {
+ $ai=$as;
+ $bi=$bs;
+ $end=$be+1;
+ @numa=@numb=();
+
+#print "($as $ae) ($bs $be) $bs -> $end [$i $num]\n";
+ for ($j=$bs; $j<$end; $j++)
+ {
+ push(@numa,$ai);
+ push(@numb,$bi);
+ $ai--;
+ $bi++;
+ }
+
+ if ($i & 1)
+ {
+ @numa=reverse(@numa);
+ @numb=reverse(@numb);
+ }
+
+ &bn_clr($c2);
+ for ($j=0; $j<=$#numa; $j++)
+ {
+ &bn_mad(&bn_ary($a,$numa[$j]),
+ &bn_ary($b,$numb[$j]),$c0,$c1,$c2,1);
+ }
+ &bn_str(&bn_ary($r,$i),$c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+
+ $as++ if ($i < ($num-1));
+ $ae++ if ($i >= ($num-1));
+
+ $bs++ if ($i >= ($num-1));
+ $be++ if ($i < ($num-1));
+ }
+ &bn_str(&bn_ary($r,$i),$c0);
+ }
+
+sub combas_sqr
+ {
+ local($r,$a,$num,$c0,$c1,$c2)=@_;
+ local($i,$as,$ae,$bs,$be,$ai,$bi);
+ local($b,$tot,$end,$half);
+
+ $b=$a;
+ $as=0;
+ $ae=0;
+ $bs=0;
+ $be=0;
+ $tot=$num+$num-1;
+ &bn_clr($c0);
+ &bn_clr($c1);
+ for ($i=0; $i<$tot; $i++)
+ {
+ $ai=$as;
+ $bi=$bs;
+ $end=$be+1;
+ @numa=@numb=();
+
+#print "($as $ae) ($bs $be) $bs -> $end [$i $num]\n";
+ for ($j=$bs; $j<$end; $j++)
+ {
+ push(@numa,$ai);
+ push(@numb,$bi);
+ $ai--;
+ $bi++;
+ last if ($ai < $bi);
+ }
+ if (!($i & 1))
+ {
+ @numa=reverse(@numa);
+ @numb=reverse(@numb);
+ }
+
+ &bn_clr($c2);
+ for ($j=0; $j <= $#numa; $j++)
+ {
+ if ($numa[$j] == $numb[$j])
+ {&bn_sad($a,$numa[$j],$numb[$j],$c0,$c1,$c2,1);}
+ else
+ {&bn_sad($a,$numa[$j],$numb[$j],$c0,$c1,$c2,2);}
+ }
+ &bn_str(&bn_ary($r,$i),$c0);
+ ($c0,$c1,$c2)=($c1,$c2,$c0);
+
+ $as++ if ($i < ($num-1));
+ $ae++ if ($i >= ($num-1));
+
+ $bs++ if ($i >= ($num-1));
+ $be++ if ($i < ($num-1));
+ }
+ &bn_str(&bn_ary($r,$i),$c0);
+ }
diff --git a/crypto/bn/d.c b/crypto/bn/d.c
new file mode 100644
index 0000000000..f738b5025e
--- /dev/null
+++ b/crypto/bn/d.c
@@ -0,0 +1,72 @@
+#include <stdio.h>
+#include "bio.h"
+#include "bn_lcl.h"
+
+#define SIZE_A (100*4+4)
+#define SIZE_B (13*4)
+
+main(argc,argv)
+int argc;
+char *argv[];
+ {
+ BN_CTX ctx;
+ BN_RECP_CTX recp;
+ BIGNUM a,b,dd,d,r,rr,t,l;
+ int i;
+
+ MemCheck_start();
+ MemCheck_on();
+ BN_CTX_init(&ctx);
+ BN_RECP_CTX_init(&recp);
+
+ BN_init(&r);
+ BN_init(&rr);
+ BN_init(&d);
+ BN_init(&dd);
+ BN_init(&a);
+ BN_init(&b);
+
+ {
+ BN_rand(&a,SIZE_A,0,0);
+ BN_rand(&b,SIZE_B,0,0);
+
+ a.neg=1;
+ BN_RECP_CTX_set(&recp,&b,&ctx);
+
+ BN_print_fp(stdout,&a); printf(" a\n");
+ BN_print_fp(stdout,&b); printf(" b\n");
+
+ BN_print_fp(stdout,&recp.N); printf(" N\n");
+ BN_print_fp(stdout,&recp.Nr); printf(" Nr num_bits=%d\n",recp.num_bits);
+
+ BN_div_recp(&r,&d,&a,&recp,&ctx);
+
+for (i=0; i<300; i++)
+ BN_div(&rr,&dd,&a,&b,&ctx);
+
+ BN_print_fp(stdout,&r); printf(" div recp\n");
+ BN_print_fp(stdout,&rr); printf(" div\n");
+ BN_print_fp(stdout,&d); printf(" rem recp\n");
+ BN_print_fp(stdout,&dd); printf(" rem\n");
+ }
+ BN_CTX_free(&ctx);
+ BN_RECP_CTX_free(&recp);
+
+ BN_free(&r);
+ BN_free(&rr);
+ BN_free(&d);
+ BN_free(&dd);
+ BN_free(&a);
+ BN_free(&b);
+
+ {
+ BIO *out;
+
+ if ((out=BIO_new(BIO_s_file())) != NULL)
+ BIO_set_fp(out,stderr,BIO_NOCLOSE|BIO_FP_TEXT);
+
+ CRYPTO_mem_leaks(out);
+ BIO_free(out);
+ }
+
+ }
diff --git a/crypto/bn/exp.c b/crypto/bn/exp.c
new file mode 100644
index 0000000000..2427116564
--- /dev/null
+++ b/crypto/bn/exp.c
@@ -0,0 +1,60 @@
+#include <stdio.h>
+#include "tmdiff.h"
+#include "bn_lcl.h"
+
+#define SIZE 256
+#define NUM (8*8*8)
+#define MOD (8*8*8*8*8)
+
+main(argc,argv)
+int argc;
+char *argv[];
+ {
+ BN_CTX ctx;
+ BIGNUM a,b,c,r,rr,t,l;
+ int j,i,size=SIZE,num=NUM,mod=MOD;
+ char *start,*end;
+ BN_MONT_CTX mont;
+ double d,md;
+
+ BN_MONT_CTX_init(&mont);
+ BN_CTX_init(&ctx);
+ BN_init(&a);
+ BN_init(&b);
+ BN_init(&c);
+ BN_init(&r);
+
+ start=ms_time_new();
+ end=ms_time_new();
+ while (size <= 1024*8)
+ {
+ BN_rand(&a,size,0,0);
+ BN_rand(&b,size,1,0);
+ BN_rand(&c,size,0,1);
+
+ BN_mod(&a,&a,&c,&ctx);
+
+ ms_time_get(start);
+ for (i=0; i<10; i++)
+ BN_MONT_CTX_set(&mont,&c,&ctx);
+ ms_time_get(end);
+ md=ms_time_diff(start,end);
+
+ ms_time_get(start);
+ for (i=0; i<num; i++)
+ {
+ //bn_mull(&r,&a,&b,&ctx);
+ //BN_sqr(&r,&a,&ctx);
+ BN_mod_exp_mont(&r,&a,&b,&c,&ctx,&mont);
+ }
+ ms_time_get(end);
+ d=ms_time_diff(start,end) *50/33 /**/;
+ printf("%5d bit:%6.2f %6d %6.4f %4d m_set(%5.4f)\n",size,
+ d,num,d/num,(int)((d/num)*mod),md/10.0);
+ num/=8;
+ mod/=8;
+ if (num <= 0) num=1;
+ size*=2;
+ }
+
+ }
diff --git a/crypto/bn/expspeed.c b/crypto/bn/expspeed.c
index 344f883d35..fe00373246 100644
--- a/crypto/bn/expspeed.c
+++ b/crypto/bn/expspeed.c
@@ -94,7 +94,8 @@ struct tms {
#include <sys/timeb.h>
#endif
-#ifdef sun
+#if defined(sun) || defined(__ultrix)
+#define _POSIX_SOURCE
#include <limits.h>
#include <sys/param.h>
#endif
diff --git a/crypto/bn/exptest.c b/crypto/bn/exptest.c
index 67dc95d726..1ec61c2c87 100644
--- a/crypto/bn/exptest.c
+++ b/crypto/bn/exptest.c
@@ -79,6 +79,8 @@ char *argv[];
unsigned char c;
BIGNUM *r_mont,*r_recp,*a,*b,*m;
+ ERR_load_BN_strings();
+
ctx=BN_CTX_new();
if (ctx == NULL) exit(1);
r_mont=BN_new();
@@ -114,11 +116,19 @@ char *argv[];
ret=BN_mod_exp_mont(r_mont,a,b,m,ctx,NULL);
if (ret <= 0)
- { printf("BN_mod_exp_mont() problems\n"); exit(1); }
+ {
+ printf("BN_mod_exp_mont() problems\n");
+ ERR_print_errors(out);
+ exit(1);
+ }
ret=BN_mod_exp_recp(r_recp,a,b,m,ctx);
if (ret <= 0)
- { printf("BN_mod_exp_recp() problems\n"); exit(1); }
+ {
+ printf("BN_mod_exp_recp() problems\n");
+ ERR_print_errors(out);
+ exit(1);
+ }
if (BN_cmp(r_mont,r_recp) != 0)
{
@@ -137,6 +147,7 @@ char *argv[];
fflush(stdout);
}
}
+ CRYPTO_mem_leaks(out);
printf(" done\n");
exit(0);
err:
diff --git a/crypto/bn/m.pl b/crypto/bn/m.pl
new file mode 100644
index 0000000000..f69b036666
--- /dev/null
+++ b/crypto/bn/m.pl
@@ -0,0 +1,32 @@
+#!/usr/local/bin/perl
+
+
+for ($i=0; $i<256; $i++)
+ {
+ for ($j=0; $j<256; $j++)
+ {
+ $a0=$i&0x0f;
+ $a1=($i>>4)&0x0f;
+ $b0=$j&0x0f;
+ $b1=($j>>4)&0x0f;
+
+ $a0b0=$a0*$b0;
+ $a1b1=$a1*$b1;
+
+ $a01=$a0-$a1;
+ $b10=$b1-$b0;
+ $a01b10=$a01*$b10;
+
+ if ($a01b10 < 0)
+ {
+ $neg=1;
+ $a01b10= -$a01b10;
+ }
+ $t=($a0b0>>4)+($a0b0&0x0f)+($a1b1&0x0f);
+ if ($neg)
+ { $t-=($a01b10&0x0f); }
+ else { $t+=($a01b10&0x0f); }
+ printf("%02X %s%02X %02X\n",$a1b1,($neg)?"-":" ",$a01b10,$a0b0)
+ if ($t < 0)
+ }
+ }
diff --git a/crypto/bn/new b/crypto/bn/new
new file mode 100644
index 0000000000..285d506f19
--- /dev/null
+++ b/crypto/bn/new
@@ -0,0 +1,23 @@
+void BN_RECP_CTX_init(BN_RECP_CTX *recp);
+BN_RECP_CTX *BN_RECP_CTX_new();
+void BN_RECP_CTX_free(BN_RECP_CTX *recp);
+int BN_RECP_CTX_set(BN_RECP_CTX *recp,BIGNUM *div,BN_CTX *ctx);
+
+int BN_mod_exp_recp(BIGNUM *r, BIGNUM *a, BIGNUM *p, BIGNUM *m,
+ BN_RECP_CTX *recp,BN_CTX *ctx);
+
+int BN_div_recp(BIGNUM *dv, BIGNUM *rem, BIGNUM *m, BIGNUM *d,
+ BN_RECP_CTX *recp, BN_CTX *ctx);
+int BN_mod_recp(BIGNUM *rem, BIGNUM *m, BIGNUM *d,
+ BN_RECP_CTX *recp, BN_CTX *ctx);
+int BN_mod_mul_recp(BIGNUM *ret,BIGNUM *a,BIGNUM *b,BIGNUM *m
+
+int BN_mod_exp_montgomery(BIGNUM *r, BIGNUM *a, BIGNUM *p,
+ BN_MONT_CTX *m_ctx,BN_CTX *ctx);
+int BN_mod_exp2_montgomery(BIGNUM *r, BIGNUM *a1, BIGNUM *p1,BIGNUM *a2,
+ BIGNUM *p2,BN_MONT_CTX *m_ctx,BN_CTX *ctx);
+
+
+bn_div64 -> bn_div_words
+
+
diff --git a/crypto/bn/old/b_sqr.c b/crypto/bn/old/b_sqr.c
new file mode 100644
index 0000000000..e1a61b8471
--- /dev/null
+++ b/crypto/bn/old/b_sqr.c
@@ -0,0 +1,205 @@
+/* crypto/bn/bn_mul.c */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn_lcl.h"
+
+static int bn_mm(BIGNUM *m,BIGNUM *A,BIGNUM *B, BIGNUM *sk,BN_CTX *ctx);
+
+/* r must be different to a and b */
+/* int BN_mmul(r, a, b) */
+int BN_mul(r, a, b)
+BIGNUM *r;
+BIGNUM *a;
+BIGNUM *b;
+ {
+ BN_ULONG *ap,*bp,*rp;
+ BIGNUM *sk;
+ int i,n,ret;
+ int max,al,bl;
+ BN_CTX ctx;
+
+ bn_check_top(a);
+ bn_check_top(b);
+
+ al=a->top;
+ bl=b->top;
+ if ((al == 0) || (bl == 0))
+ {
+ r->top=0;
+ return(1);
+ }
+#ifdef BN_MUL_DEBUG
+printf("BN_mul(%d,%d)\n",a->top,b->top);
+#endif
+
+ if ( (bn_limit_bits > 0) &&
+ (bl > bn_limit_num) && (al > bn_limit_num))
+ {
+ n=(BN_num_bits_word(al|bl)-bn_limit_bits);
+ n*=2;
+ sk=(BIGNUM *)Malloc(sizeof(BIGNUM)*n);
+ memset(sk,0,sizeof(BIGNUM)*n);
+ memset(&ctx,0,sizeof(ctx));
+
+ ret=bn_mm(r,a,b,&(sk[0]),&ctx);
+ for (i=0; i<n; i+=2)
+ {
+ BN_clear_free(&sk[i]);
+ BN_clear_free(&sk[i+1]);
+ }
+ Free(sk);
+ return(ret);
+ }
+
+ max=(al+bl);
+ if (bn_wexpand(r,max) == NULL) return(0);
+ r->top=max;
+ r->neg=a->neg^b->neg;
+ ap=a->d;
+ bp=b->d;
+ rp=r->d;
+
+ rp[al]=bn_mul_words(rp,ap,al,*(bp++));
+ rp++;
+ for (i=1; i<bl; i++)
+ {
+ rp[al]=bn_mul_add_words(rp,ap,al,*(bp++));
+ rp++;
+ }
+ if ((max > 0) && (r->d[max-1] == 0)) r->top--;
+ return(1);
+ }
+
+
+#define ahal (sk[0])
+#define blbh (sk[1])
+
+/* r must be different to a and b */
+int bn_mm(m, A, B, sk,ctx)
+BIGNUM *m,*A,*B;
+BIGNUM *sk;
+BN_CTX *ctx;
+ {
+ int n,num,sqr=0;
+ int an,bn;
+ BIGNUM ah,al,bh,bl;
+
+ an=A->top;
+ bn=B->top;
+#ifdef BN_MUL_DEBUG
+printf("bn_mm(%d,%d)\n",A->top,B->top);
+#endif
+
+ if (A == B) sqr=1;
+ num=(an>bn)?an:bn;
+ n=(num+1)/2;
+ /* Are going to now chop things into 'num' word chunks. */
+
+ BN_init(&ah);
+ BN_init(&al);
+ BN_init(&bh);
+ BN_init(&bl);
+
+ bn_set_low (&al,A,n);
+ bn_set_high(&ah,A,n);
+ bn_set_low (&bl,B,n);
+ bn_set_high(&bh,B,n);
+
+ BN_sub(&ahal,&ah,&al);
+ BN_sub(&blbh,&bl,&bh);
+
+ if (num <= (bn_limit_num+bn_limit_num))
+ {
+ BN_mul(m,&ahal,&blbh);
+ if (sqr)
+ {
+ BN_sqr(&ahal,&al,ctx);
+ BN_sqr(&blbh,&ah,ctx);
+ }
+ else
+ {
+ BN_mul(&ahal,&al,&bl);
+ BN_mul(&blbh,&ah,&bh);
+ }
+ }
+ else
+ {
+ bn_mm(m,&ahal,&blbh,&(sk[2]),ctx);
+ bn_mm(&ahal,&al,&bl,&(sk[2]),ctx);
+ bn_mm(&blbh,&ah,&bh,&(sk[2]),ctx);
+ }
+
+ BN_add(m,m,&ahal);
+ BN_add(m,m,&blbh);
+
+ BN_lshift(m,m,n*BN_BITS2);
+ BN_lshift(&blbh,&blbh,n*BN_BITS2*2);
+
+ BN_add(m,m,&ahal);
+ BN_add(m,m,&blbh);
+
+ m->neg=A->neg^B->neg;
+ return(1);
+ }
+#undef ahal (sk[0])
+#undef blbh (sk[1])
+
+#include "bn_low.c"
+#include "bn_high.c"
diff --git a/crypto/bn/old/bn_com.c b/crypto/bn/old/bn_com.c
new file mode 100644
index 0000000000..7666b2304c
--- /dev/null
+++ b/crypto/bn/old/bn_com.c
@@ -0,0 +1,90 @@
+/* crypto/bn/bn_mulw.c */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn_lcl.h"
+
+#ifdef BN_LLONG
+
+ab
+12
+ a2 b2
+a1 b1
+
+abc
+123
+ a3 b3 c3
+ a2 b2 c2
+a1 b1 c1
+
+abcd
+1234
+ a4 b4 c4 d4
+ a3 b3 c3 d3
+ a2 b2 c2 d2
+a1 b1 c1 d1
+
+abcde
+01234
+ a5 b5 c5 d5 e5
+ a4 b4 c4 d4 e4
+ a3 b3 c3 d3 e3
+ a2 b2 c2 d2 e2
+ a1 b1 c1 d1 e1
+a0 b0 c0 d0 e0
diff --git a/crypto/bn/old/bn_high.c b/crypto/bn/old/bn_high.c
new file mode 100644
index 0000000000..90268fb31a
--- /dev/null
+++ b/crypto/bn/old/bn_high.c
@@ -0,0 +1,137 @@
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn_lcl.h"
+
+#undef BN_MUL_HIGH_DEBUG
+
+#ifdef BN_MUL_HIGH_DEBUG
+#define debug_BN_print(a,b,c) BN_print_fp(a,b); printf(c);
+#else
+#define debug_BN_print(a,b,c)
+#endif
+
+int BN_mul_high(BIGNUM *r,BIGNUM *a,BIGNUM *b,BIGNUM *low, int words);
+
+#undef t1
+#undef t2
+
+int BN_mul_high(r,a,b,low,words)
+BIGNUM *r,*a,*b,*low;
+int words;
+ {
+ int w2,borrow=0,full=0;
+ BIGNUM t1,t2,t3,h,ah,al,bh,bl,m,s0,s1;
+ BN_ULONG ul1,ul2;
+
+ BN_mul(r,a,b);
+ BN_rshift(r,r,words*BN_BITS2);
+ return(1);
+
+ w2=(words+1)/2;
+
+#ifdef BN_MUL_HIGH_DEBUG
+fprintf(stdout,"words=%d w2=%d\n",words,w2);
+#endif
+debug_BN_print(stdout,a," a\n");
+debug_BN_print(stdout,b," b\n");
+debug_BN_print(stdout,low," low\n");
+ BN_init(&al); BN_init(&ah);
+ BN_init(&bl); BN_init(&bh);
+ BN_init(&t1); BN_init(&t2); BN_init(&t3);
+ BN_init(&s0); BN_init(&s1);
+ BN_init(&h); BN_init(&m);
+
+ bn_set_low (&al,a,w2);
+ bn_set_high(&ah,a,w2);
+ bn_set_low (&bl,b,w2);
+ bn_set_high(&bh,b,w2);
+
+ bn_set_low(&s0,low,w2);
+ bn_set_high(&s1,low,w2);
+
+debug_BN_print(stdout,&al," al\n");
+debug_BN_print(stdout,&ah," ah\n");
+debug_BN_print(stdout,&bl," bl\n");
+debug_BN_print(stdout,&bh," bh\n");
+debug_BN_print(stdout,&s0," s0\n");
+debug_BN_print(stdout,&s1," s1\n");
+
+ /* Calculate (al-ah)*(bh-bl) */
+ BN_sub(&t1,&al,&ah);
+ BN_sub(&t2,&bh,&bl);
+ BN_mul(&m,&t1,&t2);
+
+ /* Calculate ah*bh */
+ BN_mul(&h,&ah,&bh);
+
+ /* s0 == low(al*bl)
+ * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
+ * We know s0 and s1 so the only unknown is high(al*bl)
+ * high(al*bl) == s1 - low(ah*bh+(al-ah)*(bh-bl)+s0)
+ */
+ BN_add(&m,&m,&h);
+ BN_add(&t2,&m,&s0);
+
+debug_BN_print(stdout,&t2," middle value\n");
+
+ /* Quick and dirty mask off of high words */
+ if (w2 < t2.top) t2.top=w2;
+#if 0
+ bn_set_low(&t3,&t2,w2);
+#endif
+
+debug_BN_print(stdout,&t2," low middle value\n");
+ BN_sub(&t1,&s1,&t2);
+
+ if (t1.neg)
+ {
+debug_BN_print(stdout,&t1," before\n");
+ BN_zero(&t2);
+ BN_set_bit(&t2,w2*BN_BITS2);
+ BN_add(&t1,&t2,&t1);
+ /* BN_mask_bits(&t1,w2*BN_BITS2); */
+ /* if (words < t1.top) t1.top=words; */
+debug_BN_print(stdout,&t1," after\n");
+ borrow=1;
+ }
+
+/* XXXXX SPEED THIS UP */
+ /* al*bl == high(al*bl)<<words+s0 */
+ BN_lshift(&t1,&t1,w2*BN_BITS2);
+ BN_add(&t1,&t1,&s0);
+ if (w2*2 < t1.top) t1.top=w2*2; /* This should not happen? */
+
+ /* We now have
+ * al*bl - t1
+ * (al-ah)*(bh-bl)+ah*bh - m
+ * ah*bh - h
+ */
+#if 0
+ BN_add(&m,&m,&t1);
+debug_BN_print(stdout,&t1," s10\n");
+debug_BN_print(stdout,&m," s21\n");
+debug_BN_print(stdout,&h," s32\n");
+ BN_lshift(&m,&m,w2*BN_BITS2);
+ BN_lshift(&h,&h,w2*2*BN_BITS2);
+ BN_add(r,&m,&t1);
+ BN_add(r,r,&h);
+ BN_rshift(r,r,w2*2*BN_BITS2);
+#else
+ BN_add(&m,&m,&t1); /* Do a cmp then +1 if needed? */
+ bn_set_high(&t3,&t1,w2);
+ BN_add(&m,&m,&t3);
+ bn_set_high(&t3,&m,w2);
+ BN_add(r,&h,&t3);
+#endif
+
+#ifdef BN_MUL_HIGH_DEBUG
+printf("carry=%d\n",borrow);
+#endif
+debug_BN_print(stdout,r," ret\n");
+ BN_free(&t1); BN_free(&t2);
+ BN_free(&m); BN_free(&h);
+ return(1);
+ }
+
+
+
diff --git a/crypto/bn/old/bn_ka.c b/crypto/bn/old/bn_ka.c
new file mode 100644
index 0000000000..b49a52aa73
--- /dev/null
+++ b/crypto/bn/old/bn_ka.c
@@ -0,0 +1,578 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include "bn_lcl.h"
+
+/* r is 2*n2 words in size,
+ * a and b are both n2 words in size.
+ * n2 must be a power of 2.
+ * We multiply and return the result.
+ * t must be 2*n2 words in size
+ * We calulate
+ * a[0]*b[0]
+ * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
+ * a[1]*b[1]
+ */
+void bn_mul_recursive(r,a,b,n2,t)
+BN_ULONG *r,*a,*b;
+int n2;
+BN_ULONG *t;
+ {
+ int n=n2/2;
+ int neg,zero,c1,c2;
+ BN_ULONG ln,lo,*p;
+
+#ifdef BN_COUNT
+printf(" bn_mul_recursive %d * %d\n",n2,n2);
+#endif
+ if (n2 <= 8)
+ {
+ if (n2 == 8)
+ bn_mul_comba8(r,a,b);
+ else
+ bn_mul_normal(r,a,n2,b,n2);
+ return;
+ }
+
+ if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
+ {
+ /* This should not happen */
+ /*abort(); */
+ bn_mul_normal(r,a,n2,b,n2);
+ return;
+ }
+ /* r=(a[0]-a[1])*(b[1]-b[0]) */
+ c1=bn_cmp_words(a,&(a[n]),n);
+ c2=bn_cmp_words(&(b[n]),b,n);
+ zero=neg=0;
+ switch (c1*3+c2)
+ {
+ case -4:
+ bn_sub_words(t, &(a[n]),a, n); /* - */
+ bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
+ break;
+ case -3:
+ zero=1;
+ break;
+ case -2:
+ bn_sub_words(t, &(a[n]),a, n); /* - */
+ bn_sub_words(&(t[n]),&(b[n]),b, n); /* + */
+ neg=1;
+ break;
+ case -1:
+ case 0:
+ case 1:
+ zero=1;
+ break;
+ case 2:
+ bn_sub_words(t, a, &(a[n]),n); /* + */
+ bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
+ neg=1;
+ break;
+ case 3:
+ zero=1;
+ break;
+ case 4:
+ bn_sub_words(t, a, &(a[n]),n);
+ bn_sub_words(&(t[n]),&(b[n]),b, n);
+ break;
+ }
+
+ if (n == 8)
+ {
+ if (!zero)
+ bn_mul_comba8(&(t[n2]),t,&(t[n]));
+ else
+ memset(&(t[n2]),0,8*sizeof(BN_ULONG));
+
+ bn_mul_comba8(r,a,b);
+ bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
+ }
+ else
+ {
+ p= &(t[n2*2]);
+ if (!zero)
+ bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
+ else
+ memset(&(t[n2]),0,n*sizeof(BN_ULONG));
+ bn_mul_recursive(r,a,b,n,p);
+ bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,p);
+ }
+
+ /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
+ * r[10] holds (a[0]*b[0])
+ * r[32] holds (b[1]*b[1])
+ */
+
+ c1=bn_add_words(t,r,&(r[n2]),n2);
+
+ if (neg) /* if t[32] is negative */
+ {
+ c1-=bn_sub_words(&(t[n2]),t,&(t[n2]),n2);
+ }
+ else
+ {
+ /* Might have a carry */
+ c1+=bn_add_words(&(t[n2]),&(t[n2]),t,n2);
+ }
+
+ /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
+ * r[10] holds (a[0]*b[0])
+ * r[32] holds (b[1]*b[1])
+ * c1 holds the carry bits
+ */
+ c1+=bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2);
+ if (c1)
+ {
+ p= &(r[n+n2]);
+ lo= *p;
+ ln=(lo+c1)&BN_MASK2;
+ *p=ln;
+
+ /* The overflow will stop before we over write
+ * words we should not overwrite */
+ if (ln < c1)
+ {
+ do {
+ p++;
+ lo= *p;
+ ln=(lo+1)&BN_MASK2;
+ *p=ln;
+ } while (ln == 0);
+ }
+ }
+ }
+
+/* n+tn is the word length
+ * t needs to be n*4 is size, as does r */
+void bn_mul_part_recursive(r,a,b,tn,n,t)
+BN_ULONG *r,*a,*b;
+int tn,n;
+BN_ULONG *t;
+ {
+ int n2=n*2,i,j;
+ int c1;
+ BN_ULONG ln,lo,*p;
+
+#ifdef BN_COUNT
+printf(" bn_mul_part_recursive %d * %d\n",tn+n,tn+n);
+#endif
+ if (n < 8)
+ {
+ i=tn+n;
+ bn_mul_normal(r,a,i,b,i);
+ return;
+ }
+
+ /* r=(a[0]-a[1])*(b[1]-b[0]) */
+ bn_sub_words(t, a, &(a[n]),n); /* + */
+ bn_sub_words(&(t[n]),b, &(b[n]),n); /* - */
+
+ if (n == 8)
+ {
+ bn_mul_comba8(&(t[n2]),t,&(t[n]));
+ bn_mul_comba8(r,a,b);
+ bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
+ memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
+ }
+ else
+ {
+ p= &(t[n2*2]);
+ bn_mul_recursive(&(t[n2]),t,&(t[n]),n,p);
+ bn_mul_recursive(r,a,b,n,p);
+ i=n/2;
+ /* If there is only a bottom half to the number,
+ * just do it */
+ j=tn-i;
+ if (j == 0)
+ {
+ bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),i,p);
+ memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
+ }
+ else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
+ {
+ bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
+ j,i,p);
+ memset(&(r[n2+tn*2]),0,
+ sizeof(BN_ULONG)*(n2-tn*2));
+ }
+ else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
+ {
+ memset(&(r[n2]),0,sizeof(BN_ULONG)*(tn*2));
+ for (;;)
+ {
+ i/=2;
+ if (i < tn)
+ {
+ bn_mul_part_recursive(&(r[n2]),
+ &(a[n]),&(b[n]),
+ tn-i,i,p);
+ break;
+ }
+ else if (i == tn)
+ {
+ bn_mul_recursive(&(r[n2]),
+ &(a[n]),&(b[n]),
+ i,p);
+ break;
+ }
+ }
+ }
+ }
+
+ /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
+ * r[10] holds (a[0]*b[0])
+ * r[32] holds (b[1]*b[1])
+ */
+
+ c1=bn_add_words(t,r,&(r[n2]),n2);
+ c1-=bn_sub_words(&(t[n2]),t,&(t[n2]),n2);
+
+ /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
+ * r[10] holds (a[0]*b[0])
+ * r[32] holds (b[1]*b[1])
+ * c1 holds the carry bits
+ */
+ c1+=bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2);
+ if (c1)
+ {
+ p= &(r[n+n2]);
+ lo= *p;
+ ln=(lo+c1)&BN_MASK2;
+ *p=ln;
+
+ /* The overflow will stop before we over write
+ * words we should not overwrite */
+ if (ln < c1)
+ {
+ do {
+ p++;
+ lo= *p;
+ ln=(lo+1)&BN_MASK2;
+ *p=ln;
+ } while (ln == 0);
+ }
+ }
+ }
+
+/* r is 2*n words in size,
+ * a and b are both n words in size.
+ * n must be a power of 2.
+ * We multiply and return the result.
+ * t must be 2*n words in size
+ * We calulate
+ * a[0]*b[0]
+ * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
+ * a[1]*b[1]
+ */
+void bn_sqr_recursive(r,a,n2,t)
+BN_ULONG *r,*a;
+int n2;
+BN_ULONG *t;
+ {
+ int n=n2/2;
+ int zero,c1;
+ BN_ULONG ln,lo,*p;
+
+#ifdef BN_COUNT
+printf(" bn_sqr_recursive %d * %d\n",n2,n2);
+#endif
+ if (n2 == 4)
+ {
+ bn_sqr_comba4(r,a);
+ return;
+ }
+ else if (n2 == 8)
+ {
+ bn_sqr_comba8(r,a);
+ return;
+ }
+ if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
+ {
+ bn_sqr_normal(r,a,n2,t);
+ return;
+ abort();
+ }
+ /* r=(a[0]-a[1])*(a[1]-a[0]) */
+ c1=bn_cmp_words(a,&(a[n]),n);
+ zero=0;
+ if (c1 > 0)
+ bn_sub_words(t,a,&(a[n]),n);
+ else if (c1 < 0)
+ bn_sub_words(t,&(a[n]),a,n);
+ else
+ zero=1;
+
+ /* The result will always be negative unless it is zero */
+
+ if (n == 8)
+ {
+ if (!zero)
+ bn_sqr_comba8(&(t[n2]),t);
+ else
+ memset(&(t[n2]),0,8*sizeof(BN_ULONG));
+
+ bn_sqr_comba8(r,a);
+ bn_sqr_comba8(&(r[n2]),&(a[n]));
+ }
+ else
+ {
+ p= &(t[n2*2]);
+ if (!zero)
+ bn_sqr_recursive(&(t[n2]),t,n,p);
+ else
+ memset(&(t[n2]),0,n*sizeof(BN_ULONG));
+ bn_sqr_recursive(r,a,n,p);
+ bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
+ }
+
+ /* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
+ * r[10] holds (a[0]*b[0])
+ * r[32] holds (b[1]*b[1])
+ */
+
+ c1=bn_add_words(t,r,&(r[n2]),n2);
+
+ /* t[32] is negative */
+ c1-=bn_sub_words(&(t[n2]),t,&(t[n2]),n2);
+
+ /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
+ * r[10] holds (a[0]*a[0])
+ * r[32] holds (a[1]*a[1])
+ * c1 holds the carry bits
+ */
+ c1+=bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2);
+ if (c1)
+ {
+ p= &(r[n+n2]);
+ lo= *p;
+ ln=(lo+c1)&BN_MASK2;
+ *p=ln;
+
+ /* The overflow will stop before we over write
+ * words we should not overwrite */
+ if (ln < c1)
+ {
+ do {
+ p++;
+ lo= *p;
+ ln=(lo+1)&BN_MASK2;
+ *p=ln;
+ } while (ln == 0);
+ }
+ }
+ }
+
+#if 1
+/* a and b must be the same size, which is n2.
+ * r needs to be n2 words and t needs to be n2*2
+ */
+void bn_mul_low_recursive(r,a,b,n2,t)
+BN_ULONG *r,*a,*b;
+int n2;
+BN_ULONG *t;
+ {
+ int n=n2/2;
+
+#ifdef BN_COUNT
+printf(" bn_mul_low_recursive %d * %d\n",n2,n2);
+#endif
+
+ bn_mul_recursive(r,a,b,n,&(t[0]));
+ if (n > BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
+ {
+ bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
+ bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
+ bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
+ bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
+ }
+ else
+ {
+ bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
+ bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
+ bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
+ bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
+ }
+ }
+
+/* a and b must be the same size, which is n2.
+ * r needs to be n2 words and t needs to be n2*2
+ * l is the low words of the output.
+ * t needs to be n2*3
+ */
+void bn_mul_high(r,a,b,l,n2,t)
+BN_ULONG *r,*a,*b,*l;
+int n2;
+BN_ULONG *t;
+ {
+ int j,i,n,c1,c2;
+ int neg,oneg,zero;
+ BN_ULONG ll,lc,*lp,*mp;
+
+#ifdef BN_COUNT
+printf(" bn_mul_high %d * %d\n",n2,n2);
+#endif
+ n=(n2+1)/2;
+
+ /* Calculate (al-ah)*(bh-bl) */
+ neg=zero=0;
+ c1=bn_cmp_words(&(a[0]),&(a[n]),n);
+ c2=bn_cmp_words(&(b[n]),&(b[0]),n);
+ switch (c1*3+c2)
+ {
+ case -4:
+ bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
+ bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
+ break;
+ case -3:
+ zero=1;
+ break;
+ case -2:
+ bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
+ bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
+ neg=1;
+ break;
+ case -1:
+ case 0:
+ case 1:
+ zero=1;
+ break;
+ case 2:
+ bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
+ bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
+ neg=1;
+ break;
+ case 3:
+ zero=1;
+ break;
+ case 4:
+ bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
+ bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
+ break;
+ }
+
+ oneg=neg;
+ /* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
+ bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,&(t[n2]));
+ /* r[10] = (a[1]*b[1]) */
+ bn_mul_recursive(r,&(a[n]),&(b[n]),n,&(t[n2]));
+
+ /* s0 == low(al*bl)
+ * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
+ * We know s0 and s1 so the only unknown is high(al*bl)
+ * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
+ * high(al*bl) == s1 - (r[0]+l[0]+t[0])
+ */
+ if (l != NULL)
+ {
+ lp= &(t[n2+n]);
+ c1=bn_add_words(lp,&(r[0]),&(l[0]),n);
+ }
+ else
+ {
+ c1=0;
+ lp= &(r[0]);
+ }
+
+ if (neg)
+ neg=bn_sub_words(&(t[n2]),lp,&(t[0]),n);
+ else
+ {
+ bn_add_words(&(t[n2]),lp,&(t[0]),n);
+ neg=0;
+ }
+
+ if (l != NULL)
+ {
+ bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
+ }
+ else
+ {
+ lp= &(t[n2+n]);
+ mp= &(t[n2]);
+ for (i=0; i<n; i++)
+ lp[i]=((~mp[i])+1)&BN_MASK2;
+ }
+
+ /* s[0] = low(al*bl)
+ * t[3] = high(al*bl)
+ * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
+ * r[10] = (a[1]*b[1])
+ */
+ /* R[10] = al*bl
+ * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
+ * R[32] = ah*bh
+ */
+ /* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
+ * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
+ * R[3]=r[1]+(carry/borrow)
+ */
+ if (l != NULL)
+ {
+ lp= &(t[n2]);
+ c1= bn_add_words(lp,&(t[n2+n]),&(l[0]),n);
+ }
+ else
+ {
+ lp= &(t[n2+n]);
+ c1=0;
+ }
+ c1+=bn_add_words(&(t[n2]),lp, &(r[0]),n);
+ if (oneg)
+ c1-=bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n);
+ else
+ c1+=bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n);
+
+ c2 =bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n);
+ c2+=bn_add_words(&(r[0]),&(r[0]),&(r[n]),n);
+ if (oneg)
+ c2-=bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n);
+ else
+ c2+=bn_add_words(&(r[0]),&(r[0]),&(t[n]),n);
+
+ if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
+ {
+ i=0;
+ if (c1 > 0)
+ {
+ lc=c1;
+ do {
+ ll=(r[i]+lc)&BN_MASK2;
+ r[i++]=ll;
+ lc=(lc > ll);
+ } while (lc);
+ }
+ else
+ {
+ lc= -c1;
+ do {
+ ll=r[i];
+ r[i++]=(ll-lc)&BN_MASK2;
+ lc=(lc > ll);
+ } while (lc);
+ }
+ }
+ if (c2 != 0) /* Add starting at r[1] */
+ {
+ i=n;
+ if (c2 > 0)
+ {
+ lc=c2;
+ do {
+ ll=(r[i]+lc)&BN_MASK2;
+ r[i++]=ll;
+ lc=(lc > ll);
+ } while (lc);
+ }
+ else
+ {
+ lc= -c2;
+ do {
+ ll=r[i];
+ r[i++]=(ll-lc)&BN_MASK2;
+ lc=(lc > ll);
+ } while (lc);
+ }
+ }
+ }
+#endif
diff --git a/crypto/bn/old/bn_low.c b/crypto/bn/old/bn_low.c
new file mode 100644
index 0000000000..217c8c2f96
--- /dev/null
+++ b/crypto/bn/old/bn_low.c
@@ -0,0 +1,201 @@
+/* crypto/bn/bn_mul.c */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn_lcl.h"
+
+static int bn_mm_low(BIGNUM *m,BIGNUM *A,BIGNUM *B, int num,
+ BIGNUM *sk,BN_CTX *ctx);
+int BN_mul_low(BIGNUM *r, BIGNUM *a, BIGNUM *b,int words);
+
+/* r must be different to a and b */
+int BN_mul_low(r, a, b, num)
+BIGNUM *r;
+BIGNUM *a;
+BIGNUM *b;
+int num;
+ {
+ BN_ULONG *ap,*bp,*rp;
+ BIGNUM *sk;
+ int j,i,n,ret;
+ int max,al,bl;
+ BN_CTX ctx;
+
+ bn_check_top(a);
+ bn_check_top(b);
+
+#ifdef BN_MUL_DEBUG
+printf("BN_mul_low(%d,%d,%d)\n",a->top,b->top,num);
+#endif
+
+ al=a->top;
+ bl=b->top;
+ if ((al == 0) || (bl == 0))
+ {
+ r->top=0;
+ return(1);
+ }
+
+ if ((bn_limit_bits_low > 0) && (num > bn_limit_num_low))
+ {
+ n=BN_num_bits_word(num*2)-bn_limit_bits_low;
+ n*=2;
+ sk=(BIGNUM *)Malloc(sizeof(BIGNUM)*n);
+ memset(sk,0,sizeof(BIGNUM)*n);
+ memset(&ctx,0,sizeof(ctx));
+
+ ret=bn_mm_low(r,a,b,num,&(sk[0]),&ctx);
+ for (i=0; i<n; i+=2)
+ {
+ BN_clear_free(&sk[i]);
+ BN_clear_free(&sk[i+1]);
+ }
+ Free(sk);
+ return(ret);
+ }
+
+ max=(al+bl);
+ if (bn_wexpand(r,max) == NULL) return(0);
+ r->neg=a->neg^b->neg;
+ ap=a->d;
+ bp=b->d;
+ rp=r->d;
+ r->top=(max > num)?num:max;
+
+ rp[al]=bn_mul_words(rp,ap,al,*(bp++));
+ rp++;
+ j=bl;
+ for (i=1; i<j; i++)
+ {
+ if (al >= num--)
+ {
+ al--;
+ if (al <= 0) break;
+ }
+ rp[al]=bn_mul_add_words(rp,ap,al,*(bp++));
+ rp++;
+ }
+
+ while ((r->top > 0) && (r->d[r->top-1] == 0))
+ r->top--;
+ return(1);
+ }
+
+
+#define t1 (sk[0])
+#define t2 (sk[1])
+
+/* r must be different to a and b */
+int bn_mm_low(m, A, B, num, sk,ctx)
+BIGNUM *m,*A,*B;
+int num;
+BIGNUM *sk;
+BN_CTX *ctx;
+ {
+ int n; /* ,sqr=0; */
+ int an,bn;
+ BIGNUM ah,al,bh,bl;
+
+ bn_wexpand(m,num+3);
+ an=A->top;
+ bn=B->top;
+
+#ifdef BN_MUL_DEBUG
+printf("bn_mm_low(%d,%d,%d)\n",A->top,B->top,num);
+#endif
+
+ n=(num+1)/2;
+
+ BN_init(&ah); BN_init(&al); BN_init(&bh); BN_init(&bl);
+
+ bn_set_low( &al,A,n);
+ bn_set_high(&ah,A,n);
+ bn_set_low( &bl,B,n);
+ bn_set_high(&bh,B,n);
+
+ if (num <= (bn_limit_num_low+bn_limit_num_low))
+ {
+ BN_mul(m,&al,&bl);
+ BN_mul_low(&t1,&al,&bh,n);
+ BN_mul_low(&t2,&ah,&bl,n);
+ }
+ else
+ {
+ bn_mm(m ,&al,&bl,&(sk[2]),ctx);
+ bn_mm_low(&t1,&al,&bh,n,&(sk[2]),ctx);
+ bn_mm_low(&t2,&ah,&bl,n,&(sk[2]),ctx);
+ }
+
+ BN_add(&t1,&t1,&t2);
+
+ /* We will now do an evil hack instead of
+ * BN_lshift(&t1,&t1,n*BN_BITS2);
+ * BN_add(m,m,&t1);
+ * BN_mask_bits(m,num*BN_BITS2);
+ */
+ bn_set_high(&ah,m,n); ah.max=num+2;
+ BN_add(&ah,&ah,&t1);
+ m->top=num;
+
+ m->neg=A->neg^B->neg;
+ return(1);
+ }
+
+#undef t1 (sk[0])
+#undef t2 (sk[1])
diff --git a/crypto/bn/old/bn_m.c b/crypto/bn/old/bn_m.c
new file mode 100644
index 0000000000..1cf51e8e2a
--- /dev/null
+++ b/crypto/bn/old/bn_m.c
@@ -0,0 +1,142 @@
+/* crypto/bn/bn_m.c */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stdio.h>
+/*#include "cryptlib.h"*/
+#include "bn_lcl.h"
+
+#define limit_bits 5 /* 2^5, or 32 words */
+#define limit_num (1<<limit_bits)
+
+int BN_m(r,a,b)
+BIGNUM *r,*a,*b;
+ {
+ BIGNUM *sk;
+ int i,n;
+
+ n=(BN_num_bits_word(a->top|b->top)-limit_bits);
+ n*=2;
+ sk=(BIGNUM *)malloc(sizeof(BIGNUM)*n);
+ for (i=0; i<n; i++)
+ BN_init(&(sk[i]));
+
+ return(BN_mm(r,a,b,&(sk[0])));
+ }
+
+#define ahal (sk[0])
+#define blbh (sk[1])
+
+/* r must be different to a and b */
+int BN_mm(m, A, B, sk)
+BIGNUM *m,*A,*B;
+BIGNUM *sk;
+ {
+ int i,num,anum,bnum;
+ int an,bn;
+ BIGNUM ah,al,bh,bl;
+
+ an=A->top;
+ bn=B->top;
+ if ((an <= limit_num) || (bn <= limit_num))
+ {
+ return(BN_mul(m,A,B));
+ }
+
+ anum=(an>bn)?an:bn;
+ num=(anum)/2;
+
+ /* Are going to now chop things into 'num' word chunks. */
+ bnum=num*BN_BITS2;
+
+ BN_init(&ahal);
+ BN_init(&blbh);
+ BN_init(&ah);
+ BN_init(&al);
+ BN_init(&bh);
+ BN_init(&bl);
+
+ al.top=num;
+ al.d=A->d;
+ ah.top=A->top-num;
+ ah.d= &(A->d[num]);
+
+ bl.top=num;
+ bl.d=B->d;
+ bh.top=B->top-num;
+ bh.d= &(B->d[num]);
+
+ BN_sub(&ahal,&ah,&al);
+ BN_sub(&blbh,&bl,&bh);
+
+ BN_mm(m,&ahal,&blbh,&(sk[2]));
+ BN_mm(&ahal,&al,&bl,&(sk[2]));
+ BN_mm(&blbh,&ah,&bh,&(sk[2]));
+
+ BN_add(m,m,&ahal);
+ BN_add(m,m,&blbh);
+
+ BN_lshift(m,m,bnum);
+ BN_add(m,m,&ahal);
+
+ BN_lshift(&blbh,&blbh,bnum*2);
+ BN_add(m,m,&blbh);
+
+ m->neg=A->neg^B->neg;
+ return(1);
+ }
+
diff --git a/crypto/bn/old/bn_mul.c.works b/crypto/bn/old/bn_mul.c.works
new file mode 100644
index 0000000000..6d565d44a2
--- /dev/null
+++ b/crypto/bn/old/bn_mul.c.works
@@ -0,0 +1,219 @@
+/* crypto/bn/bn_mul.c */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn_lcl.h"
+
+int bn_mm(BIGNUM *m,BIGNUM *A,BIGNUM *B, BIGNUM *sk,BN_CTX *ctx);
+
+/* r must be different to a and b */
+int BN_mul(r, a, b)
+BIGNUM *r;
+BIGNUM *a;
+BIGNUM *b;
+ {
+ BN_ULONG *ap,*bp,*rp;
+ BIGNUM *sk;
+ int i,n,ret;
+ int max,al,bl;
+ BN_CTX ctx;
+
+ bn_check_top(a);
+ bn_check_top(b);
+
+ al=a->top;
+ bl=b->top;
+ if ((al == 0) || (bl == 0))
+ {
+ r->top=0;
+ return(1);
+ }
+#ifdef BN_MUL_DEBUG
+printf("BN_mul(%d,%d)\n",a->top,b->top);
+#endif
+
+#ifdef BN_RECURSION
+ if ( (bn_limit_bits > 0) &&
+ (bl > bn_limit_num) && (al > bn_limit_num))
+ {
+ n=(BN_num_bits_word(al|bl)-bn_limit_bits);
+ n*=2;
+ sk=(BIGNUM *)Malloc(sizeof(BIGNUM)*n);
+ memset(sk,0,sizeof(BIGNUM)*n);
+ memset(&ctx,0,sizeof(ctx));
+
+ ret=bn_mm(r,a,b,&(sk[0]),&ctx);
+ for (i=0; i<n; i+=2)
+ {
+ BN_clear_free(&sk[i]);
+ BN_clear_free(&sk[i+1]);
+ }
+ Free(sk);
+ return(ret);
+ }
+#endif
+
+ max=(al+bl);
+ if (bn_wexpand(r,max) == NULL) return(0);
+ r->top=max;
+ r->neg=a->neg^b->neg;
+ ap=a->d;
+ bp=b->d;
+ rp=r->d;
+
+#ifdef BN_RECURSION
+ if ((al == bl) && (al == 8))
+ {
+ bn_mul_comba8(rp,ap,bp);
+ }
+ else
+#endif
+ {
+ rp[al]=bn_mul_words(rp,ap,al,*(bp++));
+ rp++;
+ for (i=1; i<bl; i++)
+ {
+ rp[al]=bn_mul_add_words(rp,ap,al,*(bp++));
+ rp++;
+ }
+ }
+ if ((max > 0) && (r->d[max-1] == 0)) r->top--;
+ return(1);
+ }
+
+#ifdef BN_RECURSION
+
+#define ahal (sk[0])
+#define blbh (sk[1])
+
+/* r must be different to a and b */
+int bn_mm(m, A, B, sk,ctx)
+BIGNUM *m,*A,*B;
+BIGNUM *sk;
+BN_CTX *ctx;
+ {
+ int n,num,sqr=0;
+ int an,bn;
+ BIGNUM ah,al,bh,bl;
+
+ an=A->top;
+ bn=B->top;
+#ifdef BN_MUL_DEBUG
+printf("bn_mm(%d,%d)\n",A->top,B->top);
+#endif
+
+ if (A == B) sqr=1;
+ num=(an>bn)?an:bn;
+ n=(num+1)/2;
+ /* Are going to now chop things into 'num' word chunks. */
+
+ BN_init(&ah);
+ BN_init(&al);
+ BN_init(&bh);
+ BN_init(&bl);
+
+ bn_set_low (&al,A,n);
+ bn_set_high(&ah,A,n);
+ bn_set_low (&bl,B,n);
+ bn_set_high(&bh,B,n);
+
+ BN_sub(&ahal,&ah,&al);
+ BN_sub(&blbh,&bl,&bh);
+
+ if (num <= (bn_limit_num+bn_limit_num))
+ {
+ BN_mul(m,&ahal,&blbh);
+ if (sqr)
+ {
+ BN_sqr(&ahal,&al,ctx);
+ BN_sqr(&blbh,&ah,ctx);
+ }
+ else
+ {
+ BN_mul(&ahal,&al,&bl);
+ BN_mul(&blbh,&ah,&bh);
+ }
+ }
+ else
+ {
+ bn_mm(m,&ahal,&blbh,&(sk[2]),ctx);
+ bn_mm(&ahal,&al,&bl,&(sk[2]),ctx);
+ bn_mm(&blbh,&ah,&bh,&(sk[2]),ctx);
+ }
+
+ BN_add(m,m,&ahal);
+ BN_add(m,m,&blbh);
+
+ BN_lshift(m,m,n*BN_BITS2);
+ BN_lshift(&blbh,&blbh,n*BN_BITS2*2);
+
+ BN_add(m,m,&ahal);
+ BN_add(m,m,&blbh);
+
+ m->neg=A->neg^B->neg;
+ return(1);
+ }
+#undef ahal (sk[0])
+#undef blbh (sk[1])
+
+#include "bn_low.c"
+#include "bn_high.c"
+#include "f.c"
+
+#endif
diff --git a/crypto/bn/old/bn_wmul.c b/crypto/bn/old/bn_wmul.c
new file mode 100644
index 0000000000..e3ce107921
--- /dev/null
+++ b/crypto/bn/old/bn_wmul.c
@@ -0,0 +1,181 @@
+#include <stdio.h>
+#include "bn_lcl.h"
+
+#if 1
+
+int bn_mull(BIGNUM *r,BIGNUM *a,BIGNUM *b, BN_CTX *ctx);
+
+int bn_mull(r,a,b,ctx)
+BIGNUM *r,*a,*b;
+BN_CTX *ctx;
+ {
+ int top,i,j,k,al,bl;
+ BIGNUM *t;
+
+#ifdef BN_COUNT
+printf("bn_mull %d * %d\n",a->top,b->top);
+#endif
+
+ bn_check_top(a);
+ bn_check_top(b);
+ bn_check_top(r);
+
+ al=a->top;
+ bl=b->top;
+ r->neg=a->neg^b->neg;
+
+ top=al+bl;
+ if ((al < 4) || (bl < 4))
+ {
+ if (bn_wexpand(r,top) == NULL) return(0);
+ r->top=top;
+ bn_mul_normal(r->d,a->d,al,b->d,bl);
+ goto end;
+ }
+ else if (al == bl) /* A good start, they are the same size */
+ goto symetric;
+ else
+ {
+ i=(al-bl);
+ if ((i == 1) && !BN_get_flags(b,BN_FLG_STATIC_DATA))
+ {
+ bn_wexpand(b,al);
+ b->d[bl]=0;
+ bl++;
+ goto symetric;
+ }
+ else if ((i == -1) && !BN_get_flags(a,BN_FLG_STATIC_DATA))
+ {
+ bn_wexpand(a,bl);
+ a->d[al]=0;
+ al++;
+ goto symetric;
+ }
+ }
+
+ /* asymetric and >= 4 */
+ if (bn_wexpand(r,top) == NULL) return(0);
+ r->top=top;
+ bn_mul_normal(r->d,a->d,al,b->d,bl);
+
+ if (0)
+ {
+ /* symetric and > 4 */
+symetric:
+ if (al == 4)
+ {
+ if (bn_wexpand(r,al*2) == NULL) return(0);
+ r->top=top;
+ bn_mul_comba4(r->d,a->d,b->d);
+ goto end;
+ }
+ if (al == 8)
+ {
+ if (bn_wexpand(r,al*2) == NULL) return(0);
+ r->top=top;
+ bn_mul_comba8(r->d,a->d,b->d);
+ goto end;
+ }
+ if (al <= BN_MULL_NORMAL_SIZE)
+ {
+ if (bn_wexpand(r,al*2) == NULL) return(0);
+ r->top=top;
+ bn_mul_normal(r->d,a->d,al,b->d,bl);
+ goto end;
+ }
+ /* 16 or larger */
+ j=BN_num_bits_word((BN_ULONG)al);
+ j=1<<(j-1);
+ k=j+j;
+ t= &(ctx->bn[ctx->tos]);
+ if (al == j) /* exact multiple */
+ {
+ bn_wexpand(t,k*2);
+ bn_wexpand(r,k*2);
+ bn_mul_recursive(r->d,a->d,b->d,al,t->d);
+ }
+ else
+ {
+ bn_wexpand(a,k);
+ bn_wexpand(b,k);
+ bn_wexpand(t,k*4);
+ bn_wexpand(r,k*4);
+ for (i=a->top; i<k; i++)
+ a->d[i]=0;
+ for (i=b->top; i<k; i++)
+ b->d[i]=0;
+ bn_mul_part_recursive(r->d,a->d,b->d,al-j,j,t->d);
+ }
+ r->top=top;
+ }
+end:
+ bn_fix_top(r);
+ return(1);
+ }
+#endif
+
+void bn_mul_normal(r,a,na,b,nb)
+BN_ULONG *r,*a;
+int na;
+BN_ULONG *b;
+int nb;
+ {
+ BN_ULONG *rr;
+
+#ifdef BN_COUNT
+printf(" bn_mul_normal %d * %d\n",na,nb);
+#endif
+
+ if (na < nb)
+ {
+ int itmp;
+ BN_ULONG *ltmp;
+
+ itmp=na; na=nb; nb=itmp;
+ ltmp=a; a=b; b=ltmp;
+
+ }
+ rr= &(r[na]);
+ rr[0]=bn_mul_words(r,a,na,b[0]);
+
+ for (;;)
+ {
+ if (--nb <= 0) return;
+ rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
+ if (--nb <= 0) return;
+ rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
+ if (--nb <= 0) return;
+ rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
+ if (--nb <= 0) return;
+ rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
+ rr+=4;
+ r+=4;
+ b+=4;
+ }
+ }
+
+#if 1
+void bn_mul_low_normal(r,a,b,n)
+BN_ULONG *r,*a,*b;
+int n;
+ {
+#ifdef BN_COUNT
+printf(" bn_mul_low_normal %d * %d\n",n,n);
+#endif
+ bn_mul_words(r,a,n,b[0]);
+
+ for (;;)
+ {
+ if (--n <= 0) return;
+ bn_mul_add_words(&(r[1]),a,n,b[1]);
+ if (--n <= 0) return;
+ bn_mul_add_words(&(r[2]),a,n,b[2]);
+ if (--n <= 0) return;
+ bn_mul_add_words(&(r[3]),a,n,b[3]);
+ if (--n <= 0) return;
+ bn_mul_add_words(&(r[4]),a,n,b[4]);
+ r+=4;
+ b+=4;
+ }
+ }
+#endif
diff --git a/crypto/bn/old/build b/crypto/bn/old/build
new file mode 100755
index 0000000000..8cd99e5f17
--- /dev/null
+++ b/crypto/bn/old/build
@@ -0,0 +1,3 @@
+#!/bin/sh -x
+
+gcc -g -I../../include test.c -L../.. -lcrypto
diff --git a/crypto/bn/old/info b/crypto/bn/old/info
new file mode 100644
index 0000000000..5ac99c3b23
--- /dev/null
+++ b/crypto/bn/old/info
@@ -0,0 +1,22 @@
+Given A1A0 * B1B0 == S3S2S1S0
+
+S0= low(A0*B0)
+S1= low( (A1-A0)*(B0-B1)) +low( A1*B1) +high(A0*B0)
+S2= high((A1-A0)*(B0-B1)) +high(A1*B1) +low( A1*B1)
+S3= high(A1*B1);
+
+Assume we know S1 and S0, and can calulate A1*B1 and high((A1-A0)*(B0-B1))
+
+k0= S0 == low(A0*B0)
+k1= S1
+k2= low( A1*B1)
+k3= high(A1*B1)
+k4= high((A1-A0)*(B0-B1))
+
+k1= low((A1-A0)*(B0-B1)) +k2 +high(A0*B0)
+S2= k4 +k3 +k2
+S3= k3
+
+S1-k2= low((A1-A0)*(B0-B1)) +high(A0*B0)
+
+We potentially have a carry or a borrow from S1
diff --git a/crypto/bn/old/test.works b/crypto/bn/old/test.works
new file mode 100644
index 0000000000..127c7b415d
--- /dev/null
+++ b/crypto/bn/old/test.works
@@ -0,0 +1,205 @@
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn_lcl.h"
+
+#define SIZE 128
+
+#define BN_MONT_CTX_set bn_mcs
+#define BN_from_montgomery bn_fm
+#define BN_mod_mul_montgomery bn_mmm
+#undef BN_to_montgomery
+#define BN_to_montgomery(r,a,mont,ctx) bn_mmm(\
+ r,a,(mont)->RR,(mont),ctx)
+
+main()
+ {
+ BIGNUM prime,a,b,r,A,B,R;
+ BN_MONT_CTX *mont;
+ BN_CTX *ctx;
+ int i;
+
+ ctx=BN_CTX_new();
+ BN_init(&prime);
+ BN_init(&a); BN_init(&b); BN_init(&r);
+ BN_init(&A); BN_init(&B); BN_init(&R);
+
+ BN_generate_prime(&prime,SIZE,0,NULL,NULL,NULL,NULL);
+ BN_rand(&A,SIZE,1,0);
+ BN_rand(&B,SIZE,1,0);
+ BN_mod(&A,&A,&prime,ctx);
+ BN_mod(&B,&B,&prime,ctx);
+
+ mont=BN_MONT_CTX_new();
+ BN_MONT_CTX_set(mont,&prime,ctx);
+
+ BN_to_montgomery(&a,&A,mont,ctx);
+ BN_to_montgomery(&b,&B,mont,ctx);
+
+ BN_mul(&r,&a,&b);
+ BN_print_fp(stdout,&r); printf("\n");
+ BN_from_montgomery(&r,&r,mont,ctx);
+ BN_print_fp(stdout,&r); printf("\n");
+ BN_from_montgomery(&r,&r,mont,ctx);
+ BN_print_fp(stdout,&r); printf("\n");
+
+ BN_mod_mul(&R,&A,&B,&prime,ctx);
+
+ BN_print_fp(stdout,&a); printf("\n");
+ BN_print_fp(stdout,&b); printf("\n");
+ BN_print_fp(stdout,&prime); printf("\n");
+ BN_print_fp(stdout,&r); printf("\n\n");
+
+ BN_print_fp(stdout,&A); printf("\n");
+ BN_print_fp(stdout,&B); printf("\n");
+ BN_print_fp(stdout,&prime); printf("\n");
+ BN_print_fp(stdout,&R); printf("\n\n");
+
+ BN_mul(&r,&a,&b);
+ BN_print_fp(stdout,&r); printf(" <- BA*DC\n");
+ BN_copy(&A,&r);
+ i=SIZE/2;
+ BN_mask_bits(&A,i*2);
+// BN_print_fp(stdout,&A); printf(" <- low(BA*DC)\n");
+ bn_do_lower(&r,&a,&b,&A,i);
+// BN_print_fp(stdout,&r); printf(" <- low(BA*DC)\n");
+ }
+
+int bn_mul_low(r,a,b,low,i)
+BIGNUM *r,*a,*b,*low;
+int i;
+ {
+ int w;
+ BIGNUM Kh,Km,t1,t2,h,ah,al,bh,bl,l,m,s0,s1;
+
+ BN_init(&Kh); BN_init(&Km); BN_init(&t1); BN_init(&t2); BN_init(&l);
+ BN_init(&ah); BN_init(&al); BN_init(&bh); BN_init(&bl); BN_init(&h);
+ BN_init(&m); BN_init(&s0); BN_init(&s1);
+
+ BN_copy(&al,a); BN_mask_bits(&al,i); BN_rshift(&ah,a,i);
+ BN_copy(&bl,b); BN_mask_bits(&bl,i); BN_rshift(&bh,b,i);
+
+
+ BN_sub(&t1,&al,&ah);
+ BN_sub(&t2,&bh,&bl);
+ BN_mul(&m,&t1,&t2);
+ BN_mul(&h,&ah,&bh);
+
+ BN_copy(&s0,low); BN_mask_bits(&s0,i);
+ BN_rshift(&s1,low,i);
+
+ BN_add(&t1,&h,&m);
+ BN_add(&t1,&t1,&s0);
+
+ BN_copy(&t2,&t1); BN_mask_bits(&t2,i);
+ BN_sub(&t1,&s1,&t2);
+ BN_lshift(&t1,&t1,i);
+ BN_add(&t1,&t1,&s0);
+ if (t1.neg)
+ {
+ BN_lshift(&t2,BN_value_one(),i*2);
+ BN_add(&t1,&t2,&t1);
+ BN_mask_bits(&t1,i*2);
+ }
+
+ BN_free(&Kh); BN_free(&Km); BN_free(&t1); BN_free(&t2);
+ BN_free(&ah); BN_free(&al); BN_free(&bh); BN_free(&bl);
+ }
+
+int BN_mod_mul_montgomery(r,a,b,mont,ctx)
+BIGNUM *r,*a,*b;
+BN_MONT_CTX *mont;
+BN_CTX *ctx;
+ {
+ BIGNUM *tmp;
+
+ tmp= &(ctx->bn[ctx->tos++]);
+
+ if (a == b)
+ {
+ if (!BN_sqr(tmp,a,ctx)) goto err;
+ }
+ else
+ {
+ if (!BN_mul(tmp,a,b)) goto err;
+ }
+ /* reduce from aRR to aR */
+ if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
+ ctx->tos--;
+ return(1);
+err:
+ return(0);
+ }
+
+int BN_from_montgomery(r,a,mont,ctx)
+BIGNUM *r;
+BIGNUM *a;
+BN_MONT_CTX *mont;
+BN_CTX *ctx;
+ {
+ BIGNUM z1;
+ BIGNUM *t1,*t2;
+ BN_ULONG *ap,*bp,*rp;
+ int j,i,bl,al;
+
+ BN_init(&z1);
+ t1= &(ctx->bn[ctx->tos]);
+ t2= &(ctx->bn[ctx->tos+1]);
+
+ if (!BN_copy(t1,a)) goto err;
+ /* can cheat */
+ BN_mask_bits(t1,mont->ri);
+ if (!BN_mul(t2,t1,mont->Ni)) goto err;
+ BN_mask_bits(t2,mont->ri);
+
+ if (!BN_mul(t1,t2,mont->N)) goto err;
+ if (!BN_add(t2,t1,a)) goto err;
+
+ /* At this point, t2 has the bottom ri bits set to zero.
+ * This means that the bottom ri bits == the 1^ri minus the bottom
+ * ri bits of a.
+ * This means that only the bits above 'ri' in a need to be added,
+ * and XXXXXXXXXXXXXXXXXXXXXXXX
+ */
+BN_print_fp(stdout,t2); printf("\n");
+ BN_rshift(r,t2,mont->ri);
+
+ if (BN_ucmp(r,mont->N) >= 0)
+ bn_qsub(r,r,mont->N);
+
+ return(1);
+err:
+ return(0);
+ }
+
+int BN_MONT_CTX_set(mont,mod,ctx)
+BN_MONT_CTX *mont;
+BIGNUM *mod;
+BN_CTX *ctx;
+ {
+ BIGNUM *Ri=NULL,*R=NULL;
+
+ if (mont->RR == NULL) mont->RR=BN_new();
+ if (mont->N == NULL) mont->N=BN_new();
+
+ R=mont->RR; /* grab RR as a temp */
+ BN_copy(mont->N,mod); /* Set N */
+
+ mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
+ BN_lshift(R,BN_value_one(),mont->ri); /* R */
+ if ((Ri=BN_mod_inverse(NULL,R,mod,ctx)) == NULL) goto err;/* Ri */
+ BN_lshift(Ri,Ri,mont->ri); /* R*Ri */
+ bn_qsub(Ri,Ri,BN_value_one()); /* R*Ri - 1 */
+ BN_div(Ri,NULL,Ri,mod,ctx);
+ if (mont->Ni != NULL) BN_free(mont->Ni);
+ mont->Ni=Ri; /* Ni=(R*Ri-1)/N */
+
+ /* setup RR for conversions */
+ BN_lshift(mont->RR,BN_value_one(),mont->ri*2);
+ BN_mod(mont->RR,mont->RR,mont->N,ctx);
+
+ return(1);
+err:
+ return(0);
+ }
+
+
diff --git a/crypto/bn/test.c b/crypto/bn/test.c
new file mode 100644
index 0000000000..e23f21583f
--- /dev/null
+++ b/crypto/bn/test.c
@@ -0,0 +1,252 @@
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn_lcl.h"
+
+#define SIZE 32
+
+#define BN_MONT_CTX_set bn_mcs
+#define BN_from_montgomery bn_fm
+#define BN_mod_mul_montgomery bn_mmm
+#undef BN_to_montgomery
+#define BN_to_montgomery(r,a,mont,ctx) bn_mmm(\
+ r,a,(mont)->RR,(mont),ctx)
+
+main()
+ {
+ BIGNUM prime,a,b,r,A,B,R;
+ BN_MONT_CTX *mont;
+ BN_CTX *ctx;
+ int i;
+
+ ctx=BN_CTX_new();
+ BN_init(&prime);
+ BN_init(&a); BN_init(&b); BN_init(&r);
+ BN_init(&A); BN_init(&B); BN_init(&R);
+
+ BN_generate_prime(&prime,SIZE,0,NULL,NULL,NULL,NULL);
+ BN_rand(&A,SIZE,1,0);
+ BN_rand(&B,SIZE,1,0);
+ BN_mod(&A,&A,&prime,ctx);
+ BN_mod(&B,&B,&prime,ctx);
+
+ i=A.top;
+ BN_mul(&R,&A,&B,ctx);
+ BN_mask_bits(&R,i*BN_BITS2);
+
+
+ BN_print_fp(stdout,&A); printf(" <- a\n");
+ BN_print_fp(stdout,&B); printf(" <- b\n");
+ BN_mul_high(&r,&A,&B,&R,i);
+ BN_print_fp(stdout,&r); printf(" <- high(BA*DC)\n");
+
+ BN_mask_bits(&A,i*32);
+ BN_mask_bits(&B,i*32);
+
+ BN_mul(&R,&A,&B);
+ BN_rshift(&R,&R,i*32);
+ BN_print_fp(stdout,&R); printf(" <- norm BA*DC\n");
+ BN_sub(&R,&R,&r);
+ BN_print_fp(stdout,&R); printf(" <- diff\n");
+ }
+
+#if 0
+int bn_mul_high(r,a,b,low,words)
+BIGNUM *r,*a,*b,*low;
+int words;
+ {
+ int i;
+ BIGNUM t1,t2,t3,h,ah,al,bh,bl,m,s0,s1;
+
+ BN_init(&al); BN_init(&ah);
+ BN_init(&bl); BN_init(&bh);
+ BN_init(&t1); BN_init(&t2); BN_init(&t3);
+ BN_init(&s0); BN_init(&s1);
+ BN_init(&h); BN_init(&m);
+
+ i=a->top;
+ if (i >= words)
+ {
+ al.top=words;
+ ah.top=a->top-words;
+ ah.d= &(a->d[ah.top]);
+ }
+ else
+ al.top=i;
+ al.d=a->d;
+
+ i=b->top;
+ if (i >= words)
+ {
+ bl.top=words;
+ bh.top=i-words;
+ bh.d= &(b->d[bh.top]);
+ }
+ else
+ bl.top=i;
+ bl.d=b->d;
+
+ i=low->top;
+ if (i >= words)
+ {
+ s0.top=words;
+ s1.top=i-words;
+ s1.d= &(low->d[s1.top]);
+ }
+ else
+ s0.top=i;
+ s0.d=low->d;
+
+al.max=al.top; ah.max=ah.top;
+bl.max=bl.top; bh.max=bh.top;
+s0.max=bl.top; s1.max=bh.top;
+
+ /* Calculate (al-ah)*(bh-bl) */
+ BN_sub(&t1,&al,&ah);
+ BN_sub(&t2,&bh,&bl);
+ BN_mul(&m,&t1,&t2);
+
+ /* Calculate ah*bh */
+ BN_mul(&h,&ah,&bh);
+
+ /* s0 == low(al*bl)
+ * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
+ * We know s0 and s1 so the only unknown is high(al*bl)
+ * high(al*bl) == s1 - low(ah*bh+(al-ah)*(bh-bl)+s0)
+ */
+ BN_add(&m,&m,&h);
+ BN_add(&t2,&m,&s0);
+ /* Quick and dirty mask off of high words */
+ t3.d=t2.d;
+ t3.top=(t2.top > words)?words:t2.top;
+ t3.neg=t2.neg;
+t3.max=t3.top;
+// BN_print_fp(stdout,&s1); printf(" s1\n");
+// BN_print_fp(stdout,&t2); printf(" middle value\n");
+// BN_print_fp(stdout,&t3); printf(" low middle value\n");
+ BN_sub(&t1,&s1,&t3);
+
+ if (t1.neg)
+ {
+//printf("neg fixup\n"); //BN_print_fp(stdout,&t1); printf(" before\n");
+ BN_lshift(&t2,BN_value_one(),words*32);
+ BN_add(&t1,&t2,&t1);
+ BN_mask_bits(&t1,words*32);
+// BN_print_fp(stdout,&t1); printf(" after\n");
+ }
+ /* al*bl == high(al*bl)<<words+s0 */
+ BN_lshift(&t1,&t1,words*32);
+ BN_add(&t1,&t1,&s0);
+
+ /* We now have
+ * al*bl - t1
+ * (al-ah)*(bh-bl)+ah*bh - m
+ * ah*bh - h
+ */
+ BN_copy(r,&t1);
+ BN_mask_bits(r,words*32*2);
+
+ /*BN_lshift(&m,&m,words*/
+
+ BN_free(&t1); BN_free(&t2);
+ BN_free(&m); BN_free(&h);
+ }
+
+int BN_mod_mul_montgomery(r,a,b,mont,ctx)
+BIGNUM *r,*a,*b;
+BN_MONT_CTX *mont;
+BN_CTX *ctx;
+ {
+ BIGNUM *tmp;
+
+ tmp= &(ctx->bn[ctx->tos++]);
+
+ if (a == b)
+ {
+ if (!BN_sqr(tmp,a,ctx)) goto err;
+ }
+ else
+ {
+ if (!BN_mul(tmp,a,b)) goto err;
+ }
+ /* reduce from aRR to aR */
+ if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
+ ctx->tos--;
+ return(1);
+err:
+ return(0);
+ }
+
+int BN_from_montgomery(r,a,mont,ctx)
+BIGNUM *r;
+BIGNUM *a;
+BN_MONT_CTX *mont;
+BN_CTX *ctx;
+ {
+ BIGNUM z1;
+ BIGNUM *t1,*t2;
+ BN_ULONG *ap,*bp,*rp;
+ int j,i,bl,al;
+
+ BN_init(&z1);
+ t1= &(ctx->bn[ctx->tos]);
+ t2= &(ctx->bn[ctx->tos+1]);
+
+ if (!BN_copy(t1,a)) goto err;
+ /* can cheat */
+ BN_mask_bits(t1,mont->ri);
+ if (!BN_mul(t2,t1,mont->Ni)) goto err;
+ BN_mask_bits(t2,mont->ri);
+
+ if (!BN_mul(t1,t2,mont->N)) goto err;
+ if (!BN_add(t2,t1,a)) goto err;
+
+ /* At this point, t2 has the bottom ri bits set to zero.
+ * This means that the bottom ri bits == the 1^ri minus the bottom
+ * ri bits of a.
+ * This means that only the bits above 'ri' in a need to be added,
+ * and XXXXXXXXXXXXXXXXXXXXXXXX
+ */
+BN_print_fp(stdout,t2); printf("\n");
+ BN_rshift(r,t2,mont->ri);
+
+ if (BN_ucmp(r,mont->N) >= 0)
+ BN_usub(r,r,mont->N);
+
+ return(1);
+err:
+ return(0);
+ }
+
+int BN_MONT_CTX_set(mont,mod,ctx)
+BN_MONT_CTX *mont;
+BIGNUM *mod;
+BN_CTX *ctx;
+ {
+ BIGNUM *Ri=NULL,*R=NULL;
+
+ if (mont->RR == NULL) mont->RR=BN_new();
+ if (mont->N == NULL) mont->N=BN_new();
+
+ R=mont->RR; /* grab RR as a temp */
+ BN_copy(mont->N,mod); /* Set N */
+
+ mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
+ BN_lshift(R,BN_value_one(),mont->ri); /* R */
+ if ((Ri=BN_mod_inverse(NULL,R,mod,ctx)) == NULL) goto err;/* Ri */
+ BN_lshift(Ri,Ri,mont->ri); /* R*Ri */
+ BN_usub(Ri,Ri,BN_value_one()); /* R*Ri - 1 */
+ BN_div(Ri,NULL,Ri,mod,ctx);
+ if (mont->Ni != NULL) BN_free(mont->Ni);
+ mont->Ni=Ri; /* Ni=(R*Ri-1)/N */
+
+ /* setup RR for conversions */
+ BN_lshift(mont->RR,BN_value_one(),mont->ri*2);
+ BN_mod(mont->RR,mont->RR,mont->N,ctx);
+
+ return(1);
+err:
+ return(0);
+ }
+
+
+#endif
diff --git a/crypto/bn/todo b/crypto/bn/todo
new file mode 100644
index 0000000000..e47e381aea
--- /dev/null
+++ b/crypto/bn/todo
@@ -0,0 +1,3 @@
+Cache RECP_CTX values
+make the result argument independant of the inputs.
+split up the _exp_ functions