Diffstat (limited to 'crypto/bn/asm')
-rw-r--r--  crypto/bn/asm/README        30
-rw-r--r--  crypto/bn/asm/alpha.s      310
-rw-r--r--  crypto/bn/asm/pa-risc.s    710
-rw-r--r--  crypto/bn/asm/pa-risc2.s   416
-rw-r--r--  crypto/bn/asm/r3000.s      646
-rw-r--r--  crypto/bn/asm/sparc.s      359
-rw-r--r--  crypto/bn/asm/x86-bsdi.s   272
-rw-r--r--  crypto/bn/asm/x86-lnx.s    282
-rw-r--r--  crypto/bn/asm/x86-lnxa.s   282
-rw-r--r--  crypto/bn/asm/x86-sol.s    224
-rw-r--r--  crypto/bn/asm/x86nt32.asm  288
-rw-r--r--  crypto/bn/asm/x86nt32.uu    22
-rw-r--r--  crypto/bn/asm/x86w16.asm   297
-rw-r--r--  crypto/bn/asm/x86w16.uu     20
-rw-r--r--  crypto/bn/asm/x86w32.asm   303
-rw-r--r--  crypto/bn/asm/x86w32.uu     23
16 files changed, 4484 insertions(+), 0 deletions(-)
diff --git a/crypto/bn/asm/README b/crypto/bn/asm/README
new file mode 100644
index 0000000000..d93fbff77f
--- /dev/null
+++ b/crypto/bn/asm/README
@@ -0,0 +1,30 @@
+All the assembler in this directory consists of versions of the file
+crypto/bn/bn_mulw.c.
+
+Quite a few of these files are just the assembler output from gcc, since on
+quite a few machines gcc's output is 2 times faster than the system compiler's.
+
+For the x86, I have hand-written the assembler because of the bad job all
+compilers seem to do on it. This normally gives a 2 times speed-up in the RSA
+routines.
+
+For the DEC Alpha, I also hand-wrote the assembler (except the division, which
+is just the output from the C compiler pasted onto the end of the file).
+On the 2 Alpha C compilers I had access to, it was not possible to do
+64b x 64b -> 128b calculations (both the long and the long long data types
+were 64 bits). So the hand-written assembler gives access to the 128 bit
+result and a 2 times speedup :-).
+
+The x86xxxx.obj files are the assembled versions of the x86xxxx.asm files.
+I had such a hard time finding a macro assembler for Microsoft that I decided
+to include the object files to save others the hassle :-).
+
+I have also included uuencoded versions of the .obj files in case they get
+trashed.
+
+There are 2 versions of the assembler for the HP PA-RISC.
+pa-risc.s is the original one, which works fine.
+pa-risc2.s is a newer version that often generates warnings, but if the
+tests pass, it gives performance that is over 2 times faster than
+pa-risc.s.
+Both were generated using gcc :-)
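For reference, every file in this diff implements the same handful of word-level primitives from bn_mulw.c. Below is a minimal C sketch of the central one, bn_mul_add_word; the BN_ULONG/BN_ULLONG typedefs and the exact argument order here are illustrative assumptions, not copied from the real bn_mulw.c.

    typedef unsigned int       BN_ULONG;  /* assumed: a 32-bit word      */
    typedef unsigned long long BN_ULLONG; /* assumed: a 64-bit wide type */

    /* Multiply the num-word array a by the single word w, add the product
     * into r, and return the final carry word. */
    BN_ULONG bn_mul_add_word(BN_ULONG *r, BN_ULONG *a, int num, BN_ULONG w)
        {
        BN_ULLONG t;
        BN_ULONG c = 0;

        while (num--)
            {
            t = (BN_ULLONG)w * *(a++) + *r + c; /* 32x32->64 multiply-accumulate */
            *(r++) = (BN_ULONG)t;               /* low word goes back into r     */
            c = (BN_ULONG)(t >> 32);            /* high word is the next carry   */
            }
        return c;
        }

The assembler versions unroll this loop (usually 4 words at a time) and keep the carry in a register; the speed-up comes from reaching the full double-width product, which portable C of the era often could not.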
diff --git a/crypto/bn/asm/alpha.s b/crypto/bn/asm/alpha.s
new file mode 100644
index 0000000000..d56f715ecd
--- /dev/null
+++ b/crypto/bn/asm/alpha.s
@@ -0,0 +1,310 @@
+ # DEC Alpha assembler
+ # bn_div64 is actually gcc output, but the other parts are hand-written.
+ # Thanks to tzeruch@ceddec.com for sending me the gcc output for
+ # bn_div64.
+ .file 1 "bn_mulw.c"
+ .version "01.01"
+ .set noat
+gcc2_compiled.:
+__gnu_compiled_c:
+ .text
+ .align 3
+ .globl bn_mul_add_word
+ .ent bn_mul_add_word
+bn_mul_add_word:
+bn_mul_add_word..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ subq $18,2,$25 # $25=num-2
+ bis $31,$31,$0
+ blt $25,$42
+ .align 5
+$142:
+ subq $18,2,$18 # num-=2
+ subq $25,2,$25 # num-=2
+
+ ldq $1,0($17) # a[0]
+ ldq $2,8($17) # a[1]
+
+ mulq $19,$1,$3 # a[0]*w low part r3
+ umulh $19,$1,$1 # a[0]*w high part r1
+ mulq $19,$2,$4 # a[1]*w low part r4
+ umulh $19,$2,$2 # a[1]*w high part r2
+
+ ldq $22,0($16) # r[0] r22
+ ldq $23,8($16) # r[1] r23
+
+ addq $3,$22,$3 # a0 low part + r[0]
+ addq $4,$23,$4 # a1 low part + r[1]
+ cmpult $3,$22,$5 # overflow?
+ cmpult $4,$23,$6 # overflow?
+ addq $5,$1,$1 # high part + overflow
+ addq $6,$2,$2 # high part + overflow
+
+ addq $3,$0,$3 # add c
+ cmpult $3,$0,$5 # overflow?
+ stq $3,0($16)
+ addq $5,$1,$0 # c=high part + overflow
+
+ addq $4,$0,$4 # add c
+ cmpult $4,$0,$5 # overflow?
+ stq $4,8($16)
+ addq $5,$2,$0 # c=high part + overflow
+
+ ble $18,$43
+
+ addq $16,16,$16
+ addq $17,16,$17
+ blt $25,$42
+
+ br $31,$142
+$42:
+ ldq $1,0($17) # a[0]
+ umulh $19,$1,$3 # a[0]*w high part
+ mulq $19,$1,$1 # a[0]*w low part
+ ldq $2,0($16) # r[0]
+ addq $1,$2,$1 # low part + r[0]
+ cmpult $1,$2,$4 # overflow?
+ addq $4,$3,$3 # high part + overflow
+ addq $1,$0,$1 # add c
+ cmpult $1,$0,$4 # overflow?
+ addq $4,$3,$0 # c=high part + overflow
+ stq $1,0($16)
+
+ .align 4
+$43:
+ ret $31,($26),1
+ .end bn_mul_add_word
+ .align 3
+ .globl bn_mul_word
+ .ent bn_mul_word
+bn_mul_word:
+bn_mul_word..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+ subq $18,2,$25 # $25=num-2
+ bis $31,$31,$0
+ blt $25,$242
+ .align 5
+$342:
+ subq $18,2,$18 # num-=2
+ subq $25,2,$25 # num-=2
+
+ ldq $1,0($17) # a[0]
+ ldq $2,8($17) # a[1]
+
+ mulq $19,$1,$3 # a[0]*w low part r3
+ umulh $19,$1,$1 # a[0]*w high part r1
+ mulq $19,$2,$4 # a[1]*w low part r4
+ umulh $19,$2,$2 # a[1]*w high part r2
+
+ addq $3,$0,$3 # add c
+ cmpult $3,$0,$5 # overflow?
+ stq $3,0($16)
+ addq $5,$1,$0 # c=high part + overflow
+
+ addq $4,$0,$4 # add c
+ cmpult $4,$0,$5 # overflow?
+ stq $4,8($16)
+ addq $5,$2,$0 # c=high part + overflow
+
+ ble $18,$243
+
+ addq $16,16,$16
+ addq $17,16,$17
+ blt $25,$242
+
+ br $31,$342
+$242:
+ ldq $1,0($17) # a[0]
+ umulh $19,$1,$3 # a[0]*w high part
+ mulq $19,$1,$1 # a[0]*w low part
+ addq $1,$0,$1 # add c
+ cmpult $1,$0,$4 # overflow?
+ addq $4,$3,$0 # c=high part + overflow
+ stq $1,0($16)
+$243:
+ ret $31,($26),1
+ .end bn_mul_word
+ .align 3
+ .globl bn_sqr_words
+ .ent bn_sqr_words
+bn_sqr_words:
+bn_sqr_words..ng:
+ .frame $30,0,$26,0
+ .prologue 0
+
+ subq $18,2,$25 # $25=num-2
+ blt $25,$442
+ .align 5
+$542:
+ subq $18,2,$18 # num-=2
+ subq $25,2,$25 # num-=2
+
+ ldq $1,0($17) # a[0]
+ ldq $4,8($17) # a[1]
+
+ mulq $1,$1,$2 # a[0]*a[0] low part r2
+ umulh $1,$1,$3 # a[0]*a[0] high part r3
+ mulq $4,$4,$5 # a[1]*a[1] low part r5
+ umulh $4,$4,$6 # a[1]*a[1] high part r6
+
+ stq $2,0($16) # r[0]
+ stq $3,8($16) # r[1]
+ stq $5,16($16) # r[2]
+ stq $6,24($16) # r[3]
+
+ ble $18,$443
+
+ addq $16,32,$16
+ addq $17,16,$17
+ blt $25,$442
+ br $31,$542
+
+$442:
+ ldq $1,0($17) # a[0]
+ mulq $1,$1,$2 # a[0]*a[0] low part r2
+ umulh $1,$1,$3 # a[0]*a[0] high part r3
+ stq $2,0($16) # r[0]
+ stq $3,8($16) # r[1]
+
+ .align 4
+$443:
+ ret $31,($26),1
+ .end bn_sqr_words
+
+ #
+ # What follows was taken directly from the C compiler with a few
+ # hacks to redo the labels.
+ #
+.text
+ .align 3
+ .globl bn_div64
+ .ent bn_div64
+bn_div64:
+ ldgp $29,0($27)
+bn_div64..ng:
+ lda $30,-48($30)
+ .frame $30,48,$26,0
+ stq $26,0($30)
+ stq $9,8($30)
+ stq $10,16($30)
+ stq $11,24($30)
+ stq $12,32($30)
+ stq $13,40($30)
+ .mask 0x4003e00,-48
+ .prologue 1
+ bis $16,$16,$9
+ bis $17,$17,$10
+ bis $18,$18,$11
+ bis $31,$31,$13
+ bis $31,2,$12
+ bne $11,$119
+ lda $0,-1
+ br $31,$136
+ .align 4
+$119:
+ bis $11,$11,$16
+ jsr $26,BN_num_bits_word
+ ldgp $29,0($26)
+ subq $0,64,$1
+ beq $1,$120
+ bis $31,1,$1
+ sll $1,$0,$1
+ cmpule $9,$1,$1
+ bne $1,$120
+ # lda $16,_IO_stderr_
+ # lda $17,$C32
+ # bis $0,$0,$18
+ # jsr $26,fprintf
+ # ldgp $29,0($26)
+ jsr $26,abort
+ ldgp $29,0($26)
+ .align 4
+$120:
+ bis $31,64,$3
+ cmpult $9,$11,$2
+ subq $3,$0,$1
+ addl $1,$31,$0
+ subq $9,$11,$1
+ cmoveq $2,$1,$9
+ beq $0,$122
+ zapnot $0,15,$2
+ subq $3,$0,$1
+ sll $11,$2,$11
+ sll $9,$2,$3
+ srl $10,$1,$1
+ sll $10,$2,$10
+ bis $3,$1,$9
+$122:
+ srl $11,32,$5
+ zapnot $11,15,$6
+ lda $7,-1
+ .align 5
+$123:
+ srl $9,32,$1
+ subq $1,$5,$1
+ bne $1,$126
+ zapnot $7,15,$27
+ br $31,$127
+ .align 4
+$126:
+ bis $9,$9,$24
+ bis $5,$5,$25
+ divqu $24,$25,$27
+$127:
+ srl $10,32,$4
+ .align 5
+$128:
+ mulq $27,$5,$1
+ subq $9,$1,$3
+ zapnot $3,240,$1
+ bne $1,$129
+ mulq $6,$27,$2
+ sll $3,32,$1
+ addq $1,$4,$1
+ cmpule $2,$1,$2
+ bne $2,$129
+ subq $27,1,$27
+ br $31,$128
+ .align 4
+$129:
+ mulq $27,$6,$1
+ mulq $27,$5,$4
+ srl $1,32,$3
+ sll $1,32,$1
+ addq $4,$3,$4
+ cmpult $10,$1,$2
+ subq $10,$1,$10
+ addq $2,$4,$2
+ cmpult $9,$2,$1
+ bis $2,$2,$4
+ beq $1,$134
+ addq $9,$11,$9
+ subq $27,1,$27
+$134:
+ subl $12,1,$12
+ subq $9,$4,$9
+ beq $12,$124
+ sll $27,32,$13
+ sll $9,32,$2
+ srl $10,32,$1
+ sll $10,32,$10
+ bis $2,$1,$9
+ br $31,$123
+ .align 4
+$124:
+ bis $13,$27,$0
+$136:
+ ldq $26,0($30)
+ ldq $9,8($30)
+ ldq $10,16($30)
+ ldq $11,24($30)
+ ldq $12,32($30)
+ ldq $13,40($30)
+ addq $30,48,$30
+ ret $31,($26),1
+ .end bn_div64
+ .ident "GCC: (GNU) 2.7.2.1"
+
+
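A note on the Alpha code above: the architecture has no add-with-carry instruction, so each carry is recovered with cmpult (after an unsigned add, the sum is below an addend exactly when the add wrapped), and the high half of each 64x64-bit product comes from umulh. A C sketch of one inner-loop step of bn_mul_add_word follows; mul_hi() is a hypothetical stand-in for umulh, and unsigned __int128 is a gcc extension used only to keep the sketch short.

    #include <stdint.h>

    /* High 64 bits of a 64x64-bit product: what umulh computes. */
    static uint64_t mul_hi(uint64_t x, uint64_t y)
        {
        return (uint64_t)(((unsigned __int128)x * y) >> 64);
        }

    /* One word of the bn_mul_add_word loop: *r += a*w + c, carry out. */
    static uint64_t mul_add_step(uint64_t *r, uint64_t a, uint64_t w, uint64_t c)
        {
        uint64_t lo = w * a;        /* mulq:  low half of the product  */
        uint64_t hi = mul_hi(w, a); /* umulh: high half of the product */

        lo += *r;  hi += (lo < *r); /* cmpult: carry out of lo += r[0] */
        lo += c;   hi += (lo < c);  /* cmpult: carry out of lo += c    */
        *r = lo;
        return hi;                  /* c = high part + overflow        */
        }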
diff --git a/crypto/bn/asm/pa-risc.s b/crypto/bn/asm/pa-risc.s
new file mode 100644
index 0000000000..c49c433a83
--- /dev/null
+++ b/crypto/bn/asm/pa-risc.s
@@ -0,0 +1,710 @@
+ .SPACE $PRIVATE$
+ .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
+ .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
+ .SPACE $TEXT$
+ .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
+ .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
+ .IMPORT $global$,DATA
+ .IMPORT $$dyncall,MILLICODE
+; gcc_compiled.:
+ .SPACE $TEXT$
+ .SUBSPA $CODE$
+
+ .align 4
+ .EXPORT bn_mul_add_word,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
+bn_mul_add_word
+ .PROC
+ .CALLINFO FRAME=0,CALLS,SAVE_RP
+ .ENTRY
+ stw %r2,-20(0,%r30)
+ ldi 0,%r28
+ extru %r23,31,16,%r2
+ stw %r2,-16(0,%r30)
+ extru %r23,15,16,%r23
+ ldil L'65536,%r31
+ fldws -16(0,%r30),%fr11R
+ stw %r23,-16(0,%r30)
+ ldo 12(%r25),%r29
+ ldo 12(%r26),%r23
+ fldws -16(0,%r30),%fr11L
+L$0002
+ ldw 0(0,%r25),%r19
+ extru %r19,31,16,%r20
+ stw %r20,-16(0,%r30)
+ extru %r19,15,16,%r19
+ fldws -16(0,%r30),%fr22L
+ stw %r19,-16(0,%r30)
+ xmpyu %fr22L,%fr11R,%fr8
+ fldws -16(0,%r30),%fr22L
+ fstws %fr8R,-16(0,%r30)
+ xmpyu %fr11R,%fr22L,%fr10
+ ldw -16(0,%r30),%r2
+ stw %r20,-16(0,%r30)
+ xmpyu %fr22L,%fr11L,%fr9
+ fldws -16(0,%r30),%fr22L
+ fstws %fr10R,-16(0,%r30)
+ copy %r2,%r22
+ ldw -16(0,%r30),%r2
+ fstws %fr9R,-16(0,%r30)
+ xmpyu %fr11L,%fr22L,%fr8
+ copy %r2,%r19
+ ldw -16(0,%r30),%r2
+ fstws %fr8R,-16(0,%r30)
+ copy %r2,%r20
+ ldw -16(0,%r30),%r2
+ addl %r2,%r19,%r21
+ comclr,<<= %r19,%r21,0
+ addl %r20,%r31,%r20
+L$0005
+ extru %r21,15,16,%r19
+ addl %r20,%r19,%r20
+ zdep %r21,15,16,%r19
+ addl %r22,%r19,%r22
+ comclr,<<= %r19,%r22,0
+ addi,tr 1,%r20,%r19
+ copy %r20,%r19
+ addl %r22,%r28,%r20
+ comclr,<<= %r28,%r20,0
+ addi 1,%r19,%r19
+ ldw 0(0,%r26),%r28
+ addl %r20,%r28,%r20
+ comclr,<<= %r28,%r20,0
+ addi,tr 1,%r19,%r28
+ copy %r19,%r28
+ addib,= -1,%r24,L$0003
+ stw %r20,0(0,%r26)
+ ldw -8(0,%r29),%r19
+ extru %r19,31,16,%r20
+ stw %r20,-16(0,%r30)
+ extru %r19,15,16,%r19
+ fldws -16(0,%r30),%fr22L
+ stw %r19,-16(0,%r30)
+ xmpyu %fr22L,%fr11R,%fr8
+ fldws -16(0,%r30),%fr22L
+ fstws %fr8R,-16(0,%r30)
+ xmpyu %fr11R,%fr22L,%fr10
+ ldw -16(0,%r30),%r2
+ stw %r20,-16(0,%r30)
+ xmpyu %fr22L,%fr11L,%fr9
+ fldws -16(0,%r30),%fr22L
+ fstws %fr10R,-16(0,%r30)
+ copy %r2,%r22
+ ldw -16(0,%r30),%r2
+ fstws %fr9R,-16(0,%r30)
+ xmpyu %fr11L,%fr22L,%fr8
+ copy %r2,%r19
+ ldw -16(0,%r30),%r2
+ fstws %fr8R,-16(0,%r30)
+ copy %r2,%r20
+ ldw -16(0,%r30),%r2
+ addl %r2,%r19,%r21
+ comclr,<<= %r19,%r21,0
+ addl %r20,%r31,%r20
+L$0010
+ extru %r21,15,16,%r19
+ addl %r20,%r19,%r20
+ zdep %r21,15,16,%r19
+ addl %r22,%r19,%r22
+ comclr,<<= %r19,%r22,0
+ addi,tr 1,%r20,%r19
+ copy %r20,%r19
+ addl %r22,%r28,%r20
+ comclr,<<= %r28,%r20,0
+ addi 1,%r19,%r19
+ ldw -8(0,%r23),%r28
+ addl %r20,%r28,%r20
+ comclr,<<= %r28,%r20,0
+ addi,tr 1,%r19,%r28
+ copy %r19,%r28
+ addib,= -1,%r24,L$0003
+ stw %r20,-8(0,%r23)
+ ldw -4(0,%r29),%r19
+ extru %r19,31,16,%r20
+ stw %r20,-16(0,%r30)
+ extru %r19,15,16,%r19
+ fldws -16(0,%r30),%fr22L
+ stw %r19,-16(0,%r30)
+ xmpyu %fr22L,%fr11R,%fr8
+ fldws -16(0,%r30),%fr22L
+ fstws %fr8R,-16(0,%r30)
+ xmpyu %fr11R,%fr22L,%fr10
+ ldw -16(0,%r30),%r2
+ stw %r20,-16(0,%r30)
+ xmpyu %fr22L,%fr11L,%fr9
+ fldws -16(0,%r30),%fr22L
+ fstws %fr10R,-16(0,%r30)
+ copy %r2,%r22
+ ldw -16(0,%r30),%r2
+ fstws %fr9R,-16(0,%r30)
+ xmpyu %fr11L,%fr22L,%fr8
+ copy %r2,%r19
+ ldw -16(0,%r30),%r2
+ fstws %fr8R,-16(0,%r30)
+ copy %r2,%r20
+ ldw -16(0,%r30),%r2
+ addl %r2,%r19,%r21
+ comclr,<<= %r19,%r21,0
+ addl %r20,%r31,%r20
+L$0015
+ extru %r21,15,16,%r19
+ addl %r20,%r19,%r20
+ zdep %r21,15,16,%r19
+ addl %r22,%r19,%r22
+ comclr,<<= %r19,%r22,0
+ addi,tr 1,%r20,%r19
+ copy %r20,%r19
+ addl %r22,%r28,%r20
+ comclr,<<= %r28,%r20,0
+ addi 1,%r19,%r19
+ ldw -4(0,%r23),%r28
+ addl %r20,%r28,%r20
+ comclr,<<= %r28,%r20,0
+ addi,tr 1,%r19,%r28
+ copy %r19,%r28
+ addib,= -1,%r24,L$0003
+ stw %r20,-4(0,%r23)
+ ldw 0(0,%r29),%r19
+ extru %r19,31,16,%r20
+ stw %r20,-16(0,%r30)
+ extru %r19,15,16,%r19
+ fldws -16(0,%r30),%fr22L
+ stw %r19,-16(0,%r30)
+ xmpyu %fr22L,%fr11R,%fr8
+ fldws -16(0,%r30),%fr22L
+ fstws %fr8R,-16(0,%r30)
+ xmpyu %fr11R,%fr22L,%fr10
+ ldw -16(0,%r30),%r2
+ stw %r20,-16(0,%r30)
+ xmpyu %fr22L,%fr11L,%fr9
+ fldws -16(0,%r30),%fr22L
+ fstws %fr10R,-16(0,%r30)
+ copy %r2,%r22
+ ldw -16(0,%r30),%r2
+ fstws %fr9R,-16(0,%r30)
+ xmpyu %fr11L,%fr22L,%fr8
+ copy %r2,%r19
+ ldw -16(0,%r30),%r2
+ fstws %fr8R,-16(0,%r30)
+ copy %r2,%r20
+ ldw -16(0,%r30),%r2
+ addl %r2,%r19,%r21
+ comclr,<<= %r19,%r21,0
+ addl %r20,%r31,%r20
+L$0020
+ extru %r21,15,16,%r19
+ addl %r20,%r19,%r20
+ zdep %r21,15,16,%r19
+ addl %r22,%r19,%r22
+ comclr,<<= %r19,%r22,0
+ addi,tr 1,%r20,%r19
+ copy %r20,%r19
+ addl %r22,%r28,%r20
+ comclr,<<= %r28,%r20,0
+ addi 1,%r19,%r19
+ ldw 0(0,%r23),%r28
+ addl %r20,%r28,%r20
+ comclr,<<= %r28,%r20,0
+ addi,tr 1,%r19,%r28
+ copy %r19,%r28
+ addib,= -1,%r24,L$0003
+ stw %r20,0(0,%r23)
+ ldo 16(%r29),%r29
+ ldo 16(%r25),%r25
+ ldo 16(%r23),%r23
+ bl L$0002,0
+ ldo 16(%r26),%r26
+L$0003
+ ldw -20(0,%r30),%r2
+ bv,n 0(%r2)
+ .EXIT
+ .PROCEND
+ .align 4
+ .EXPORT bn_mul_word,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
+bn_mul_word
+ .PROC
+ .CALLINFO FRAME=0,CALLS,SAVE_RP
+ .ENTRY
+ stw %r2,-20(0,%r30)
+ ldi 0,%r28
+ extru %r23,31,16,%r2
+ stw %r2,-16(0,%r30)
+ extru %r23,15,16,%r23
+ ldil L'65536,%r31
+ fldws -16(0,%r30),%fr11R
+ stw %r23,-16(0,%r30)
+ ldo 12(%r26),%r29
+ ldo 12(%r25),%r23
+ fldws -16(0,%r30),%fr11L
+L$0026
+ ldw 0(0,%r25),%r19
+ extru %r19,31,16,%r20
+ stw %r20,-16(0,%r30)
+ extru %r19,15,16,%r19
+ fldws -16(0,%r30),%fr22L
+ stw %r19,-16(0,%r30)
+ xmpyu %fr22L,%fr11R,%fr8
+ fldws -16(0,%r30),%fr22L
+ fstws %fr8R,-16(0,%r30)
+ xmpyu %fr11R,%fr22L,%fr10
+ ldw -16(0,%r30),%r2
+ stw %r20,-16(0,%r30)
+ xmpyu %fr22L,%fr11L,%fr9
+ fldws -16(0,%r30),%fr22L
+ fstws %fr10R,-16(0,%r30)
+ copy %r2,%r22
+ ldw -16(0,%r30),%r2
+ fstws %fr9R,-16(0,%r30)
+ xmpyu %fr11L,%fr22L,%fr8
+ copy %r2,%r19
+ ldw -16(0,%r30),%r2
+ fstws %fr8R,-16(0,%r30)
+ copy %r2,%r20
+ ldw -16(0,%r30),%r2
+ addl %r2,%r19,%r21
+ comclr,<<= %r19,%r21,0
+ addl %r20,%r31,%r20
+L$0029
+ extru %r21,15,16,%r19
+ addl %r20,%r19,%r20
+ zdep %r21,15,16,%r19
+ addl %r22,%r19,%r22
+ comclr,<<= %r19,%r22,0
+ addi,tr 1,%r20,%r19
+ copy %r20,%r19
+ addl %r22,%r28,%r20
+ comclr,<<= %r28,%r20,0
+ addi,tr 1,%r19,%r28
+ copy %r19,%r28
+ addib,= -1,%r24,L$0027
+ stw %r20,0(0,%r26)
+ ldw -8(0,%r23),%r19
+ extru %r19,31,16,%r20
+ stw %r20,-16(0,%r30)
+ extru %r19,15,16,%r19
+ fldws -16(0,%r30),%fr22L
+ stw %r19,-16(0,%r30)
+ xmpyu %fr22L,%fr11R,%fr8
+ fldws -16(0,%r30),%fr22L
+ fstws %fr8R,-16(0,%r30)
+ xmpyu %fr11R,%fr22L,%fr10
+ ldw -16(0,%r30),%r2
+ stw %r20,-16(0,%r30)
+ xmpyu %fr22L,%fr11L,%fr9
+ fldws -16(0,%r30),%fr22L
+ fstws %fr10R,-16(0,%r30)
+ copy %r2,%r22
+ ldw -16(0,%r30),%r2
+ fstws %fr9R,-16(0,%r30)
+ xmpyu %fr11L,%fr22L,%fr8
+ copy %r2,%r19
+ ldw -16(0,%r30),%r2
+ fstws %fr8R,-16(0,%r30)
+ copy %r2,%r20
+ ldw -16(0,%r30),%r2
+ addl %r2,%r19,%r21
+ comclr,<<= %r19,%r21,0
+ addl %r20,%r31,%r20
+L$0033
+ extru %r21,15,16,%r19
+ addl %r20,%r19,%r20
+ zdep %r21,15,16,%r19
+ addl %r22,%r19,%r22
+ comclr,<<= %r19,%r22,0
+ addi,tr 1,%r20,%r19
+ copy %r20,%r19
+ addl %r22,%r28,%r20
+ comclr,<<= %r28,%r20,0
+ addi,tr 1,%r19,%r28
+ copy %r19,%r28
+ addib,= -1,%r24,L$0027
+ stw %r20,-8(0,%r29)
+ ldw -4(0,%r23),%r19
+ extru %r19,31,16,%r20
+ stw %r20,-16(0,%r30)
+ extru %r19,15,16,%r19
+ fldws -16(0,%r30),%fr22L
+ stw %r19,-16(0,%r30)
+ xmpyu %fr22L,%fr11R,%fr8
+ fldws -16(0,%r30),%fr22L
+ fstws %fr8R,-16(0,%r30)
+ xmpyu %fr11R,%fr22L,%fr10
+ ldw -16(0,%r30),%r2
+ stw %r20,-16(0,%r30)
+ xmpyu %fr22L,%fr11L,%fr9
+ fldws -16(0,%r30),%fr22L
+ fstws %fr10R,-16(0,%r30)
+ copy %r2,%r22
+ ldw -16(0,%r30),%r2
+ fstws %fr9R,-16(0,%r30)
+ xmpyu %fr11L,%fr22L,%fr8
+ copy %r2,%r19
+ ldw -16(0,%r30),%r2
+ fstws %fr8R,-16(0,%r30)
+ copy %r2,%r20
+ ldw -16(0,%r30),%r2
+ addl %r2,%r19,%r21
+ comclr,<<= %r19,%r21,0
+ addl %r20,%r31,%r20
+L$0037
+ extru %r21,15,16,%r19
+ addl %r20,%r19,%r20
+ zdep %r21,15,16,%r19
+ addl %r22,%r19,%r22
+ comclr,<<= %r19,%r22,0
+ addi,tr 1,%r20,%r19
+ copy %r20,%r19
+ addl %r22,%r28,%r20
+ comclr,<<= %r28,%r20,0
+ addi,tr 1,%r19,%r28
+ copy %r19,%r28
+ addib,= -1,%r24,L$0027
+ stw %r20,-4(0,%r29)
+ ldw 0(0,%r23),%r19
+ extru %r19,31,16,%r20
+ stw %r20,-16(0,%r30)
+ extru %r19,15,16,%r19
+ fldws -16(0,%r30),%fr22L
+ stw %r19,-16(0,%r30)
+ xmpyu %fr22L,%fr11R,%fr8
+ fldws -16(0,%r30),%fr22L
+ fstws %fr8R,-16(0,%r30)
+ xmpyu %fr11R,%fr22L,%fr10
+ ldw -16(0,%r30),%r2
+ stw %r20,-16(0,%r30)
+ xmpyu %fr22L,%fr11L,%fr9
+ fldws -16(0,%r30),%fr22L
+ fstws %fr10R,-16(0,%r30)
+ copy %r2,%r22
+ ldw -16(0,%r30),%r2
+ fstws %fr9R,-16(0,%r30)
+ xmpyu %fr11L,%fr22L,%fr8
+ copy %r2,%r19
+ ldw -16(0,%r30),%r2
+ fstws %fr8R,-16(0,%r30)
+ copy %r2,%r20
+ ldw -16(0,%r30),%r2
+ addl %r2,%r19,%r21
+ comclr,<<= %r19,%r21,0
+ addl %r20,%r31,%r20
+L$0041
+ extru %r21,15,16,%r19
+ addl %r20,%r19,%r20
+ zdep %r21,15,16,%r19
+ addl %r22,%r19,%r22
+ comclr,<<= %r19,%r22,0
+ addi,tr 1,%r20,%r19
+ copy %r20,%r19
+ addl %r22,%r28,%r20
+ comclr,<<= %r28,%r20,0
+ addi,tr 1,%r19,%r28
+ copy %r19,%r28
+ addib,= -1,%r24,L$0027
+ stw %r20,0(0,%r29)
+ ldo 16(%r23),%r23
+ ldo 16(%r25),%r25
+ ldo 16(%r29),%r29
+ bl L$0026,0
+ ldo 16(%r26),%r26
+L$0027
+ ldw -20(0,%r30),%r2
+ bv,n 0(%r2)
+ .EXIT
+ .PROCEND
+ .align 4
+ .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
+bn_sqr_words
+ .PROC
+ .CALLINFO FRAME=0,NO_CALLS
+ .ENTRY
+ ldo 28(%r26),%r23
+ ldo 12(%r25),%r28
+L$0046
+ ldw 0(0,%r25),%r21
+ extru %r21,31,16,%r22
+ stw %r22,-16(0,%r30)
+ extru %r21,15,16,%r21
+ fldws -16(0,%r30),%fr10L
+ stw %r21,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ xmpyu %fr10L,%fr10R,%fr8
+ fstws %fr8R,-16(0,%r30)
+ ldw -16(0,%r30),%r29
+ stw %r22,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ stw %r21,-16(0,%r30)
+ copy %r29,%r19
+ xmpyu %fr10L,%fr10R,%fr8
+ fldws -16(0,%r30),%fr10L
+ stw %r21,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ fstws %fr8R,-16(0,%r30)
+ extru %r19,16,17,%r20
+ zdep %r19,14,15,%r19
+ ldw -16(0,%r30),%r29
+ xmpyu %fr10L,%fr10R,%fr9
+ addl %r29,%r19,%r22
+ stw %r22,0(0,%r26)
+ fstws %fr9R,-16(0,%r30)
+ ldw -16(0,%r30),%r29
+ addl %r29,%r20,%r21
+ comclr,<<= %r19,%r22,0
+ addi 1,%r21,%r21
+ addib,= -1,%r24,L$0057
+ stw %r21,-24(0,%r23)
+ ldw -8(0,%r28),%r21
+ extru %r21,31,16,%r22
+ stw %r22,-16(0,%r30)
+ extru %r21,15,16,%r21
+ fldws -16(0,%r30),%fr10L
+ stw %r21,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ xmpyu %fr10L,%fr10R,%fr8
+ fstws %fr8R,-16(0,%r30)
+ ldw -16(0,%r30),%r29
+ stw %r22,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ stw %r21,-16(0,%r30)
+ copy %r29,%r19
+ xmpyu %fr10L,%fr10R,%fr8
+ fldws -16(0,%r30),%fr10L
+ stw %r21,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ fstws %fr8R,-16(0,%r30)
+ extru %r19,16,17,%r20
+ zdep %r19,14,15,%r19
+ ldw -16(0,%r30),%r29
+ xmpyu %fr10L,%fr10R,%fr9
+ addl %r29,%r19,%r22
+ stw %r22,-20(0,%r23)
+ fstws %fr9R,-16(0,%r30)
+ ldw -16(0,%r30),%r29
+ addl %r29,%r20,%r21
+ comclr,<<= %r19,%r22,0
+ addi 1,%r21,%r21
+ addib,= -1,%r24,L$0057
+ stw %r21,-16(0,%r23)
+ ldw -4(0,%r28),%r21
+ extru %r21,31,16,%r22
+ stw %r22,-16(0,%r30)
+ extru %r21,15,16,%r21
+ fldws -16(0,%r30),%fr10L
+ stw %r21,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ xmpyu %fr10L,%fr10R,%fr8
+ fstws %fr8R,-16(0,%r30)
+ ldw -16(0,%r30),%r29
+ stw %r22,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ stw %r21,-16(0,%r30)
+ copy %r29,%r19
+ xmpyu %fr10L,%fr10R,%fr8
+ fldws -16(0,%r30),%fr10L
+ stw %r21,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ fstws %fr8R,-16(0,%r30)
+ extru %r19,16,17,%r20
+ zdep %r19,14,15,%r19
+ ldw -16(0,%r30),%r29
+ xmpyu %fr10L,%fr10R,%fr9
+ addl %r29,%r19,%r22
+ stw %r22,-12(0,%r23)
+ fstws %fr9R,-16(0,%r30)
+ ldw -16(0,%r30),%r29
+ addl %r29,%r20,%r21
+ comclr,<<= %r19,%r22,0
+ addi 1,%r21,%r21
+ addib,= -1,%r24,L$0057
+ stw %r21,-8(0,%r23)
+ ldw 0(0,%r28),%r21
+ extru %r21,31,16,%r22
+ stw %r22,-16(0,%r30)
+ extru %r21,15,16,%r21
+ fldws -16(0,%r30),%fr10L
+ stw %r21,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ xmpyu %fr10L,%fr10R,%fr8
+ fstws %fr8R,-16(0,%r30)
+ ldw -16(0,%r30),%r29
+ stw %r22,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ stw %r21,-16(0,%r30)
+ copy %r29,%r19
+ xmpyu %fr10L,%fr10R,%fr8
+ fldws -16(0,%r30),%fr10L
+ stw %r21,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ fstws %fr8R,-16(0,%r30)
+ extru %r19,16,17,%r20
+ zdep %r19,14,15,%r19
+ ldw -16(0,%r30),%r29
+ xmpyu %fr10L,%fr10R,%fr9
+ addl %r29,%r19,%r22
+ stw %r22,-4(0,%r23)
+ fstws %fr9R,-16(0,%r30)
+ ldw -16(0,%r30),%r29
+ addl %r29,%r20,%r21
+ comclr,<<= %r19,%r22,0
+ addi 1,%r21,%r21
+ addib,= -1,%r24,L$0057
+ stw %r21,0(0,%r23)
+ ldo 16(%r28),%r28
+ ldo 16(%r25),%r25
+ ldo 32(%r23),%r23
+ bl L$0046,0
+ ldo 32(%r26),%r26
+L$0057
+ bv,n 0(%r2)
+ .EXIT
+ .PROCEND
+ .IMPORT BN_num_bits_word,CODE
+ .IMPORT fprintf,CODE
+ .IMPORT __iob,DATA
+ .SPACE $TEXT$
+ .SUBSPA $LIT$
+
+ .align 4
+L$C0000
+ .STRING "Division would overflow\x0a\x00"
+ .IMPORT abort,CODE
+ .SPACE $TEXT$
+ .SUBSPA $CODE$
+
+ .align 4
+ .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR
+bn_div64
+ .PROC
+ .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8
+ .ENTRY
+ stw %r2,-20(0,%r30)
+ stwm %r8,128(0,%r30)
+ stw %r7,-124(0,%r30)
+ stw %r4,-112(0,%r30)
+ stw %r3,-108(0,%r30)
+ copy %r26,%r3
+ copy %r25,%r4
+ stw %r6,-120(0,%r30)
+ ldi 0,%r7
+ stw %r5,-116(0,%r30)
+ movb,<> %r24,%r5,L$0059
+ ldi 2,%r6
+ bl L$0076,0
+ ldi -1,%r28
+L$0059
+ .CALL ARGW0=GR
+ bl BN_num_bits_word,%r2
+ copy %r5,%r26
+ ldi 32,%r19
+ comb,= %r19,%r28,L$0060
+ subi 31,%r28,%r19
+ mtsar %r19
+ zvdepi 1,32,%r19
+ comb,>>= %r19,%r3,L$0060
+ addil LR'__iob-$global$+32,%r27
+ ldo RR'__iob-$global$+32(%r1),%r26
+ ldil LR'L$C0000,%r25
+ .CALL ARGW0=GR,ARGW1=GR
+ bl fprintf,%r2
+ ldo RR'L$C0000(%r25),%r25
+ .CALL
+ bl abort,%r2
+ nop
+L$0060
+ comb,>> %r5,%r3,L$0061
+ subi 32,%r28,%r28
+ sub %r3,%r5,%r3
+L$0061
+ comib,= 0,%r28,L$0062
+ subi 31,%r28,%r19
+ mtsar %r19
+ zvdep %r5,32,%r5
+ zvdep %r3,32,%r21
+ subi 32,%r28,%r20
+ mtsar %r20
+ vshd 0,%r4,%r20
+ or %r21,%r20,%r3
+ mtsar %r19
+ zvdep %r4,32,%r4
+L$0062
+ extru %r5,15,16,%r23
+ extru %r5,31,16,%r28
+L$0063
+ extru %r3,15,16,%r19
+ comb,<> %r23,%r19,L$0066
+ copy %r3,%r26
+ bl L$0067,0
+ zdepi -1,31,16,%r29
+L$0066
+ .IMPORT $$divU,MILLICODE
+ bl $$divU,%r31
+ copy %r23,%r25
+L$0067
+ stw %r29,-16(0,%r30)
+ fldws -16(0,%r30),%fr10L
+ stw %r28,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ stw %r23,-16(0,%r30)
+ xmpyu %fr10L,%fr10R,%fr8
+ fldws -16(0,%r30),%fr10R
+ fstws %fr8R,-16(0,%r30)
+ xmpyu %fr10L,%fr10R,%fr9
+ ldw -16(0,%r30),%r8
+ fstws %fr9R,-16(0,%r30)
+ copy %r8,%r22
+ ldw -16(0,%r30),%r8
+ extru %r4,15,16,%r24
+ copy %r8,%r21
+L$0068
+ sub %r3,%r21,%r20
+ copy %r20,%r19
+ depi 0,31,16,%r19
+ comib,<> 0,%r19,L$0069
+ zdep %r20,15,16,%r19
+ addl %r19,%r24,%r19
+ comb,>>= %r19,%r22,L$0069
+ sub %r22,%r28,%r22
+ sub %r21,%r23,%r21
+ bl L$0068,0
+ ldo -1(%r29),%r29
+L$0069
+ stw %r29,-16(0,%r30)
+ fldws -16(0,%r30),%fr10L
+ stw %r28,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ xmpyu %fr10L,%fr10R,%fr8
+ fstws %fr8R,-16(0,%r30)
+ ldw -16(0,%r30),%r8
+ stw %r23,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ copy %r8,%r19
+ xmpyu %fr10L,%fr10R,%fr8
+ fstws %fr8R,-16(0,%r30)
+ extru %r19,15,16,%r20
+ ldw -16(0,%r30),%r8
+ zdep %r19,15,16,%r19
+ addl %r8,%r20,%r20
+ comclr,<<= %r19,%r4,0
+ addi 1,%r20,%r20
+ comb,<<= %r20,%r3,L$0074
+ sub %r4,%r19,%r4
+ addl %r3,%r5,%r3
+ ldo -1(%r29),%r29
+L$0074
+ addib,= -1,%r6,L$0064
+ sub %r3,%r20,%r3
+ zdep %r29,15,16,%r7
+ shd %r3,%r4,16,%r3
+ bl L$0063,0
+ zdep %r4,15,16,%r4
+L$0064
+ or %r7,%r29,%r28
+L$0076
+ ldw -148(0,%r30),%r2
+ ldw -124(0,%r30),%r7
+ ldw -120(0,%r30),%r6
+ ldw -116(0,%r30),%r5
+ ldw -112(0,%r30),%r4
+ ldw -108(0,%r30),%r3
+ bv 0(%r2)
+ ldwm -128(0,%r30),%r8
+ .EXIT
+ .PROCEND
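The striking thing about the pa-risc.s output above is that every 32x32->64 multiply is built from xmpyu on 16-bit halves of the operands, bounced through the stack slot at -16(%r30) because xmpyu works in floating-point registers. The partial-product scheme itself is ordinary schoolbook decomposition; roughly, in C (a hypothetical helper for illustration, not part of bn_mulw.c):

    #include <stdint.h>

    /* A 32x32->64 multiply assembled from four 16x16 partial products,
     * the scheme the xmpyu sequences above implement. */
    static uint64_t mul32x32(uint32_t a, uint32_t b)
        {
        uint32_t al = a & 0xffff, ah = a >> 16;
        uint32_t bl = b & 0xffff, bh = b >> 16;

        uint64_t lo  = (uint64_t)al * bl;   /* xmpyu: low  x low      */
        uint64_t mid = (uint64_t)al * bh
                     + (uint64_t)ah * bl;   /* the two cross products */
        uint64_t hi  = (uint64_t)ah * bh;   /* xmpyu: high x high     */

        return lo + (mid << 16) + (hi << 32);
        }

This is also why pa-risc2.s wins: it lets xmpyu do a full 32x32->64 product in one operation instead of four.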
diff --git a/crypto/bn/asm/pa-risc2.s b/crypto/bn/asm/pa-risc2.s
new file mode 100644
index 0000000000..5e07b7d2e8
--- /dev/null
+++ b/crypto/bn/asm/pa-risc2.s
@@ -0,0 +1,416 @@
+ .SPACE $PRIVATE$
+ .SUBSPA $DATA$,QUAD=1,ALIGN=8,ACCESS=31
+ .SUBSPA $BSS$,QUAD=1,ALIGN=8,ACCESS=31,ZERO,SORT=82
+ .SPACE $TEXT$
+ .SUBSPA $LIT$,QUAD=0,ALIGN=8,ACCESS=44
+ .SUBSPA $CODE$,QUAD=0,ALIGN=8,ACCESS=44,CODE_ONLY
+ .IMPORT $global$,DATA
+ .IMPORT $$dyncall,MILLICODE
+; gcc_compiled.:
+ .SPACE $TEXT$
+ .SUBSPA $CODE$
+
+ .align 4
+ .EXPORT bn_mul_add_word,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
+bn_mul_add_word
+ .PROC
+ .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=4
+ .ENTRY
+ stw %r2,-20(0,%r30)
+ stwm %r4,64(0,%r30)
+ copy %r24,%r31
+ stw %r3,-60(0,%r30)
+ ldi 0,%r20
+ ldo 12(%r26),%r2
+ stw %r23,-16(0,%r30)
+ copy %r25,%r3
+ ldo 12(%r3),%r1
+ fldws -16(0,%r30),%fr8L
+L$0010
+ copy %r20,%r25
+ ldi 0,%r24
+ fldws 0(0,%r3),%fr9L
+ ldw 0(0,%r26),%r19
+ xmpyu %fr8L,%fr9L,%fr9
+ fstds %fr9,-16(0,%r30)
+ copy %r19,%r23
+ ldw -16(0,%r30),%r28
+ ldw -12(0,%r30),%r29
+ ldi 0,%r22
+ add %r23,%r29,%r29
+ addc %r22,%r28,%r28
+ add %r25,%r29,%r29
+ addc %r24,%r28,%r28
+ copy %r28,%r21
+ ldi 0,%r20
+ copy %r21,%r20
+ addib,= -1,%r31,L$0011
+ stw %r29,0(0,%r26)
+ copy %r20,%r25
+ ldi 0,%r24
+ fldws -8(0,%r1),%fr9L
+ ldw -8(0,%r2),%r19
+ xmpyu %fr8L,%fr9L,%fr9
+ fstds %fr9,-16(0,%r30)
+ copy %r19,%r23
+ ldw -16(0,%r30),%r28
+ ldw -12(0,%r30),%r29
+ ldi 0,%r22
+ add %r23,%r29,%r29
+ addc %r22,%r28,%r28
+ add %r25,%r29,%r29
+ addc %r24,%r28,%r28
+ copy %r28,%r21
+ ldi 0,%r20
+ copy %r21,%r20
+ addib,= -1,%r31,L$0011
+ stw %r29,-8(0,%r2)
+ copy %r20,%r25
+ ldi 0,%r24
+ fldws -4(0,%r1),%fr9L
+ ldw -4(0,%r2),%r19
+ xmpyu %fr8L,%fr9L,%fr9
+ fstds %fr9,-16(0,%r30)
+ copy %r19,%r23
+ ldw -16(0,%r30),%r28
+ ldw -12(0,%r30),%r29
+ ldi 0,%r22
+ add %r23,%r29,%r29
+ addc %r22,%r28,%r28
+ add %r25,%r29,%r29
+ addc %r24,%r28,%r28
+ copy %r28,%r21
+ ldi 0,%r20
+ copy %r21,%r20
+ addib,= -1,%r31,L$0011
+ stw %r29,-4(0,%r2)
+ copy %r20,%r25
+ ldi 0,%r24
+ fldws 0(0,%r1),%fr9L
+ ldw 0(0,%r2),%r19
+ xmpyu %fr8L,%fr9L,%fr9
+ fstds %fr9,-16(0,%r30)
+ copy %r19,%r23
+ ldw -16(0,%r30),%r28
+ ldw -12(0,%r30),%r29
+ ldi 0,%r22
+ add %r23,%r29,%r29
+ addc %r22,%r28,%r28
+ add %r25,%r29,%r29
+ addc %r24,%r28,%r28
+ copy %r28,%r21
+ ldi 0,%r20
+ copy %r21,%r20
+ addib,= -1,%r31,L$0011
+ stw %r29,0(0,%r2)
+ ldo 16(%r1),%r1
+ ldo 16(%r3),%r3
+ ldo 16(%r2),%r2
+ bl L$0010,0
+ ldo 16(%r26),%r26
+L$0011
+ copy %r20,%r28
+ ldw -84(0,%r30),%r2
+ ldw -60(0,%r30),%r3
+ bv 0(%r2)
+ ldwm -64(0,%r30),%r4
+ .EXIT
+ .PROCEND
+ .align 4
+ .EXPORT bn_mul_word,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR
+bn_mul_word
+ .PROC
+ .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=3
+ .ENTRY
+ stw %r2,-20(0,%r30)
+ copy %r25,%r2
+ stwm %r4,64(0,%r30)
+ copy %r24,%r19
+ ldi 0,%r28
+ stw %r23,-16(0,%r30)
+ ldo 12(%r26),%r31
+ ldo 12(%r2),%r29
+ fldws -16(0,%r30),%fr8L
+L$0026
+ fldws 0(0,%r2),%fr9L
+ xmpyu %fr8L,%fr9L,%fr9
+ fstds %fr9,-16(0,%r30)
+ copy %r28,%r21
+ ldi 0,%r20
+ ldw -16(0,%r30),%r24
+ ldw -12(0,%r30),%r25
+ add %r21,%r25,%r25
+ addc %r20,%r24,%r24
+ copy %r24,%r23
+ ldi 0,%r22
+ copy %r23,%r28
+ addib,= -1,%r19,L$0027
+ stw %r25,0(0,%r26)
+ fldws -8(0,%r29),%fr9L
+ xmpyu %fr8L,%fr9L,%fr9
+ fstds %fr9,-16(0,%r30)
+ copy %r28,%r21
+ ldi 0,%r20
+ ldw -16(0,%r30),%r24
+ ldw -12(0,%r30),%r25
+ add %r21,%r25,%r25
+ addc %r20,%r24,%r24
+ copy %r24,%r23
+ ldi 0,%r22
+ copy %r23,%r28
+ addib,= -1,%r19,L$0027
+ stw %r25,-8(0,%r31)
+ fldws -4(0,%r29),%fr9L
+ xmpyu %fr8L,%fr9L,%fr9
+ fstds %fr9,-16(0,%r30)
+ copy %r28,%r21
+ ldi 0,%r20
+ ldw -16(0,%r30),%r24
+ ldw -12(0,%r30),%r25
+ add %r21,%r25,%r25
+ addc %r20,%r24,%r24
+ copy %r24,%r23
+ ldi 0,%r22
+ copy %r23,%r28
+ addib,= -1,%r19,L$0027
+ stw %r25,-4(0,%r31)
+ fldws 0(0,%r29),%fr9L
+ xmpyu %fr8L,%fr9L,%fr9
+ fstds %fr9,-16(0,%r30)
+ copy %r28,%r21
+ ldi 0,%r20
+ ldw -16(0,%r30),%r24
+ ldw -12(0,%r30),%r25
+ add %r21,%r25,%r25
+ addc %r20,%r24,%r24
+ copy %r24,%r23
+ ldi 0,%r22
+ copy %r23,%r28
+ addib,= -1,%r19,L$0027
+ stw %r25,0(0,%r31)
+ ldo 16(%r29),%r29
+ ldo 16(%r2),%r2
+ ldo 16(%r31),%r31
+ bl L$0026,0
+ ldo 16(%r26),%r26
+L$0027
+ ldw -84(0,%r30),%r2
+ bv 0(%r2)
+ ldwm -64(0,%r30),%r4
+ .EXIT
+ .PROCEND
+ .align 4
+ .EXPORT bn_sqr_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
+bn_sqr_words
+ .PROC
+ .CALLINFO FRAME=0,NO_CALLS
+ .ENTRY
+ ldo 28(%r26),%r19
+ ldo 12(%r25),%r28
+L$0042
+ fldws 0(0,%r25),%fr8L
+ fldws 0(0,%r25),%fr8R
+ xmpyu %fr8L,%fr8R,%fr8
+ fstds %fr8,-16(0,%r30)
+ ldw -16(0,%r30),%r22
+ ldw -12(0,%r30),%r23
+ stw %r23,0(0,%r26)
+ copy %r22,%r21
+ ldi 0,%r20
+ addib,= -1,%r24,L$0049
+ stw %r21,-24(0,%r19)
+ fldws -8(0,%r28),%fr8L
+ fldws -8(0,%r28),%fr8R
+ xmpyu %fr8L,%fr8R,%fr8
+ fstds %fr8,-16(0,%r30)
+ ldw -16(0,%r30),%r22
+ ldw -12(0,%r30),%r23
+ stw %r23,-20(0,%r19)
+ copy %r22,%r21
+ ldi 0,%r20
+ addib,= -1,%r24,L$0049
+ stw %r21,-16(0,%r19)
+ fldws -4(0,%r28),%fr8L
+ fldws -4(0,%r28),%fr8R
+ xmpyu %fr8L,%fr8R,%fr8
+ fstds %fr8,-16(0,%r30)
+ ldw -16(0,%r30),%r22
+ ldw -12(0,%r30),%r23
+ stw %r23,-12(0,%r19)
+ copy %r22,%r21
+ ldi 0,%r20
+ addib,= -1,%r24,L$0049
+ stw %r21,-8(0,%r19)
+ fldws 0(0,%r28),%fr8L
+ fldws 0(0,%r28),%fr8R
+ xmpyu %fr8L,%fr8R,%fr8
+ fstds %fr8,-16(0,%r30)
+ ldw -16(0,%r30),%r22
+ ldw -12(0,%r30),%r23
+ stw %r23,-4(0,%r19)
+ copy %r22,%r21
+ ldi 0,%r20
+ addib,= -1,%r24,L$0049
+ stw %r21,0(0,%r19)
+ ldo 16(%r28),%r28
+ ldo 16(%r25),%r25
+ ldo 32(%r19),%r19
+ bl L$0042,0
+ ldo 32(%r26),%r26
+L$0049
+ bv,n 0(%r2)
+ .EXIT
+ .PROCEND
+ .IMPORT BN_num_bits_word,CODE
+ .IMPORT fprintf,CODE
+ .IMPORT __iob,DATA
+ .SPACE $TEXT$
+ .SUBSPA $LIT$
+
+ .align 4
+L$C0000
+ .STRING "Division would overflow (%d)\x0a\x00"
+ .IMPORT abort,CODE
+ .SPACE $TEXT$
+ .SUBSPA $CODE$
+
+ .align 4
+ .EXPORT bn_div64,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,RTNVAL=GR
+bn_div64
+ .PROC
+ .CALLINFO FRAME=128,CALLS,SAVE_RP,ENTRY_GR=8
+ .ENTRY
+ stw %r2,-20(0,%r30)
+ stwm %r8,128(0,%r30)
+ stw %r7,-124(0,%r30)
+ stw %r4,-112(0,%r30)
+ stw %r3,-108(0,%r30)
+ copy %r26,%r3
+ copy %r25,%r4
+ stw %r6,-120(0,%r30)
+ ldi 0,%r7
+ stw %r5,-116(0,%r30)
+ movb,<> %r24,%r5,L$0051
+ ldi 2,%r6
+ bl L$0068,0
+ ldi -1,%r28
+L$0051
+ .CALL ARGW0=GR
+ bl BN_num_bits_word,%r2
+ copy %r5,%r26
+ copy %r28,%r24
+ ldi 32,%r19
+ comb,= %r19,%r24,L$0052
+ subi 31,%r24,%r19
+ mtsar %r19
+ zvdepi 1,32,%r19
+ comb,>>= %r19,%r3,L$0052
+ addil LR'__iob-$global$+32,%r27
+ ldo RR'__iob-$global$+32(%r1),%r26
+ ldil LR'L$C0000,%r25
+ .CALL ARGW0=GR,ARGW1=GR,ARGW2=GR
+ bl fprintf,%r2
+ ldo RR'L$C0000(%r25),%r25
+ .CALL
+ bl abort,%r2
+ nop
+L$0052
+ comb,>> %r5,%r3,L$0053
+ subi 32,%r24,%r24
+ sub %r3,%r5,%r3
+L$0053
+ comib,= 0,%r24,L$0054
+ subi 31,%r24,%r19
+ mtsar %r19
+ zvdep %r5,32,%r5
+ zvdep %r3,32,%r21
+ subi 32,%r24,%r20
+ mtsar %r20
+ vshd 0,%r4,%r20
+ or %r21,%r20,%r3
+ mtsar %r19
+ zvdep %r4,32,%r4
+L$0054
+ extru %r5,15,16,%r23
+ extru %r5,31,16,%r28
+L$0055
+ extru %r3,15,16,%r19
+ comb,<> %r23,%r19,L$0058
+ copy %r3,%r26
+ bl L$0059,0
+ zdepi -1,31,16,%r29
+L$0058
+ .IMPORT $$divU,MILLICODE
+ bl $$divU,%r31
+ copy %r23,%r25
+L$0059
+ stw %r29,-16(0,%r30)
+ fldws -16(0,%r30),%fr10L
+ stw %r28,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ stw %r23,-16(0,%r30)
+ xmpyu %fr10L,%fr10R,%fr8
+ fldws -16(0,%r30),%fr10R
+ fstws %fr8R,-16(0,%r30)
+ xmpyu %fr10L,%fr10R,%fr9
+ ldw -16(0,%r30),%r8
+ fstws %fr9R,-16(0,%r30)
+ copy %r8,%r22
+ ldw -16(0,%r30),%r8
+ extru %r4,15,16,%r24
+ copy %r8,%r21
+L$0060
+ sub %r3,%r21,%r20
+ copy %r20,%r19
+ depi 0,31,16,%r19
+ comib,<> 0,%r19,L$0061
+ zdep %r20,15,16,%r19
+ addl %r19,%r24,%r19
+ comb,>>= %r19,%r22,L$0061
+ sub %r22,%r28,%r22
+ sub %r21,%r23,%r21
+ bl L$0060,0
+ ldo -1(%r29),%r29
+L$0061
+ stw %r29,-16(0,%r30)
+ fldws -16(0,%r30),%fr10L
+ stw %r28,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ xmpyu %fr10L,%fr10R,%fr8
+ fstws %fr8R,-16(0,%r30)
+ ldw -16(0,%r30),%r8
+ stw %r23,-16(0,%r30)
+ fldws -16(0,%r30),%fr10R
+ copy %r8,%r19
+ xmpyu %fr10L,%fr10R,%fr8
+ fstws %fr8R,-16(0,%r30)
+ extru %r19,15,16,%r20
+ ldw -16(0,%r30),%r8
+ zdep %r19,15,16,%r19
+ addl %r8,%r20,%r20
+ comclr,<<= %r19,%r4,0
+ addi 1,%r20,%r20
+ comb,<<= %r20,%r3,L$0066
+ sub %r4,%r19,%r4
+ addl %r3,%r5,%r3
+ ldo -1(%r29),%r29
+L$0066
+ addib,= -1,%r6,L$0056
+ sub %r3,%r20,%r3
+ zdep %r29,15,16,%r7
+ shd %r3,%r4,16,%r3
+ bl L$0055,0
+ zdep %r4,15,16,%r4
+L$0056
+ or %r7,%r29,%r28
+L$0068
+ ldw -148(0,%r30),%r2
+ ldw -124(0,%r30),%r7
+ ldw -120(0,%r30),%r6
+ ldw -116(0,%r30),%r5
+ ldw -112(0,%r30),%r4
+ ldw -108(0,%r30),%r3
+ bv 0(%r2)
+ ldwm -128(0,%r30),%r8
+ .EXIT
+ .PROCEND
diff --git a/crypto/bn/asm/r3000.s b/crypto/bn/asm/r3000.s
new file mode 100644
index 0000000000..5be2a0d0e6
--- /dev/null
+++ b/crypto/bn/asm/r3000.s
@@ -0,0 +1,646 @@
+ .file 1 "../bn_mulw.c"
+ .set nobopt
+ .option pic2
+
+ # GNU C 2.6.3 [AL 1.1, MM 40] SGI running IRIX 5.0 compiled by GNU C
+
+ # Cc1 defaults:
+ # -mabicalls
+
+ # Cc1 arguments (-G value = 0, Cpu = 3000, ISA = 1):
+ # -quiet -dumpbase -O2 -o
+
+gcc2_compiled.:
+__gnu_compiled_c:
+ .rdata
+
+ .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
+ .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x39,0x20
+ .byte 0x24,0x0
+
+ .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
+ .byte 0x6e,0x3a,0x20,0x31,0x2e,0x33,0x34,0x20
+ .byte 0x24,0x0
+
+ .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
+ .byte 0x6e,0x3a,0x20,0x31,0x2e,0x35,0x20,0x24
+ .byte 0x0
+
+ .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
+ .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24
+ .byte 0x0
+
+ .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
+ .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x33,0x20
+ .byte 0x24,0x0
+
+ .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
+ .byte 0x6e,0x3a,0x20,0x31,0x2e,0x37,0x38,0x20
+ .byte 0x24,0x0
+
+ .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
+ .byte 0x6e,0x3a,0x20,0x33,0x2e,0x37,0x30,0x20
+ .byte 0x24,0x0
+
+ .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
+ .byte 0x6e,0x3a,0x20,0x31,0x2e,0x32,0x20,0x24
+ .byte 0x0
+
+ .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
+ .byte 0x6e,0x3a,0x20,0x31,0x2e,0x34,0x20,0x24
+ .byte 0x0
+
+ .byte 0x24,0x52,0x65,0x76,0x69,0x73,0x69,0x6f
+ .byte 0x6e,0x3a,0x20,0x31,0x2e,0x38,0x20,0x24
+ .byte 0x0
+ .text
+ .align 2
+ .globl bn_mul_add_word
+ .ent bn_mul_add_word
+bn_mul_add_word:
+ .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+ .set noreorder
+ .cpload $25
+ .set reorder
+ move $12,$4
+ move $14,$5
+ move $9,$6
+ move $13,$7
+ move $8,$0
+ addu $10,$12,12
+ addu $11,$14,12
+$L2:
+ lw $6,0($14)
+ #nop
+ multu $13,$6
+ mfhi $6
+ mflo $7
+ #nop
+ move $5,$8
+ move $4,$0
+ lw $3,0($12)
+ addu $9,$9,-1
+ move $2,$0
+ addu $7,$7,$3
+ sltu $8,$7,$3
+ addu $6,$6,$2
+ addu $6,$6,$8
+ addu $7,$7,$5
+ sltu $2,$7,$5
+ addu $6,$6,$4
+ addu $6,$6,$2
+ srl $3,$6,0
+ move $2,$0
+ move $8,$3
+ .set noreorder
+ .set nomacro
+ beq $9,$0,$L3
+ sw $7,0($12)
+ .set macro
+ .set reorder
+
+ lw $6,-8($11)
+ #nop
+ multu $13,$6
+ mfhi $6
+ mflo $7
+ #nop
+ move $5,$8
+ move $4,$0
+ lw $3,-8($10)
+ addu $9,$9,-1
+ move $2,$0
+ addu $7,$7,$3
+ sltu $8,$7,$3
+ addu $6,$6,$2
+ addu $6,$6,$8
+ addu $7,$7,$5
+ sltu $2,$7,$5
+ addu $6,$6,$4
+ addu $6,$6,$2
+ srl $3,$6,0
+ move $2,$0
+ move $8,$3
+ .set noreorder
+ .set nomacro
+ beq $9,$0,$L3
+ sw $7,-8($10)
+ .set macro
+ .set reorder
+
+ lw $6,-4($11)
+ #nop
+ multu $13,$6
+ mfhi $6
+ mflo $7
+ #nop
+ move $5,$8
+ move $4,$0
+ lw $3,-4($10)
+ addu $9,$9,-1
+ move $2,$0
+ addu $7,$7,$3
+ sltu $8,$7,$3
+ addu $6,$6,$2
+ addu $6,$6,$8
+ addu $7,$7,$5
+ sltu $2,$7,$5
+ addu $6,$6,$4
+ addu $6,$6,$2
+ srl $3,$6,0
+ move $2,$0
+ move $8,$3
+ .set noreorder
+ .set nomacro
+ beq $9,$0,$L3
+ sw $7,-4($10)
+ .set macro
+ .set reorder
+
+ lw $6,0($11)
+ #nop
+ multu $13,$6
+ mfhi $6
+ mflo $7
+ #nop
+ move $5,$8
+ move $4,$0
+ lw $3,0($10)
+ addu $9,$9,-1
+ move $2,$0
+ addu $7,$7,$3
+ sltu $8,$7,$3
+ addu $6,$6,$2
+ addu $6,$6,$8
+ addu $7,$7,$5
+ sltu $2,$7,$5
+ addu $6,$6,$4
+ addu $6,$6,$2
+ srl $3,$6,0
+ move $2,$0
+ move $8,$3
+ .set noreorder
+ .set nomacro
+ beq $9,$0,$L3
+ sw $7,0($10)
+ .set macro
+ .set reorder
+
+ addu $11,$11,16
+ addu $14,$14,16
+ addu $10,$10,16
+ .set noreorder
+ .set nomacro
+ j $L2
+ addu $12,$12,16
+ .set macro
+ .set reorder
+
+$L3:
+ .set noreorder
+ .set nomacro
+ j $31
+ move $2,$8
+ .set macro
+ .set reorder
+
+ .end bn_mul_add_word
+ .align 2
+ .globl bn_mul_word
+ .ent bn_mul_word
+bn_mul_word:
+ .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+ .set noreorder
+ .cpload $25
+ .set reorder
+ move $11,$4
+ move $12,$5
+ move $8,$6
+ move $6,$0
+ addu $10,$11,12
+ addu $9,$12,12
+$L10:
+ lw $4,0($12)
+ #nop
+ multu $7,$4
+ mfhi $4
+ mflo $5
+ #nop
+ move $3,$6
+ move $2,$0
+ addu $8,$8,-1
+ addu $5,$5,$3
+ sltu $6,$5,$3
+ addu $4,$4,$2
+ addu $4,$4,$6
+ srl $3,$4,0
+ move $2,$0
+ move $6,$3
+ .set noreorder
+ .set nomacro
+ beq $8,$0,$L11
+ sw $5,0($11)
+ .set macro
+ .set reorder
+
+ lw $4,-8($9)
+ #nop
+ multu $7,$4
+ mfhi $4
+ mflo $5
+ #nop
+ move $3,$6
+ move $2,$0
+ addu $8,$8,-1
+ addu $5,$5,$3
+ sltu $6,$5,$3
+ addu $4,$4,$2
+ addu $4,$4,$6
+ srl $3,$4,0
+ move $2,$0
+ move $6,$3
+ .set noreorder
+ .set nomacro
+ beq $8,$0,$L11
+ sw $5,-8($10)
+ .set macro
+ .set reorder
+
+ lw $4,-4($9)
+ #nop
+ multu $7,$4
+ mfhi $4
+ mflo $5
+ #nop
+ move $3,$6
+ move $2,$0
+ addu $8,$8,-1
+ addu $5,$5,$3
+ sltu $6,$5,$3
+ addu $4,$4,$2
+ addu $4,$4,$6
+ srl $3,$4,0
+ move $2,$0
+ move $6,$3
+ .set noreorder
+ .set nomacro
+ beq $8,$0,$L11
+ sw $5,-4($10)
+ .set macro
+ .set reorder
+
+ lw $4,0($9)
+ #nop
+ multu $7,$4
+ mfhi $4
+ mflo $5
+ #nop
+ move $3,$6
+ move $2,$0
+ addu $8,$8,-1
+ addu $5,$5,$3
+ sltu $6,$5,$3
+ addu $4,$4,$2
+ addu $4,$4,$6
+ srl $3,$4,0
+ move $2,$0
+ move $6,$3
+ .set noreorder
+ .set nomacro
+ beq $8,$0,$L11
+ sw $5,0($10)
+ .set macro
+ .set reorder
+
+ addu $9,$9,16
+ addu $12,$12,16
+ addu $10,$10,16
+ .set noreorder
+ .set nomacro
+ j $L10
+ addu $11,$11,16
+ .set macro
+ .set reorder
+
+$L11:
+ .set noreorder
+ .set nomacro
+ j $31
+ move $2,$6
+ .set macro
+ .set reorder
+
+ .end bn_mul_word
+ .align 2
+ .globl bn_sqr_words
+ .ent bn_sqr_words
+bn_sqr_words:
+ .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0
+ .mask 0x00000000,0
+ .fmask 0x00000000,0
+ .set noreorder
+ .cpload $25
+ .set reorder
+ move $9,$4
+ addu $7,$9,28
+ addu $8,$5,12
+$L18:
+ lw $2,0($5)
+ #nop
+ multu $2,$2
+ mfhi $2
+ mflo $3
+ #nop
+ addu $6,$6,-1
+ sw $3,0($9)
+ srl $3,$2,0
+ move $2,$0
+ .set noreorder
+ .set nomacro
+ beq $6,$0,$L19
+ sw $3,-24($7)
+ .set macro
+ .set reorder
+
+ lw $2,-8($8)
+ #nop
+ multu $2,$2
+ mfhi $2
+ mflo $3
+ #nop
+ addu $6,$6,-1
+ sw $3,-20($7)
+ srl $3,$2,0
+ move $2,$0
+ .set noreorder
+ .set nomacro
+ beq $6,$0,$L19
+ sw $3,-16($7)
+ .set macro
+ .set reorder
+
+ lw $2,-4($8)
+ #nop
+ multu $2,$2
+ mfhi $2
+ mflo $3
+ #nop
+ addu $6,$6,-1
+ sw $3,-12($7)
+ srl $3,$2,0
+ move $2,$0
+ .set noreorder
+ .set nomacro
+ beq $6,$0,$L19
+ sw $3,-8($7)
+ .set macro
+ .set reorder
+
+ lw $2,0($8)
+ #nop
+ multu $2,$2
+ mfhi $2
+ mflo $3
+ #nop
+ addu $6,$6,-1
+ sw $3,-4($7)
+ srl $3,$2,0
+ move $2,$0
+ .set noreorder
+ .set nomacro
+ beq $6,$0,$L19
+ sw $3,0($7)
+ .set macro
+ .set reorder
+
+ addu $8,$8,16
+ addu $5,$5,16
+ addu $7,$7,32
+ .set noreorder
+ .set nomacro
+ j $L18
+ addu $9,$9,32
+ .set macro
+ .set reorder
+
+$L19:
+ j $31
+ .end bn_sqr_words
+ .rdata
+ .align 2
+$LC0:
+
+ .byte 0x44,0x69,0x76,0x69,0x73,0x69,0x6f,0x6e
+ .byte 0x20,0x77,0x6f,0x75,0x6c,0x64,0x20,0x6f
+ .byte 0x76,0x65,0x72,0x66,0x6c,0x6f,0x77,0xa
+ .byte 0x0
+ .text
+ .align 2
+ .globl bn_div64
+ .ent bn_div64
+bn_div64:
+ .frame $sp,56,$31 # vars= 0, regs= 7/0, args= 16, extra= 8
+ .mask 0x901f0000,-8
+ .fmask 0x00000000,0
+ .set noreorder
+ .cpload $25
+ .set reorder
+ subu $sp,$sp,56
+ .cprestore 16
+ sw $16,24($sp)
+ move $16,$4
+ sw $17,28($sp)
+ move $17,$5
+ sw $18,32($sp)
+ move $18,$6
+ sw $20,40($sp)
+ move $20,$0
+ sw $19,36($sp)
+ li $19,0x00000002 # 2
+ sw $31,48($sp)
+ .set noreorder
+ .set nomacro
+ bne $18,$0,$L26
+ sw $28,44($sp)
+ .set macro
+ .set reorder
+
+ .set noreorder
+ .set nomacro
+ j $L43
+ li $2,-1 # 0xffffffff
+ .set macro
+ .set reorder
+
+$L26:
+ move $4,$18
+ jal BN_num_bits_word
+ move $4,$2
+ li $2,0x00000020 # 32
+ .set noreorder
+ .set nomacro
+ beq $4,$2,$L27
+ li $2,0x00000001 # 1
+ .set macro
+ .set reorder
+
+ sll $2,$2,$4
+ sltu $2,$2,$16
+ .set noreorder
+ .set nomacro
+ beq $2,$0,$L44
+ li $5,0x00000020 # 32
+ .set macro
+ .set reorder
+
+ la $4,__iob+32
+ la $5,$LC0
+ jal fprintf
+ jal abort
+$L27:
+ li $5,0x00000020 # 32
+$L44:
+ sltu $2,$16,$18
+ .set noreorder
+ .set nomacro
+ bne $2,$0,$L28
+ subu $4,$5,$4
+ .set macro
+ .set reorder
+
+ subu $16,$16,$18
+$L28:
+ .set noreorder
+ .set nomacro
+ beq $4,$0,$L29
+ li $10,-65536 # 0xffff0000
+ .set macro
+ .set reorder
+
+ sll $18,$18,$4
+ sll $3,$16,$4
+ subu $2,$5,$4
+ srl $2,$17,$2
+ or $16,$3,$2
+ sll $17,$17,$4
+$L29:
+ srl $7,$18,16
+ andi $9,$18,0xffff
+$L30:
+ srl $2,$16,16
+ .set noreorder
+ .set nomacro
+ beq $2,$7,$L34
+ li $6,0x0000ffff # 65535
+ .set macro
+ .set reorder
+
+ divu $6,$16,$7
+$L34:
+ mult $6,$9
+ mflo $5
+ #nop
+ #nop
+ mult $6,$7
+ and $2,$17,$10
+ srl $8,$2,16
+ mflo $4
+$L35:
+ subu $3,$16,$4
+ and $2,$3,$10
+ .set noreorder
+ .set nomacro
+ bne $2,$0,$L36
+ sll $2,$3,16
+ .set macro
+ .set reorder
+
+ addu $2,$2,$8
+ sltu $2,$2,$5
+ .set noreorder
+ .set nomacro
+ beq $2,$0,$L36
+ subu $5,$5,$9
+ .set macro
+ .set reorder
+
+ subu $4,$4,$7
+ .set noreorder
+ .set nomacro
+ j $L35
+ addu $6,$6,-1
+ .set macro
+ .set reorder
+
+$L36:
+ mult $6,$7
+ mflo $5
+ #nop
+ #nop
+ mult $6,$9
+ mflo $4
+ #nop
+ #nop
+ srl $3,$4,16
+ sll $2,$4,16
+ and $4,$2,$10
+ sltu $2,$17,$4
+ .set noreorder
+ .set nomacro
+ beq $2,$0,$L40
+ addu $5,$5,$3
+ .set macro
+ .set reorder
+
+ addu $5,$5,1
+$L40:
+ sltu $2,$16,$5
+ .set noreorder
+ .set nomacro
+ beq $2,$0,$L41
+ subu $17,$17,$4
+ .set macro
+ .set reorder
+
+ addu $16,$16,$18
+ addu $6,$6,-1
+$L41:
+ addu $19,$19,-1
+ .set noreorder
+ .set nomacro
+ beq $19,$0,$L31
+ subu $16,$16,$5
+ .set macro
+ .set reorder
+
+ sll $20,$6,16
+ sll $3,$16,16
+ srl $2,$17,16
+ or $16,$3,$2
+ .set noreorder
+ .set nomacro
+ j $L30
+ sll $17,$17,16
+ .set macro
+ .set reorder
+
+$L31:
+ or $2,$20,$6
+$L43:
+ lw $31,48($sp)
+ lw $20,40($sp)
+ lw $19,36($sp)
+ lw $18,32($sp)
+ lw $17,28($sp)
+ lw $16,24($sp)
+ addu $sp,$sp,56
+ j $31
+ .end bn_div64
+
+ .globl abort .text
+ .globl fprintf .text
+ .globl BN_num_bits_word .text
diff --git a/crypto/bn/asm/sparc.s b/crypto/bn/asm/sparc.s
new file mode 100644
index 0000000000..37c5fb194e
--- /dev/null
+++ b/crypto/bn/asm/sparc.s
@@ -0,0 +1,359 @@
+ .file "bn_mulw.c"
+gcc2_compiled.:
+.section ".text"
+ .align 4
+ .global bn_mul_add_word
+ .type bn_mul_add_word,#function
+ .proc 016
+bn_mul_add_word:
+ !#PROLOGUE# 0
+ save %sp,-112,%sp
+ !#PROLOGUE# 1
+ mov %i0,%o0
+ mov %i1,%o2
+ mov %i2,%g1
+ mov %i3,%o1
+ mov 0,%i4
+ add %o0,12,%g4
+ add %o2,12,%o7
+.LL2:
+ mov %i4,%i3
+ mov 0,%i2
+ ld [%o0],%g2
+ mov %g2,%i1
+ ld [%o2],%g2
+ mov 0,%i0
+ umul %o1,%g2,%g3
+ rd %y,%g2
+ addcc %g3,%i1,%g3
+ addx %g2,%i0,%g2
+ addcc %g3,%i3,%g3
+ addx %g2,%i2,%g2
+ st %g3,[%o0]
+ mov %g2,%i5
+ mov 0,%i4
+ addcc %g1,-1,%g1
+ be .LL3
+ mov %i5,%i4
+ mov %i4,%i3
+ mov 0,%i2
+ ld [%g4-8],%g2
+ mov %g2,%i1
+ ld [%o7-8],%g2
+ mov 0,%i0
+ umul %o1,%g2,%g3
+ rd %y,%g2
+ addcc %g3,%i1,%g3
+ addx %g2,%i0,%g2
+ addcc %g3,%i3,%g3
+ addx %g2,%i2,%g2
+ st %g3,[%g4-8]
+ mov %g2,%i5
+ mov 0,%i4
+ addcc %g1,-1,%g1
+ be .LL3
+ mov %i5,%i4
+ mov %i4,%i3
+ mov 0,%i2
+ ld [%g4-4],%g2
+ mov %g2,%i1
+ ld [%o7-4],%g2
+ mov 0,%i0
+ umul %o1,%g2,%g3
+ rd %y,%g2
+ addcc %g3,%i1,%g3
+ addx %g2,%i0,%g2
+ addcc %g3,%i3,%g3
+ addx %g2,%i2,%g2
+ st %g3,[%g4-4]
+ mov %g2,%i5
+ mov 0,%i4
+ addcc %g1,-1,%g1
+ be .LL3
+ mov %i5,%i4
+ mov %i4,%i3
+ mov 0,%i2
+ ld [%g4],%g2
+ mov %g2,%i1
+ ld [%o7],%g2
+ mov 0,%i0
+ umul %o1,%g2,%g3
+ rd %y,%g2
+ addcc %g3,%i1,%g3
+ addx %g2,%i0,%g2
+ addcc %g3,%i3,%g3
+ addx %g2,%i2,%g2
+ st %g3,[%g4]
+ mov %g2,%i5
+ mov 0,%i4
+ addcc %g1,-1,%g1
+ be .LL3
+ mov %i5,%i4
+ add %o7,16,%o7
+ add %o2,16,%o2
+ add %g4,16,%g4
+ b .LL2
+ add %o0,16,%o0
+.LL3:
+ ret
+ restore %g0,%i4,%o0
+.LLfe1:
+ .size bn_mul_add_word,.LLfe1-bn_mul_add_word
+ .align 4
+ .global bn_mul_word
+ .type bn_mul_word,#function
+ .proc 016
+bn_mul_word:
+ !#PROLOGUE# 0
+ save %sp,-112,%sp
+ !#PROLOGUE# 1
+ mov %i0,%o7
+ mov %i1,%o0
+ mov %i2,%i4
+ mov %i3,%g4
+ mov 0,%i0
+ add %o7,12,%g1
+ add %o0,12,%i5
+.LL18:
+ mov %i0,%g3
+ mov 0,%g2
+ ld [%o0],%i2
+ umul %g4,%i2,%i3
+ rd %y,%i2
+ addcc %i3,%g3,%i3
+ addx %i2,%g2,%i2
+ st %i3,[%o7]
+ mov %i2,%i1
+ mov 0,%i0
+ addcc %i4,-1,%i4
+ be .LL19
+ mov %i1,%i0
+ mov %i0,%g3
+ mov 0,%g2
+ ld [%i5-8],%i2
+ umul %g4,%i2,%i3
+ rd %y,%i2
+ addcc %i3,%g3,%i3
+ addx %i2,%g2,%i2
+ st %i3,[%g1-8]
+ mov %i2,%i1
+ mov 0,%i0
+ addcc %i4,-1,%i4
+ be .LL19
+ mov %i1,%i0
+ mov %i0,%g3
+ mov 0,%g2
+ ld [%i5-4],%i2
+ umul %g4,%i2,%i3
+ rd %y,%i2
+ addcc %i3,%g3,%i3
+ addx %i2,%g2,%i2
+ st %i3,[%g1-4]
+ mov %i2,%i1
+ mov 0,%i0
+ addcc %i4,-1,%i4
+ be .LL19
+ mov %i1,%i0
+ mov %i0,%g3
+ mov 0,%g2
+ ld [%i5],%i2
+ umul %g4,%i2,%i3
+ rd %y,%i2
+ addcc %i3,%g3,%i3
+ addx %i2,%g2,%i2
+ st %i3,[%g1]
+ mov %i2,%i1
+ mov 0,%i0
+ addcc %i4,-1,%i4
+ be .LL19
+ mov %i1,%i0
+ add %i5,16,%i5
+ add %o0,16,%o0
+ add %g1,16,%g1
+ b .LL18
+ add %o7,16,%o7
+.LL19:
+ ret
+ restore
+.LLfe2:
+ .size bn_mul_word,.LLfe2-bn_mul_word
+ .align 4
+ .global bn_sqr_words
+ .type bn_sqr_words,#function
+ .proc 020
+bn_sqr_words:
+ !#PROLOGUE# 0
+ !#PROLOGUE# 1
+ mov %o0,%g4
+ add %g4,28,%o3
+ add %o1,12,%g1
+.LL34:
+ ld [%o1],%o0
+ addcc %o2,-1,%o2
+ umul %o0,%o0,%o5
+ rd %y,%o4
+ st %o5,[%g4]
+ mov %o4,%g3
+ mov 0,%g2
+ be .LL35
+ st %g3,[%o3-24]
+ ld [%g1-8],%o0
+ addcc %o2,-1,%o2
+ umul %o0,%o0,%o5
+ rd %y,%o4
+ st %o5,[%o3-20]
+ mov %o4,%g3
+ mov 0,%g2
+ be .LL35
+ st %g3,[%o3-16]
+ ld [%g1-4],%o0
+ addcc %o2,-1,%o2
+ umul %o0,%o0,%o5
+ rd %y,%o4
+ st %o5,[%o3-12]
+ mov %o4,%g3
+ mov 0,%g2
+ be .LL35
+ st %g3,[%o3-8]
+ ld [%g1],%o0
+ addcc %o2,-1,%o2
+ umul %o0,%o0,%o5
+ rd %y,%o4
+ st %o5,[%o3-4]
+ mov %o4,%g3
+ mov 0,%g2
+ be .LL35
+ st %g3,[%o3]
+ add %g1,16,%g1
+ add %o1,16,%o1
+ add %o3,32,%o3
+ b .LL34
+ add %g4,32,%g4
+.LL35:
+ retl
+ nop
+.LLfe3:
+ .size bn_sqr_words,.LLfe3-bn_sqr_words
+.section ".rodata"
+ .align 8
+.LLC0:
+ .asciz "Division would overflow\n"
+.section ".text"
+ .align 4
+ .global bn_div64
+ .type bn_div64,#function
+ .proc 016
+bn_div64:
+ !#PROLOGUE# 0
+ save %sp,-112,%sp
+ !#PROLOGUE# 1
+ mov 0,%l1
+ cmp %i2,0
+ bne .LL42
+ mov 2,%l0
+ b .LL59
+ mov -1,%i0
+.LL42:
+ call BN_num_bits_word,0
+ mov %i2,%o0
+ mov %o0,%o2
+ cmp %o2,32
+ be .LL43
+ mov 1,%o0
+ sll %o0,%o2,%o0
+ cmp %i0,%o0
+ bleu .LL60
+ mov 32,%o0
+ sethi %hi(__iob+32),%o0
+ or %o0,%lo(__iob+32),%o0
+ sethi %hi(.LLC0),%o1
+ call fprintf,0
+ or %o1,%lo(.LLC0),%o1
+ call abort,0
+ nop
+.LL43:
+ mov 32,%o0
+.LL60:
+ cmp %i0,%i2
+ blu .LL44
+ sub %o0,%o2,%o2
+ sub %i0,%i2,%i0
+.LL44:
+ cmp %o2,0
+ be .LL45
+ sethi %hi(-65536),%o7
+ sll %i2,%o2,%i2
+ sll %i0,%o2,%o1
+ sub %o0,%o2,%o0
+ srl %i1,%o0,%o0
+ or %o1,%o0,%i0
+ sll %i1,%o2,%i1
+.LL45:
+ srl %i2,16,%g2
+ sethi %hi(65535),%o0
+ or %o0,%lo(65535),%o1
+ and %i2,%o1,%g3
+ mov %o0,%g4
+ mov %o1,%g1
+.LL46:
+ srl %i0,16,%o0
+ cmp %o0,%g2
+ be .LL50
+ or %g4,%lo(65535),%o3
+ wr %g0,%g0,%y
+ nop
+ nop
+ nop
+ udiv %i0,%g2,%o3
+.LL50:
+ and %i1,%o7,%o0
+ srl %o0,16,%o5
+ smul %o3,%g3,%o4
+ smul %o3,%g2,%o2
+.LL51:
+ sub %i0,%o2,%o1
+ andcc %o1,%o7,%g0
+ bne .LL52
+ sll %o1,16,%o0
+ add %o0,%o5,%o0
+ cmp %o4,%o0
+ bleu .LL52
+ sub %o4,%g3,%o4
+ sub %o2,%g2,%o2
+ b .LL51
+ add %o3,-1,%o3
+.LL52:
+ smul %o3,%g2,%o2
+ smul %o3,%g3,%o0
+ srl %o0,16,%o1
+ sll %o0,16,%o0
+ and %o0,%o7,%o0
+ cmp %i1,%o0
+ bgeu .LL56
+ add %o2,%o1,%o2
+ add %o2,1,%o2
+.LL56:
+ cmp %i0,%o2
+ bgeu .LL57
+ sub %i1,%o0,%i1
+ add %i0,%i2,%i0
+ add %o3,-1,%o3
+.LL57:
+ addcc %l0,-1,%l0
+ be .LL47
+ sub %i0,%o2,%i0
+ sll %o3,16,%l1
+ sll %i0,16,%o0
+ srl %i1,16,%o1
+ or %o0,%o1,%i0
+ and %i1,%g1,%o0
+ b .LL46
+ sll %o0,16,%i1
+.LL47:
+ or %l1,%o3,%i0
+.LL59:
+ ret
+ restore
+.LLfe4:
+ .size bn_div64,.LLfe4-bn_div64
+ .ident "GCC: (GNU) 2.7.0"
diff --git a/crypto/bn/asm/x86-bsdi.s b/crypto/bn/asm/x86-bsdi.s
new file mode 100644
index 0000000000..ca6687648e
--- /dev/null
+++ b/crypto/bn/asm/x86-bsdi.s
@@ -0,0 +1,272 @@
+ .file "bn_mulw.c"
+ .version "01.01"
+gcc2_compiled.:
+.text
+ .align 4
+.globl _bn_mul_add_word
+_bn_mul_add_word:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+ # ax L(t)
+ # dx H(t)
+ # bx a
+ # cx w
+ # di r
+ # si c
+ # bp num
+ xorl %esi,%esi # c=0
+ movl 20(%esp),%edi # r => edi
+ movl 24(%esp),%ebx # a => ebx
+ movl 32(%esp),%ecx # w => ecx
+ movl 28(%esp),%ebp # num => ebp
+
+ shrl $2,%ebp # num/4
+ je .L910
+
+# .align 4
+.L110:
+ # Round 1
+ movl %ecx,%eax # w => eax
+ mull (%ebx) # w * *a
+ addl (%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+= carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ # Round 2
+ movl %ecx,%eax # w => eax
+ mull 4(%ebx) # w * *a
+ addl 4(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+= carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,4(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ # Round 3
+ movl %ecx,%eax # w => eax
+ mull 8(%ebx) # w * *a
+ addl 8(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,8(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ # Round 4
+ movl %ecx,%eax # w => eax
+ mull 12(%ebx) # w * *a
+ addl 12(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,12(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ addl $16,%ebx # a+=4 (4 words)
+ addl $16,%edi # r+=4 (4 words)
+
+ decl %ebp # --num
+ je .L910
+ jmp .L110
+# .align 4
+.L910:
+ movl 28(%esp),%ebp # num => ebp
+ andl $3,%ebp
+ je .L111
+
+ # Round 1
+ movl %ecx,%eax # w => eax
+ mull (%ebx) # w * *a
+ addl (%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L111
+
+ # Round 2
+ movl %ecx,%eax # w => eax
+ mull 4(%ebx) # w * *a
+ addl 4(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,4(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L111
+
+ # Round 3
+ movl %ecx,%eax # w => eax
+ mull 8(%ebx) # w * *a
+ addl 8(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,8(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+# .align 4
+.L111:
+ movl %esi,%eax # return(c)
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+.Lfe1:
+ .align 4
+.globl _bn_mul_word
+_bn_mul_word:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+ # ax L(t)
+ # dx H(t)
+ # bx a
+ # cx w
+ # di r
+ # num bp
+ # si c
+ xorl %esi,%esi # c=0
+ movl 20(%esp),%edi # r => edi
+ movl 24(%esp),%ebx # a => ebx
+ movl 28(%esp),%ebp # num => bp
+ movl 32(%esp),%ecx # w => ecx
+
+# .align 4
+.L210:
+ movl %ecx,%eax # w => eax
+ mull (%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ movl %ecx,%eax # w => eax
+ mull 4(%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,4(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ movl %ecx,%eax # w => eax
+ mull 8(%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,8(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ movl %ecx,%eax # w => eax
+ mull 12(%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,12(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ addl $16,%ebx # a+=4 (4 words)
+ addl $16,%edi # r+=4 (4 words)
+
+ jmp .L210
+# .align 4
+.L211:
+ movl %esi,%eax # return(c)
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+.Lfe2:
+ .align 4
+.globl _bn_sqr_words
+_bn_sqr_words:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ movl 16(%esp),%esi # r
+ movl 20(%esp),%edi # a
+ movl 24(%esp),%ebx # n
+# .align 4
+ shrl $2,%ebx
+ jz .L99
+.L28:
+ movl (%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,(%esi) # put low into return addr
+ movl %edx,4(%esi) # put high into return addr
+
+ movl 4(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,8(%esi) # put low into return addr
+ movl %edx,12(%esi) # put high into return addr
+
+ movl 8(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,16(%esi) # put low into return addr
+ movl %edx,20(%esi) # put high into return addr
+
+ movl 12(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,24(%esi) # put low into return addr
+ movl %edx,28(%esi) # put high into return addr
+
+ addl $16,%edi
+ addl $32,%esi
+ decl %ebx # n-=4;
+ jz .L99
+ jmp .L28
+# .align 4
+.L99:
+ movl 24(%esp),%ebx # n
+ andl $3,%ebx
+ jz .L29
+ movl (%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,(%esi) # put low into return addr
+ movl %edx,4(%esi) # put high into return addr
+ decl %ebx # n--;
+ jz .L29
+ movl 4(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,8(%esi) # put low into return addr
+ movl %edx,12(%esi) # put high into return addr
+ decl %ebx # n--;
+ jz .L29
+ movl 8(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,16(%esi) # put low into return addr
+ movl %edx,20(%esi) # put high into return addr
+
+.L29:
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+.Lfe3:
+ .align 4
+.globl _bn_div64
+_bn_div64:
+ movl 4(%esp),%edx # a (high word) => edx
+ movl 8(%esp),%eax # b (low word) => eax
+ divl 12(%esp) # (a:b)/c, 64-bit a:b divided by c
+ ret
+.Lfe4:
+ .ident "GCC: (GNU) 2.6.3"
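The x86 ports get bn_div64 essentially for free: divl divides the 64-bit value in edx:eax by a 32-bit operand in hardware, which is why the routine above is three instructions while the RISC ports loop over 16-bit digits with correction steps. A C sketch of what it computes (assuming the a/b/c comments above name the high word, low word, and divisor):

    #include <stdint.h>

    /* Quotient of the 64-bit value a:b by c.  Like divl itself, this is
     * undefined when the quotient does not fit in 32 bits (the
     * instruction would fault). */
    static uint32_t div64_sketch(uint32_t a, uint32_t b, uint32_t c)
        {
        uint64_t n = ((uint64_t)a << 32) | b;  /* edx:eax */
        return (uint32_t)(n / c);              /* divl    */
        }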
diff --git a/crypto/bn/asm/x86-lnx.s b/crypto/bn/asm/x86-lnx.s
new file mode 100644
index 0000000000..5123867440
--- /dev/null
+++ b/crypto/bn/asm/x86-lnx.s
@@ -0,0 +1,282 @@
+ .file "bn_mulw.c"
+ .version "01.01"
+gcc2_compiled.:
+.text
+ .align 16
+.globl bn_mul_add_word
+ .type bn_mul_add_word,@function
+bn_mul_add_word:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+ # ax L(t)
+ # dx H(t)
+ # bx a
+ # cx w
+ # di r
+ # si c
+ # bp num
+ xorl %esi,%esi # c=0
+ movl 20(%esp),%edi # r => edi
+ movl 24(%esp),%ebx # a => ebx
+ movl 32(%esp),%ecx # w => ecx
+ movl 28(%esp),%ebp # num => ebp
+
+ shrl $2,%ebp # num/4
+ je .L910
+
+ .align 4
+.L110:
+ # Round 1
+ movl %ecx,%eax # w => eax
+ mull (%ebx) # w * *a
+ addl (%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+= carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ # Round 2
+ movl %ecx,%eax # w => eax
+ mull 4(%ebx) # w * *a
+ addl 4(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+= carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,4(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ # Round 3
+ movl %ecx,%eax # w => eax
+ mull 8(%ebx) # w * *a
+ addl 8(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,8(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ # Round 4
+ movl %ecx,%eax # w => eax
+ mull 12(%ebx) # w * *a
+ addl 12(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,12(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ addl $16,%ebx # a+=4 (4 words)
+ addl $16,%edi # r+=4 (4 words)
+
+ decl %ebp # --num
+ je .L910
+ jmp .L110
+ .align 4
+.L910:
+ movl 28(%esp),%ebp # num => ebp
+ andl $3,%ebp
+ je .L111
+
+ # Round 1
+ movl %ecx,%eax # w => eax
+ mull (%ebx) # w * *a
+ addl (%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L111
+
+ # Round 2
+ movl %ecx,%eax # w => eax
+ mull 4(%ebx) # w * *a
+ addl 4(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,4(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L111
+
+ # Round 3
+ movl %ecx,%eax # w => eax
+ mull 8(%ebx) # w * *a
+ addl 8(%edi),%eax # *r+=L(t)
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,8(%edi) # *r+=L(t)
+ movl %edx,%esi # c=H(t)
+
+ .align 4
+.L111:
+ movl %esi,%eax # return(c)
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+.Lfe1:
+ .size bn_mul_add_word,.Lfe1-bn_mul_add_word
+ .align 16
+.globl bn_mul_word
+ .type bn_mul_word,@function
+bn_mul_word:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+ # ax L(t)
+ # dx H(t)
+ # bx a
+ # cx w
+ # di r
+ # num bp
+ # si c
+ xorl %esi,%esi # c=0
+ movl 20(%esp),%edi # r => edi
+	movl 24(%esp),%ebx	# a => ebx
+ movl 28(%esp),%ebp # num => bp
+ movl 32(%esp),%ecx # w => ecx
+
+ .align 4
+.L210:
+ movl %ecx,%eax # w => eax
+ mull (%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ movl %ecx,%eax # w => eax
+ mull 4(%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,4(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ movl %ecx,%eax # w => eax
+ mull 8(%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,8(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ movl %ecx,%eax # w => eax
+ mull 12(%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,12(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ addl $16,%ebx # a+=4 (4 words)
+ addl $16,%edi # r+=4 (4 words)
+
+ jmp .L210
+ .align 16
+.L211:
+ movl %esi,%eax # return(c)
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+.Lfe2:
+ .size bn_mul_word,.Lfe2-bn_mul_word
+
+ .align 16
+.globl bn_sqr_words
+ .type bn_sqr_words,@function
+bn_sqr_words:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ movl 16(%esp),%esi # r
+ movl 20(%esp),%edi # a
+ movl 24(%esp),%ebx # n
+ .align 4
+ shrl $2,%ebx
+ jz .L99
+.L28:
+ movl (%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,(%esi) # put low into return addr
+ movl %edx,4(%esi) # put high into return addr
+
+ movl 4(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,8(%esi) # put low into return addr
+ movl %edx,12(%esi) # put high into return addr
+
+ movl 8(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,16(%esi) # put low into return addr
+ movl %edx,20(%esi) # put high into return addr
+
+ movl 12(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,24(%esi) # put low into return addr
+ movl %edx,28(%esi) # put high into return addr
+
+ addl $16,%edi
+ addl $32,%esi
+ decl %ebx # n-=4;
+ jz .L99
+ jmp .L28
+ .align 16
+.L99:
+ movl 24(%esp),%ebx # n
+ andl $3,%ebx
+ jz .L29
+ movl (%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,(%esi) # put low into return addr
+ movl %edx,4(%esi) # put high into return addr
+ decl %ebx # n--;
+ jz .L29
+ movl 4(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,8(%esi) # put low into return addr
+ movl %edx,12(%esi) # put high into return addr
+ decl %ebx # n--;
+ jz .L29
+ movl 8(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,16(%esi) # put low into return addr
+ movl %edx,20(%esi) # put high into return addr
+
+.L29:
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+.Lfe3:
+ .size bn_sqr_words,.Lfe3-bn_sqr_words
+
+ .align 16
+.globl bn_div64
+ .type bn_div64,@function
+bn_div64:
+ movl 4(%esp),%edx # a
+ movl 8(%esp),%eax # b
+	divl 12(%esp)		# (a:b)/c, 64/32-bit divide
+ ret
+.Lfe4:
+ .size bn_div64,.Lfe4-bn_div64
+ .ident "GCC: (GNU) 2.6.3"
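
The unrolled loops in bn_mul_add_word are plain word-by-word
multiply-accumulate with carry propagation: mull leaves the 64-bit
product H(t):L(t) in edx:eax, the addl/adcl pairs fold in *r and the
running carry, and esi hands H(t) on to the next word. A hedged C
sketch of what the routine computes, with unsigned long standing in
for the 32-bit word type and the argument order r, a, num, w read off
the stack offsets in the listing:

    unsigned long bn_mul_add_word(unsigned long *r, unsigned long *a,
                                  int num, unsigned long w)
        {
        unsigned long long t;   /* H(t):L(t) */
        unsigned long c = 0;    /* the carry kept in %esi */

        while (num--)
            {
            t = (unsigned long long)w * *(a++) + *r + c;
            *(r++) = (unsigned long)t;    /* store L(t) back to *r */
            c = (unsigned long)(t >> 32); /* c = H(t) */
            }
        return c;
        }
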
diff --git a/crypto/bn/asm/x86-lnxa.s b/crypto/bn/asm/x86-lnxa.s
new file mode 100644
index 0000000000..74855dc74d
--- /dev/null
+++ b/crypto/bn/asm/x86-lnxa.s
@@ -0,0 +1,282 @@
+ .file "bn_mulw.c"
+ .version "01.01"
+gcc2_compiled.:
+.text
+ .align 4
+.globl _bn_mul_add_word
+ .type _bn_mul_add_word,@function
+_bn_mul_add_word:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+ # ax L(t)
+ # dx H(t)
+ # bx a
+ # cx w
+ # di r
+ # si c
+ # bp num
+ xorl %esi,%esi # c=0
+ movl 20(%esp),%edi # r => edi
+	movl 24(%esp),%ebx	# a => ebx
+ movl 32(%esp),%ecx # w => ecx
+ movl 28(%esp),%ebp # num => ebp
+
+ shrl $2,%ebp # num/4
+ je .L910
+
+# .align 4
+.L110:
+ # Round 1
+ movl %ecx,%eax # w => eax
+ mull (%ebx) # w * *a
+	addl (%edi),%eax	# L(t)+= *r
+ adcl $0,%edx # H(t)+= carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+	movl %eax,(%edi)	# *r=L(t)
+ movl %edx,%esi # c=H(t)
+
+ # Round 2
+ movl %ecx,%eax # w => eax
+ mull 4(%ebx) # w * *a
+	addl 4(%edi),%eax	# L(t)+= *r
+ adcl $0,%edx # H(t)+= carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+	movl %eax,4(%edi)	# *r=L(t)
+ movl %edx,%esi # c=H(t)
+
+ # Round 3
+ movl %ecx,%eax # w => eax
+ mull 8(%ebx) # w * *a
+	addl 8(%edi),%eax	# L(t)+= *r
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+	movl %eax,8(%edi)	# *r=L(t)
+ movl %edx,%esi # c=H(t)
+
+ # Round 4
+ movl %ecx,%eax # w => eax
+ mull 12(%ebx) # w * *a
+	addl 12(%edi),%eax	# L(t)+= *r
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+	movl %eax,12(%edi)	# *r=L(t)
+ movl %edx,%esi # c=H(t)
+
+ addl $16,%ebx # a+=4 (4 words)
+ addl $16,%edi # r+=4 (4 words)
+
+ decl %ebp # --num
+ je .L910
+ jmp .L110
+# .align 4
+.L910:
+ movl 28(%esp),%ebp # num => ebp
+ andl $3,%ebp
+ je .L111
+
+ # Round 1
+ movl %ecx,%eax # w => eax
+ mull (%ebx) # w * *a
+	addl (%edi),%eax	# L(t)+= *r
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+	movl %eax,(%edi)	# *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L111
+
+ # Round 2
+ movl %ecx,%eax # w => eax
+ mull 4(%ebx) # w * *a
+	addl 4(%edi),%eax	# L(t)+= *r
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+	movl %eax,4(%edi)	# *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L111
+
+ # Round 3
+ movl %ecx,%eax # w => eax
+ mull 8(%ebx) # w * *a
+	addl 8(%edi),%eax	# L(t)+= *r
+ adcl $0,%edx # H(t)+=carry
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+	movl %eax,8(%edi)	# *r=L(t)
+ movl %edx,%esi # c=H(t)
+
+# .align 4
+.L111:
+ movl %esi,%eax # return(c)
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+.Lfe1:
+ .size _bn_mul_add_word,.Lfe1-_bn_mul_add_word
+ .align 4
+.globl _bn_mul_word
+ .type _bn_mul_word,@function
+_bn_mul_word:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+ # ax L(t)
+ # dx H(t)
+ # bx a
+ # cx w
+ # di r
+ # num bp
+ # si c
+ xorl %esi,%esi # c=0
+ movl 20(%esp),%edi # r => edi
+	movl 24(%esp),%ebx	# a => ebx
+ movl 28(%esp),%ebp # num => bp
+ movl 32(%esp),%ecx # w => ecx
+
+# .align 4
+.L210:
+ movl %ecx,%eax # w => eax
+ mull (%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ movl %ecx,%eax # w => eax
+ mull 4(%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,4(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ movl %ecx,%eax # w => eax
+ mull 8(%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,8(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ movl %ecx,%eax # w => eax
+ mull 12(%ebx) # w * *a
+ addl %esi,%eax # L(t)+=c
+ adcl $0,%edx # H(t)+=carry
+ movl %eax,12(%edi) # *r=L(t)
+ movl %edx,%esi # c=H(t)
+ decl %ebp # --num
+ je .L211
+
+ addl $16,%ebx # a+=4 (4 words)
+ addl $16,%edi # r+=4 (4 words)
+
+ jmp .L210
+# .align 4
+.L211:
+ movl %esi,%eax # return(c)
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+.Lfe2:
+ .size _bn_mul_word,.Lfe2-_bn_mul_word
+
+ .align 4
+.globl _bn_sqr_words
+ .type _bn_sqr_words,@function
+_bn_sqr_words:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ movl 16(%esp),%esi # r
+ movl 20(%esp),%edi # a
+ movl 24(%esp),%ebx # n
+# .align 4
+ shrl $2,%ebx
+ jz .L99
+.L28:
+ movl (%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,(%esi) # put low into return addr
+ movl %edx,4(%esi) # put high into return addr
+
+ movl 4(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,8(%esi) # put low into return addr
+ movl %edx,12(%esi) # put high into return addr
+
+ movl 8(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,16(%esi) # put low into return addr
+ movl %edx,20(%esi) # put high into return addr
+
+ movl 12(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,24(%esi) # put low into return addr
+ movl %edx,28(%esi) # put high into return addr
+
+ addl $16,%edi
+ addl $32,%esi
+ decl %ebx # n-=4;
+ jz .L99
+ jmp .L28
+# .align 4
+.L99:
+ movl 24(%esp),%ebx # n
+ andl $3,%ebx
+ jz .L29
+ movl (%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,(%esi) # put low into return addr
+ movl %edx,4(%esi) # put high into return addr
+ decl %ebx # n--;
+ jz .L29
+ movl 4(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,8(%esi) # put low into return addr
+ movl %edx,12(%esi) # put high into return addr
+ decl %ebx # n--;
+ jz .L29
+ movl 8(%edi),%eax # get a
+ mull %eax # a*a
+ movl %eax,16(%esi) # put low into return addr
+ movl %edx,20(%esi) # put high into return addr
+
+.L29:
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+.Lfe3:
+ .size _bn_sqr_words,.Lfe3-_bn_sqr_words
+
+ .align 4
+.globl _bn_div64
+ .type _bn_div64,@function
+_bn_div64:
+ movl 4(%esp),%edx # a
+ movl 8(%esp),%eax # b
+	divl 12(%esp)		# (a:b)/c, 64/32-bit divide
+ ret
+.Lfe4:
+ .size _bn_div64,.Lfe4-_bn_div64
+ .ident "GCC: (GNU) 2.6.3"
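
x86-lnxa.s is the a.out flavour of the ELF listing above: every
symbol gains a leading underscore and the .align directives inside
the loops are commented out, but the instruction stream is identical.
Its bn_mul_word is the same loop minus the read-modify-write of *r;
in the same hedged C terms as the sketch above:

    unsigned long bn_mul_word(unsigned long *r, unsigned long *a,
                              int num, unsigned long w)
        {
        unsigned long long t;
        unsigned long c = 0;

        while (num--)
            {
            t = (unsigned long long)w * *(a++) + c;
            *(r++) = (unsigned long)t;    /* *r = L(t) */
            c = (unsigned long)(t >> 32); /* c  = H(t) */
            }
        return c;
        }
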
diff --git a/crypto/bn/asm/x86-sol.s b/crypto/bn/asm/x86-sol.s
new file mode 100644
index 0000000000..c961e64fa0
--- /dev/null
+++ b/crypto/bn/asm/x86-sol.s
@@ -0,0 +1,224 @@
+ .file "bn_mulw.c"
+ .version "01.01"
+gcc2_compiled.:
+.text
+ .align 16
+.globl bn_mul_add_word
+ .type bn_mul_add_word,@function
+bn_mul_add_word:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+ / ax L(t)
+ / dx H(t)
+ / bx a
+ / cx w
+ / di r
+ / si c
+ / bp num
+ xorl %esi,%esi / c=0
+ movl 20(%esp),%edi / r => edi
+	movl 24(%esp),%ebx	/ a => ebx
+ movl 28(%esp),%ebp / num => ebp
+ movl 32(%esp),%ecx / w => ecx
+
+ .align 4
+.L110:
+ movl %ecx,%eax / w => eax
+ mull (%ebx) / w * *a
+ addl (%edi),%eax / L(t)+= *r
+ adcl $0,%edx / H(t)+= carry
+ addl %esi,%eax / L(t)+=c
+ adcl $0,%edx / H(t)+=carry
+ movl %eax,(%edi) / *r=L(t)
+ movl %edx,%esi / c=H(t)
+ decl %ebp / --num
+ je .L111
+
+ movl %ecx,%eax / w => eax
+ mull 4(%ebx) / w * *a
+ addl 4(%edi),%eax / L(t)+= *r
+ adcl $0,%edx / H(t)+= carry
+ addl %esi,%eax / L(t)+=c
+ adcl $0,%edx / H(t)+=carry
+ movl %eax,4(%edi) / *r=L(t)
+ movl %edx,%esi / c=H(t)
+ decl %ebp / --num
+ je .L111
+
+ movl %ecx,%eax / w => eax
+ mull 8(%ebx) / w * *a
+ addl 8(%edi),%eax / L(t)+= *r
+ adcl $0,%edx / H(t)+= carry
+ addl %esi,%eax / L(t)+=c
+ adcl $0,%edx / H(t)+=carry
+ movl %eax,8(%edi) / *r=L(t)
+ movl %edx,%esi / c=H(t)
+ decl %ebp / --num
+ je .L111
+
+ movl %ecx,%eax / w => eax
+ mull 12(%ebx) / w * *a
+ addl 12(%edi),%eax / L(t)+= *r
+ adcl $0,%edx / H(t)+= carry
+ addl %esi,%eax / L(t)+=c
+ adcl $0,%edx / H(t)+=carry
+ movl %eax,12(%edi) / *r=L(t)
+ movl %edx,%esi / c=H(t)
+ decl %ebp / --num
+ je .L111
+
+ addl $16,%ebx / a+=4 (4 words)
+ addl $16,%edi / r+=4 (4 words)
+
+ jmp .L110
+ .align 16
+.L111:
+ movl %esi,%eax / return(c)
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+.Lfe1:
+ .size bn_mul_add_word,.Lfe1-bn_mul_add_word
+ .align 16
+.globl bn_mul_word
+ .type bn_mul_word,@function
+bn_mul_word:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+
+ / ax L(t)
+ / dx H(t)
+ / bx a
+ / cx w
+ / di r
+ / num bp
+ / si c
+ xorl %esi,%esi / c=0
+ movl 20(%esp),%edi / r => edi
+	movl 24(%esp),%ebx	/ a => ebx
+ movl 28(%esp),%ebp / num => ebp
+ movl 32(%esp),%ecx / w => ecx
+
+ .align 4
+.L210:
+ movl %ecx,%eax / w => eax
+ mull (%ebx) / w * *a
+ addl %esi,%eax / L(t)+=c
+ adcl $0,%edx / H(t)+=carry
+ movl %eax,(%edi) / *r=L(t)
+ movl %edx,%esi / c=H(t)
+ decl %ebp / --num
+ je .L211
+
+ movl %ecx,%eax / w => eax
+ mull 4(%ebx) / w * *a
+ addl %esi,%eax / L(t)+=c
+ adcl $0,%edx / H(t)+=carry
+ movl %eax,4(%edi) / *r=L(t)
+ movl %edx,%esi / c=H(t)
+ decl %ebp / --num
+ je .L211
+
+ movl %ecx,%eax / w => eax
+ mull 8(%ebx) / w * *a
+ addl %esi,%eax / L(t)+=c
+ adcl $0,%edx / H(t)+=carry
+ movl %eax,8(%edi) / *r=L(t)
+ movl %edx,%esi / c=H(t)
+ decl %ebp / --num
+ je .L211
+
+ movl %ecx,%eax / w => eax
+ mull 12(%ebx) / w * *a
+ addl %esi,%eax / L(t)+=c
+ adcl $0,%edx / H(t)+=carry
+ movl %eax,12(%edi) / *r=L(t)
+ movl %edx,%esi / c=H(t)
+ decl %ebp / --num
+ je .L211
+
+ addl $16,%ebx / a+=4 (4 words)
+ addl $16,%edi / r+=4 (4 words)
+
+ jmp .L210
+ .align 16
+.L211:
+ movl %esi,%eax / return(c)
+ popl %ebx
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+.Lfe2:
+ .size bn_mul_word,.Lfe2-bn_mul_word
+
+ .align 16
+.globl bn_sqr_words
+ .type bn_sqr_words,@function
+bn_sqr_words:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ movl 16(%esp),%esi / r
+ movl 20(%esp),%edi / a
+ movl 24(%esp),%ebx / n
+ .align 4
+.L28:
+ movl (%edi),%eax / get a
+ mull %eax / a*a
+ movl %eax,(%esi) / put low into return addr
+ movl %edx,4(%esi) / put high into return addr
+ decl %ebx / n--;
+ je .L29
+
+ movl 4(%edi),%eax / get a
+ mull %eax / a*a
+ movl %eax,8(%esi) / put low into return addr
+ movl %edx,12(%esi) / put high into return addr
+ decl %ebx / n--;
+ je .L29
+
+ movl 8(%edi),%eax / get a
+ mull %eax / a*a
+ movl %eax,16(%esi) / put low into return addr
+ movl %edx,20(%esi) / put high into return addr
+ decl %ebx / n--;
+ je .L29
+
+ movl 12(%edi),%eax / get a
+ mull %eax / a*a
+ movl %eax,24(%esi) / put low into return addr
+ movl %edx,28(%esi) / put high into return addr
+ decl %ebx / n--;
+ je .L29
+
+ addl $16,%edi
+ addl $32,%esi
+ jmp .L28
+ .align 16
+.L29:
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+.Lfe3:
+ .size bn_sqr_words,.Lfe3-bn_sqr_words
+
+ .align 16
+.globl bn_div64
+ .type bn_div64,@function
+bn_div64:
+ movl 4(%esp),%edx / a
+ movl 8(%esp),%eax / b
+	divl 12(%esp)		/ (a:b)/c, 64/32-bit divide
+ ret
+.Lfe4:
+ .size bn_div64,.Lfe4-bn_div64
+ .ident "GCC: (GNU) 2.6.3"
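
bn_sqr_words squares each input word into a double-width slot, so r[]
is twice the length of a[] and no carry ever crosses a word boundary,
which is why the loop body needs no adc at all. A hedged C
equivalent, under the same assumptions as the sketches above:

    void bn_sqr_words(unsigned long *r, unsigned long *a, int n)
        {
        unsigned long long t;

        while (n--)
            {
            t = (unsigned long long)*a * *a;
            a++;
            *(r++) = (unsigned long)t;         /* low word  */
            *(r++) = (unsigned long)(t >> 32); /* high word */
            }
        }
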
diff --git a/crypto/bn/asm/x86nt32.asm b/crypto/bn/asm/x86nt32.asm
new file mode 100644
index 0000000000..0198c2c583
--- /dev/null
+++ b/crypto/bn/asm/x86nt32.asm
@@ -0,0 +1,288 @@
+ TITLE bn_mulw.c
+ .386P
+.model FLAT
+PUBLIC _bn_mul_add_word
+_TEXT SEGMENT
+; File bn_mulw.c
+_bn_mul_add_word PROC NEAR
+ push ebp
+ push ebx
+ push esi
+ push edi
+ mov edi,DWORD PTR 20[esp] ; r
+ mov ebx,DWORD PTR 24[esp] ; a
+ mov ecx,DWORD PTR 32[esp] ; w
+ xor esi,esi ; c=0
+
+ mov ebp,DWORD PTR 28[esp] ; num
+ shr ebp,2 ; num/4
+ jz $L666
+
+$L546:
+ ; Round one
+ mov eax,DWORD PTR [ebx] ; edx:eax = *a * w
+ mul ecx
+ add eax,DWORD PTR [edi] ; *r+=ax
+ adc edx,0
+ add eax,esi ; edx:eax += c
+ adc edx,0
+ mov DWORD PTR [edi],eax ; *r+=ax
+ mov esi,edx ; c = overflow
+
+ ; Round two
+ mov eax,DWORD PTR 4[ebx] ; edx:eax = *a * w
+ mul ecx
+ add eax,DWORD PTR 4[edi] ; *r+=ax
+ adc edx,0
+ add eax,esi ; edx:eax += c
+ adc edx,0
+ mov DWORD PTR 4[edi],eax ; *r+=ax
+ mov esi,edx ; c = overflow
+
+ ; Round three
+ mov eax,DWORD PTR 8[ebx] ; edx:eax = *a * w
+ mul ecx
+ add eax,DWORD PTR 8[edi] ; *r+=ax
+ adc edx,0
+ add eax,esi ; edx:eax += c
+ adc edx,0
+ mov DWORD PTR 8[edi],eax ; *r+=ax
+ mov esi,edx ; c = overflow
+
+ ; Round four
+ mov eax,DWORD PTR 12[ebx] ; edx:eax = *a * w
+ mul ecx
+ add eax,DWORD PTR 12[edi] ; *r+=ax
+ adc edx,0
+ add eax,esi ; edx:eax += c
+ adc edx,0
+ mov DWORD PTR 12[edi],eax ; *r+=ax
+ mov esi,edx ; c = overflow
+
+ add ebx,16
+ add edi,16
+
+ dec ebp
+ jz $L666
+ jmp $L546
+$L666:
+ mov ebp,DWORD PTR 28[esp] ; num
+ and ebp,3 ; num%4
+ jz $L547
+
+ ; Round one
+ mov eax,DWORD PTR [ebx] ; edx:eax = *a * w
+ mul ecx
+ add eax,DWORD PTR [edi] ; *r+=ax
+ adc edx,0
+ add eax,esi ; edx:eax += c
+ adc edx,0
+ mov DWORD PTR [edi],eax ; *r+=ax
+ mov esi,edx ; c = overflow
+ dec ebp
+ jz $L547
+ ; Round two
+ mov eax,DWORD PTR 4[ebx] ; edx:eax = *a * w
+ mul ecx
+ add eax,DWORD PTR 4[edi] ; *r+=ax
+ adc edx,0
+ add eax,esi ; edx:eax += c
+ adc edx,0
+ mov DWORD PTR 4[edi],eax ; *r+=ax
+ mov esi,edx ; c = overflow
+ dec ebp
+ jz $L547
+ ; Round three
+ mov eax,DWORD PTR 8[ebx] ; edx:eax = *a * w
+ mul ecx
+ add eax,DWORD PTR 8[edi] ; *r+=ax
+ adc edx,0
+ add eax,esi ; edx:eax += c
+ adc edx,0
+ mov DWORD PTR 8[edi],eax ; *r+=ax
+ mov esi,edx ; c = overflow
+
+$L547:
+ mov eax,esi
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+_bn_mul_add_word ENDP
+_TEXT ENDS
+PUBLIC _bn_mul_word
+_TEXT SEGMENT
+_bn_mul_word PROC NEAR
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov edi,DWORD PTR 20[esp] ; r
+ mov ebx,DWORD PTR 24[esp] ; a
+ mov ebp,DWORD PTR 28[esp] ; num
+ mov ecx,DWORD PTR 32[esp] ; w
+ xor esi,esi ; c=0
+
+ shr ebp,2 ; num/4
+ jz $L266
+
+$L593:
+ ; Round one
+ mov eax,DWORD PTR [ebx] ; edx:eax= w * *a
+ mul ecx
+ add eax,esi ; edx:eax+=c
+ adc edx,0
+ mov DWORD PTR [edi],eax ; *r=eax
+ mov esi,edx ; c=edx
+ ; Round two
+ mov eax,DWORD PTR 4[ebx] ; edx:eax= w * *a
+ mul ecx
+ add eax,esi ; edx:eax+=c
+ adc edx,0
+ mov DWORD PTR 4[edi],eax ; *r=eax
+ mov esi,edx ; c=edx
+ ; Round three
+ mov eax,DWORD PTR 8[ebx] ; edx:eax= w * *a
+ mul ecx
+ add eax,esi ; edx:eax+=c
+ adc edx,0
+ mov DWORD PTR 8[edi],eax ; *r=eax
+ mov esi,edx ; c=edx
+ ; Round four
+ mov eax,DWORD PTR 12[ebx] ; edx:eax= w * *a
+ mul ecx
+ add eax,esi ; edx:eax+=c
+ adc edx,0
+ mov DWORD PTR 12[edi],eax ; *r=eax
+ mov esi,edx ; c=edx
+
+ add ebx,16
+ add edi,16
+
+ dec ebp
+ jz $L266
+ jmp $L593
+$L266:
+ mov ebp,DWORD PTR 28[esp] ; num
+ and ebp,3
+ jz $L601
+
+ ; Round one
+ mov eax,DWORD PTR [ebx] ; edx:eax= w * *a
+ mul ecx
+ add eax,esi ; edx:eax+=c
+ adc edx,0
+ mov DWORD PTR [edi],eax ; *r=eax
+ mov esi,edx ; c=edx
+ dec ebp
+ jz $L601
+ ; Round two
+ mov eax,DWORD PTR 4[ebx] ; edx:eax= w * *a
+ mul ecx
+ add eax,esi ; edx:eax+=c
+ adc edx,0
+ mov DWORD PTR 4[edi],eax ; *r=eax
+ mov esi,edx ; c=edx
+ dec ebp
+ jz $L601
+ ; Round three
+ mov eax,DWORD PTR 8[ebx] ; edx:eax= w * *a
+ mul ecx
+ add eax,esi ; edx:eax+=c
+ adc edx,0
+ mov DWORD PTR 8[edi],eax ; *r=eax
+ mov esi,edx ; c=edx
+
+$L601:
+ mov eax,esi
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+ ret
+_bn_mul_word ENDP
+_TEXT ENDS
+PUBLIC _bn_sqr_words
+_TEXT SEGMENT
+_bn_sqr_words PROC NEAR
+ push ebx
+ push esi
+ push edi
+ mov esi,DWORD PTR 16[esp] ; r
+ mov edi,DWORD PTR 20[esp] ; a
+ mov ebx,DWORD PTR 24[esp] ; num
+
+ shr ebx,2 ; num/4
+ jz $L111
+$L640:
+ ; Round 1
+ mov eax, DWORD PTR [edi]
+ mul eax ; *a * *a
+ mov DWORD PTR [esi],eax
+ mov DWORD PTR 4[esi],edx
+ ; Round 2
+ mov eax, DWORD PTR 4[edi]
+ mul eax ; *a * *a
+ mov DWORD PTR 8[esi],eax
+ mov DWORD PTR 12[esi],edx
+ ; Round 3
+ mov eax, DWORD PTR 8[edi]
+ mul eax ; *a * *a
+ mov DWORD PTR 16[esi],eax
+ mov DWORD PTR 20[esi],edx
+ ; Round 4
+ mov eax, DWORD PTR 12[edi]
+ mul eax ; *a * *a
+ mov DWORD PTR 24[esi],eax
+ mov DWORD PTR 28[esi],edx
+
+ add edi,16
+ add esi,32
+
+ dec ebx
+ jz $L111
+ jmp $L640
+$L111:
+ mov ebx,DWORD PTR 24[esp] ; num
+	and	ebx,3		; num%4
+ jz $L645
+
+ ; Round 1
+ mov eax, DWORD PTR [edi]
+ mul eax ; *a * *a
+ mov DWORD PTR [esi],eax
+ mov DWORD PTR 4[esi],edx
+ dec ebx
+ jz $L645
+ ; Round 2
+ mov eax, DWORD PTR 4[edi]
+ mul eax ; *a * *a
+ mov DWORD PTR 8[esi],eax
+ mov DWORD PTR 12[esi],edx
+ dec ebx
+ jz $L645
+ ; Round 3
+ mov eax, DWORD PTR 8[edi]
+ mul eax ; *a * *a
+ mov DWORD PTR 16[esi],eax
+ mov DWORD PTR 20[esi],edx
+
+$L645:
+ pop edi
+ pop esi
+ pop ebx
+ ret
+_bn_sqr_words ENDP
+_TEXT ENDS
+PUBLIC _bn_div64
+_TEXT SEGMENT
+_bn_div64 PROC NEAR
+ mov edx, DWORD PTR 4[esp]
+ mov eax, DWORD PTR 8[esp]
+ div DWORD PTR 12[esp]
+ ret
+_bn_div64 ENDP
+_TEXT ENDS
+END
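
The NT listing uses cdecl linkage, so after the four pushes the stack
layout matches the gas versions: 20[esp] r, 24[esp] a, 28[esp] num,
32[esp] w. A hypothetical caller (the prototype is an assumption
inferred from those offsets; the name is the PUBLIC symbol minus the
compiler-prepended underscore):

    extern unsigned long bn_mul_add_word(unsigned long *r,
                unsigned long *a, int num, unsigned long w);

    int main(void)
        {
        unsigned long a[4] = {1, 2, 3, 4};
        unsigned long r[4] = {0, 0, 0, 0};
        unsigned long c;

        /* r[] += a[] * 10; the carry out of r[3] comes back in eax */
        c = bn_mul_add_word(r, a, 4, 10);
        return (int)c;
        }
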
diff --git a/crypto/bn/asm/x86nt32.uu b/crypto/bn/asm/x86nt32.uu
new file mode 100644
index 0000000000..99207987c1
--- /dev/null
+++ b/crypto/bn/asm/x86nt32.uu
@@ -0,0 +1,22 @@
+begin 640 x86nt32.obj
+M3`$"`/H&DC-6`@``"P`````````N=&5X=```````````````\@$``&0`````
+M```````````````@`#!@+F1A=&$```#R`0````````````!6`@``````````
+M````````0``PP%535E>+?"04BUPD&(M,)"`S]HML)!S![0)T7(L#]^$#!X/2
+M``/&@](`B0>+\HM#!/?A`T<$@](``\:#T@")1P2+\HM#"/?A`T<(@](``\:#
+MT@")1PB+\HM##/?A`T<,@](``\:#T@")1PR+\H/#$(/'$$UT`NNDBVPD'(/E
+M`W1"BP/WX0,'@](``\:#T@")!XOR370MBT,$]^$#1P2#T@`#QH/2`(E'!(OR
+M3705BT,(]^$#1PB#T@`#QH/2`(E'"(ORB\9?7EM=PU535E>+?"04BUPD&(ML
+M)!R+3"0@,_;![0)T18L#]^$#QH/2`(D'B_*+0P3WX0/&@](`B4<$B_*+0PCW
+MX0/&@](`B4<(B_*+0PSWX0/&@](`B4<,B_*#PQ"#QQ!-=`+KNXML)!R#Y0-T
+M,8L#]^$#QH/2`(D'B_)-="&+0P3WX0/&@](`B4<$B_)-=`^+0PCWX0/&@](`
+MB4<(B_*+QE]>6UW#4U97BW0D$(M\)!2+7"08P>L"=#6+!_?@B0:)5@2+1P3W
+MX(E&"(E6#(M'"/?@B480B584BT<,]^")1AB)5AR#QQ"#QB!+=`+KRXM<)!B#
+MXP-T)8L']^")!HE6!$MT&8M'!/?@B48(B58,2W0+BT<(]^")1A")5A1?7EO#
+MBU0D!(M$)`CW="0,PRYF:6QE`````````/[_``!G`BY<8W)Y<'1O7&)N7&%S
+M;5QX.#9N=#,R+F%S;0```````````"YT97AT``````````$````#`?(!````
+M`````````````````"YD871A``````````(````#`0``````````````````
+M```````````$``````````$`(``"```````5````R0````$`(``"```````B
+M````:@$```$`(``"```````P````Y0$```$`(``"`#H```!?8FY?;75L7V%D
+L9%]W;W)D`%]B;E]M=6Q?=V]R9`!?8FY?<W%R7W=O<F1S`%]B;E]D:78V-```
+`
+end
diff --git a/crypto/bn/asm/x86w16.asm b/crypto/bn/asm/x86w16.asm
new file mode 100644
index 0000000000..66874913e9
--- /dev/null
+++ b/crypto/bn/asm/x86w16.asm
@@ -0,0 +1,297 @@
+; Static Name Aliases
+;
+ TITLE bn_mulw.c
+ .8087
+F_TEXT SEGMENT WORD PUBLIC 'CODE'
+F_TEXT ENDS
+_DATA SEGMENT WORD PUBLIC 'DATA'
+_DATA ENDS
+CONST SEGMENT WORD PUBLIC 'CONST'
+CONST ENDS
+_BSS SEGMENT WORD PUBLIC 'BSS'
+_BSS ENDS
+DGROUP GROUP CONST, _BSS, _DATA
+ ASSUME DS: DGROUP, SS: DGROUP
+F_TEXT SEGMENT
+ ASSUME CS: F_TEXT
+ PUBLIC _bn_mul_add_word
+_bn_mul_add_word PROC FAR
+; Line 58
+ push bp
+ push bx
+ push si
+ push di
+ push ds
+ push es
+ mov bp,sp
+; w = 26
+; num = 24
+; ap = 20
+; rp = 16
+ xor si,si ;c=0;
+ mov di,WORD PTR [bp+16] ; load r
+ mov ds,WORD PTR [bp+18] ; load r
+ mov bx,WORD PTR [bp+20] ; load a
+ mov es,WORD PTR [bp+22] ; load a
+ mov cx,WORD PTR [bp+26] ; load w
+ mov bp,WORD PTR [bp+24] ; load num
+
+ shr bp,1 ; div count by 4 and do groups of 4
+ shr bp,1
+ je $L555
+
+$L546:
+ mov ax,cx
+ mul WORD PTR es:[bx] ; w* *a
+ add ax,WORD PTR ds:[di] ; + *r
+ adc dx,0
+ adc ax,si
+ adc dx,0
+ mov WORD PTR ds:[di],ax
+ mov si,dx
+ ;
+ mov ax,cx
+ mul WORD PTR es:[bx+2] ; w* *a
+ add ax,WORD PTR ds:[di+2] ; + *r
+ adc dx,0
+ adc ax,si
+ adc dx,0
+ mov WORD PTR ds:[di+2],ax
+ mov si,dx
+ ;
+ mov ax,cx
+ mul WORD PTR es:[bx+4] ; w* *a
+ add ax,WORD PTR ds:[di+4] ; + *r
+ adc dx,0
+ adc ax,si
+ adc dx,0
+ mov WORD PTR ds:[di+4],ax
+ mov si,dx
+ ;
+ mov ax,cx
+ mul WORD PTR es:[bx+6] ; w* *a
+ add ax,WORD PTR ds:[di+6] ; + *r
+ adc dx,0
+ adc ax,si
+ adc dx,0
+ mov WORD PTR ds:[di+6],ax
+ mov si,dx
+ ;
+ add bx,8
+ add di,8
+ ;
+ dec bp
+ je $L555
+ jmp $L546
+;
+;
+$L555:
+ mov bp,sp
+ mov bp,WORD PTR [bp+24] ; load num
+ and bp,3
+ dec bp
+ js $L547
+
+ mov ax,cx
+ mul WORD PTR es:[bx] ; w* *a
+ add ax,WORD PTR ds:[di] ; + *r
+ adc dx,0
+ adc ax,si
+ adc dx,0
+ mov WORD PTR ds:[di],ax
+ mov si,dx
+ dec bp
+ js $L547 ; Note that we are now testing for -1
+ ;
+ mov ax,cx
+ mul WORD PTR es:[bx+2] ; w* *a
+ add ax,WORD PTR ds:[di+2] ; + *r
+ adc dx,0
+ adc ax,si
+ adc dx,0
+ mov WORD PTR ds:[di+2],ax
+ mov si,dx
+ dec bp
+ js $L547
+ ;
+ mov ax,cx
+ mul WORD PTR es:[bx+4] ; w* *a
+ add ax,WORD PTR ds:[di+4] ; + *r
+ adc dx,0
+ adc ax,si
+ adc dx,0
+ mov WORD PTR ds:[di+4],ax
+ mov si,dx
+$L547:
+ mov ax,si
+ pop es
+ pop ds
+ pop di
+ pop si
+ pop bx
+ pop bp
+ ret
+ nop
+
+_bn_mul_add_word ENDP
+ PUBLIC _bn_mul_word
+_bn_mul_word PROC FAR
+; Line 76
+ push bp
+ push bx
+ push si
+ push di
+ push ds
+ push es
+ xor si,si
+ mov bp,sp
+ mov di,WORD PTR [bp+16] ; r
+ mov ds,WORD PTR [bp+18]
+ mov bx,WORD PTR [bp+20] ; a
+ mov es,WORD PTR [bp+22]
+ mov cx,WORD PTR [bp+26] ; w
+ mov bp,WORD PTR [bp+24] ; num
+$FC743:
+ mov ax,cx
+ mul WORD PTR es:[bx]
+ add ax,si
+ adc dx,0
+ mov WORD PTR ds:[di],ax
+ mov si,dx
+ dec bp
+ je $L764
+ ;
+ mov ax,cx
+ mul WORD PTR es:[bx+2]
+ add ax,si
+ adc dx,0
+ mov WORD PTR ds:[di+2],ax
+ mov si,dx
+ dec bp
+ je $L764
+ ;
+ mov ax,cx
+ mul WORD PTR es:[bx+4]
+ add ax,si
+ adc dx,0
+ mov WORD PTR ds:[di+4],ax
+ mov si,dx
+ dec bp
+ je $L764
+ ;
+ mov ax,cx
+ mul WORD PTR es:[bx+6]
+ add ax,si
+ adc dx,0
+ mov WORD PTR ds:[di+6],ax
+ mov si,dx
+ dec bp
+ je $L764
+ ;
+ add bx,8
+ add di,8
+ jmp $FC743
+ nop
+$L764:
+ mov ax,si
+ pop es
+ pop ds
+ pop di
+ pop si
+ pop bx
+ pop bp
+ ret
+ nop
+_bn_mul_word ENDP
+ PUBLIC _bn_sqr_words
+_bn_sqr_words PROC FAR
+; Line 92
+ push bp
+ push bx
+ push si
+ push di
+ push ds
+ push es
+ mov bp,sp
+ mov si,WORD PTR [bp+16]
+ mov ds,WORD PTR [bp+18]
+ mov di,WORD PTR [bp+20]
+ mov es,WORD PTR [bp+22]
+ mov bx,WORD PTR [bp+24]
+
+ mov bp,bx ; save a memory lookup later
+ shr bx,1 ; div count by 4 and do groups of 4
+ shr bx,1
+ je $L666
+
+$L765:
+ mov ax,WORD PTR es:[di]
+ mul ax
+ mov WORD PTR ds:[si],ax
+ mov WORD PTR ds:[si+2],dx
+ ;
+ mov ax,WORD PTR es:[di+2]
+ mul ax
+ mov WORD PTR ds:[si+4],ax
+ mov WORD PTR ds:[si+6],dx
+ ;
+ mov ax,WORD PTR es:[di+4]
+ mul ax
+ mov WORD PTR ds:[si+8],ax
+ mov WORD PTR ds:[si+10],dx
+ ;
+ mov ax,WORD PTR es:[di+6]
+ mul ax
+ mov WORD PTR ds:[si+12],ax
+ mov WORD PTR ds:[si+14],dx
+ ;
+ add di,8
+ add si,16
+ dec bx
+ je $L666
+ jmp $L765
+$L666:
+ and bp,3
+	dec	bp		; bp holds the copy of num (saved from bx)
+ js $L645
+ ;
+ mov ax,WORD PTR es:[di]
+ mul ax
+ mov WORD PTR ds:[si],ax
+ mov WORD PTR ds:[si+2],dx
+ dec bp
+ js $L645
+ ;
+ mov ax,WORD PTR es:[di+2]
+ mul ax
+ mov WORD PTR ds:[si+4],ax
+ mov WORD PTR ds:[si+6],dx
+ dec bp
+ js $L645
+ ;
+ mov ax,WORD PTR es:[di+4]
+ mul ax
+ mov WORD PTR ds:[si+8],ax
+ mov WORD PTR ds:[si+10],dx
+$L645:
+ pop es
+ pop ds
+ pop di
+ pop si
+ pop bx
+ pop bp
+ ret
+
+_bn_sqr_words ENDP
+ PUBLIC _bn_div64
+_bn_div64 PROC FAR
+ push bp
+ mov bp,sp
+ mov dx, WORD PTR [bp+6]
+ mov ax, WORD PTR [bp+8]
+ div WORD PTR [bp+10]
+ pop bp
+ ret
+_bn_div64 ENDP
+F_TEXT ENDS
+END
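
In the 16-bit build the word size halves: words are 16 bits, products
32, and the pointers are far, which is why every pointer load above
takes two moves (offset into di/bx, segment into ds/es). A hedged
sketch of the 16-bit multiply-accumulate loop (the _16 suffix is
ours, to keep it apart from the 32-bit sketches; unsigned short is
the word, unsigned long the double-width product):

    unsigned short bn_mul_add_word16(unsigned short *r, unsigned short *a,
                                     int num, unsigned short w)
        {
        unsigned long t;      /* dx:ax */
        unsigned short c = 0; /* si    */

        while (num--)
            {
            t = (unsigned long)w * *(a++) + *r + c;
            *(r++) = (unsigned short)t;    /* ax -> ds:[di] */
            c = (unsigned short)(t >> 16); /* c = dx        */
            }
        return c;
        }
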
diff --git a/crypto/bn/asm/x86w16.uu b/crypto/bn/asm/x86w16.uu
new file mode 100644
index 0000000000..89c5e144b7
--- /dev/null
+++ b/crypto/bn/asm/x86w16.uu
@@ -0,0 +1,20 @@
+begin 640 x86w16.obj
+M@!P`&BY<8W)Y<'1O7&)N7&%S;5QX.#9W,38N87-MQY8U```$7T)34P5?1$%4
+M009$1U)/55`&1E]415A4!4-/3E-4`T)34P5#3TY35`1$051!!$-/1$5EF`<`
+M2/`!!0H!&)@'`$@```,)`0R8!P!(```&"`$*F`<`2````@<!#YH(``3_`O\#
+M_P14D$4```$-7V)N7W-Q<E]W;W)D<U4!``E?8FY?9&EV-C3B`0`07V)N7VUU
+M;%]A9&1?=V]R9`````Q?8FY?;75L7W=O<F3<``#`B`0``*(!T:#T`0$``%53
+M5E<>!HOL,_:+?A".7A*+7A2.1A:+3AJ+;AC1[='M=&"+P2;W)P,%@](`$\:#
+MT@")!8ORB\$F]V<"`T4"@](`$\:#T@")10*+\HO!)O=G!`-%!(/2`!/&@](`
+MB44$B_*+P2;W9P8#10:#T@`3QH/2`(E%!HOR@\,(@\<(370"ZZ"+[(MN&(/E
+M`TUX18O!)O<G`P6#T@`3QH/2`(D%B_)->"^+P2;W9P(#10*#T@`3QH/2`(E%
+M`HOR37@6B\$F]V<$`T4$@](`$\:#T@")102+\HO&!Q]?7EM=RY!54U97'@8S
+M]HOLBWX0CEX2BUX4CD86BTX:BVX8B\$F]R<#QH/2`(D%B_)-=$*+P2;W9P(#
+MQH/2`(E%`HOR370OB\$F]V<$`\:#T@")102+\DUT'(O!)O=G!@/&@](`B44&
+MB_)-=`F#PPB#QPCKKI"+Q@<?7UY;7<N055-65QX&B^R+=A".7A*+?A2.1A:+
+M7AB+Z]'KT>MT.2:+!??@B02)5`(FBT4"]^")1`2)5`8FBT4$]^")1`B)5`HF
+MBT4&]^")1`R)5`Z#QPB#QA!+=`+KQX/E`TUX*":+!??@B02)5`)->!LFBT4"
+M]^")1`2)5`9->`PFBT4$]^")1`B)5`H''U]>6UW+58OLBU8&BT8(]W8*7<NZ
+%B@(``'0`
+`
+end
diff --git a/crypto/bn/asm/x86w32.asm b/crypto/bn/asm/x86w32.asm
new file mode 100644
index 0000000000..0e4452dfa9
--- /dev/null
+++ b/crypto/bn/asm/x86w32.asm
@@ -0,0 +1,303 @@
+; Static Name Aliases
+;
+ TITLE bn_mulw.c
+ .386
+F_TEXT SEGMENT WORD USE16 PUBLIC 'CODE'
+F_TEXT ENDS
+_DATA SEGMENT WORD USE16 PUBLIC 'DATA'
+_DATA ENDS
+CONST SEGMENT WORD USE16 PUBLIC 'CONST'
+CONST ENDS
+_BSS SEGMENT WORD USE16 PUBLIC 'BSS'
+_BSS ENDS
+DGROUP GROUP CONST, _BSS, _DATA
+ ASSUME DS: DGROUP, SS: DGROUP
+F_TEXT SEGMENT
+ ASSUME CS: F_TEXT
+ PUBLIC _bn_mul_add_word
+_bn_mul_add_word PROC FAR
+; Line 58
+ push bp
+ push bx
+ push esi
+ push di
+ push ds
+ push es
+ mov bp,sp
+; w = 28
+; num = 26
+; ap = 22
+; rp = 18
+ xor esi,esi ;c=0;
+ mov di,WORD PTR [bp+18] ; load r
+ mov ds,WORD PTR [bp+20] ; load r
+ mov bx,WORD PTR [bp+22] ; load a
+ mov es,WORD PTR [bp+24] ; load a
+ mov ecx,DWORD PTR [bp+28] ; load w
+ mov bp,WORD PTR [bp+26] ; load num
+ shr bp,1 ; div count by 4 and do groups of 4
+ shr bp,1
+ je $L555
+
+$L546:
+ mov eax,ecx
+ mul DWORD PTR es:[bx] ; w* *a
+ add eax,DWORD PTR ds:[di] ; + *r
+ adc edx,0
+ adc eax,esi
+ adc edx,0
+ mov DWORD PTR ds:[di],eax
+ mov esi,edx
+ ;
+ mov eax,ecx
+ mul DWORD PTR es:[bx+4] ; w* *a
+ add eax,DWORD PTR ds:[di+4] ; + *r
+ adc edx,0
+ adc eax,esi
+ adc edx,0
+ mov DWORD PTR ds:[di+4],eax
+ mov esi,edx
+ ;
+ mov eax,ecx
+ mul DWORD PTR es:[bx+8] ; w* *a
+ add eax,DWORD PTR ds:[di+8] ; + *r
+ adc edx,0
+ adc eax,esi
+ adc edx,0
+ mov DWORD PTR ds:[di+8],eax
+ mov esi,edx
+ ;
+ mov eax,ecx
+ mul DWORD PTR es:[bx+12] ; w* *a
+ add eax,DWORD PTR ds:[di+12] ; + *r
+ adc edx,0
+ adc eax,esi
+ adc edx,0
+ mov DWORD PTR ds:[di+12],eax
+ mov esi,edx
+ ;
+ add bx,16
+ add di,16
+ ;
+ dec bp
+ je $L555
+ jmp $L546
+;
+;
+$L555:
+ mov bp,sp
+ mov bp,WORD PTR [bp+26] ; load num
+ and bp,3
+ dec bp
+ js $L547
+
+ mov eax,ecx
+ mul DWORD PTR es:[bx] ; w* *a
+ add eax,DWORD PTR ds:[di] ; + *r
+ adc edx,0
+ adc eax,esi
+ adc edx,0
+ mov DWORD PTR ds:[di],eax
+ mov esi,edx
+ dec bp
+ js $L547 ; Note that we are now testing for -1
+ ;
+ mov eax,ecx
+ mul DWORD PTR es:[bx+4] ; w* *a
+ add eax,DWORD PTR ds:[di+4] ; + *r
+ adc edx,0
+ adc eax,esi
+ adc edx,0
+ mov DWORD PTR ds:[di+4],eax
+ mov esi,edx
+ dec bp
+ js $L547
+ ;
+ mov eax,ecx
+ mul DWORD PTR es:[bx+8] ; w* *a
+ add eax,DWORD PTR ds:[di+8] ; + *r
+ adc edx,0
+ adc eax,esi
+ adc edx,0
+ mov DWORD PTR ds:[di+8],eax
+ mov esi,edx
+$L547:
+ mov eax,esi
+ mov edx,esi
+ shr edx,16
+ pop es
+ pop ds
+ pop di
+ pop esi
+ pop bx
+ pop bp
+ ret
+ nop
+
+_bn_mul_add_word ENDP
+ PUBLIC _bn_mul_word
+_bn_mul_word PROC FAR
+; Line 76
+ push bp
+ push bx
+ push esi
+ push di
+ push ds
+ push es
+ xor esi,esi
+ mov bp,sp
+ mov di,WORD PTR [bp+18] ; r
+ mov ds,WORD PTR [bp+20]
+ mov bx,WORD PTR [bp+22] ; a
+ mov es,WORD PTR [bp+24]
+ mov ecx,DWORD PTR [bp+28] ; w
+ mov bp,WORD PTR [bp+26] ; num
+
+$FC743:
+ mov eax,ecx
+ mul DWORD PTR es:[bx]
+ add eax,esi
+ adc edx,0
+ mov DWORD PTR ds:[di],eax
+ mov esi,edx
+ dec bp
+ je $L764
+ ;
+ mov eax,ecx
+ mul DWORD PTR es:[bx+4]
+ add eax,esi
+ adc edx,0
+ mov DWORD PTR ds:[di+4],eax
+ mov esi,edx
+ dec bp
+ je $L764
+ ;
+ mov eax,ecx
+ mul DWORD PTR es:[bx+8]
+ add eax,esi
+ adc edx,0
+ mov DWORD PTR ds:[di+8],eax
+ mov esi,edx
+ dec bp
+ je $L764
+ ;
+ mov eax,ecx
+ mul DWORD PTR es:[bx+12]
+ add eax,esi
+ adc edx,0
+ mov DWORD PTR ds:[di+12],eax
+ mov esi,edx
+ dec bp
+ je $L764
+ ;
+ add bx,16
+ add di,16
+ jmp $FC743
+ nop
+$L764:
+ mov eax,esi
+ mov edx,esi
+ shr edx,16
+ pop es
+ pop ds
+ pop di
+ pop esi
+ pop bx
+ pop bp
+ ret
+ nop
+_bn_mul_word ENDP
+ PUBLIC _bn_sqr_words
+_bn_sqr_words PROC FAR
+; Line 92
+ push bp
+ push bx
+ push si
+ push di
+ push ds
+ push es
+ mov bp,sp
+ mov si,WORD PTR [bp+16]
+ mov ds,WORD PTR [bp+18]
+ mov di,WORD PTR [bp+20]
+ mov es,WORD PTR [bp+22]
+ mov bx,WORD PTR [bp+24]
+
+ mov bp,bx ; save a memory lookup later
+ shr bx,1 ; div count by 4 and do groups of 4
+ shr bx,1
+ je $L666
+
+$L765:
+ mov eax,DWORD PTR es:[di]
+ mul eax
+ mov DWORD PTR ds:[si],eax
+ mov DWORD PTR ds:[si+4],edx
+ ;
+ mov eax,DWORD PTR es:[di+4]
+ mul eax
+ mov DWORD PTR ds:[si+8],eax
+ mov DWORD PTR ds:[si+12],edx
+ ;
+ mov eax,DWORD PTR es:[di+8]
+ mul eax
+ mov DWORD PTR ds:[si+16],eax
+ mov DWORD PTR ds:[si+20],edx
+ ;
+ mov eax,DWORD PTR es:[di+12]
+ mul eax
+ mov DWORD PTR ds:[si+24],eax
+ mov DWORD PTR ds:[si+28],edx
+ ;
+ add di,16
+ add si,32
+ dec bx
+ je $L666
+ jmp $L765
+$L666:
+ and bp,3
+	dec	bp		; bp holds the copy of num (saved from bx)
+ js $L645
+ ;
+ mov eax,DWORD PTR es:[di]
+ mul eax
+ mov DWORD PTR ds:[si],eax
+ mov DWORD PTR ds:[si+4],edx
+ dec bp
+ js $L645
+ ;
+ mov eax,DWORD PTR es:[di+4]
+ mul eax
+ mov DWORD PTR ds:[si+8],eax
+ mov DWORD PTR ds:[si+12],edx
+ dec bp
+ js $L645
+ ;
+ mov eax,DWORD PTR es:[di+8]
+ mul eax
+ mov DWORD PTR ds:[si+16],eax
+ mov DWORD PTR ds:[si+20],edx
+$L645:
+ pop es
+ pop ds
+ pop di
+ pop si
+ pop bx
+ pop bp
+ ret
+
+_bn_sqr_words ENDP
+ PUBLIC _bn_div64
+_bn_div64 PROC FAR
+ push bp
+ mov bp,sp
+ mov edx, DWORD PTR [bp+6]
+ mov eax, DWORD PTR [bp+10]
+ div DWORD PTR [bp+14]
+ mov edx,eax
+ shr edx,16
+ pop bp
+ ret
+_bn_div64 ENDP
+F_TEXT ENDS
+END
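
x86w32.asm is the hybrid case: .386 instructions on 32-bit words
inside USE16 segments, for a 16-bit compiler that returns 32-bit
values split across the dx:ax pair. That convention is what the
mov edx,esi / shr edx,16 (and mov edx,eax / shr edx,16) sequences
before each ret are for. A small hedged illustration of how such a
caller sees the result:

    /* the 16-bit caller reassembles the 32-bit result from dx:ax */
    unsigned long from_dx_ax(unsigned short dx, unsigned short ax)
        {
        return ((unsigned long)dx << 16) | ax;
        }
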
diff --git a/crypto/bn/asm/x86w32.uu b/crypto/bn/asm/x86w32.uu
new file mode 100644
index 0000000000..edcd84e25e
--- /dev/null
+++ b/crypto/bn/asm/x86w32.uu
@@ -0,0 +1,23 @@
+begin 640 x86w32.obj
+M@!P`&BY<8W)Y<'1O7&)N7&%S;5QX.#9W,S(N87-MR98U```$7T)34P5?1$%4
+M009$1U)/55`&1E]415A4!4-/3E-4`T)34P5#3TY35`1$051!!$-/1$5EF`<`
+M2(`"!0H!AY@'`$@```,)`0R8!P!(```&"`$*F`<`2````@<!#YH(``3_`O\#
+M_P14D$4```$-7V)N7W-Q<E]W;W)D<[\!``E?8FY?9&EV-C1H`@`07V)N7VUU
+M;%]A9&1?=V]R9`````Q?8FY?;75L7W=O<F0B`0"(B`0``*(!T:"$`@$``%53
+M9E97'@:+[&8S]HM^$HY>%(M>%HY&&&:+3AR+;AK1[='M#X2``&:+P68F]R=F
+M`P5F@](`9A/&9H/2`&:)!6:+\F:+P68F]V<$9@-%!&:#T@!F$\9F@](`9HE%
+M!&:+\F:+P68F]V<(9@-%"&:#T@!F$\9F@](`9HE%"&:+\F:+P68F]V<,9@-%
+M#&:#T@!F$\9F@](`9HE%#&:+\H/#$(/'$$UT`NN`B^R+;AJ#Y0-->%UFB\%F
+M)O<G9@,%9H/2`&83QF:#T@!FB05FB_)->#]FB\%F)O=G!&8#101F@](`9A/&
+M9H/2`&:)101FB_)->!YFB\%F)O=G"&8#10AF@](`9A/&9H/2`&:)10AFB_)F
+MB\9FB]9FP>H0!Q]?9EY;7<N055-F5E<>!F8S]HOLBWX2CEX4BUX6CD889HM.
+M'(MN&F:+P68F]R=F`\9F@](`9HD%9HOR37149HO!9B;W9P1F`\9F@](`9HE%
+M!&:+\DUT.V:+P68F]V<(9@/&9H/2`&:)10AFB_)-=")FB\%F)O=G#&8#QF:#
+MT@!FB44,9HOR370)@\,0@\<0ZY:09HO&9HO69L'J$`<?7V9>6UW+D%535E<>
+M!HOLBW80CEX2BWX4CD86BUX8B^O1Z]'K=$EF)HL%9O?@9HD$9HE4!&8FBT4$
+M9O?@9HE$"&:)5`QF)HM%"&;WX&:)1!!FB5049B:+10QF]^!FB4089HE4'(/'
+M$(/&($MT`NNW@^4#37@T9B:+!6;WX&:)!&:)5`1->"-F)HM%!&;WX&:)1`AF
+MB50,37@09B:+10AF]^!FB4009HE4%`<?7UY;7<M5B^QFBU8&9HM&"F;W=@YF
+.B]!FP>H07<O`B@(``'0`
+`
+end