aboutsummaryrefslogtreecommitdiffstats
path: root/crypto/bn/asm/x86nt32.asm
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/bn/asm/x86nt32.asm')
-rw-r--r--crypto/bn/asm/x86nt32.asm288
1 files changed, 288 insertions, 0 deletions
diff --git a/crypto/bn/asm/x86nt32.asm b/crypto/bn/asm/x86nt32.asm
new file mode 100644
index 0000000000..0198c2c583
--- /dev/null
+++ b/crypto/bn/asm/x86nt32.asm
@@ -0,0 +1,288 @@
+ TITLE bn_mulw.c
+ .386P
+.model FLAT
+PUBLIC _bn_mul_add_word
+_TEXT SEGMENT
+; File bn_mulw.c
+; bn_mul_add_word(r, a, num, w): for each of the num 32-bit words, r[i] += a[i]*w with
+; carry propagation between words; the final carry word is returned in eax.
+; Stack arguments after the four pushes: r=20[esp], a=24[esp], num=28[esp], w=32[esp].
+; Register roles: edi=r, ebx=a, ecx=w, esi=carry c, ebp=counter; eax/edx are mul scratch.
+; The main loop handles four words per pass; up to three leftover words are done inline.
+_bn_mul_add_word PROC NEAR
+        push    ebp                     ; ebp, ebx, esi, edi are restored before ret
+        push    ebx
+        push    esi
+        push    edi
+        mov     edi,DWORD PTR 20[esp]   ; edi = r
+        mov     ebx,DWORD PTR 24[esp]   ; ebx = a
+        mov     ecx,DWORD PTR 32[esp]   ; ecx = w
+        xor     esi,esi                 ; c = 0
+        mov     ebp,DWORD PTR 28[esp]   ; ebp = num
+        shr     ebp,2                   ; ebp = num/4 = number of four-word passes
+        jz      $L666                   ; fewer than four words: go straight to the tail
+
+$L546:
+        ; Word 0 of this pass
+        mov     eax,DWORD PTR [ebx]     ; eax = a[0]
+        mul     ecx                     ; edx:eax = a[0] * w
+        add     eax,DWORD PTR [edi]     ; low word += r[0]
+        adc     edx,0                   ; carry from that add goes into the high word
+        add     eax,esi                 ; low word += c
+        adc     edx,0                   ; carry from that add goes into the high word
+        mov     DWORD PTR [edi],eax     ; r[0] = low word
+        mov     esi,edx                 ; c = high word
+        ; Word 1 of this pass
+        mov     eax,DWORD PTR 4[ebx]    ; eax = a[1]
+        mul     ecx                     ; edx:eax = a[1] * w
+        add     eax,DWORD PTR 4[edi]    ; low word += r[1]
+        adc     edx,0
+        add     eax,esi                 ; low word += c
+        adc     edx,0
+        mov     DWORD PTR 4[edi],eax    ; r[1] = low word
+        mov     esi,edx                 ; c = high word
+        ; Word 2 of this pass
+        mov     eax,DWORD PTR 8[ebx]    ; eax = a[2]
+        mul     ecx                     ; edx:eax = a[2] * w
+        add     eax,DWORD PTR 8[edi]    ; low word += r[2]
+        adc     edx,0
+        add     eax,esi                 ; low word += c
+        adc     edx,0
+        mov     DWORD PTR 8[edi],eax    ; r[2] = low word
+        mov     esi,edx                 ; c = high word
+        ; Word 3 of this pass
+        mov     eax,DWORD PTR 12[ebx]   ; eax = a[3]
+        mul     ecx                     ; edx:eax = a[3] * w
+        add     eax,DWORD PTR 12[edi]   ; low word += r[3]
+        adc     edx,0
+        add     eax,esi                 ; low word += c
+        adc     edx,0
+        mov     DWORD PTR 12[edi],eax   ; r[3] = low word
+        mov     esi,edx                 ; c = high word
+
+        add     ebx,16                  ; a += 4 words
+        add     edi,16                  ; r += 4 words
+        dec     ebp
+        jnz     $L546                   ; more passes left; otherwise fall into the tail
+
+$L666:
+        mov     ebp,DWORD PTR 28[esp]   ; reload num
+        and     ebp,3                   ; ebp = num%4 = leftover words (0..3)
+        jz      $L547
+
+        ; Leftover word 0
+        mov     eax,DWORD PTR [ebx]     ; eax = a[0]
+        mul     ecx                     ; edx:eax = a[0] * w
+        add     eax,DWORD PTR [edi]     ; low word += r[0]
+        adc     edx,0
+        add     eax,esi                 ; low word += c
+        adc     edx,0
+        mov     DWORD PTR [edi],eax     ; r[0] = low word
+        mov     esi,edx                 ; c = high word
+        dec     ebp
+        jz      $L547
+        ; Leftover word 1
+        mov     eax,DWORD PTR 4[ebx]    ; eax = a[1]
+        mul     ecx                     ; edx:eax = a[1] * w
+        add     eax,DWORD PTR 4[edi]    ; low word += r[1]
+        adc     edx,0
+        add     eax,esi                 ; low word += c
+        adc     edx,0
+        mov     DWORD PTR 4[edi],eax    ; r[1] = low word
+        mov     esi,edx                 ; c = high word
+        dec     ebp
+        jz      $L547
+        ; Leftover word 2 (last possible one, so no counter update)
+        mov     eax,DWORD PTR 8[ebx]    ; eax = a[2]
+        mul     ecx                     ; edx:eax = a[2] * w
+        add     eax,DWORD PTR 8[edi]    ; low word += r[2]
+        adc     edx,0
+        add     eax,esi                 ; low word += c
+        adc     edx,0
+        mov     DWORD PTR 8[edi],eax    ; r[2] = low word
+        mov     esi,edx                 ; c = high word
+
+$L547:
+        mov     eax,esi                 ; return value = final carry
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret                             ; plain ret: stack arguments are left in place
+_bn_mul_add_word ENDP
+_TEXT ENDS
+PUBLIC _bn_mul_word
+_TEXT SEGMENT
+; bn_mul_word(r, a, num, w): for each of the num 32-bit words, r[i] = low word of
+; (a[i]*w + c), with c carried from the previous word; the final c is returned in eax.
+; Stack arguments after the four pushes: r=20[esp], a=24[esp], num=28[esp], w=32[esp].
+; Register roles: edi=r, ebx=a, ecx=w, esi=carry c, ebp=counter; eax/edx are mul scratch.
+_bn_mul_word PROC NEAR
+        push    ebp                     ; ebp, ebx, esi, edi are restored before ret
+        push    ebx
+        push    esi
+        push    edi
+        mov     edi,DWORD PTR 20[esp]   ; edi = r
+        mov     ebx,DWORD PTR 24[esp]   ; ebx = a
+        mov     ebp,DWORD PTR 28[esp]   ; ebp = num
+        mov     ecx,DWORD PTR 32[esp]   ; ecx = w
+        xor     esi,esi                 ; c = 0
+        shr     ebp,2                   ; ebp = num/4 = number of four-word passes
+        jz      $L266                   ; fewer than four words: go straight to the tail
+
+$L593:
+        ; Word 0 of this pass
+        mov     eax,DWORD PTR [ebx]     ; eax = a[0]
+        mul     ecx                     ; edx:eax = a[0] * w
+        add     eax,esi                 ; low word += c
+        adc     edx,0                   ; carry from that add goes into the high word
+        mov     DWORD PTR [edi],eax     ; r[0] = low word
+        mov     esi,edx                 ; c = high word
+        ; Word 1 of this pass
+        mov     eax,DWORD PTR 4[ebx]    ; eax = a[1]
+        mul     ecx                     ; edx:eax = a[1] * w
+        add     eax,esi                 ; low word += c
+        adc     edx,0
+        mov     DWORD PTR 4[edi],eax    ; r[1] = low word
+        mov     esi,edx                 ; c = high word
+        ; Word 2 of this pass
+        mov     eax,DWORD PTR 8[ebx]    ; eax = a[2]
+        mul     ecx                     ; edx:eax = a[2] * w
+        add     eax,esi                 ; low word += c
+        adc     edx,0
+        mov     DWORD PTR 8[edi],eax    ; r[2] = low word
+        mov     esi,edx                 ; c = high word
+        ; Word 3 of this pass
+        mov     eax,DWORD PTR 12[ebx]   ; eax = a[3]
+        mul     ecx                     ; edx:eax = a[3] * w
+        add     eax,esi                 ; low word += c
+        adc     edx,0
+        mov     DWORD PTR 12[edi],eax   ; r[3] = low word
+        mov     esi,edx                 ; c = high word
+
+        add     ebx,16                  ; a += 4 words
+        add     edi,16                  ; r += 4 words
+        dec     ebp
+        jnz     $L593                   ; more passes left; otherwise fall into the tail
+$L266:
+        mov     ebp,DWORD PTR 28[esp]   ; reload num
+        and     ebp,3                   ; ebp = num%4 = leftover words (0..3)
+        jz      $L601
+
+        ; Leftover word 0
+        mov     eax,DWORD PTR [ebx]     ; eax = a[0]
+        mul     ecx                     ; edx:eax = a[0] * w
+        add     eax,esi                 ; low word += c
+        adc     edx,0
+        mov     DWORD PTR [edi],eax     ; r[0] = low word
+        mov     esi,edx                 ; c = high word
+        dec     ebp
+        jz      $L601
+        ; Leftover word 1
+        mov     eax,DWORD PTR 4[ebx]    ; eax = a[1]
+        mul     ecx                     ; edx:eax = a[1] * w
+        add     eax,esi                 ; low word += c
+        adc     edx,0
+        mov     DWORD PTR 4[edi],eax    ; r[1] = low word
+        mov     esi,edx                 ; c = high word
+        dec     ebp
+        jz      $L601
+        ; Leftover word 2 (last possible one, so no counter update)
+        mov     eax,DWORD PTR 8[ebx]    ; eax = a[2]
+        mul     ecx                     ; edx:eax = a[2] * w
+        add     eax,esi                 ; low word += c
+        adc     edx,0
+        mov     DWORD PTR 8[edi],eax    ; r[2] = low word
+        mov     esi,edx                 ; c = high word
+
+$L601:
+        mov     eax,esi                 ; return value = final carry
+        pop     edi
+        pop     esi
+        pop     ebx
+        pop     ebp
+        ret                             ; plain ret: stack arguments are left in place
+_bn_mul_word ENDP
+_TEXT ENDS
+PUBLIC _bn_sqr_words
+_TEXT SEGMENT
+; bn_sqr_words(r, a, num): for each of the num 32-bit words of a, store the 64-bit square
+; a[i]*a[i] as two words, r[2*i] = low half and r[2*i+1] = high half. Words are independent.
+; Stack arguments after the three pushes: r=16[esp], a=20[esp], num=24[esp].
+; Register roles: esi=r, edi=a, ebx=counter; eax is never set up as a return value.
+_bn_sqr_words PROC NEAR
+        push    ebx                     ; ebx, esi, edi are restored before ret
+        push    esi
+        push    edi
+        mov     esi,DWORD PTR 16[esp]   ; esi = r
+        mov     edi,DWORD PTR 20[esp]   ; edi = a
+        mov     ebx,DWORD PTR 24[esp]   ; ebx = num
+        shr     ebx,2                   ; ebx = num/4 = number of four-word passes
+        jz      $L111                   ; fewer than four words: go straight to the tail
+$L640:
+        ; Word 0 of this pass
+        mov     eax, DWORD PTR [edi]    ; eax = a[0]
+        mul     eax                     ; edx:eax = a[0] * a[0]
+        mov     DWORD PTR [esi],eax     ; r[0] = low half
+        mov     DWORD PTR 4[esi],edx    ; r[1] = high half
+        ; Word 1 of this pass
+        mov     eax, DWORD PTR 4[edi]   ; eax = a[1]
+        mul     eax                     ; edx:eax = a[1] * a[1]
+        mov     DWORD PTR 8[esi],eax    ; r[2] = low half
+        mov     DWORD PTR 12[esi],edx   ; r[3] = high half
+        ; Word 2 of this pass
+        mov     eax, DWORD PTR 8[edi]   ; eax = a[2]
+        mul     eax                     ; edx:eax = a[2] * a[2]
+        mov     DWORD PTR 16[esi],eax   ; r[4] = low half
+        mov     DWORD PTR 20[esi],edx   ; r[5] = high half
+        ; Word 3 of this pass
+        mov     eax, DWORD PTR 12[edi]  ; eax = a[3]
+        mul     eax                     ; edx:eax = a[3] * a[3]
+        mov     DWORD PTR 24[esi],eax   ; r[6] = low half
+        mov     DWORD PTR 28[esi],edx   ; r[7] = high half
+
+        add     edi,16                  ; a += 4 words
+        add     esi,32                  ; r += 8 words (two output words per input word)
+        dec     ebx
+        jnz     $L640                   ; more passes left; otherwise fall into the tail
+$L111:
+        mov     ebx,DWORD PTR 24[esp]   ; reload num
+        and     ebx,3                   ; ebx = num%4 = leftover words (0..3)
+        jz      $L645
+        ; Leftover word 0
+        mov     eax, DWORD PTR [edi]    ; eax = a[0]
+        mul     eax                     ; edx:eax = a[0] * a[0]
+        mov     DWORD PTR [esi],eax     ; r[0] = low half
+        mov     DWORD PTR 4[esi],edx    ; r[1] = high half
+        dec     ebx
+        jz      $L645
+        ; Leftover word 1
+        mov     eax, DWORD PTR 4[edi]   ; eax = a[1]
+        mul     eax                     ; edx:eax = a[1] * a[1]
+        mov     DWORD PTR 8[esi],eax    ; r[2] = low half
+        mov     DWORD PTR 12[esi],edx   ; r[3] = high half
+        dec     ebx
+        jz      $L645
+        ; Leftover word 2 (last possible one, so no counter update)
+        mov     eax, DWORD PTR 8[edi]   ; eax = a[2]
+        mul     eax                     ; edx:eax = a[2] * a[2]
+        mov     DWORD PTR 16[esi],eax   ; r[4] = low half
+        mov     DWORD PTR 20[esi],edx   ; r[5] = high half
+
+$L645:
+        pop     edi
+        pop     esi
+        pop     ebx
+        ret                             ; plain ret: stack arguments are left in place
+_bn_sqr_words ENDP
+_TEXT ENDS
+PUBLIC _bn_div64
+_TEXT SEGMENT
+_bn_div64 PROC NEAR                     ; 64-by-32 unsigned divide of three stack arguments
+        mov     eax, DWORD PTR 8[esp]   ; eax = second argument: low 32 bits of the dividend
+        mov     edx, DWORD PTR 4[esp]   ; edx = first argument: high 32 bits of the dividend
+        div     DWORD PTR 12[esp]       ; edx:eax / third argument; faults if it is 0 or <= edx
+        ret                             ; quotient is in eax (remainder in edx); plain ret
+_bn_div64 ENDP
+_TEXT ENDS
+END