diff options
Diffstat (limited to 'crypto/bn/asm')
-rw-r--r-- | crypto/bn/asm/alpha.s | 56 | ||||
-rw-r--r-- | crypto/bn/asm/bn-586.pl | 314 | ||||
-rw-r--r-- | crypto/bn/asm/bn-win32.asm | 689 | ||||
-rw-r--r-- | crypto/bn/asm/bn86unix.cpp | 752 | ||||
-rw-r--r-- | crypto/bn/asm/pa-risc.s | 8 | ||||
-rw-r--r-- | crypto/bn/asm/pa-risc2.s | 8 | ||||
-rw-r--r-- | crypto/bn/asm/r3000.s | 16 | ||||
-rw-r--r-- | crypto/bn/asm/sparc.s | 185 | ||||
-rw-r--r-- | crypto/bn/asm/x86w16.asm | 12 | ||||
-rw-r--r-- | crypto/bn/asm/x86w32.asm | 73 |
10 files changed, 2032 insertions, 81 deletions
diff --git a/crypto/bn/asm/alpha.s b/crypto/bn/asm/alpha.s index d56f715ecd..1d17b1d619 100644 --- a/crypto/bn/asm/alpha.s +++ b/crypto/bn/asm/alpha.s @@ -3,16 +3,15 @@ # Thanks to tzeruch@ceddec.com for sending me the gcc output for # bn_div64. .file 1 "bn_mulw.c" - .version "01.01" .set noat gcc2_compiled.: __gnu_compiled_c: .text .align 3 - .globl bn_mul_add_word - .ent bn_mul_add_word -bn_mul_add_word: -bn_mul_add_word..ng: + .globl bn_mul_add_words + .ent bn_mul_add_words +bn_mul_add_words: +bn_mul_add_words..ng: .frame $30,0,$26,0 .prologue 0 subq $18,2,$25 # num=-2 @@ -74,12 +73,12 @@ $42: .align 4 $43: ret $31,($26),1 - .end bn_mul_add_word + .end bn_mul_add_words .align 3 - .globl bn_mul_word - .ent bn_mul_word -bn_mul_word: -bn_mul_word..ng: + .globl bn_mul_words + .ent bn_mul_words +bn_mul_words: +bn_mul_words..ng: .frame $30,0,$26,0 .prologue 0 subq $18,2,$25 # num=-2 @@ -125,7 +124,7 @@ $242: stq $1,0($16) $243: ret $31,($26),1 - .end bn_mul_word + .end bn_mul_words .align 3 .globl bn_sqr_words .ent bn_sqr_words @@ -173,6 +172,41 @@ $443: ret $31,($26),1 .end bn_sqr_words + .align 3 + .globl bn_add_words + .ent bn_add_words +bn_add_words: +bn_add_words..ng: + .frame $30,0,$26,0 + .prologue 0 + + bis $31,$31,$8 # carry = 0 + ble $19,$900 +$901: + ldq $0,0($17) # a[0] + ldq $1,0($18) # a[1] + + addq $0,$1,$3 # c=a+b; + addq $17,8,$17 # a++ + + cmpult $3,$1,$7 # did we overflow? + addq $18,8,$18 # b++ + + addq $8,$3,$3 # c+=carry + + cmpult $3,$8,$8 # did we overflow? + stq $3,($16) # r[0]=c + + addq $7,$8,$8 # add into overflow + subq $19,1,$19 # loop-- + + addq $16,8,$16 # r++ + bgt $19,$901 +$900: + bis $8,$8,$0 # return carry + ret $31,($26),1 + .end bn_add_words + # # What follows was taken directly from the C compiler with a few # hacks to redo the lables. diff --git a/crypto/bn/asm/bn-586.pl b/crypto/bn/asm/bn-586.pl new file mode 100644 index 0000000000..128f0f29d6 --- /dev/null +++ b/crypto/bn/asm/bn-586.pl @@ -0,0 +1,314 @@ +#!/usr/local/bin/perl +# + +#!/usr/local/bin/perl + +push(@INC,"perlasm","../../perlasm"); +require "x86asm.pl"; + +&asm_init($ARGV[0],"bn-586.pl"); + +&bn_mul_add_words("bn_mul_add_words"); +&bn_mul_words("bn_mul_words"); +&bn_sqr_words("bn_sqr_words"); +&bn_div64("bn_div64"); +&bn_add_words("bn_add_words"); + +&asm_finish(); + +sub bn_mul_add_words + { + local($name)=@_; + + &function_begin($name,""); + + &comment(""); + $Low="eax"; + $High="edx"; + $a="ebx"; + $w="ebp"; + $r="edi"; + $c="esi"; + + &xor($c,$c); # clear carry + &mov($r,&wparam(0)); # + + &mov("ecx",&wparam(2)); # + &mov($a,&wparam(1)); # + + &and("ecx",0xfffffff8); # num / 8 + &mov($w,&wparam(3)); # + + &push("ecx"); # Up the stack for a tmp variable + + &jz(&label("maw_finish")); + + &set_label("maw_loop",0); + + &mov(&swtmp(0),"ecx"); # + + for ($i=0; $i<32; $i+=4) + { + &comment("Round $i"); + + &mov("eax",&DWP($i,$a,"",0)); # *a + &mul($w); # *a * w + &add("eax",$c); # L(t)+= *r + &mov($c,&DWP($i,$r,"",0)); # L(t)+= *r + &adc("edx",0); # H(t)+=carry + &add("eax",$c); # L(t)+=c + &adc("edx",0); # H(t)+=carry + &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); + &mov($c,"edx"); # c= H(t); + } + + &comment(""); + &mov("ecx",&swtmp(0)); # + &add($a,32); + &add($r,32); + &sub("ecx",8); + &jnz(&label("maw_loop")); + + &set_label("maw_finish",0); + &mov("ecx",&wparam(2)); # get num + &and("ecx",7); + &jnz(&label("maw_finish2")); # helps branch prediction + &jmp(&label("maw_end")); + + &set_label("maw_finish2",1); + for ($i=0; $i<7; $i++) + { + &comment("Tail Round $i"); + &mov("eax",&DWP($i*4,$a,"",0));# *a + &mul($w); # *a * w + &add("eax",$c); # L(t)+=c + &mov($c,&DWP($i*4,$r,"",0)); # L(t)+= *r + &adc("edx",0); # H(t)+=carry + &add("eax",$c); + &adc("edx",0); # H(t)+=carry + &dec("ecx") if ($i != 7-1); + &mov(&DWP($i*4,$r,"",0),"eax"); # *r= L(t); + &mov($c,"edx"); # c= H(t); + &jz(&label("maw_end")) if ($i != 7-1); + } + &set_label("maw_end",0); + &mov("eax",$c); + + &pop("ecx"); # clear variable from + + &function_end($name); + } + +sub bn_mul_words + { + local($name)=@_; + + &function_begin($name,""); + + &comment(""); + $Low="eax"; + $High="edx"; + $a="ebx"; + $w="ecx"; + $r="edi"; + $c="esi"; + $num="ebp"; + + &xor($c,$c); # clear carry + &mov($r,&wparam(0)); # + &mov($a,&wparam(1)); # + &mov($num,&wparam(2)); # + &mov($w,&wparam(3)); # + + &and($num,0xfffffff8); # num / 8 + &jz(&label("mw_finish")); + + &set_label("mw_loop",0); + for ($i=0; $i<32; $i+=4) + { + &comment("Round $i"); + + &mov("eax",&DWP($i,$a,"",0)); # *a + &mul($w); # *a * w + &add("eax",$c); # L(t)+=c + # XXX + + &adc("edx",0); # H(t)+=carry + &mov(&DWP($i,$r,"",0),"eax"); # *r= L(t); + + &mov($c,"edx"); # c= H(t); + } + + &comment(""); + &add($a,32); + &add($r,32); + &sub($num,8); + &jz(&label("mw_finish")); + &jmp(&label("mw_loop")); + + &set_label("mw_finish",0); + &mov($num,&wparam(2)); # get num + &and($num,7); + &jnz(&label("mw_finish2")); + &jmp(&label("mw_end")); + + &set_label("mw_finish2",1); + for ($i=0; $i<7; $i++) + { + &comment("Tail Round $i"); + &mov("eax",&DWP($i*4,$a,"",0));# *a + &mul($w); # *a * w + &add("eax",$c); # L(t)+=c + # XXX + &adc("edx",0); # H(t)+=carry + &mov(&DWP($i*4,$r,"",0),"eax");# *r= L(t); + &mov($c,"edx"); # c= H(t); + &dec($num) if ($i != 7-1); + &jz(&label("mw_end")) if ($i != 7-1); + } + &set_label("mw_end",0); + &mov("eax",$c); + + &function_end($name); + } + +sub bn_sqr_words + { + local($name)=@_; + + &function_begin($name,""); + + &comment(""); + $r="esi"; + $a="edi"; + $num="ebx"; + + &mov($r,&wparam(0)); # + &mov($a,&wparam(1)); # + &mov($num,&wparam(2)); # + + &and($num,0xfffffff8); # num / 8 + &jz(&label("sw_finish")); + + &set_label("sw_loop",0); + for ($i=0; $i<32; $i+=4) + { + &comment("Round $i"); + &mov("eax",&DWP($i,$a,"",0)); # *a + # XXX + &mul("eax"); # *a * *a + &mov(&DWP($i*2,$r,"",0),"eax"); # + &mov(&DWP($i*2+4,$r,"",0),"edx");# + } + + &comment(""); + &add($a,32); + &add($r,64); + &sub($num,8); + &jnz(&label("sw_loop")); + + &set_label("sw_finish",0); + &mov($num,&wparam(2)); # get num + &and($num,7); + &jz(&label("sw_end")); + + for ($i=0; $i<7; $i++) + { + &comment("Tail Round $i"); + &mov("eax",&DWP($i*4,$a,"",0)); # *a + # XXX + &mul("eax"); # *a * *a + &mov(&DWP($i*8,$r,"",0),"eax"); # + &dec($num) if ($i != 7-1); + &mov(&DWP($i*8+4,$r,"",0),"edx"); + &jz(&label("sw_end")) if ($i != 7-1); + } + &set_label("sw_end",0); + + &function_end($name); + } + +sub bn_div64 + { + local($name)=@_; + + &function_begin($name,""); + &mov("edx",&wparam(0)); # + &mov("eax",&wparam(1)); # + &mov("ebx",&wparam(2)); # + &div("ebx"); + &function_end($name); + } + +sub bn_add_words + { + local($name)=@_; + + &function_begin($name,""); + + &comment(""); + $a="esi"; + $b="edi"; + $c="eax"; + $r="ebx"; + $tmp1="ecx"; + $tmp2="edx"; + $num="ebp"; + + &mov($r,&wparam(0)); # get r + &mov($a,&wparam(1)); # get a + &mov($b,&wparam(2)); # get b + &mov($num,&wparam(3)); # get num + &xor($c,$c); # clear carry + &and($num,0xfffffff8); # num / 8 + + &jz(&label("aw_finish")); + + &set_label("aw_loop",0); + for ($i=0; $i<8; $i++) + { + &comment("Round $i"); + + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &mov($tmp2,&DWP($i*4,$b,"",0)); # *b + &add($tmp1,$c); + &mov($c,0); + &adc($c,$c); + &add($tmp1,$tmp2); + &adc($c,0); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *r + } + + &comment(""); + &add($a,32); + &add($b,32); + &add($r,32); + &sub($num,8); + &jnz(&label("aw_loop")); + + &set_label("aw_finish",0); + &mov($num,&wparam(3)); # get num + &and($num,7); + &jz(&label("aw_end")); + + for ($i=0; $i<7; $i++) + { + &comment("Tail Round $i"); + &mov($tmp1,&DWP($i*4,$a,"",0)); # *a + &mov($tmp2,&DWP($i*4,$b,"",0));# *b + &add($tmp1,$c); + &mov($c,0); + &adc($c,$c); + &add($tmp1,$tmp2); + &adc($c,0); + &dec($num) if ($i != 6); + &mov(&DWP($i*4,$r,"",0),$tmp1); # *a + &jz(&label("aw_end")) if ($i != 6); + } + &set_label("aw_end",0); + + &mov("eax",$c); + + &function_end($name); + } + diff --git a/crypto/bn/asm/bn-win32.asm b/crypto/bn/asm/bn-win32.asm new file mode 100644 index 0000000000..017ea462b0 --- /dev/null +++ b/crypto/bn/asm/bn-win32.asm @@ -0,0 +1,689 @@ + ; Don't even think of reading this code + ; It was automatically generated by bn-586.pl + ; Which is a perl program used to generate the x86 assember for + ; any of elf, a.out, BSDI,Win32, or Solaris + ; eric <eay@cryptsoft.com> + ; + TITLE bn-586.asm + .386 +.model FLAT +_TEXT SEGMENT +PUBLIC _bn_mul_add_words + +_bn_mul_add_words PROC NEAR + push ebp + push ebx + push esi + push edi + ; + xor esi, esi + mov edi, DWORD PTR 20[esp] + mov ecx, DWORD PTR 28[esp] + mov ebx, DWORD PTR 24[esp] + and ecx, 4294967288 + mov ebp, DWORD PTR 32[esp] + push ecx + jz $L000maw_finish +L001maw_loop: + mov DWORD PTR [esp],ecx + ; Round 0 + mov eax, DWORD PTR [ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR [edi] + adc edx, 0 + add eax, esi + adc edx, 0 + mov DWORD PTR [edi],eax + mov esi, edx + ; Round 4 + mov eax, DWORD PTR 4[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 4[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + mov DWORD PTR 4[edi],eax + mov esi, edx + ; Round 8 + mov eax, DWORD PTR 8[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 8[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + mov DWORD PTR 8[edi],eax + mov esi, edx + ; Round 12 + mov eax, DWORD PTR 12[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 12[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + mov DWORD PTR 12[edi],eax + mov esi, edx + ; Round 16 + mov eax, DWORD PTR 16[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 16[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + mov DWORD PTR 16[edi],eax + mov esi, edx + ; Round 20 + mov eax, DWORD PTR 20[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 20[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + mov DWORD PTR 20[edi],eax + mov esi, edx + ; Round 24 + mov eax, DWORD PTR 24[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 24[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + mov DWORD PTR 24[edi],eax + mov esi, edx + ; Round 28 + mov eax, DWORD PTR 28[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 28[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + mov DWORD PTR 28[edi],eax + mov esi, edx + ; + mov ecx, DWORD PTR [esp] + add ebx, 32 + add edi, 32 + sub ecx, 8 + jnz L001maw_loop +$L000maw_finish: + mov ecx, DWORD PTR 32[esp] + and ecx, 7 + jnz $L002maw_finish2 + jmp $L003maw_end +$L002maw_finish2: + ; Tail Round 0 + mov eax, DWORD PTR [ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR [edi] + adc edx, 0 + add eax, esi + adc edx, 0 + dec ecx + mov DWORD PTR [edi],eax + mov esi, edx + jz $L003maw_end + ; Tail Round 1 + mov eax, DWORD PTR 4[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 4[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + dec ecx + mov DWORD PTR 4[edi],eax + mov esi, edx + jz $L003maw_end + ; Tail Round 2 + mov eax, DWORD PTR 8[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 8[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + dec ecx + mov DWORD PTR 8[edi],eax + mov esi, edx + jz $L003maw_end + ; Tail Round 3 + mov eax, DWORD PTR 12[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 12[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + dec ecx + mov DWORD PTR 12[edi],eax + mov esi, edx + jz $L003maw_end + ; Tail Round 4 + mov eax, DWORD PTR 16[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 16[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + dec ecx + mov DWORD PTR 16[edi],eax + mov esi, edx + jz $L003maw_end + ; Tail Round 5 + mov eax, DWORD PTR 20[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 20[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + dec ecx + mov DWORD PTR 20[edi],eax + mov esi, edx + jz $L003maw_end + ; Tail Round 6 + mov eax, DWORD PTR 24[ebx] + mul ebp + add eax, esi + mov esi, DWORD PTR 24[edi] + adc edx, 0 + add eax, esi + adc edx, 0 + mov DWORD PTR 24[edi],eax + mov esi, edx +$L003maw_end: + mov eax, esi + pop ecx + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_mul_add_words ENDP +_TEXT ENDS +_TEXT SEGMENT +PUBLIC _bn_mul_words + +_bn_mul_words PROC NEAR + push ebp + push ebx + push esi + push edi + ; + xor esi, esi + mov edi, DWORD PTR 20[esp] + mov ebx, DWORD PTR 24[esp] + mov ebp, DWORD PTR 28[esp] + mov ecx, DWORD PTR 32[esp] + and ebp, 4294967288 + jz $L004mw_finish +L005mw_loop: + ; Round 0 + mov eax, DWORD PTR [ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR [edi],eax + mov esi, edx + ; Round 4 + mov eax, DWORD PTR 4[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 4[edi],eax + mov esi, edx + ; Round 8 + mov eax, DWORD PTR 8[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 8[edi],eax + mov esi, edx + ; Round 12 + mov eax, DWORD PTR 12[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 12[edi],eax + mov esi, edx + ; Round 16 + mov eax, DWORD PTR 16[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 16[edi],eax + mov esi, edx + ; Round 20 + mov eax, DWORD PTR 20[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 20[edi],eax + mov esi, edx + ; Round 24 + mov eax, DWORD PTR 24[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 24[edi],eax + mov esi, edx + ; Round 28 + mov eax, DWORD PTR 28[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 28[edi],eax + mov esi, edx + ; + add ebx, 32 + add edi, 32 + sub ebp, 8 + jz $L004mw_finish + jmp L005mw_loop +$L004mw_finish: + mov ebp, DWORD PTR 28[esp] + and ebp, 7 + jnz $L006mw_finish2 + jmp $L007mw_end +$L006mw_finish2: + ; Tail Round 0 + mov eax, DWORD PTR [ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR [edi],eax + mov esi, edx + dec ebp + jz $L007mw_end + ; Tail Round 1 + mov eax, DWORD PTR 4[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 4[edi],eax + mov esi, edx + dec ebp + jz $L007mw_end + ; Tail Round 2 + mov eax, DWORD PTR 8[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 8[edi],eax + mov esi, edx + dec ebp + jz $L007mw_end + ; Tail Round 3 + mov eax, DWORD PTR 12[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 12[edi],eax + mov esi, edx + dec ebp + jz $L007mw_end + ; Tail Round 4 + mov eax, DWORD PTR 16[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 16[edi],eax + mov esi, edx + dec ebp + jz $L007mw_end + ; Tail Round 5 + mov eax, DWORD PTR 20[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 20[edi],eax + mov esi, edx + dec ebp + jz $L007mw_end + ; Tail Round 6 + mov eax, DWORD PTR 24[ebx] + mul ecx + add eax, esi + adc edx, 0 + mov DWORD PTR 24[edi],eax + mov esi, edx +$L007mw_end: + mov eax, esi + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_mul_words ENDP +_TEXT ENDS +_TEXT SEGMENT +PUBLIC _bn_sqr_words + +_bn_sqr_words PROC NEAR + push ebp + push ebx + push esi + push edi + ; + mov esi, DWORD PTR 20[esp] + mov edi, DWORD PTR 24[esp] + mov ebx, DWORD PTR 28[esp] + and ebx, 4294967288 + jz $L008sw_finish +L009sw_loop: + ; Round 0 + mov eax, DWORD PTR [edi] + mul eax + mov DWORD PTR [esi],eax + mov DWORD PTR 4[esi],edx + ; Round 4 + mov eax, DWORD PTR 4[edi] + mul eax + mov DWORD PTR 8[esi],eax + mov DWORD PTR 12[esi],edx + ; Round 8 + mov eax, DWORD PTR 8[edi] + mul eax + mov DWORD PTR 16[esi],eax + mov DWORD PTR 20[esi],edx + ; Round 12 + mov eax, DWORD PTR 12[edi] + mul eax + mov DWORD PTR 24[esi],eax + mov DWORD PTR 28[esi],edx + ; Round 16 + mov eax, DWORD PTR 16[edi] + mul eax + mov DWORD PTR 32[esi],eax + mov DWORD PTR 36[esi],edx + ; Round 20 + mov eax, DWORD PTR 20[edi] + mul eax + mov DWORD PTR 40[esi],eax + mov DWORD PTR 44[esi],edx + ; Round 24 + mov eax, DWORD PTR 24[edi] + mul eax + mov DWORD PTR 48[esi],eax + mov DWORD PTR 52[esi],edx + ; Round 28 + mov eax, DWORD PTR 28[edi] + mul eax + mov DWORD PTR 56[esi],eax + mov DWORD PTR 60[esi],edx + ; + add edi, 32 + add esi, 64 + sub ebx, 8 + jnz L009sw_loop +$L008sw_finish: + mov ebx, DWORD PTR 28[esp] + and ebx, 7 + jz $L010sw_end + ; Tail Round 0 + mov eax, DWORD PTR [edi] + mul eax + mov DWORD PTR [esi],eax + dec ebx + mov DWORD PTR 4[esi],edx + jz $L010sw_end + ; Tail Round 1 + mov eax, DWORD PTR 4[edi] + mul eax + mov DWORD PTR 8[esi],eax + dec ebx + mov DWORD PTR 12[esi],edx + jz $L010sw_end + ; Tail Round 2 + mov eax, DWORD PTR 8[edi] + mul eax + mov DWORD PTR 16[esi],eax + dec ebx + mov DWORD PTR 20[esi],edx + jz $L010sw_end + ; Tail Round 3 + mov eax, DWORD PTR 12[edi] + mul eax + mov DWORD PTR 24[esi],eax + dec ebx + mov DWORD PTR 28[esi],edx + jz $L010sw_end + ; Tail Round 4 + mov eax, DWORD PTR 16[edi] + mul eax + mov DWORD PTR 32[esi],eax + dec ebx + mov DWORD PTR 36[esi],edx + jz $L010sw_end + ; Tail Round 5 + mov eax, DWORD PTR 20[edi] + mul eax + mov DWORD PTR 40[esi],eax + dec ebx + mov DWORD PTR 44[esi],edx + jz $L010sw_end + ; Tail Round 6 + mov eax, DWORD PTR 24[edi] + mul eax + mov DWORD PTR 48[esi],eax + mov DWORD PTR 52[esi],edx +$L010sw_end: + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_sqr_words ENDP +_TEXT ENDS +_TEXT SEGMENT +PUBLIC _bn_div64 + +_bn_div64 PROC NEAR + push ebp + push ebx + push esi + push edi + mov edx, DWORD PTR 20[esp] + mov eax, DWORD PTR 24[esp] + mov ebx, DWORD PTR 28[esp] + div ebx + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_div64 ENDP +_TEXT ENDS +_TEXT SEGMENT +PUBLIC _bn_add_words + +_bn_add_words PROC NEAR + push ebp + push ebx + push esi + push edi + ; + mov ebx, DWORD PTR 20[esp] + mov esi, DWORD PTR 24[esp] + mov edi, DWORD PTR 28[esp] + mov ebp, DWORD PTR 32[esp] + xor eax, eax + and ebp, 4294967288 + jz $L011aw_finish +L012aw_loop: + ; Round 0 + mov ecx, DWORD PTR [esi] + mov edx, DWORD PTR [edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + mov DWORD PTR [ebx],ecx + ; Round 1 + mov ecx, DWORD PTR 4[esi] + mov edx, DWORD PTR 4[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + mov DWORD PTR 4[ebx],ecx + ; Round 2 + mov ecx, DWORD PTR 8[esi] + mov edx, DWORD PTR 8[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + mov DWORD PTR 8[ebx],ecx + ; Round 3 + mov ecx, DWORD PTR 12[esi] + mov edx, DWORD PTR 12[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + mov DWORD PTR 12[ebx],ecx + ; Round 4 + mov ecx, DWORD PTR 16[esi] + mov edx, DWORD PTR 16[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + mov DWORD PTR 16[ebx],ecx + ; Round 5 + mov ecx, DWORD PTR 20[esi] + mov edx, DWORD PTR 20[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + mov DWORD PTR 20[ebx],ecx + ; Round 6 + mov ecx, DWORD PTR 24[esi] + mov edx, DWORD PTR 24[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + mov DWORD PTR 24[ebx],ecx + ; Round 7 + mov ecx, DWORD PTR 28[esi] + mov edx, DWORD PTR 28[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + mov DWORD PTR 28[ebx],ecx + ; + add esi, 32 + add edi, 32 + add ebx, 32 + sub ebp, 8 + jnz L012aw_loop +$L011aw_finish: + mov ebp, DWORD PTR 32[esp] + and ebp, 7 + jz $L013aw_end + ; Tail Round 0 + mov ecx, DWORD PTR [esi] + mov edx, DWORD PTR [edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + dec ebp + mov DWORD PTR [ebx],ecx + jz $L013aw_end + ; Tail Round 1 + mov ecx, DWORD PTR 4[esi] + mov edx, DWORD PTR 4[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + dec ebp + mov DWORD PTR 4[ebx],ecx + jz $L013aw_end + ; Tail Round 2 + mov ecx, DWORD PTR 8[esi] + mov edx, DWORD PTR 8[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + dec ebp + mov DWORD PTR 8[ebx],ecx + jz $L013aw_end + ; Tail Round 3 + mov ecx, DWORD PTR 12[esi] + mov edx, DWORD PTR 12[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + dec ebp + mov DWORD PTR 12[ebx],ecx + jz $L013aw_end + ; Tail Round 4 + mov ecx, DWORD PTR 16[esi] + mov edx, DWORD PTR 16[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + dec ebp + mov DWORD PTR 16[ebx],ecx + jz $L013aw_end + ; Tail Round 5 + mov ecx, DWORD PTR 20[esi] + mov edx, DWORD PTR 20[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + dec ebp + mov DWORD PTR 20[ebx],ecx + jz $L013aw_end + ; Tail Round 6 + mov ecx, DWORD PTR 24[esi] + mov edx, DWORD PTR 24[edi] + add ecx, eax + mov eax, 0 + adc eax, eax + add ecx, edx + adc eax, 0 + mov DWORD PTR 24[ebx],ecx +$L013aw_end: + mov eax, eax + pop edi + pop esi + pop ebx + pop ebp + ret +_bn_add_words ENDP +_TEXT ENDS +END diff --git a/crypto/bn/asm/bn86unix.cpp b/crypto/bn/asm/bn86unix.cpp new file mode 100644 index 0000000000..64702201ea --- /dev/null +++ b/crypto/bn/asm/bn86unix.cpp @@ -0,0 +1,752 @@ +/* Run the C pre-processor over this file with one of the following defined + * ELF - elf object files, + * OUT - a.out object files, + * BSDI - BSDI style a.out object files + * SOL - Solaris style elf + */ + +#define TYPE(a,b) .type a,b +#define SIZE(a,b) .size a,b + +#if defined(OUT) || defined(BSDI) +#define bn_mul_add_words _bn_mul_add_words +#define bn_mul_words _bn_mul_words +#define bn_sqr_words _bn_sqr_words +#define bn_div64 _bn_div64 +#define bn_add_words _bn_add_words + +#endif + +#ifdef OUT +#define OK 1 +#define ALIGN 4 +#endif + +#ifdef BSDI +#define OK 1 +#define ALIGN 4 +#undef SIZE +#undef TYPE +#define SIZE(a,b) +#define TYPE(a,b) +#endif + +#if defined(ELF) || defined(SOL) +#define OK 1 +#define ALIGN 16 +#endif + +#ifndef OK +You need to define one of +ELF - elf systems - linux-elf, NetBSD and DG-UX +OUT - a.out systems - linux-a.out and FreeBSD +SOL - solaris systems, which are elf with strange comment lines +BSDI - a.out with a very primative version of as. +#endif + +/* Let the Assembler begin :-) */ + /* Don't even think of reading this code */ + /* It was automatically generated by bn-586.pl */ + /* Which is a perl program used to generate the x86 assember for */ + /* any of elf, a.out, BSDI,Win32, or Solaris */ + /* eric <eay@cryptsoft.com> */ + + .file "bn-586.s" + .version "01.01" +gcc2_compiled.: +.text + .align ALIGN +.globl bn_mul_add_words + TYPE(bn_mul_add_words,@function) +bn_mul_add_words: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + + xorl %esi, %esi + movl 20(%esp), %edi + movl 28(%esp), %ecx + movl 24(%esp), %ebx + andl $4294967288, %ecx + movl 32(%esp), %ebp + pushl %ecx + jz .L000maw_finish +.L001maw_loop: + movl %ecx, (%esp) + /* Round 0 */ + movl (%ebx), %eax + mull %ebp + addl %esi, %eax + movl (%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + movl %eax, (%edi) + movl %edx, %esi + /* Round 4 */ + movl 4(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 4(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + movl %eax, 4(%edi) + movl %edx, %esi + /* Round 8 */ + movl 8(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 8(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + movl %eax, 8(%edi) + movl %edx, %esi + /* Round 12 */ + movl 12(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 12(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + movl %eax, 12(%edi) + movl %edx, %esi + /* Round 16 */ + movl 16(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 16(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + movl %eax, 16(%edi) + movl %edx, %esi + /* Round 20 */ + movl 20(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 20(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + movl %eax, 20(%edi) + movl %edx, %esi + /* Round 24 */ + movl 24(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 24(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + movl %eax, 24(%edi) + movl %edx, %esi + /* Round 28 */ + movl 28(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 28(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + movl %eax, 28(%edi) + movl %edx, %esi + + movl (%esp), %ecx + addl $32, %ebx + addl $32, %edi + subl $8, %ecx + jnz .L001maw_loop +.L000maw_finish: + movl 32(%esp), %ecx + andl $7, %ecx + jnz .L002maw_finish2 + jmp .L003maw_end +.align ALIGN +.L002maw_finish2: + /* Tail Round 0 */ + movl (%ebx), %eax + mull %ebp + addl %esi, %eax + movl (%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + decl %ecx + movl %eax, (%edi) + movl %edx, %esi + jz .L003maw_end + /* Tail Round 1 */ + movl 4(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 4(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + decl %ecx + movl %eax, 4(%edi) + movl %edx, %esi + jz .L003maw_end + /* Tail Round 2 */ + movl 8(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 8(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + decl %ecx + movl %eax, 8(%edi) + movl %edx, %esi + jz .L003maw_end + /* Tail Round 3 */ + movl 12(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 12(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + decl %ecx + movl %eax, 12(%edi) + movl %edx, %esi + jz .L003maw_end + /* Tail Round 4 */ + movl 16(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 16(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + decl %ecx + movl %eax, 16(%edi) + movl %edx, %esi + jz .L003maw_end + /* Tail Round 5 */ + movl 20(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 20(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + decl %ecx + movl %eax, 20(%edi) + movl %edx, %esi + jz .L003maw_end + /* Tail Round 6 */ + movl 24(%ebx), %eax + mull %ebp + addl %esi, %eax + movl 24(%edi), %esi + adcl $0, %edx + addl %esi, %eax + adcl $0, %edx + movl %eax, 24(%edi) + movl %edx, %esi +.L003maw_end: + movl %esi, %eax + popl %ecx + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.bn_mul_add_words_end: + SIZE(bn_mul_add_words,.bn_mul_add_words_end-bn_mul_add_words) +.ident "bn_mul_add_words" +.text + .align ALIGN +.globl bn_mul_words + TYPE(bn_mul_words,@function) +bn_mul_words: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + + xorl %esi, %esi + movl 20(%esp), %edi + movl 24(%esp), %ebx + movl 28(%esp), %ebp + movl 32(%esp), %ecx + andl $4294967288, %ebp + jz .L004mw_finish +.L005mw_loop: + /* Round 0 */ + movl (%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, (%edi) + movl %edx, %esi + /* Round 4 */ + movl 4(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 4(%edi) + movl %edx, %esi + /* Round 8 */ + movl 8(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 8(%edi) + movl %edx, %esi + /* Round 12 */ + movl 12(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 12(%edi) + movl %edx, %esi + /* Round 16 */ + movl 16(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 16(%edi) + movl %edx, %esi + /* Round 20 */ + movl 20(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 20(%edi) + movl %edx, %esi + /* Round 24 */ + movl 24(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 24(%edi) + movl %edx, %esi + /* Round 28 */ + movl 28(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 28(%edi) + movl %edx, %esi + + addl $32, %ebx + addl $32, %edi + subl $8, %ebp + jz .L004mw_finish + jmp .L005mw_loop +.L004mw_finish: + movl 28(%esp), %ebp + andl $7, %ebp + jnz .L006mw_finish2 + jmp .L007mw_end +.align ALIGN +.L006mw_finish2: + /* Tail Round 0 */ + movl (%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, (%edi) + movl %edx, %esi + decl %ebp + jz .L007mw_end + /* Tail Round 1 */ + movl 4(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 4(%edi) + movl %edx, %esi + decl %ebp + jz .L007mw_end + /* Tail Round 2 */ + movl 8(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 8(%edi) + movl %edx, %esi + decl %ebp + jz .L007mw_end + /* Tail Round 3 */ + movl 12(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 12(%edi) + movl %edx, %esi + decl %ebp + jz .L007mw_end + /* Tail Round 4 */ + movl 16(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 16(%edi) + movl %edx, %esi + decl %ebp + jz .L007mw_end + /* Tail Round 5 */ + movl 20(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 20(%edi) + movl %edx, %esi + decl %ebp + jz .L007mw_end + /* Tail Round 6 */ + movl 24(%ebx), %eax + mull %ecx + addl %esi, %eax + adcl $0, %edx + movl %eax, 24(%edi) + movl %edx, %esi +.L007mw_end: + movl %esi, %eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.bn_mul_words_end: + SIZE(bn_mul_words,.bn_mul_words_end-bn_mul_words) +.ident "bn_mul_words" +.text + .align ALIGN +.globl bn_sqr_words + TYPE(bn_sqr_words,@function) +bn_sqr_words: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + + movl 20(%esp), %esi + movl 24(%esp), %edi + movl 28(%esp), %ebx + andl $4294967288, %ebx + jz .L008sw_finish +.L009sw_loop: + /* Round 0 */ + movl (%edi), %eax + mull %eax + movl %eax, (%esi) + movl %edx, 4(%esi) + /* Round 4 */ + movl 4(%edi), %eax + mull %eax + movl %eax, 8(%esi) + movl %edx, 12(%esi) + /* Round 8 */ + movl 8(%edi), %eax + mull %eax + movl %eax, 16(%esi) + movl %edx, 20(%esi) + /* Round 12 */ + movl 12(%edi), %eax + mull %eax + movl %eax, 24(%esi) + movl %edx, 28(%esi) + /* Round 16 */ + movl 16(%edi), %eax + mull %eax + movl %eax, 32(%esi) + movl %edx, 36(%esi) + /* Round 20 */ + movl 20(%edi), %eax + mull %eax + movl %eax, 40(%esi) + movl %edx, 44(%esi) + /* Round 24 */ + movl 24(%edi), %eax + mull %eax + movl %eax, 48(%esi) + movl %edx, 52(%esi) + /* Round 28 */ + movl 28(%edi), %eax + mull %eax + movl %eax, 56(%esi) + movl %edx, 60(%esi) + + addl $32, %edi + addl $64, %esi + subl $8, %ebx + jnz .L009sw_loop +.L008sw_finish: + movl 28(%esp), %ebx + andl $7, %ebx + jz .L010sw_end + /* Tail Round 0 */ + movl (%edi), %eax + mull %eax + movl %eax, (%esi) + decl %ebx + movl %edx, 4(%esi) + jz .L010sw_end + /* Tail Round 1 */ + movl 4(%edi), %eax + mull %eax + movl %eax, 8(%esi) + decl %ebx + movl %edx, 12(%esi) + jz .L010sw_end + /* Tail Round 2 */ + movl 8(%edi), %eax + mull %eax + movl %eax, 16(%esi) + decl %ebx + movl %edx, 20(%esi) + jz .L010sw_end + /* Tail Round 3 */ + movl 12(%edi), %eax + mull %eax + movl %eax, 24(%esi) + decl %ebx + movl %edx, 28(%esi) + jz .L010sw_end + /* Tail Round 4 */ + movl 16(%edi), %eax + mull %eax + movl %eax, 32(%esi) + decl %ebx + movl %edx, 36(%esi) + jz .L010sw_end + /* Tail Round 5 */ + movl 20(%edi), %eax + mull %eax + movl %eax, 40(%esi) + decl %ebx + movl %edx, 44(%esi) + jz .L010sw_end + /* Tail Round 6 */ + movl 24(%edi), %eax + mull %eax + movl %eax, 48(%esi) + movl %edx, 52(%esi) +.L010sw_end: + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.bn_sqr_words_end: + SIZE(bn_sqr_words,.bn_sqr_words_end-bn_sqr_words) +.ident "bn_sqr_words" +.text + .align ALIGN +.globl bn_div64 + TYPE(bn_div64,@function) +bn_div64: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + movl 20(%esp), %edx + movl 24(%esp), %eax + movl 28(%esp), %ebx + divl %ebx + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.bn_div64_end: + SIZE(bn_div64,.bn_div64_end-bn_div64) +.ident "bn_div64" +.text + .align ALIGN +.globl bn_add_words + TYPE(bn_add_words,@function) +bn_add_words: + pushl %ebp + pushl %ebx + pushl %esi + pushl %edi + + + movl 20(%esp), %ebx + movl 24(%esp), %esi + movl 28(%esp), %edi + movl 32(%esp), %ebp + xorl %eax, %eax + andl $4294967288, %ebp + jz .L011aw_finish +.L012aw_loop: + /* Round 0 */ + movl (%esi), %ecx + movl (%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + movl %ecx, (%ebx) + /* Round 1 */ + movl 4(%esi), %ecx + movl 4(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + movl %ecx, 4(%ebx) + /* Round 2 */ + movl 8(%esi), %ecx + movl 8(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + movl %ecx, 8(%ebx) + /* Round 3 */ + movl 12(%esi), %ecx + movl 12(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + movl %ecx, 12(%ebx) + /* Round 4 */ + movl 16(%esi), %ecx + movl 16(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + movl %ecx, 16(%ebx) + /* Round 5 */ + movl 20(%esi), %ecx + movl 20(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + movl %ecx, 20(%ebx) + /* Round 6 */ + movl 24(%esi), %ecx + movl 24(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + movl %ecx, 24(%ebx) + /* Round 7 */ + movl 28(%esi), %ecx + movl 28(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + movl %ecx, 28(%ebx) + + addl $32, %esi + addl $32, %edi + addl $32, %ebx + subl $8, %ebp + jnz .L012aw_loop +.L011aw_finish: + movl 32(%esp), %ebp + andl $7, %ebp + jz .L013aw_end + /* Tail Round 0 */ + movl (%esi), %ecx + movl (%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + decl %ebp + movl %ecx, (%ebx) + jz .L013aw_end + /* Tail Round 1 */ + movl 4(%esi), %ecx + movl 4(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + decl %ebp + movl %ecx, 4(%ebx) + jz .L013aw_end + /* Tail Round 2 */ + movl 8(%esi), %ecx + movl 8(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + decl %ebp + movl %ecx, 8(%ebx) + jz .L013aw_end + /* Tail Round 3 */ + movl 12(%esi), %ecx + movl 12(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + decl %ebp + movl %ecx, 12(%ebx) + jz .L013aw_end + /* Tail Round 4 */ + movl 16(%esi), %ecx + movl 16(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + decl %ebp + movl %ecx, 16(%ebx) + jz .L013aw_end + /* Tail Round 5 */ + movl 20(%esi), %ecx + movl 20(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + decl %ebp + movl %ecx, 20(%ebx) + jz .L013aw_end + /* Tail Round 6 */ + movl 24(%esi), %ecx + movl 24(%edi), %edx + addl %eax, %ecx + movl $0, %eax + adcl %eax, %eax + addl %edx, %ecx + adcl $0, %eax + movl %ecx, 24(%ebx) +.L013aw_end: + movl %eax, %eax + popl %edi + popl %esi + popl %ebx + popl %ebp + ret +.bn_add_words_end: + SIZE(bn_add_words,.bn_add_words_end-bn_add_words) +.ident "bn_add_words" diff --git a/crypto/bn/asm/pa-risc.s b/crypto/bn/asm/pa-risc.s index c49c433a83..775130a191 100644 --- a/crypto/bn/asm/pa-risc.s +++ b/crypto/bn/asm/pa-risc.s @@ -11,8 +11,8 @@ .SUBSPA $CODE$ .align 4 - .EXPORT bn_mul_add_word,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR -bn_mul_add_word + .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR +bn_mul_add_words .PROC .CALLINFO FRAME=0,CALLS,SAVE_RP .ENTRY @@ -219,8 +219,8 @@ L$0003 .EXIT .PROCEND .align 4 - .EXPORT bn_mul_word,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR -bn_mul_word + .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR +bn_mul_words .PROC .CALLINFO FRAME=0,CALLS,SAVE_RP .ENTRY diff --git a/crypto/bn/asm/pa-risc2.s b/crypto/bn/asm/pa-risc2.s index 5e07b7d2e8..c2725996a4 100644 --- a/crypto/bn/asm/pa-risc2.s +++ b/crypto/bn/asm/pa-risc2.s @@ -11,8 +11,8 @@ .SUBSPA $CODE$ .align 4 - .EXPORT bn_mul_add_word,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR -bn_mul_add_word + .EXPORT bn_mul_add_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR +bn_mul_add_words .PROC .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=4 .ENTRY @@ -117,8 +117,8 @@ L$0011 .EXIT .PROCEND .align 4 - .EXPORT bn_mul_word,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR -bn_mul_word + .EXPORT bn_mul_words,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,RTNVAL=GR +bn_mul_words .PROC .CALLINFO FRAME=64,CALLS,SAVE_RP,ENTRY_GR=3 .ENTRY diff --git a/crypto/bn/asm/r3000.s b/crypto/bn/asm/r3000.s index 5be2a0d0e6..e95269afa3 100644 --- a/crypto/bn/asm/r3000.s +++ b/crypto/bn/asm/r3000.s @@ -55,9 +55,9 @@ __gnu_compiled_c: .byte 0x0 .text .align 2 - .globl bn_mul_add_word - .ent bn_mul_add_word -bn_mul_add_word: + .globl bn_mul_add_words + .ent bn_mul_add_words +bn_mul_add_words: .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 .mask 0x00000000,0 .fmask 0x00000000,0 @@ -206,11 +206,11 @@ $L3: .set macro .set reorder - .end bn_mul_add_word + .end bn_mul_add_words .align 2 - .globl bn_mul_word - .ent bn_mul_word -bn_mul_word: + .globl bn_mul_words + .ent bn_mul_words +bn_mul_words: .frame $sp,0,$31 # vars= 0, regs= 0/0, args= 0, extra= 0 .mask 0x00000000,0 .fmask 0x00000000,0 @@ -334,7 +334,7 @@ $L11: .set macro .set reorder - .end bn_mul_word + .end bn_mul_words .align 2 .globl bn_sqr_words .ent bn_sqr_words diff --git a/crypto/bn/asm/sparc.s b/crypto/bn/asm/sparc.s index 37c5fb194e..f9e533caa8 100644 --- a/crypto/bn/asm/sparc.s +++ b/crypto/bn/asm/sparc.s @@ -2,10 +2,10 @@ gcc2_compiled.: .section ".text" .align 4 - .global bn_mul_add_word - .type bn_mul_add_word,#function + .global bn_mul_add_words + .type bn_mul_add_words,#function .proc 016 -bn_mul_add_word: +bn_mul_add_words: !#PROLOGUE# 0 save %sp,-112,%sp !#PROLOGUE# 1 @@ -98,12 +98,12 @@ bn_mul_add_word: ret restore %g0,%i4,%o0 .LLfe1: - .size bn_mul_add_word,.LLfe1-bn_mul_add_word + .size bn_mul_add_words,.LLfe1-bn_mul_add_words .align 4 - .global bn_mul_word - .type bn_mul_word,#function + .global bn_mul_words + .type bn_mul_words,#function .proc 016 -bn_mul_word: +bn_mul_words: !#PROLOGUE# 0 save %sp,-112,%sp !#PROLOGUE# 1 @@ -176,7 +176,7 @@ bn_mul_word: ret restore .LLfe2: - .size bn_mul_word,.LLfe2-bn_mul_word + .size bn_mul_words,.LLfe2-bn_mul_words .align 4 .global bn_sqr_words .type bn_sqr_words,#function @@ -234,10 +234,113 @@ bn_sqr_words: nop .LLfe3: .size bn_sqr_words,.LLfe3-bn_sqr_words + .align 4 + .global bn_add_words + .type bn_add_words,#function + .proc 016 +bn_add_words: + !#PROLOGUE# 0 + save %sp,-112,%sp + !#PROLOGUE# 1 + mov %i0,%o2 + mov %i1,%o3 + mov %i2,%o4 + mov %i3,%i5 + mov 0,%o0 + mov 0,%o1 + add %o2,12,%o7 + add %o4,12,%g4 + b .LL42 + add %o3,12,%g1 +.LL45: + add %i5,-1,%i5 + mov %i4,%g3 + ld [%g4-8],%i4 + mov 0,%g2 + mov %i4,%i1 + mov 0,%i0 + addcc %g3,%i1,%g3 + addx %g2,%i0,%g2 + addcc %o1,%g3,%o1 + addx %o0,%g2,%o0 + st %o1,[%o7-8] + mov %o0,%i3 + mov 0,%i2 + mov %i2,%o0 + mov %i3,%o1 + cmp %i5,0 + ble .LL43 + add %i5,-1,%i5 + ld [%g1-4],%i4 + mov %i4,%g3 + ld [%g4-4],%i4 + mov 0,%g2 + mov %i4,%i1 + mov 0,%i0 + addcc %g3,%i1,%g3 + addx %g2,%i0,%g2 + addcc %o1,%g3,%o1 + addx %o0,%g2,%o0 + st %o1,[%o7-4] + mov %o0,%i3 + mov 0,%i2 + mov %i2,%o0 + mov %i3,%o1 + cmp %i5,0 + ble .LL43 + add %i5,-1,%i5 + ld [%g1],%i4 + mov %i4,%g3 + ld [%g4],%i4 + mov 0,%g2 + mov %i4,%i1 + mov 0,%i0 + addcc %g3,%i1,%g3 + addx %g2,%i0,%g2 + addcc %o1,%g3,%o1 + addx %o0,%g2,%o0 + st %o1,[%o7] + mov %o0,%i3 + mov 0,%i2 + mov %i2,%o0 + mov %i3,%o1 + cmp %i5,0 + ble .LL43 + add %g1,16,%g1 + add %o3,16,%o3 + add %g4,16,%g4 + add %o4,16,%o4 + add %o7,16,%o7 + add %o2,16,%o2 +.LL42: + ld [%o3],%i4 + add %i5,-1,%i5 + mov %i4,%g3 + ld [%o4],%i4 + mov 0,%g2 + mov %i4,%i1 + mov 0,%i0 + addcc %g3,%i1,%g3 + addx %g2,%i0,%g2 + addcc %o1,%g3,%o1 + addx %o0,%g2,%o0 + st %o1,[%o2] + mov %o0,%i3 + mov 0,%i2 + mov %i2,%o0 + mov %i3,%o1 + cmp %i5,0 + bg,a .LL45 + ld [%g1-8],%i4 +.LL43: + ret + restore %g0,%o1,%o0 +.LLfe4: + .size bn_add_words,.LLfe4-bn_add_words .section ".rodata" .align 8 .LLC0: - .asciz "Division would overflow\n" + .asciz "Division would overflow (%d)\n" .section ".text" .align 4 .global bn_div64 @@ -249,20 +352,20 @@ bn_div64: !#PROLOGUE# 1 mov 0,%l1 cmp %i2,0 - bne .LL42 + bne .LL51 mov 2,%l0 - b .LL59 + b .LL68 mov -1,%i0 -.LL42: +.LL51: call BN_num_bits_word,0 mov %i2,%o0 mov %o0,%o2 cmp %o2,32 - be .LL43 + be .LL52 mov 1,%o0 sll %o0,%o2,%o0 cmp %i0,%o0 - bleu .LL60 + bleu .LL69 mov 32,%o0 sethi %hi(__iob+32),%o0 or %o0,%lo(__iob+32),%o0 @@ -271,89 +374,89 @@ bn_div64: or %o1,%lo(.LLC0),%o1 call abort,0 nop -.LL43: +.LL52: mov 32,%o0 -.LL60: +.LL69: cmp %i0,%i2 - blu .LL44 + blu .LL53 sub %o0,%o2,%o2 sub %i0,%i2,%i0 -.LL44: +.LL53: cmp %o2,0 - be .LL45 - sethi %hi(-65536),%o7 - sll %i2,%o2,%i2 + be .LL54 sll %i0,%o2,%o1 + sll %i2,%o2,%i2 sub %o0,%o2,%o0 srl %i1,%o0,%o0 or %o1,%o0,%i0 sll %i1,%o2,%i1 -.LL45: +.LL54: srl %i2,16,%g2 sethi %hi(65535),%o0 or %o0,%lo(65535),%o1 and %i2,%o1,%g3 mov %o0,%g4 + sethi %hi(-65536),%o7 mov %o1,%g1 -.LL46: +.LL55: srl %i0,16,%o0 cmp %o0,%g2 - be .LL50 + be .LL59 or %g4,%lo(65535),%o3 wr %g0,%g0,%y nop nop nop udiv %i0,%g2,%o3 -.LL50: +.LL59: and %i1,%o7,%o0 srl %o0,16,%o5 smul %o3,%g3,%o4 smul %o3,%g2,%o2 -.LL51: +.LL60: sub %i0,%o2,%o1 andcc %o1,%o7,%g0 - bne .LL52 + bne .LL61 sll %o1,16,%o0 add %o0,%o5,%o0 cmp %o4,%o0 - bleu .LL52 + bleu .LL61 sub %o4,%g3,%o4 sub %o2,%g2,%o2 - b .LL51 + b .LL60 add %o3,-1,%o3 -.LL52: +.LL61: smul %o3,%g2,%o2 smul %o3,%g3,%o0 srl %o0,16,%o1 sll %o0,16,%o0 and %o0,%o7,%o0 cmp %i1,%o0 - bgeu .LL56 + bgeu .LL65 add %o2,%o1,%o2 add %o2,1,%o2 -.LL56: +.LL65: cmp %i0,%o2 - bgeu .LL57 + bgeu .LL66 sub %i1,%o0,%i1 add %i0,%i2,%i0 add %o3,-1,%o3 -.LL57: +.LL66: addcc %l0,-1,%l0 - be .LL47 + be .LL56 sub %i0,%o2,%i0 sll %o3,16,%l1 sll %i0,16,%o0 srl %i1,16,%o1 or %o0,%o1,%i0 and %i1,%g1,%o0 - b .LL46 + b .LL55 sll %o0,16,%i1 -.LL47: +.LL56: or %l1,%o3,%i0 -.LL59: +.LL68: ret restore -.LLfe4: - .size bn_div64,.LLfe4-bn_div64 - .ident "GCC: (GNU) 2.7.0" +.LLfe5: + .size bn_div64,.LLfe5-bn_div64 + .ident "GCC: (GNU) 2.7.2.3" diff --git a/crypto/bn/asm/x86w16.asm b/crypto/bn/asm/x86w16.asm index 66874913e9..74a933a8cd 100644 --- a/crypto/bn/asm/x86w16.asm +++ b/crypto/bn/asm/x86w16.asm @@ -14,8 +14,8 @@ DGROUP GROUP CONST, _BSS, _DATA ASSUME DS: DGROUP, SS: DGROUP F_TEXT SEGMENT ASSUME CS: F_TEXT - PUBLIC _bn_mul_add_word -_bn_mul_add_word PROC FAR + PUBLIC _bn_mul_add_words +_bn_mul_add_words PROC FAR ; Line 58 push bp push bx @@ -133,9 +133,9 @@ $L547: ret nop -_bn_mul_add_word ENDP - PUBLIC _bn_mul_word -_bn_mul_word PROC FAR +_bn_mul_add_words ENDP + PUBLIC _bn_mul_words +_bn_mul_words PROC FAR ; Line 76 push bp push bx @@ -202,7 +202,7 @@ $L764: pop bp ret nop -_bn_mul_word ENDP +_bn_mul_words ENDP PUBLIC _bn_sqr_words _bn_sqr_words PROC FAR ; Line 92 diff --git a/crypto/bn/asm/x86w32.asm b/crypto/bn/asm/x86w32.asm index 0e4452dfa9..fc6f917714 100644 --- a/crypto/bn/asm/x86w32.asm +++ b/crypto/bn/asm/x86w32.asm @@ -14,8 +14,8 @@ DGROUP GROUP CONST, _BSS, _DATA ASSUME DS: DGROUP, SS: DGROUP F_TEXT SEGMENT ASSUME CS: F_TEXT - PUBLIC _bn_mul_add_word -_bn_mul_add_word PROC FAR + PUBLIC _bn_mul_add_words +_bn_mul_add_words PROC FAR ; Line 58 push bp push bx @@ -133,10 +133,10 @@ $L547: pop bp ret nop +_bn_mul_add_words ENDP -_bn_mul_add_word ENDP - PUBLIC _bn_mul_word -_bn_mul_word PROC FAR + PUBLIC _bn_mul_words +_bn_mul_words PROC FAR ; Line 76 push bp push bx @@ -206,7 +206,7 @@ $L764: pop bp ret nop -_bn_mul_word ENDP +_bn_mul_words ENDP PUBLIC _bn_sqr_words _bn_sqr_words PROC FAR ; Line 92 @@ -285,8 +285,8 @@ $L645: pop bx pop bp ret - _bn_sqr_words ENDP + PUBLIC _bn_div64 _bn_div64 PROC FAR push bp @@ -299,5 +299,64 @@ _bn_div64 PROC FAR pop bp ret _bn_div64 ENDP + + PUBLIC _bn_add_words +_bn_add_words PROC FAR +; Line 58 + push bp + push bx + push esi + push di + push ds + push es + mov bp,sp +; w = 28 +; num = 26 +; ap = 22 +; rp = 18 + xor esi,esi ;c=0; + mov si,WORD PTR [bp+22] ; load a + mov es,WORD PTR [bp+24] ; load a + mov di,WORD PTR [bp+26] ; load b + mov ds,WORD PTR [bp+28] ; load b + + mov dx,WORD PTR [bp+30] ; load num + dec dx + js $L547 + xor ecx,ecx + +$L5477: + xor ebx,ebx + mov eax,DWORD PTR es:[si] ; *a + add eax,ecx + adc ebx,0 + add si,4 ; a++ + add eax,DWORD PTR ds:[di] ; + *b + mov ecx,ebx + adc ecx,0 + add di,4 + mov bx,WORD PTR [bp+18] + mov ds,WORD PTR [bp+20] + mov DWORD PTR ds:[bx],eax + add bx,4 + mov ds,WORD PTR [bp+28] + mov WORD PTR [bp+18],bx + dec dx + js $L547 ; Note that we are now testing for -1 + jmp $L5477 + ; +$L547: + mov eax,ecx + mov edx,ecx + shr edx,16 + pop es + pop ds + pop di + pop esi + pop bx + pop bp + ret + nop +_bn_add_words ENDP F_TEXT ENDS END |