From 384e6de4c7e35e37fb3d6fbeb32ddcb5eb0d3d3f Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Fri, 3 Feb 2017 12:07:16 +0100 Subject: x86_64 assembly pack: Win64 SEH face-lift. - harmonize handlers with guidelines and themselves; - fix some bugs in handlers; - add missing handlers in chacha and ecp_nistz256 modules; Reviewed-by: Rich Salz --- crypto/bn/asm/rsaz-avx2.pl | 2 +- crypto/bn/asm/x86_64-gf2m.pl | 17 +++++++++++++---- crypto/bn/asm/x86_64-mont.pl | 12 ++++++------ crypto/bn/asm/x86_64-mont5.pl | 4 ++-- 4 files changed, 22 insertions(+), 13 deletions(-) (limited to 'crypto/bn') diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl index f34c84f452..e620285e61 100755 --- a/crypto/bn/asm/rsaz-avx2.pl +++ b/crypto/bn/asm/rsaz-avx2.pl @@ -1738,11 +1738,11 @@ $code.=<<___ if ($win64); movaps -0x38(%r11),%xmm13 movaps -0x28(%r11),%xmm14 movaps -0x18(%r11),%xmm15 -.LSEH_end_rsaz_1024_gather5: ___ $code.=<<___; lea (%r11),%rsp ret +.LSEH_end_rsaz_1024_gather5: .size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2 ___ } diff --git a/crypto/bn/asm/x86_64-gf2m.pl b/crypto/bn/asm/x86_64-gf2m.pl index d962f62033..d237c1e3d2 100644 --- a/crypto/bn/asm/x86_64-gf2m.pl +++ b/crypto/bn/asm/x86_64-gf2m.pl @@ -174,8 +174,9 @@ $code.=<<___; .type bn_GF2m_mul_2x2,\@abi-omnipotent .align 16 bn_GF2m_mul_2x2: - mov OPENSSL_ia32cap_P(%rip),%rax - bt \$33,%rax + mov %rsp,%rax + mov OPENSSL_ia32cap_P(%rip),%r10 + bt \$33,%r10 jnc .Lvanilla_mul_2x2 movq $a1,%xmm0 @@ -280,6 +281,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; lea 8*17(%rsp),%rsp +.Lepilogue_mul_2x2: ret .Lend_mul_2x2: .size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2 @@ -312,13 +314,19 @@ se_handler: pushfq sub \$64,%rsp - mov 152($context),%rax # pull context->Rsp + mov 120($context),%rax # pull context->Rax mov 248($context),%rbx # pull context->Rip lea .Lbody_mul_2x2(%rip),%r10 cmp %r10,%rbx # context->Rip<"prologue" label jb .Lin_prologue + mov 152($context),%rax # pull context->Rsp + + lea 
.Lepilogue_mul_2x2(%rip),%r10 + cmp %r10,%rbx # context->Rip>="epilogue" label + jae .Lin_prologue + mov 8*10(%rax),%r14 # mimic epilogue mov 8*11(%rax),%r13 mov 8*12(%rax),%r12 @@ -335,8 +343,9 @@ se_handler: mov %r13,224($context) # restore context->R13 mov %r14,232($context) # restore context->R14 -.Lin_prologue: lea 8*17(%rax),%rax + +.Lin_prologue: mov %rax,152($context) # restore context->Rsp mov 40($disp),%rdi # disp->ContextRecord diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl index df4cca5bfe..7b5e88547b 100755 --- a/crypto/bn/asm/x86_64-mont.pl +++ b/crypto/bn/asm/x86_64-mont.pl @@ -695,10 +695,11 @@ ___ my @ri=("%rax","%rdx",$m0,$m1); $code.=<<___; mov 16(%rsp,$num,8),$rp # restore $rp + lea -4($num),$j mov 0(%rsp),@ri[0] # tp[0] pxor %xmm0,%xmm0 mov 8(%rsp),@ri[1] # tp[1] - shr \$2,$num # num/=4 + shr \$2,$j # j=num/4-1 lea (%rsp),$ap # borrow ap for tp xor $i,$i # i=0 and clear CF! @@ -706,7 +707,6 @@ $code.=<<___; mov 16($ap),@ri[2] # tp[2] mov 24($ap),@ri[3] # tp[3] sbb 8($np),@ri[1] - lea -1($num),$j # j=num/4-1 jmp .Lsub4x .align 16 .Lsub4x: @@ -740,8 +740,9 @@ $code.=<<___; not @ri[0] mov $rp,$np and @ri[0],$np - lea -1($num),$j + lea -4($num),$j or $np,$ap # ap=borrow?tp:rp + shr \$2,$j # j=num/4-1 movdqu ($ap),%xmm1 movdqa %xmm0,(%rsp) @@ -759,7 +760,6 @@ $code.=<<___; dec $j jnz .Lcopy4x - shl \$2,$num movdqu 16($ap,$i),%xmm2 movdqa %xmm0,16(%rsp,$i) movdqu %xmm2,16($rp,$i) @@ -1401,12 +1401,12 @@ sqr_handler: mov 0(%r11),%r10d # HandlerData[0] lea (%rsi,%r10),%r10 # end of prologue label - cmp %r10,%rbx # context->Rip<.Lsqr_body + cmp %r10,%rbx # context->Rip<.Lsqr_prologue jb .Lcommon_seh_tail mov 4(%r11),%r10d # HandlerData[1] lea (%rsi,%r10),%r10 # body label - cmp %r10,%rbx # context->Rip>=.Lsqr_epilogue + cmp %r10,%rbx # context->Rip<.Lsqr_body jb .Lcommon_pop_regs mov 152($context),%rax # pull context->Rsp diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl index d041d738cf..226f4360d6 
100755 --- a/crypto/bn/asm/x86_64-mont5.pl +++ b/crypto/bn/asm/x86_64-mont5.pl @@ -3669,8 +3669,8 @@ mul_handler: jb .Lcommon_seh_tail mov 4(%r11),%r10d # HandlerData[1] - lea (%rsi,%r10),%r10 # epilogue label - cmp %r10,%rbx # context->Rip>=epilogue label + lea (%rsi,%r10),%r10 # beginning of body label + cmp %r10,%rbx # context->Rip<body label jb .Lcommon_pop_regs mov 152($context),%rax # pull context->Rsp -- cgit v1.2.3