diff options
author | Ralf S. Engelschall <rse@openssl.org> | 1998-12-21 10:52:47 +0000 |
---|---|---|
committer | Ralf S. Engelschall <rse@openssl.org> | 1998-12-21 10:52:47 +0000 |
commit | d02b48c63a58ea4367a0e905979f140b7d090f86 (patch) | |
tree | 504f62ed3d84799f785b9cd9fab255a21b0e1b0e /crypto/bn/asm/x86-lnx.s | |
download | openssl-d02b48c63a58ea4367a0e905979f140b7d090f86.tar.gz |
Import of old SSLeay release: SSLeay 0.8.1b
Diffstat (limited to 'crypto/bn/asm/x86-lnx.s')
-rw-r--r-- | crypto/bn/asm/x86-lnx.s | 282 |
1 files changed, 282 insertions, 0 deletions
# crypto/bn/asm/x86-lnx.s  (added as a new file, mode 100644, blob 5123867440)
#
# 32-bit x86 word-array multiply / square / divide primitives.
# Syntax: AT&T (GAS), ELF directives.
#
# Conventions shared by every routine in this file:
#   - arguments are read from the caller's stack; on entry the first
#     argument is at 4(%esp), the following ones at +4 each
#   - the result, if any, is returned in %eax
#   - %ebx, %esi, %edi and %ebp are saved and restored whenever used;
#     %eax, %ecx, %edx and the flags are overwritten
#   - a "word" is 32 bits; L(t)/H(t) denote the low/high half of the
#     64-bit product left in %edx:%eax by mull
#   - element counts are handled as unsigned 32-bit values

	.file	"bn_mulw.c"
	.version	"01.01"
gcc2_compiled.:
.text

# ----------------------------------------------------------------------
# c = bn_mul_add_word(r, a, num, w)
#
#   for (i = 0; i < num; i++) { t = w*a[i] + r[i] + c; r[i] = L(t); c = H(t); }
#   return c;
#
# Stack arguments after the four pushes below:
#   20(%esp) r    24(%esp) a    28(%esp) num    32(%esp) w
#
# Register roles:
#   eax L(t)   edx H(t)   ebx a   ecx w   edi r   esi c   ebp counter
#
# The main loop handles four words per iteration (num / 4 iterations);
# the remaining num % 4 words are handled by the straight-line tail.
# ----------------------------------------------------------------------
	.align 16
.globl bn_mul_add_word
	.type	 bn_mul_add_word,@function
bn_mul_add_word:
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx

	xorl	%esi,%esi		# c = 0
	movl	20(%esp),%edi		# edi = r
	movl	24(%esp),%ebx		# ebx = a
	movl	32(%esp),%ecx		# ecx = w
	movl	28(%esp),%ebp		# ebp = num

	shrl	$2,%ebp			# ebp = num / 4 (groups of four words)
	je	.L910			# no full group: go straight to the tail

	.align 4
.L110:
	# word 0 of the group
	movl	%ecx,%eax		# eax = w
	mull	(%ebx)			# edx:eax = w * a[0]
	addl	(%edi),%eax		# L(t) += r[0]
	adcl	$0,%edx			# H(t) += carry
	addl	%esi,%eax		# L(t) += c
	adcl	$0,%edx			# H(t) += carry
	movl	%eax,(%edi)		# r[0] = L(t)
	movl	%edx,%esi		# c = H(t)

	# word 1 of the group
	movl	%ecx,%eax		# eax = w
	mull	4(%ebx)			# edx:eax = w * a[1]
	addl	4(%edi),%eax		# L(t) += r[1]
	adcl	$0,%edx			# H(t) += carry
	addl	%esi,%eax		# L(t) += c
	adcl	$0,%edx			# H(t) += carry
	movl	%eax,4(%edi)		# r[1] = L(t)
	movl	%edx,%esi		# c = H(t)

	# word 2 of the group
	movl	%ecx,%eax		# eax = w
	mull	8(%ebx)			# edx:eax = w * a[2]
	addl	8(%edi),%eax		# L(t) += r[2]
	adcl	$0,%edx			# H(t) += carry
	addl	%esi,%eax		# L(t) += c
	adcl	$0,%edx			# H(t) += carry
	movl	%eax,8(%edi)		# r[2] = L(t)
	movl	%edx,%esi		# c = H(t)

	# word 3 of the group
	movl	%ecx,%eax		# eax = w
	mull	12(%ebx)		# edx:eax = w * a[3]
	addl	12(%edi),%eax		# L(t) += r[3]
	adcl	$0,%edx			# H(t) += carry
	addl	%esi,%eax		# L(t) += c
	adcl	$0,%edx			# H(t) += carry
	movl	%eax,12(%edi)		# r[3] = L(t)
	movl	%edx,%esi		# c = H(t)

	addl	$16,%ebx		# a += 4 words
	addl	$16,%edi		# r += 4 words

	decl	%ebp			# one group done
	jne	.L110			# more groups left

	.align 4
.L910:
	movl	28(%esp),%ebp		# reload the original num
	andl	$3,%ebp			# ebp = num % 4 (0..3 leftover words)
	je	.L111

	# leftover word 0
	movl	%ecx,%eax		# eax = w
	mull	(%ebx)			# edx:eax = w * a[0]
	addl	(%edi),%eax		# L(t) += r[0]
	adcl	$0,%edx			# H(t) += carry
	addl	%esi,%eax		# L(t) += c
	adcl	$0,%edx			# H(t) += carry
	movl	%eax,(%edi)		# r[0] = L(t)
	movl	%edx,%esi		# c = H(t)
	decl	%ebp
	je	.L111

	# leftover word 1
	movl	%ecx,%eax		# eax = w
	mull	4(%ebx)			# edx:eax = w * a[1]
	addl	4(%edi),%eax		# L(t) += r[1]
	adcl	$0,%edx			# H(t) += carry
	addl	%esi,%eax		# L(t) += c
	adcl	$0,%edx			# H(t) += carry
	movl	%eax,4(%edi)		# r[1] = L(t)
	movl	%edx,%esi		# c = H(t)
	decl	%ebp
	je	.L111

	# leftover word 2 (at most three leftovers, so no further test)
	movl	%ecx,%eax		# eax = w
	mull	8(%ebx)			# edx:eax = w * a[2]
	addl	8(%edi),%eax		# L(t) += r[2]
	adcl	$0,%edx			# H(t) += carry
	addl	%esi,%eax		# L(t) += c
	adcl	$0,%edx			# H(t) += carry
	movl	%eax,8(%edi)		# r[2] = L(t)
	movl	%edx,%esi		# c = H(t)

	.align 4
.L111:
	movl	%esi,%eax		# return c
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.Lfe1:
	.size	 bn_mul_add_word,.Lfe1-bn_mul_add_word

# ----------------------------------------------------------------------
# c = bn_mul_word(r, a, num, w)
#
#   for (i = 0; i < num; i++) { t = w*a[i] + c; r[i] = L(t); c = H(t); }
#   return c;
#
# Stack arguments after the four pushes below:
#   20(%esp) r    24(%esp) a    28(%esp) num    32(%esp) w
#
# Register roles:
#   eax L(t)   edx H(t)   ebx a   ecx w   edi r   esi c   ebp words left
#
# The loop body is unrolled four times; the counter is tested after every
# word, so the loop may exit in the middle of a group.
# ----------------------------------------------------------------------
	.align 16
.globl bn_mul_word
	.type	 bn_mul_word,@function
bn_mul_word:
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%ebx

	xorl	%esi,%esi		# c = 0
	movl	20(%esp),%edi		# edi = r
	movl	24(%esp),%ebx		# ebx = a
	movl	28(%esp),%ebp		# ebp = num
	movl	32(%esp),%ecx		# ecx = w

	# The loop decrements and tests the counter only after it has
	# processed a word.  Entering it with num == 0 would touch a[0] and
	# r[0] and then wrap the counter to 0xffffffff, so bail out first.
	testl	%ebp,%ebp
	je	.L211			# nothing to do: return c = 0

	.align 4
.L210:
	movl	%ecx,%eax		# eax = w
	mull	(%ebx)			# edx:eax = w * a[0]
	addl	%esi,%eax		# L(t) += c
	adcl	$0,%edx			# H(t) += carry
	movl	%eax,(%edi)		# r[0] = L(t)
	movl	%edx,%esi		# c = H(t)
	decl	%ebp			# --num
	je	.L211

	movl	%ecx,%eax		# eax = w
	mull	4(%ebx)			# edx:eax = w * a[1]
	addl	%esi,%eax		# L(t) += c
	adcl	$0,%edx			# H(t) += carry
	movl	%eax,4(%edi)		# r[1] = L(t)
	movl	%edx,%esi		# c = H(t)
	decl	%ebp			# --num
	je	.L211

	movl	%ecx,%eax		# eax = w
	mull	8(%ebx)			# edx:eax = w * a[2]
	addl	%esi,%eax		# L(t) += c
	adcl	$0,%edx			# H(t) += carry
	movl	%eax,8(%edi)		# r[2] = L(t)
	movl	%edx,%esi		# c = H(t)
	decl	%ebp			# --num
	je	.L211

	movl	%ecx,%eax		# eax = w
	mull	12(%ebx)		# edx:eax = w * a[3]
	addl	%esi,%eax		# L(t) += c
	adcl	$0,%edx			# H(t) += carry
	movl	%eax,12(%edi)		# r[3] = L(t)
	movl	%edx,%esi		# c = H(t)
	decl	%ebp			# --num
	je	.L211

	addl	$16,%ebx		# a += 4 words
	addl	$16,%edi		# r += 4 words

	jmp	.L210
	.align 16
.L211:
	movl	%esi,%eax		# return c
	popl	%ebx
	popl	%esi
	popl	%edi
	popl	%ebp
	ret
.Lfe2:
	.size	 bn_mul_word,.Lfe2-bn_mul_word

# ----------------------------------------------------------------------
# bn_sqr_words(r, a, n)
#
#   for (i = 0; i < n; i++) { t = a[i]*a[i]; r[2*i] = L(t); r[2*i+1] = H(t); }
#
# r therefore receives 2*n words.  %eax is not set to any particular
# value on return (it holds whatever the last instruction left there).
#
# Stack arguments after the three pushes below:
#   16(%esp) r    20(%esp) a    24(%esp) n
#
# Register roles:
#   esi r   edi a   ebx counter   edx:eax product
#
# As in bn_mul_add_word: n / 4 unrolled groups, then an n % 4 tail.
# ----------------------------------------------------------------------
	.align 16
.globl bn_sqr_words
	.type	 bn_sqr_words,@function
bn_sqr_words:
	pushl	%edi
	pushl	%esi
	pushl	%ebx
	movl	16(%esp),%esi		# esi = r
	movl	20(%esp),%edi		# edi = a
	movl	24(%esp),%ebx		# ebx = n
	shrl	$2,%ebx			# ebx = n / 4 (groups of four words)
	jz	.L99			# no full group: go straight to the tail

	.align 4
.L28:
	movl	(%edi),%eax		# eax = a[0]
	mull	%eax			# edx:eax = a[0] * a[0]
	movl	%eax,(%esi)		# r[0] = low half
	movl	%edx,4(%esi)		# r[1] = high half

	movl	4(%edi),%eax		# eax = a[1]
	mull	%eax			# edx:eax = a[1] * a[1]
	movl	%eax,8(%esi)		# r[2] = low half
	movl	%edx,12(%esi)		# r[3] = high half

	movl	8(%edi),%eax		# eax = a[2]
	mull	%eax			# edx:eax = a[2] * a[2]
	movl	%eax,16(%esi)		# r[4] = low half
	movl	%edx,20(%esi)		# r[5] = high half

	movl	12(%edi),%eax		# eax = a[3]
	mull	%eax			# edx:eax = a[3] * a[3]
	movl	%eax,24(%esi)		# r[6] = low half
	movl	%edx,28(%esi)		# r[7] = high half

	addl	$16,%edi		# a += 4 words
	addl	$32,%esi		# r += 8 words
	decl	%ebx			# one group done
	jnz	.L28			# more groups left

	.align 16
.L99:
	movl	24(%esp),%ebx		# reload the original n
	andl	$3,%ebx			# ebx = n % 4 (0..3 leftover words)
	jz	.L29

	movl	(%edi),%eax		# eax = a[0]
	mull	%eax			# edx:eax = a[0] * a[0]
	movl	%eax,(%esi)		# r[0] = low half
	movl	%edx,4(%esi)		# r[1] = high half
	decl	%ebx
	jz	.L29

	movl	4(%edi),%eax		# eax = a[1]
	mull	%eax			# edx:eax = a[1] * a[1]
	movl	%eax,8(%esi)		# r[2] = low half
	movl	%edx,12(%esi)		# r[3] = high half
	decl	%ebx
	jz	.L29

	# at most three leftovers, so no further test
	movl	8(%edi),%eax		# eax = a[2]
	mull	%eax			# edx:eax = a[2] * a[2]
	movl	%eax,16(%esi)		# r[4] = low half
	movl	%edx,20(%esi)		# r[5] = high half

.L29:
	popl	%ebx
	popl	%esi
	popl	%edi
	ret
.Lfe3:
	.size	 bn_sqr_words,.Lfe3-bn_sqr_words

# ----------------------------------------------------------------------
# q = bn_div64(h, l, d)
#
#   return ((h << 32) | l) / d;		(unsigned, quotient only)
#
# Stack arguments on entry:
#   4(%esp) h (high word)   8(%esp) l (low word)   12(%esp) d (divisor)
#
# divl leaves the quotient in %eax and the remainder in %edx; only the
# quotient is returned.  divl raises a divide error when d == 0 or when
# the quotient does not fit in 32 bits (that is, when h >= d), so the
# caller must guarantee h < d.
# ----------------------------------------------------------------------
	.align 16
.globl bn_div64
	.type	 bn_div64,@function
bn_div64:
	movl	4(%esp),%edx		# edx = h, high word of the dividend
	movl	8(%esp),%eax		# eax = l, low word of the dividend
	divl	12(%esp)		# eax = (h:l) / d, edx = (h:l) % d
	ret
.Lfe4:
	.size	 bn_div64,.Lfe4-bn_div64
	.ident	"GCC: (GNU) 2.6.3"