diff options
author | Andy Polyakov <appro@openssl.org> | 2004-11-21 10:36:25 +0000 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2004-11-21 10:36:25 +0000 |
commit | 376729e1301f82a8f20ce78f36b7107c75720a7c (patch) | |
tree | 85c5a915d6d75b5de41f0926c47ab39d6a0f6c23 /crypto/perlasm/x86unix.pl | |
parent | 00dd8f6d6e703dadab3c50af84ed55ffff598ddc (diff) | |
download | openssl-376729e1301f82a8f20ce78f36b7107c75720a7c.tar.gz |
RC4 tune-up for Intel P4 core, both 32- and 64-bit ones. As it's
apparently impossible to compose blended code which would perform
satisfactorily on all x86 and x86_64 cores, an extra RC4_CHAR
code-path is introduced and P4 core is detected at run-time. This
way we keep original performance on non-P4 implementations and
turbo-charge P4 performance by a factor of 2.8x (on 32-bit core).
Diffstat (limited to 'crypto/perlasm/x86unix.pl')
-rw-r--r-- | crypto/perlasm/x86unix.pl | 11 |
1 files changed, 6 insertions, 5 deletions
diff --git a/crypto/perlasm/x86unix.pl b/crypto/perlasm/x86unix.pl index 7d87eb1701..867fa09e48 100644 --- a/crypto/perlasm/x86unix.pl +++ b/crypto/perlasm/x86unix.pl @@ -161,7 +161,7 @@ sub main'shl { &out2("sall",@_); } sub main'shr { &out2("shrl",@_); } sub main'xor { &out2("xorl",@_); } sub main'xorb { &out2("xorb",@_); } -sub main'add { &out2("addl",@_); } +sub main'add { &out2($_[0]=~/%[a-d][lh]/?"addb":"addl",@_); } sub main'adc { &out2("adcl",@_); } sub main'sub { &out2("subl",@_); } sub main'sbb { &out2("sbbl",@_); } @@ -189,7 +189,7 @@ sub main'jc { &out1("jc",@_); } sub main'jnc { &out1("jnc",@_); } sub main'jno { &out1("jno",@_); } sub main'dec { &out1("decl",@_); } -sub main'inc { &out1("incl",@_); } +sub main'inc { &out1($_[0]=~/%[a-d][hl]/?"incb":"incl",@_); } sub main'push { &out1("pushl",@_); $stack+=4; } sub main'pop { &out1("popl",@_); $stack-=4; } sub main'pushf { &out0("pushfl"); $stack+=4; } @@ -205,9 +205,10 @@ sub main'nop { &out0("nop"); } sub main'test { &out2("testl",@_); } sub main'bt { &out2("btl",@_); } sub main'leave { &out0("leave"); } -sub main'cpuid { &out0(".byte 0x0f; .byte 0xa2"); } -sub main'rdtsc { &out0(".byte 0x0f; .byte 0x31"); } +sub main'cpuid { &out0(".byte\t0x0f,0xa2"); } +sub main'rdtsc { &out0(".byte\t0x0f,0x31"); } sub main'halt { &out0("hlt"); } +sub main'movz { &out2("movzb",@_); } # SSE2 sub main'emms { &out0("emms"); } @@ -558,7 +559,7 @@ sub main'file_end pushl %ebx movl %edx,%edi movl \$1,%eax - .byte 0x0f; .byte 0xa2 + .byte 0x0f,0xa2 orl \$1<<10,%edx movl %edx,0(%edi) popl %ebx |