aboutsummaryrefslogtreecommitdiffstats
path: root/crypto/perlasm
diff options
context:
space:
mode:
authorAndy Polyakov <appro@openssl.org>2004-11-21 10:36:25 +0000
committerAndy Polyakov <appro@openssl.org>2004-11-21 10:36:25 +0000
commit376729e1301f82a8f20ce78f36b7107c75720a7c (patch)
tree85c5a915d6d75b5de41f0926c47ab39d6a0f6c23 /crypto/perlasm
parent00dd8f6d6e703dadab3c50af84ed55ffff598ddc (diff)
downloadopenssl-376729e1301f82a8f20ce78f36b7107c75720a7c.tar.gz
RC4 tune-up for Intel P4 core, both 32- and 64-bit ones. As it's
apparently impossible to compose blended code with would perform satisfactory on all x86 and x86_64 cores, an extra RC4_CHAR code-path is introduced and P4 core is detected at run-time. This way we keep original performance on non-P4 implementations and turbo-charge P4 performance by factor of 2.8x (on 32-bit core).
Diffstat (limited to 'crypto/perlasm')
-rw-r--r--crypto/perlasm/x86unix.pl11
1 files changed, 6 insertions, 5 deletions
diff --git a/crypto/perlasm/x86unix.pl b/crypto/perlasm/x86unix.pl
index 7d87eb1701..867fa09e48 100644
--- a/crypto/perlasm/x86unix.pl
+++ b/crypto/perlasm/x86unix.pl
@@ -161,7 +161,7 @@ sub main'shl { &out2("sall",@_); }
sub main'shr { &out2("shrl",@_); }
sub main'xor { &out2("xorl",@_); }
sub main'xorb { &out2("xorb",@_); }
-sub main'add { &out2("addl",@_); }
+sub main'add { &out2($_[0]=~/%[a-d][lh]/?"addb":"addl",@_); }
sub main'adc { &out2("adcl",@_); }
sub main'sub { &out2("subl",@_); }
sub main'sbb { &out2("sbbl",@_); }
@@ -189,7 +189,7 @@ sub main'jc { &out1("jc",@_); }
sub main'jnc { &out1("jnc",@_); }
sub main'jno { &out1("jno",@_); }
sub main'dec { &out1("decl",@_); }
-sub main'inc { &out1("incl",@_); }
+sub main'inc { &out1($_[0]=~/%[a-d][hl]/?"incb":"incl",@_); }
sub main'push { &out1("pushl",@_); $stack+=4; }
sub main'pop { &out1("popl",@_); $stack-=4; }
sub main'pushf { &out0("pushfl"); $stack+=4; }
@@ -205,9 +205,10 @@ sub main'nop { &out0("nop"); }
sub main'test { &out2("testl",@_); }
sub main'bt { &out2("btl",@_); }
sub main'leave { &out0("leave"); }
-sub main'cpuid { &out0(".byte 0x0f; .byte 0xa2"); }
-sub main'rdtsc { &out0(".byte 0x0f; .byte 0x31"); }
+sub main'cpuid { &out0(".byte\t0x0f,0xa2"); }
+sub main'rdtsc { &out0(".byte\t0x0f,0x31"); }
sub main'halt { &out0("hlt"); }
+sub main'movz { &out2("movzb",@_); }
# SSE2
sub main'emms { &out0("emms"); }
@@ -558,7 +559,7 @@ sub main'file_end
pushl %ebx
movl %edx,%edi
movl \$1,%eax
- .byte 0x0f; .byte 0xa2
+ .byte 0x0f,0xa2
orl \$1<<10,%edx
movl %edx,0(%edi)
popl %ebx