aboutsummaryrefslogtreecommitdiffstats
path: root/crypto/rc4
diff options
context:
space:
mode:
author Andy Polyakov <appro@openssl.org> 2004-11-30 15:46:46 +0000
committer Andy Polyakov <appro@openssl.org> 2004-11-30 15:46:46 +0000
commit b7b46c9a87c9fe7275a84c5ecb9f5f3459d7b307 (patch)
tree 331c232ea63bb53c95ffc684a7cd1fef7b4be996 /crypto/rc4
parent e6e1f4cb5e37f77fe61ff568dd2904f21ec5b82c (diff)
download openssl-b7b46c9a87c9fe7275a84c5ecb9f5f3459d7b307.tar.gz
Add 0.9.7 specific comments to RC4 assembler modules.
Diffstat (limited to 'crypto/rc4')
-rw-r--r-- crypto/rc4/asm/rc4-586.pl 15
-rwxr-xr-x crypto/rc4/asm/rc4-amd64.pl 4
-rw-r--r-- crypto/rc4/asm/rc4-ia64.S 2
3 files changed, 14 insertions, 7 deletions
diff --git a/crypto/rc4/asm/rc4-586.pl b/crypto/rc4/asm/rc4-586.pl
index 977a9f1237..07b2bc6fcd 100644
--- a/crypto/rc4/asm/rc4-586.pl
+++ b/crypto/rc4/asm/rc4-586.pl
@@ -1,7 +1,7 @@
#!/usr/local/bin/perl
# At some point it became apparent that the original SSLeay RC4
-# assembler implementation performs suboptimal on latest IA-32
+# assembler implementation performs suboptimally on latest IA-32
# microarchitectures. After re-tuning, performance has changed as
# follows:
#
@@ -15,10 +15,12 @@
# In other words code performing further 13% faster on AMD
# would perform almost 2 times slower on Intel PIII...
# For reference! This code delivers ~80% of rc4-amd64.pl
-# performance on same Opteron machine.
+# performance on the same Opteron machine.
# (**) This number requires compressed key schedule set up by
-# RC4_set_key, see commentary section in rc4_skey.c for
-# further details.
+# RC4_set_key and therefore doesn't apply to 0.9.7 [option for
+# compressed key schedule is implemented in 0.9.8 and later,
+# see commentary section in rc4_skey.c for further details].
+#
# <appro@fy.chalmers.se>
push(@INC,"perlasm","../../perlasm");
@@ -130,6 +132,8 @@ sub RC4
&add( $d, 8);
# detect compressed schedule, see commentary section in rc4_skey.c...
+ # in 0.9.7 context ~50 bytes below RC4_CHAR label remain redundant,
+ # as compressed key schedule is set up in 0.9.8 and later.
&cmp(&DWP(256,$d),-1);
&je(&label("RC4_CHAR"));
@@ -190,7 +194,8 @@ sub RC4
&jmp(&label("finished"));
&align(16);
- # this is essentially Intel P4 specific codepath, see rc4_skey.c...
+ # this is essentially Intel P4 specific codepath, see rc4_skey.c,
+ # and is engaged in 0.9.8 and later context...
&set_label("RC4_CHAR");
&lea ($ty,&DWP(0,$in,$ty));
diff --git a/crypto/rc4/asm/rc4-amd64.pl b/crypto/rc4/asm/rc4-amd64.pl
index 35e426d561..9e0da8af99 100755
--- a/crypto/rc4/asm/rc4-amd64.pl
+++ b/crypto/rc4/asm/rc4-amd64.pl
@@ -30,7 +30,9 @@
# RC4_CHAR. Kind of ironic, huh? As it's apparently impossible to
# compose blended code, which would perform even within 30% marginal
# on both AMD and Intel platforms, I implement both cases. See
-# rc4_skey.c for further details...
+# rc4_skey.c for further details... This applies to 0.9.8 and later.
+# In 0.9.7 context RC4_CHAR codepath is never engaged and ~70 bytes
+# of code remain redundant.
$output=shift;
diff --git a/crypto/rc4/asm/rc4-ia64.S b/crypto/rc4/asm/rc4-ia64.S
index 4af7fba7b3..ae84af6729 100644
--- a/crypto/rc4/asm/rc4-ia64.S
+++ b/crypto/rc4/asm/rc4-ia64.S
@@ -18,7 +18,7 @@
// to input and output streams. Secondly, less obvious, it's possible
// to pull up some references to elements of the key schedule itself.
// Fact is that such prior loads are not safe only for "degenerated"
-// key schedule, when all elements equal to the same value, which is
+// key schedule, when some elements are equal to the same value, which is
// never the case [key schedule setup routine makes sure it's not].
// Furthermore. In order to compress loop body to the minimum, I chose
// to deploy deposit instruction, which substitutes for the whole