diff options
author | Andy Polyakov <appro@openssl.org> | 2004-11-30 15:46:46 +0000 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2004-11-30 15:46:46 +0000 |
commit | b7b46c9a87c9fe7275a84c5ecb9f5f3459d7b307 (patch) | |
tree | 331c232ea63bb53c95ffc684a7cd1fef7b4be996 /crypto/rc4 | |
parent | e6e1f4cb5e37f77fe61ff568dd2904f21ec5b82c (diff) | |
download | openssl-b7b46c9a87c9fe7275a84c5ecb9f5f3459d7b307.tar.gz |
Add 0.9.7 specific comments to RC4 assembler modules.
Diffstat (limited to 'crypto/rc4')
-rw-r--r-- | crypto/rc4/asm/rc4-586.pl | 15 |
-rwxr-xr-x | crypto/rc4/asm/rc4-amd64.pl | 4 |
-rw-r--r-- | crypto/rc4/asm/rc4-ia64.S | 2 |
3 files changed, 14 insertions, 7 deletions
```diff
diff --git a/crypto/rc4/asm/rc4-586.pl b/crypto/rc4/asm/rc4-586.pl
index 977a9f1237..07b2bc6fcd 100644
--- a/crypto/rc4/asm/rc4-586.pl
+++ b/crypto/rc4/asm/rc4-586.pl
@@ -1,7 +1,7 @@
 #!/usr/local/bin/perl
 
 # At some point it became apparent that the original SSLeay RC4
-# assembler implementation performs suboptimal on latest IA-32
+# assembler implementation performs suboptimaly on latest IA-32
 # microarchitectures. After re-tuning performance has changed as
 # following:
 #
@@ -15,10 +15,12 @@
 #	In other words code performing further 13% faster on AMD
 #	would perform almost 2 times slower on Intel PIII...
 #	For reference! This code delivers ~80% of rc4-amd64.pl
-#	performance on same Opteron machine.
+#	performance on the same Opteron machine.
 # (**)	This number requires compressed key schedule set up by
-#	RC4_set_key, see commentary section in rc4_skey.c for
-#	further details.
+#	RC4_set_key and therefore doesn't apply to 0.9.7 [option for
+#	compressed key schedule is implemented in 0.9.8 and later,
+#	see commentary section in rc4_skey.c for further details].
+#
 #					<appro@fy.chalmers.se>
 
 push(@INC,"perlasm","../../perlasm");
@@ -130,6 +132,8 @@ sub RC4
 	&add(	$d,		8);
 
 	# detect compressed schedule, see commentary section in rc4_skey.c...
+	# in 0.9.7 context ~50 bytes below RC4_CHAR label remain redundant,
+	# as compressed key schedule is set up in 0.9.8 and later.
 	&cmp(&DWP(256,$d),-1);
 	&je(&label("RC4_CHAR"));
 
@@ -190,7 +194,8 @@ sub RC4
 	&jmp(&label("finished"));
 
 	&align(16);
-	# this is essentially Intel P4 specific codepath, see rc4_skey.c...
+	# this is essentially Intel P4 specific codepath, see rc4_skey.c,
+	# and is engaged in 0.9.8 and later context...
 	&set_label("RC4_CHAR");
 
 	&lea	($ty,&DWP(0,$in,$ty));
diff --git a/crypto/rc4/asm/rc4-amd64.pl b/crypto/rc4/asm/rc4-amd64.pl
index 35e426d561..9e0da8af99 100755
--- a/crypto/rc4/asm/rc4-amd64.pl
+++ b/crypto/rc4/asm/rc4-amd64.pl
@@ -30,7 +30,9 @@
 # RC4_CHAR. Kind of ironic, huh? As it's apparently impossible to
 # compose blended code, which would perform even within 30% marginal
 # on either AMD and Intel platforms, I implement both cases. See
-# rc4_skey.c for further details...
+# rc4_skey.c for further details... This applies to 0.9.8 and later.
+# In 0.9.7 context RC4_CHAR codepath is never engaged and ~70 bytes
+# of code remain redundant.
 
 $output=shift;
 
diff --git a/crypto/rc4/asm/rc4-ia64.S b/crypto/rc4/asm/rc4-ia64.S
index 4af7fba7b3..ae84af6729 100644
--- a/crypto/rc4/asm/rc4-ia64.S
+++ b/crypto/rc4/asm/rc4-ia64.S
@@ -18,7 +18,7 @@
 // to input and output streams. Secondly, less obvious, it's possible
 // to pull up some references to elements of the key schedule itself.
 // Fact is that such prior loads are not safe only for "degenerated"
-// key schedule, when all elements equal to the same value, which is
+// key schedule, when some elements equal to the same value, which is
 // never the case [key schedule setup routine makes sure it's not].
 // Furthermore. In order to compress loop body to the minimum, I chose
 // to deploy deposit instruction, which substitutes for the whole
```