diff options
author | Andy Polyakov <appro@openssl.org> | 2012-10-15 14:04:52 +0000 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2012-10-15 14:04:52 +0000 |
commit | fd3b0eb01dac9fda98a0f1a586eee72c65e36b10 (patch) | |
tree | ccbc383ed5e3fad9d099ec06ccd9e4ed618fa7a7 /crypto | |
parent | d17b59e49f32ec47be8e2418b439c239a5cd9618 (diff) | |
download | openssl-fd3b0eb01dac9fda98a0f1a586eee72c65e36b10.tar.gz |
sparcv9_modes.pl: membars are reported as must-have.
Diffstat (limited to 'crypto')
-rw-r--r-- | crypto/perlasm/sparcv9_modes.pl | 25 |
1 file changed, 10 insertions, 15 deletions
```diff
diff --git a/crypto/perlasm/sparcv9_modes.pl b/crypto/perlasm/sparcv9_modes.pl
index d372586bbc..445ca4fc88 100644
--- a/crypto/perlasm/sparcv9_modes.pl
+++ b/crypto/perlasm/sparcv9_modes.pl
@@ -12,14 +12,9 @@
 # This is "cooperative" optimization, as it reduces overall pressure
 # on memory interface. Benefits can't be observed/quantified with
 # usual benchmarks, on the contrary you can notice that single-thread
-# performance for parallelizable modes is ~1.5% worse. Special note
-# about commented 'membar' instructions, otherwise recommended by
-# manual. Rationale is following. Memory view is consistent from
-# viewpoint of processor executing the code even when ASI in question
-# is used. If thread on another processor has to access the result,
-# its availability would have to be mediated and it can be done only
-# through a syncronization operation which would requre ... 'membar'.
-# All this based on suggestions from David Miller.
+# performance for parallelizable modes is ~1.5% worse for largest
+# block sizes [though few percent better for not so long ones]. All
+# this based on suggestions from David Miller.
 
 my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
 my ($ileft,$iright,$ooff,$omask,$ivoff,$blk_init)=map("%l$_",(0..7));
@@ -213,7 +208,7 @@ $::code.=<<___;
 	brnz,pt		$len, .L${bits}_cbc_enc_blk_loop
 	add		$out, 8, $out
 
-	!membar		0x0f
+	membar		#StoreLoad|#StoreStore
 	brnz,pt		$blk_init, .L${bits}_cbc_enc_loop
 	mov		$blk_init, $len
 ___
@@ -277,9 +272,9 @@ $::code.=<<___;
 	mov		0xff, $omask
 	sub		$iright, $ileft, $iright
 	and		$out, 7, $ooff
-	cmp		$len, 127
+	cmp		$len, 255
 	movrnz		$ooff, 0, $blk_init		! if (	$out&7 ||
-	movleu		$::size_t_cc, 0, $blk_init	!	$len<128 ||
+	movleu		$::size_t_cc, 0, $blk_init	!	$len<256 ||
 	brnz,pn		$blk_init, .L${bits}cbc_dec_blk	!	$inp==$out)
 	srl		$omask, $ooff, $omask
 
@@ -569,7 +564,7 @@ $::code.=<<___;
 	add		$blk_init, $len, $len
 	andcc		$len, 1, %g0		! is number of blocks even?
 
-	!membar		0x0f
+	membar		#StoreLoad|#StoreStore
 	bnz,pt		%icc, .L${bits}_cbc_dec_loop
 	srl		$len, 0, $len
 	brnz,pn		$len, .L${bits}_cbc_dec_loop2x
@@ -630,9 +625,9 @@ ${alg}${bits}_t4_ctr32_encrypt:
 	mov		0xff, $omask
 	sub		$iright, $ileft, $iright
 	and		$out, 7, $ooff
-	cmp		$len, 127
+	cmp		$len, 255
 	movrnz		$ooff, 0, $blk_init		! if (	$out&7 ||
-	movleu		$::size_t_cc, 0, $blk_init	!	$len<128 ||
+	movleu		$::size_t_cc, 0, $blk_init	!	$len<256 ||
 	brnz,pn		$blk_init, .L${bits}_ctr32_blk	!	$inp==$out)
 	srl		$omask, $ooff, $omask
 
@@ -884,7 +879,7 @@ $::code.=<<___;
 	add		$blk_init, $len, $len
 	andcc		$len, 1, %g0		! is number of blocks even?
 
-	!membar		0x0f
+	membar		#StoreLoad|#StoreStore
 	bnz,pt		%icc, .L${bits}_ctr32_loop
 	srl		$len, 0, $len
 	brnz,pn		$len, .L${bits}_ctr32_loop2x
```