sparcv9_modes.pl: membars are reported as must-have.

author: Andy Polyakov <appro@openssl.org> 2012-10-15 14:04:52 +0000
committer: Andy Polyakov <appro@openssl.org> 2012-10-15 14:04:52 +0000
commit: fd3b0eb01dac9fda98a0f1a586eee72c65e36b10 (patch)
tree: ccbc383ed5e3fad9d099ec06ccd9e4ed618fa7a7 /crypto
parent: d17b59e49f32ec47be8e2418b439c239a5cd9618 (diff)
download: openssl-fd3b0eb01dac9fda98a0f1a586eee72c65e36b10.tar.gz
1 file changed, 10 insertions, 15 deletions
diff --git a/crypto/perlasm/sparcv9_modes.pl b/crypto/perlasm/sparcv9_modes.pl
index d372586bbc..445ca4fc88 100644
--- a/crypto/perlasm/sparcv9_modes.pl
+++ b/crypto/perlasm/sparcv9_modes.pl
@@ -12,14 +12,9 @@
 # This is "cooperative" optimization, as it reduces overall pressure
 # on memory interface. Benefits can't be observed/quantified with
 # usual benchmarks, on the contrary you can notice that single-thread
-# performance for parallelizable modes is ~1.5% worse. Special note
-# about commented 'membar' instructions, otherwise recommended by
-# manual. Rationale is following. Memory view is consistent from
-# viewpoint of processor executing the code even when ASI in question
-# is used. If thread on another processor has to access the result,
-# its availability would have to be mediated and it can be done only
-# through a syncronization operation which would requre ... 'membar'.
-# All this based on suggestions from David Miller.
+# performance for parallelizable modes is ~1.5% worse for the largest
+# block sizes [though a few percent better for shorter ones]. All
+# this is based on suggestions from David Miller.
 
 my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
 my ($ileft,$iright,$ooff,$omask,$ivoff,$blk_init)=map("%l$_",(0..7));
@@ -213,7 +208,7 @@ $::code.=<<___;
 	brnz,pt		$len, .L${bits}_cbc_enc_blk_loop
 	add		$out, 8, $out
 
-	!membar		0x0f
+	membar		#StoreLoad|#StoreStore
 	brnz,pt		$blk_init, .L${bits}_cbc_enc_loop
 	mov		$blk_init, $len
 ___
@@ -277,9 +272,9 @@ $::code.=<<___;
 	mov		0xff, $omask
 	sub		$iright, $ileft, $iright
 	and		$out, 7, $ooff
-	cmp		$len, 127
+	cmp		$len, 255
 	movrnz		$ooff, 0, $blk_init		! if (	$out&7 ||
-	movleu		$::size_t_cc, 0, $blk_init	!	$len<128 ||
+	movleu		$::size_t_cc, 0, $blk_init	!	$len<256 ||
 	brnz,pn		$blk_init, .L${bits}cbc_dec_blk	!	$inp==$out)
 	srl		$omask, $ooff, $omask
 
@@ -569,7 +564,7 @@ $::code.=<<___;
 
 	add		$blk_init, $len, $len
 	andcc		$len, 1, %g0		! is number of blocks even?
-	!membar		0x0f
+	membar		#StoreLoad|#StoreStore
 	bnz,pt		%icc, .L${bits}_cbc_dec_loop
 	srl		$len, 0, $len
 	brnz,pn		$len, .L${bits}_cbc_dec_loop2x
@@ -630,9 +625,9 @@ ${alg}${bits}_t4_ctr32_encrypt:
 	mov		0xff, $omask
 	sub		$iright, $ileft, $iright
 	and		$out, 7, $ooff
-	cmp		$len, 127
+	cmp		$len, 255
 	movrnz		$ooff, 0, $blk_init		! if (	$out&7 ||
-	movleu		$::size_t_cc, 0, $blk_init	!	$len<128 ||
+	movleu		$::size_t_cc, 0, $blk_init	!	$len<256 ||
 	brnz,pn		$blk_init, .L${bits}_ctr32_blk	!	$inp==$out)
 	srl		$omask, $ooff, $omask
 
@@ -884,7 +879,7 @@ $::code.=<<___;
 
 	add		$blk_init, $len, $len
 	andcc		$len, 1, %g0		! is number of blocks even?
-	!membar		0x0f
+	membar		#StoreLoad|#StoreStore
 	bnz,pt		%icc, .L${bits}_ctr32_loop
 	srl		$len, 0, $len
 	brnz,pn		$len, .L${bits}_ctr32_loop2x
author	Andy Polyakov <appro@openssl.org>	2012-10-15 14:04:52 +0000
committer	Andy Polyakov <appro@openssl.org>	2012-10-15 14:04:52 +0000
commit	fd3b0eb01dac9fda98a0f1a586eee72c65e36b10 (patch)
tree	ccbc383ed5e3fad9d099ec06ccd9e4ed618fa7a7 /crypto
parent	d17b59e49f32ec47be8e2418b439c239a5cd9618 (diff)
download	openssl-fd3b0eb01dac9fda98a0f1a586eee72c65e36b10.tar.gz