about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Andy Polyakov <appro@openssl.org> 2012-03-29 18:20:11 +0000
committer: Andy Polyakov <appro@openssl.org> 2012-03-29 18:20:11 +0000
commit: 482a7d80cfa87f3506cfba3aacf3e18e842605bf (patch)
tree: a5e472984b284ed001042e3a748f829471680157
parent: ee743dca53055e5c213ccc9ea7fbfbd8871a2e64 (diff)
download: openssl-482a7d80cfa87f3506cfba3aacf3e18e842605bf.tar.gz
sha512-armv4.pl: optimize NEON code path by utilizing vbsl, bitwise select.
-rw-r--r--  crypto/sha/asm/sha512-armv4.pl  25
1 files changed, 11 insertions, 14 deletions
diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl
index 7faf37b147..ddeb8d5a96 100644
--- a/crypto/sha/asm/sha512-armv4.pl
+++ b/crypto/sha/asm/sha512-armv4.pl
@@ -26,7 +26,7 @@
# March 2011.
#
# Add NEON implementation. On Cortex A8 it was measured to process
-# one byte in 25.5 cycles or 47% faster than integer-only code.
+# one byte in 23.3 cycles or ~60% faster than integer-only code.
# Byte order [in]dependence. =========================================
#
@@ -463,31 +463,28 @@ $code.=<<___;
vld1.64 {$K},[$Ktbl,:64]! @ K[i++]
vsli.64 $t0,$e,#`64-@Sigma1[0]`
vsli.64 $t1,$e,#`64-@Sigma1[1]`
+ vmov $Ch,$e
vsli.64 $t2,$e,#`64-@Sigma1[2]`
#if $i<16 && defined(__ARMEL__)
vrev64.8 @X[$i],@X[$i]
#endif
- vadd.i64 $T1,$K,$h
- veor $Ch,$f,$g
- veor $t0,$t1
- vand $Ch,$e
- veor $t0,$t2 @ Sigma1(e)
- veor $Ch,$g @ Ch(e,f,g)
- vadd.i64 $T1,$t0
+ vbsl $Ch,$f,$g @ Ch(e,f,g)
+ veor $t1,$t0
vshr.u64 $t0,$a,#@Sigma0[0]
- vadd.i64 $T1,$Ch
+ veor $t2,$t1 @ Sigma1(e)
vshr.u64 $t1,$a,#@Sigma0[1]
+ vadd.i64 $T1,$h,$t2
vshr.u64 $t2,$a,#@Sigma0[2]
+ vadd.i64 $T1,$Ch
vsli.64 $t0,$a,#`64-@Sigma0[0]`
+ vadd.i64 $T1,@X[$i%16]
vsli.64 $t1,$a,#`64-@Sigma0[1]`
+ vadd.i64 $T1,$K
vsli.64 $t2,$a,#`64-@Sigma0[2]`
- vadd.i64 $T1,@X[$i%16]
- vorr $Maj,$a,$c
- vand $Ch,$a,$c
veor $h,$t0,$t1
- vand $Maj,$b
+ veor $Maj,$a,$b
veor $h,$t2 @ Sigma0(a)
- vorr $Maj,$Ch @ Maj(a,b,c)
+ vbsl $Maj,$c,$b @ Maj(a,b,c)
vadd.i64 $h,$T1
vadd.i64 $d,$T1
vadd.i64 $h,$Maj