diff options
Diffstat (limited to 'crypto/sm4/asm/sm4-armv8.pl')
-rwxr-xr-x | crypto/sm4/asm/sm4-armv8.pl | 635 |
1 files changed, 635 insertions, 0 deletions
diff --git a/crypto/sm4/asm/sm4-armv8.pl b/crypto/sm4/asm/sm4-armv8.pl new file mode 100755 index 0000000000..7358a6e6a2 --- /dev/null +++ b/crypto/sm4/asm/sm4-armv8.pl @@ -0,0 +1,635 @@ +#! /usr/bin/env perl +# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the Apache License 2.0 (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +# +# This module implements support for SM4 hw support on aarch64 +# Oct 2021 +# + +# $output is the last argument if it looks like a file (it has an extension) +# $flavour is the first argument if it doesn't look like a file +$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef; +$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef; + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or +( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or +die "can't locate arm-xlate.pl"; + +open OUT,"| \"$^X\" $xlate $flavour \"$output\"" + or die "can't call $xlate: $!"; +*STDOUT=*OUT; + +$prefix="sm4_v8"; +my @rks=map("v$_",(0..7)); + +sub rev32() { +my $dst = shift; +my $src = shift; +$code.=<<___; +#ifndef __ARMEB__ + rev32 $dst.16b,$src.16b +#endif +___ +} + +sub enc_blk () { +my $data = shift; +$code.=<<___; + sm4e $data.4s,@rks[0].4s + sm4e $data.4s,@rks[1].4s + sm4e $data.4s,@rks[2].4s + sm4e $data.4s,@rks[3].4s + sm4e $data.4s,@rks[4].4s + sm4e $data.4s,@rks[5].4s + sm4e $data.4s,@rks[6].4s + sm4e $data.4s,@rks[7].4s + rev64 $data.4S,$data.4S + ext $data.16b,$data.16b,$data.16b,#8 +___ +} + +sub enc_4blks () { +my $data0 = shift; +my $data1 = shift; +my $data2 = shift; +my $data3 = shift; +$code.=<<___; + sm4e $data0.4s,@rks[0].4s + sm4e $data1.4s,@rks[0].4s + sm4e $data2.4s,@rks[0].4s + sm4e $data3.4s,@rks[0].4s + + sm4e $data0.4s,@rks[1].4s + sm4e $data1.4s,@rks[1].4s + sm4e $data2.4s,@rks[1].4s + sm4e $data3.4s,@rks[1].4s + + sm4e $data0.4s,@rks[2].4s + sm4e $data1.4s,@rks[2].4s + sm4e $data2.4s,@rks[2].4s + sm4e $data3.4s,@rks[2].4s + + sm4e $data0.4s,@rks[3].4s + sm4e $data1.4s,@rks[3].4s + sm4e $data2.4s,@rks[3].4s + sm4e $data3.4s,@rks[3].4s + + sm4e $data0.4s,@rks[4].4s + sm4e $data1.4s,@rks[4].4s + sm4e $data2.4s,@rks[4].4s + sm4e $data3.4s,@rks[4].4s + + sm4e $data0.4s,@rks[5].4s + sm4e $data1.4s,@rks[5].4s + sm4e $data2.4s,@rks[5].4s + sm4e $data3.4s,@rks[5].4s + + sm4e $data0.4s,@rks[6].4s + sm4e $data1.4s,@rks[6].4s + sm4e $data2.4s,@rks[6].4s + sm4e $data3.4s,@rks[6].4s + + sm4e $data0.4s,@rks[7].4s + rev64 $data0.4S,$data0.4S + sm4e $data1.4s,@rks[7].4s + ext $data0.16b,$data0.16b,$data0.16b,#8 + rev64 $data1.4S,$data1.4S + sm4e $data2.4s,@rks[7].4s + ext $data1.16b,$data1.16b,$data1.16b,#8 + rev64 $data2.4S,$data2.4S + sm4e $data3.4s,@rks[7].4s + ext $data2.16b,$data2.16b,$data2.16b,#8 + rev64 $data3.4S,$data3.4S + ext $data3.16b,$data3.16b,$data3.16b,#8 +___ +} + +$code=<<___; +#include "arm_arch.h" +.arch armv8-a+crypto +.text +___ + +{{{ +$code.=<<___; +.align 6 +.Lck: + .long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269 + .long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9 + .long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249 + .long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9 + .long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229 + .long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299 + .long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209 + .long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279 +.Lfk: + .long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc +___ +}}} + +{{{ +my ($key,$keys)=("x0","x1"); +my ($tmp)=("x2"); +my ($key0,$key1,$key2,$key3,$key4,$key5,$key6,$key7)=map("v$_",(0..7)); +my ($const0,$const1,$const2,$const3,$const4,$const5,$const6,$const7)=map("v$_",(16..23)); +my ($fkconst) = ("v24"); +$code.=<<___; +.globl ${prefix}_set_encrypt_key +.type ${prefix}_set_encrypt_key,%function +.align 5 +${prefix}_set_encrypt_key: + AARCH64_VALID_CALL_TARGET + ld1 {$key0.4s},[$key] + adr $tmp,.Lfk + ld1 {$fkconst.4s},[$tmp] + adr $tmp,.Lck + ld1 {$const0.4s,$const1.4s,$const2.4s,$const3.4s},[$tmp],64 +___ + &rev32($key0, $key0); +$code.=<<___; + ld1 {$const4.4s,$const5.4s,$const6.4s,$const7.4s},[$tmp] + eor $key0.16b,$key0.16b,$fkconst.16b; + sm4ekey $key0.4S,$key0.4S,$const0.4S + sm4ekey $key1.4S,$key0.4S,$const1.4S + sm4ekey $key2.4S,$key1.4S,$const2.4S + sm4ekey $key3.4S,$key2.4S,$const3.4S + sm4ekey $key4.4S,$key3.4S,$const4.4S + st1 {$key0.4s,$key1.4s,$key2.4s,$key3.4s},[$keys],64 + sm4ekey $key5.4S,$key4.4S,$const5.4S + sm4ekey $key6.4S,$key5.4S,$const6.4S + sm4ekey $key7.4S,$key6.4S,$const7.4S + st1 {$key4.4s,$key5.4s,$key6.4s,$key7.4s},[$keys] + ret +.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key +___ +}}} + +{{{ +my ($key,$keys)=("x0","x1"); +my ($tmp)=("x2"); +my ($key7,$key6,$key5,$key4,$key3,$key2,$key1,$key0)=map("v$_",(0..7)); +my ($const0,$const1,$const2,$const3,$const4,$const5,$const6,$const7)=map("v$_",(16..23)); +my ($fkconst) = ("v24"); +$code.=<<___; +.globl ${prefix}_set_decrypt_key +.type ${prefix}_set_decrypt_key,%function +.align 5 +${prefix}_set_decrypt_key: + AARCH64_VALID_CALL_TARGET + ld1 {$key0.4s},[$key] + adr $tmp,.Lfk + ld1 {$fkconst.4s},[$tmp] + adr $tmp, .Lck + ld1 {$const0.4s,$const1.4s,$const2.4s,$const3.4s},[$tmp],64 +___ + &rev32($key0, $key0); +$code.=<<___; + ld1 {$const4.4s,$const5.4s,$const6.4s,$const7.4s},[$tmp] + eor $key0.16b, $key0.16b,$fkconst.16b; + sm4ekey $key0.4S,$key0.4S,$const0.4S + sm4ekey $key1.4S,$key0.4S,$const1.4S + sm4ekey $key2.4S,$key1.4S,$const2.4S + rev64 $key0.4s,$key0.4s + rev64 $key1.4s,$key1.4s + ext $key0.16b,$key0.16b,$key0.16b,#8 + ext $key1.16b,$key1.16b,$key1.16b,#8 + sm4ekey $key3.4S,$key2.4S,$const3.4S + sm4ekey $key4.4S,$key3.4S,$const4.4S + rev64 $key2.4s,$key2.4s + rev64 $key3.4s,$key3.4s + ext $key2.16b,$key2.16b,$key2.16b,#8 + ext $key3.16b,$key3.16b,$key3.16b,#8 + sm4ekey $key5.4S,$key4.4S,$const5.4S + sm4ekey $key6.4S,$key5.4S,$const6.4S + rev64 $key4.4s,$key4.4s + rev64 $key5.4s,$key5.4s + ext $key4.16b,$key4.16b,$key4.16b,#8 + ext $key5.16b,$key5.16b,$key5.16b,#8 + sm4ekey $key7.4S,$key6.4S,$const7.4S + rev64 $key6.4s, $key6.4s + rev64 $key7.4s, $key7.4s + ext $key6.16b,$key6.16b,$key6.16b,#8 + ext $key7.16b,$key7.16b,$key7.16b,#8 + st1 {$key7.4s,$key6.4s,$key5.4s,$key4.4s},[$keys],64 + st1 {$key3.4s,$key2.4s,$key1.4s,$key0.4s},[$keys] + ret +.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key +___ +}}} + +{{{ +sub gen_block () { +my $dir = shift; +my ($inp,$out,$rk)=map("x$_",(0..2)); +my ($data)=("v16"); +$code.=<<___; +.globl ${prefix}_${dir}crypt +.type ${prefix}_${dir}crypt,%function +.align 5 +${prefix}_${dir}crypt: + AARCH64_VALID_CALL_TARGET + ld1 {$data.4s},[$inp] + ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk],64 + ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk] +___ + &rev32($data,$data); + &enc_blk($data); + &rev32($data,$data); +$code.=<<___; + st1 {$data.4s},[$out] + ret +.size ${prefix}_${dir}crypt,.-${prefix}_${dir}crypt +___ +} + +&gen_block("en"); +&gen_block("de"); +}}} + +{{{ +my ($inp,$out,$len,$rk)=map("x$_",(0..3)); +my ($enc) = ("w4"); +my @dat=map("v$_",(16..23)); +$code.=<<___; +.globl ${prefix}_ecb_encrypt +.type ${prefix}_ecb_encrypt,%function +.align 5 +${prefix}_ecb_encrypt: + AARCH64_VALID_CALL_TARGET + ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk],#64 + ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk] +1: + cmp $len,#64 + b.lt 1f + ld1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$inp],#64 + cmp $len,#128 + b.lt 2f + ld1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$inp],#64 + // 8 blocks +___ + &rev32(@dat[0],@dat[0]); + &rev32(@dat[1],@dat[1]); + &rev32(@dat[2],@dat[2]); + &rev32(@dat[3],@dat[3]); + &rev32(@dat[4],@dat[4]); + &rev32(@dat[5],@dat[5]); + &rev32(@dat[6],@dat[6]); + &rev32(@dat[7],@dat[7]); + &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]); + &enc_4blks(@dat[4],@dat[5],@dat[6],@dat[7]); + &rev32(@dat[0],@dat[0]); + &rev32(@dat[1],@dat[1]); + &rev32(@dat[2],@dat[2]); + &rev32(@dat[3],@dat[3]); + &rev32(@dat[4],@dat[4]); + &rev32(@dat[5],@dat[5]); +$code.=<<___; + st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 +___ + &rev32(@dat[6],@dat[6]); + &rev32(@dat[7],@dat[7]); +$code.=<<___; + st1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$out],#64 + subs $len,$len,#128 + b.gt 1b + ret + // 4 blocks +2: +___ + &rev32(@dat[0],@dat[0]); + &rev32(@dat[1],@dat[1]); + &rev32(@dat[2],@dat[2]); + &rev32(@dat[3],@dat[3]); + &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]); + &rev32(@dat[0],@dat[0]); + &rev32(@dat[1],@dat[1]); + &rev32(@dat[2],@dat[2]); + &rev32(@dat[3],@dat[3]); +$code.=<<___; + st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 + subs $len,$len,#64 + b.gt 1b +1: + subs $len,$len,#16 + b.lt 1f + ld1 {@dat[0].4s},[$inp],#16 +___ + &rev32(@dat[0],@dat[0]); + &enc_blk(@dat[0]); + &rev32(@dat[0],@dat[0]); +$code.=<<___; + st1 {@dat[0].4s},[$out],#16 + b.ne 1b +1: + ret +.size ${prefix}_ecb_encrypt,.-${prefix}_ecb_encrypt +___ +}}} + +{{{ +my ($inp,$out,$len,$rk,$ivp)=map("x$_",(0..4)); +my ($enc) = ("w5"); +my @dat=map("v$_",(16..23)); +my @in=map("v$_",(24..31)); +my ($ivec) = ("v8"); +$code.=<<___; +.globl ${prefix}_cbc_encrypt +.type ${prefix}_cbc_encrypt,%function +.align 5 +${prefix}_cbc_encrypt: + AARCH64_VALID_CALL_TARGET + stp d8,d9,[sp, #-16]! + + ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk],#64 + ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk] + ld1 {$ivec.4s},[$ivp] + cmp $enc,#0 + b.eq .Ldec +1: + cmp $len, #64 + b.lt 1f + ld1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$inp],#64 + eor @dat[0].16b,@dat[0].16b,$ivec.16b +___ + &rev32(@dat[1],@dat[1]); + &rev32(@dat[0],@dat[0]); + &rev32(@dat[2],@dat[2]); + &rev32(@dat[3],@dat[3]); + &enc_blk(@dat[0]); +$code.=<<___; + eor @dat[1].16b,@dat[1].16b,@dat[0].16b +___ + &enc_blk(@dat[1]); + &rev32(@dat[0],@dat[0]); +$code.=<<___; + eor @dat[2].16b,@dat[2].16b,@dat[1].16b +___ + &enc_blk(@dat[2]); + &rev32(@dat[1],@dat[1]); +$code.=<<___; + eor @dat[3].16b,@dat[3].16b,@dat[2].16b +___ + &enc_blk(@dat[3]); + &rev32(@dat[2],@dat[2]); + &rev32(@dat[3],@dat[3]); +$code.=<<___; + mov $ivec.16b,@dat[3].16b + st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 + subs $len,$len,#64 + b.ne 1b +1: + subs $len,$len,#16 + b.lt 3f + ld1 {@dat[0].4s},[$inp],#16 + eor $ivec.16b,$ivec.16b,@dat[0].16b +___ + &rev32($ivec,$ivec); + &enc_blk($ivec); + &rev32($ivec,$ivec); +$code.=<<___; + st1 {$ivec.16b},[$out],#16 + b.ne 1b + b 3f +.Ldec: +1: + cmp $len, #64 + b.lt 1f + ld1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$inp] + ld1 {@in[0].4s,@in[1].4s,@in[2].4s,@in[3].4s},[$inp],#64 + cmp $len,#128 + b.lt 2f + // 8 blocks mode + ld1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$inp] + ld1 {@in[4].4s,@in[5].4s,@in[6].4s,@in[7].4s},[$inp],#64 +___ + &rev32(@dat[0],@dat[0]); + &rev32(@dat[1],@dat[1]); + &rev32(@dat[2],@dat[2]); + &rev32(@dat[3],$dat[3]); + &rev32(@dat[4],@dat[4]); + &rev32(@dat[5],@dat[5]); + &rev32(@dat[6],@dat[6]); + &rev32(@dat[7],$dat[7]); + &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]); + &enc_4blks(@dat[4],@dat[5],@dat[6],@dat[7]); + &rev32(@dat[0],@dat[0]); + &rev32(@dat[1],@dat[1]); + &rev32(@dat[2],@dat[2]); + &rev32(@dat[3],@dat[3]); + &rev32(@dat[4],@dat[4]); + &rev32(@dat[5],@dat[5]); + &rev32(@dat[6],@dat[6]); + &rev32(@dat[7],@dat[7]); +$code.=<<___; + eor @dat[0].16b,@dat[0].16b,$ivec.16b + eor @dat[1].16b,@dat[1].16b,@in[0].16b + eor @dat[2].16b,@dat[2].16b,@in[1].16b + mov $ivec.16b,@in[7].16b + eor @dat[3].16b,$dat[3].16b,@in[2].16b + eor @dat[4].16b,$dat[4].16b,@in[3].16b + eor @dat[5].16b,$dat[5].16b,@in[4].16b + eor @dat[6].16b,$dat[6].16b,@in[5].16b + eor @dat[7].16b,$dat[7].16b,@in[6].16b + st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 + st1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$out],#64 + subs $len,$len,128 + b.gt 1b + b 3f + // 4 blocks mode +2: +___ + &rev32(@dat[0],@dat[0]); + &rev32(@dat[1],@dat[1]); + &rev32(@dat[2],@dat[2]); + &rev32(@dat[3],$dat[3]); + &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]); + &rev32(@dat[0],@dat[0]); + &rev32(@dat[1],@dat[1]); + &rev32(@dat[2],@dat[2]); + &rev32(@dat[3],@dat[3]); +$code.=<<___; + eor @dat[0].16b,@dat[0].16b,$ivec.16b + eor @dat[1].16b,@dat[1].16b,@in[0].16b + mov $ivec.16b,@in[3].16b + eor @dat[2].16b,@dat[2].16b,@in[1].16b + eor @dat[3].16b,$dat[3].16b,@in[2].16b + st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 + subs $len,$len,#64 + b.gt 1b +1: + subs $len,$len,#16 + b.lt 3f + ld1 {@dat[0].4s},[$inp],#16 + mov @in[0].16b,@dat[0].16b +___ + &rev32(@dat[0],@dat[0]); + &enc_blk(@dat[0]); + &rev32(@dat[0],@dat[0]); +$code.=<<___; + eor @dat[0].16b,@dat[0].16b,$ivec.16b + mov $ivec.16b,@in[0].16b + st1 {@dat[0].16b},[$out],#16 + b.ne 1b +3: + // save back IV + st1 {$ivec.16b},[$ivp] + ldp d8,d9,[sp],#16 + ret +.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt +___ +}}} + +{{{ +my ($inp,$out,$len,$rk,$ivp)=map("x$_",(0..4)); +my ($ctr)=("w5"); +my @dat=map("v$_",(16..23)); +my @in=map("v$_",(24..31)); +my ($ivec)=("v8"); +$code.=<<___; +.globl ${prefix}_ctr32_encrypt_blocks +.type ${prefix}_ctr32_encrypt_blocks,%function +.align 5 +${prefix}_ctr32_encrypt_blocks: + AARCH64_VALID_CALL_TARGET + stp d8,d9,[sp, #-16]! + + ld1 {$ivec.4s},[$ivp] + ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk],64 + ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk] +___ + &rev32($ivec,$ivec); +$code.=<<___; + mov $ctr,$ivec.s[3] +1: + cmp $len,#4 + b.lt 1f + ld1 {@in[0].4s,@in[1].4s,@in[2].4s,@in[3].4s},[$inp],#64 + mov @dat[0].16b,$ivec.16b + mov @dat[1].16b,$ivec.16b + mov @dat[2].16b,$ivec.16b + mov @dat[3].16b,$ivec.16b + add $ctr,$ctr,#1 + mov $dat[1].s[3],$ctr + add $ctr,$ctr,#1 + mov @dat[2].s[3],$ctr + add $ctr,$ctr,#1 + mov @dat[3].s[3],$ctr + cmp $len,#8 + b.lt 2f + ld1 {@in[4].4s,@in[5].4s,@in[6].4s,@in[7].4s},[$inp],#64 + mov @dat[4].16b,$ivec.16b + mov @dat[5].16b,$ivec.16b + mov @dat[6].16b,$ivec.16b + mov @dat[7].16b,$ivec.16b + add $ctr,$ctr,#1 + mov $dat[4].s[3],$ctr + add $ctr,$ctr,#1 + mov @dat[5].s[3],$ctr + add $ctr,$ctr,#1 + mov @dat[6].s[3],$ctr + add $ctr,$ctr,#1 + mov @dat[7].s[3],$ctr +___ + &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]); + &enc_4blks(@dat[4],@dat[5],@dat[6],@dat[7]); + &rev32(@dat[0],@dat[0]); + &rev32(@dat[1],@dat[1]); + &rev32(@dat[2],@dat[2]); + &rev32(@dat[3],@dat[3]); + &rev32(@dat[4],@dat[4]); + &rev32(@dat[5],@dat[5]); + &rev32(@dat[6],@dat[6]); + &rev32(@dat[7],@dat[7]); +$code.=<<___; + eor @dat[0].16b,@dat[0].16b,@in[0].16b + eor @dat[1].16b,@dat[1].16b,@in[1].16b + eor @dat[2].16b,@dat[2].16b,@in[2].16b + eor @dat[3].16b,@dat[3].16b,@in[3].16b + eor @dat[4].16b,@dat[4].16b,@in[4].16b + eor @dat[5].16b,@dat[5].16b,@in[5].16b + eor @dat[6].16b,@dat[6].16b,@in[6].16b + eor @dat[7].16b,@dat[7].16b,@in[7].16b + st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 + st1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$out],#64 + subs $len,$len,#8 + b.eq 3f + add $ctr,$ctr,#1 + mov $ivec.s[3],$ctr + b 1b +2: +___ + &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]); + &rev32(@dat[0],@dat[0]); + &rev32(@dat[1],@dat[1]); + &rev32(@dat[2],@dat[2]); + &rev32(@dat[3],@dat[3]); +$code.=<<___; + eor @dat[0].16b,@dat[0].16b,@in[0].16b + eor @dat[1].16b,@dat[1].16b,@in[1].16b + eor @dat[2].16b,@dat[2].16b,@in[2].16b + eor @dat[3].16b,@dat[3].16b,@in[3].16b + st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64 + subs $len,$len,#4 + b.eq 3f + add $ctr,$ctr,#1 + mov $ivec.s[3],$ctr + b 1b +1: + subs $len,$len,#1 + b.lt 3f + mov $dat[0].16b,$ivec.16b + ld1 {@in[0].4s},[$inp],#16 +___ + &enc_blk(@dat[0]); + &rev32(@dat[0],@dat[0]); +$code.=<<___; + eor $dat[0].16b,$dat[0].16b,@in[0].16b + st1 {$dat[0].4s},[$out],#16 + b.eq 3f + add $ctr,$ctr,#1 + mov $ivec.s[3],$ctr + b 1b +3: + ldp d8,d9,[sp],#16 + ret +.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks +___ +}}} +######################################## +{ my %opcode = ( + "sm4e" => 0xcec08400, + "sm4ekey" => 0xce60c800); + + sub unsm4 { + my ($mnemonic,$arg)=@_; + + $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o + && + sprintf ".inst\t0x%08x\t//%s %s", + $opcode{$mnemonic}|$1|($2<<5)|($3<<16), + $mnemonic,$arg; + } +} + +open SELF,$0; +while(<SELF>) { + next if (/^#!/); + last if (!s/^#/\/\// and !/^$/); + print; +} +close SELF; + +foreach(split("\n",$code)) { + s/\`([^\`]*)\`/eval($1)/ge; + + s/\b(sm4\w+)\s+([qv].*)/unsm4($1,$2)/ge; + print $_,"\n"; +} + +close STDOUT or die "error closing STDOUT: $!"; |