From 18ed3a58b01f8f1affdedced1f4f62447b7df9f9 Mon Sep 17 00:00:00 2001
From: Phoebe Chen
Date: Tue, 12 Sep 2023 07:02:30 -0700
Subject: riscv: Provide vector crypto implementation of AES-CTR mode.

Support zvkb-zvkned based rvv AES-128/192/256-CTR encryption.

Signed-off-by: Phoebe Chen
Reviewed-by: Tomas Mraz
Reviewed-by: Paul Dale
Reviewed-by: Hugo Landau
(Merged from https://github.com/openssl/openssl/pull/21923)
---
 crypto/aes/asm/aes-riscv64-zvkb-zvkned.pl | 376 ++++++++++++++++++++++++++++++
 crypto/aes/build.info                     |   3 +-
 2 files changed, 378 insertions(+), 1 deletion(-)
 create mode 100644 crypto/aes/asm/aes-riscv64-zvkb-zvkned.pl

diff --git a/crypto/aes/asm/aes-riscv64-zvkb-zvkned.pl b/crypto/aes/asm/aes-riscv64-zvkb-zvkned.pl
new file mode 100644
index 0000000000..8b87b21e4c
--- /dev/null
+++ b/crypto/aes/asm/aes-riscv64-zvkb-zvkned.pl
@@ -0,0 +1,376 @@
+#! /usr/bin/env perl
+# This file is dual-licensed, meaning that you can use it under your
+# choice of either of the following two licenses:
+#
+# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You can obtain
+# a copy in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+#
+# or
+#
+# Copyright (c) 2023, Jerry Shih
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# The generated code of this file depends on the following RISC-V extensions:
+# - RV64I
+# - RISC-V Vector ('V') with VLEN >= 128
+# - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
+# - RISC-V Vector AES block cipher extension ('Zvkned')
+# - RISC-V Zicclsm (main memory supports misaligned loads/stores)
+
+use strict;
+use warnings;
+
+use FindBin qw($Bin);
+use lib "$Bin";
+use lib "$Bin/../../perlasm";
+use riscv;
+
+# $output is the last argument if it looks like a file (it has an extension)
+# $flavour is the first argument if it doesn't look like a file
+my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
+my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ?
shift : undef; + +$output and open STDOUT,">$output"; + +my $code=<<___; +.text +___ + +################################################################################ +# void rv64i_zvkb_zvkned_ctr32_encrypt_blocks(const unsigned char *in, +# unsigned char *out, size_t blocks, +# const void *key, +# const unsigned char ivec[16]); +{ +my ($INP, $OUTP, $BLOCK_NUM, $KEYP, $IVP) = ("a0", "a1", "a2", "a3", "a4"); +my ($T0, $T1, $T2, $T3) = ("t0", "t1", "t2", "t3"); +my ($VL) = ("t4"); +my ($LEN32) = ("t5"); +my ($CTR) = ("t6"); +my ($MASK) = ("v0"); +my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7, + $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15, + $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23, + $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31, +) = map("v$_",(0..31)); + +# Prepare the AES ctr input data into v16. +sub init_aes_ctr_input { + my $code=<<___; + # Setup mask into v0 + # The mask pattern for 4*N-th elements + # mask v0: [000100010001....] + # Note: + # We could setup the mask just for the maximum element length instead of + # the VLMAX. + li $T0, 0b10001000 + @{[vsetvli $T2, "zero", "e8", "m1", "ta", "ma"]} + @{[vmv_v_x $MASK, $T0]} + # Load IV. + # v31:[IV0, IV1, IV2, big-endian count] + @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} + @{[vle32_v $V31, $IVP]} + # Convert the big-endian counter into little-endian. + @{[vsetivli "zero", 4, "e32", "m1", "ta", "mu"]} + @{[vrev8_v $V31, $V31, $MASK]} + # Splat the IV to v16 + @{[vsetvli "zero", $LEN32, "e32", "m4", "ta", "ma"]} + @{[vmv_v_i $V16, 0]} + @{[vaesz_vs $V16, $V31]} + # Prepare the ctr pattern into v20 + # v20: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...] + @{[viota_m $V20, $MASK, $MASK]} + # v16:[IV0, IV1, IV2, count+0, IV0, IV1, IV2, count+1, ...] + @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]} + @{[vadd_vv $V16, $V16, $V20, $MASK]} +___ + + return $code; +} + +$code .= <<___; +.p2align 3 +.globl rv64i_zvkb_zvkned_ctr32_encrypt_blocks +.type rv64i_zvkb_zvkned_ctr32_encrypt_blocks,\@function +rv64i_zvkb_zvkned_ctr32_encrypt_blocks: + beqz $BLOCK_NUM, 1f + + # Load number of rounds + lwu $T0, 240($KEYP) + li $T1, 14 + li $T2, 12 + li $T3, 10 + + slli $LEN32, $BLOCK_NUM, 2 + + beq $T0, $T1, ctr32_encrypt_blocks_256 + beq $T0, $T2, ctr32_encrypt_blocks_192 + beq $T0, $T3, ctr32_encrypt_blocks_128 + +1: + ret + +.size rv64i_zvkb_zvkned_ctr32_encrypt_blocks,.-rv64i_zvkb_zvkned_ctr32_encrypt_blocks +___ + +$code .= <<___; +.p2align 3 +ctr32_encrypt_blocks_128: + # Load all 11 round keys to v1-v11 registers. + @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} + @{[vle32_v $V1, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V2, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V3, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V4, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V5, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V6, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V7, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V8, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V9, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V10, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V11, $KEYP]} + + @{[init_aes_ctr_input]} + + ##### AES body + j 2f +1: + @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]} + # Increase ctr in v16. + @{[vadd_vx $V16, $V16, $CTR, $MASK]} +2: + # Load plaintext into v20 + @{[vle32_v $V20, $INP]} + slli $T0, $VL, 2 + srli $CTR, $VL, 2 + sub $LEN32, $LEN32, $VL + add $INP, $INP, $T0 + # Prepare the AES ctr input into v24. + # The ctr data uses big-endian form. 
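+    # Only every 4th element (the 32-bit counter of each block) has its mask
+    # bit set in v0, so the masked vrev8 below byte-swaps just the counters
+    # back to big-endian for the cipher input, while v16 keeps the counters
+    # in little-endian form for the masked increment above.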
+ @{[vmv_v_v $V24, $V16]} + @{[vrev8_v $V24, $V24, $MASK]} + + @{[vaesz_vs $V24, $V1]} + @{[vaesem_vs $V24, $V2]} + @{[vaesem_vs $V24, $V3]} + @{[vaesem_vs $V24, $V4]} + @{[vaesem_vs $V24, $V5]} + @{[vaesem_vs $V24, $V6]} + @{[vaesem_vs $V24, $V7]} + @{[vaesem_vs $V24, $V8]} + @{[vaesem_vs $V24, $V9]} + @{[vaesem_vs $V24, $V10]} + @{[vaesef_vs $V24, $V11]} + + # ciphertext + @{[vxor_vv $V24, $V24, $V20]} + + # Store the ciphertext. + @{[vse32_v $V24, $OUTP]} + add $OUTP, $OUTP, $T0 + + bnez $LEN32, 1b + + ret +.size ctr32_encrypt_blocks_128,.-ctr32_encrypt_blocks_128 +___ + +$code .= <<___; +.p2align 3 +ctr32_encrypt_blocks_192: + # Load all 13 round keys to v1-v13 registers. + @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} + @{[vle32_v $V1, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V2, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V3, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V4, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V5, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V6, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V7, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V8, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V9, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V10, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V11, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V12, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V13, $KEYP]} + + @{[init_aes_ctr_input]} + + ##### AES body + j 2f +1: + @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]} + # Increase ctr in v16. + @{[vadd_vx $V16, $V16, $CTR, $MASK]} +2: + # Load plaintext into v20 + @{[vle32_v $V20, $INP]} + slli $T0, $VL, 2 + srli $CTR, $VL, 2 + sub $LEN32, $LEN32, $VL + add $INP, $INP, $T0 + # Prepare the AES ctr input into v24. + # The ctr data uses big-endian form. + @{[vmv_v_v $V24, $V16]} + @{[vrev8_v $V24, $V24, $MASK]} + + @{[vaesz_vs $V24, $V1]} + @{[vaesem_vs $V24, $V2]} + @{[vaesem_vs $V24, $V3]} + @{[vaesem_vs $V24, $V4]} + @{[vaesem_vs $V24, $V5]} + @{[vaesem_vs $V24, $V6]} + @{[vaesem_vs $V24, $V7]} + @{[vaesem_vs $V24, $V8]} + @{[vaesem_vs $V24, $V9]} + @{[vaesem_vs $V24, $V10]} + @{[vaesem_vs $V24, $V11]} + @{[vaesem_vs $V24, $V12]} + @{[vaesef_vs $V24, $V13]} + + # ciphertext + @{[vxor_vv $V24, $V24, $V20]} + + # Store the ciphertext. + @{[vse32_v $V24, $OUTP]} + add $OUTP, $OUTP, $T0 + + bnez $LEN32, 1b + + ret +.size ctr32_encrypt_blocks_192,.-ctr32_encrypt_blocks_192 +___ + +$code .= <<___; +.p2align 3 +ctr32_encrypt_blocks_256: + # Load all 15 round keys to v1-v15 registers. + @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]} + @{[vle32_v $V1, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V2, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V3, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V4, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V5, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V6, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V7, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V8, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V9, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V10, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V11, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V12, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V13, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V14, $KEYP]} + addi $KEYP, $KEYP, 16 + @{[vle32_v $V15, $KEYP]} + + @{[init_aes_ctr_input]} + + ##### AES body + j 2f +1: + @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]} + # Increase ctr in v16. 
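+    # t6 was set to vl / 4 by the previous iteration, i.e. the number of
+    # blocks already processed, so the masked add advances only the
+    # counter elements of v16.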
+ @{[vadd_vx $V16, $V16, $CTR, $MASK]} +2: + # Load plaintext into v20 + @{[vle32_v $V20, $INP]} + slli $T0, $VL, 2 + srli $CTR, $VL, 2 + sub $LEN32, $LEN32, $VL + add $INP, $INP, $T0 + # Prepare the AES ctr input into v24. + # The ctr data uses big-endian form. + @{[vmv_v_v $V24, $V16]} + @{[vrev8_v $V24, $V24, $MASK]} + + @{[vaesz_vs $V24, $V1]} + @{[vaesem_vs $V24, $V2]} + @{[vaesem_vs $V24, $V3]} + @{[vaesem_vs $V24, $V4]} + @{[vaesem_vs $V24, $V5]} + @{[vaesem_vs $V24, $V6]} + @{[vaesem_vs $V24, $V7]} + @{[vaesem_vs $V24, $V8]} + @{[vaesem_vs $V24, $V9]} + @{[vaesem_vs $V24, $V10]} + @{[vaesem_vs $V24, $V11]} + @{[vaesem_vs $V24, $V12]} + @{[vaesem_vs $V24, $V13]} + @{[vaesem_vs $V24, $V14]} + @{[vaesef_vs $V24, $V15]} + + # ciphertext + @{[vxor_vv $V24, $V24, $V20]} + + # Store the ciphertext. + @{[vse32_v $V24, $OUTP]} + add $OUTP, $OUTP, $T0 + + bnez $LEN32, 1b + + ret +.size ctr32_encrypt_blocks_256,.-ctr32_encrypt_blocks_256 +___ +} + +print $code; + +close STDOUT or die "error closing STDOUT: $!"; diff --git a/crypto/aes/build.info b/crypto/aes/build.info index 7865231212..c7fab5720d 100644 --- a/crypto/aes/build.info +++ b/crypto/aes/build.info @@ -47,7 +47,7 @@ IF[{- !$disabled{asm} -}] # aes-c64xplus.s implements AES_ctr32_encrypt $AESDEF_c64xplus=AES_ASM AES_CTR_ASM - $AESASM_riscv64=aes_cbc.c aes-riscv64.s aes-riscv64-zkn.s aes-riscv64-zvkned.s + $AESASM_riscv64=aes_cbc.c aes-riscv64.s aes-riscv64-zkn.s aes-riscv64-zvkb-zvkned.s aes-riscv64-zvkned.s aes-riscv64-zvbb-zvkg-zvkned.s $AESDEF_riscv64=AES_ASM $AESASM_riscv32=aes_core.c aes_cbc.c aes-riscv32-zkn.s @@ -124,6 +124,7 @@ INCLUDE[aes-mips.o]=.. GENERATE[aes-riscv64.s]=asm/aes-riscv64.pl GENERATE[aes-riscv64-zkn.s]=asm/aes-riscv64-zkn.pl GENERATE[aes-riscv32-zkn.s]=asm/aes-riscv32-zkn.pl +GENERATE[aes-riscv64-zvkb-zvkned.s]=asm/aes-riscv64-zvkb-zvkned.pl GENERATE[aes-riscv64-zvkned.s]=asm/aes-riscv64-zvkned.pl GENERATE[aesv8-armx.S]=asm/aesv8-armx.pl -- cgit v1.2.3
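
For context, a minimal caller sketch in C (illustrative only, not part of the patch): the routine is an internal symbol that OpenSSL normally reaches through its generic CTR glue rather than a public API, and the prototype below is copied from the comment in the generated assembly above. The helper name ctr128_encrypt_sketch and the direct use of AES_set_encrypt_key are assumptions for illustration; the only layout requirement the assembly places on the key argument is a round count at byte offset 240, which OpenSSL's AES_KEY satisfies.

    #include <stddef.h>
    #include <openssl/aes.h>

    /* Prototype copied from the comment in aes-riscv64-zvkb-zvkned.pl. */
    void rv64i_zvkb_zvkned_ctr32_encrypt_blocks(const unsigned char *in,
                                                unsigned char *out,
                                                size_t blocks,
                                                const void *key,
                                                const unsigned char ivec[16]);

    /*
     * Hypothetical helper: encrypt `blocks` full 16-byte blocks with
     * AES-128-CTR.  ivec[0..11] is the nonce/IV and ivec[12..15] is the
     * 32-bit big-endian counter, matching what the assembly expects.
     */
    static int ctr128_encrypt_sketch(const unsigned char *in,
                                     unsigned char *out, size_t blocks,
                                     const unsigned char key[16],
                                     const unsigned char ivec[16])
    {
        AES_KEY ks;

        /* AES_KEY stores the round count at byte offset 240; the assembly
         * reads that field to select the 10/12/14-round path. */
        if (AES_set_encrypt_key(key, 128, &ks) != 0)
            return 0;
        rv64i_zvkb_zvkned_ctr32_encrypt_blocks(in, out, blocks, &ks, ivec);
        return 1;
    }

Decryption is the same operation, since CTR mode only XORs the keystream into the data; handling a trailing partial block and 32-bit counter overflow is left to the caller, as OpenSSL's generic CTR128 code normally does.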