From 3e181369dd635c8ce5d133e346928d6670cf83f6 Mon Sep 17 00:00:00 2001
From: Andy Polyakov <appro@openssl.org>
Date: Wed, 18 Apr 2012 13:01:36 +0000
Subject: C64x+ assembler pack. linux-c64xplus build is *not* tested nor can it
 be tested, because kernel is not in shape to handle it *yet*. The code is
 committed mostly to stimulate the kernel development.

---
 crypto/c64xpluscpuid.pl | 246 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 246 insertions(+)
 create mode 100644 crypto/c64xpluscpuid.pl

(limited to 'crypto/c64xpluscpuid.pl')

diff --git a/crypto/c64xpluscpuid.pl b/crypto/c64xpluscpuid.pl
new file mode 100644
index 0000000000..067b693d5c
--- /dev/null
+++ b/crypto/c64xpluscpuid.pl
@@ -0,0 +1,246 @@
+#!/usr/bin/env perl
+#
+
+while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
+open STDOUT,">$output";
+
+$code.=<<___;
+	.text
+
+	.asg	B3,RA
+
+	.global	_OPENSSL_rdtsc
+_OPENSSL_rdtsc:
+	.asmfunc
+	B	RA
+	MVC	TSCL,B0
+	MVC	TSCH,B1
+  [!B0]	MVC	B0,TSCL		; start TSC
+	MV	B0,A4
+	MV	B1,A5
+	.endasmfunc
+
+	.global	_OPENSSL_cleanse
+_OPENSSL_cleanse:
+	.asmfunc
+	ZERO	A3:A2
+||	ZERO	B2
+||	SHRU	B4,3,B0		; is length >= 8
+||	ADD	1,A4,B6
+  [!B0]	BNOP	RA
+||	ZERO	A1
+||	ZERO	B1
+   [B0]	MVC	B0,ILC
+||[!B0]	CMPLT	0,B4,A1
+||[!B0]	CMPLT	1,B4,B1
+   [A1]	STB	A2,*A4++[2]
+|| [B1] STB	B2,*B6++[2]
+||[!B0]	CMPLT	2,B4,A1
+||[!B0]	CMPLT	3,B4,B1
+   [A1]	STB	A2,*A4++[2]
+|| [B1] STB	B2,*B6++[2]
+||[!B0]	CMPLT	4,B4,A1
+||[!B0]	CMPLT	5,B4,B1
+   [A1]	STB	A2,*A4++[2]
+|| [B1] STB	B2,*B6++[2]
+||[!B0]	CMPLT	6,B4,A1
+   [A1]	STB	A2,*A4++[2]
+
+	SPLOOP	1
+	STNDW	A3:A2,*A4++
+||	SUB	B4,8,B4
+	SPKERNEL
+
+	MV	B4,B0		; remaining bytes
+||	ADD	1,A4,B6
+||	BNOP	RA
+   [B0]	CMPLT	0,B0,A1
+|| [B0]	CMPLT	1,B0,B1
+   [A1]	STB	A2,*A4++[2]
+|| [B1] STB	B2,*B6++[2]
+|| [B0]	CMPLT	2,B0,A1
+|| [B0]	CMPLT	3,B0,B1
+   [A1]	STB	A2,*A4++[2]
+|| [B1] STB	B2,*B6++[2]
+|| [B0]	CMPLT	4,B0,A1
+|| [B0]	CMPLT	5,B0,B1
+   [A1]	STB	A2,*A4++[2]
+|| [B1] STB	B2,*B6++[2]
+|| [B0]	CMPLT	6,B0,A1
+   [A1]	STB	A2,*A4++[2]
+	.endasmfunc
+
+	.global	_OPENSSL_atomic_add
+_OPENSSL_atomic_add:
+	.asmfunc
+	MV	A4,B0
+atomic_add?:
+	LL	*B0,B5
+	NOP	4
+	ADD	B4,B5,B5
+	SL	B5,*B0
+	CMTL	*B0,B1
+	NOP	4
+  [!B1]	B	atomic_add?
+   [B1]	BNOP	RA,4
+	MV	B5,A4
+	.endasmfunc
+
+	.global	_OPENSSL_wipe_cpu
+_OPENSSL_wipe_cpu:
+	.asmfunc
+	ZERO	A0
+||	ZERO	B0
+||	ZERO	A1
+||	ZERO	B1
+	ZERO	A3:A2
+||	MVD	B0,B2
+||	ZERO	A4
+||	ZERO	B4
+||	ZERO	A5
+||	ZERO	B5
+||	BNOP	RA
+	ZERO	A7:A6
+||	ZERO	B7:B6
+||	ZERO	A8
+||	ZERO	B8
+||	ZERO	A9
+||	ZERO	B9
+	ZERO	A17:A16
+||	ZERO	B17:B16
+||	ZERO	A18
+||	ZERO	B18
+||	ZERO	A19
+||	ZERO	B19
+	ZERO	A21:A20
+||	ZERO	B21:B20
+||	ZERO	A22
+||	ZERO	B22
+||	ZERO	A23
+||	ZERO	B23
+	ZERO	A25:A24
+||	ZERO	B25:B24
+||	ZERO	A26
+||	ZERO	B26
+||	ZERO	A27
+||	ZERO	B27
+	ZERO	A29:A28
+||	ZERO	B29:B28
+||	ZERO	A30
+||	ZERO	B30
+||	ZERO	A31
+||	ZERO	B31
+	.endasmfunc
+
+CLFLUSH	.macro	CONTROL,ADDR,LEN
+	B	passthrough?
+||	STW	ADDR,*CONTROL[0]
+	STW	LEN,*CONTROL[1]
+spinlock?:
+	LDW	*CONTROL[1],A0
+	NOP	3
+passthrough?:
+	NOP
+  [A0]	BNOP	spinlock?,5
+	.endm
+
+	.global	_OPENSSL_instrument_bus
+_OPENSSL_instrument_bus:
+	.asmfunc
+	MV	B4,B0			; reassign sizeof(output)
+||	MV	A4,B4			; reassign output
+||	MVK	0x00004030,A3
+	MV	B0,A4			; return value
+||	MVK	1,A1
+||	MVKH	0x01840000,A3		; L1DWIBAR
+	MVC	TSCL,B8			; collect 1st tick
+||	MVK	0x00004010,A5
+	MV	B8,B9			; lasttick = tick
+||	MVK	0,B7			; lastdiff = 0
+||	MVKH	0x01840000,A5		; L2WIBAR
+	CLFLUSH	A3,B4,A1		; write-back and invalidate L1D line
+	CLFLUSH	A5,B4,A1		; write-back and invalidate L2 line
+	LL	*B4,B5
+	NOP	4
+	ADD	B7,B5,B5
+	SL	B5,*B4
+	CMTL	*B4,B1
+	NOP	4
+	STW	B5,*B4
+bus_loop1?:
+	MVC	TSCL,B8
+|| [B0]	SUB	B0,1,B0
+	SUB	B8,B9,B7		; lastdiff = tick - lasttick
+||	MV	B8,B9			; lasttick = tick
+	CLFLUSH	A3,B4,A1		; write-back and invalidate L1D line
+	CLFLUSH	A5,B4,A1		; write-back and invalidate L2 line
+	LL	*B4,B5
+	NOP	4
+	ADD	B7,B5,B5
+	SL	B5,*B4
+	CMTL	*B4,B1
+	STW	B5,*B4			; [!B1] is removed to flatten samples
+||	ADDK	4,B4
+|| [B0]	BNOP	bus_loop1?,5
+
+	BNOP	RA,5
+	.endasmfunc
+
+	.global	_OPENSSL_instrument_bus2
+_OPENSSL_instrument_bus2:
+	.asmfunc
+	MV	A6,B0			; reassign max
+||	MV	B4,A6			; reassing sizeof(output)
+||	MVK	0x00004030,A3
+	MV	A4,B4			; reassign output
+||	MVK	0,A4			; return value
+||	MVK	1,A1
+||	MVKH	0x01840000,A3		; L1DWIBAR
+
+	MVC	TSCL,B8			; collect 1st tick
+||	MVK	0x00004010,A5
+	MV	B8,B9			; lasttick = tick
+||	MVK	0,B7			; lastdiff = 0
+||	MVKH	0x01840000,A5		; L2WIBAR
+	CLFLUSH	A3,B4,A1		; write-back and invalidate L1D line
+	CLFLUSH	A5,B4,A1		; write-back and invalidate L2 line
+	LL	*B4,B5
+	NOP	4
+	ADD	B7,B5,B5
+	SL	B5,*B4
+	CMTL	*B4,B1
+	NOP	4
+	STW	B5,*B4
+
+	MVC	TSCL,B8			; collect 1st diff
+	SUB	B8,B9,B7		; lastdiff = tick - lasttick
+||	MV	B8,B9			; lasttick = tick
+||	SUB	B0,1,B0
+bus_loop2?:
+	CLFLUSH	A3,B4,A1		; write-back and invalidate L1D line
+	CLFLUSH	A5,B4,A1		; write-back and invalidate L2 line
+	LL	*B4,B5
+	NOP	4
+	ADD	B7,B5,B5
+	SL	B5,*B4
+	CMTL	*B4,B1
+	STW	B5,*B4			; [!B1] is removed to flatten samples
+||[!B0]	BNOP	bus_loop2_done?,2
+||	SUB	B0,1,B0
+	MVC	TSCL,B8
+	SUB	B8,B9,B8
+||	MV	B8,B9
+	CMPEQ	B8,B7,B2
+||	MV	B8,B7
+  [!B2]	ADDAW	B4,1,B4
+||[!B2]	ADDK	1,A4
+	CMPEQ	A4,A6,A2
+  [!A2]	BNOP	bus_loop2?,5
+
+bus_loop2_done?:
+	BNOP	RA,5
+	.endasmfunc
+___
+
+print $code;
+close STDOUT;
-- 
cgit v1.2.3