#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# February 2009
#
# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to
# "cluster" Address Generation Interlocks, so that one pipeline stall
# resolves several dependencies.

# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". Latter implies that the code
# remains z/Architecture specific. On z990 it was measured to perform
# 50% better than code generated by gcc 4.3.
|
|
# Command line: <flavour> [arguments to skip ...] <output-file>
$flavour = shift;

# A flavour containing "31" or "32" selects 4-byte register save slots and
# the plain stm/lm mnemonics; any other flavour gets 8-byte slots and the
# "g"-suffixed stmg/lmg forms.
if ($flavour =~ /3[12]/) {
	$SIZE_T=4;
	$g="";
} else {
	$SIZE_T=8;
	$g="g";
}

# Consume arguments until one looks like a bare file name with an
# extension (word characters and dashes, a dot, word characters), or
# until the argument list runs out.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an output name was actually found, and refuse
# to continue if the file cannot be opened: silently carrying on would
# leave the caller without the assembly file it asked for.  The
# three-argument form keeps the open mode out of the file name string.
if ($output) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}

$rp="%r14";	# register used for "br" on function exit
$sp="%r15";	# base register of the register save area

# All generated assembly is accumulated in $code and printed at the end.
$code=<<___;
.text

___
|
|
# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out)
#
# Appends the RC4 stream routine to $code.  As used by the generated
# code, the key structure holds the x index in byte 0, the y index in
# byte 1 and the byte-wide state table from offset 2 onwards.  The
# routine handles len/8 eight-byte chunks in an unrolled loop and the
# remaining len%8 bytes one at a time, then writes x and y back.
{
$acc="%r0";	# accumulates key-stream bytes / holds the data being XORed
$cnt="%r1";	# number of 8-byte chunks left
$key="%r2";	# 1st argument
$len="%r3";	# 2nd argument
$inp="%r4";	# 3rd argument
$out="%r5";	# 4th argument

# Two-entry register pairs: element [0] is the current x index / S[x],
# element [1] is the one being prepared for the next step.  The pairs
# are swapped after every unrolled step.
@XX=("%r6","%r7");
@TX=("%r8","%r9");
$YY="%r10";	# y index
$TY="%r11";	# S[y], later the key-stream lookup index

$code.=<<___;
.globl	RC4
.type	RC4,\@function
.align	64
RC4:
	stm${g}	%r6,%r11,6*$SIZE_T($sp)
___
# 31/32-bit flavours: widen the 32-bit length so that the 64-bit
# instructions below (srlg/ltgr/ngr) operate on a clean value.
$code.=<<___ if ($flavour =~ /3[12]/);
	llgfr	$len,$len
___
# Load x and y, advance x by one (mod 256), compute the chunk count
# len>>3 and preload S[x]; with no full chunk go straight to .Lshort.
$code.=<<___;
	llgc	$XX[0],0($key)
	llgc	$YY,1($key)
	la	$XX[0],1($XX[0])
	nill	$XX[0],0xff
	srlg	$cnt,$len,3
	ltgr	$cnt,$cnt
	llgc	$TX[0],2($XX[0],$key)
	jz	.Lshort
	j	.Loop8

.align	64
.Loop8:
___
# Eight unrolled steps per pass.  Each step computes y += S[x], swaps
# S[x] and S[y], and leaves the key-stream lookup index in $TY; the
# lookup itself is emitted at the start of the *following* step (or
# after the loop for the last one) and shifted into $acc byte by byte.
for ($i=0;$i<8;$i++) {
$code.=<<___;
	la	$YY,0($YY,$TX[0])	# $i
	nill	$YY,255
	la	$XX[1],1($XX[0])
	nill	$XX[1],255
___
# Step 1 starts the accumulator with the key-stream byte of step 0 ...
$code.=<<___ if ($i==1);
	llgc	$acc,2($TY,$key)
___
# ... later steps shift it left and insert the previous step's byte.
$code.=<<___ if ($i>1);
	sllg	$acc,$acc,8
	ic	$acc,2($TY,$key)
___
# Swap S[x] and S[y] and prefetch S[x+1].  When x+1 equals y the
# prefetched value is replaced with the old S[x], which is what the
# swap stores at that position.
$code.=<<___;
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	llgc	$TX[1],2($XX[1],$key)
	stc	$TY,2($XX[0],$key)
	cr	$XX[1],$YY
	jne	.Lcmov$i
	la	$TX[1],0($TX[0])
.Lcmov$i:
	la	$TY,0($TY,$TX[0])
	nill	$TY,255
___
push(@TX,shift(@TX)); push(@XX,shift(@XX));	# "rotate" registers
}

# Tail of the unrolled pass: fetch 8 input bytes, insert the eighth
# key-stream byte, XOR, store 8 output bytes and loop.  .Lshort then
# reduces len to len&7 and .Loop1 handles those bytes one at a time.
# .Lexit undoes the look-ahead increment of x and stores x and y back
# before restoring the saved registers.
$code.=<<___;
	lg	$TX[1],0($inp)
	sllg	$acc,$acc,8
	la	$inp,8($inp)
	ic	$acc,2($TY,$key)
	xgr	$acc,$TX[1]
	stg	$acc,0($out)
	la	$out,8($out)
	brctg	$cnt,.Loop8

.Lshort:
	lghi	$acc,7
	ngr	$len,$acc
	jz	.Lexit
	j	.Loop1

.align	16
.Loop1:
	la	$YY,0($YY,$TX[0])
	nill	$YY,255
	llgc	$TY,2($YY,$key)
	stc	$TX[0],2($YY,$key)
	stc	$TY,2($XX[0],$key)
	ar	$TY,$TX[0]
	ahi	$XX[0],1
	nill	$TY,255
	nill	$XX[0],255
	llgc	$acc,0($inp)
	la	$inp,1($inp)
	llgc	$TY,2($TY,$key)
	llgc	$TX[0],2($XX[0],$key)
	xr	$acc,$TY
	stc	$acc,0($out)
	la	$out,1($out)
	brct	$len,.Loop1

.Lexit:
	ahi	$XX[0],-1
	stc	$XX[0],0($key)
	stc	$YY,1($key)
	lm${g}	%r6,%r11,6*$SIZE_T($sp)
	br	$rp
.size	RC4,.-RC4
.string	"RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>"

___
}
|
|
# void private_RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp)
#
# Appends the key-schedule routine to $code; note that the symbol it
# exports is private_RC4_set_key.  The generated code clears the two
# index bytes at the start of the key structure, fills the 256-byte
# table at offset 2 with 0..255, and then performs 256 swap steps driven
# by the key bytes, wrapping around the key material every len bytes.
{
$cnt="%r0";	# loop counter (table fill, then bytes left in the key)
$idx="%r1";	# fill index, then the running swap index
$key="%r2";	# 1st argument
$len="%r3";	# 2nd argument
$inp="%r4";	# 3rd argument
$acc="%r5";	# table byte at the current position
$dat="%r6";	# key byte, then table byte at the swap index
$ikey="%r7";	# position in the table, counted from -256 up to 0
$iinp="%r8";	# position in the key material

# "la reg,0(%r0)" is used below to load zero: with %r0 in the base
# position the result is just the displacement.
$code.=<<___;
.globl	private_RC4_set_key
.type	private_RC4_set_key,\@function
.align	64
private_RC4_set_key:
	stm${g}	%r6,%r8,6*$SIZE_T($sp)
	lhi	$cnt,256
	la	$idx,0(%r0)
	sth	$idx,0($key)
.align	4
.L1stloop:
	stc	$idx,2($idx,$key)
	la	$idx,1($idx)
	brct	$cnt,.L1stloop

	lghi	$ikey,-256
	lr	$cnt,$len
	la	$iinp,0(%r0)
	la	$idx,0(%r0)
.align	16
.L2ndloop:
	llgc	$acc,2+256($ikey,$key)
	llgc	$dat,0($iinp,$inp)
	la	$idx,0($idx,$acc)
	la	$ikey,1($ikey)
	la	$idx,0($idx,$dat)
	nill	$idx,255
	la	$iinp,1($iinp)
	tml	$ikey,255
	llgc	$dat,2($idx,$key)
	stc	$dat,2+256-1($ikey,$key)
	stc	$acc,2($idx,$key)
	jz	.Ldone
	brct	$cnt,.L2ndloop
	lr	$cnt,$len
	la	$iinp,0(%r0)
	j	.L2ndloop
.Ldone:
	lm${g}	%r6,%r8,6*$SIZE_T($sp)
	br	$rp
.size	private_RC4_set_key,.-private_RC4_set_key

___
}
|
|
# const char *RC4_options()
#
# Appends a routine that returns the address of a constant description
# string placed in .rodata.  The label is defined *after* the alignment
# directive so that it always names the string itself rather than any
# padding the alignment might insert.
$code.=<<___;
.globl	RC4_options
.type	RC4_options,\@function
.align	16
RC4_options:
	larl	%r2,.Loptions
	br	%r14
.size	RC4_options,.-RC4_options
.section	.rodata
.align	8
.Loptions:
.string	"rc4(8x,char)"
___

# Emit everything that was accumulated.  Both the write and the final
# flush are checked so that a short or failed write (full disk, closed
# pipe, ...) aborts the build instead of leaving a truncated file.
print STDOUT $code or die "error writing output: $!";
close STDOUT or die "error closing STDOUT: $!";	# force flush
|
|