You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
322 lines
5.8 KiB
322 lines
5.8 KiB
#!/usr/bin/env perl |
|
|
|
# ==================================================================== |
|
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL |
|
# project. The module is, however, dual licensed under OpenSSL and |
|
# CRYPTOGAMS licenses depending on where you obtain it. For further |
|
# details see http://www.openssl.org/~appro/cryptogams/. |
|
# ==================================================================== |
|
|
|
# SHA1 block procedure for Alpha. |
|
|
|
# On 21264 performance is 33% better than code generated by vendor |
|
# compiler, and 75% better than GCC [3.4], and in absolute terms is |
|
# 8.7 cycles per processed byte. Implementation features vectorized |
|
# byte swap, but not Xupdate. |
|
|
|
@X=( "\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7", |
|
"\$8", "\$9", "\$10", "\$11", "\$12", "\$13", "\$14", "\$15"); |
|
$ctx="a0"; # $16 |
|
$inp="a1"; |
|
$num="a2"; |
|
$A="a3"; |
|
$B="a4"; # 20 |
|
$C="a5"; |
|
$D="t8"; |
|
$E="t9"; @V=($A,$B,$C,$D,$E); |
|
$t0="t10"; # 24 |
|
$t1="t11"; |
|
$t2="ra"; |
|
$t3="t12"; |
|
$K="AT"; # 28 |
|
|
|
sub BODY_00_19 { |
|
my ($i,$a,$b,$c,$d,$e)=@_; |
|
my $j=$i+1; |
|
$code.=<<___ if ($i==0); |
|
ldq_u @X[0],0+0($inp) |
|
ldq_u @X[1],0+7($inp) |
|
___ |
|
$code.=<<___ if (!($i&1) && $i<14); |
|
ldq_u @X[$i+2],($i+2)*4+0($inp) |
|
ldq_u @X[$i+3],($i+2)*4+7($inp) |
|
___ |
|
$code.=<<___ if (!($i&1) && $i<15); |
|
extql @X[$i],$inp,@X[$i] |
|
extqh @X[$i+1],$inp,@X[$i+1] |
|
|
|
or @X[$i+1],@X[$i],@X[$i] # pair of 32-bit values are fetched |
|
|
|
srl @X[$i],24,$t0 # vectorized byte swap |
|
srl @X[$i],8,$t2 |
|
|
|
sll @X[$i],8,$t3 |
|
sll @X[$i],24,@X[$i] |
|
zapnot $t0,0x11,$t0 |
|
zapnot $t2,0x22,$t2 |
|
|
|
zapnot @X[$i],0x88,@X[$i] |
|
or $t0,$t2,$t0 |
|
zapnot $t3,0x44,$t3 |
|
sll $a,5,$t1 |
|
|
|
or @X[$i],$t0,@X[$i] |
|
addl $K,$e,$e |
|
and $b,$c,$t2 |
|
zapnot $a,0xf,$a |
|
|
|
or @X[$i],$t3,@X[$i] |
|
srl $a,27,$t0 |
|
bic $d,$b,$t3 |
|
sll $b,30,$b |
|
|
|
extll @X[$i],4,@X[$i+1] # extract upper half |
|
or $t2,$t3,$t2 |
|
addl @X[$i],$e,$e |
|
|
|
addl $t1,$e,$e |
|
srl $b,32,$t3 |
|
zapnot @X[$i],0xf,@X[$i] |
|
|
|
addl $t0,$e,$e |
|
addl $t2,$e,$e |
|
or $t3,$b,$b |
|
___ |
|
$code.=<<___ if (($i&1) && $i<15); |
|
sll $a,5,$t1 |
|
addl $K,$e,$e |
|
and $b,$c,$t2 |
|
zapnot $a,0xf,$a |
|
|
|
srl $a,27,$t0 |
|
addl @X[$i%16],$e,$e |
|
bic $d,$b,$t3 |
|
sll $b,30,$b |
|
|
|
or $t2,$t3,$t2 |
|
addl $t1,$e,$e |
|
srl $b,32,$t3 |
|
zapnot @X[$i],0xf,@X[$i] |
|
|
|
addl $t0,$e,$e |
|
addl $t2,$e,$e |
|
or $t3,$b,$b |
|
___ |
|
$code.=<<___ if ($i>=15); # with forward Xupdate |
|
sll $a,5,$t1 |
|
addl $K,$e,$e |
|
and $b,$c,$t2 |
|
xor @X[($j+2)%16],@X[$j%16],@X[$j%16] |
|
|
|
zapnot $a,0xf,$a |
|
addl @X[$i%16],$e,$e |
|
bic $d,$b,$t3 |
|
xor @X[($j+8)%16],@X[$j%16],@X[$j%16] |
|
|
|
srl $a,27,$t0 |
|
addl $t1,$e,$e |
|
or $t2,$t3,$t2 |
|
xor @X[($j+13)%16],@X[$j%16],@X[$j%16] |
|
|
|
sll $b,30,$b |
|
addl $t0,$e,$e |
|
srl @X[$j%16],31,$t1 |
|
|
|
addl $t2,$e,$e |
|
srl $b,32,$t3 |
|
addl @X[$j%16],@X[$j%16],@X[$j%16] |
|
|
|
or $t3,$b,$b |
|
zapnot @X[$i%16],0xf,@X[$i%16] |
|
or $t1,@X[$j%16],@X[$j%16] |
|
___ |
|
} |
|
|
|
sub BODY_20_39 { |
|
my ($i,$a,$b,$c,$d,$e)=@_; |
|
my $j=$i+1; |
|
$code.=<<___ if ($i<79); # with forward Xupdate |
|
sll $a,5,$t1 |
|
addl $K,$e,$e |
|
zapnot $a,0xf,$a |
|
xor @X[($j+2)%16],@X[$j%16],@X[$j%16] |
|
|
|
sll $b,30,$t3 |
|
addl $t1,$e,$e |
|
xor $b,$c,$t2 |
|
xor @X[($j+8)%16],@X[$j%16],@X[$j%16] |
|
|
|
srl $b,2,$b |
|
addl @X[$i%16],$e,$e |
|
xor $d,$t2,$t2 |
|
xor @X[($j+13)%16],@X[$j%16],@X[$j%16] |
|
|
|
srl @X[$j%16],31,$t1 |
|
addl $t2,$e,$e |
|
srl $a,27,$t0 |
|
addl @X[$j%16],@X[$j%16],@X[$j%16] |
|
|
|
or $t3,$b,$b |
|
addl $t0,$e,$e |
|
or $t1,@X[$j%16],@X[$j%16] |
|
___ |
|
$code.=<<___ if ($i<77); |
|
zapnot @X[$i%16],0xf,@X[$i%16] |
|
___ |
|
$code.=<<___ if ($i==79); # with context fetch |
|
sll $a,5,$t1 |
|
addl $K,$e,$e |
|
zapnot $a,0xf,$a |
|
ldl @X[0],0($ctx) |
|
|
|
sll $b,30,$t3 |
|
addl $t1,$e,$e |
|
xor $b,$c,$t2 |
|
ldl @X[1],4($ctx) |
|
|
|
srl $b,2,$b |
|
addl @X[$i%16],$e,$e |
|
xor $d,$t2,$t2 |
|
ldl @X[2],8($ctx) |
|
|
|
srl $a,27,$t0 |
|
addl $t2,$e,$e |
|
ldl @X[3],12($ctx) |
|
|
|
or $t3,$b,$b |
|
addl $t0,$e,$e |
|
ldl @X[4],16($ctx) |
|
___ |
|
} |
|
|
|
sub BODY_40_59 { |
|
my ($i,$a,$b,$c,$d,$e)=@_; |
|
my $j=$i+1; |
|
$code.=<<___; # with forward Xupdate |
|
sll $a,5,$t1 |
|
addl $K,$e,$e |
|
zapnot $a,0xf,$a |
|
xor @X[($j+2)%16],@X[$j%16],@X[$j%16] |
|
|
|
srl $a,27,$t0 |
|
and $b,$c,$t2 |
|
and $b,$d,$t3 |
|
xor @X[($j+8)%16],@X[$j%16],@X[$j%16] |
|
|
|
sll $b,30,$b |
|
addl $t1,$e,$e |
|
xor @X[($j+13)%16],@X[$j%16],@X[$j%16] |
|
|
|
srl @X[$j%16],31,$t1 |
|
addl $t0,$e,$e |
|
or $t2,$t3,$t2 |
|
and $c,$d,$t3 |
|
|
|
or $t2,$t3,$t2 |
|
srl $b,32,$t3 |
|
addl @X[$i%16],$e,$e |
|
addl @X[$j%16],@X[$j%16],@X[$j%16] |
|
|
|
or $t3,$b,$b |
|
addl $t2,$e,$e |
|
or $t1,@X[$j%16],@X[$j%16] |
|
zapnot @X[$i%16],0xf,@X[$i%16] |
|
___ |
|
} |
|
|
|
$code=<<___; |
|
#ifdef __linux__ |
|
#include <asm/regdef.h> |
|
#else |
|
#include <asm.h> |
|
#include <regdef.h> |
|
#endif |
|
|
|
.text |
|
|
|
.set noat |
|
.set noreorder |
|
.globl sha1_block_data_order |
|
.align 5 |
|
.ent sha1_block_data_order |
|
sha1_block_data_order: |
|
lda sp,-64(sp) |
|
stq ra,0(sp) |
|
stq s0,8(sp) |
|
stq s1,16(sp) |
|
stq s2,24(sp) |
|
stq s3,32(sp) |
|
stq s4,40(sp) |
|
stq s5,48(sp) |
|
stq fp,56(sp) |
|
.mask 0x0400fe00,-64 |
|
.frame sp,64,ra |
|
.prologue 0 |
|
|
|
ldl $A,0($ctx) |
|
ldl $B,4($ctx) |
|
sll $num,6,$num |
|
ldl $C,8($ctx) |
|
ldl $D,12($ctx) |
|
ldl $E,16($ctx) |
|
addq $inp,$num,$num |
|
|
|
.Lloop: |
|
.set noreorder |
|
ldah $K,23170(zero) |
|
zapnot $B,0xf,$B |
|
lda $K,31129($K) # K_00_19 |
|
___ |
|
for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } |
|
|
|
$code.=<<___; |
|
ldah $K,28378(zero) |
|
lda $K,-5215($K) # K_20_39 |
|
___ |
|
for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } |
|
|
|
$code.=<<___; |
|
ldah $K,-28900(zero) |
|
lda $K,-17188($K) # K_40_59 |
|
___ |
|
for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } |
|
|
|
$code.=<<___; |
|
ldah $K,-13725(zero) |
|
lda $K,-15914($K) # K_60_79 |
|
___ |
|
for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } |
|
|
|
$code.=<<___; |
|
addl @X[0],$A,$A |
|
addl @X[1],$B,$B |
|
addl @X[2],$C,$C |
|
addl @X[3],$D,$D |
|
addl @X[4],$E,$E |
|
stl $A,0($ctx) |
|
stl $B,4($ctx) |
|
addq $inp,64,$inp |
|
stl $C,8($ctx) |
|
stl $D,12($ctx) |
|
stl $E,16($ctx) |
|
cmpult $inp,$num,$t1 |
|
bne $t1,.Lloop |
|
|
|
.set noreorder |
|
ldq ra,0(sp) |
|
ldq s0,8(sp) |
|
ldq s1,16(sp) |
|
ldq s2,24(sp) |
|
ldq s3,32(sp) |
|
ldq s4,40(sp) |
|
ldq s5,48(sp) |
|
ldq fp,56(sp) |
|
lda sp,64(sp) |
|
ret (ra) |
|
.end sha1_block_data_order |
|
.ascii "SHA1 block transform for Alpha, CRYPTOGAMS by <appro\@openssl.org>" |
|
.align 2 |
|
___ |
|
$output=shift and open STDOUT,">$output"; |
|
print $code; |
|
close STDOUT;
|
|
|