You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
326 lines
7.3 KiB
326 lines
7.3 KiB
#!/usr/bin/env perl |
|
|
|
# ==================================================================== |
|
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL |
|
# project. The module is, however, dual licensed under OpenSSL and |
|
# CRYPTOGAMS licenses depending on where you obtain it. For further |
|
# details see http://www.openssl.org/~appro/cryptogams/. |
|
# ==================================================================== |
|
|
|
# I let hardware handle unaligned input(*), except on page boundaries |
|
# (see below for details). Otherwise straightforward implementation |
|
# with X vector in register bank. The module is big-endian [which is |
|
# not big deal as there're no little-endian targets left around]. |
|
# |
|
# (*) this means that this module is inappropriate for PPC403? Does |
|
# anybody know if pre-POWER3 can sustain unaligned load? |
|
|
|
# -m64 -m32 |
|
# ---------------------------------- |
|
# PPC970,gcc-4.0.0 +76% +59% |
|
# Power6,xlc-7 +68% +33% |
|
|
|
$flavour = shift; |
|
|
|
if ($flavour =~ /64/) { |
|
$SIZE_T =8; |
|
$LRSAVE =2*$SIZE_T; |
|
$UCMP ="cmpld"; |
|
$STU ="stdu"; |
|
$POP ="ld"; |
|
$PUSH ="std"; |
|
} elsif ($flavour =~ /32/) { |
|
$SIZE_T =4; |
|
$LRSAVE =$SIZE_T; |
|
$UCMP ="cmplw"; |
|
$STU ="stwu"; |
|
$POP ="lwz"; |
|
$PUSH ="stw"; |
|
} else { die "nonsense $flavour"; } |
|
|
|
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
|
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or |
|
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or |
|
die "can't locate ppc-xlate.pl"; |
|
|
|
open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; |
|
|
|
$FRAME=24*$SIZE_T+64; |
|
$LOCALS=6*$SIZE_T; |
|
|
|
$K ="r0"; |
|
$sp ="r1"; |
|
$toc="r2"; |
|
$ctx="r3"; |
|
$inp="r4"; |
|
$num="r5"; |
|
$t0 ="r15"; |
|
$t1 ="r6"; |
|
|
|
$A ="r7"; |
|
$B ="r8"; |
|
$C ="r9"; |
|
$D ="r10"; |
|
$E ="r11"; |
|
$T ="r12"; |
|
|
|
@V=($A,$B,$C,$D,$E,$T); |
|
@X=("r16","r17","r18","r19","r20","r21","r22","r23", |
|
"r24","r25","r26","r27","r28","r29","r30","r31"); |
|
|
|
sub BODY_00_19 { |
|
my ($i,$a,$b,$c,$d,$e,$f)=@_; |
|
my $j=$i+1; |
|
$code.=<<___ if ($i==0); |
|
lwz @X[$i],`$i*4`($inp) |
|
___ |
|
$code.=<<___ if ($i<15); |
|
lwz @X[$j],`$j*4`($inp) |
|
add $f,$K,$e |
|
rotlwi $e,$a,5 |
|
add $f,$f,@X[$i] |
|
and $t0,$c,$b |
|
add $f,$f,$e |
|
andc $t1,$d,$b |
|
rotlwi $b,$b,30 |
|
or $t0,$t0,$t1 |
|
add $f,$f,$t0 |
|
___ |
|
$code.=<<___ if ($i>=15); |
|
add $f,$K,$e |
|
rotlwi $e,$a,5 |
|
xor @X[$j%16],@X[$j%16],@X[($j+2)%16] |
|
add $f,$f,@X[$i%16] |
|
and $t0,$c,$b |
|
xor @X[$j%16],@X[$j%16],@X[($j+8)%16] |
|
add $f,$f,$e |
|
andc $t1,$d,$b |
|
rotlwi $b,$b,30 |
|
or $t0,$t0,$t1 |
|
xor @X[$j%16],@X[$j%16],@X[($j+13)%16] |
|
add $f,$f,$t0 |
|
rotlwi @X[$j%16],@X[$j%16],1 |
|
___ |
|
} |
|
|
|
sub BODY_20_39 { |
|
my ($i,$a,$b,$c,$d,$e,$f)=@_; |
|
my $j=$i+1; |
|
$code.=<<___ if ($i<79); |
|
add $f,$K,$e |
|
rotlwi $e,$a,5 |
|
xor @X[$j%16],@X[$j%16],@X[($j+2)%16] |
|
add $f,$f,@X[$i%16] |
|
xor $t0,$b,$c |
|
xor @X[$j%16],@X[$j%16],@X[($j+8)%16] |
|
add $f,$f,$e |
|
rotlwi $b,$b,30 |
|
xor $t0,$t0,$d |
|
xor @X[$j%16],@X[$j%16],@X[($j+13)%16] |
|
add $f,$f,$t0 |
|
rotlwi @X[$j%16],@X[$j%16],1 |
|
___ |
|
$code.=<<___ if ($i==79); |
|
add $f,$K,$e |
|
rotlwi $e,$a,5 |
|
lwz r16,0($ctx) |
|
add $f,$f,@X[$i%16] |
|
xor $t0,$b,$c |
|
lwz r17,4($ctx) |
|
add $f,$f,$e |
|
rotlwi $b,$b,30 |
|
lwz r18,8($ctx) |
|
xor $t0,$t0,$d |
|
lwz r19,12($ctx) |
|
add $f,$f,$t0 |
|
lwz r20,16($ctx) |
|
___ |
|
} |
|
|
|
sub BODY_40_59 { |
|
my ($i,$a,$b,$c,$d,$e,$f)=@_; |
|
my $j=$i+1; |
|
$code.=<<___; |
|
add $f,$K,$e |
|
rotlwi $e,$a,5 |
|
xor @X[$j%16],@X[$j%16],@X[($j+2)%16] |
|
add $f,$f,@X[$i%16] |
|
and $t0,$b,$c |
|
xor @X[$j%16],@X[$j%16],@X[($j+8)%16] |
|
add $f,$f,$e |
|
or $t1,$b,$c |
|
rotlwi $b,$b,30 |
|
xor @X[$j%16],@X[$j%16],@X[($j+13)%16] |
|
and $t1,$t1,$d |
|
or $t0,$t0,$t1 |
|
rotlwi @X[$j%16],@X[$j%16],1 |
|
add $f,$f,$t0 |
|
___ |
|
} |
|
|
|
$code=<<___; |
|
.machine "any" |
|
.text |
|
|
|
.globl .sha1_block_data_order |
|
.align 4 |
|
.sha1_block_data_order: |
|
$STU $sp,-$FRAME($sp) |
|
mflr r0 |
|
$PUSH r15,`$FRAME-$SIZE_T*17`($sp) |
|
$PUSH r16,`$FRAME-$SIZE_T*16`($sp) |
|
$PUSH r17,`$FRAME-$SIZE_T*15`($sp) |
|
$PUSH r18,`$FRAME-$SIZE_T*14`($sp) |
|
$PUSH r19,`$FRAME-$SIZE_T*13`($sp) |
|
$PUSH r20,`$FRAME-$SIZE_T*12`($sp) |
|
$PUSH r21,`$FRAME-$SIZE_T*11`($sp) |
|
$PUSH r22,`$FRAME-$SIZE_T*10`($sp) |
|
$PUSH r23,`$FRAME-$SIZE_T*9`($sp) |
|
$PUSH r24,`$FRAME-$SIZE_T*8`($sp) |
|
$PUSH r25,`$FRAME-$SIZE_T*7`($sp) |
|
$PUSH r26,`$FRAME-$SIZE_T*6`($sp) |
|
$PUSH r27,`$FRAME-$SIZE_T*5`($sp) |
|
$PUSH r28,`$FRAME-$SIZE_T*4`($sp) |
|
$PUSH r29,`$FRAME-$SIZE_T*3`($sp) |
|
$PUSH r30,`$FRAME-$SIZE_T*2`($sp) |
|
$PUSH r31,`$FRAME-$SIZE_T*1`($sp) |
|
$PUSH r0,`$FRAME+$LRSAVE`($sp) |
|
lwz $A,0($ctx) |
|
lwz $B,4($ctx) |
|
lwz $C,8($ctx) |
|
lwz $D,12($ctx) |
|
lwz $E,16($ctx) |
|
andi. r0,$inp,3 |
|
bne Lunaligned |
|
Laligned: |
|
mtctr $num |
|
bl Lsha1_block_private |
|
b Ldone |
|
|
|
; PowerPC specification allows an implementation to be ill-behaved |
|
; upon unaligned access which crosses page boundary. "Better safe |
|
; than sorry" principle makes me treat it specially. But I don't |
|
; look for particular offending word, but rather for 64-byte input |
|
; block which crosses the boundary. Once found that block is aligned |
|
; and hashed separately... |
|
.align 4 |
|
Lunaligned: |
|
subfic $t1,$inp,4096 |
|
andi. $t1,$t1,4095 ; distance to closest page boundary |
|
srwi. $t1,$t1,6 ; t1/=64 |
|
beq Lcross_page |
|
$UCMP $num,$t1 |
|
ble- Laligned ; didn't cross the page boundary |
|
mtctr $t1 |
|
subfc $num,$t1,$num |
|
bl Lsha1_block_private |
|
Lcross_page: |
|
li $t1,16 |
|
mtctr $t1 |
|
addi r20,$sp,$LOCALS ; spot within the frame |
|
Lmemcpy: |
|
lbz r16,0($inp) |
|
lbz r17,1($inp) |
|
lbz r18,2($inp) |
|
lbz r19,3($inp) |
|
addi $inp,$inp,4 |
|
stb r16,0(r20) |
|
stb r17,1(r20) |
|
stb r18,2(r20) |
|
stb r19,3(r20) |
|
addi r20,r20,4 |
|
bdnz Lmemcpy |
|
|
|
$PUSH $inp,`$FRAME-$SIZE_T*18`($sp) |
|
li $t1,1 |
|
addi $inp,$sp,$LOCALS |
|
mtctr $t1 |
|
bl Lsha1_block_private |
|
$POP $inp,`$FRAME-$SIZE_T*18`($sp) |
|
addic. $num,$num,-1 |
|
bne- Lunaligned |
|
|
|
Ldone: |
|
$POP r0,`$FRAME+$LRSAVE`($sp) |
|
$POP r15,`$FRAME-$SIZE_T*17`($sp) |
|
$POP r16,`$FRAME-$SIZE_T*16`($sp) |
|
$POP r17,`$FRAME-$SIZE_T*15`($sp) |
|
$POP r18,`$FRAME-$SIZE_T*14`($sp) |
|
$POP r19,`$FRAME-$SIZE_T*13`($sp) |
|
$POP r20,`$FRAME-$SIZE_T*12`($sp) |
|
$POP r21,`$FRAME-$SIZE_T*11`($sp) |
|
$POP r22,`$FRAME-$SIZE_T*10`($sp) |
|
$POP r23,`$FRAME-$SIZE_T*9`($sp) |
|
$POP r24,`$FRAME-$SIZE_T*8`($sp) |
|
$POP r25,`$FRAME-$SIZE_T*7`($sp) |
|
$POP r26,`$FRAME-$SIZE_T*6`($sp) |
|
$POP r27,`$FRAME-$SIZE_T*5`($sp) |
|
$POP r28,`$FRAME-$SIZE_T*4`($sp) |
|
$POP r29,`$FRAME-$SIZE_T*3`($sp) |
|
$POP r30,`$FRAME-$SIZE_T*2`($sp) |
|
$POP r31,`$FRAME-$SIZE_T*1`($sp) |
|
mtlr r0 |
|
addi $sp,$sp,$FRAME |
|
blr |
|
.long 0 |
|
.byte 0,12,4,1,0x80,18,3,0 |
|
.long 0 |
|
___ |
|
|
|
# This is private block function, which uses tailored calling |
|
# interface, namely upon entry SHA_CTX is pre-loaded to given |
|
# registers and counter register contains amount of chunks to |
|
# digest... |
|
$code.=<<___; |
|
.align 4 |
|
Lsha1_block_private: |
|
___ |
|
$code.=<<___; # load K_00_19 |
|
lis $K,0x5a82 |
|
ori $K,$K,0x7999 |
|
___ |
|
for($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); } |
|
$code.=<<___; # load K_20_39 |
|
lis $K,0x6ed9 |
|
ori $K,$K,0xeba1 |
|
___ |
|
for(;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } |
|
$code.=<<___; # load K_40_59 |
|
lis $K,0x8f1b |
|
ori $K,$K,0xbcdc |
|
___ |
|
for(;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); } |
|
$code.=<<___; # load K_60_79 |
|
lis $K,0xca62 |
|
ori $K,$K,0xc1d6 |
|
___ |
|
for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } |
|
$code.=<<___; |
|
add r16,r16,$E |
|
add r17,r17,$T |
|
add r18,r18,$A |
|
add r19,r19,$B |
|
add r20,r20,$C |
|
stw r16,0($ctx) |
|
mr $A,r16 |
|
stw r17,4($ctx) |
|
mr $B,r17 |
|
stw r18,8($ctx) |
|
mr $C,r18 |
|
stw r19,12($ctx) |
|
mr $D,r19 |
|
stw r20,16($ctx) |
|
mr $E,r20 |
|
addi $inp,$inp,`16*4` |
|
bdnz- Lsha1_block_private |
|
blr |
|
.long 0 |
|
.byte 0,12,0x14,0,0,0,0,0 |
|
___ |
|
$code.=<<___; |
|
.asciz "SHA1 block transform for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>" |
|
___ |
|
|
|
$code =~ s/\`([^\`]*)\`/eval $1/gem; |
|
print $code; |
|
close STDOUT;
|
|
|