You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
284 lines
5.5 KiB
284 lines
5.5 KiB
#!/usr/bin/env perl |
|
|
|
$flavour = shift; |
|
$output = shift; |
|
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } |
|
|
|
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); |
|
|
|
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; |
|
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or |
|
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or |
|
die "can't locate x86_64-xlate.pl"; |
|
|
|
open OUT,"| \"$^X\" $xlate $flavour $output"; |
|
*STDOUT=*OUT; |
|
|
|
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order |
|
("%rdi","%rsi","%rdx","%rcx"); # Unix order |
|
|
|
print<<___; |
|
.extern OPENSSL_cpuid_setup |
|
.hidden OPENSSL_cpuid_setup |
|
.section .init |
|
call OPENSSL_cpuid_setup |
|
|
|
.hidden OPENSSL_ia32cap_P |
|
.comm OPENSSL_ia32cap_P,8,4 |
|
|
|
.text |
|
|
|
.globl OPENSSL_atomic_add |
|
.type OPENSSL_atomic_add,\@abi-omnipotent |
|
.align 16 |
|
OPENSSL_atomic_add: |
|
movl ($arg1),%eax |
|
.Lspin: leaq ($arg2,%rax),%r8 |
|
.byte 0xf0 # lock |
|
cmpxchgl %r8d,($arg1) |
|
jne .Lspin |
|
movl %r8d,%eax |
|
.byte 0x48,0x98 # cltq/cdqe |
|
ret |
|
.size OPENSSL_atomic_add,.-OPENSSL_atomic_add |
|
|
|
.globl OPENSSL_rdtsc |
|
.type OPENSSL_rdtsc,\@abi-omnipotent |
|
.align 16 |
|
OPENSSL_rdtsc: |
|
rdtsc |
|
shl \$32,%rdx |
|
or %rdx,%rax |
|
ret |
|
.size OPENSSL_rdtsc,.-OPENSSL_rdtsc |
|
|
|
.globl OPENSSL_ia32_cpuid |
|
.type OPENSSL_ia32_cpuid,\@abi-omnipotent |
|
.align 16 |
|
OPENSSL_ia32_cpuid: |
|
mov %rbx,%r8 # save %rbx |
|
|
|
xor %eax,%eax |
|
cpuid |
|
mov %eax,%r11d # max value for standard query level |
|
|
|
xor %eax,%eax |
|
cmp \$0x756e6547,%ebx # "Genu" |
|
setne %al |
|
mov %eax,%r9d |
|
cmp \$0x49656e69,%edx # "ineI" |
|
setne %al |
|
or %eax,%r9d |
|
cmp \$0x6c65746e,%ecx # "ntel" |
|
setne %al |
|
or %eax,%r9d # 0 indicates Intel CPU |
|
jz .Lintel |
|
|
|
cmp \$0x68747541,%ebx # "Auth" |
|
setne %al |
|
mov %eax,%r10d |
|
cmp \$0x69746E65,%edx # "enti" |
|
setne %al |
|
or %eax,%r10d |
|
cmp \$0x444D4163,%ecx # "cAMD" |
|
setne %al |
|
or %eax,%r10d # 0 indicates AMD CPU |
|
jnz .Lintel |
|
|
|
# AMD specific |
|
mov \$0x80000000,%eax |
|
cpuid |
|
cmp \$0x80000001,%eax |
|
jb .Lintel |
|
mov %eax,%r10d |
|
mov \$0x80000001,%eax |
|
cpuid |
|
or %ecx,%r9d |
|
and \$0x00000801,%r9d # isolate AMD XOP bit, 1<<11 |
|
|
|
cmp \$0x80000008,%r10d |
|
jb .Lintel |
|
|
|
mov \$0x80000008,%eax |
|
cpuid |
|
movzb %cl,%r10 # number of cores - 1 |
|
inc %r10 # number of cores |
|
|
|
mov \$1,%eax |
|
cpuid |
|
bt \$28,%edx # test hyper-threading bit |
|
jnc .Lgeneric |
|
shr \$16,%ebx # number of logical processors |
|
cmp %r10b,%bl |
|
ja .Lgeneric |
|
and \$0xefffffff,%edx # ~(1<<28) |
|
jmp .Lgeneric |
|
|
|
.Lintel: |
|
cmp \$4,%r11d |
|
mov \$-1,%r10d |
|
jb .Lnocacheinfo |
|
|
|
mov \$4,%eax |
|
mov \$0,%ecx # query L1D |
|
cpuid |
|
mov %eax,%r10d |
|
shr \$14,%r10d |
|
and \$0xfff,%r10d # number of cores -1 per L1D |
|
|
|
.Lnocacheinfo: |
|
mov \$1,%eax |
|
cpuid |
|
and \$0xbfefffff,%edx # force reserved bits to 0 |
|
cmp \$0,%r9d |
|
jne .Lnotintel |
|
or \$0x40000000,%edx # set reserved bit#30 on Intel CPUs |
|
and \$15,%ah |
|
cmp \$15,%ah # examine Family ID |
|
jne .Lnotintel |
|
or \$0x00100000,%edx # set reserved bit#20 to engage RC4_CHAR |
|
.Lnotintel: |
|
bt \$28,%edx # test hyper-threading bit |
|
jnc .Lgeneric |
|
and \$0xefffffff,%edx # ~(1<<28) |
|
cmp \$0,%r10d |
|
je .Lgeneric |
|
|
|
or \$0x10000000,%edx # 1<<28 |
|
shr \$16,%ebx |
|
cmp \$1,%bl # see if cache is shared |
|
ja .Lgeneric |
|
and \$0xefffffff,%edx # ~(1<<28) |
|
.Lgeneric: |
|
and \$0x00000800,%r9d # isolate AMD XOP flag |
|
and \$0xfffff7ff,%ecx |
|
or %ecx,%r9d # merge AMD XOP flag |
|
|
|
mov %edx,%r10d # %r9d:%r10d is copy of %ecx:%edx |
|
bt \$27,%r9d # check OSXSAVE bit |
|
jnc .Lclear_avx |
|
xor %ecx,%ecx # XCR0 |
|
.byte 0x0f,0x01,0xd0 # xgetbv |
|
and \$6,%eax # isolate XMM and YMM state support |
|
cmp \$6,%eax |
|
je .Ldone |
|
.Lclear_avx: |
|
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) |
|
and %eax,%r9d # clear AVX, FMA and AMD XOP bits |
|
.Ldone: |
|
shl \$32,%r9 |
|
mov %r10d,%eax |
|
mov %r8,%rbx # restore %rbx |
|
or %r9,%rax |
|
ret |
|
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid |
|
|
|
.globl OPENSSL_cleanse |
|
.type OPENSSL_cleanse,\@abi-omnipotent |
|
.align 16 |
|
OPENSSL_cleanse: |
|
xor %rax,%rax |
|
cmp \$15,$arg2 |
|
jae .Lot |
|
cmp \$0,$arg2 |
|
je .Lret |
|
.Little: |
|
mov %al,($arg1) |
|
sub \$1,$arg2 |
|
lea 1($arg1),$arg1 |
|
jnz .Little |
|
.Lret: |
|
ret |
|
.align 16 |
|
.Lot: |
|
test \$7,$arg1 |
|
jz .Laligned |
|
mov %al,($arg1) |
|
lea -1($arg2),$arg2 |
|
lea 1($arg1),$arg1 |
|
jmp .Lot |
|
.Laligned: |
|
mov %rax,($arg1) |
|
lea -8($arg2),$arg2 |
|
test \$-8,$arg2 |
|
lea 8($arg1),$arg1 |
|
jnz .Laligned |
|
cmp \$0,$arg2 |
|
jne .Little |
|
ret |
|
.size OPENSSL_cleanse,.-OPENSSL_cleanse |
|
___ |
|
|
|
print<<___ if (!$win64); |
|
.globl OPENSSL_wipe_cpu |
|
.type OPENSSL_wipe_cpu,\@abi-omnipotent |
|
.align 16 |
|
OPENSSL_wipe_cpu: |
|
pxor %xmm0,%xmm0 |
|
pxor %xmm1,%xmm1 |
|
pxor %xmm2,%xmm2 |
|
pxor %xmm3,%xmm3 |
|
pxor %xmm4,%xmm4 |
|
pxor %xmm5,%xmm5 |
|
pxor %xmm6,%xmm6 |
|
pxor %xmm7,%xmm7 |
|
pxor %xmm8,%xmm8 |
|
pxor %xmm9,%xmm9 |
|
pxor %xmm10,%xmm10 |
|
pxor %xmm11,%xmm11 |
|
pxor %xmm12,%xmm12 |
|
pxor %xmm13,%xmm13 |
|
pxor %xmm14,%xmm14 |
|
pxor %xmm15,%xmm15 |
|
xorq %rcx,%rcx |
|
xorq %rdx,%rdx |
|
xorq %rsi,%rsi |
|
xorq %rdi,%rdi |
|
xorq %r8,%r8 |
|
xorq %r9,%r9 |
|
xorq %r10,%r10 |
|
xorq %r11,%r11 |
|
leaq 8(%rsp),%rax |
|
ret |
|
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu |
|
___ |
|
print<<___ if ($win64); |
|
.globl OPENSSL_wipe_cpu |
|
.type OPENSSL_wipe_cpu,\@abi-omnipotent |
|
.align 16 |
|
OPENSSL_wipe_cpu: |
|
pxor %xmm0,%xmm0 |
|
pxor %xmm1,%xmm1 |
|
pxor %xmm2,%xmm2 |
|
pxor %xmm3,%xmm3 |
|
pxor %xmm4,%xmm4 |
|
pxor %xmm5,%xmm5 |
|
xorq %rcx,%rcx |
|
xorq %rdx,%rdx |
|
xorq %r8,%r8 |
|
xorq %r9,%r9 |
|
xorq %r10,%r10 |
|
xorq %r11,%r11 |
|
leaq 8(%rsp),%rax |
|
ret |
|
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu |
|
___ |
|
|
|
print<<___; |
|
.globl OPENSSL_ia32_rdrand |
|
.type OPENSSL_ia32_rdrand,\@abi-omnipotent |
|
.align 16 |
|
OPENSSL_ia32_rdrand: |
|
mov \$8,%ecx |
|
.Loop_rdrand: |
|
rdrand %rax |
|
jc .Lbreak_rdrand |
|
loop .Loop_rdrand |
|
.Lbreak_rdrand: |
|
cmp \$0,%rax |
|
cmove %rcx,%rax |
|
ret |
|
.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand |
|
___ |
|
|
|
close STDOUT; # flush
|
|
|