mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-09 06:18:06 +00:00
Micro-optimisation in sha256_sse2 code courtesy of Guido Ascioti guido.ascioti@gmail.com
This commit is contained in:
parent
68c807d755
commit
d356f44d53
@ -67,12 +67,6 @@ int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
|
||||
|
||||
work_restart[thr_id].restart = 0;
|
||||
|
||||
/* For debugging */
|
||||
union {
|
||||
__m128i m;
|
||||
uint32_t i[4];
|
||||
} mi;
|
||||
|
||||
/* Message expansion */
|
||||
memcpy(m_midstate, pmidstate, sizeof(m_midstate));
|
||||
memcpy(m_w, pdata, sizeof(m_w)); /* The 2nd half of the data */
|
||||
@ -102,17 +96,12 @@ int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
|
||||
CalcSha256_x86 (m_4hash, m_4hash1, sha256_32init);
|
||||
|
||||
for (j = 0; j < 4; j++) {
|
||||
mi.m = m_4hash[7];
|
||||
if (unlikely(mi.i[j] == 0))
|
||||
break;
|
||||
}
|
||||
if (unlikely(((uint32_t *)&(m_4hash[7]))[j] == 0)) {
|
||||
/* We found a hit...so check it */
|
||||
/* Use the C version for a check... */
|
||||
|
||||
/* If j != 4, one of the four lanes hit zero...so check it */
|
||||
/* Use the C version for a check... */
|
||||
if (unlikely(j != 4)) {
|
||||
for (i = 0; i < 8; i++) {
|
||||
mi.m = m_4hash[i];
|
||||
*(uint32_t *)&(phash)[i*4] = mi.i[j];
|
||||
*(uint32_t *)&(phash)[i<<2] = ((uint32_t *)&(m_4hash[i]))[j];
|
||||
}
|
||||
|
||||
if (fulltest(phash, ptarget)) {
|
||||
@ -120,6 +109,7 @@ int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
|
||||
*nNonce_p = nonce + j;
|
||||
return nonce + j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nonce += 4;
|
||||
|
@ -1,4 +1,4 @@
|
||||
;; SHA-256 for X86 for Linux, based off of:
|
||||
;; SHA-256 for X86 for Linux, based off of:
|
||||
|
||||
; (c) Ufasoft 2011 http://ufasoft.com mailto:support@ufasoft.com
|
||||
; Version 2011
|
||||
@ -15,30 +15,21 @@ BITS 32
|
||||
|
||||
; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
|
||||
%define LAB_CALC_PARA 2
|
||||
%define LAB_CALC_UNROLL 8
|
||||
%define LAB_CALC_UNROLL 24
|
||||
|
||||
%define LAB_LOOP_UNROLL 8
|
||||
%define LAB_LOOP_UNROLL 64
|
||||
|
||||
extern sha256_consts_m128i
|
||||
|
||||
global CalcSha256_x86
|
||||
; CalcSha256 hash(ecx), data(edx), init([esp+4])
|
||||
CalcSha256_x86:
|
||||
push esi
|
||||
push edi
|
||||
mov init, [esp+12]
|
||||
|
||||
push ebx
|
||||
|
||||
LAB_NEXT_NONCE:
|
||||
|
||||
mov eax, 64*4 ; 256 - eax is the bound for the 64 SHA-2 rounds (ebx advances by 4 per round)
|
||||
mov ebx, 16*4 ; 64 - ebx*4 is the offset (data+256) where message expansion starts
|
||||
push esi
|
||||
push edi
|
||||
mov init, [esp+12]
|
||||
|
||||
LAB_SHA:
|
||||
push eax
|
||||
lea eax, qword [data+eax*4] ; + 1024
|
||||
lea edi, qword [data+ebx*4] ; + 256
|
||||
lea edi, qword [data+256] ; + 256
|
||||
|
||||
LAB_CALC:
|
||||
%macro lab_calc_blk 1
|
||||
@ -116,13 +107,6 @@ LAB_CALC:
|
||||
%assign i i+LAB_CALC_PARA
|
||||
%endrep
|
||||
|
||||
add edi, LAB_CALC_UNROLL*LAB_CALC_PARA*16
|
||||
cmp edi, eax
|
||||
jb LAB_CALC
|
||||
|
||||
pop eax
|
||||
mov ebx, 0
|
||||
|
||||
; Load the init values of the message into the hash.
|
||||
|
||||
movdqa xmm7, [init]
|
||||
@ -143,14 +127,14 @@ LAB_CALC:
|
||||
|
||||
pshufd xmm0, xmm0, 0 ; xmm0 == e
|
||||
|
||||
|
||||
LAB_LOOP:
|
||||
|
||||
;; T t1 = h + (Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)) + ((e & f) ^ AndNot(e, g)) + Expand32<T>(g_sha256_k[j]) + w[j]
|
||||
|
||||
%macro lab_loop_blk 0
|
||||
movdqa xmm6, [data+ebx*4]
|
||||
paddd xmm6, sha256_consts_m128i[ebx*4]
|
||||
add ebx, 4
|
||||
%macro lab_loop_blk 1
|
||||
movdqa xmm6, [data+%1]
|
||||
paddd xmm6, sha256_consts_m128i[%1]
|
||||
|
||||
paddd xmm6, [hash+2*16] ; +h
|
||||
|
||||
@ -217,68 +201,52 @@ LAB_LOOP:
|
||||
|
||||
%assign i 0
|
||||
%rep LAB_LOOP_UNROLL
|
||||
lab_loop_blk
|
||||
%assign i i+1
|
||||
lab_loop_blk i
|
||||
%assign i i+16
|
||||
%endrep
|
||||
|
||||
cmp ebx, eax
|
||||
jb LAB_LOOP
|
||||
|
||||
; Finished the 64 rounds, calculate hash and save
|
||||
|
||||
movdqa xmm1, [init]
|
||||
pshufd xmm2, xmm1, 0x55
|
||||
pshufd xmm6, xmm1, 0xAA
|
||||
movdqa [hash+3*16], xmm6
|
||||
pshufd xmm6, xmm1, 0xFF
|
||||
movdqa [hash+4*16], xmm6
|
||||
movdqa xmm1, [init+16]
|
||||
|
||||
pshufd xmm2, xmm1, 0xFF
|
||||
movdqa xmm6, [hash+2*16]
|
||||
paddd xmm2, xmm6
|
||||
movdqa [hash+7*16], xmm2
|
||||
|
||||
pshufd xmm2, xmm1, 0xAA
|
||||
movdqa xmm6, [hash+1*16]
|
||||
paddd xmm2, xmm6
|
||||
movdqa [hash+6*16], xmm2
|
||||
|
||||
pshufd xmm2, xmm1, 0x55
|
||||
movdqa xmm6, [hash+0*16]
|
||||
paddd xmm2, xmm6
|
||||
movdqa [hash+5*16], xmm2
|
||||
|
||||
pshufd xmm1, xmm1, 0
|
||||
|
||||
paddd xmm5, xmm2
|
||||
paddd xmm4, [hash+3*16]
|
||||
paddd xmm3, [hash+4*16]
|
||||
paddd xmm7, xmm1
|
||||
|
||||
movdqa xmm1, [init+4*4]
|
||||
pshufd xmm2, xmm1, 0x55
|
||||
pshufd xmm6, xmm1, 0xAA
|
||||
movdqa [hash+3*16], xmm6
|
||||
pshufd xmm6, xmm1, 0xFF
|
||||
movdqa [hash+4*16], xmm6
|
||||
pshufd xmm1, xmm1, 0
|
||||
|
||||
movdqa xmm6, [hash+0*16]
|
||||
paddd xmm2, xmm6
|
||||
movdqa [hash+0*16], xmm2
|
||||
|
||||
|
||||
movdqa xmm2, [hash+3*16]
|
||||
movdqa xmm6, [hash+1*16]
|
||||
paddd xmm2, xmm6
|
||||
movdqa [hash+1*16], xmm2
|
||||
|
||||
movdqa xmm2, [hash+4*16]
|
||||
movdqa xmm6, [hash+2*16]
|
||||
paddd xmm2, xmm6
|
||||
movdqa [hash+2*16], xmm2
|
||||
|
||||
paddd xmm0, xmm1
|
||||
movdqa [hash+4*16], xmm0
|
||||
|
||||
movdqa xmm1, [hash+0*16]
|
||||
movdqa xmm2, [hash+1*16]
|
||||
movdqa xmm6, [hash+2*16]
|
||||
movdqa xmm1, [init]
|
||||
|
||||
pshufd xmm2, xmm1, 0xFF
|
||||
paddd xmm3, xmm2
|
||||
movdqa [hash+3*16], xmm3
|
||||
|
||||
pshufd xmm2, xmm1, 0xAA
|
||||
paddd xmm4, xmm2
|
||||
movdqa [hash+2*16], xmm4
|
||||
|
||||
pshufd xmm2, xmm1, 0x55
|
||||
paddd xmm5, xmm2
|
||||
movdqa [hash+1*16], xmm5
|
||||
|
||||
pshufd xmm1, xmm1, 0
|
||||
paddd xmm7, xmm1
|
||||
movdqa [hash+0*16], xmm7
|
||||
movdqa [hash+1*16], xmm5
|
||||
movdqa [hash+2*16], xmm4
|
||||
movdqa [hash+3*16], xmm3
|
||||
movdqa [hash+4*16], xmm0
|
||||
movdqa [hash+5*16], xmm1
|
||||
movdqa [hash+6*16], xmm2
|
||||
movdqa [hash+7*16], xmm6
|
||||
|
||||
LAB_RET:
|
||||
pop ebx
|
||||
pop edi
|
||||
pop esi
|
||||
retn 4
|
||||
pop edi
|
||||
pop esi
|
||||
retn 4
|
||||
|
Loading…
Reference in New Issue
Block a user