mirror of
https://github.com/GOSTSec/sgminer
synced 2025-01-10 14:58:01 +00:00
Micro-optimisation in sha256_sse2 code courtesy of Guido Ascioti guido.ascioti@gmail.com
This commit is contained in:
parent
68c807d755
commit
d356f44d53
@ -67,12 +67,6 @@ int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
|
|||||||
|
|
||||||
work_restart[thr_id].restart = 0;
|
work_restart[thr_id].restart = 0;
|
||||||
|
|
||||||
/* For debugging */
|
|
||||||
union {
|
|
||||||
__m128i m;
|
|
||||||
uint32_t i[4];
|
|
||||||
} mi;
|
|
||||||
|
|
||||||
/* Message expansion */
|
/* Message expansion */
|
||||||
memcpy(m_midstate, pmidstate, sizeof(m_midstate));
|
memcpy(m_midstate, pmidstate, sizeof(m_midstate));
|
||||||
memcpy(m_w, pdata, sizeof(m_w)); /* The 2nd half of the data */
|
memcpy(m_w, pdata, sizeof(m_w)); /* The 2nd half of the data */
|
||||||
@ -102,17 +96,12 @@ int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
|
|||||||
CalcSha256_x86 (m_4hash, m_4hash1, sha256_32init);
|
CalcSha256_x86 (m_4hash, m_4hash1, sha256_32init);
|
||||||
|
|
||||||
for (j = 0; j < 4; j++) {
|
for (j = 0; j < 4; j++) {
|
||||||
mi.m = m_4hash[7];
|
if (unlikely(((uint32_t *)&(m_4hash[7]))[j] == 0)) {
|
||||||
if (unlikely(mi.i[j] == 0))
|
/* We found a hit...so check it */
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If j != 4, we found a hit...so check it */
|
|
||||||
/* Use the C version for a check... */
|
/* Use the C version for a check... */
|
||||||
if (unlikely(j != 4)) {
|
|
||||||
for (i = 0; i < 8; i++) {
|
for (i = 0; i < 8; i++) {
|
||||||
mi.m = m_4hash[i];
|
*(uint32_t *)&(phash)[i<<2] = ((uint32_t *)&(m_4hash[i]))[j];
|
||||||
*(uint32_t *)&(phash)[i*4] = mi.i[j];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fulltest(phash, ptarget)) {
|
if (fulltest(phash, ptarget)) {
|
||||||
@ -121,6 +110,7 @@ int scanhash_sse2_32(int thr_id, const unsigned char *pmidstate,
|
|||||||
return nonce + j;
|
return nonce + j;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
nonce += 4;
|
nonce += 4;
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
;; SHA-256 for X86 for Linux, based off of:
|
;; SHA-256 for X86 for Linux, based off of:
|
||||||
|
|
||||||
; (c) Ufasoft 2011 http://ufasoft.com mailto:support@ufasoft.com
|
; (c) Ufasoft 2011 http://ufasoft.com mailto:support@ufasoft.com
|
||||||
; Version 2011
|
; Version 2011
|
||||||
@ -15,9 +15,9 @@ BITS 32
|
|||||||
|
|
||||||
; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
|
; 0 = (1024 - 256) (mod (LAB_CALC_UNROLL*LAB_CALC_PARA*16))
|
||||||
%define LAB_CALC_PARA 2
|
%define LAB_CALC_PARA 2
|
||||||
%define LAB_CALC_UNROLL 8
|
%define LAB_CALC_UNROLL 24
|
||||||
|
|
||||||
%define LAB_LOOP_UNROLL 8
|
%define LAB_LOOP_UNROLL 64
|
||||||
|
|
||||||
extern sha256_consts_m128i
|
extern sha256_consts_m128i
|
||||||
|
|
||||||
@ -28,17 +28,8 @@ CalcSha256_x86:
|
|||||||
push edi
|
push edi
|
||||||
mov init, [esp+12]
|
mov init, [esp+12]
|
||||||
|
|
||||||
push ebx
|
|
||||||
|
|
||||||
LAB_NEXT_NONCE:
|
|
||||||
|
|
||||||
mov eax, 64*4 ; 256 - rcx is # of SHA-2 rounds
|
|
||||||
mov ebx, 16*4 ; 64 - rax is where we expand to
|
|
||||||
|
|
||||||
LAB_SHA:
|
LAB_SHA:
|
||||||
push eax
|
lea edi, qword [data+256] ; + 256
|
||||||
lea eax, qword [data+eax*4] ; + 1024
|
|
||||||
lea edi, qword [data+ebx*4] ; + 256
|
|
||||||
|
|
||||||
LAB_CALC:
|
LAB_CALC:
|
||||||
%macro lab_calc_blk 1
|
%macro lab_calc_blk 1
|
||||||
@ -116,13 +107,6 @@ LAB_CALC:
|
|||||||
%assign i i+LAB_CALC_PARA
|
%assign i i+LAB_CALC_PARA
|
||||||
%endrep
|
%endrep
|
||||||
|
|
||||||
add edi, LAB_CALC_UNROLL*LAB_CALC_PARA*16
|
|
||||||
cmp edi, eax
|
|
||||||
jb LAB_CALC
|
|
||||||
|
|
||||||
pop eax
|
|
||||||
mov ebx, 0
|
|
||||||
|
|
||||||
; Load the init values of the message into the hash.
|
; Load the init values of the message into the hash.
|
||||||
|
|
||||||
movdqa xmm7, [init]
|
movdqa xmm7, [init]
|
||||||
@ -143,14 +127,14 @@ LAB_CALC:
|
|||||||
|
|
||||||
pshufd xmm0, xmm0, 0 ; xmm0 == e
|
pshufd xmm0, xmm0, 0 ; xmm0 == e
|
||||||
|
|
||||||
|
|
||||||
LAB_LOOP:
|
LAB_LOOP:
|
||||||
|
|
||||||
;; T t1 = h + (Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)) + ((e & f) ^ AndNot(e, g)) + Expand32<T>(g_sha256_k[j]) + w[j]
|
;; T t1 = h + (Rotr32(e, 6) ^ Rotr32(e, 11) ^ Rotr32(e, 25)) + ((e & f) ^ AndNot(e, g)) + Expand32<T>(g_sha256_k[j]) + w[j]
|
||||||
|
|
||||||
%macro lab_loop_blk 0
|
%macro lab_loop_blk 1
|
||||||
movdqa xmm6, [data+ebx*4]
|
movdqa xmm6, [data+%1]
|
||||||
paddd xmm6, sha256_consts_m128i[ebx*4]
|
paddd xmm6, sha256_consts_m128i[%1]
|
||||||
add ebx, 4
|
|
||||||
|
|
||||||
paddd xmm6, [hash+2*16] ; +h
|
paddd xmm6, [hash+2*16] ; +h
|
||||||
|
|
||||||
@ -217,68 +201,52 @@ LAB_LOOP:
|
|||||||
|
|
||||||
%assign i 0
|
%assign i 0
|
||||||
%rep LAB_LOOP_UNROLL
|
%rep LAB_LOOP_UNROLL
|
||||||
lab_loop_blk
|
lab_loop_blk i
|
||||||
%assign i i+1
|
%assign i i+16
|
||||||
%endrep
|
%endrep
|
||||||
|
|
||||||
cmp ebx, eax
|
|
||||||
jb LAB_LOOP
|
|
||||||
|
|
||||||
; Finished the 64 rounds, calculate hash and save
|
; Finished the 64 rounds, calculate hash and save
|
||||||
|
|
||||||
movdqa xmm1, [init]
|
movdqa xmm1, [init+16]
|
||||||
pshufd xmm2, xmm1, 0x55
|
|
||||||
pshufd xmm6, xmm1, 0xAA
|
|
||||||
movdqa [hash+3*16], xmm6
|
|
||||||
pshufd xmm6, xmm1, 0xFF
|
|
||||||
movdqa [hash+4*16], xmm6
|
|
||||||
pshufd xmm1, xmm1, 0
|
|
||||||
|
|
||||||
paddd xmm5, xmm2
|
pshufd xmm2, xmm1, 0xFF
|
||||||
paddd xmm4, [hash+3*16]
|
movdqa xmm6, [hash+2*16]
|
||||||
paddd xmm3, [hash+4*16]
|
|
||||||
paddd xmm7, xmm1
|
|
||||||
|
|
||||||
movdqa xmm1, [init+4*4]
|
|
||||||
pshufd xmm2, xmm1, 0x55
|
|
||||||
pshufd xmm6, xmm1, 0xAA
|
|
||||||
movdqa [hash+3*16], xmm6
|
|
||||||
pshufd xmm6, xmm1, 0xFF
|
|
||||||
movdqa [hash+4*16], xmm6
|
|
||||||
pshufd xmm1, xmm1, 0
|
|
||||||
|
|
||||||
movdqa xmm6, [hash+0*16]
|
|
||||||
paddd xmm2, xmm6
|
paddd xmm2, xmm6
|
||||||
movdqa [hash+0*16], xmm2
|
movdqa [hash+7*16], xmm2
|
||||||
|
|
||||||
|
pshufd xmm2, xmm1, 0xAA
|
||||||
movdqa xmm2, [hash+3*16]
|
|
||||||
movdqa xmm6, [hash+1*16]
|
movdqa xmm6, [hash+1*16]
|
||||||
paddd xmm2, xmm6
|
paddd xmm2, xmm6
|
||||||
movdqa [hash+1*16], xmm2
|
|
||||||
|
|
||||||
movdqa xmm2, [hash+4*16]
|
|
||||||
movdqa xmm6, [hash+2*16]
|
|
||||||
paddd xmm2, xmm6
|
|
||||||
movdqa [hash+2*16], xmm2
|
|
||||||
|
|
||||||
paddd xmm0, xmm1
|
|
||||||
|
|
||||||
movdqa xmm1, [hash+0*16]
|
|
||||||
movdqa xmm2, [hash+1*16]
|
|
||||||
movdqa xmm6, [hash+2*16]
|
|
||||||
|
|
||||||
movdqa [hash+0*16], xmm7
|
|
||||||
movdqa [hash+1*16], xmm5
|
|
||||||
movdqa [hash+2*16], xmm4
|
|
||||||
movdqa [hash+3*16], xmm3
|
|
||||||
movdqa [hash+4*16], xmm0
|
|
||||||
movdqa [hash+5*16], xmm1
|
|
||||||
movdqa [hash+6*16], xmm2
|
movdqa [hash+6*16], xmm2
|
||||||
movdqa [hash+7*16], xmm6
|
|
||||||
|
pshufd xmm2, xmm1, 0x55
|
||||||
|
movdqa xmm6, [hash+0*16]
|
||||||
|
paddd xmm2, xmm6
|
||||||
|
movdqa [hash+5*16], xmm2
|
||||||
|
|
||||||
|
pshufd xmm1, xmm1, 0
|
||||||
|
paddd xmm0, xmm1
|
||||||
|
movdqa [hash+4*16], xmm0
|
||||||
|
|
||||||
|
movdqa xmm1, [init]
|
||||||
|
|
||||||
|
pshufd xmm2, xmm1, 0xFF
|
||||||
|
paddd xmm3, xmm2
|
||||||
|
movdqa [hash+3*16], xmm3
|
||||||
|
|
||||||
|
pshufd xmm2, xmm1, 0xAA
|
||||||
|
paddd xmm4, xmm2
|
||||||
|
movdqa [hash+2*16], xmm4
|
||||||
|
|
||||||
|
pshufd xmm2, xmm1, 0x55
|
||||||
|
paddd xmm5, xmm2
|
||||||
|
movdqa [hash+1*16], xmm5
|
||||||
|
|
||||||
|
pshufd xmm1, xmm1, 0
|
||||||
|
paddd xmm7, xmm1
|
||||||
|
movdqa [hash+0*16], xmm7
|
||||||
|
|
||||||
LAB_RET:
|
LAB_RET:
|
||||||
pop ebx
|
|
||||||
pop edi
|
pop edi
|
||||||
pop esi
|
pop esi
|
||||||
retn 4
|
retn 4
|
||||||
|
Loading…
Reference in New Issue
Block a user