mirror of
https://github.com/GOSTSec/ccminer
synced 2025-03-09 12:01:09 +00:00
xmr: stabilize the final kernel
This commit is contained in:
parent
066a569357
commit
c1f1ad9280
@ -274,6 +274,12 @@
|
||||
<CudaCompile Include="crypto\cuda_cryptonight_extra.cu">
|
||||
<MaxRegCount>255</MaxRegCount>
|
||||
</CudaCompile>
|
||||
<ClInclude Include="crypto\cn_aes.cuh" />
|
||||
<ClInclude Include="crypto\cn_blake.cuh" />
|
||||
<ClInclude Include="crypto\cn_groestl.cuh" />
|
||||
<ClInclude Include="crypto\cn_jh.cuh" />
|
||||
<ClInclude Include="crypto\cn_keccak.cuh" />
|
||||
<ClInclude Include="crypto\cn_skein.cuh" />
|
||||
<CudaCompile Include="crypto\wildkeccak.cu">
|
||||
<MaxRegCount>128</MaxRegCount>
|
||||
</CudaCompile>
|
||||
|
@ -97,6 +97,9 @@
|
||||
<Filter Include="Source Files\crypto\bbr">
|
||||
<UniqueIdentifier>{af387eac-e9e6-4e91-a5e8-637b1e7a8d93}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="Source Files\CUDA\xmr">
|
||||
<UniqueIdentifier>{0f9aec5e-5409-488f-992a-2c108590d1ac}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="compat\jansson\dump.c">
|
||||
@ -515,15 +518,33 @@
|
||||
<ClInclude Include="crypto\xmr-rpc.h">
|
||||
<Filter>Source Files\crypto</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\cryptonight.h">
|
||||
<Filter>Source Files\crypto\xmr</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\mman.h">
|
||||
<Filter>Source Files\crypto\bbr</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\wildkeccak.h">
|
||||
<Filter>Source Files\crypto\bbr</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\cn_aes.cuh">
|
||||
<Filter>Source Files\CUDA\xmr</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\cn_blake.cuh">
|
||||
<Filter>Source Files\CUDA\xmr</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\cn_groestl.cuh">
|
||||
<Filter>Source Files\CUDA\xmr</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\cn_jh.cuh">
|
||||
<Filter>Source Files\CUDA\xmr</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\cn_keccak.cuh">
|
||||
<Filter>Source Files\CUDA\xmr</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\cn_skein.cuh">
|
||||
<Filter>Source Files\CUDA\xmr</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="crypto\cryptonight.h">
|
||||
<Filter>Source Files\CUDA\xmr</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CudaCompile Include="cuda.cpp">
|
||||
@ -802,17 +823,17 @@
|
||||
<CudaCompile Include="sia\sia.cu">
|
||||
<Filter>Source Files\sia</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="crypto\wildkeccak.cu">
|
||||
<Filter>Source Files\crypto</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="crypto\cryptonight.cu">
|
||||
<Filter>Source Files\crypto</Filter>
|
||||
<Filter>Source Files\CUDA\xmr</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="crypto\cuda_cryptonight_core.cu">
|
||||
<Filter>Source Files\crypto</Filter>
|
||||
<Filter>Source Files\CUDA\xmr</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="crypto\cuda_cryptonight_extra.cu">
|
||||
<Filter>Source Files\crypto</Filter>
|
||||
<Filter>Source Files\CUDA\xmr</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="crypto\wildkeccak.cu">
|
||||
<Filter>Source Files\CUDA</Filter>
|
||||
</CudaCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
|
@ -181,7 +181,7 @@ void cn_jh_F8(jhHashState *state)
|
||||
}
|
||||
|
||||
__device__
|
||||
void cn_jh_update(jhHashState * __restrict__ state, const BitSequence * __restrict__ data, DataLength databitlen)
|
||||
void cn_jh_update(jhHashState * __restrict__ state, const uint8_t * __restrict__ data, DataLength databitlen)
|
||||
{
|
||||
DataLength index;
|
||||
|
||||
@ -222,7 +222,7 @@ void cn_jh_update(jhHashState * __restrict__ state, const BitSequence * __restri
|
||||
|
||||
/* pad the message, process the padded block(s), truncate the hash value H to obtain the message digest */
|
||||
__device__
|
||||
void cn_jh_final(jhHashState * __restrict__ state, BitSequence * __restrict__ hashval)
|
||||
void cn_jh_final(jhHashState * __restrict__ state, uint32_t * __restrict__ hashval)
|
||||
{
|
||||
unsigned int i;
|
||||
//uint32_t *bufptr = (uint32_t *)state->buffer;
|
||||
@ -268,7 +268,7 @@ void cn_jh_final(jhHashState * __restrict__ state, BitSequence * __restrict__ ha
|
||||
cn_jh_F8(state);
|
||||
}
|
||||
|
||||
memcpy(hashval, (unsigned char*)state->x+64+32, 32);
|
||||
MEMCPY4(hashval, ((unsigned char*)state->x) + 64 + 32, 8);
|
||||
}
|
||||
|
||||
__device__
|
||||
@ -277,13 +277,14 @@ void cn_jh_init(jhHashState *state, int hashbitlen)
|
||||
state->databitlen = 0;
|
||||
state->datasize_in_buffer = 0;
|
||||
state->hashbitlen = hashbitlen;
|
||||
memcpy(state->x, d_JH256_H0, 128);
|
||||
//memcpy(state->x, d_JH256_H0, 128);
|
||||
MEMCPY8(state->x, d_JH256_H0, 128 / 8);
|
||||
}
|
||||
|
||||
__device__
|
||||
void cn_jh(const BitSequence * __restrict__ data, DataLength len, BitSequence * __restrict__ hashval)
|
||||
void cn_jh256(const uint8_t * __restrict__ data, DataLength len, uint32_t * __restrict__ hashval)
|
||||
{
|
||||
int hashbitlen = 256;
|
||||
const int hashbitlen = 256;
|
||||
DataLength databitlen = len << 3;
|
||||
jhHashState state;
|
||||
|
||||
|
@ -109,24 +109,17 @@ typedef struct {
|
||||
uint8_t b[SKEIN_512_BLOCK_BYTES];
|
||||
} Skein_512_Ctxt_t;
|
||||
|
||||
typedef struct {
|
||||
Skein_Ctxt_Hdr_t h;
|
||||
uint64_t X[SKEIN1024_STATE_WORDS];
|
||||
uint8_t b[SKEIN1024_BLOCK_BYTES];
|
||||
} Skein1024_Ctxt_t;
|
||||
|
||||
typedef struct {
|
||||
uint_t statebits;
|
||||
union {
|
||||
Skein_Ctxt_Hdr_t h;
|
||||
Skein_256_Ctxt_t ctx_256;
|
||||
Skein_512_Ctxt_t ctx_512;
|
||||
Skein1024_Ctxt_t ctx1024;
|
||||
} u;
|
||||
} skeinHashState;
|
||||
|
||||
__device__
|
||||
void cn_skein_init(skeinHashState *state, size_t hashBitLen)
|
||||
void cn_skein256_init(skeinHashState *state, size_t hashBitLen)
|
||||
{
|
||||
const uint64_t SKEIN_512_IV_256[] =
|
||||
{
|
||||
@ -150,7 +143,7 @@ void cn_skein_init(skeinHashState *state, size_t hashBitLen)
|
||||
}
|
||||
|
||||
__device__
|
||||
void cn_skein512_processblock(Skein_512_Ctxt_t * __restrict__ ctx, const uint8_t * __restrict__ blkPtr, size_t blkCnt, size_t byteCntAdd)
|
||||
void cn_skein_processblock(Skein_512_Ctxt_t * __restrict__ ctx, const uint8_t * __restrict__ blkPtr, size_t blkCnt, size_t byteCntAdd)
|
||||
{
|
||||
enum {
|
||||
R_512_0_0=46, R_512_0_1=36, R_512_0_2=19, R_512_0_3=37,
|
||||
@ -226,51 +219,7 @@ void cn_skein512_processblock(Skein_512_Ctxt_t * __restrict__ ctx, const uint8_t
|
||||
}
|
||||
|
||||
__device__
|
||||
void cn_skein_final(skeinHashState * __restrict__ state, uint8_t * __restrict__ hashVal)
|
||||
{
|
||||
size_t i,n,byteCnt;
|
||||
uint64_t X[SKEIN_512_STATE_WORDS];
|
||||
Skein_512_Ctxt_t *ctx = (Skein_512_Ctxt_t *)&state->u.ctx_512;
|
||||
//size_t tmp;
|
||||
//uint8_t *p8;
|
||||
//uint64_t *p64;
|
||||
|
||||
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
|
||||
|
||||
if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) {
|
||||
|
||||
memset(&ctx->b[ctx->h.bCnt],0,SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
|
||||
//p8 = &ctx->b[ctx->h.bCnt];
|
||||
//tmp = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;
|
||||
//for( i = 0; i < tmp; i++ ) *(p8+i) = 0;
|
||||
}
|
||||
|
||||
cn_skein512_processblock(ctx,ctx->b,1,ctx->h.bCnt);
|
||||
|
||||
byteCnt = (ctx->h.hashBitLen + 7) >> 3;
|
||||
|
||||
//uint8_t b[SKEIN_512_BLOCK_BYTES] == 64
|
||||
memset(ctx->b,0,sizeof(ctx->b));
|
||||
//p64 = (uint64_t *)ctx->b;
|
||||
//for( i = 0; i < 8; i++ ) *(p64+i) = 0;
|
||||
|
||||
memcpy(X,ctx->X,sizeof(X));
|
||||
|
||||
for (i=0;i*SKEIN_512_BLOCK_BYTES < byteCnt;i++) {
|
||||
|
||||
((uint64_t *)ctx->b)[0]= (uint64_t)i;
|
||||
Skein_Start_New_Type(ctx,OUT_FINAL);
|
||||
cn_skein512_processblock(ctx,ctx->b,1,sizeof(uint64_t));
|
||||
n = byteCnt - i*SKEIN_512_BLOCK_BYTES;
|
||||
if (n >= SKEIN_512_BLOCK_BYTES)
|
||||
n = SKEIN_512_BLOCK_BYTES;
|
||||
memcpy(hashVal+i*SKEIN_512_BLOCK_BYTES,ctx->X,n);
|
||||
memcpy(ctx->X,X,sizeof(X)); /* restore the counter mode key for next time */
|
||||
}
|
||||
}
|
||||
|
||||
__device__
|
||||
void cn_skein512_update(Skein_512_Ctxt_t * __restrict__ ctx, const uint8_t * __restrict__ msg, size_t msgByteCnt)
|
||||
void cn_skein_block(Skein_512_Ctxt_t * __restrict__ ctx, const uint8_t * __restrict__ msg, size_t msgByteCnt)
|
||||
{
|
||||
size_t n;
|
||||
|
||||
@ -288,14 +237,14 @@ void cn_skein512_update(Skein_512_Ctxt_t * __restrict__ ctx, const uint8_t * __r
|
||||
ctx->h.bCnt += n;
|
||||
}
|
||||
|
||||
cn_skein512_processblock(ctx,ctx->b,1,SKEIN_512_BLOCK_BYTES);
|
||||
cn_skein_processblock(ctx, ctx->b, 1, SKEIN_512_BLOCK_BYTES);
|
||||
ctx->h.bCnt = 0;
|
||||
}
|
||||
|
||||
if (msgByteCnt > SKEIN_512_BLOCK_BYTES) {
|
||||
|
||||
n = (msgByteCnt-1) / SKEIN_512_BLOCK_BYTES;
|
||||
cn_skein512_processblock(ctx,msg,n,SKEIN_512_BLOCK_BYTES);
|
||||
n = (msgByteCnt - 1) / SKEIN_512_BLOCK_BYTES;
|
||||
cn_skein_processblock(ctx, msg, n, SKEIN_512_BLOCK_BYTES);
|
||||
msgByteCnt -= n * SKEIN_512_BLOCK_BYTES;
|
||||
msg += n * SKEIN_512_BLOCK_BYTES;
|
||||
}
|
||||
@ -309,11 +258,11 @@ void cn_skein512_update(Skein_512_Ctxt_t * __restrict__ ctx, const uint8_t * __r
|
||||
}
|
||||
|
||||
__device__
|
||||
void cn_skein_update(skeinHashState * __restrict__ state, const BitSequence * __restrict__ data, DataLength databitlen)
|
||||
void cn_skein256_update(skeinHashState * __restrict__ state, const uint8_t * __restrict__ data, DataLength databitlen)
|
||||
{
|
||||
if ((databitlen & 7) == 0) {
|
||||
|
||||
cn_skein512_update(&state->u.ctx_512,data,databitlen >> 3);
|
||||
cn_skein_block(&state->u.ctx_512, data, databitlen >> 3);
|
||||
}
|
||||
else {
|
||||
|
||||
@ -323,15 +272,46 @@ void cn_skein_update(skeinHashState * __restrict__ state, const BitSequence * __
|
||||
mask = (uint8_t) (1u << (7 - (databitlen & 7)));
|
||||
b = (uint8_t) ((data[bCnt-1] & (0-mask)) | mask);
|
||||
|
||||
cn_skein512_update(&state->u.ctx_512, data, bCnt-1);
|
||||
cn_skein512_update(&state->u.ctx_512, &b, 1);
|
||||
cn_skein_block(&state->u.ctx_512, data, bCnt - 1);
|
||||
cn_skein_block(&state->u.ctx_512, &b, 1);
|
||||
|
||||
Skein_Set_Bit_Pad_Flag(state->u.h);
|
||||
}
|
||||
}
|
||||
|
||||
__device__
|
||||
void cn_skein(const BitSequence * __restrict__ data, DataLength len, BitSequence * __restrict__ hashval)
|
||||
void cn_skein256_final(skeinHashState * __restrict__ state, uint32_t * __restrict__ hashVal)
|
||||
{
|
||||
uint64_t X[SKEIN_512_STATE_WORDS];
|
||||
Skein_512_Ctxt_t *ctx = (Skein_512_Ctxt_t *)&state->u.ctx_512;
|
||||
const int byteCnt = (ctx->h.hashBitLen + 7) >> 3;
|
||||
|
||||
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
|
||||
|
||||
if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)
|
||||
{
|
||||
memset(&ctx->b[ctx->h.bCnt], 0, SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
|
||||
}
|
||||
|
||||
cn_skein_processblock(ctx, ctx->b, 1, ctx->h.bCnt);
|
||||
|
||||
memset(ctx->b, 0, sizeof(ctx->b));
|
||||
memcpy(X, ctx->X, sizeof(X));
|
||||
|
||||
for (int i = 0; i*SKEIN_512_BLOCK_BYTES < byteCnt; i++)
|
||||
{
|
||||
int n = byteCnt - i*SKEIN_512_BLOCK_BYTES;
|
||||
if (n > SKEIN_512_BLOCK_BYTES) n = SKEIN_512_BLOCK_BYTES;
|
||||
((uint64_t *)ctx->b)[0] = (uint64_t)i;
|
||||
Skein_Start_New_Type(ctx, OUT_FINAL);
|
||||
cn_skein_processblock(ctx, ctx->b, 1, sizeof(uint64_t));
|
||||
memcpy(hashVal + (i*SKEIN_512_BLOCK_BYTES/sizeof(uint32_t)), ctx->X, n);
|
||||
memcpy(ctx->X, X, sizeof(X)); // restore the counter mode key for next time
|
||||
}
|
||||
}
|
||||
|
||||
__device__
|
||||
void cn_skein(const uint8_t * __restrict__ data, DataLength len, uint32_t * __restrict__ hashval)
|
||||
{
|
||||
int hashbitlen = 256;
|
||||
DataLength databitlen = len << 3;
|
||||
@ -339,7 +319,7 @@ void cn_skein(const BitSequence * __restrict__ data, DataLength len, BitSequence
|
||||
|
||||
state.statebits = 64*SKEIN_512_STATE_WORDS;
|
||||
|
||||
cn_skein_init(&state, hashbitlen);
|
||||
cn_skein_update(&state, data, databitlen);
|
||||
cn_skein_final(&state, hashval);
|
||||
cn_skein256_init(&state, hashbitlen);
|
||||
cn_skein256_update(&state, data, databitlen);
|
||||
cn_skein256_final(&state, hashval);
|
||||
}
|
||||
|
@ -214,6 +214,7 @@ static void cryptonight_hash_ctx(void* output, const void* input, size_t len, st
|
||||
|
||||
int extra_algo = ctx->state.hs.b[0] & 3;
|
||||
extra_hashes[extra_algo](&ctx->state, 200, output);
|
||||
if (opt_debug) applog(LOG_DEBUG, "extra algo=%d", extra_algo);
|
||||
|
||||
oaes_free((OAES_CTX **) &ctx->aes_ctx);
|
||||
}
|
||||
|
@ -86,7 +86,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
|
||||
init[thr_id] = true;
|
||||
}
|
||||
|
||||
throughput = cuda_default_throughput(thr_id, cn_blocks*cn_blocks);
|
||||
throughput = cuda_default_throughput(thr_id, cn_blocks*cn_threads);
|
||||
|
||||
do
|
||||
{
|
||||
@ -144,7 +144,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
|
||||
|
||||
done:
|
||||
gpulog(LOG_DEBUG, thr_id, "nonce %08x exit", nonce);
|
||||
|
||||
work->valid_nonces = res;
|
||||
*nonceptr = nonce;
|
||||
return res;
|
||||
}
|
||||
|
@ -11,6 +11,7 @@ struct uint3 {
|
||||
struct uint3 threadIdx;
|
||||
struct uint3 blockIdx;
|
||||
struct uint3 blockDim;
|
||||
#define atomicExch(p,y) (*p) = y
|
||||
#define __funnelshift_r(a,b,c) 1
|
||||
#define __syncthreads()
|
||||
#define asm(x)
|
||||
@ -143,10 +144,6 @@ static inline void exit_if_cudaerror(int thr_id, const char *src, int line)
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
void hash_permutation(union hash_state *state);
|
||||
void hash_process(union hash_state *state, const uint8_t *buf, size_t count);
|
||||
|
||||
void cryptonight_core_cpu_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2);
|
||||
|
||||
void cryptonight_extra_cpu_setData(int thr_id, const void *data, const void *pTargetIn);
|
||||
|
@ -110,7 +110,7 @@ void cryptonight_extra_gpu_prepare(const uint32_t threads, uint32_t * __restrict
|
||||
}
|
||||
|
||||
__global__
|
||||
void cryptonight_extra_gpu_keccakf2(uint32_t threads, uint32_t * __restrict__ d_ctx_state)
|
||||
void cryptonight_extra_gpu_keccak(uint32_t threads, uint32_t * __restrict__ d_ctx_state)
|
||||
{
|
||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if(thread < threads)
|
||||
@ -123,43 +123,59 @@ void cryptonight_extra_gpu_keccakf2(uint32_t threads, uint32_t * __restrict__ d_
|
||||
|
||||
cn_keccakf2(state);
|
||||
|
||||
#pragma unroll
|
||||
for(int i = 0; i < 25; i++)
|
||||
ctx_state[i] = state[i];
|
||||
// to reduce the final kernel stack frame, cut algos in 2 kernels
|
||||
// ps: these 2 final kernels are not important for the overall xmr hashrate (< 1%)
|
||||
switch (((uint8_t*)state)[0] & 0x03)
|
||||
{
|
||||
case 0: {
|
||||
uint32_t hash[8];
|
||||
cn_blake((uint8_t*)state, 200, (uint8_t*)hash);
|
||||
((uint32_t*)ctx_state)[0] = 0;
|
||||
((uint32_t*)ctx_state)[6] = hash[6];
|
||||
((uint32_t*)ctx_state)[7] = hash[7];
|
||||
break;
|
||||
}
|
||||
case 1: {
|
||||
uint32_t hash[8];
|
||||
cn_groestl((BitSequence*)state, 200, (BitSequence*)hash);
|
||||
((uint32_t*)ctx_state)[0] = 0;
|
||||
((uint32_t*)ctx_state)[6] = hash[6];
|
||||
((uint32_t*)ctx_state)[7] = hash[7];
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
#pragma unroll
|
||||
for(int i = 0; i < 25; i++)
|
||||
ctx_state[i] = state[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__global__
|
||||
void cryptonight_extra_gpu_nonces(uint32_t threads, const uint32_t startNonce, const uint32_t * __restrict__ d_ctx_state,
|
||||
void cryptonight_extra_gpu_final(uint32_t threads, const uint32_t startNonce, uint64_t * __restrict__ d_ctx_state,
|
||||
const uint32_t* d_target, uint32_t * resNonces)
|
||||
{
|
||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
if(thread < threads)
|
||||
{
|
||||
uint64_t* ctx_state = (uint64_t*) (&d_ctx_state[thread * 50]);
|
||||
uint64_t state[25];
|
||||
#pragma unroll
|
||||
for(int i = 0; i < 25; i++)
|
||||
state[i] = ctx_state[i];
|
||||
uint64_t* const state = &d_ctx_state[thread * 25];
|
||||
|
||||
uint32_t hash[8];
|
||||
switch(((uint8_t *)state)[0] & 0x03)
|
||||
{
|
||||
case 0: {
|
||||
cn_blake((uint8_t*)state, 200, (uint8_t*)hash);
|
||||
break;
|
||||
}
|
||||
case 1: {
|
||||
cn_groestl((BitSequence*)state, 200, (BitSequence*)hash);
|
||||
uint32_t* h32 = (uint32_t*)state;
|
||||
hash[6] = h32[6];
|
||||
hash[7] = h32[7];
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
// to double check..
|
||||
cn_jh((BitSequence*)state, 200, (BitSequence*)hash);
|
||||
cn_jh256((uint8_t*)state, 200, hash);
|
||||
break;
|
||||
}
|
||||
case 3: {
|
||||
cn_skein((BitSequence*)state, 200, (BitSequence*)hash);
|
||||
cn_skein((uint8_t*)state, 200, hash);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -195,7 +211,7 @@ void cryptonight_extra_cpu_init(int thr_id, uint32_t threads)
|
||||
__host__
|
||||
void cryptonight_extra_cpu_prepare(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2)
|
||||
{
|
||||
int threadsperblock = 128;
|
||||
uint32_t threadsperblock = 128;
|
||||
|
||||
dim3 grid((threads + threadsperblock - 1) / threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
@ -207,16 +223,16 @@ void cryptonight_extra_cpu_prepare(int thr_id, uint32_t threads, uint32_t startN
|
||||
__host__
|
||||
void cryptonight_extra_cpu_final(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *resnonce, uint32_t *d_ctx_state)
|
||||
{
|
||||
int threadsperblock = 128;
|
||||
uint32_t threadsperblock = 128;
|
||||
|
||||
dim3 grid((threads + threadsperblock - 1) / threadsperblock);
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
cudaMemset(d_result[thr_id], 0xFF, 2*sizeof(uint32_t));
|
||||
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
||||
cryptonight_extra_gpu_keccakf2 <<<grid, block >>> (threads, d_ctx_state);
|
||||
cryptonight_extra_gpu_keccak <<<grid, block >>> (threads, d_ctx_state);
|
||||
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
||||
cryptonight_extra_gpu_nonces <<<grid, block >>> (threads, startNonce, d_ctx_state, d_target[thr_id], d_result[thr_id]);
|
||||
cryptonight_extra_gpu_final <<<grid, block >>> (threads, startNonce, (uint64_t*)d_ctx_state, d_target[thr_id], d_result[thr_id]);
|
||||
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
||||
cudaMemcpy(resnonce, d_result[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
||||
|
Loading…
x
Reference in New Issue
Block a user