Browse Source

correct gostcoin hash calculation

2upstream
orignal 7 years ago
parent
commit
d7bd5f9f3d
  1. 20
      gost/cuda_gosthash.cu
  2. 21
      gost/gost.cu

20
gost/cuda_gosthash.cu

@ -577,6 +577,14 @@ void GOST_Copy512(uint64_t* dst, uint64_t* const __restrict__ src) @@ -577,6 +577,14 @@ void GOST_Copy512(uint64_t* dst, uint64_t* const __restrict__ src)
dst[i] = src[i];
}
__device__ __forceinline__
void GOST_Copy256(uint64_t* dst, uint64_t* const __restrict__ src)
{
#pragma unroll
for (int i=0; i<4; i++)
dst[i] = src[i];
}
__device__ __forceinline__
void GOST_Xor512(uint64_t* C, uint64_t* const A, const uint64_t* B)
{
@ -1058,21 +1066,21 @@ void GOST_hash_X(uint64_t *hash, uchar * const message, uint64_t len) @@ -1058,21 +1066,21 @@ void GOST_hash_X(uint64_t *hash, uchar * const message, uint64_t len)
__global__
__launch_bounds__(128, 3)
void streebog_gpu_hash_64(uint32_t threads, uint64_t *g_hash)
void streebog_gpu_hash_64(uint32_t threads, uint64_t *g_hash) // 80 bytes input
{
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
{
uint64_t* inout = (&g_hash[thread * 8U]);
uint64_t hash[8] = { 0 }; //iv
GOST_hash_X(hash, (uchar*) inout, 512);
GOST_hash_X(hash, (uchar*) inout, 640);
GOST_Copy512(inout, hash);
}
}
__global__
__launch_bounds__(128, 3)
void streebog_gpu_hash_32(uint32_t threads, uint64_t *g_hash)
void streebog_gpu_hash_32(uint32_t threads, uint64_t *g_hash) // 64 bytes input
{
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
@ -1081,12 +1089,12 @@ void streebog_gpu_hash_32(uint32_t threads, uint64_t *g_hash) @@ -1081,12 +1089,12 @@ void streebog_gpu_hash_32(uint32_t threads, uint64_t *g_hash)
uint64_t hash[8];
memset (&hash, 1, 64); // iv
GOST_hash_X(hash, (uchar*) inout, 512);
GOST_Copy512(inout, hash);
GOST_Copy256(inout, hash);
}
}
__host__
void gost_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash)
void gost_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash)
{
const int threadsperblock = 128;
dim3 grid((threads + threadsperblock-1) / threadsperblock);
@ -1096,7 +1104,7 @@ void gost_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t * @@ -1096,7 +1104,7 @@ void gost_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *
}
__host__
void gost_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash)
void gost_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash)
{
const int threadsperblock = 128;
dim3 grid((threads + threadsperblock-1) / threadsperblock);

21
gost/gost.cu

@ -32,8 +32,8 @@ extern "C" void gosthash(void *output, const void *input) @@ -32,8 +32,8 @@ extern "C" void gosthash(void *output, const void *input)
memcpy(output, hash, 32);
}
extern void gost_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash);
extern void gost_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash);
extern void gost_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash);
extern void gost_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash);
//#define _DEBUG
#define _DEBUG_PREFIX "sib"
@ -66,24 +66,23 @@ extern "C" int scanhash_gost(int thr_id, struct work* work, uint32_t max_nonce, @@ -66,24 +66,23 @@ extern "C" int scanhash_gost(int thr_id, struct work* work, uint32_t max_nonce,
}
gpulog(LOG_INFO, thr_id, "Intensity set to %g, %u cuda threads", throughput2intensity(throughput), throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), -1);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 80 * throughput), -1);
cuda_check_cpu_init(thr_id, throughput);
init[thr_id] = true;
}
uint32_t endiandata[20];
for (int k=0; k < 20; k++)
be32enc(&endiandata[k], pdata[k]);
be32enc(&d_hash[thr_id][k], pdata[k]);
do {
int order = 0;
// Hash with CUDA
gost_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id]);
gost_hash_64(thr_id, throughput, d_hash[thr_id]);
TRACE("gost64 :");
gost_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id]);
gost_hash_32(thr_id, throughput, d_hash[thr_id]);
TRACE("gost32 :");
work->nonces[0] = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
@ -91,8 +90,8 @@ extern "C" int scanhash_gost(int thr_id, struct work* work, uint32_t max_nonce, @@ -91,8 +90,8 @@ extern "C" int scanhash_gost(int thr_id, struct work* work, uint32_t max_nonce,
{
const uint32_t Htarg = ptarget[7];
uint32_t _ALIGN(64) vhash[8];
be32enc(&endiandata[19], work->nonces[0]);
gosthash(vhash, endiandata);
be32enc(&d_hash[thr_id][19], work->nonces[0]);
gosthash(vhash, d_hash[thr_id]);
if (vhash[7] <= Htarg && fulltest(vhash, ptarget)) {
work->valid_nonces = 1;
@ -100,8 +99,8 @@ extern "C" int scanhash_gost(int thr_id, struct work* work, uint32_t max_nonce, @@ -100,8 +99,8 @@ extern "C" int scanhash_gost(int thr_id, struct work* work, uint32_t max_nonce,
work->nonces[1] =cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
*hashes_done = pdata[19] - first_nonce + throughput;
if (work->nonces[1] != 0) {
be32enc(&endiandata[19], work->nonces[1]);
sibhash(vhash, endiandata);
be32enc(&d_hash[thr_id][19], work->nonces[1]);
sibhash(vhash, d_hash[thr_id]);
bn_set_target_ratio(work, vhash, 1);
work->valid_nonces++;
pdata[19] = max(work->nonces[0], work->nonces[1]) + 1;

Loading…
Cancel
Save