
gostd_hash

orignal committed 7 years ago · commit 449aeb2282 · branch: upstream

1 changed file with 78 additions and 0 deletions:

  gost/cuda_gosthash.cu (+78)
@@ -14,6 +14,10 @@
typedef unsigned char uchar;
static uint32_t* d_resNonces[MAX_GPUS] = { 0 };
__constant__ static uint32_t __align__(8) c_header[19];
__constant__ static uint32_t __align__(8) c_target[2]; // up to 64 bits
//#define FULL_UNROLL
#define memcpy(dst, src, len) { \
@@ -1093,6 +1097,37 @@ void streebog_gpu_hash_32(uint32_t threads, uint64_t *g_hash) // 64 bytes input
	}
}

__global__
/*__launch_bounds__(256,3)*/
void gostd_gpu_hash_80(const uint32_t threads, const uint32_t startNonce, uint32_t *resNonces)
{
	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		const uint32_t nonce = startNonce + thread;
		uint32_t dat[20];
		#pragma unroll
		for (int i = 0; i < 19; i++) dat[i] = c_header[i];
		dat[19] = nonce;

		uint64_t hash1[8] = { 0 }; // iv for 512
		GOST_hash_X(hash1, (uchar*)dat, 640); // 80 bytes

		uint64_t hash[8];
		#pragma unroll
		for (int i=0; i<64; i++) ((uchar *)hash)[i] = 1; // iv for 256
		GOST_hash_X(hash, (uchar *)hash1, 512); // 64 bytes

		// result is first 32 bytes of hash
		// check nonce
		uint64_t high = MAKE_ULONGLONG(cuda_swab32(_LODWORD(hash[3])), cuda_swab32(_HIDWORD(hash[3]))); // swab uint64_t
		if (high <= c_target[0])
		{
			//printf("%08x %08x - %016llx %016llx - %08x %08x\n", buf[7], buf[6], high, d_target[0], c_target[1], c_target[0]);
			resNonces[1] = atomicExch(resNonces, nonce);
			//d_target[0] = high;
		}
	}
}
__host__
void gost_hash_64(int thr_id, uint32_t threads, uint32_t *d_hash)
{
@@ -1112,3 +1147,46 @@ void gost_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash)
	streebog_gpu_hash_32<<<grid, block>>>(threads, (uint64_t*)d_hash);
}

__host__
void gostd_init(int thr_id)
{
	cuda_get_arch(thr_id);
	CUDA_SAFE_CALL(cudaMalloc(&d_resNonces[thr_id], 2*sizeof(uint32_t)));
}

__host__
void gostd_free(int thr_id)
{
	if (d_resNonces[thr_id]) cudaFree(d_resNonces[thr_id]);
	d_resNonces[thr_id] = NULL;
}

__host__
void gostd_setBlock_80(uint32_t *pdata, uint32_t *ptarget)
{
	uint32_t buf[19];
	for (int i=0;i<19;i++) buf[i] = cuda_swab32(pdata[i]);
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_header, buf, 76, 0, cudaMemcpyHostToDevice));
	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_target, &ptarget[6], 8, 0, cudaMemcpyHostToDevice));
}

__host__
void gostd_hash_80(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *resNonces)
{
	const uint32_t threadsperblock = 256;
	dim3 grid(threads/threadsperblock);
	dim3 block(threadsperblock);

	CUDA_SAFE_CALL(cudaMemset(d_resNonces[thr_id], 0xFF, 2 * sizeof(uint32_t)));
	cudaThreadSynchronize();
	gostd_gpu_hash_80 <<<grid, block>>> (threads, startNonce, d_resNonces[thr_id]);
	cudaThreadSynchronize();

	CUDA_SAFE_CALL(cudaMemcpy(resNonces, d_resNonces[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost));
	if (resNonces[0] == resNonces[1]) {
		resNonces[1] = UINT32_MAX;
	}
}
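For context, here is a minimal host-side sketch of how the entry points added in this commit would typically be driven from a miner's scan loop. The scan_gostd_range wrapper, its parameters, and the single-GPU thr_id = 0 choice are illustrative assumptions, not part of this commit; only the four gostd_* prototypes come from gost/cuda_gosthash.cu above.

// Hypothetical driver (assumption, not part of this commit): shows the
// expected call order for the functions added in gost/cuda_gosthash.cu.
#include <cstdint>
#include <cstdio>

// Prototypes exported by gost/cuda_gosthash.cu (added above)
extern void gostd_init(int thr_id);
extern void gostd_free(int thr_id);
extern void gostd_setBlock_80(uint32_t *pdata, uint32_t *ptarget);
extern void gostd_hash_80(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *resNonces);

// Scan one nonce range on a single GPU (thr_id = 0).
// pdata:   block header words; only the first 19 (76 bytes) are uploaded,
//          the nonce (word 19) is filled in per thread on the GPU.
// ptarget: 8-word (256-bit) target; words 6..7 (8 bytes) are uploaded.
// threads: assumed to be a multiple of 256, the kernel's block size.
static bool scan_gostd_range(uint32_t *pdata, uint32_t *ptarget,
                             uint32_t startNonce, uint32_t threads)
{
	const int thr_id = 0;
	uint32_t resNonces[2];

	gostd_init(thr_id);                // a real miner does this once per device
	gostd_setBlock_80(pdata, ptarget); // upload swabbed header + 64-bit target

	gostd_hash_80(thr_id, threads, startNonce, resNonces);

	// d_resNonces is memset to 0xFF before the kernel runs,
	// so UINT32_MAX in resNonces[0] means "no candidate found".
	bool found = (resNonces[0] != UINT32_MAX);
	if (found)
		printf("candidate nonce %08x (second candidate: %08x)\n",
		       resNonces[0], resNonces[1]);

	gostd_free(thr_id);
	return found;
}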
