mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-31 00:44:15 +00:00
keccak: avoid to use twice cuda_default_throughput
and drop useless gpu hash alloc...
This commit is contained in:
parent
11a512f2a7
commit
73dd6aac5c
@ -22,7 +22,6 @@ static const uint64_t host_keccak_round_constants[24] = {
|
|||||||
0x0000000080000001ull, 0x8000000080008008ull
|
0x0000000080000001ull, 0x8000000080008008ull
|
||||||
};
|
};
|
||||||
|
|
||||||
static uint32_t *d_nounce[MAX_GPUS];
|
|
||||||
static uint32_t *d_KNonce[MAX_GPUS];
|
static uint32_t *d_KNonce[MAX_GPUS];
|
||||||
|
|
||||||
__constant__ uint32_t pTarget[8];
|
__constant__ uint32_t pTarget[8];
|
||||||
@ -170,7 +169,7 @@ static void keccak_blockv30(uint64_t *s, const uint64_t *keccak_round_constants)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
__global__ __launch_bounds__(128,5)
|
__global__ __launch_bounds__(128,5)
|
||||||
void keccak256_sm3_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
|
void keccak256_sm3_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint32_t *resNounce)
|
||||||
{
|
{
|
||||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
@ -210,10 +209,9 @@ void keccak256_sm3_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *out
|
|||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
uint32_t keccak256_sm3_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
|
void keccak256_sm3_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *resNonces, int order)
|
||||||
{
|
{
|
||||||
uint32_t result = UINT32_MAX;
|
cudaMemset(d_KNonce[thr_id], 0xff, 2*sizeof(uint32_t));
|
||||||
cudaMemset(d_KNonce[thr_id], 0xff, sizeof(uint32_t));
|
|
||||||
const uint32_t threadsperblock = 128;
|
const uint32_t threadsperblock = 128;
|
||||||
|
|
||||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||||
@ -221,14 +219,10 @@ uint32_t keccak256_sm3_hash_80(int thr_id, uint32_t threads, uint32_t startNounc
|
|||||||
|
|
||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
keccak256_sm3_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_outputHash, d_KNonce[thr_id]);
|
keccak256_sm3_gpu_hash_80<<<grid, block, shared_size>>>(threads, startNounce, d_KNonce[thr_id]);
|
||||||
|
|
||||||
MyStreamSynchronize(NULL, order, thr_id);
|
cudaMemcpy(resNonces, d_KNonce[thr_id], 2*sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||||
cudaMemcpy(d_nounce[thr_id], d_KNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
|
||||||
cudaThreadSynchronize();
|
cudaThreadSynchronize();
|
||||||
result = *d_nounce[thr_id];
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
@ -299,13 +293,11 @@ void keccak256_sm3_init(int thr_id, uint32_t threads)
|
|||||||
{
|
{
|
||||||
CUDA_SAFE_CALL(cudaMemcpyToSymbol(keccak_round_constants, host_keccak_round_constants,
|
CUDA_SAFE_CALL(cudaMemcpyToSymbol(keccak_round_constants, host_keccak_round_constants,
|
||||||
sizeof(host_keccak_round_constants), 0, cudaMemcpyHostToDevice));
|
sizeof(host_keccak_round_constants), 0, cudaMemcpyHostToDevice));
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_KNonce[thr_id], sizeof(uint32_t)));
|
CUDA_SAFE_CALL(cudaMalloc(&d_KNonce[thr_id], 2*sizeof(uint32_t)));
|
||||||
CUDA_SAFE_CALL(cudaMallocHost(&d_nounce[thr_id], 1*sizeof(uint32_t)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
void keccak256_sm3_free(int thr_id)
|
void keccak256_sm3_free(int thr_id)
|
||||||
{
|
{
|
||||||
cudaFree(d_KNonce[thr_id]);
|
cudaFree(d_KNonce[thr_id]);
|
||||||
cudaFreeHost(d_nounce[thr_id]);
|
|
||||||
}
|
}
|
||||||
|
@ -14,8 +14,6 @@ extern "C"
|
|||||||
|
|
||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
|
|
||||||
static uint32_t *d_hash[MAX_GPUS];
|
|
||||||
|
|
||||||
// SM5+ cuda
|
// SM5+ cuda
|
||||||
extern void keccak256_cpu_init(int thr_id);
|
extern void keccak256_cpu_init(int thr_id);
|
||||||
extern void keccak256_cpu_free(int thr_id);
|
extern void keccak256_cpu_free(int thr_id);
|
||||||
@ -27,7 +25,7 @@ extern void keccak256_setOutput(int thr_id);
|
|||||||
extern void keccak256_sm3_init(int thr_id, uint32_t threads);
|
extern void keccak256_sm3_init(int thr_id, uint32_t threads);
|
||||||
extern void keccak256_sm3_free(int thr_id);
|
extern void keccak256_sm3_free(int thr_id);
|
||||||
extern void keccak256_sm3_setBlock_80(void *pdata, const void *ptarget);
|
extern void keccak256_sm3_setBlock_80(void *pdata, const void *ptarget);
|
||||||
extern uint32_t keccak256_sm3_hash_80(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_hash, int order);
|
extern uint32_t keccak256_sm3_hash_80(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t* resNonces, int order);
|
||||||
|
|
||||||
// CPU Hash
|
// CPU Hash
|
||||||
extern "C" void keccak256_hash(void *state, const void *input)
|
extern "C" void keccak256_hash(void *state, const void *input)
|
||||||
@ -52,13 +50,13 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
|
|||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const int dev_id = device_map[thr_id];
|
const int dev_id = device_map[thr_id];
|
||||||
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 21); // 256*256*8*4
|
uint32_t throughput;
|
||||||
|
uint32_t intensity = 23;
|
||||||
if(!use_compat_kernels[thr_id]) {
|
if(!use_compat_kernels[thr_id]) {
|
||||||
uint32_t intensity = 23;
|
|
||||||
if (strstr(device_name[dev_id], "GTX 1070")) intensity = 25;
|
if (strstr(device_name[dev_id], "GTX 1070")) intensity = 25;
|
||||||
if (strstr(device_name[dev_id], "GTX 1080")) intensity = 26;
|
if (strstr(device_name[dev_id], "GTX 1080")) intensity = 26;
|
||||||
throughput = cuda_default_throughput(thr_id, 1U << intensity);
|
|
||||||
}
|
}
|
||||||
|
throughput = cuda_default_throughput(thr_id, 1U << intensity);
|
||||||
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
@ -80,7 +78,6 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
|
|||||||
keccak256_cpu_init(thr_id);
|
keccak256_cpu_init(thr_id);
|
||||||
} else {
|
} else {
|
||||||
// really useful ?
|
// really useful ?
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], throughput * 64));
|
|
||||||
keccak256_sm3_init(thr_id, throughput);
|
keccak256_sm3_init(thr_id, throughput);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -107,7 +104,7 @@ extern "C" int scanhash_keccak256(int thr_id, struct work* work, uint32_t max_no
|
|||||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||||
|
|
||||||
if(use_compat_kernels[thr_id])
|
if(use_compat_kernels[thr_id])
|
||||||
work->nonces[0] = keccak256_sm3_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
keccak256_sm3_hash_80(thr_id, throughput, pdata[19], work->nonces, order++);
|
||||||
else {
|
else {
|
||||||
keccak256_cpu_hash_80(thr_id, throughput, pdata[19], work->nonces, highTarget);
|
keccak256_cpu_hash_80(thr_id, throughput, pdata[19], work->nonces, highTarget);
|
||||||
}
|
}
|
||||||
@ -170,7 +167,6 @@ extern "C" void free_keccak256(int thr_id)
|
|||||||
if(!use_compat_kernels[thr_id])
|
if(!use_compat_kernels[thr_id])
|
||||||
keccak256_cpu_free(thr_id);
|
keccak256_cpu_free(thr_id);
|
||||||
else {
|
else {
|
||||||
cudaFree(d_hash[thr_id]);
|
|
||||||
keccak256_sm3_free(thr_id);
|
keccak256_sm3_free(thr_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user