From e388c11c0248dce681cd9c0f398af1bb5bd87ec3 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Wed, 8 Mar 2017 13:13:43 +0100 Subject: [PATCH] blake2s fix and more missing cuda arch (for the benchmarks) --- Algo256/blake2s.cu | 2 +- Algo256/cuda_blake256.cu | 1 + neoscrypt/cuda_neoscrypt.cu | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Algo256/blake2s.cu b/Algo256/blake2s.cu index 53cd86e..0b4bbe0 100644 --- a/Algo256/blake2s.cu +++ b/Algo256/blake2s.cu @@ -531,7 +531,7 @@ extern "C" int scanhash_blake2s(int thr_id, struct work *work, uint32_t max_nonc } pdata[19] = max(work->nonces[0], work->nonces[1]); // next scan start return rc; - } else if (vhashcpu[6] > ptarget[6]) { + } else if (vhashcpu[7] > ptarget[7]) { gpu_increment_reject(thr_id); if (!opt_quiet) gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", resNonces[0]); diff --git a/Algo256/cuda_blake256.cu b/Algo256/cuda_blake256.cu index c3326e6..60e8fd0 100644 --- a/Algo256/cuda_blake256.cu +++ b/Algo256/cuda_blake256.cu @@ -242,6 +242,7 @@ void blake256_cpu_setBlock_80(uint32_t *pdata) __host__ void blake256_cpu_init(int thr_id, uint32_t threads) { + cuda_get_arch(thr_id); cudaMemcpyToSymbol(u256, c_u256, sizeof(c_u256), 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(sigma, c_sigma, sizeof(c_sigma), 0, cudaMemcpyHostToDevice); } diff --git a/neoscrypt/cuda_neoscrypt.cu b/neoscrypt/cuda_neoscrypt.cu index dcea61f..eb25ad0 100644 --- a/neoscrypt/cuda_neoscrypt.cu +++ b/neoscrypt/cuda_neoscrypt.cu @@ -1471,6 +1471,8 @@ static __thread uint32_t *Trans3 = NULL; // 2 streams __host__ void neoscrypt_init(int thr_id, uint32_t threads) { + cuda_get_arch(thr_id); + CUDA_SAFE_CALL(cudaMalloc(&d_NNonce[thr_id], 2 * sizeof(uint32_t))); CUDA_SAFE_CALL(cudaMalloc(&hash1, 32 * 128 * sizeof(uint64_t) * min(8192, threads))); CUDA_SAFE_CALL(cudaMalloc(&Trans1, 32 * sizeof(uint64_t) * threads));