Browse Source

blake2s fix and more missing cuda arch (for the benchmarks)

2upstream
Tanguy Pruvot 7 years ago
parent
commit
e388c11c02
  1. 2
      Algo256/blake2s.cu
  2. 1
      Algo256/cuda_blake256.cu
  3. 2
      neoscrypt/cuda_neoscrypt.cu

2
Algo256/blake2s.cu

@@ -531,7 +531,7 @@ extern "C" int scanhash_blake2s(int thr_id, struct work *work, uint32_t max_nonc
 			}
 			pdata[19] = max(work->nonces[0], work->nonces[1]); // next scan start
 			return rc;
-		} else if (vhashcpu[6] > ptarget[6]) {
+		} else if (vhashcpu[7] > ptarget[7]) {
 			gpu_increment_reject(thr_id);
 			if (!opt_quiet)
 				gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", resNonces[0]);

1
Algo256/cuda_blake256.cu

@@ -242,6 +242,7 @@ void blake256_cpu_setBlock_80(uint32_t *pdata)
 __host__
 void blake256_cpu_init(int thr_id, uint32_t threads)
 {
+	cuda_get_arch(thr_id);
 	cudaMemcpyToSymbol(u256, c_u256, sizeof(c_u256), 0, cudaMemcpyHostToDevice);
 	cudaMemcpyToSymbol(sigma, c_sigma, sizeof(c_sigma), 0, cudaMemcpyHostToDevice);
 }

2
neoscrypt/cuda_neoscrypt.cu

@@ -1471,6 +1471,8 @@ static __thread uint32_t *Trans3 = NULL; // 2 streams
 __host__
 void neoscrypt_init(int thr_id, uint32_t threads)
 {
+	cuda_get_arch(thr_id);
+
 	CUDA_SAFE_CALL(cudaMalloc(&d_NNonce[thr_id], 2 * sizeof(uint32_t)));
 	CUDA_SAFE_CALL(cudaMalloc(&hash1, 32 * 128 * sizeof(uint64_t) * min(8192, threads)));
 	CUDA_SAFE_CALL(cudaMalloc(&Trans1, 32 * sizeof(uint64_t) * threads));

Loading…
Cancel
Save