Browse Source

lyra2v2: fix SM 3.5 support

May also work on SM 3.0 (to be checked)
2upstream
Tanguy Pruvot 9 years ago
parent
commit
03b2bddc16
  1. 16
      lyra2/lyra2REv2.cu

16
lyra2/lyra2REv2.cu

@ -90,10 +90,13 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
if (!init[thr_id]) if (!init[thr_id])
{ {
size_t matrix_sz = 16 * sizeof(uint64_t) * 4 * 3;
cudaSetDevice(dev_id); cudaSetDevice(dev_id);
//cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); if (opt_cudaschedule == -1 && gpu_threads == 1) {
//if (gpu_threads == 1) cudaDeviceReset();
// cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); // reduce cpu usage
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
}
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
blake256_cpu_init(thr_id, throughput); blake256_cpu_init(thr_id, throughput);
@ -102,8 +105,11 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
bmw256_cpu_init(thr_id, throughput); bmw256_cpu_init(thr_id, throughput);
CUDA_LOG_ERROR(); CUDA_LOG_ERROR();
// DMatrix (780Ti may prefer 16 instead of 12, cf djm34) // SM 3 implementation requires a bit more memory
CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], (size_t)12 * sizeof(uint64_t) * 4 * 4 * throughput)); if (device_sm[dev_id] < 500 || cuda_arch[dev_id] < 500)
matrix_sz = 16 * sizeof(uint64_t) * 4 * 4;
CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], matrix_sz * throughput));
lyra2v2_cpu_init(thr_id, throughput, d_matrix[thr_id]); lyra2v2_cpu_init(thr_id, throughput, d_matrix[thr_id]);
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput)); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput));

Loading…
Cancel
Save