From 03b2bddc16a18950b4710b831ee175222d5baca4 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Thu, 29 Oct 2015 13:09:27 +0000 Subject: [PATCH] lyra2v2: fix SM 3.5 support May work also on SM 3.0 (to check) --- lyra2/lyra2REv2.cu | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/lyra2/lyra2REv2.cu b/lyra2/lyra2REv2.cu index 4553b5a..67e2dc9 100644 --- a/lyra2/lyra2REv2.cu +++ b/lyra2/lyra2REv2.cu @@ -90,10 +90,13 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc if (!init[thr_id]) { + size_t matrix_sz = 16 * sizeof(uint64_t) * 4 * 3; cudaSetDevice(dev_id); - //cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); - //if (gpu_threads == 1) - // cudaDeviceSetCacheConfig(cudaFuncCachePreferL1); + if (opt_cudaschedule == -1 && gpu_threads == 1) { + cudaDeviceReset(); + // reduce cpu usage + cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync); + } CUDA_LOG_ERROR(); blake256_cpu_init(thr_id, throughput); @@ -102,8 +105,11 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc bmw256_cpu_init(thr_id, throughput); CUDA_LOG_ERROR(); - // DMatrix (780Ti may prefer 16 instead of 12, cf djm34) - CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], (size_t)12 * sizeof(uint64_t) * 4 * 4 * throughput)); + // SM 3 implementation requires a bit more memory + if (device_sm[dev_id] < 500 || cuda_arch[dev_id] < 500) + matrix_sz = 16 * sizeof(uint64_t) * 4 * 4; + + CUDA_SAFE_CALL(cudaMalloc(&d_matrix[thr_id], matrix_sz * throughput)); lyra2v2_cpu_init(thr_id, throughput, d_matrix[thr_id]); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t)32 * throughput));