Browse Source

attempt to reduce shared mem errors

2upstream
Tanguy Pruvot 9 years ago
parent
commit
0a0fd33cac
  1. 2
      configure.ac
  2. 2
      x11/c11.cu
  3. 3
      x11/cuda_x11_echo.cu
  4. 4
      x11/cuda_x11_shavite512.cu
  5. 2
      x11/s3.cu
  6. 2
      x11/sib.cu
  7. 2
      x11/x11.cu
  8. 2
      x11/x11evo.cu
  9. 2
      x13/x13.cu
  10. 2
      x15/x14.cu
  11. 2
      x15/x15.cu
  12. 2
      x17/x17.cu

2
configure.ac

@@ -1,4 +1,4 @@
AC_INIT([ccminer], [1.8], [], [ccminer], [http://github.com/tpruvot/ccminer])
AC_INIT([ccminer], [1.8.1], [], [ccminer], [http://github.com/tpruvot/ccminer])
AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM

2
x11/c11.cu

@@ -111,7 +111,7 @@ extern "C" int scanhash_c11(int thr_id, struct work* work, uint32_t max_nonce, u
const uint32_t first_nonce = pdata[19];
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8;
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x5;

3
x11/cuda_x11_echo.cu

@@ -3,6 +3,8 @@
#include "cuda_helper.h"
extern __device__ __device_builtin__ void __threadfence_block(void);
#include "cuda_x11_aes.cuh"
__device__ __forceinline__ void AES_2ROUND(
@@ -284,6 +286,7 @@ void x11_echo512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g
__shared__ uint32_t sharedMemory[1024];
echo_gpu_init(sharedMemory);
__threadfence_block();
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)

4
x11/cuda_x11_shavite512.cu

@@ -2,6 +2,8 @@
#include "cuda_helper.h"
extern __device__ __device_builtin__ void __threadfence_block(void);
#define TPB 128
__constant__ uint32_t c_PaddedMessage80[32]; // padded message (80 bytes + padding)
@@ -1346,6 +1348,7 @@ void x11_shavite512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t
__shared__ uint32_t sharedMemory[1024];
shavite_gpu_init(sharedMemory);
__threadfence_block();
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
@@ -1397,6 +1400,7 @@ void x11_shavite512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *ou
__shared__ uint32_t sharedMemory[1024];
shavite_gpu_init(sharedMemory);
__threadfence_block();
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)

2
x11/s3.cu

@@ -72,7 +72,7 @@ extern "C" int scanhash_s3(int thr_id, struct work* work, uint32_t max_nonce, un
intensity--;
#endif
uint32_t throughput = cuda_default_throughput(thr_id, 1 << intensity);
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark)
ptarget[7] = 0xF;

2
x11/sib.cu

@@ -106,7 +106,7 @@ extern "C" int scanhash_sib(int thr_id, struct work* work, uint32_t max_nonce, u
const uint32_t first_nonce = pdata[19];
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 19 : 18;
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8;
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark)
ptarget[7] = 0xf;

2
x11/x11.cu

@@ -100,7 +100,7 @@ extern "C" int scanhash_x11(int thr_id, struct work* work, uint32_t max_nonce, u
const uint32_t first_nonce = pdata[19];
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8;
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark)
ptarget[7] = 0x5;

2
x11/x11evo.cu

@@ -232,7 +232,7 @@ extern "C" int scanhash_x11evo(int thr_id, struct work* work, uint32_t max_nonce
const uint32_t first_nonce = pdata[19];
int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 20 : 19;
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity); // 19=256*256*8;
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (opt_debug || s_ntime != pdata[17] || s_sequence == -1) {
uint32_t ntime = swab32(work->data[17]);

2
x13/x13.cu

@@ -119,7 +119,7 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
const uint32_t first_nonce = pdata[19];
int intensity = 19; // (device_sm[device_map[thr_id]] > 500 && !is_windows()) ? 20 : 19;
uint32_t throughput = cuda_default_throughput(thr_id, 1 << intensity); // 19=256*256*8;
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x000f;

2
x15/x14.cu

@@ -133,7 +133,7 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
uint32_t endiandata[20];
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 19=256*256*8;
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark)
ptarget[7] = 0x000f;

2
x15/x15.cu

@@ -143,7 +143,7 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce,
uint32_t endiandata[20];
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 19=256*256*8;
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark)
ptarget[7] = 0x00FF;

2
x17/x17.cu

@@ -159,7 +159,7 @@ extern "C" int scanhash_x17(int thr_id, struct work* work, uint32_t max_nonce, u
const uint32_t first_nonce = pdata[19];
uint32_t throughput = cuda_default_throughput(thr_id, 1U << 19); // 19=256*256*8;
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
if (opt_benchmark)
((uint32_t*)ptarget)[7] = 0x00ff;

Loading…
Cancel
Save