Browse Source

lyra2: remove unused nonce param

pull/5/head
Tanguy Pruvot 6 years ago
parent
commit
a9357e1ec8
  1. lyra2/allium.cu (6 changed lines)
  2. lyra2/cuda_lyra2.cu (30 changed lines)
  3. lyra2/cuda_lyra2_sm2.cuh (4 changed lines)
  4. lyra2/cuda_lyra2_sm5.cuh (12 changed lines)
  5. lyra2/lyra2RE.cu (4 changed lines)

6
lyra2/allium.cu

@ -30,7 +30,7 @@ extern void cubehash256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t start
extern void skein256_cpu_init(int thr_id, uint32_t threads); extern void skein256_cpu_init(int thr_id, uint32_t threads);
extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix); extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix);
extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti); extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint64_t *d_outputHash, bool gtx750ti);
extern void groestl256_cpu_init(int thr_id, uint32_t threads); extern void groestl256_cpu_init(int thr_id, uint32_t threads);
extern void groestl256_cpu_free(int thr_id); extern void groestl256_cpu_free(int thr_id);
@ -141,9 +141,9 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce
//blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); //blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
//keccak256_sm3_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); //keccak256_sm3_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti); lyra2_cpu_hash_32(thr_id, throughput, d_hash[thr_id], gtx750ti);
cubehash256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); cubehash256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti); lyra2_cpu_hash_32(thr_id, throughput, d_hash[thr_id], gtx750ti);
skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;

30
lyra2/cuda_lyra2.cu

@ -409,7 +409,7 @@ __constant__ uint2x4 blake2b_IV[2] = {
}; };
__global__ __launch_bounds__(64, 1) __global__ __launch_bounds__(64, 1)
void lyra2_gpu_hash_32_1(uint32_t threads, uint32_t startNounce, uint2 *g_hash) void lyra2_gpu_hash_32_1(uint32_t threads, uint2 *g_hash)
{ {
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
@ -436,7 +436,7 @@ void lyra2_gpu_hash_32_1(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
__global__ __global__
__launch_bounds__(TPB52, 1) __launch_bounds__(TPB52, 1)
void lyra2_gpu_hash_32_2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash) void lyra2_gpu_hash_32_2(uint32_t threads, uint64_t *g_hash)
{ {
const uint32_t thread = blockDim.y * blockIdx.x + threadIdx.y; const uint32_t thread = blockDim.y * blockIdx.x + threadIdx.y;
@ -481,7 +481,7 @@ void lyra2_gpu_hash_32_2(uint32_t threads, uint32_t startNounce, uint64_t *g_has
} }
__global__ __launch_bounds__(64, 1) __global__ __launch_bounds__(64, 1)
void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash) void lyra2_gpu_hash_32_3(uint32_t threads, uint2 *g_hash)
{ {
const uint32_t thread = blockDim.x * blockIdx.x + threadIdx.x; const uint32_t thread = blockDim.x * blockIdx.x + threadIdx.x;
@ -502,7 +502,7 @@ void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
g_hash[thread + threads * 2] = state[0].z; g_hash[thread + threads * 2] = state[0].z;
g_hash[thread + threads * 3] = state[0].w; g_hash[thread + threads * 3] = state[0].w;
} //thread }
} }
#else #else
#if __CUDA_ARCH__ < 500 #if __CUDA_ARCH__ < 500
@ -510,9 +510,9 @@ void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
/* for unsupported SM arch */ /* for unsupported SM arch */
__device__ void* DMatrix; __device__ void* DMatrix;
#endif #endif
__global__ void lyra2_gpu_hash_32_1(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {} __global__ void lyra2_gpu_hash_32_1(uint32_t threads, uint2 *g_hash) {}
__global__ void lyra2_gpu_hash_32_2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash) {} __global__ void lyra2_gpu_hash_32_2(uint32_t threads, uint64_t *g_hash) {}
__global__ void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {} __global__ void lyra2_gpu_hash_32_3(uint32_t threads, uint2 *g_hash) {}
#endif #endif
__host__ __host__
@ -523,7 +523,7 @@ void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix)
} }
__host__ __host__
void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_hash, bool gtx750ti) void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint64_t *d_hash, bool gtx750ti)
{ {
int dev_id = device_map[thr_id % MAX_GPUS]; int dev_id = device_map[thr_id % MAX_GPUS];
@ -544,11 +544,11 @@ void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint6
if (cuda_arch[dev_id] >= 520) if (cuda_arch[dev_id] >= 520)
{ {
lyra2_gpu_hash_32_1 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash); lyra2_gpu_hash_32_1 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
lyra2_gpu_hash_32_2 <<< grid1, block1, 24 * (8 - 0) * sizeof(uint2) * tpb >>> (threads, startNounce, d_hash); lyra2_gpu_hash_32_2 <<< grid1, block1, 24 * (8 - 0) * sizeof(uint2) * tpb >>> (threads, d_hash);
lyra2_gpu_hash_32_3 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash); lyra2_gpu_hash_32_3 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
} }
else if (cuda_arch[dev_id] >= 500) else if (cuda_arch[dev_id] >= 500)
{ {
@ -561,12 +561,12 @@ void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint6
// suitable amount to adjust for 10warp // suitable amount to adjust for 10warp
shared_mem = 6144; shared_mem = 6144;
lyra2_gpu_hash_32_1_sm5 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash); lyra2_gpu_hash_32_1_sm5 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
lyra2_gpu_hash_32_2_sm5 <<< grid1, block1, shared_mem >>> (threads, startNounce, (uint2*)d_hash); lyra2_gpu_hash_32_2_sm5 <<< grid1, block1, shared_mem >>> (threads, (uint2*)d_hash);
lyra2_gpu_hash_32_3_sm5 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash); lyra2_gpu_hash_32_3_sm5 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
} }
else else
lyra2_gpu_hash_32_sm2 <<< grid3, block3 >>> (threads, startNounce, d_hash); lyra2_gpu_hash_32_sm2 <<< grid3, block3 >>> (threads, d_hash);
} }

4
lyra2/cuda_lyra2_sm2.cuh

@ -131,7 +131,7 @@ void reduceDuplexRowSetup(const int rowIn, const int rowInOut, const int rowOut,
} }
__global__ __launch_bounds__(TPB30, 1) __global__ __launch_bounds__(TPB30, 1)
void lyra2_gpu_hash_32_sm2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash) void lyra2_gpu_hash_32_sm2(uint32_t threads, uint64_t *g_hash)
{ {
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads) if (thread < threads)
@ -224,5 +224,5 @@ void lyra2_gpu_hash_32_sm2(uint32_t threads, uint32_t startNounce, uint64_t *g_h
#else #else
/* if __CUDA_ARCH__ < 200 .. host */ /* if __CUDA_ARCH__ < 200 .. host */
__global__ void lyra2_gpu_hash_32_sm2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash) {} __global__ void lyra2_gpu_hash_32_sm2(uint32_t threads, uint64_t *g_hash) {}
#endif #endif

12
lyra2/cuda_lyra2_sm5.cuh

@ -589,7 +589,7 @@ void reduceDuplexRowV50_8(const int rowInOut, uint2 state[4], const uint32_t thr
} }
__global__ __launch_bounds__(64, 1) __global__ __launch_bounds__(64, 1)
void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint2 *g_hash)
{ {
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
@ -622,7 +622,7 @@ void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_ha
} }
__global__ __launch_bounds__(TPB50, 1) __global__ __launch_bounds__(TPB50, 1)
void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint2 *g_hash)
{ {
const uint32_t thread = (blockDim.y * blockIdx.x + threadIdx.y); const uint32_t thread = (blockDim.y * blockIdx.x + threadIdx.y);
@ -662,7 +662,7 @@ void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_ha
} }
__global__ __launch_bounds__(64, 1) __global__ __launch_bounds__(64, 1)
void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint2 *g_hash)
{ {
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
@ -687,7 +687,7 @@ void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_ha
#else #else
/* if __CUDA_ARCH__ != 500 .. host */ /* if __CUDA_ARCH__ != 500 .. host */
__global__ void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {} __global__ void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint2 *g_hash) {}
__global__ void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {} __global__ void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint2 *g_hash) {}
__global__ void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {} __global__ void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint2 *g_hash) {}
#endif #endif

4
lyra2/lyra2RE.cu

@ -26,7 +26,7 @@ extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNon
extern void skein256_cpu_init(int thr_id, uint32_t threads); extern void skein256_cpu_init(int thr_id, uint32_t threads);
extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix); extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix);
extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti); extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint64_t *d_outputHash, bool gtx750ti);
extern void groestl256_cpu_init(int thr_id, uint32_t threads); extern void groestl256_cpu_init(int thr_id, uint32_t threads);
extern void groestl256_cpu_free(int thr_id); extern void groestl256_cpu_free(int thr_id);
@ -130,7 +130,7 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
//blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); //blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
//keccak256_sm3_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); //keccak256_sm3_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++); blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti); lyra2_cpu_hash_32(thr_id, throughput, d_hash[thr_id], gtx750ti);
skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++); skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
*hashes_done = pdata[19] - first_nonce + throughput; *hashes_done = pdata[19] - first_nonce + throughput;

Loading…
Cancel
Save