Browse Source

lyra2: remove unused nonce param

pull/5/head
Tanguy Pruvot 6 years ago
parent
commit
a9357e1ec8
  1. 6
      lyra2/allium.cu
  2. 30
      lyra2/cuda_lyra2.cu
  3. 4
      lyra2/cuda_lyra2_sm2.cuh
  4. 12
      lyra2/cuda_lyra2_sm5.cuh
  5. 4
      lyra2/lyra2RE.cu

6
lyra2/allium.cu

@@ -30,7 +30,7 @@ extern void cubehash256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t start
extern void skein256_cpu_init(int thr_id, uint32_t threads);
extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix);
extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti);
extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint64_t *d_outputHash, bool gtx750ti);
extern void groestl256_cpu_init(int thr_id, uint32_t threads);
extern void groestl256_cpu_free(int thr_id);
@@ -141,9 +141,9 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce
//blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
//keccak256_sm3_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
lyra2_cpu_hash_32(thr_id, throughput, d_hash[thr_id], gtx750ti);
cubehash256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
lyra2_cpu_hash_32(thr_id, throughput, d_hash[thr_id], gtx750ti);
skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
*hashes_done = pdata[19] - first_nonce + throughput;

30
lyra2/cuda_lyra2.cu

@@ -409,7 +409,7 @@ __constant__ uint2x4 blake2b_IV[2] = {
};
__global__ __launch_bounds__(64, 1)
void lyra2_gpu_hash_32_1(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
void lyra2_gpu_hash_32_1(uint32_t threads, uint2 *g_hash)
{
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
@@ -436,7 +436,7 @@ void lyra2_gpu_hash_32_1(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
__global__
__launch_bounds__(TPB52, 1)
void lyra2_gpu_hash_32_2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash)
void lyra2_gpu_hash_32_2(uint32_t threads, uint64_t *g_hash)
{
const uint32_t thread = blockDim.y * blockIdx.x + threadIdx.y;
@@ -481,7 +481,7 @@ void lyra2_gpu_hash_32_2(uint32_t threads, uint32_t startNounce, uint64_t *g_has
}
__global__ __launch_bounds__(64, 1)
void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
void lyra2_gpu_hash_32_3(uint32_t threads, uint2 *g_hash)
{
const uint32_t thread = blockDim.x * blockIdx.x + threadIdx.x;
@@ -502,7 +502,7 @@ void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
g_hash[thread + threads * 2] = state[0].z;
g_hash[thread + threads * 3] = state[0].w;
} //thread
}
}
#else
#if __CUDA_ARCH__ < 500
@@ -510,9 +510,9 @@ void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
/* for unsupported SM arch */
__device__ void* DMatrix;
#endif
__global__ void lyra2_gpu_hash_32_1(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
__global__ void lyra2_gpu_hash_32_2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash) {}
__global__ void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
__global__ void lyra2_gpu_hash_32_1(uint32_t threads, uint2 *g_hash) {}
__global__ void lyra2_gpu_hash_32_2(uint32_t threads, uint64_t *g_hash) {}
__global__ void lyra2_gpu_hash_32_3(uint32_t threads, uint2 *g_hash) {}
#endif
__host__
@@ -523,7 +523,7 @@ void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix)
}
__host__
void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_hash, bool gtx750ti)
void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint64_t *d_hash, bool gtx750ti)
{
int dev_id = device_map[thr_id % MAX_GPUS];
@@ -544,11 +544,11 @@ void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint6
if (cuda_arch[dev_id] >= 520)
{
lyra2_gpu_hash_32_1 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash);
lyra2_gpu_hash_32_1 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
lyra2_gpu_hash_32_2 <<< grid1, block1, 24 * (8 - 0) * sizeof(uint2) * tpb >>> (threads, startNounce, d_hash);
lyra2_gpu_hash_32_2 <<< grid1, block1, 24 * (8 - 0) * sizeof(uint2) * tpb >>> (threads, d_hash);
lyra2_gpu_hash_32_3 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash);
lyra2_gpu_hash_32_3 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
}
else if (cuda_arch[dev_id] >= 500)
{
@@ -561,12 +561,12 @@ void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint6
// suitable amount to adjust for 10warp
shared_mem = 6144;
lyra2_gpu_hash_32_1_sm5 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash);
lyra2_gpu_hash_32_1_sm5 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
lyra2_gpu_hash_32_2_sm5 <<< grid1, block1, shared_mem >>> (threads, startNounce, (uint2*)d_hash);
lyra2_gpu_hash_32_2_sm5 <<< grid1, block1, shared_mem >>> (threads, (uint2*)d_hash);
lyra2_gpu_hash_32_3_sm5 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash);
lyra2_gpu_hash_32_3_sm5 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
}
else
lyra2_gpu_hash_32_sm2 <<< grid3, block3 >>> (threads, startNounce, d_hash);
lyra2_gpu_hash_32_sm2 <<< grid3, block3 >>> (threads, d_hash);
}

4
lyra2/cuda_lyra2_sm2.cuh

@@ -131,7 +131,7 @@ void reduceDuplexRowSetup(const int rowIn, const int rowInOut, const int rowOut,
}
__global__ __launch_bounds__(TPB30, 1)
void lyra2_gpu_hash_32_sm2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash)
void lyra2_gpu_hash_32_sm2(uint32_t threads, uint64_t *g_hash)
{
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
if (thread < threads)
@@ -224,5 +224,5 @@ void lyra2_gpu_hash_32_sm2(uint32_t threads, uint32_t startNounce, uint64_t *g_h
#else
/* if __CUDA_ARCH__ < 200 .. host */
__global__ void lyra2_gpu_hash_32_sm2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash) {}
__global__ void lyra2_gpu_hash_32_sm2(uint32_t threads, uint64_t *g_hash) {}
#endif

12
lyra2/cuda_lyra2_sm5.cuh

@@ -589,7 +589,7 @@ void reduceDuplexRowV50_8(const int rowInOut, uint2 state[4], const uint32_t thr
}
__global__ __launch_bounds__(64, 1)
void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint2 *g_hash)
{
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
@@ -622,7 +622,7 @@ void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_ha
}
__global__ __launch_bounds__(TPB50, 1)
void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint2 *g_hash)
{
const uint32_t thread = (blockDim.y * blockIdx.x + threadIdx.y);
@@ -662,7 +662,7 @@ void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_ha
}
__global__ __launch_bounds__(64, 1)
void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint2 *g_hash)
{
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
@@ -687,7 +687,7 @@ void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_ha
#else
/* if __CUDA_ARCH__ != 500 .. host */
__global__ void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
__global__ void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
__global__ void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
__global__ void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint2 *g_hash) {}
__global__ void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint2 *g_hash) {}
__global__ void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint2 *g_hash) {}
#endif

4
lyra2/lyra2RE.cu

@@ -26,7 +26,7 @@ extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNon
extern void skein256_cpu_init(int thr_id, uint32_t threads);
extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix);
extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti);
extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint64_t *d_outputHash, bool gtx750ti);
extern void groestl256_cpu_init(int thr_id, uint32_t threads);
extern void groestl256_cpu_free(int thr_id);
@@ -130,7 +130,7 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
//blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
//keccak256_sm3_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
lyra2_cpu_hash_32(thr_id, throughput, d_hash[thr_id], gtx750ti);
skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
*hashes_done = pdata[19] - first_nonce + throughput;

Loading…
Cancel
Save