diff --git a/lyra2/allium.cu b/lyra2/allium.cu
index 931e6bc..6492c92 100644
--- a/lyra2/allium.cu
+++ b/lyra2/allium.cu
@@ -30,7 +30,7 @@ extern void cubehash256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t start
 extern void skein256_cpu_init(int thr_id, uint32_t threads);
 
 extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix);
-extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti);
+extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint64_t *d_outputHash, bool gtx750ti);
 
 extern void groestl256_cpu_init(int thr_id, uint32_t threads);
 extern void groestl256_cpu_free(int thr_id);
@@ -141,9 +141,9 @@ extern "C" int scanhash_allium(int thr_id, struct work* work, uint32_t max_nonce
 		//blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 		//keccak256_sm3_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 		blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
-		lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
+		lyra2_cpu_hash_32(thr_id, throughput, d_hash[thr_id], gtx750ti);
 		cubehash256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
-		lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
+		lyra2_cpu_hash_32(thr_id, throughput, d_hash[thr_id], gtx750ti);
 		skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 
 		*hashes_done = pdata[19] - first_nonce + throughput;
diff --git a/lyra2/cuda_lyra2.cu b/lyra2/cuda_lyra2.cu
index 7905d23..a280200 100644
--- a/lyra2/cuda_lyra2.cu
+++ b/lyra2/cuda_lyra2.cu
@@ -409,7 +409,7 @@ __constant__ uint2x4 blake2b_IV[2] = {
 };
 
 __global__ __launch_bounds__(64, 1)
-void lyra2_gpu_hash_32_1(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
+void lyra2_gpu_hash_32_1(uint32_t threads, uint2 *g_hash)
 {
 	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 
@@ -436,7 +436,7 @@ void lyra2_gpu_hash_32_1(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
 
 __global__
 __launch_bounds__(TPB52, 1)
-void lyra2_gpu_hash_32_2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash)
+void lyra2_gpu_hash_32_2(uint32_t threads, uint64_t *g_hash)
 {
 	const uint32_t thread = blockDim.y * blockIdx.x + threadIdx.y;
 
@@ -481,7 +481,7 @@ void lyra2_gpu_hash_32_2(uint32_t threads, uint32_t startNounce, uint64_t *g_has
 }
 
 __global__ __launch_bounds__(64, 1)
-void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
+void lyra2_gpu_hash_32_3(uint32_t threads, uint2 *g_hash)
 {
 	const uint32_t thread = blockDim.x * blockIdx.x + threadIdx.x;
 
@@ -502,7 +502,7 @@ void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
 		g_hash[thread + threads * 2] = state[0].z;
 		g_hash[thread + threads * 3] = state[0].w;
 
-	} //thread
+	}
 }
 #else
 #if __CUDA_ARCH__ < 500
@@ -510,9 +510,9 @@ void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
 /* for unsupported SM arch */
 __device__ void* DMatrix;
 #endif
-__global__ void lyra2_gpu_hash_32_1(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
-__global__ void lyra2_gpu_hash_32_2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash) {}
-__global__ void lyra2_gpu_hash_32_3(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
+__global__ void lyra2_gpu_hash_32_1(uint32_t threads, uint2 *g_hash) {}
+__global__ void lyra2_gpu_hash_32_2(uint32_t threads, uint64_t *g_hash) {}
+__global__ void lyra2_gpu_hash_32_3(uint32_t threads, uint2 *g_hash) {}
 #endif
 
 __host__
@@ -523,7 +523,7 @@ void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix)
 }
 
 __host__
-void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_hash, bool gtx750ti)
+void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint64_t *d_hash, bool gtx750ti)
 {
 	int dev_id = device_map[thr_id % MAX_GPUS];
 
@@ -544,11 +544,11 @@ void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint6
 
 	if (cuda_arch[dev_id] >= 520)
 	{
-		lyra2_gpu_hash_32_1 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash);
+		lyra2_gpu_hash_32_1 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
 
-		lyra2_gpu_hash_32_2 <<< grid1, block1, 24 * (8 - 0) * sizeof(uint2) * tpb >>> (threads, startNounce, d_hash);
+		lyra2_gpu_hash_32_2 <<< grid1, block1, 24 * (8 - 0) * sizeof(uint2) * tpb >>> (threads, d_hash);
 
-		lyra2_gpu_hash_32_3 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash);
+		lyra2_gpu_hash_32_3 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
 	}
 	else if (cuda_arch[dev_id] >= 500)
 	{
@@ -561,12 +561,12 @@ void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint6
 		// suitable amount to adjust for 10warp
 		shared_mem = 6144;
 
-		lyra2_gpu_hash_32_1_sm5 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash);
+		lyra2_gpu_hash_32_1_sm5 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
 
-		lyra2_gpu_hash_32_2_sm5 <<< grid1, block1, shared_mem >>> (threads, startNounce, (uint2*)d_hash);
+		lyra2_gpu_hash_32_2_sm5 <<< grid1, block1, shared_mem >>> (threads, (uint2*)d_hash);
 
-		lyra2_gpu_hash_32_3_sm5 <<< grid2, block2 >>> (threads, startNounce, (uint2*)d_hash);
+		lyra2_gpu_hash_32_3_sm5 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
 	}
 	else
-		lyra2_gpu_hash_32_sm2 <<< grid3, block3 >>> (threads, startNounce, d_hash);
+		lyra2_gpu_hash_32_sm2 <<< grid3, block3 >>> (threads, d_hash);
 }
diff --git a/lyra2/cuda_lyra2_sm2.cuh b/lyra2/cuda_lyra2_sm2.cuh
index 18263b2..da621d0 100644
--- a/lyra2/cuda_lyra2_sm2.cuh
+++ b/lyra2/cuda_lyra2_sm2.cuh
@@ -131,7 +131,7 @@ void reduceDuplexRowSetup(const int rowIn, const int rowInOut, const int rowOut,
 }
 
 __global__ __launch_bounds__(TPB30, 1)
-void lyra2_gpu_hash_32_sm2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash)
+void lyra2_gpu_hash_32_sm2(uint32_t threads, uint64_t *g_hash)
 {
 	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
@@ -224,5 +224,5 @@ void lyra2_gpu_hash_32_sm2(uint32_t threads, uint32_t startNounce, uint64_t *g_h
 
 #else
 /* if __CUDA_ARCH__ < 200 .. host */
-__global__ void lyra2_gpu_hash_32_sm2(uint32_t threads, uint32_t startNounce, uint64_t *g_hash) {}
+__global__ void lyra2_gpu_hash_32_sm2(uint32_t threads, uint64_t *g_hash) {}
 #endif
diff --git a/lyra2/cuda_lyra2_sm5.cuh b/lyra2/cuda_lyra2_sm5.cuh
index fc13172..4a3caeb 100644
--- a/lyra2/cuda_lyra2_sm5.cuh
+++ b/lyra2/cuda_lyra2_sm5.cuh
@@ -589,7 +589,7 @@ void reduceDuplexRowV50_8(const int rowInOut, uint2 state[4], const uint32_t thr
 }
 
 __global__ __launch_bounds__(64, 1)
-void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
+void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint2 *g_hash)
 {
 	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 
@@ -622,7 +622,7 @@ void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_ha
 }
 
 __global__ __launch_bounds__(TPB50, 1)
-void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
+void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint2 *g_hash)
 {
 	const uint32_t thread = (blockDim.y * blockIdx.x + threadIdx.y);
 
@@ -662,7 +662,7 @@ void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_ha
 }
 
 __global__ __launch_bounds__(64, 1)
-void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash)
+void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint2 *g_hash)
 {
 	const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 
@@ -687,7 +687,7 @@ void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_ha
 
 #else
 /* if __CUDA_ARCH__ != 500 .. host */
-__global__ void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
-__global__ void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
-__global__ void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint32_t startNounce, uint2 *g_hash) {}
+__global__ void lyra2_gpu_hash_32_1_sm5(uint32_t threads, uint2 *g_hash) {}
+__global__ void lyra2_gpu_hash_32_2_sm5(uint32_t threads, uint2 *g_hash) {}
+__global__ void lyra2_gpu_hash_32_3_sm5(uint32_t threads, uint2 *g_hash) {}
 #endif
diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu
index b3ad49f..b435371 100644
--- a/lyra2/lyra2RE.cu
+++ b/lyra2/lyra2RE.cu
@@ -26,7 +26,7 @@ extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNon
 extern void skein256_cpu_init(int thr_id, uint32_t threads);
 
 extern void lyra2_cpu_init(int thr_id, uint32_t threads, uint64_t *d_matrix);
-extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, bool gtx750ti);
+extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint64_t *d_outputHash, bool gtx750ti);
 
 extern void groestl256_cpu_init(int thr_id, uint32_t threads);
 extern void groestl256_cpu_free(int thr_id);
@@ -130,7 +130,7 @@ extern "C" int scanhash_lyra2(int thr_id, struct work* work, uint32_t max_nonce,
 		//blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 		//keccak256_sm3_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 		blakeKeccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
-		lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], gtx750ti);
+		lyra2_cpu_hash_32(thr_id, throughput, d_hash[thr_id], gtx750ti);
 		skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
 
 		*hashes_done = pdata[19] - first_nonce + throughput;
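
Note (not part of the patch): a condensed sketch of the host-side dispatch in lyra2_cpu_hash_32() after these hunks apply, gathered from the + lines above so the per-architecture call shape can be read in one place. The Lyra2 kernels never used the start nonce (each thread indexes g_hash by its own thread id), so the parameter is dropped everywhere; grid1/grid2/grid3, block1/block2/block3, shared_mem and tpb are the function's existing locals in this tree.

	// Illustrative post-patch dispatch; no startNounce is threaded through.
	if (cuda_arch[dev_id] >= 520)
	{
		// SM 5.2+: three-pass pipeline; pass 2 sizes shared memory from tpb
		lyra2_gpu_hash_32_1 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
		lyra2_gpu_hash_32_2 <<< grid1, block1, 24 * (8 - 0) * sizeof(uint2) * tpb >>> (threads, d_hash);
		lyra2_gpu_hash_32_3 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
	}
	else if (cuda_arch[dev_id] >= 500)
	{
		// SM 5.0: _sm5 variants with the fixed shared_mem budget set above
		lyra2_gpu_hash_32_1_sm5 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
		lyra2_gpu_hash_32_2_sm5 <<< grid1, block1, shared_mem >>> (threads, (uint2*)d_hash);
		lyra2_gpu_hash_32_3_sm5 <<< grid2, block2 >>> (threads, (uint2*)d_hash);
	}
	else
		// older architectures: single-kernel _sm2 fallback
		lyra2_gpu_hash_32_sm2 <<< grid3, block3 >>> (threads, d_hash);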