diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu
index 2f1d2b9..f1a7291 100644
--- a/Algo256/blake256.cu
+++ b/Algo256/blake256.cu
@@ -245,7 +245,7 @@ __host__
 uint32_t blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, const uint64_t highTarget, const uint32_t crcsum, const int8_t rounds)
 {
-	const int threadsperblock = TPB;
+	const uint32_t threadsperblock = TPB;
 	uint32_t result = UINT32_MAX;
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
@@ -331,7 +331,7 @@ __host__
 static uint32_t blake256_cpu_hash_16(const int thr_id, const uint32_t threads, const uint32_t startNonce, const uint64_t highTarget, const int8_t rounds)
 {
-	const int threadsperblock = TPB;
+	const uint32_t threadsperblock = TPB;
 	uint32_t result = UINT32_MAX;
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
diff --git a/Algo256/cuda_blake256.cu b/Algo256/cuda_blake256.cu
index f7a7601..8a7ede0 100644
--- a/Algo256/cuda_blake256.cu
+++ b/Algo256/cuda_blake256.cu
@@ -218,7 +218,7 @@ void blake256_gpu_hash_80(const uint32_t threads, const uint32_t startNonce, uin
 __host__
 void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order)
 {
-	const int threadsperblock = 256;
+	const uint32_t threadsperblock = 256;
 	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
 	dim3 block(threadsperblock);
@@ -243,7 +243,7 @@ void blake256_cpu_setBlock_80(uint32_t *pdata)
 }
 __host__
-void blake256_cpu_init(int thr_id, int threads)
+void blake256_cpu_init(int thr_id, uint32_t threads)
 {
 	cudaMemcpyToSymbol(u256, c_u256, sizeof(c_u256), 0, cudaMemcpyHostToDevice);
 	cudaMemcpyToSymbol(sigma, c_sigma, sizeof(c_sigma), 0, cudaMemcpyHostToDevice);
diff --git a/Algo256/cuda_fugue256.cu b/Algo256/cuda_fugue256.cu
index a2ebaeb..bb72752 100644
--- a/Algo256/cuda_fugue256.cu
+++ b/Algo256/cuda_fugue256.cu
@@ -548,7 +548,7 @@ __global__ void __launch_bounds__(256)
 #else
 __global__ void
 #endif
-fugue256_gpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
+fugue256_gpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
 {
 #if USE_SHARED
 	extern __shared__ char mixtabs[];
@@ -561,7 +561,7 @@ fugue256_gpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHas
 	__syncthreads();
 #endif
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		/* Nimm den State und verarbeite das letztenByte (die Nounce) */
@@ -718,7 +718,7 @@ fugue256_gpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHas
 	cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize );
 }
-void fugue256_cpu_init(int thr_id, int threads)
+void fugue256_cpu_init(int thr_id, uint32_t threads)
 {
 	cudaSetDevice(device_map[thr_id]);
@@ -751,12 +751,12 @@ __host__ void fugue256_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
 	cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
 }
-__host__ void fugue256_cpu_hash(int thr_id, int threads, int startNounce, void *outputHashes, uint32_t *nounce)
+__host__ void fugue256_cpu_hash(int thr_id, uint32_t threads, int startNounce, void *outputHashes, uint32_t *nounce)
 {
 #if USE_SHARED
-	const int threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN
+	const uint32_t threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN
 #else
-	const int threadsperblock = 512; // so einstellen wie gewünscht ;-)
+	const uint32_t threadsperblock = 512; // so einstellen wie gewünscht ;-)
 #endif
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
diff --git a/Algo256/cuda_groestl256.cu b/Algo256/cuda_groestl256.cu
index a4c5e47..888d29c 100644
--- a/Algo256/cuda_groestl256.cu
+++ b/Algo256/cuda_groestl256.cu
@@ -105,7 +105,7 @@ extern uint32_t T3up_cpu[];
 extern uint32_t T3dn_cpu[];
 __device__ __forceinline__
-void groestl256_perm_P(int thread,uint32_t *a, char *mixtabs)
+void groestl256_perm_P(uint32_t thread,uint32_t *a, char *mixtabs)
 {
 #pragma unroll 10
 	for (int r = 0; r<10; r++)
@@ -136,7 +136,7 @@ void groestl256_perm_P(int thread,uint32_t *a, char *mixtabs)
 }
 __device__ __forceinline__
-void groestl256_perm_Q(int thread, uint32_t *a, char *mixtabs)
+void groestl256_perm_Q(uint32_t thread, uint32_t *a, char *mixtabs)
 {
 #pragma unroll
 	for (int r = 0; r<10; r++)
@@ -175,7 +175,7 @@ void groestl256_perm_Q(int thread, uint32_t *a, char *mixtabs)
 }
 __global__ __launch_bounds__(256,1)
-void groestl256_gpu_hash32(int threads, uint32_t startNounce, uint64_t *outputHash, uint32_t *nonceVector)
+void groestl256_gpu_hash32(uint32_t threads, uint32_t startNounce, uint64_t *outputHash, uint32_t *nonceVector)
 {
 #if USE_SHARED
 	extern __shared__ char mixtabs[];
@@ -194,7 +194,7 @@ void groestl256_gpu_hash32(int threads, uint32_t startNounce, uint64_t *outputHa
 	__syncthreads();
 #endif
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		// GROESTL
@@ -259,7 +259,7 @@ void groestl256_gpu_hash32(int threads, uint32_t startNounce, uint64_t *outputHa
 	cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \
 __host__
-void groestl256_cpu_init(int thr_id, int threads)
+void groestl256_cpu_init(int thr_id, uint32_t threads)
 {
 	// Texturen mit obigem Makro initialisieren
@@ -277,11 +277,11 @@ void groestl256_cpu_init(int thr_id, int threads)
 }
 __host__
-uint32_t groestl256_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+uint32_t groestl256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
 {
 	uint32_t result = 0xffffffff;
 	cudaMemset(d_GNonce[thr_id], 0xff, sizeof(uint32_t));
-	const int threadsperblock = 256;
+	const uint32_t threadsperblock = 256;
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
diff --git a/Algo256/cuda_keccak256.cu b/Algo256/cuda_keccak256.cu
index cf385b4..a7c918e 100644
--- a/Algo256/cuda_keccak256.cu
+++ b/Algo256/cuda_keccak256.cu
@@ -170,9 +170,9 @@ static void keccak_blockv30(uint64_t *s, const uint64_t *keccak_round_constants)
 #endif
 __global__ __launch_bounds__(128,5)
-void keccak256_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
+void keccak256_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint32_t nounce = startNounce + thread;
@@ -210,11 +210,11 @@ void keccak256_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash,
 }
 __host__
-uint32_t keccak256_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
+uint32_t keccak256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
 {
 	uint32_t result = UINT32_MAX;
 	cudaMemset(d_KNonce[thr_id], 0xff, sizeof(uint32_t));
-	const int threadsperblock = 128;
+	const uint32_t threadsperblock = 128;
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
@@ -232,9 +232,9 @@ uint32_t keccak256_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, ui
 }
 __global__ __launch_bounds__(256,3)
-void keccak256_gpu_hash_32(int threads, uint32_t startNounce, uint64_t *outputHash)
+void keccak256_gpu_hash_32(uint32_t threads, uint32_t startNounce, uint64_t *outputHash)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 #if __CUDA_ARCH__ >= 350 /* tpr: to double check if faster on SM5+ */
@@ -272,9 +272,9 @@ void keccak256_gpu_hash_32(int threads, uint32_t startNounce, uint64_t *outputHa
 }
 __host__
-void keccak256_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+void keccak256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
 {
-	const int threadsperblock = 256;
+	const uint32_t threadsperblock = 256;
 	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
 	dim3 block(threadsperblock);
@@ -293,7 +293,7 @@ void keccak256_setBlock_80(void *pdata,const void *pTargetIn)
 }
 __host__
-void keccak256_cpu_init(int thr_id, int threads)
+void keccak256_cpu_init(int thr_id, uint32_t threads)
 {
 	CUDA_SAFE_CALL(cudaMemcpyToSymbol(keccak_round_constants, host_keccak_round_constants, sizeof(host_keccak_round_constants), 0, cudaMemcpyHostToDevice));
diff --git a/Algo256/cuda_skein256.cu b/Algo256/cuda_skein256.cu
index 5cc0904..71687dc 100644
--- a/Algo256/cuda_skein256.cu
+++ b/Algo256/cuda_skein256.cu
@@ -96,9 +96,9 @@ void Round_8_512v35(uint2 *ks, uint2 *ts,
 __global__ __launch_bounds__(256,3)
-void skein256_gpu_hash_32(int threads, uint32_t startNounce, uint64_t *outputHash)
+void skein256_gpu_hash_32(uint32_t threads, uint32_t startNounce, uint64_t *outputHash)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint2 h[9];
@@ -214,9 +214,9 @@ void Round_8_512v30(uint64_t *ks, uint64_t *ts, uint64_t &p0, uint64_t &p1, uint
 }
 __global__ __launch_bounds__(256, 3)
-void skein256_gpu_hash_32_v30(int threads, uint32_t startNounce, uint64_t *outputHash)
+void skein256_gpu_hash_32_v30(uint32_t threads, uint32_t startNounce, uint64_t *outputHash)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint64_t h[9];
@@ -293,15 +293,15 @@ void skein256_gpu_hash_32_v30(int threads, uint32_t startNounce, uint64_t *outpu
 }
 __host__
-void skein256_cpu_init(int thr_id, int threads)
+void skein256_cpu_init(int thr_id, uint32_t threads)
 {
 	//empty
 }
 __host__
-void skein256_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
 {
-	const int threadsperblock = 256;
+	const uint32_t threadsperblock = 256;
 	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
 	dim3 block(threadsperblock);
diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu
index 16f65a0..6171ce1 100644
--- a/Algo256/keccak256.cu
+++ b/Algo256/keccak256.cu
@@ -16,9 +16,9 @@ extern "C"
 static uint32_t *d_hash[MAX_GPUS];
-extern void keccak256_cpu_init(int thr_id, int threads);
+extern void keccak256_cpu_init(int thr_id, uint32_t threads);
 extern void keccak256_setBlock_80(void *pdata,const void *ptarget);
-extern uint32_t keccak256_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern uint32_t keccak256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order);
 // CPU Hash
 extern "C" void keccak256_hash(void *state, const void *input)
@@ -42,7 +42,7 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
 {
 	const uint32_t first_nonce = pdata[19];
 	uint32_t throughput = device_intensity(thr_id, __func__, 1U << 21); // 256*256*8*4
-	throughput = min(throughput, (max_nonce - first_nonce));
+	throughput = min(throughput, max_nonce - first_nonce);
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0x0005;
diff --git a/JHA/cuda_jha_compactionTest.cu b/JHA/cuda_jha_compactionTest.cu
index f701e0d..b864a8a 100644
--- a/JHA/cuda_jha_compactionTest.cu
+++ b/JHA/cuda_jha_compactionTest.cu
@@ -30,7 +30,7 @@ __device__ cuda_compactTestFunction_t d_JackpotTrueFunction = JackpotTrueTest, d
 cuda_compactTestFunction_t h_JackpotTrueFunction[MAX_GPUS], h_JackpotFalseFunction[MAX_GPUS];
 // Setup-Funktionen
-__host__ void jackpot_compactTest_cpu_init(int thr_id, int threads)
+__host__ void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads)
 {
 	cudaMemcpyFromSymbol(&h_JackpotTrueFunction[thr_id], d_JackpotTrueFunction, sizeof(cuda_compactTestFunction_t));
 	cudaMemcpyFromSymbol(&h_JackpotFalseFunction[thr_id], d_JackpotFalseFunction, sizeof(cuda_compactTestFunction_t));
@@ -56,7 +56,7 @@ __host__ void jackpot_compactTest_cpu_init(int thr_id, int threads)
 #endif
 // Die Summenfunktion (vom NVIDIA SDK)
-__global__ void jackpot_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *partial_sums=NULL, cuda_compactTestFunction_t testFunc=NULL, int threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
+__global__ void jackpot_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *partial_sums=NULL, cuda_compactTestFunction_t testFunc=NULL, uint32_t threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
 {
 	extern __shared__ uint32_t sums[];
 	int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
@@ -185,7 +185,7 @@ __global__ void jackpot_compactTest_gpu_ADD(uint32_t *data, uint32_t *partial_su
 }
 // Der Scatter
-__global__ void jackpot_compactTest_gpu_SCATTER(uint32_t *sum, uint32_t *outp, cuda_compactTestFunction_t testFunc, int threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
+__global__ void jackpot_compactTest_gpu_SCATTER(uint32_t *sum, uint32_t *outp, cuda_compactTestFunction_t testFunc, uint32_t threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
 {
 	int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
 	uint32_t actNounce = id;
@@ -234,7 +234,7 @@ __host__ static uint32_t jackpot_compactTest_roundUpExp(uint32_t val)
 	return mask;
 }
-__host__ void jackpot_compactTest_cpu_singleCompaction(int thr_id, int threads, uint32_t *nrm,
+__host__ void jackpot_compactTest_cpu_singleCompaction(int thr_id, uint32_t threads, uint32_t *nrm,
 	uint32_t *d_nonces1, cuda_compactTestFunction_t function,
 	uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable)
 {
@@ -290,7 +290,7 @@ __host__ void jackpot_compactTest_cpu_singleCompaction(int thr_id, int threads,
 }
 ////// ACHTUNG: Diese funktion geht aktuell nur mit threads > 65536 (Am besten 256 * 1024 oder 256*2048)
-__host__ void jackpot_compactTest_cpu_dualCompaction(int thr_id, int threads, uint32_t *nrm,
+__host__ void jackpot_compactTest_cpu_dualCompaction(int thr_id, uint32_t threads, uint32_t *nrm,
 	uint32_t *d_nonces1, uint32_t *d_nonces2,
 	uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable)
 {
@@ -329,7 +329,7 @@ __host__ void jackpot_compactTest_cpu_dualCompaction(int thr_id, int threads, ui
 	*/
 }
-__host__ void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
+__host__ void jackpot_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
 	uint32_t *d_nonces1, size_t *nrm1,
 	uint32_t *d_nonces2, size_t *nrm2,
 	int order)
diff --git a/JHA/cuda_jha_keccak512.cu b/JHA/cuda_jha_keccak512.cu
index 52ddd1f..d0d3c9b 100644
--- a/JHA/cuda_jha_keccak512.cu
+++ b/JHA/cuda_jha_keccak512.cu
@@ -100,9 +100,9 @@ keccak_block(uint64_t *s, const uint32_t *in, const uint64_t *keccak_round_const
 	}
 }
-__global__ void jackpot_keccak512_gpu_hash(int threads, uint32_t startNounce, uint64_t *g_hash)
+__global__ void jackpot_keccak512_gpu_hash(uint32_t threads, uint32_t startNounce, uint64_t *g_hash)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint32_t nounce = startNounce + thread;
@@ -145,7 +145,7 @@ __global__ void jackpot_keccak512_gpu_hash(int threads, uint32_t startNounce, ui
 }
 // Setup-Funktionen
-__host__ void jackpot_keccak512_cpu_init(int thr_id, int threads)
+__host__ void jackpot_keccak512_cpu_init(int thr_id, uint32_t threads)
 {
 	// Kopiere die Hash-Tabellen in den GPU-Speicher
 	cudaMemcpyToSymbol( c_keccak_round_constants,
@@ -522,9 +522,9 @@ __host__ void jackpot_keccak512_cpu_setBlock(void *pdata, size_t inlen)
 		0, cudaMemcpyHostToDevice);
 }
-__host__ void jackpot_keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order)
+__host__ void jackpot_keccak512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order)
 {
-	const int threadsperblock = 256;
+	const uint32_t threadsperblock = 256;
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu
index 25066fd..9fd1cf3 100644
--- a/JHA/jackpotcoin.cu
+++ b/JHA/jackpotcoin.cu
@@ -12,29 +12,29 @@ extern "C"
 static uint32_t *d_hash[MAX_GPUS];
-extern void jackpot_keccak512_cpu_init(int thr_id, int threads);
+extern void jackpot_keccak512_cpu_init(int thr_id, uint32_t threads);
 extern void jackpot_keccak512_cpu_setBlock(void *pdata, size_t inlen);
-extern void jackpot_keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void jackpot_keccak512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order);
-extern void quark_blake512_cpu_init(int thr_id, int threads);
-extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_blake512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void quark_groestl512_cpu_init(int thr_id, int threads);
-extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void quark_jh512_cpu_init(int thr_id, int threads);
-extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void quark_skein512_cpu_init(int thr_id, int threads);
-extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void jackpot_compactTest_cpu_init(int thr_id, int threads);
-extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
+extern void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads);
+extern void jackpot_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
 	uint32_t *d_nonces1, size_t *nrm1,
 	uint32_t *d_nonces2, size_t *nrm2,
 	int order);
-extern uint32_t cuda_check_hash_branch(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
+extern uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
 // Speicher zur Generierung der Noncevektoren für die bedingten Hashes
 static uint32_t *d_jackpotNonces[MAX_GPUS];
@@ -93,8 +93,8 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
 {
 	const uint32_t first_nonce = pdata[19];
-	int throughput = (int) device_intensity(thr_id, __func__, 1U << 20);
-	throughput = min(throughput, (int)(max_nonce - first_nonce));
+	uint32_t throughput = device_intensity(thr_id, __func__, 1U << 20);
+	throughput = min(throughput, max_nonce - first_nonce);
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0x000f;
diff --git a/cuda_checkhash.cu b/cuda_checkhash.cu
index 84794a4..85c2c04 100644
--- a/cuda_checkhash.cu
+++ b/cuda_checkhash.cu
@@ -15,7 +15,7 @@ static uint32_t* h_resNonces[MAX_GPUS];
 static uint32_t* d_resNonces[MAX_GPUS];
 __host__
-void cuda_check_cpu_init(int thr_id, int threads)
+void cuda_check_cpu_init(int thr_id, uint32_t threads)
 {
 	CUDA_CALL_OR_RET(cudaMallocHost(&h_resNonces[thr_id], 8*sizeof(uint32_t)));
 	CUDA_CALL_OR_RET(cudaMalloc(&d_resNonces[thr_id], 8*sizeof(uint32_t)));
@@ -71,9 +71,9 @@ static bool hashbelowtarget(const uint32_t *const __restrict__ hash, const uint3
 }
 __global__ __launch_bounds__(512, 4)
-void cuda_checkhash_64(int threads, uint32_t startNounce, uint32_t *hash, uint32_t *resNonces)
+void cuda_checkhash_64(uint32_t threads, uint32_t startNounce, uint32_t *hash, uint32_t *resNonces)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		// shl 4 = *16 x 4 (uint32) = 64 bytes
@@ -88,11 +88,11 @@ void cuda_checkhash_64(int threads, uint32_t startNounce, uint32_t *hash, uint32
 }
 __host__
-uint32_t cuda_check_hash(int thr_id, int threads, uint32_t startNounce, uint32_t *d_inputHash)
+uint32_t cuda_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash)
 {
 	cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t));
-	const int threadsperblock = 512;
+	const uint32_t threadsperblock = 512;
 	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
 	dim3 block(threadsperblock);
@@ -109,7 +109,7 @@ uint32_t cuda_check_hash(int thr_id, int threads, uint32_t startNounce, uint32_t
 __global__ __launch_bounds__(512, 4)
 void cuda_checkhash_64_suppl(uint32_t startNounce, uint32_t *hash, uint32_t *resNonces)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	uint32_t *inpHash = &hash[thread << 4];
@@ -122,11 +122,11 @@ void cuda_checkhash_64_suppl(uint32_t startNounce, uint32_t *hash, uint32_t *res
 }
 __host__
-uint32_t cuda_check_hash_suppl(int thr_id, int threads, uint32_t startNounce, uint32_t *d_inputHash, uint8_t numNonce)
+uint32_t cuda_check_hash_suppl(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash, uint8_t numNonce)
 {
 	uint32_t rescnt, result = 0;
-	const int threadsperblock = 512;
+	const uint32_t threadsperblock = 512;
 	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
 	dim3 block(threadsperblock);
@@ -152,9 +152,9 @@ uint32_t cuda_check_hash_suppl(int thr_id, int threads, uint32_t startNounce, ui
 /* --------------------------------------------------------------------------------------------- */
 __global__
-void cuda_check_hash_branch_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
+void cuda_check_hash_branch_64(uint32_t threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint32_t nounce = g_nonceVector[thread];
@@ -175,12 +175,12 @@ void cuda_check_hash_branch_64(int threads, uint32_t startNounce, uint32_t *g_no
 }
 __host__
-uint32_t cuda_check_hash_branch(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order)
+uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order)
 {
 	uint32_t result = 0xffffffff;
 	cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t));
-	const int threadsperblock = 256;
+	const uint32_t threadsperblock = 256;
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
diff --git a/cuda_fugue256.h b/cuda_fugue256.h
index c9fac91..ec9b771 100644
--- a/cuda_fugue256.h
+++ b/cuda_fugue256.h
@@ -1,8 +1,8 @@
 #ifndef _CUDA_FUGUE512_H
 #define _CUDA_FUGUE512_H
-void fugue256_cpu_hash(int thr_id, int threads, int startNounce, void *outputHashes, uint32_t *nounce);
+void fugue256_cpu_hash(int thr_id, uint32_t threads, int startNounce, void *outputHashes, uint32_t *nounce);
 void fugue256_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
-void fugue256_cpu_init(int thr_id, int threads);
+void fugue256_cpu_init(int thr_id, uint32_t threads);
 #endif
diff --git a/cuda_groestlcoin.cu b/cuda_groestlcoin.cu
index 375c329..47548a0 100644
--- a/cuda_groestlcoin.cu
+++ b/cuda_groestlcoin.cu
@@ -23,11 +23,11 @@ __constant__ uint32_t groestlcoin_gpu_msg[32];
 #define SWAB32(x) cuda_swab32(x)
 __global__ __launch_bounds__(256, 4)
-void groestlcoin_gpu_hash_quad(int threads, uint32_t startNounce, uint32_t *resNounce)
+void groestlcoin_gpu_hash_quad(uint32_t threads, uint32_t startNounce, uint32_t *resNounce)
 {
 #if __CUDA_ARCH__ >= 300
 	// durch 4 dividieren, weil jeweils 4 Threads zusammen ein Hash berechnen
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x) / 4;
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x) / 4;
 	if (thread < threads)
 	{
 		// GROESTL
@@ -95,7 +95,7 @@ void groestlcoin_gpu_hash_quad(int threads, uint32_t startNounce, uint32_t *resN
 }
 // Setup-Funktionen
-__host__ void groestlcoin_cpu_init(int thr_id, int threads)
+__host__ void groestlcoin_cpu_init(int thr_id, uint32_t threads)
 {
 	cudaSetDevice(device_map[thr_id]);
@@ -130,9 +130,9 @@ __host__ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
 		sizeof(uint32_t) * 8 );
 }
-__host__ void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
+__host__ void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
 {
-	int threadsperblock = 256;
+	uint32_t threadsperblock = 256;
 	// Compute 3.0 benutzt die registeroptimierte Quad Variante mit Warp Shuffle
 	// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
diff --git a/cuda_groestlcoin.h b/cuda_groestlcoin.h
index 8ad7dab..7b95b59 100644
--- a/cuda_groestlcoin.h
+++ b/cuda_groestlcoin.h
@@ -1,8 +1,8 @@
 #ifndef _CUDA_GROESTLCOIN_H
 #define _CUDA_GROESTLCOIN_H
-void groestlcoin_cpu_init(int thr_id, int threads);
+void groestlcoin_cpu_init(int thr_id, uint32_t threads);
 void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
-void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);
+void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);
 #endif
\ No newline at end of file
diff --git a/cuda_helper.h b/cuda_helper.h
index 2db4f2b..06349a8 100644
--- a/cuda_helper.h
+++ b/cuda_helper.h
@@ -21,10 +21,10 @@ extern "C" short device_map[MAX_GPUS];
 extern "C" long device_sm[MAX_GPUS];
 // common functions
-extern void cuda_check_cpu_init(int thr_id, int threads);
+extern void cuda_check_cpu_init(int thr_id, uint32_t threads);
 extern void cuda_check_cpu_setTarget(const void *ptarget);
-extern uint32_t cuda_check_hash(int thr_id, int threads, uint32_t startNounce, uint32_t *d_inputHash);
-extern uint32_t cuda_check_hash_suppl(int thr_id, int threads, uint32_t startNounce, uint32_t *d_inputHash, uint8_t numNonce);
+extern uint32_t cuda_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash);
+extern uint32_t cuda_check_hash_suppl(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash, uint8_t numNonce);
 extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
 extern void cudaReportHardwareFailure(int thr_id, cudaError_t error, const char* func);
 extern __device__ __device_builtin__ void __syncthreads(void);
diff --git a/cuda_myriadgroestl.cu b/cuda_myriadgroestl.cu
index f113744..a376740 100644
--- a/cuda_myriadgroestl.cu
+++ b/cuda_myriadgroestl.cu
@@ -222,11 +222,11 @@ __device__ void myriadgroestl_gpu_sha256(uint32_t *message)
 }
 __global__ void __launch_bounds__(256, 4)
- myriadgroestl_gpu_hash_quad(int threads, uint32_t startNounce, uint32_t *hashBuffer)
+ myriadgroestl_gpu_hash_quad(uint32_t threads, uint32_t startNounce, uint32_t *hashBuffer)
 {
 #if __CUDA_ARCH__ >= 300
 	// durch 4 dividieren, weil jeweils 4 Threads zusammen ein Hash berechnen
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x) / 4;
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x) / 4;
 	if (thread < threads)
 	{
 		// GROESTL
@@ -259,10 +259,10 @@ __global__ void __launch_bounds__(256, 4)
 }
 __global__ void
- myriadgroestl_gpu_hash_quad2(int threads, uint32_t startNounce, uint32_t *resNounce, uint32_t *hashBuffer)
+ myriadgroestl_gpu_hash_quad2(uint32_t threads, uint32_t startNounce, uint32_t *resNounce, uint32_t *hashBuffer)
 {
 #if __CUDA_ARCH__ >= 300
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint32_t nounce = startNounce + thread;
@@ -302,7 +302,7 @@ __global__ void
 }
 // Setup-Funktionen
-__host__ void myriadgroestl_cpu_init(int thr_id, int threads)
+__host__ void myriadgroestl_cpu_init(int thr_id, uint32_t threads)
 {
 	cudaSetDevice(device_map[thr_id]);
@@ -357,9 +357,9 @@ __host__ void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn
 		sizeof(uint32_t) * 8 );
 }
-__host__ void myriadgroestl_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
+__host__ void myriadgroestl_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
 {
-	int threadsperblock = 256;
+	uint32_t threadsperblock = 256;
 	// Compute 3.0 benutzt die registeroptimierte Quad Variante mit Warp Shuffle
 	// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
diff --git a/cuda_nist5.cu b/cuda_nist5.cu
index a745a88..4a8154a 100644
--- a/cuda_nist5.cu
+++ b/cuda_nist5.cu
@@ -13,21 +13,21 @@ extern "C"
 static uint32_t *d_hash[MAX_GPUS];
-extern void quark_blake512_cpu_init(int thr_id, int threads);
+extern void quark_blake512_cpu_init(int thr_id, uint32_t threads);
 extern void quark_blake512_cpu_setBlock_80(void *pdata);
-extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
+extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order);
-extern void quark_groestl512_cpu_init(int thr_id, int threads);
-extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void quark_jh512_cpu_init(int thr_id, int threads);
-extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void quark_keccak512_cpu_init(int thr_id, int threads);
-extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_keccak512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void quark_skein512_cpu_init(int thr_id, int threads);
-extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
 // Original nist5hash Funktion aus einem miner Quelltext
@@ -72,8 +72,8 @@ extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
 {
 	const uint32_t first_nonce = pdata[19];
-	int throughput = (int) device_intensity(thr_id, __func__, 1 << 20); // 256*256*16
-	throughput = min(throughput, (int) (max_nonce - first_nonce));
+	uint32_t throughput = device_intensity(thr_id, __func__, 1 << 20); // 256*256*16
+	throughput = min(throughput, (max_nonce - first_nonce));
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0x00FF;
diff --git a/fuguecoin.cpp b/fuguecoin.cpp
index 72ad965..ddc3cda 100644
--- a/fuguecoin.cpp
+++ b/fuguecoin.cpp
@@ -27,7 +27,7 @@ extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *pt
 {
 	uint32_t start_nonce = pdata[19]++;
 	int intensity = (device_sm[device_map[thr_id]] > 500) ? 22 : 19;
-	int throughput = (int) device_intensity(thr_id, __func__, 1 << intensity); // 256*256*8
+	uint32_t throughput = device_intensity(thr_id, __func__, 1 << intensity); // 256*256*8
 	throughput = min(throughput, max_nonce - start_nonce);
 	if (opt_benchmark)
diff --git a/heavy/cuda_blake512.cu b/heavy/cuda_blake512.cu
index 1a5df4e..ba12f23 100644
--- a/heavy/cuda_blake512.cu
+++ b/heavy/cuda_blake512.cu
@@ -118,9 +118,9 @@ template <int BLOCKSIZE> __device__ void blake512_compress( uint64_t *h, const u
 	for( i = 0; i < 16; ++i ) h[i % 8] ^= v[i];
 }
-template <int BLOCKSIZE> __global__ void blake512_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
+template <int BLOCKSIZE> __global__ void blake512_gpu_hash(uint32_t threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		// bestimme den aktuellen Zähler
@@ -192,7 +192,7 @@ template <int BLOCKSIZE> __global__ void blake512_gpu_hash(int threads, uint32_t
 // ---------------------------- END CUDA blake512 functions ------------------------------------
 // Setup-Funktionen
-__host__ void blake512_cpu_init(int thr_id, int threads)
+__host__ void blake512_cpu_init(int thr_id, uint32_t threads)
 {
 	// Kopiere die Hash-Tabellen in den GPU-Speicher
 	cudaMemcpyToSymbol( c_sigma,
@@ -238,9 +238,9 @@ __host__ void blake512_cpu_setBlock(void *pdata, int len)
 	BLOCKSIZE = len;
 }
-__host__ void blake512_cpu_hash(int thr_id, int threads, uint32_t startNounce)
+__host__ void blake512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce)
 {
-	const int threadsperblock = 256;
+	const uint32_t threadsperblock = 256;
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
diff --git a/heavy/cuda_combine.cu b/heavy/cuda_combine.cu
index c813f24..3365cf1 100644
--- a/heavy/cuda_combine.cu
+++ b/heavy/cuda_combine.cu
@@ -102,9 +102,9 @@ static void combine_hashes(uint32_t *out, uint32_t *hash1, uint32_t *hash2, uint
 }
 __global__
-void combine_gpu_hash(int threads, uint32_t startNounce, uint32_t *out, uint32_t *hash2, uint32_t *hash3, uint32_t *hash4, uint32_t *hash5, uint32_t *nonceVector)
+void combine_gpu_hash(uint32_t threads, uint32_t startNounce, uint32_t *out, uint32_t *hash2, uint32_t *hash3, uint32_t *hash4, uint32_t *hash5, uint32_t *nonceVector)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint32_t nounce = nonceVector[thread];
@@ -121,18 +121,18 @@ void combine_gpu_hash(int threads, uint32_t startNounce, uint32_t *out, uint32_t
 }
 __host__
-void combine_cpu_init(int thr_id, int threads)
+void combine_cpu_init(int thr_id, uint32_t threads)
 {
 	// Speicher für alle Ergebnisse belegen
 	CUDA_SAFE_CALL(cudaMalloc(&d_hashoutput[thr_id], 8 * sizeof(uint32_t) * threads));
 }
 __host__
-void combine_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint32_t *hash)
+void combine_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *hash)
 {
 	// diese Kopien sind optional, da die Hashes jetzt bereits auf der GPU liegen sollten
-	const int threadsperblock = 128;
+	const uint32_t threadsperblock = 128;
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
diff --git a/heavy/cuda_groestl512.cu b/heavy/cuda_groestl512.cu
index 3c4030b..0e503d3 100644
--- a/heavy/cuda_groestl512.cu
+++ b/heavy/cuda_groestl512.cu
@@ -669,9 +669,9 @@ __device__ void groestl512_perm_Q(uint32_t *a)
 	}
 }
-template <int BLOCKSIZE> __global__ void groestl512_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
+template <int BLOCKSIZE> __global__ void groestl512_gpu_hash(uint32_t threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint32_t message[32];
@@ -741,7 +741,7 @@ template <int BLOCKSIZE> __global__ void groestl512_gpu_hash(int threads, uint32
 	cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \
 // Setup-Funktionen
-__host__ void groestl512_cpu_init(int thr_id, int threads)
+__host__ void groestl512_cpu_init(int thr_id, uint32_t threads)
 {
 	// Texturen mit obigem Makro initialisieren
 	texDef(t0up, d_T0up, T0up_cpu, sizeof(uint32_t)*256);
@@ -794,16 +794,16 @@ __host__ void groestl512_cpu_setBlock(void *data, int len)
 	BLOCKSIZE = len;
 }
-__host__ void groestl512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy)
+__host__ void groestl512_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy)
 {
 	// Hefty1 Hashes kopieren (eigentlich nur zum debuggen)
 	if (copy)
 		CUDA_SAFE_CALL(cudaMemcpy(heavy_heftyHashes[thr_id], heftyHashes, 8 * sizeof(uint32_t) * threads, cudaMemcpyHostToDevice));
 }
-__host__ void groestl512_cpu_hash(int thr_id, int threads, uint32_t startNounce)
+__host__ void groestl512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce)
 {
-	const int threadsperblock = 128;
+	const uint32_t threadsperblock = 128;
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
diff --git a/heavy/cuda_hefty1.cu b/heavy/cuda_hefty1.cu
index b8f0ca2..d8a65a8 100644
--- a/heavy/cuda_hefty1.cu
+++ b/heavy/cuda_hefty1.cu
@@ -207,7 +207,7 @@ void hefty_cpu_round(uint32_t *regs, uint32_t W, uint32_t K, uint32_t *sponge)
 }
 __global__
-void hefty_gpu_hash(int threads, uint32_t startNounce, uint32_t *outputHash)
+void hefty_gpu_hash(uint32_t threads, uint32_t startNounce, uint32_t *outputHash)
 {
 #if USE_SHARED
 	extern __shared__ unsigned char heftytab[];
@@ -219,7 +219,7 @@ void hefty_gpu_hash(int threads, uint32_t startNounce, uint32_t *outputHash)
 	__syncthreads();
 #endif
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		// bestimme den aktuellen Zähler
@@ -306,7 +306,7 @@ void hefty_gpu_hash(int threads, uint32_t startNounce, uint32_t *outputHash)
 }
 __host__
-void hefty_cpu_init(int thr_id, int threads)
+void hefty_cpu_init(int thr_id, uint32_t threads)
 {
 	cudaSetDevice(device_map[thr_id]);
@@ -320,7 +320,7 @@ void hefty_cpu_init(int thr_id, int threads)
 }
 __host__
-void hefty_cpu_setBlock(int thr_id, int threads, void *data, int len)
+void hefty_cpu_setBlock(int thr_id, uint32_t threads, void *data, int len)
 // data muss 80/84-Byte haben!
 {
 	// Nachricht expandieren und setzen
@@ -390,9 +390,9 @@ void hefty_cpu_setBlock(int thr_id, int threads, void *data, int len)
 }
 __host__
-void hefty_cpu_hash(int thr_id, int threads, int startNounce)
+void hefty_cpu_hash(int thr_id, uint32_t threads, int startNounce)
 {
-	int threadsperblock = 256;
+	uint32_t threadsperblock = 256;
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
diff --git a/heavy/cuda_keccak512.cu b/heavy/cuda_keccak512.cu
index c62ce81..7415848 100644
--- a/heavy/cuda_keccak512.cu
+++ b/heavy/cuda_keccak512.cu
@@ -137,9 +137,9 @@ keccak_block(uint64_t *s, const uint32_t *in, const uint64_t *keccak_round_const
 }
 // Die Hash-Funktion
-template <int BLOCKSIZE> __global__ void keccak512_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
+template <int BLOCKSIZE> __global__ void keccak512_gpu_hash(uint32_t threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		// bestimme den aktuellen Zähler
@@ -186,7 +186,7 @@ template <int BLOCKSIZE> __global__ void keccak512_gpu_hash(int threads, uint32_
 // ---------------------------- END CUDA keccak512 functions ------------------------------------
 __host__
-void keccak512_cpu_init(int thr_id, int threads)
+void keccak512_cpu_init(int thr_id, uint32_t threads)
 {
 	// Kopiere die Hash-Tabellen in den GPU-Speicher
 	cudaMemcpyToSymbol( c_keccak_round_constants,
@@ -252,7 +252,7 @@ void keccak512_cpu_setBlock(void *data, int len)
 }
 __host__
-void keccak512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy)
+void keccak512_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy)
 {
 	// Hefty1 Hashes kopieren
 	if (copy)
@@ -261,9 +261,9 @@ void keccak512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int
 }
 __host__
-void keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce)
+void keccak512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce)
 {
-	const int threadsperblock = 128;
+	const uint32_t threadsperblock = 128;
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
diff --git a/heavy/cuda_sha256.cu b/heavy/cuda_sha256.cu
index 68e5cbf..d0305da 100644
--- a/heavy/cuda_sha256.cu
+++ b/heavy/cuda_sha256.cu
@@ -42,9 +42,9 @@ uint32_t sha256_cpu_constantTable[] = {
 #define SWAB32(x) ( ((x & 0x000000FF) << 24) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8) | ((x & 0xFF000000) >> 24) )
 // Die Hash-Funktion
-template <int BLOCKSIZE> __global__ void sha256_gpu_hash(int threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
+template <int BLOCKSIZE> __global__ void sha256_gpu_hash(uint32_t threads, uint32_t startNounce, void *outputHash, uint32_t *heftyHashes, uint32_t *nonceVector)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		// bestimme den aktuellen Zähler
@@ -161,7 +161,7 @@ template <int BLOCKSIZE> __global__ void sha256_gpu_hash(int threads, uint32_t s
 }
 // Setup-Funktionen
-__host__ void sha256_cpu_init(int thr_id, int threads)
+__host__ void sha256_cpu_init(int thr_id, uint32_t threads)
 {
 	// Kopiere die Hash-Tabellen in den GPU-Speicher
 	cudaMemcpyToSymbol( sha256_gpu_constantTable,
@@ -248,7 +248,7 @@ __host__ void sha256_cpu_setBlock(void *data, int len)
 	BLOCKSIZE = len;
 }
-__host__ void sha256_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy)
+__host__ void sha256_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy)
 {
 	// Hefty1 Hashes kopieren
 	if (copy)
@@ -256,9 +256,9 @@ __host__ void sha256_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashe
 	//else cudaThreadSynchronize();
 }
-__host__ void sha256_cpu_hash(int thr_id, int threads, int startNounce)
+__host__ void sha256_cpu_hash(int thr_id, uint32_t threads, int startNounce)
 {
-	const int threadsperblock = 256;
+	const uint32_t threadsperblock = 256;
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
diff --git a/heavy/heavy.cu b/heavy/heavy.cu
index b70b7fd..03f535d 100644
--- a/heavy/heavy.cu
+++ b/heavy/heavy.cu
@@ -136,8 +136,8 @@ int scanhash_heavy(int thr_id, uint32_t *pdata,
 {
 	const uint32_t first_nonce = pdata[19];
 	// CUDA will process thousands of threads.
-	int throughput = (int) device_intensity(thr_id, __func__, (1U << 19) - 256);
-	throughput = min(throughput, (int)(max_nonce - first_nonce));
+	uint32_t throughput = device_intensity(thr_id, __func__, (1U << 19) - 256);
+	throughput = min(throughput, max_nonce - first_nonce);
 	int rc = 0;
 	uint32_t *hash = NULL;
diff --git a/heavy/heavy.h b/heavy/heavy.h
index de84344..59f3913 100644
--- a/heavy/heavy.h
+++ b/heavy/heavy.h
@@ -1,30 +1,30 @@
 #ifndef _CUDA_HEAVY_H
 #define _CUDA_HEAVY_H
-void blake512_cpu_init(int thr_id, int threads);
+void blake512_cpu_init(int thr_id, uint32_t threads);
 void blake512_cpu_setBlock(void *pdata, int len);
-void blake512_cpu_hash(int thr_id, int threads, uint32_t startNounce);
+void blake512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
-void groestl512_cpu_init(int thr_id, int threads);
-void groestl512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy);
+void groestl512_cpu_init(int thr_id, uint32_t threads);
+void groestl512_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy);
 void groestl512_cpu_setBlock(void *data, int len);
-void groestl512_cpu_hash(int thr_id, int threads, uint32_t startNounce);
+void groestl512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
-void hefty_cpu_hash(int thr_id, int threads, int startNounce);
-void hefty_cpu_setBlock(int thr_id, int threads, void *data, int len);
-void hefty_cpu_init(int thr_id, int threads);
+void hefty_cpu_hash(int thr_id, uint32_t threads, int startNounce);
+void hefty_cpu_setBlock(int thr_id, uint32_t threads, void *data, int len);
+void hefty_cpu_init(int thr_id, uint32_t threads);
-void keccak512_cpu_init(int thr_id, int threads);
+void keccak512_cpu_init(int thr_id, uint32_t threads);
 void keccak512_cpu_setBlock(void *data, int len);
-void keccak512_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy);
-void keccak512_cpu_hash(int thr_id, int threads, uint32_t startNounce);
+void keccak512_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy);
+void keccak512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
-void sha256_cpu_init(int thr_id, int threads);
+void sha256_cpu_init(int thr_id, uint32_t threads);
 void sha256_cpu_setBlock(void *data, int len);
-void sha256_cpu_hash(int thr_id, int threads, int startNounce);
-void sha256_cpu_copyHeftyHash(int thr_id, int threads, void *heftyHashes, int copy);
+void sha256_cpu_hash(int thr_id, uint32_t threads, int startNounce);
+void sha256_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy);
-void combine_cpu_init(int thr_id, int threads);
-void combine_cpu_hash(int thr_id, int threads, uint32_t startNounce, uint32_t *hash);
+void combine_cpu_init(int thr_id, uint32_t threads);
+void combine_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *hash);
 #endif
diff --git a/lyra2/cuda_lyra2.cu b/lyra2/cuda_lyra2.cu
index 7ebc542..9f29c44 100644
--- a/lyra2/cuda_lyra2.cu
+++ b/lyra2/cuda_lyra2.cu
@@ -117,9 +117,9 @@ void reduceDuplexRowSetup(const int rowIn, const int rowInOut, const int rowOut,
 }
 __global__ __launch_bounds__(TPB, 1)
-void lyra2_gpu_hash_32(int threads, uint32_t startNounce, uint64_t *outputHash)
+void lyra2_gpu_hash_32(uint32_t threads, uint32_t startNounce, uint64_t *outputHash)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint2 state[16];
@@ -209,9 +209,9 @@ void lyra2_gpu_hash_32(int threads, uint32_t startNounce, uint64_t *outputHash)
 }
 __host__
-void lyra2_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
+void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
 {
-	const int threadsperblock = TPB;
+	const uint32_t threadsperblock = TPB;
 	dim3 grid((threads + threadsperblock - 1) / threadsperblock);
 	dim3 block(threadsperblock);
diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu
index 12f9d26..8c7c472 100644
--- a/lyra2/lyra2RE.cu
+++ b/lyra2/lyra2RE.cu
@@ -11,19 +11,19 @@ extern "C" {
 static _ALIGN(64) uint64_t *d_hash[MAX_GPUS];
-extern void blake256_cpu_init(int thr_id, int threads);
+extern void blake256_cpu_init(int thr_id, uint32_t threads);
 extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order);
 extern void blake256_cpu_setBlock_80(uint32_t *pdata);
-extern void keccak256_cpu_hash_32(int thr_id, int threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
-extern void keccak256_cpu_init(int thr_id, int threads);
-extern void skein256_cpu_hash_32(int thr_id, int threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
-extern void skein256_cpu_init(int thr_id, int threads);
+extern void keccak256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
+extern void keccak256_cpu_init(int thr_id, uint32_t threads);
+extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
+extern void skein256_cpu_init(int thr_id, uint32_t threads);
-extern void lyra2_cpu_hash_32(int thr_id, int threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
+extern void lyra2_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
 extern void groestl256_setTarget(const void *ptarget);
-extern uint32_t groestl256_cpu_hash_32(int thr_id, int threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
-extern void groestl256_cpu_init(int thr_id, int threads);
+extern uint32_t groestl256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_outputHash, int order);
+extern void groestl256_cpu_init(int thr_id, uint32_t threads);
 extern "C" void lyra2_hash(void *state, const void *input)
 {
@@ -63,8 +63,8 @@ extern "C" int scanhash_lyra2(int thr_id, uint32_t *pdata,
 {
 	const uint32_t first_nonce = pdata[19];
 	int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 18 : 17;
-	int throughput = (int) device_intensity(thr_id, __func__, 1U << intensity); // 18=256*256*4;
-	throughput = min(throughput, (int)(max_nonce - first_nonce));
+	uint32_t throughput = device_intensity(thr_id, __func__, 1U << intensity); // 18=256*256*4;
+	throughput = min(throughput, max_nonce - first_nonce);
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0x0000ff;
diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp
index c6db307..c91be8b 100644
--- a/myriadgroestl.cpp
+++ b/myriadgroestl.cpp
@@ -7,9 +7,9 @@
 #include "miner.h"
-void myriadgroestl_cpu_init(int thr_id, int threads);
+void myriadgroestl_cpu_init(int thr_id, uint32_t threads);
 void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
-void myriadgroestl_cpu_hash(int thr_id, int threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);
+void myriadgroestl_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);
 #define SWAP32(x) \
 ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \
diff --git a/pentablake.cu b/pentablake.cu
index 0e9307d..d150a91 100644
--- a/pentablake.cu
+++ b/pentablake.cu
@@ -173,9 +173,9 @@ void pentablake_compress(uint64_t *h, const uint64_t *block, const uint64_t T0)
 }
 __global__
-void pentablake_gpu_hash_80(int threads, const uint32_t startNounce, void *outputHash)
+void pentablake_gpu_hash_80(uint32_t threads, const uint32_t startNounce, void *outputHash)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint64_t h[8];
@@ -213,9 +213,9 @@ void pentablake_gpu_hash_80(int threads, const uint32_t startNounce, void *outpu
 }
 __host__
-void pentablake_cpu_hash_80(int thr_id, int threads, const uint32_t startNounce, uint32_t *d_outputHash, int order)
+void pentablake_cpu_hash_80(int thr_id, uint32_t threads, const uint32_t startNounce, uint32_t *d_outputHash, int order)
 {
-	const int threadsperblock = TPB;
+	const uint32_t threadsperblock = TPB;
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
@@ -228,9 +228,9 @@ void pentablake_cpu_hash_80(int thr_id, int threads, const uint32_t startNounce,
 __global__
-void pentablake_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash)
+void pentablake_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
@@ -271,9 +271,9 @@ void pentablake_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash)
 }
 __host__
-void pentablake_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
+void pentablake_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order)
 {
-	const int threadsperblock = TPB;
+	const uint32_t threadsperblock = TPB;
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
@@ -321,7 +321,7 @@ void pentablake_gpu_check_hash(uint32_t threads, uint32_t startNounce, uint32_t
 __host__
 static uint32_t pentablake_check_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_inputHash, int order)
 {
-	const int threadsperblock = TPB;
+	const uint32_t threadsperblock = TPB;
 	uint32_t result = UINT32_MAX;
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
@@ -370,8 +370,8 @@ extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *
 	const uint32_t first_nonce = pdata[19];
 	uint32_t endiandata[20];
 	int rc = 0;
-	int throughput = (int) device_intensity(thr_id, __func__, 128U * 2560); // 18.5
-	throughput = min(throughput, (int)(max_nonce - first_nonce));
+	uint32_t throughput = device_intensity(thr_id, __func__, 128U * 2560); // 18.5
+	throughput = min(throughput, max_nonce - first_nonce);
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0x000F;
diff --git a/quark/animecoin.cu b/quark/animecoin.cu
index 0839f81..68323a3 100644
--- a/quark/animecoin.cu
+++ b/quark/animecoin.cu
@@ -17,37 +17,37 @@ static uint32_t *d_branch1Nonces[MAX_GPUS];
 static uint32_t *d_branch2Nonces[MAX_GPUS];
 static uint32_t *d_branch3Nonces[MAX_GPUS];
-extern void quark_blake512_cpu_init(int thr_id, int threads);
-extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_blake512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void quark_bmw512_cpu_init(int thr_id, int threads);
+extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads);
 extern void quark_bmw512_cpu_setBlock_80(void *pdata);
-extern void quark_bmw512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order);
-extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order);
+extern void quark_bmw512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order);
+extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order);
-extern void quark_groestl512_cpu_init(int thr_id, int threads);
-extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void quark_skein512_cpu_init(int thr_id, int threads);
-extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void quark_keccak512_cpu_init(int thr_id, int threads);
-extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_keccak512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void quark_jh512_cpu_init(int thr_id, int threads);
-extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
+extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
+extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
-extern void quark_compactTest_cpu_init(int thr_id, int threads);
-extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
+extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
+extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
 	uint32_t *d_nonces1, size_t *nrm1,
 	uint32_t *d_nonces2, size_t *nrm2,
 	int order);
-extern void quark_compactTest_single_false_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
+extern void quark_compactTest_single_false_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
 	uint32_t *d_nonces1, size_t *nrm1, int order);
-extern uint32_t cuda_check_hash_branch(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
+extern uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
 /* CPU Hash */
 extern "C" void animehash(void *state, const void *input)
@@ -166,8 +166,8 @@ extern "C" int scanhash_anime(int thr_id, uint32_t *pdata,
 	unsigned long *hashes_done)
 {
 	const uint32_t first_nonce = pdata[19];
-	int throughput = (int) device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
-	throughput = min(throughput, (int)(max_nonce - first_nonce));
+	uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
+	throughput = min(throughput, max_nonce - first_nonce);
 	if (opt_benchmark)
 		((uint32_t*)ptarget)[7] = 0x00000f;
diff --git a/quark/cuda_bmw512.cu b/quark/cuda_bmw512.cu
index bc5e69d..42eb0c4 100644
--- a/quark/cuda_bmw512.cu
+++ b/quark/cuda_bmw512.cu
@@ -436,7 +436,7 @@ void quark_bmw512_gpu_hash_80(uint32_t threads, uint32_t startNounce, uint64_t *
 }
 __host__
-void quark_bmw512_cpu_init(int thr_id, int threads)
+void quark_bmw512_cpu_init(int thr_id, uint32_t threads)
 {
 }
@@ -453,7 +453,7 @@ void quark_bmw512_cpu_setBlock_80(void *pdata)
 }
 __host__
-void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
+void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
 {
 	const uint32_t threadsperblock = 32;
@@ -465,7 +465,7 @@ void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uin
 }
 __host__
-void quark_bmw512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order)
+void quark_bmw512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order)
 {
 	const uint32_t threadsperblock = 128;
diff --git a/quark/cuda_jh512.cu b/quark/cuda_jh512.cu
index 8f62810..7e82e83 100644
--- a/quark/cuda_jh512.cu
+++ b/quark/cuda_jh512.cu
@@ -331,9 +331,9 @@ __device__ __forceinline__ void JHHash(const uint32_t *data, uint32_t *hashval)
 // Die Hash-Funktion
 __global__ __launch_bounds__(256, 3)
-void quark_jh512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
+void quark_jh512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);
@@ -347,7 +347,7 @@ void quark_jh512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash
 // Setup-Funktionen
-__host__ void quark_jh512_cpu_init(int thr_id, int threads)
+__host__ void quark_jh512_cpu_init(int thr_id, uint32_t threads)
 {
 	cudaMemcpyToSymbol( c_E8_bitslice_roundconstant,
@@ -361,9 +361,9 @@ __host__ void quark_jh512_cpu_init(int thr_id, int threads)
 		0, cudaMemcpyHostToDevice);
 }
-__host__ void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
+__host__ void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
 {
-	const int threadsperblock = 256;
+	const uint32_t threadsperblock = 256;
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
diff --git a/quark/cuda_quark_blake512.cu b/quark/cuda_quark_blake512.cu
index 64bd15c..54689c3 100644
--- a/quark/cuda_quark_blake512.cu
+++ b/quark/cuda_quark_blake512.cu
@@ -122,9 +122,9 @@ static const uint64_t d_constHashPadding[8] = {
 };
 __global__ __launch_bounds__(256, 4)
-void quark_blake512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint64_t *g_hash)
+void quark_blake512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *g_nonceVector, uint64_t *g_hash)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 #if USE_SHUFFLE
 	const int warpID = threadIdx.x & 0x0F; // 16 warps
@@ -186,9 +186,9 @@ void quark_blake512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_n
 	}
 }
-__global__ void quark_blake512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash)
+__global__ void quark_blake512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputHash)
 {
-	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
+	uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
 	if (thread < threads)
 	{
 		uint64_t buf[16];
@@ -236,7 +236,7 @@ __global__ void quark_blake512_gpu_hash_80(int threads, uint32_t startNounce, vo
 // ---------------------------- END CUDA quark_blake512 functions ------------------------------------
 // Setup-Funktionen
-__host__ void quark_blake512_cpu_init(int thr_id, int threads)
+__host__ void quark_blake512_cpu_init(int thr_id, uint32_t threads)
 {
 	// Kopiere die Hash-Tabellen in den GPU-Speicher
 	CUDA_CALL_OR_RET( cudaMemcpyToSymbol(c_sigma,
@@ -263,9 +263,9 @@ __host__ void quark_blake512_cpu_setBlock_80(void *pdata)
 	);
 }
-__host__ void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order)
+__host__ void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_outputHash, int order)
 {
-	const int threadsperblock = 256;
+	const uint32_t threadsperblock = 256;
 	// berechne wie viele Thread Blocks wir brauchen
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
@@ -280,9 +280,9 @@ __host__ void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t start
 	MyStreamSynchronize(NULL, order, thr_id);
 }
-__host__ void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order) +__host__ void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order) { - const int threadsperblock = 256; + const uint32_t threadsperblock = 256; // berechne wie viele Thread Blocks wir brauchen dim3 grid((threads + threadsperblock-1)/threadsperblock); diff --git a/quark/cuda_quark_compactionTest.cu b/quark/cuda_quark_compactionTest.cu index abffbab..183e08a 100644 --- a/quark/cuda_quark_compactionTest.cu +++ b/quark/cuda_quark_compactionTest.cu @@ -28,7 +28,7 @@ __device__ cuda_compactTestFunction_t d_QuarkTrueFunction = QuarkTrueTest, d_Qua cuda_compactTestFunction_t h_QuarkTrueFunction[MAX_GPUS], h_QuarkFalseFunction[MAX_GPUS]; // Setup-Funktionen -__host__ void quark_compactTest_cpu_init(int thr_id, int threads) +__host__ void quark_compactTest_cpu_init(int thr_id, uint32_t threads) { cudaMemcpyFromSymbol(&h_QuarkTrueFunction[thr_id], d_QuarkTrueFunction, sizeof(cuda_compactTestFunction_t)); cudaMemcpyFromSymbol(&h_QuarkFalseFunction[thr_id], d_QuarkFalseFunction, sizeof(cuda_compactTestFunction_t)); @@ -54,7 +54,7 @@ __host__ void quark_compactTest_cpu_init(int thr_id, int threads) #endif // Die Summenfunktion (vom NVIDIA SDK) -__global__ void quark_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *partial_sums=NULL, cuda_compactTestFunction_t testFunc=NULL, int threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL) +__global__ void quark_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *partial_sums=NULL, cuda_compactTestFunction_t testFunc=NULL, uint32_t threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL) { extern __shared__ uint32_t sums[]; int id = ((blockIdx.x * blockDim.x) + threadIdx.x); @@ -183,7 +183,7 @@ __global__ void quark_compactTest_gpu_ADD(uint32_t *data, uint32_t *partial_sums } // Der Scatter -__global__ void quark_compactTest_gpu_SCATTER(uint32_t *sum, uint32_t *outp, cuda_compactTestFunction_t testFunc, int threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL) +__global__ void quark_compactTest_gpu_SCATTER(uint32_t *sum, uint32_t *outp, cuda_compactTestFunction_t testFunc, uint32_t threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL) { int id = ((blockIdx.x * blockDim.x) + threadIdx.x); uint32_t actNounce = id; @@ -232,7 +232,7 @@ __host__ static uint32_t quark_compactTest_roundUpExp(uint32_t val) return mask; } -__host__ void quark_compactTest_cpu_singleCompaction(int thr_id, int threads, uint32_t *nrm, +__host__ void quark_compactTest_cpu_singleCompaction(int thr_id, uint32_t threads, uint32_t *nrm, uint32_t *d_nonces1, cuda_compactTestFunction_t function, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable) { @@ -288,7 +288,7 @@ __host__ void quark_compactTest_cpu_singleCompaction(int thr_id, int threads, ui } ////// ACHTUNG: Diese funktion geht aktuell nur mit threads > 65536 (Am besten 256 * 1024 oder 256*2048) -__host__ void quark_compactTest_cpu_dualCompaction(int thr_id, int threads, uint32_t *nrm, +__host__ void quark_compactTest_cpu_dualCompaction(int thr_id, uint32_t threads, uint32_t *nrm, uint32_t *d_nonces1, uint32_t *d_nonces2, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable) { @@ -327,7 +327,7 @@ __host__ void 
quark_compactTest_cpu_dualCompaction(int thr_id, int threads, uint */ } -__host__ void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, +__host__ void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, uint32_t *d_nonces1, size_t *nrm1, uint32_t *d_nonces2, size_t *nrm2, int order) @@ -344,7 +344,7 @@ __host__ void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t st *nrm2 = (size_t)h_numValid[thr_id][1]; } -__host__ void quark_compactTest_single_false_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, +__host__ void quark_compactTest_single_false_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, uint32_t *d_nonces1, size_t *nrm1, int order) { diff --git a/quark/cuda_quark_groestl512.cu b/quark/cuda_quark_groestl512.cu index e698be1..6381af7 100644 --- a/quark/cuda_quark_groestl512.cu +++ b/quark/cuda_quark_groestl512.cu @@ -16,11 +16,11 @@ #include "quark/cuda_quark_groestl512_sm20.cu" __global__ __launch_bounds__(TPB, THF) -void quark_groestl512_gpu_hash_64_quad(int threads, uint32_t startNounce, uint32_t * __restrict g_hash, uint32_t * __restrict g_nonceVector) +void quark_groestl512_gpu_hash_64_quad(uint32_t threads, uint32_t startNounce, uint32_t * __restrict g_hash, uint32_t * __restrict g_nonceVector) { #if __CUDA_ARCH__ >= 300 // durch 4 dividieren, weil jeweils 4 Threads zusammen ein Hash berechnen - int thread = (blockDim.x * blockIdx.x + threadIdx.x) >> 2; + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x) >> 2; if (thread < threads) { // GROESTL @@ -62,10 +62,10 @@ void quark_groestl512_gpu_hash_64_quad(int threads, uint32_t startNounce, uint32 } __global__ void __launch_bounds__(TPB, THF) - quark_doublegroestl512_gpu_hash_64_quad(int threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *g_nonceVector) + quark_doublegroestl512_gpu_hash_64_quad(uint32_t threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *g_nonceVector) { #if __CUDA_ARCH__ >= 300 - int thread = (blockDim.x * blockIdx.x + threadIdx.x)>>2; + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x)>>2; if (thread < threads) { // GROESTL @@ -124,13 +124,13 @@ __global__ void __launch_bounds__(TPB, THF) -__host__ void quark_groestl512_cpu_init(int thr_id, int threads) +__host__ void quark_groestl512_cpu_init(int thr_id, uint32_t threads) { if (device_sm[device_map[thr_id]] < 300) quark_groestl512_sm20_init(thr_id, threads); } -__host__ void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +__host__ void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { int threadsperblock = TPB; @@ -154,7 +154,7 @@ __host__ void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t sta MyStreamSynchronize(NULL, order, thr_id); } -__host__ void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +__host__ void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { const int factor = THF; int threadsperblock = TPB; diff --git a/quark/cuda_quark_groestl512_sm20.cu 
b/quark/cuda_quark_groestl512_sm20.cu index b754fcb..bf3c750 100644 --- a/quark/cuda_quark_groestl512_sm20.cu +++ b/quark/cuda_quark_groestl512_sm20.cu @@ -202,7 +202,7 @@ void quark_groestl512_perm_Q(uint32_t *a, char *mixtabs) #endif __global__ -void quark_groestl512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *g_nonceVector) +void quark_groestl512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32_t *g_hash, uint32_t *g_nonceVector) { #if __CUDA_ARCH__ < 300 extern __shared__ char mixtabs[]; @@ -221,7 +221,7 @@ void quark_groestl512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g __syncthreads(); - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { // GROESTL @@ -285,7 +285,7 @@ void quark_groestl512_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \ __host__ -void quark_groestl512_sm20_init(int thr_id, int threads) +void quark_groestl512_sm20_init(int thr_id, uint32_t threads) { // Texturen mit obigem Makro initialisieren texDef(t0up1, d_T0up, T0up_cpu, sizeof(uint32_t)*256); @@ -299,7 +299,7 @@ void quark_groestl512_sm20_init(int thr_id, int threads) } __host__ -void quark_groestl512_sm20_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +void quark_groestl512_sm20_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { int threadsperblock = 512; @@ -314,7 +314,7 @@ void quark_groestl512_sm20_hash_64(int thr_id, int threads, uint32_t startNounce } __host__ -void quark_doublegroestl512_sm20_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +void quark_doublegroestl512_sm20_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { int threadsperblock = 512; diff --git a/quark/cuda_quark_keccak512.cu b/quark/cuda_quark_keccak512.cu index 7231051..5d7dcd7 100644 --- a/quark/cuda_quark_keccak512.cu +++ b/quark/cuda_quark_keccak512.cu @@ -94,9 +94,9 @@ static void keccak_block(uint2 *s) } __global__ -void quark_keccak512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +void quark_keccak512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); @@ -193,9 +193,9 @@ static void keccak_block_v30(uint64_t *s, const uint32_t *in) } __global__ -void quark_keccak512_gpu_hash_64_v30(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +void quark_keccak512_gpu_hash_64_v30(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = (g_nonceVector != NULL) ? 
g_nonceVector[thread] : (startNounce + thread); @@ -232,7 +232,7 @@ void quark_keccak512_gpu_hash_64_v30(int threads, uint32_t startNounce, uint64_t } __host__ -void quark_keccak512_cpu_init(int thr_id, int threads) +void quark_keccak512_cpu_init(int thr_id, uint32_t threads) { cudaMemcpyToSymbol( d_keccak_round_constants, host_keccak_round_constants, @@ -241,9 +241,9 @@ void quark_keccak512_cpu_init(int thr_id, int threads) } __host__ -void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = 256; + const uint32_t threadsperblock = 256; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); diff --git a/quark/cuda_skein512.cu b/quark/cuda_skein512.cu index 36fa3b7..dfa1c0d 100644 --- a/quark/cuda_skein512.cu +++ b/quark/cuda_skein512.cu @@ -350,9 +350,9 @@ uint64_t skein_rotl64(const uint64_t x, const int offset) __global__ -void quark_skein512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t * const __restrict__ g_hash, uint32_t *g_nonceVector) +void quark_skein512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t * const __restrict__ g_hash, uint32_t *g_nonceVector) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { // Skein @@ -450,9 +450,9 @@ void quark_skein512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t * co } __global__ -void quark_skein512_gpu_hash_64_v30(int threads, uint32_t startNounce, uint64_t * const __restrict__ g_hash, uint32_t *g_nonceVector) +void quark_skein512_gpu_hash_64_v30(uint32_t threads, uint32_t startNounce, uint64_t * const __restrict__ g_hash, uint32_t *g_nonceVector) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { // Skein @@ -550,14 +550,14 @@ void quark_skein512_gpu_hash_64_v30(int threads, uint32_t startNounce, uint64_t } __host__ -void quark_skein512_cpu_init(int thr_id, int threads) +void quark_skein512_cpu_init(int thr_id, uint32_t threads) { } __host__ -void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = 256; + const uint32_t threadsperblock = 256; // berechne wie viele Thread Blocks wir brauchen dim3 grid((threads + threadsperblock-1)/threadsperblock); diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu index 0f3049f..d8bf02a 100644 --- a/quark/quarkcoin.cu +++ b/quark/quarkcoin.cu @@ -19,37 +19,37 @@ static uint32_t *d_branch1Nonces[MAX_GPUS]; static uint32_t *d_branch2Nonces[MAX_GPUS]; static uint32_t *d_branch3Nonces[MAX_GPUS]; -extern void quark_blake512_cpu_init(int thr_id, int threads); +extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); -extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_hash_80(int 
thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_bmw512_cpu_init(int thr_id, int threads); -extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); +extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_groestl512_cpu_init(int thr_id, int threads); -extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads); +extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_skein512_cpu_init(int thr_id, int threads); -extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); +extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_keccak512_cpu_init(int thr_id, int threads); -extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_keccak512_cpu_init(int thr_id, uint32_t threads); +extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_jh512_cpu_init(int thr_id, int threads); -extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_jh512_cpu_init(int thr_id, uint32_t threads); +extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, int threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, +extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); +extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, uint32_t *d_nonces1, size_t *nrm1, uint32_t *d_nonces2, size_t *nrm2, int order); -extern void quark_compactTest_single_false_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, +extern void quark_compactTest_single_false_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable, uint32_t *d_nonces1, size_t *nrm1, int order); -extern uint32_t cuda_check_hash_branch(int thr_id, int threads, 
uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); +extern uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); // Original Quarkhash Funktion aus einem miner Quelltext extern "C" void quarkhash(void *state, const void *input) @@ -137,8 +137,8 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata, { const uint32_t first_nonce = pdata[19]; - int throughput = (int) device_intensity(thr_id, __func__, 1 << 20); // 256*4096 - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = device_intensity(thr_id, __func__, 1 << 20); // 256*4096 + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00F; diff --git a/qubit/deep.cu b/qubit/deep.cu index da81523..0523f12 100644 --- a/qubit/deep.cu +++ b/qubit/deep.cu @@ -16,17 +16,17 @@ extern "C" { static uint32_t *d_hash[MAX_GPUS]; -extern void qubit_luffa512_cpu_init(int thr_id, int threads); +extern void qubit_luffa512_cpu_init(int thr_id, uint32_t threads); extern void qubit_luffa512_cpu_setBlock_80(void *pdata); -extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void qubit_luffa512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget); -extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); -extern void x11_cubehash512_cpu_init(int thr_id, int threads); -extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_cubehash512_cpu_init(int thr_id, uint32_t threads); +extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_echo512_cpu_init(int thr_id, int threads); -extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_echo512_cpu_init(int thr_id, uint32_t threads); +extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern "C" void deephash(void *state, const void *input) { @@ -60,8 +60,8 @@ extern "C" int scanhash_deep(int thr_id, uint32_t *pdata, { const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; - int throughput = (int) device_intensity(thr_id, __func__, 1U << 19); // 256*256*8 - throughput = min(throughput, (int) (max_nonce - first_nonce)); + uint32_t throughput = device_intensity(thr_id, __func__, 1U << 19); // 256*256*8 + throughput = min(throughput, (max_nonce - first_nonce)); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000f; diff --git a/qubit/doom.cu b/qubit/doom.cu index bfa0556..5eac147 100644 --- a/qubit/doom.cu +++ b/qubit/doom.cu @@ -12,11 +12,11 @@ extern "C" { static uint32_t *d_hash[MAX_GPUS]; -extern void qubit_luffa512_cpu_init(int thr_id, int threads); +extern void qubit_luffa512_cpu_init(int thr_id, uint32_t threads); extern void qubit_luffa512_cpu_setBlock_80(void *pdata); -extern void qubit_luffa512_cpu_hash_80(int thr_id, 
int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void qubit_luffa512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void qubit_luffa512_cpufinal_setBlock_80(void *pdata, const void *ptarget); -extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void doomhash(void *state, const void *input) { @@ -41,7 +41,7 @@ extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; uint32_t throughput = device_intensity(thr_id, __func__, 1U << 22); // 256*256*8*8 - throughput = min(throughput, (max_nonce - first_nonce)); + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000f; diff --git a/qubit/qubit.cu b/qubit/qubit.cu index 987b784..850732f 100644 --- a/qubit/qubit.cu +++ b/qubit/qubit.cu @@ -16,24 +16,24 @@ extern "C" { static uint32_t *d_hash[MAX_GPUS]; -extern void qubit_luffa512_cpu_init(int thr_id, int threads); +extern void qubit_luffa512_cpu_init(int thr_id, uint32_t threads); extern void qubit_luffa512_cpu_setBlock_80(void *pdata); -extern void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void qubit_luffa512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); -extern void x11_cubehash512_cpu_init(int thr_id, int threads); -extern void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_cubehash512_cpu_init(int thr_id, uint32_t threads); +extern void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_shavite512_cpu_init(int thr_id, int threads); -extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_shavite512_cpu_init(int thr_id, uint32_t threads); +extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern int x11_simd512_cpu_init(int thr_id, int threads); -extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern int x11_simd512_cpu_init(int thr_id, uint32_t threads); +extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_echo512_cpu_init(int thr_id, int threads); -extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_echo512_cpu_init(int thr_id, uint32_t threads); +extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, int threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, +extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); +extern void quark_compactTest_cpu_hash_64(int thr_id, 
uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); @@ -80,8 +80,8 @@ extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata, { uint32_t endiandata[20]; const uint32_t first_nonce = pdata[19]; - int throughput = (int) device_intensity(thr_id, __func__, 1U << 19); // 256*256*8 - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = device_intensity(thr_id, __func__, 1U << 19); // 256*256*8 + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/qubit/qubit_luffa512.cu b/qubit/qubit_luffa512.cu index 4d3d04c..ae8566b 100644 --- a/qubit/qubit_luffa512.cu +++ b/qubit/qubit_luffa512.cu @@ -352,9 +352,9 @@ void finalization512(hashState *state, uint32_t *b) /***************************************************/ // Die Hash-Funktion __global__ -void qubit_luffa512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash) +void qubit_luffa512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputHash) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = startNounce + thread; @@ -382,9 +382,9 @@ void qubit_luffa512_gpu_hash_80(int threads, uint32_t startNounce, void *outputH } __global__ -void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce) +void qubit_luffa512_gpu_finalhash_80(uint32_t threads, uint32_t startNounce, void *outputHash, uint32_t *resNounce) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = startNounce + thread; @@ -445,7 +445,7 @@ void qubit_luffa512_gpu_finalhash_80(int threads, uint32_t startNounce, void *ou } __host__ -void qubit_luffa512_cpu_init(int thr_id, int threads) +void qubit_luffa512_cpu_init(int thr_id, uint32_t threads) { CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_IV, h2_IV, sizeof(h2_IV), 0, cudaMemcpyHostToDevice)); CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_CNS, h2_CNS, sizeof(h2_CNS), 0, cudaMemcpyHostToDevice)); @@ -454,11 +454,11 @@ void qubit_luffa512_cpu_init(int thr_id, int threads) } __host__ -uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order) +uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash,int order) { uint32_t result = UINT32_MAX; cudaMemset(d_resNounce[thr_id], 0xff, NBN * sizeof(uint32_t)); - const int threadsperblock = 256; + const uint32_t threadsperblock = 256; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); @@ -478,9 +478,9 @@ uint32_t qubit_luffa512_cpu_finalhash_80(int thr_id, int threads, uint32_t start } __host__ -void qubit_luffa512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash,int order) +void qubit_luffa512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash,int order) { - const int threadsperblock = 256; + const uint32_t threadsperblock = 256; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); diff --git a/scrypt.c b/scrypt.c index f0795a3..b716125 100644 --- a/scrypt.c +++ b/scrypt.c @@ -701,8 +701,8 @@ int scanhash_scrypt(int thr_id, uint32_t *pdata, uint32_t midstate[8]; uint32_t n 
= pdata[19] - 1; const uint32_t Htarg = ptarget[7]; - int throughput = scrypt_best_throughput(); - int i; + uint32_t throughput = scrypt_best_throughput(); + uint32_t i; #if HAVE_SHA256_4WAY if (sha256_use_4way()) diff --git a/x11/cuda_x11_cubehash512.cu b/x11/cuda_x11_cubehash512.cu index 1de1cb7..ed6e63a 100644 --- a/x11/cuda_x11_cubehash512.cu +++ b/x11/cuda_x11_cubehash512.cu @@ -256,9 +256,9 @@ static void Final(uint32_t x[2][2][2][2][2], BitSequence *hashval) /***************************************************/ // GPU Hash Function __global__ -void x11_cubehash512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +void x11_cubehash512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); @@ -289,14 +289,14 @@ void x11_cubehash512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_ // Setup-Funktionen __host__ -void x11_cubehash512_cpu_init(int thr_id, int threads) +void x11_cubehash512_cpu_init(int thr_id, uint32_t threads) { } __host__ -void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +void x11_cubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = 256; + const uint32_t threadsperblock = 256; // berechne wie viele Thread Blocks wir brauchen dim3 grid((threads + threadsperblock-1)/threadsperblock); diff --git a/x11/cuda_x11_echo.cu b/x11/cuda_x11_echo.cu index 0866f8c..4a9af90 100644 --- a/x11/cuda_x11_echo.cu +++ b/x11/cuda_x11_echo.cu @@ -279,13 +279,13 @@ void echo_gpu_init(uint32_t *const __restrict__ sharedMemory) } __global__ __launch_bounds__(128, 7) /* will force 72 registers */ -void x11_echo512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +void x11_echo512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { __shared__ uint32_t sharedMemory[1024]; echo_gpu_init(sharedMemory); - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = (g_nonceVector != NULL) ? 
g_nonceVector[thread] : (startNounce + thread); @@ -298,15 +298,15 @@ void x11_echo512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash } __host__ -void x11_echo512_cpu_init(int thr_id, int threads) +void x11_echo512_cpu_init(int thr_id, uint32_t threads) { aes_cpu_init(thr_id); } __host__ -void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = 128; + const uint32_t threadsperblock = 128; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); diff --git a/x11/cuda_x11_luffa512.cu b/x11/cuda_x11_luffa512.cu index 50d0bde..b2272ec 100644 --- a/x11/cuda_x11_luffa512.cu +++ b/x11/cuda_x11_luffa512.cu @@ -335,9 +335,9 @@ void finalization512(hashState *state, uint32_t *b) /***************************************************/ // Die Hash-Funktion -__global__ void x11_luffa512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +__global__ void x11_luffa512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); @@ -358,15 +358,15 @@ __global__ void x11_luffa512_gpu_hash_64(int threads, uint32_t startNounce, uint // Setup Function __host__ -void x11_luffa512_cpu_init(int thr_id, int threads) +void x11_luffa512_cpu_init(int thr_id, uint32_t threads) { CUDA_CALL_OR_RET(cudaMemcpyToSymbol(c_IV, h_IV, sizeof(h_IV), 0, cudaMemcpyHostToDevice)); CUDA_CALL_OR_RET(cudaMemcpyToSymbol(c_CNS, h_CNS, sizeof(h_CNS), 0, cudaMemcpyHostToDevice)); } -__host__ void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +__host__ void x11_luffa512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = 256; + const uint32_t threadsperblock = 256; // berechne wie viele Thread Blocks wir brauchen dim3 grid((threads + threadsperblock-1)/threadsperblock); diff --git a/x11/cuda_x11_luffa512_Cubehash.cu b/x11/cuda_x11_luffa512_Cubehash.cu index 09e6767..d9b9e80 100644 --- a/x11/cuda_x11_luffa512_Cubehash.cu +++ b/x11/cuda_x11_luffa512_Cubehash.cu @@ -587,9 +587,9 @@ static void Final(uint32_t x[2][2][2][2][2], BitSequence *hashval) /***************************************************/ // Hash Function __global__ -void x11_luffaCubehash512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +void x11_luffaCubehash512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = (g_nonceVector != NULL) ? 
g_nonceVector[thread] : (startNounce + thread); @@ -625,16 +625,16 @@ void x11_luffaCubehash512_gpu_hash_64(int threads, uint32_t startNounce, uint64_ // Setup __host__ -void x11_luffaCubehash512_cpu_init(int thr_id, int threads) +void x11_luffaCubehash512_cpu_init(int thr_id, uint32_t threads) { cudaMemcpyToSymbol(c_IV, h_IV, sizeof(h_IV), 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(c_CNS, h_CNS, sizeof(h_CNS), 0, cudaMemcpyHostToDevice); } __host__ -void x11_luffaCubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +void x11_luffaCubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = 256; + const uint32_t threadsperblock = 256; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); diff --git a/x11/cuda_x11_shavite512.cu b/x11/cuda_x11_shavite512.cu index 7d4e8de..f324d86 100644 --- a/x11/cuda_x11_shavite512.cu +++ b/x11/cuda_x11_shavite512.cu @@ -1341,13 +1341,13 @@ void shavite_gpu_init(uint32_t *sharedMemory) // GPU Hash __global__ __launch_bounds__(TPB, 7) /* 64 registers with 128,8 - 72 regs with 128,7 */ -void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +void x11_shavite512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { __shared__ uint32_t sharedMemory[1024]; shavite_gpu_init(sharedMemory); - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); @@ -1392,13 +1392,13 @@ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_h } __global__ __launch_bounds__(TPB, 7) -void x11_shavite512_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash) +void x11_shavite512_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputHash) { __shared__ uint32_t sharedMemory[1024]; shavite_gpu_init(sharedMemory); - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { const uint32_t nounce = startNounce + thread; @@ -1434,9 +1434,9 @@ void x11_shavite512_gpu_hash_80(int threads, uint32_t startNounce, void *outputH } __host__ -void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = TPB; + const uint32_t threadsperblock = TPB; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); @@ -1446,9 +1446,9 @@ void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, u } __host__ -void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order) +void x11_shavite512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order) { - const int threadsperblock = TPB; + const uint32_t threadsperblock = TPB; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); @@ -1458,7 +1458,7 @@ void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, u } __host__ -void 
x11_shavite512_cpu_init(int thr_id, int threads) +void x11_shavite512_cpu_init(int thr_id, uint32_t threads) { aes_cpu_init(thr_id); } diff --git a/x11/cuda_x11_simd512.cu b/x11/cuda_x11_simd512.cu index 9069065..ea807c6 100644 --- a/x11/cuda_x11_simd512.cu +++ b/x11/cuda_x11_simd512.cu @@ -578,7 +578,7 @@ void Expansion(const uint32_t *data, uint4 *g_temp4) /***************************************************/ __global__ __launch_bounds__(TPB, 4) -void x11_simd512_gpu_expand_64(int threads, uint32_t *g_hash, uint4 *g_temp4) +void x11_simd512_gpu_expand_64(uint32_t threads, uint32_t *g_hash, uint4 *g_temp4) { int threadBloc = (blockDim.x * blockIdx.x + threadIdx.x) / 8; if (threadBloc < threads) @@ -600,9 +600,9 @@ void x11_simd512_gpu_expand_64(int threads, uint32_t *g_hash, uint4 *g_temp4) } __global__ __launch_bounds__(TPB, 1) -void x11_simd512_gpu_compress1_64(int threads, uint32_t *g_hash, uint4 *g_fft4, uint32_t *g_state) +void x11_simd512_gpu_compress1_64(uint32_t threads, uint32_t *g_hash, uint4 *g_fft4, uint32_t *g_state) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t *Hash = &g_hash[thread * 16]; @@ -611,9 +611,9 @@ void x11_simd512_gpu_compress1_64(int threads, uint32_t *g_hash, uint4 *g_fft4, } __global__ __launch_bounds__(TPB, 1) -void x11_simd512_gpu_compress2_64(int threads, uint4 *g_fft4, uint32_t *g_state) +void x11_simd512_gpu_compress2_64(uint32_t threads, uint4 *g_fft4, uint32_t *g_state) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { Compression2(thread, g_fft4, g_state); @@ -621,9 +621,9 @@ void x11_simd512_gpu_compress2_64(int threads, uint4 *g_fft4, uint32_t *g_state) } __global__ __launch_bounds__(TPB, 2) -void x11_simd512_gpu_compress_64_maxwell(int threads, uint32_t *g_hash, uint4 *g_fft4, uint32_t *g_state) +void x11_simd512_gpu_compress_64_maxwell(uint32_t threads, uint32_t *g_hash, uint4 *g_fft4, uint32_t *g_state) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t *Hash = &g_hash[thread * 16]; @@ -633,9 +633,9 @@ void x11_simd512_gpu_compress_64_maxwell(int threads, uint32_t *g_hash, uint4 *g } __global__ __launch_bounds__(TPB, 2) -void x11_simd512_gpu_final_64(int threads, uint32_t *g_hash, uint4 *g_fft4, uint32_t *g_state) +void x11_simd512_gpu_final_64(uint32_t threads, uint32_t *g_hash, uint4 *g_fft4, uint32_t *g_state) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t *Hash = &g_hash[thread * 16]; @@ -644,7 +644,7 @@ void x11_simd512_gpu_final_64(int threads, uint32_t *g_hash, uint4 *g_fft4, uint } __host__ -int x11_simd512_cpu_init(int thr_id, int threads) +int x11_simd512_cpu_init(int thr_id, uint32_t threads) { CUDA_CALL_OR_RET_X(cudaMalloc(&d_temp4[thr_id], 64*sizeof(uint4)*threads), (int) err); /* todo: prevent -i 21 */ CUDA_CALL_OR_RET_X(cudaMalloc(&d_state[thr_id], 32*sizeof(int)*threads), (int) err); @@ -671,9 +671,9 @@ int x11_simd512_cpu_init(int thr_id, int threads) } __host__ -void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - 
const int threadsperblock = TPB; + const uint32_t threadsperblock = TPB; dim3 block(threadsperblock); dim3 grid((threads + threadsperblock-1) / threadsperblock); diff --git a/x11/fresh.cu b/x11/fresh.cu index 0ebe3ae..9cb3e57 100644 --- a/x11/fresh.cu +++ b/x11/fresh.cu @@ -14,19 +14,19 @@ extern "C" { static uint32_t *d_hash[MAX_GPUS]; -extern void x11_shavite512_cpu_init(int thr_id, int threads); +extern void x11_shavite512_cpu_init(int thr_id, uint32_t threads); extern void x11_shavite512_setBlock_80(void *pdata); -extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); -extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_shavite512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern int x11_simd512_cpu_init(int thr_id, int threads); -extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern int x11_simd512_cpu_init(int thr_id, uint32_t threads); +extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_echo512_cpu_init(int thr_id, int threads); -extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_echo512_cpu_init(int thr_id, uint32_t threads); +extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, int threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, +extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); +extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); @@ -77,8 +77,8 @@ extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata, const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; - int throughput = (int) device_intensity(thr_id, __func__, 1 << 19); - throughput = min(throughput, (int) (max_nonce - first_nonce)); + uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); + throughput = min(throughput, (max_nonce - first_nonce)); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00ff; diff --git a/x11/s3.cu b/x11/s3.cu index dd2e390..112dd4b 100644 --- a/x11/s3.cu +++ b/x11/s3.cu @@ -15,15 +15,15 @@ extern "C" { static uint32_t *d_hash[MAX_GPUS]; -extern void x11_shavite512_cpu_init(int thr_id, int threads); -extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void x11_shavite512_cpu_init(int thr_id, uint32_t threads); +extern void x11_shavite512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void x11_shavite512_setBlock_80(void *pdata); -extern int x11_simd512_cpu_init(int thr_id, int threads); -extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t 
*d_hash, int order); +extern int x11_simd512_cpu_init(int thr_id, uint32_t threads); +extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_skein512_cpu_init(int thr_id, int threads); -extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); +extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); /* CPU HASH */ extern "C" void s3hash(void *output, const void *input) @@ -62,8 +62,8 @@ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata, // reduce by one the intensity on windows intensity--; #endif - int throughput = (int) device_intensity(thr_id, __func__, 1 << intensity); - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = device_intensity(thr_id, __func__, 1 << intensity); + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0xF; diff --git a/x11/x11.cu b/x11/x11.cu index 5dc2980..df072b2 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -22,40 +22,40 @@ extern "C" static uint32_t *d_hash[MAX_GPUS]; -extern void quark_blake512_cpu_init(int thr_id, int threads); +extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); -extern void quark_bmw512_cpu_init(int thr_id, int threads); -extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); +extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_groestl512_cpu_init(int thr_id, int threads); -extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_doublegroestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads); +extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_skein512_cpu_init(int thr_id, int threads); -extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); +extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_keccak512_cpu_init(int thr_id, int threads); -extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern 
void quark_keccak512_cpu_init(int thr_id, uint32_t threads); +extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_jh512_cpu_init(int thr_id, int threads); -extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_jh512_cpu_init(int thr_id, uint32_t threads); +extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_luffaCubehash512_cpu_init(int thr_id, int threads); -extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_luffaCubehash512_cpu_init(int thr_id, uint32_t threads); +extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_shavite512_cpu_init(int thr_id, int threads); -extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_shavite512_cpu_init(int thr_id, uint32_t threads); +extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern int x11_simd512_cpu_init(int thr_id, int threads); -extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern int x11_simd512_cpu_init(int thr_id, uint32_t threads); +extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_echo512_cpu_init(int thr_id, int threads); -extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_echo512_cpu_init(int thr_id, uint32_t threads); +extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, int threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, +extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); +extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); // X11 CPU Hash @@ -133,8 +133,8 @@ extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, { const uint32_t first_nonce = pdata[19]; int intensity = (device_sm[device_map[thr_id]] >= 500 && !is_windows()) ? 
20 : 19; - int throughput = (int) device_intensity(thr_id, __func__, 1 << intensity); // 19=256*256*8; - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = device_intensity(thr_id, __func__, 1 << intensity); // 19=256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x5; diff --git a/x13/cuda_x13_fugue512.cu b/x13/cuda_x13_fugue512.cu index 4360a0f..ea26f17 100644 --- a/x13/cuda_x13_fugue512.cu +++ b/x13/cuda_x13_fugue512.cu @@ -553,7 +553,7 @@ static const uint32_t mixtab0_cpu[] = { /***************************************************/ // Die Hash-Funktion -__global__ void x13_fugue512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +__global__ void x13_fugue512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { extern __shared__ char mixtabs[]; @@ -565,7 +565,7 @@ __global__ void x13_fugue512_gpu_hash_64(int threads, uint32_t startNounce, uint __syncthreads(); int i; - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread); @@ -667,7 +667,7 @@ __global__ void x13_fugue512_gpu_hash_64(int threads, uint32_t startNounce, uint { cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(); \ cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } -__host__ void x13_fugue512_cpu_init(int thr_id, int threads) +__host__ void x13_fugue512_cpu_init(int thr_id, uint32_t threads) { texDef(mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256); texDef(mixTab1Tex, mixTab1m, mixtab1_cpu, sizeof(uint32_t)*256); @@ -675,9 +675,9 @@ __host__ void x13_fugue512_cpu_init(int thr_id, int threads) texDef(mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256); } -__host__ void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +__host__ void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = 256; + const uint32_t threadsperblock = 256; // berechne wie viele Thread Blocks wir brauchen dim3 grid((threads + threadsperblock-1)/threadsperblock); diff --git a/x13/cuda_x13_hamsi512.cu b/x13/cuda_x13_hamsi512.cu index f2085e5..69070fb 100644 --- a/x13/cuda_x13_hamsi512.cu +++ b/x13/cuda_x13_hamsi512.cu @@ -587,9 +587,9 @@ static const uint32_t T512[64][16] = { }; __global__ -void x13_hamsi512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +void x13_hamsi512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = (g_nonceVector != NULL) ? 
g_nonceVector[thread] : (startNounce + thread); @@ -678,7 +678,7 @@ void x13_hamsi512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_has } __host__ -void x13_hamsi512_cpu_init(int thr_id, int threads) +void x13_hamsi512_cpu_init(int thr_id, uint32_t threads) { cudaMemcpyToSymbol(d_alpha_n, alpha_n, sizeof(uint32_t)*32, 0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(d_alpha_f, alpha_f, sizeof(uint32_t)*32, 0, cudaMemcpyHostToDevice); @@ -686,9 +686,9 @@ void x13_hamsi512_cpu_init(int thr_id, int threads) } __host__ -void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = 128; + const uint32_t threadsperblock = 128; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); diff --git a/x13/x13.cu b/x13/x13.cu index abeca30..35cdb36 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -26,45 +26,45 @@ extern "C" static uint32_t *d_hash[MAX_GPUS]; -extern void quark_blake512_cpu_init(int thr_id, int threads); +extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); -extern void quark_bmw512_cpu_init(int thr_id, int threads); -extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); +extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_groestl512_cpu_init(int thr_id, int threads); -extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads); +extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_skein512_cpu_init(int thr_id, int threads); -extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); +extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_keccak512_cpu_init(int thr_id, int threads); -extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_keccak512_cpu_init(int thr_id, uint32_t threads); +extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_jh512_cpu_init(int thr_id, int threads); -extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_jh512_cpu_init(int thr_id, uint32_t threads); +extern void quark_jh512_cpu_hash_64(int 
thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_luffaCubehash512_cpu_init(int thr_id, int threads); -extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_luffaCubehash512_cpu_init(int thr_id, uint32_t threads); +extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_shavite512_cpu_init(int thr_id, int threads); -extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_shavite512_cpu_init(int thr_id, uint32_t threads); +extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern int x11_simd512_cpu_init(int thr_id, int threads); -extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern int x11_simd512_cpu_init(int thr_id, uint32_t threads); +extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_echo512_cpu_init(int thr_id, int threads); -extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_echo512_cpu_init(int thr_id, uint32_t threads); +extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x13_hamsi512_cpu_init(int thr_id, int threads); -extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x13_hamsi512_cpu_init(int thr_id, uint32_t threads); +extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x13_fugue512_cpu_init(int thr_id, int threads); -extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads); +extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, int threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, +extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); +extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); // X13 CPU Hash @@ -152,8 +152,8 @@ extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, { const uint32_t first_nonce = pdata[19]; int intensity = 19; // (device_sm[device_map[thr_id]] > 500 && !is_windows()) ? 
20 : 19; - int throughput = (int) device_intensity(thr_id, __func__, 1 << intensity); // 19=256*256*8; - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = device_intensity(thr_id, __func__, 1 << intensity); // 19=256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x000f; diff --git a/x15/cuda_x14_shabal512.cu b/x15/cuda_x14_shabal512.cu index 5539ea5..a1d5a8d 100644 --- a/x15/cuda_x14_shabal512.cu +++ b/x15/cuda_x14_shabal512.cu @@ -361,11 +361,11 @@ static const uint32_t d_C512[] = { /***************************************************/ // GPU Hash Function -__global__ void x14_shabal512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +__global__ void x14_shabal512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { __syncthreads(); - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { @@ -453,14 +453,14 @@ __global__ void x14_shabal512_gpu_hash_64(int threads, uint32_t startNounce, uin } } -__host__ void x14_shabal512_cpu_init(int thr_id, int threads) +__host__ void x14_shabal512_cpu_init(int thr_id, uint32_t threads) { } // #include -__host__ void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +__host__ void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = 256; + const uint32_t threadsperblock = 256; // berechne wie viele Thread Blocks wir brauchen dim3 grid((threads + threadsperblock-1)/threadsperblock); diff --git a/x15/cuda_x15_whirlpool.cu b/x15/cuda_x15_whirlpool.cu index 1774f6d..f53e941 100644 --- a/x15/cuda_x15_whirlpool.cu +++ b/x15/cuda_x15_whirlpool.cu @@ -2285,7 +2285,7 @@ const int i0, const int i1, const int i2, const int i3, const int i4, const int __global__ -void oldwhirlpool_gpu_hash_80(int threads, uint32_t startNounce, void *outputHash) +void oldwhirlpool_gpu_hash_80(uint32_t threads, uint32_t startNounce, void *outputHash) { __shared__ uint64_t sharedMemory[2048]; @@ -2302,7 +2302,7 @@ void oldwhirlpool_gpu_hash_80(int threads, uint32_t startNounce, void *outputHas #endif } - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = startNounce + thread; @@ -2381,7 +2381,7 @@ void oldwhirlpool_gpu_hash_80(int threads, uint32_t startNounce, void *outputHas } __global__ -void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +void x15_whirlpool_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { __shared__ uint64_t sharedMemory[2048]; @@ -2398,7 +2398,7 @@ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_ha #endif } - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = g_nonceVector ? 
g_nonceVector[thread] : (startNounce + thread); @@ -2457,7 +2457,7 @@ void x15_whirlpool_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_ha } __global__ -void oldwhirlpool_gpu_finalhash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector, uint32_t *resNounce) +void oldwhirlpool_gpu_finalhash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector, uint32_t *resNounce) { __shared__ uint64_t sharedMemory[2048]; @@ -2475,7 +2475,7 @@ void oldwhirlpool_gpu_finalhash_64(int threads, uint32_t startNounce, uint64_t * #endif } - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = g_nonceVector ? g_nonceVector[thread] : (startNounce + thread); @@ -2544,7 +2544,7 @@ void oldwhirlpool_gpu_finalhash_64(int threads, uint32_t startNounce, uint64_t * } __host__ -extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode) +extern void x15_whirlpool_cpu_init(int thr_id, uint32_t threads, int mode) { switch (mode) { case 0: /* x15 with rotated T1-T7 (based on T0) */ @@ -2586,7 +2586,7 @@ extern void x15_whirlpool_cpu_free(int thr_id) } __host__ -extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +extern void x15_whirlpool_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { dim3 grid((threads + threadsperblock-1) / threadsperblock); dim3 block(threadsperblock); @@ -2599,7 +2599,7 @@ extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNou } __host__ -extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { uint32_t result = 0xffffffff; @@ -2621,7 +2621,7 @@ extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t } __host__ -void whirlpool512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_outputHash, int order) +void whirlpool512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int order) { // berechne wie viele Thread Blocks wir brauchen dim3 grid((threads + threadsperblock-1) / threadsperblock); diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu index 193e2e7..8ff3001 100644 --- a/x15/whirlpool.cu +++ b/x15/whirlpool.cu @@ -11,12 +11,12 @@ extern "C" static uint32_t *d_hash[MAX_GPUS]; -extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode); -extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x15_whirlpool_cpu_init(int thr_id, uint32_t threads, int mode); +extern void x15_whirlpool_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void whirlpool512_setBlock_80(void *pdata, const void *ptarget); -extern void whirlpool512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); -extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void whirlpool512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t 
startNounce, uint32_t *d_hash, int order); +extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); // CPU Hash function @@ -57,8 +57,8 @@ extern "C" int scanhash_whc(int thr_id, uint32_t *pdata, { const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; - int throughput = (int) device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8; - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x0000ff; diff --git a/x15/x14.cu b/x15/x14.cu index 05b9201..d9fdc4a 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -29,48 +29,48 @@ extern "C" { // Memory for the hash functions static uint32_t *d_hash[MAX_GPUS]; -extern void quark_blake512_cpu_init(int thr_id, int threads); +extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); -extern void quark_bmw512_cpu_init(int thr_id, int threads); -extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); +extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_groestl512_cpu_init(int thr_id, int threads); -extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads); +extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_skein512_cpu_init(int thr_id, int threads); -extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); +extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_keccak512_cpu_init(int thr_id, int threads); -extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_keccak512_cpu_init(int thr_id, uint32_t threads); +extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_jh512_cpu_init(int thr_id, int threads); -extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_jh512_cpu_init(int thr_id, uint32_t threads); +extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_luffaCubehash512_cpu_init(int thr_id, int threads); -extern void x11_luffaCubehash512_cpu_hash_64(int 
thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_luffaCubehash512_cpu_init(int thr_id, uint32_t threads); +extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_shavite512_cpu_init(int thr_id, int threads); -extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_shavite512_cpu_init(int thr_id, uint32_t threads); +extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern int x11_simd512_cpu_init(int thr_id, int threads); -extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern int x11_simd512_cpu_init(int thr_id, uint32_t threads); +extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_echo512_cpu_init(int thr_id, int threads); -extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_echo512_cpu_init(int thr_id, uint32_t threads); +extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x13_hamsi512_cpu_init(int thr_id, int threads); -extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x13_hamsi512_cpu_init(int thr_id, uint32_t threads); +extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x13_fugue512_cpu_init(int thr_id, int threads); -extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads); +extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x14_shabal512_cpu_init(int thr_id, int threads); -extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads); +extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, int threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, +extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); +extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); // X14 CPU Hash function @@ -164,8 +164,8 @@ extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; - int throughput = (int) device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8; - throughput = min(throughput, 
(int)(max_nonce - first_nonce)); + uint32_t throughput = device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x000f; diff --git a/x15/x15.cu b/x15/x15.cu index 34b3124..f8c7985 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -30,52 +30,52 @@ extern "C" { // Memory for the hash functions static uint32_t *d_hash[MAX_GPUS]; -extern void quark_blake512_cpu_init(int thr_id, int threads); +extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); -extern void quark_bmw512_cpu_init(int thr_id, int threads); -extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); +extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_groestl512_cpu_init(int thr_id, int threads); -extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads); +extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_skein512_cpu_init(int thr_id, int threads); -extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); +extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_keccak512_cpu_init(int thr_id, int threads); -extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_keccak512_cpu_init(int thr_id, uint32_t threads); +extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_jh512_cpu_init(int thr_id, int threads); -extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_jh512_cpu_init(int thr_id, uint32_t threads); +extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_luffaCubehash512_cpu_init(int thr_id, int threads); -extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_luffaCubehash512_cpu_init(int thr_id, uint32_t threads); +extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_shavite512_cpu_init(int thr_id, int threads); -extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t 
*d_nonceVector, uint32_t *d_hash, int order); +extern void x11_shavite512_cpu_init(int thr_id, uint32_t threads); +extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern int x11_simd512_cpu_init(int thr_id, int threads); -extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern int x11_simd512_cpu_init(int thr_id, uint32_t threads); +extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_echo512_cpu_init(int thr_id, int threads); -extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_echo512_cpu_init(int thr_id, uint32_t threads); +extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x13_hamsi512_cpu_init(int thr_id, int threads); -extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x13_hamsi512_cpu_init(int thr_id, uint32_t threads); +extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x13_fugue512_cpu_init(int thr_id, int threads); -extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads); +extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x14_shabal512_cpu_init(int thr_id, int threads); -extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads); +extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode); -extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x15_whirlpool_cpu_init(int thr_id, uint32_t threads, int mode); +extern void x15_whirlpool_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x15_whirlpool_cpu_free(int thr_id); -extern void quark_compactTest_cpu_init(int thr_id, int threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, +extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); +extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); // X15 CPU Hash function @@ -174,8 +174,8 @@ extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, const uint32_t first_nonce = pdata[19]; uint32_t endiandata[20]; - int throughput = (int) device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8; - throughput = min(throughput, 
(int)(max_nonce - first_nonce)); + uint32_t throughput = device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00FF; diff --git a/x17/cuda_x17_haval512.cu b/x17/cuda_x17_haval512.cu index ab2cc86..c354782 100644 --- a/x17/cuda_x17_haval512.cu +++ b/x17/cuda_x17_haval512.cu @@ -291,9 +291,9 @@ static const uint32_t c_initVector[8] = { __global__ -void x17_haval256_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +void x17_haval256_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = g_nonceVector ? g_nonceVector[thread] : (startNounce + thread); @@ -378,15 +378,15 @@ void x17_haval256_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_has } __host__ -void x17_haval256_cpu_init(int thr_id, int threads) +void x17_haval256_cpu_init(int thr_id, uint32_t threads) { cudaMemcpyToSymbol(initVector,c_initVector,sizeof(c_initVector),0, cudaMemcpyHostToDevice); } __host__ -void x17_haval256_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +void x17_haval256_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN + const uint32_t threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN // berechne wie viele Thread Blocks wir brauchen dim3 grid((threads + threadsperblock-1)/threadsperblock); diff --git a/x17/cuda_x17_sha512.cu b/x17/cuda_x17_sha512.cu index e996083..e67a1f0 100644 --- a/x17/cuda_x17_sha512.cu +++ b/x17/cuda_x17_sha512.cu @@ -149,9 +149,9 @@ uint64_t Tone(const uint64_t* sharedMemory, uint64_t r[8], uint64_t W[80], uint3 } __global__ -void x17_sha512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) +void x17_sha512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint64_t *g_hash, uint32_t *g_nonceVector) { - int thread = (blockDim.x * blockIdx.x + threadIdx.x); + uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x); if (thread < threads) { uint32_t nounce = (g_nonceVector != NULL) ? 
g_nonceVector[thread] : (startNounce + thread); @@ -215,16 +215,16 @@ void x17_sha512_gpu_hash_64(int threads, uint32_t startNounce, uint64_t *g_hash, } __host__ -void x17_sha512_cpu_init(int thr_id, int threads) +void x17_sha512_cpu_init(int thr_id, uint32_t threads) { cudaMemcpyToSymbol(K_512,K512,80*sizeof(uint64_t),0, cudaMemcpyHostToDevice); cudaMemcpyToSymbol(H_512,H512,sizeof(H512),0, cudaMemcpyHostToDevice); } __host__ -void x17_sha512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) +void x17_sha512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order) { - const int threadsperblock = 256; + const uint32_t threadsperblock = 256; dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); diff --git a/x17/x17.cu b/x17/x17.cu index f8c47a2..ffd70c2 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -33,57 +33,57 @@ extern "C" static uint32_t *d_hash[MAX_GPUS]; -extern void quark_blake512_cpu_init(int thr_id, int threads); +extern void quark_blake512_cpu_init(int thr_id, uint32_t threads); extern void quark_blake512_cpu_setBlock_80(void *pdata); -extern void quark_blake512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); +extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int order); -extern void quark_bmw512_cpu_init(int thr_id, int threads); -extern void quark_bmw512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_bmw512_cpu_init(int thr_id, uint32_t threads); +extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_groestl512_cpu_init(int thr_id, int threads); -extern void quark_groestl512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads); +extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_skein512_cpu_init(int thr_id, int threads); -extern void quark_skein512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); +extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_keccak512_cpu_init(int thr_id, int threads); -extern void quark_keccak512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_keccak512_cpu_init(int thr_id, uint32_t threads); +extern void quark_keccak512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_jh512_cpu_init(int thr_id, int threads); -extern void quark_jh512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void quark_jh512_cpu_init(int thr_id, uint32_t threads); +extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern 
void x11_luffaCubehash512_cpu_init(int thr_id, int threads); -extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_luffaCubehash512_cpu_init(int thr_id, uint32_t threads); +extern void x11_luffaCubehash512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_shavite512_cpu_init(int thr_id, int threads); -extern void x11_shavite512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_shavite512_cpu_init(int thr_id, uint32_t threads); +extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern int x11_simd512_cpu_init(int thr_id, int threads); -extern void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern int x11_simd512_cpu_init(int thr_id, uint32_t threads); +extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x11_echo512_cpu_init(int thr_id, int threads); -extern void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x11_echo512_cpu_init(int thr_id, uint32_t threads); +extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x13_hamsi512_cpu_init(int thr_id, int threads); -extern void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x13_hamsi512_cpu_init(int thr_id, uint32_t threads); +extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x13_fugue512_cpu_init(int thr_id, int threads); -extern void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads); +extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x14_shabal512_cpu_init(int thr_id, int threads); -extern void x14_shabal512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads); +extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x15_whirlpool_cpu_init(int thr_id, int threads, int flag); -extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x15_whirlpool_cpu_init(int thr_id, uint32_t threads, int flag); +extern void x15_whirlpool_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x17_sha512_cpu_init(int thr_id, int threads); -extern void x17_sha512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern 
void x17_sha512_cpu_init(int thr_id, uint32_t threads); +extern void x17_sha512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void x17_haval256_cpu_init(int thr_id, int threads); -extern void x17_haval256_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); +extern void x17_haval256_cpu_init(int thr_id, uint32_t threads); +extern void x17_haval256_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern void quark_compactTest_cpu_init(int thr_id, int threads); -extern void quark_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, +extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads); +extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse, int order); @@ -192,8 +192,8 @@ extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, { const uint32_t first_nonce = pdata[19]; - int throughput = (int) device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8; - throughput = min(throughput, (int)(max_nonce - first_nonce)); + uint32_t throughput = device_intensity(thr_id, __func__, 1U << 19); // 19=256*256*8; + throughput = min(throughput, max_nonce - first_nonce); if (opt_benchmark) ((uint32_t*)ptarget)[7] = 0x00ff;
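For reference, the two expressions these hunks keep touching are the scanhash throughput clamp and the round-up grid-size division performed before each kernel launch. Below is a minimal standalone sketch of both under the new unsigned types; the helper names clamp_throughput and launch_width and the sample constants are illustrative only and do not appear in the patch.

#include <cstdint>
#include <cstdio>
#include <algorithm>

// Mirrors "throughput = min(throughput, max_nonce - first_nonce)".
// With the old signed form, the (int)(max_nonce - first_nonce) cast can wrap
// to a negative value on common implementations once the remaining nonce
// range exceeds INT_MAX, and min() would then pick that bogus value as the
// launch width; keeping the whole expression in uint32_t avoids the cast.
static uint32_t clamp_throughput(uint32_t requested, uint32_t first_nonce, uint32_t max_nonce)
{
	return std::min(requested, max_nonce - first_nonce);
}

// Mirrors "dim3 grid((threads + threadsperblock-1)/threadsperblock)":
// round-up integer division so the final, partially filled block is still launched.
static uint32_t launch_width(uint32_t threads, uint32_t threadsperblock)
{
	return (threads + threadsperblock - 1) / threadsperblock;
}

int main()
{
	const uint32_t first_nonce = 0xFF000000u;   // sample values only
	const uint32_t max_nonce   = 0xFFFFFFFFu;
	const uint32_t requested   = 1u << 19;      // "19=256*256*8" in the comments above

	uint32_t throughput = clamp_throughput(requested, first_nonce, max_nonce);
	printf("throughput=%u blocks=%u\n", throughput, launch_width(throughput, 256u));
	return 0;
}

The same arithmetic is what each *_cpu_hash_* wrapper performs with its local threadsperblock constant (256 or 128 depending on the kernel), so widening threads to uint32_t in the wrappers keeps the grid computation consistent with the now-unsigned throughput passed down from the scanhash functions.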