From cafd4477d7eb77340011e1b485ada1a4adf8e282 Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Thu, 22 Jan 2015 04:34:30 +0100 Subject: [PATCH] Handle a maximum of 16 gpus (vs 8 before) Some cards have 2 gpus on board... --- Algo256/blake256.cu | 6 +++--- Algo256/cuda_fugue256.cu | 4 ++-- Algo256/cuda_groestl256.cu | 4 ++-- Algo256/cuda_keccak256.cu | 4 ++-- Algo256/keccak256.cu | 4 ++-- JHA/cuda_jha_compactionTest.cu | 11 ++++++----- JHA/jackpotcoin.cu | 12 ++++++------ ccminer.cpp | 11 ++++++++--- cuda_checkhash.cu | 6 +++--- cuda_groestlcoin.cu | 2 +- cuda_helper.h | 8 ++++++-- cuda_myriadgroestl.cu | 4 ++-- cuda_nist5.cu | 4 ++-- fuguecoin.cpp | 4 ++-- groestlcoin.cpp | 2 +- heavy/cuda_blake512.cu | 6 +++--- heavy/cuda_combine.cu | 12 ++++++------ heavy/cuda_groestl512.cu | 6 +++--- heavy/cuda_hefty1.cu | 2 +- heavy/cuda_keccak512.cu | 10 +++++----- heavy/cuda_sha256.cu | 6 +++--- heavy/heavy.cu | 14 +++++++------- lyra2/lyra2RE.cu | 4 ++-- miner.h | 7 ++++--- myriadgroestl.cpp | 2 +- nvml.cpp | 4 ++-- pentablake.cu | 8 ++++---- quark/animecoin.cu | 12 ++++++------ quark/cuda_quark_compactionTest.cu | 11 ++++++----- quark/quarkcoin.cu | 14 +++++++------- qubit/deep.cu | 4 ++-- qubit/doom.cu | 4 ++-- qubit/qubit.cu | 4 ++-- qubit/qubit_luffa512.cu | 4 ++-- x11/cuda_x11_simd512.cu | 4 ++-- x11/fresh.cu | 4 ++-- x11/s3.cu | 4 ++-- x11/x11.cu | 4 ++-- x13/x13.cu | 4 ++-- x15/cuda_x15_whirlpool.cu | 4 ++-- x15/whirlpool.cu | 4 ++-- x15/x14.cu | 4 ++-- x15/x15.cu | 4 ++-- x17/x17.cu | 4 ++-- 44 files changed, 136 insertions(+), 124 deletions(-) diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu index 6cbd917..baaf8db 100644 --- a/Algo256/blake256.cu +++ b/Algo256/blake256.cu @@ -49,8 +49,8 @@ extern "C" uint32_t crc32_u32t(const uint32_t *buf, size_t size); #endif /* 8 adapters max */ -static uint32_t *d_resNonce[8]; -static uint32_t *h_resNonce[8]; +static uint32_t *d_resNonce[MAX_GPUS]; +static uint32_t *h_resNonce[MAX_GPUS]; /* max count of found nonces in one call */ #define NBN 2 @@ -377,7 +377,7 @@ void blake256_cpu_setBlock_16(uint32_t *penddata, const uint32_t *midstate, cons } #endif -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done, int8_t blakerounds=14) diff --git a/Algo256/cuda_fugue256.cu b/Algo256/cuda_fugue256.cu index cb5f4cd..a2ebaeb 100644 --- a/Algo256/cuda_fugue256.cu +++ b/Algo256/cuda_fugue256.cu @@ -8,8 +8,8 @@ #define USE_SHARED 1 -uint32_t *d_fugue256_hashoutput[8]; -uint32_t *d_resultNonce[8]; +uint32_t *d_fugue256_hashoutput[MAX_GPUS]; +uint32_t *d_resultNonce[MAX_GPUS]; __constant__ uint32_t GPUstate[30]; // Single GPU __constant__ uint32_t pTarget[8]; // Single GPU diff --git a/Algo256/cuda_groestl256.cu b/Algo256/cuda_groestl256.cu index 8007f24..0f27b8a 100644 --- a/Algo256/cuda_groestl256.cu +++ b/Algo256/cuda_groestl256.cu @@ -2,8 +2,8 @@ #include "cuda_helper.h" -uint32_t *d_gnounce[8]; -uint32_t *d_GNonce[8]; +uint32_t *d_gnounce[MAX_GPUS]; +uint32_t *d_GNonce[MAX_GPUS]; __constant__ uint32_t pTarget[8]; diff --git a/Algo256/cuda_keccak256.cu b/Algo256/cuda_keccak256.cu index 8688a8a..cf385b4 100644 --- a/Algo256/cuda_keccak256.cu +++ b/Algo256/cuda_keccak256.cu @@ -22,8 +22,8 @@ static const uint64_t host_keccak_round_constants[24] = { 0x0000000080000001ull, 0x8000000080008008ull }; -uint32_t *d_nounce[8]; -uint32_t *d_KNonce[8]; +uint32_t *d_nounce[MAX_GPUS]; +uint32_t *d_KNonce[MAX_GPUS]; __constant__ uint32_t pTarget[8]; __constant__ uint64_t keccak_round_constants[24]; diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu index 59a1d9b..a555c9a 100644 --- a/Algo256/keccak256.cu +++ b/Algo256/keccak256.cu @@ -14,7 +14,7 @@ extern "C" #include "cuda_helper.h" -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void keccak256_cpu_init(int thr_id, int threads); extern void keccak256_setBlock_80(void *pdata,const void *ptarget); @@ -34,7 +34,7 @@ extern "C" void keccak256_hash(void *state, const void *input) memcpy(state, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/JHA/cuda_jha_compactionTest.cu b/JHA/cuda_jha_compactionTest.cu index 20e73db..f701e0d 100644 --- a/JHA/cuda_jha_compactionTest.cu +++ b/JHA/cuda_jha_compactionTest.cu @@ -4,11 +4,11 @@ #include "cuda_helper.h" #include -static uint32_t *d_tempBranch1Nonces[8]; -static uint32_t *d_numValid[8]; -static uint32_t *h_numValid[8]; +static uint32_t *d_tempBranch1Nonces[MAX_GPUS]; +static uint32_t *d_numValid[MAX_GPUS]; +static uint32_t *h_numValid[MAX_GPUS]; -static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen +static uint32_t *d_partSum[2][MAX_GPUS]; // für bis zu vier partielle Summen // True/False tester typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash); @@ -26,7 +26,8 @@ __device__ uint32_t JackpotFalseTest(uint32_t *inpHash) } __device__ cuda_compactTestFunction_t d_JackpotTrueFunction = JackpotTrueTest, d_JackpotFalseFunction = JackpotFalseTest; -cuda_compactTestFunction_t h_JackpotTrueFunction[8], h_JackpotFalseFunction[8]; + +cuda_compactTestFunction_t h_JackpotTrueFunction[MAX_GPUS], h_JackpotFalseFunction[MAX_GPUS]; // Setup-Funktionen __host__ void jackpot_compactTest_cpu_init(int thr_id, int threads) diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu index 29059a3..13c7a31 100644 --- a/JHA/jackpotcoin.cu +++ b/JHA/jackpotcoin.cu @@ -10,7 +10,7 @@ extern "C" #include "miner.h" #include "cuda_helper.h" -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void jackpot_keccak512_cpu_init(int thr_id, int threads); extern void jackpot_keccak512_cpu_setBlock(void *pdata, size_t inlen); @@ -37,10 +37,10 @@ extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t st extern uint32_t cuda_check_hash_branch(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); // Speicher zur Generierung der Noncevektoren für die bedingten Hashes -static uint32_t *d_jackpotNonces[8]; -static uint32_t *d_branch1Nonces[8]; -static uint32_t *d_branch2Nonces[8]; -static uint32_t *d_branch3Nonces[8]; +static uint32_t *d_jackpotNonces[MAX_GPUS]; +static uint32_t *d_branch1Nonces[MAX_GPUS]; +static uint32_t *d_branch2Nonces[MAX_GPUS]; +static uint32_t *d_branch3Nonces[MAX_GPUS]; // Original jackpothash Funktion aus einem miner Quelltext extern "C" unsigned int jackpothash(void *state, const void *input) @@ -85,7 +85,7 @@ extern "C" unsigned int jackpothash(void *state, const void *input) return round; } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/ccminer.cpp b/ccminer.cpp index 661d4e3..b74e4d7 100644 --- a/ccminer.cpp +++ b/ccminer.cpp @@ -214,9 +214,9 @@ bool opt_trust_pool = false; uint16_t opt_vote = 9999; int num_cpus; int active_gpus; -char * device_name[8]; -short device_map[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; -long device_sm[8] = { 0 }; +char * device_name[MAX_GPUS]; +short device_map[MAX_GPUS] = { 0 }; +long device_sm[MAX_GPUS] = { 0 }; char *rpc_user = NULL; static char *rpc_url; static char *rpc_userpass; @@ -2223,6 +2223,11 @@ int main(int argc, char *argv[]) if (num_cpus < 1) num_cpus = 1; + // default thread to device map + for (i = 0; i < MAX_GPUS; i++) { + device_map[i] = i; + } + // number of gpus active_gpus = cuda_num_devices(); cuda_devicenames(); diff --git a/cuda_checkhash.cu b/cuda_checkhash.cu index 236bfbf..84794a4 100644 --- a/cuda_checkhash.cu +++ b/cuda_checkhash.cu @@ -10,9 +10,9 @@ __constant__ uint32_t pTarget[8]; // 32 bytes -// store 8 device arrays of 8 nonces -static uint32_t* h_resNonces[8]; -static uint32_t* d_resNonces[8]; +// store MAX_GPUS device arrays of 8 nonces +static uint32_t* h_resNonces[MAX_GPUS]; +static uint32_t* d_resNonces[MAX_GPUS]; __host__ void cuda_check_cpu_init(int thr_id, int threads) diff --git a/cuda_groestlcoin.cu b/cuda_groestlcoin.cu index 6d067f9..375c329 100644 --- a/cuda_groestlcoin.cu +++ b/cuda_groestlcoin.cu @@ -8,7 +8,7 @@ // globaler Speicher für alle HeftyHashes aller Threads __constant__ uint32_t pTarget[8]; // Single GPU -extern uint32_t *d_resultNonce[8]; +extern uint32_t *d_resultNonce[MAX_GPUS]; __constant__ uint32_t groestlcoin_gpu_msg[32]; diff --git a/cuda_helper.h b/cuda_helper.h index e7e0967..2a20e6c 100644 --- a/cuda_helper.h +++ b/cuda_helper.h @@ -13,8 +13,12 @@ #include -extern "C" short device_map[8]; -extern "C" long device_sm[8]; +#ifndef MAX_GPUS +#define MAX_GPUS 16 +#endif + +extern "C" short device_map[MAX_GPUS]; +extern "C" long device_sm[MAX_GPUS]; // common functions extern void cuda_check_cpu_init(int thr_id, int threads); diff --git a/cuda_myriadgroestl.cu b/cuda_myriadgroestl.cu index 480383a..f113744 100644 --- a/cuda_myriadgroestl.cu +++ b/cuda_myriadgroestl.cu @@ -15,8 +15,8 @@ // globaler Speicher für alle HeftyHashes aller Threads __constant__ uint32_t pTarget[8]; // Single GPU -uint32_t *d_outputHashes[8]; -extern uint32_t *d_resultNonce[8]; +uint32_t *d_outputHashes[MAX_GPUS]; +extern uint32_t *d_resultNonce[MAX_GPUS]; __constant__ uint32_t myriadgroestl_gpu_msg[32]; diff --git a/cuda_nist5.cu b/cuda_nist5.cu index e88017f..45131be 100644 --- a/cuda_nist5.cu +++ b/cuda_nist5.cu @@ -11,7 +11,7 @@ extern "C" #include "cuda_helper.h" -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_setBlock_80(void *pdata); @@ -64,7 +64,7 @@ extern "C" void nist5hash(void *state, const void *input) memcpy(state, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/fuguecoin.cpp b/fuguecoin.cpp index 0362655..a47c173 100644 --- a/fuguecoin.cpp +++ b/fuguecoin.cpp @@ -14,13 +14,13 @@ extern "C" void my_fugue256_close(void *cc, void *dst); extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); // vorbereitete Kontexte nach den ersten 80 Bytes -sph_fugue256_context ctx_fugue_const[8]; +// sph_fugue256_context ctx_fugue_const[MAX_GPUS]; #define SWAP32(x) \ ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \ (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu)) -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) diff --git a/groestlcoin.cpp b/groestlcoin.cpp index 23a8981..fc8d4af 100644 --- a/groestlcoin.cpp +++ b/groestlcoin.cpp @@ -58,7 +58,7 @@ extern "C" void groestlhash(void *state, const void *input) memcpy(state, hashB, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) diff --git a/heavy/cuda_blake512.cu b/heavy/cuda_blake512.cu index b177514..1a5df4e 100644 --- a/heavy/cuda_blake512.cu +++ b/heavy/cuda_blake512.cu @@ -4,11 +4,11 @@ #include "cuda_helper.h" // globaler Speicher für alle HeftyHashes aller Threads -extern uint32_t *heavy_heftyHashes[8]; -extern uint32_t *heavy_nonceVector[8]; +extern uint32_t *heavy_heftyHashes[MAX_GPUS]; +extern uint32_t *heavy_nonceVector[MAX_GPUS]; // globaler Speicher für unsere Ergebnisse -uint32_t *d_hash5output[8]; +uint32_t *d_hash5output[MAX_GPUS]; // die Message (112 bzw. 116 Bytes) mit Padding zur Berechnung auf der GPU __constant__ uint64_t c_PaddedMessage[16]; // padded message (80/84+32 bytes + padding) diff --git a/heavy/cuda_combine.cu b/heavy/cuda_combine.cu index b0b2ead..c813f24 100644 --- a/heavy/cuda_combine.cu +++ b/heavy/cuda_combine.cu @@ -3,13 +3,13 @@ #include "cuda_helper.h" // globaler Speicher für unsere Ergebnisse -static uint32_t *d_hashoutput[8]; -extern uint32_t *d_hash2output[8]; -extern uint32_t *d_hash3output[8]; -extern uint32_t *d_hash4output[8]; -extern uint32_t *d_hash5output[8]; +static uint32_t *d_hashoutput[MAX_GPUS]; +extern uint32_t *d_hash2output[MAX_GPUS]; +extern uint32_t *d_hash3output[MAX_GPUS]; +extern uint32_t *d_hash4output[MAX_GPUS]; +extern uint32_t *d_hash5output[MAX_GPUS]; -extern uint32_t *heavy_nonceVector[8]; +extern uint32_t *heavy_nonceVector[MAX_GPUS]; /* Combines top 64-bits from each hash into a single hash */ __device__ diff --git a/heavy/cuda_groestl512.cu b/heavy/cuda_groestl512.cu index 889002a..f9fdc0a 100644 --- a/heavy/cuda_groestl512.cu +++ b/heavy/cuda_groestl512.cu @@ -4,11 +4,11 @@ #include "cuda_helper.h" // globaler Speicher für alle HeftyHashes aller Threads -extern uint32_t *heavy_heftyHashes[8]; -extern uint32_t *heavy_nonceVector[8]; +extern uint32_t *heavy_heftyHashes[MAX_GPUS]; +extern uint32_t *heavy_nonceVector[MAX_GPUS]; // globaler Speicher für unsere Ergebnisse -uint32_t *d_hash4output[8]; +uint32_t *d_hash4output[MAX_GPUS]; __constant__ uint32_t groestl_gpu_state[32]; __constant__ uint32_t groestl_gpu_msg[32]; diff --git a/heavy/cuda_hefty1.cu b/heavy/cuda_hefty1.cu index 0ca3105..b8f0ca2 100644 --- a/heavy/cuda_hefty1.cu +++ b/heavy/cuda_hefty1.cu @@ -8,7 +8,7 @@ #define USE_SHARED 1 // globaler Speicher für alle HeftyHashes aller Threads -uint32_t *heavy_heftyHashes[8]; +uint32_t *heavy_heftyHashes[MAX_GPUS]; /* Hash-Tabellen */ __constant__ uint32_t hefty_gpu_constantTable[64]; diff --git a/heavy/cuda_keccak512.cu b/heavy/cuda_keccak512.cu index 8c96b66..c62ce81 100644 --- a/heavy/cuda_keccak512.cu +++ b/heavy/cuda_keccak512.cu @@ -4,13 +4,13 @@ #include "cuda_helper.h" // globaler Speicher für alle HeftyHashes aller Threads -extern uint32_t *heavy_heftyHashes[8]; -extern uint32_t *heavy_nonceVector[8]; +extern uint32_t *heavy_heftyHashes[MAX_GPUS]; +extern uint32_t *heavy_nonceVector[MAX_GPUS]; // globaler Speicher für unsere Ergebnisse -uint32_t *d_hash3output[8]; -extern uint32_t *d_hash4output[8]; -extern uint32_t *d_hash5output[8]; +uint32_t *d_hash3output[MAX_GPUS]; +extern uint32_t *d_hash4output[MAX_GPUS]; +extern uint32_t *d_hash5output[MAX_GPUS]; // der Keccak512 State nach der ersten Runde (72 Bytes) __constant__ uint64_t c_State[25]; diff --git a/heavy/cuda_sha256.cu b/heavy/cuda_sha256.cu index 3b63b76..68e5cbf 100644 --- a/heavy/cuda_sha256.cu +++ b/heavy/cuda_sha256.cu @@ -4,11 +4,11 @@ #include "cuda_helper.h" // globaler Speicher für alle HeftyHashes aller Threads -extern uint32_t *heavy_heftyHashes[8]; -extern uint32_t *heavy_nonceVector[8]; +extern uint32_t *heavy_heftyHashes[MAX_GPUS]; +extern uint32_t *heavy_nonceVector[MAX_GPUS]; // globaler Speicher für unsere Ergebnisse -uint32_t *d_hash2output[8]; +uint32_t *d_hash2output[MAX_GPUS]; /* Hash-Tabellen */ diff --git a/heavy/heavy.cu b/heavy/heavy.cu index af5d2a4..a3f1b10 100644 --- a/heavy/heavy.cu +++ b/heavy/heavy.cu @@ -16,18 +16,18 @@ extern "C" { #include "heavy/heavy.h" #include "cuda_helper.h" -extern uint32_t *d_hash2output[8]; -extern uint32_t *d_hash3output[8]; -extern uint32_t *d_hash4output[8]; -extern uint32_t *d_hash5output[8]; +extern uint32_t *d_hash2output[MAX_GPUS]; +extern uint32_t *d_hash3output[MAX_GPUS]; +extern uint32_t *d_hash4output[MAX_GPUS]; +extern uint32_t *d_hash5output[MAX_GPUS]; #define HEAVYCOIN_BLKHDR_SZ 84 #define MNR_BLKHDR_SZ 80 // nonce-array für die threads -uint32_t *heavy_nonceVector[8]; +uint32_t *heavy_nonceVector[MAX_GPUS]; -extern uint32_t *heavy_heftyHashes[8]; +extern uint32_t *heavy_heftyHashes[MAX_GPUS]; /* Combines top 64-bits from each hash into a single hash */ static void combine_hashes(uint32_t *out, const uint32_t *hash1, const uint32_t *hash2, const uint32_t *hash3, const uint32_t *hash4) @@ -127,7 +127,7 @@ struct check_nonce_for_remove } }; -static bool init[8] = {0,0,0,0,0,0,0,0}; +static bool init[MAX_GPUS] = { 0 }; __host__ int scanhash_heavy(int thr_id, uint32_t *pdata, diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu index 85875ad..cfc9952 100644 --- a/lyra2/lyra2RE.cu +++ b/lyra2/lyra2RE.cu @@ -9,7 +9,7 @@ extern "C" { #include "miner.h" #include "cuda_helper.h" -static _ALIGN(64) uint64_t *d_hash[8]; +static _ALIGN(64) uint64_t *d_hash[MAX_GPUS]; extern void blake256_cpu_init(int thr_id, int threads); extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order); @@ -55,7 +55,7 @@ extern "C" void lyra2_hash(void *state, const void *input) memcpy(state, hashA, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_lyra2(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/miner.h b/miner.h index 245e25c..fcddaf9 100644 --- a/miner.h +++ b/miner.h @@ -485,9 +485,10 @@ extern uint32_t opt_work_size; extern uint64_t global_hashrate; extern double global_diff; -extern char* device_name[8]; -extern short device_map[8]; -extern long device_sm[8]; +#define MAX_GPUS 16 +extern char* device_name[MAX_GPUS]; +extern short device_map[MAX_GPUS]; +extern long device_sm[MAX_GPUS]; #define CL_N "\x1B[0m" #define CL_RED "\x1B[31m" diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp index a9bf54e..0b1fc28 100644 --- a/myriadgroestl.cpp +++ b/myriadgroestl.cpp @@ -32,7 +32,7 @@ extern "C" void myriadhash(void *state, const void *input) memcpy(state, hashB, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) diff --git a/nvml.cpp b/nvml.cpp index afd511d..8be79b0 100644 --- a/nvml.cpp +++ b/nvml.cpp @@ -34,7 +34,7 @@ int cuda_num_devices(); extern nvml_handle *hnvml; extern char driver_version[32]; -static uint32_t device_bus_ids[8] = { 0 }; +static uint32_t device_bus_ids[MAX_GPUS] = { 0 }; /* * Wrappers to emulate dlopen() on other systems like Windows @@ -451,7 +451,7 @@ int nvml_destroy(nvml_handle *nvmlh) #ifdef WIN32 #include "nvapi/nvapi_ccminer.h" -static int nvapi_dev_map[8] = { 0 }; +static int nvapi_dev_map[MAX_GPUS] = { 0 }; static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 }; static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 }; static NvU32 nvapi_dev_cnt = 0; diff --git a/pentablake.cu b/pentablake.cu index c10ce48..0440b03 100644 --- a/pentablake.cu +++ b/pentablake.cu @@ -49,9 +49,9 @@ static uint32_t __align__(32) c_Target[8]; __constant__ static uint64_t __align__(32) c_data[32]; -static uint32_t *d_hash[8]; -static uint32_t *d_resNounce[8]; -static uint32_t *h_resNounce[8]; +static uint32_t *d_hash[MAX_GPUS]; +static uint32_t *d_resNounce[MAX_GPUS]; +static uint32_t *h_resNounce[MAX_GPUS]; static uint32_t extra_results[2] = { UINT32_MAX, UINT32_MAX }; /* prefer uint32_t to prevent size conversions = speed +5/10 % */ @@ -362,7 +362,7 @@ void pentablake_cpu_setBlock_80(uint32_t *pdata, const uint32_t *ptarget) CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice)); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) diff --git a/quark/animecoin.cu b/quark/animecoin.cu index a6d311a..aa4733a 100644 --- a/quark/animecoin.cu +++ b/quark/animecoin.cu @@ -10,13 +10,13 @@ extern "C" #include "miner.h" #include "cuda_helper.h" -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; // Speicher zur Generierung der Noncevektoren für die bedingten Hashes -static uint32_t *d_animeNonces[8]; -static uint32_t *d_branch1Nonces[8]; -static uint32_t *d_branch2Nonces[8]; -static uint32_t *d_branch3Nonces[8]; +static uint32_t *d_animeNonces[MAX_GPUS]; +static uint32_t *d_branch1Nonces[MAX_GPUS]; +static uint32_t *d_branch2Nonces[MAX_GPUS]; +static uint32_t *d_branch3Nonces[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); @@ -160,7 +160,7 @@ struct HashPredicate }; */ -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_anime(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/quark/cuda_quark_compactionTest.cu b/quark/cuda_quark_compactionTest.cu index cac7562..abffbab 100644 --- a/quark/cuda_quark_compactionTest.cu +++ b/quark/cuda_quark_compactionTest.cu @@ -4,11 +4,11 @@ #include "cuda_helper.h" #include -static uint32_t *d_tempBranch1Nonces[8]; -static uint32_t *d_numValid[8]; -static uint32_t *h_numValid[8]; +static uint32_t *d_tempBranch1Nonces[MAX_GPUS]; +static uint32_t *d_numValid[MAX_GPUS]; +static uint32_t *h_numValid[MAX_GPUS]; -static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen +static uint32_t *d_partSum[2][MAX_GPUS]; // für bis zu vier partielle Summen // True/False tester typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash); @@ -24,7 +24,8 @@ __device__ uint32_t QuarkFalseTest(uint32_t *inpHash) } __device__ cuda_compactTestFunction_t d_QuarkTrueFunction = QuarkTrueTest, d_QuarkFalseFunction = QuarkFalseTest; -cuda_compactTestFunction_t h_QuarkTrueFunction[8], h_QuarkFalseFunction[8]; + +cuda_compactTestFunction_t h_QuarkTrueFunction[MAX_GPUS], h_QuarkFalseFunction[MAX_GPUS]; // Setup-Funktionen __host__ void quark_compactTest_cpu_init(int thr_id, int threads) diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu index f52337f..c98daa2 100644 --- a/quark/quarkcoin.cu +++ b/quark/quarkcoin.cu @@ -12,13 +12,13 @@ extern "C" #include "cuda_helper.h" -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; // Speicher zur Generierung der Noncevektoren für die bedingten Hashes -static uint32_t *d_quarkNonces[8]; -static uint32_t *d_branch1Nonces[8]; -static uint32_t *d_branch2Nonces[8]; -static uint32_t *d_branch3Nonces[8]; +static uint32_t *d_quarkNonces[MAX_GPUS]; +static uint32_t *d_branch1Nonces[MAX_GPUS]; +static uint32_t *d_branch2Nonces[MAX_GPUS]; +static uint32_t *d_branch3Nonces[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_setBlock_80(void *pdata); @@ -130,7 +130,7 @@ extern "C" void quarkhash(void *state, const void *input) memcpy(state, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_quark(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, @@ -149,7 +149,7 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata, cudaSetDevice(device_map[thr_id]); // Konstanten kopieren, Speicher belegen - cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); + CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput)); quark_blake512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput); diff --git a/qubit/deep.cu b/qubit/deep.cu index f2c5957..7245528 100644 --- a/qubit/deep.cu +++ b/qubit/deep.cu @@ -14,7 +14,7 @@ extern "C" { #include "cuda_helper.h" -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void qubit_luffa512_cpu_init(int thr_id, int threads); extern void qubit_luffa512_cpu_setBlock_80(void *pdata); @@ -52,7 +52,7 @@ extern "C" void deephash(void *state, const void *input) memcpy(state, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_deep(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/qubit/doom.cu b/qubit/doom.cu index df30b04..c76b59a 100644 --- a/qubit/doom.cu +++ b/qubit/doom.cu @@ -10,7 +10,7 @@ extern "C" { #include "cuda_helper.h" -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void qubit_luffa512_cpu_init(int thr_id, int threads); extern void qubit_luffa512_cpu_setBlock_80(void *pdata); @@ -32,7 +32,7 @@ extern void doomhash(void *state, const void *input) memcpy(state, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/qubit/qubit.cu b/qubit/qubit.cu index f9c3461..fd478c7 100644 --- a/qubit/qubit.cu +++ b/qubit/qubit.cu @@ -14,7 +14,7 @@ extern "C" { #include "cuda_helper.h" -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void qubit_luffa512_cpu_init(int thr_id, int threads); extern void qubit_luffa512_cpu_setBlock_80(void *pdata); @@ -72,7 +72,7 @@ extern "C" void qubithash(void *state, const void *input) memcpy(state, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/qubit/qubit_luffa512.cu b/qubit/qubit_luffa512.cu index c655538..4d3d04c 100644 --- a/qubit/qubit_luffa512.cu +++ b/qubit/qubit_luffa512.cu @@ -32,8 +32,8 @@ typedef unsigned char BitSequence; __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) __constant__ uint32_t c_Target[8]; -static uint32_t *h_resNounce[8]; -static uint32_t *d_resNounce[8]; +static uint32_t *h_resNounce[MAX_GPUS]; +static uint32_t *d_resNounce[MAX_GPUS]; #define NBN 1 /* max results, could be 2, see blake32.cu */ #if NBN > 1 diff --git a/x11/cuda_x11_simd512.cu b/x11/cuda_x11_simd512.cu index 646fc2e..9069065 100644 --- a/x11/cuda_x11_simd512.cu +++ b/x11/cuda_x11_simd512.cu @@ -10,8 +10,8 @@ #include "miner.h" #include "cuda_helper.h" -uint32_t *d_state[8]; -uint4 *d_temp4[8]; +uint32_t *d_state[MAX_GPUS]; +uint4 *d_temp4[MAX_GPUS]; // texture bound to d_temp4[thr_id], for read access in Compaction kernel texture texRef1D_128; diff --git a/x11/fresh.cu b/x11/fresh.cu index 408aa36..4b780ad 100644 --- a/x11/fresh.cu +++ b/x11/fresh.cu @@ -12,7 +12,7 @@ extern "C" { // to test gpu hash on a null buffer #define NULLTEST 0 -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_setBlock_80(void *pdata); @@ -68,7 +68,7 @@ extern "C" void fresh_hash(void *state, const void *input) memcpy(state, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/x11/s3.cu b/x11/s3.cu index 0b495ac..a08ecbf 100644 --- a/x11/s3.cu +++ b/x11/s3.cu @@ -13,7 +13,7 @@ extern "C" { #include -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); @@ -49,7 +49,7 @@ extern "C" void s3hash(void *output, const void *input) memcpy(output, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; /* Main S3 entry point */ extern "C" int scanhash_s3(int thr_id, uint32_t *pdata, diff --git a/x11/x11.cu b/x11/x11.cu index 5a561ee..46c0d58 100644 --- a/x11/x11.cu +++ b/x11/x11.cu @@ -20,7 +20,7 @@ extern "C" #include #include -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_setBlock_80(void *pdata); @@ -125,7 +125,7 @@ extern "C" void x11hash(void *output, const void *input) memcpy(output, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/x13/x13.cu b/x13/x13.cu index 1a0c3e9..17523e1 100644 --- a/x13/x13.cu +++ b/x13/x13.cu @@ -23,7 +23,7 @@ extern "C" #include "cuda_helper.h" -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, int threads); @@ -144,7 +144,7 @@ extern "C" void x13hash(void *output, const void *input) memcpy(output, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/x15/cuda_x15_whirlpool.cu b/x15/cuda_x15_whirlpool.cu index 3e84818..1774f6d 100644 --- a/x15/cuda_x15_whirlpool.cu +++ b/x15/cuda_x15_whirlpool.cu @@ -14,8 +14,8 @@ __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) __constant__ uint32_t pTarget[8]; -uint32_t *d_wnounce[8]; -uint32_t *d_WNonce[8]; +uint32_t *d_wnounce[MAX_GPUS]; +uint32_t *d_WNonce[MAX_GPUS]; #define USE_ALL_TABLES 1 diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu index 5a7542b..e973f91 100644 --- a/x15/whirlpool.cu +++ b/x15/whirlpool.cu @@ -9,7 +9,7 @@ extern "C" #include "cuda_helper.h" -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode); extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); @@ -49,7 +49,7 @@ extern "C" void wcoinhash(void *state, const void *input) memcpy(state, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_whc(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/x15/x14.cu b/x15/x14.cu index d14151c..aa0022d 100644 --- a/x15/x14.cu +++ b/x15/x14.cu @@ -27,7 +27,7 @@ extern "C" { #include "cuda_helper.h" // Memory for the hash functions -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_setBlock_80(void *pdata); @@ -155,7 +155,7 @@ extern "C" void x14hash(void *output, const void *input) memcpy(output, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/x15/x15.cu b/x15/x15.cu index c714bd3..49ee6e8 100644 --- a/x15/x15.cu +++ b/x15/x15.cu @@ -28,7 +28,7 @@ extern "C" { #include "cuda_helper.h" // Memory for the hash functions -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_setBlock_80(void *pdata); @@ -165,7 +165,7 @@ extern "C" void x15hash(void *output, const void *input) memcpy(output, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, diff --git a/x17/x17.cu b/x17/x17.cu index 32b6dc7..598c0d7 100644 --- a/x17/x17.cu +++ b/x17/x17.cu @@ -31,7 +31,7 @@ extern "C" #include "miner.h" #include "cuda_helper.h" -static uint32_t *d_hash[8]; +static uint32_t *d_hash[MAX_GPUS]; extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_setBlock_80(void *pdata); @@ -184,7 +184,7 @@ extern "C" void x17hash(void *output, const void *input) memcpy(output, hash, 32); } -static bool init[8] = { 0 }; +static bool init[MAX_GPUS] = { 0 }; extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce,