From cafd4477d7eb77340011e1b485ada1a4adf8e282 Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Thu, 22 Jan 2015 04:34:30 +0100
Subject: [PATCH] Handle a maximum of 16 GPUs (vs 8 before)

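Some cards have 2 GPUs on board, so a rig can expose more than 8 devices.
Replace the hard-coded per-device array size of 8 with MAX_GPUS (defined
as 16 in cuda_helper.h and miner.h) and fill the default device_map in
main() with a loop instead of a fixed initializer list.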
---
 Algo256/blake256.cu                |  6 +++---
 Algo256/cuda_fugue256.cu           |  4 ++--
 Algo256/cuda_groestl256.cu         |  4 ++--
 Algo256/cuda_keccak256.cu          |  4 ++--
 Algo256/keccak256.cu               |  4 ++--
 JHA/cuda_jha_compactionTest.cu     | 11 ++++++-----
 JHA/jackpotcoin.cu                 | 12 ++++++------
 ccminer.cpp                        | 11 ++++++++---
 cuda_checkhash.cu                  |  6 +++---
 cuda_groestlcoin.cu                |  2 +-
 cuda_helper.h                      |  8 ++++++--
 cuda_myriadgroestl.cu              |  4 ++--
 cuda_nist5.cu                      |  4 ++--
 fuguecoin.cpp                      |  4 ++--
 groestlcoin.cpp                    |  2 +-
 heavy/cuda_blake512.cu             |  6 +++---
 heavy/cuda_combine.cu              | 12 ++++++------
 heavy/cuda_groestl512.cu           |  6 +++---
 heavy/cuda_hefty1.cu               |  2 +-
 heavy/cuda_keccak512.cu            | 10 +++++-----
 heavy/cuda_sha256.cu               |  6 +++---
 heavy/heavy.cu                     | 14 +++++++-------
 lyra2/lyra2RE.cu                   |  4 ++--
 miner.h                            |  7 ++++---
 myriadgroestl.cpp                  |  2 +-
 nvml.cpp                           |  4 ++--
 pentablake.cu                      |  8 ++++----
 quark/animecoin.cu                 | 12 ++++++------
 quark/cuda_quark_compactionTest.cu | 11 ++++++-----
 quark/quarkcoin.cu                 | 14 +++++++-------
 qubit/deep.cu                      |  4 ++--
 qubit/doom.cu                      |  4 ++--
 qubit/qubit.cu                     |  4 ++--
 qubit/qubit_luffa512.cu            |  4 ++--
 x11/cuda_x11_simd512.cu            |  4 ++--
 x11/fresh.cu                       |  4 ++--
 x11/s3.cu                          |  4 ++--
 x11/x11.cu                         |  4 ++--
 x13/x13.cu                         |  4 ++--
 x15/cuda_x15_whirlpool.cu          |  4 ++--
 x15/whirlpool.cu                   |  4 ++--
 x15/x14.cu                         |  4 ++--
 x15/x15.cu                         |  4 ++--
 x17/x17.cu                         |  4 ++--
 44 files changed, 136 insertions(+), 124 deletions(-)

diff --git a/Algo256/blake256.cu b/Algo256/blake256.cu
index 6cbd917..baaf8db 100644
--- a/Algo256/blake256.cu
+++ b/Algo256/blake256.cu
@@ -49,8 +49,8 @@ extern "C" uint32_t crc32_u32t(const uint32_t *buf, size_t size);
 #endif
 
 /* 8 adapters max */
-static uint32_t *d_resNonce[8];
-static uint32_t *h_resNonce[8];
+static uint32_t *d_resNonce[MAX_GPUS];
+static uint32_t *h_resNonce[MAX_GPUS];
 
 /* max count of found nonces in one call */
 #define NBN 2
@@ -377,7 +377,7 @@ void blake256_cpu_setBlock_16(uint32_t *penddata, const uint32_t *midstate, cons
 }
 #endif
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done, int8_t blakerounds=14)
diff --git a/Algo256/cuda_fugue256.cu b/Algo256/cuda_fugue256.cu
index cb5f4cd..a2ebaeb 100644
--- a/Algo256/cuda_fugue256.cu
+++ b/Algo256/cuda_fugue256.cu
@@ -8,8 +8,8 @@
 
 #define USE_SHARED 1
 
-uint32_t *d_fugue256_hashoutput[8];
-uint32_t *d_resultNonce[8];
+uint32_t *d_fugue256_hashoutput[MAX_GPUS];
+uint32_t *d_resultNonce[MAX_GPUS];
 
 __constant__ uint32_t GPUstate[30]; // Single GPU
 __constant__ uint32_t pTarget[8]; // Single GPU
diff --git a/Algo256/cuda_groestl256.cu b/Algo256/cuda_groestl256.cu
index 8007f24..0f27b8a 100644
--- a/Algo256/cuda_groestl256.cu
+++ b/Algo256/cuda_groestl256.cu
@@ -2,8 +2,8 @@
 
 #include "cuda_helper.h"
 
-uint32_t *d_gnounce[8];
-uint32_t *d_GNonce[8];
+uint32_t *d_gnounce[MAX_GPUS];
+uint32_t *d_GNonce[MAX_GPUS];
 
 __constant__ uint32_t pTarget[8];
 
diff --git a/Algo256/cuda_keccak256.cu b/Algo256/cuda_keccak256.cu
index 8688a8a..cf385b4 100644
--- a/Algo256/cuda_keccak256.cu
+++ b/Algo256/cuda_keccak256.cu
@@ -22,8 +22,8 @@ static const uint64_t host_keccak_round_constants[24] = {
 	0x0000000080000001ull, 0x8000000080008008ull
 };
 
-uint32_t *d_nounce[8];
-uint32_t *d_KNonce[8];
+uint32_t *d_nounce[MAX_GPUS];
+uint32_t *d_KNonce[MAX_GPUS];
 
 __constant__ uint32_t pTarget[8];
 __constant__ uint64_t keccak_round_constants[24];
diff --git a/Algo256/keccak256.cu b/Algo256/keccak256.cu
index 59a1d9b..a555c9a 100644
--- a/Algo256/keccak256.cu
+++ b/Algo256/keccak256.cu
@@ -14,7 +14,7 @@ extern "C"
 
 #include "cuda_helper.h"
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void keccak256_cpu_init(int thr_id, int threads);
 extern void keccak256_setBlock_80(void *pdata,const void *ptarget);
@@ -34,7 +34,7 @@ extern "C" void keccak256_hash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/JHA/cuda_jha_compactionTest.cu b/JHA/cuda_jha_compactionTest.cu
index 20e73db..f701e0d 100644
--- a/JHA/cuda_jha_compactionTest.cu
+++ b/JHA/cuda_jha_compactionTest.cu
@@ -4,11 +4,11 @@
 #include "cuda_helper.h"
 #include <sm_30_intrinsics.h>
 
-static uint32_t *d_tempBranch1Nonces[8];
-static uint32_t *d_numValid[8];
-static uint32_t *h_numValid[8];
+static uint32_t *d_tempBranch1Nonces[MAX_GPUS];
+static uint32_t *d_numValid[MAX_GPUS];
+static uint32_t *h_numValid[MAX_GPUS];
 
-static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
+static uint32_t *d_partSum[2][MAX_GPUS]; // für bis zu vier partielle Summen
 
 // True/False tester
 typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash);
@@ -26,7 +26,8 @@ __device__ uint32_t JackpotFalseTest(uint32_t *inpHash)
 }
 
 __device__ cuda_compactTestFunction_t d_JackpotTrueFunction = JackpotTrueTest, d_JackpotFalseFunction = JackpotFalseTest;
-cuda_compactTestFunction_t h_JackpotTrueFunction[8], h_JackpotFalseFunction[8];
+
+cuda_compactTestFunction_t h_JackpotTrueFunction[MAX_GPUS], h_JackpotFalseFunction[MAX_GPUS];
 
 // Setup-Funktionen
 __host__ void jackpot_compactTest_cpu_init(int thr_id, int threads)
diff --git a/JHA/jackpotcoin.cu b/JHA/jackpotcoin.cu
index 29059a3..13c7a31 100644
--- a/JHA/jackpotcoin.cu
+++ b/JHA/jackpotcoin.cu
@@ -10,7 +10,7 @@ extern "C"
 #include "miner.h"
 #include "cuda_helper.h"
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void jackpot_keccak512_cpu_init(int thr_id, int threads);
 extern void jackpot_keccak512_cpu_setBlock(void *pdata, size_t inlen);
@@ -37,10 +37,10 @@ extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t st
 extern uint32_t cuda_check_hash_branch(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
 
 // Speicher zur Generierung der Noncevektoren für die bedingten Hashes
-static uint32_t *d_jackpotNonces[8];
-static uint32_t *d_branch1Nonces[8];
-static uint32_t *d_branch2Nonces[8];
-static uint32_t *d_branch3Nonces[8];
+static uint32_t *d_jackpotNonces[MAX_GPUS];
+static uint32_t *d_branch1Nonces[MAX_GPUS];
+static uint32_t *d_branch2Nonces[MAX_GPUS];
+static uint32_t *d_branch3Nonces[MAX_GPUS];
 
 // Original jackpothash Funktion aus einem miner Quelltext
 extern "C" unsigned int jackpothash(void *state, const void *input)
@@ -85,7 +85,7 @@ extern "C" unsigned int jackpothash(void *state, const void *input)
     return round;
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
     const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/ccminer.cpp b/ccminer.cpp
index 661d4e3..b74e4d7 100644
--- a/ccminer.cpp
+++ b/ccminer.cpp
@@ -214,9 +214,9 @@ bool opt_trust_pool = false;
 uint16_t opt_vote = 9999;
 int num_cpus;
 int active_gpus;
-char * device_name[8];
-short device_map[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
-long  device_sm[8] = { 0 };
+char * device_name[MAX_GPUS];
+short device_map[MAX_GPUS] = { 0 };
+long  device_sm[MAX_GPUS] = { 0 };
 char *rpc_user = NULL;
 static char *rpc_url;
 static char *rpc_userpass;
@@ -2223,6 +2223,11 @@ int main(int argc, char *argv[])
 	if (num_cpus < 1)
 		num_cpus = 1;
 
+	// default thread to device map
+	for (i = 0; i < MAX_GPUS; i++) {
+		device_map[i] = i;
+	}
+
 	// number of gpus
 	active_gpus = cuda_num_devices();
 	cuda_devicenames();
diff --git a/cuda_checkhash.cu b/cuda_checkhash.cu
index 236bfbf..84794a4 100644
--- a/cuda_checkhash.cu
+++ b/cuda_checkhash.cu
@@ -10,9 +10,9 @@
 
 __constant__ uint32_t pTarget[8]; // 32 bytes
 
-// store 8 device arrays of 8 nonces
-static uint32_t* h_resNonces[8];
-static uint32_t* d_resNonces[8];
+// store MAX_GPUS device arrays of 8 nonces
+static uint32_t* h_resNonces[MAX_GPUS];
+static uint32_t* d_resNonces[MAX_GPUS];
 
 __host__
 void cuda_check_cpu_init(int thr_id, int threads)
diff --git a/cuda_groestlcoin.cu b/cuda_groestlcoin.cu
index 6d067f9..375c329 100644
--- a/cuda_groestlcoin.cu
+++ b/cuda_groestlcoin.cu
@@ -8,7 +8,7 @@
 
 // globaler Speicher für alle HeftyHashes aller Threads
 __constant__ uint32_t pTarget[8]; // Single GPU
-extern uint32_t *d_resultNonce[8];
+extern uint32_t *d_resultNonce[MAX_GPUS];
 
 __constant__ uint32_t groestlcoin_gpu_msg[32];
 
diff --git a/cuda_helper.h b/cuda_helper.h
index e7e0967..2a20e6c 100644
--- a/cuda_helper.h
+++ b/cuda_helper.h
@@ -13,8 +13,12 @@
 
 #include <stdint.h>
 
-extern "C" short device_map[8];
-extern "C"  long device_sm[8];
+#ifndef MAX_GPUS
+#define MAX_GPUS 16
+#endif
+
+extern "C" short device_map[MAX_GPUS];
+extern "C"  long device_sm[MAX_GPUS];
 
 // common functions
 extern void cuda_check_cpu_init(int thr_id, int threads);
diff --git a/cuda_myriadgroestl.cu b/cuda_myriadgroestl.cu
index 480383a..f113744 100644
--- a/cuda_myriadgroestl.cu
+++ b/cuda_myriadgroestl.cu
@@ -15,8 +15,8 @@
 
 // globaler Speicher für alle HeftyHashes aller Threads
 __constant__ uint32_t pTarget[8]; // Single GPU
-uint32_t *d_outputHashes[8];
-extern uint32_t *d_resultNonce[8];
+uint32_t *d_outputHashes[MAX_GPUS];
+extern uint32_t *d_resultNonce[MAX_GPUS];
 
 __constant__ uint32_t myriadgroestl_gpu_msg[32];
 
diff --git a/cuda_nist5.cu b/cuda_nist5.cu
index e88017f..45131be 100644
--- a/cuda_nist5.cu
+++ b/cuda_nist5.cu
@@ -11,7 +11,7 @@ extern "C"
 
 #include "cuda_helper.h"
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void quark_blake512_cpu_init(int thr_id, int threads);
 extern void quark_blake512_cpu_setBlock_80(void *pdata);
@@ -64,7 +64,7 @@ extern "C" void nist5hash(void *state, const void *input)
     memcpy(state, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
     const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/fuguecoin.cpp b/fuguecoin.cpp
index 0362655..a47c173 100644
--- a/fuguecoin.cpp
+++ b/fuguecoin.cpp
@@ -14,13 +14,13 @@ extern "C" void my_fugue256_close(void *cc, void *dst);
 extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
 
 // vorbereitete Kontexte nach den ersten 80 Bytes
-sph_fugue256_context  ctx_fugue_const[8];
+// sph_fugue256_context  ctx_fugue_const[MAX_GPUS];
 
 #define SWAP32(x) \
     ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u)   | \
       (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done)
diff --git a/groestlcoin.cpp b/groestlcoin.cpp
index 23a8981..fc8d4af 100644
--- a/groestlcoin.cpp
+++ b/groestlcoin.cpp
@@ -58,7 +58,7 @@ extern "C" void groestlhash(void *state, const void *input)
     memcpy(state, hashB, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
     uint32_t max_nonce, unsigned long *hashes_done)
diff --git a/heavy/cuda_blake512.cu b/heavy/cuda_blake512.cu
index b177514..1a5df4e 100644
--- a/heavy/cuda_blake512.cu
+++ b/heavy/cuda_blake512.cu
@@ -4,11 +4,11 @@
 #include "cuda_helper.h"
 
 // globaler Speicher für alle HeftyHashes aller Threads
-extern uint32_t *heavy_heftyHashes[8];
-extern uint32_t *heavy_nonceVector[8];
+extern uint32_t *heavy_heftyHashes[MAX_GPUS];
+extern uint32_t *heavy_nonceVector[MAX_GPUS];
 
 // globaler Speicher für unsere Ergebnisse
-uint32_t *d_hash5output[8];
+uint32_t *d_hash5output[MAX_GPUS];
 
 // die Message (112 bzw. 116 Bytes) mit Padding zur Berechnung auf der GPU
 __constant__ uint64_t c_PaddedMessage[16]; // padded message (80/84+32 bytes + padding)
diff --git a/heavy/cuda_combine.cu b/heavy/cuda_combine.cu
index b0b2ead..c813f24 100644
--- a/heavy/cuda_combine.cu
+++ b/heavy/cuda_combine.cu
@@ -3,13 +3,13 @@
 #include "cuda_helper.h"
 
 // globaler Speicher für unsere Ergebnisse
-static uint32_t *d_hashoutput[8];
-extern uint32_t *d_hash2output[8];
-extern uint32_t *d_hash3output[8];
-extern uint32_t *d_hash4output[8];
-extern uint32_t *d_hash5output[8];
+static uint32_t *d_hashoutput[MAX_GPUS];
+extern uint32_t *d_hash2output[MAX_GPUS];
+extern uint32_t *d_hash3output[MAX_GPUS];
+extern uint32_t *d_hash4output[MAX_GPUS];
+extern uint32_t *d_hash5output[MAX_GPUS];
 
-extern uint32_t *heavy_nonceVector[8];
+extern uint32_t *heavy_nonceVector[MAX_GPUS];
 
 /* Combines top 64-bits from each hash into a single hash */
 __device__
diff --git a/heavy/cuda_groestl512.cu b/heavy/cuda_groestl512.cu
index 889002a..f9fdc0a 100644
--- a/heavy/cuda_groestl512.cu
+++ b/heavy/cuda_groestl512.cu
@@ -4,11 +4,11 @@
 #include "cuda_helper.h"
 
 // globaler Speicher für alle HeftyHashes aller Threads
-extern uint32_t *heavy_heftyHashes[8];
-extern uint32_t *heavy_nonceVector[8];
+extern uint32_t *heavy_heftyHashes[MAX_GPUS];
+extern uint32_t *heavy_nonceVector[MAX_GPUS];
 
 // globaler Speicher für unsere Ergebnisse
-uint32_t *d_hash4output[8];
+uint32_t *d_hash4output[MAX_GPUS];
 
 __constant__ uint32_t groestl_gpu_state[32];
 __constant__ uint32_t groestl_gpu_msg[32];
diff --git a/heavy/cuda_hefty1.cu b/heavy/cuda_hefty1.cu
index 0ca3105..b8f0ca2 100644
--- a/heavy/cuda_hefty1.cu
+++ b/heavy/cuda_hefty1.cu
@@ -8,7 +8,7 @@
 #define USE_SHARED 1
 
 // globaler Speicher für alle HeftyHashes aller Threads
-uint32_t *heavy_heftyHashes[8];
+uint32_t *heavy_heftyHashes[MAX_GPUS];
 
 /* Hash-Tabellen */
 __constant__ uint32_t hefty_gpu_constantTable[64];
diff --git a/heavy/cuda_keccak512.cu b/heavy/cuda_keccak512.cu
index 8c96b66..c62ce81 100644
--- a/heavy/cuda_keccak512.cu
+++ b/heavy/cuda_keccak512.cu
@@ -4,13 +4,13 @@
 #include "cuda_helper.h"
 
 // globaler Speicher für alle HeftyHashes aller Threads
-extern uint32_t *heavy_heftyHashes[8];
-extern uint32_t *heavy_nonceVector[8];
+extern uint32_t *heavy_heftyHashes[MAX_GPUS];
+extern uint32_t *heavy_nonceVector[MAX_GPUS];
 
 // globaler Speicher für unsere Ergebnisse
-uint32_t *d_hash3output[8];
-extern uint32_t *d_hash4output[8];
-extern uint32_t *d_hash5output[8];
+uint32_t *d_hash3output[MAX_GPUS];
+extern uint32_t *d_hash4output[MAX_GPUS];
+extern uint32_t *d_hash5output[MAX_GPUS];
 
 // der Keccak512 State nach der ersten Runde (72 Bytes)
 __constant__ uint64_t c_State[25];
diff --git a/heavy/cuda_sha256.cu b/heavy/cuda_sha256.cu
index 3b63b76..68e5cbf 100644
--- a/heavy/cuda_sha256.cu
+++ b/heavy/cuda_sha256.cu
@@ -4,11 +4,11 @@
 #include "cuda_helper.h"
 
 // globaler Speicher für alle HeftyHashes aller Threads
-extern uint32_t *heavy_heftyHashes[8];
-extern uint32_t *heavy_nonceVector[8];
+extern uint32_t *heavy_heftyHashes[MAX_GPUS];
+extern uint32_t *heavy_nonceVector[MAX_GPUS];
 
 // globaler Speicher für unsere Ergebnisse
-uint32_t *d_hash2output[8];
+uint32_t *d_hash2output[MAX_GPUS];
 
 
 /* Hash-Tabellen */
diff --git a/heavy/heavy.cu b/heavy/heavy.cu
index af5d2a4..a3f1b10 100644
--- a/heavy/heavy.cu
+++ b/heavy/heavy.cu
@@ -16,18 +16,18 @@ extern "C" {
 #include "heavy/heavy.h"
 #include "cuda_helper.h"
 
-extern uint32_t *d_hash2output[8];
-extern uint32_t *d_hash3output[8];
-extern uint32_t *d_hash4output[8];
-extern uint32_t *d_hash5output[8];
+extern uint32_t *d_hash2output[MAX_GPUS];
+extern uint32_t *d_hash3output[MAX_GPUS];
+extern uint32_t *d_hash4output[MAX_GPUS];
+extern uint32_t *d_hash5output[MAX_GPUS];
 
 #define HEAVYCOIN_BLKHDR_SZ 84
 #define MNR_BLKHDR_SZ       80
 
 // nonce-array für die threads
-uint32_t *heavy_nonceVector[8];
+uint32_t *heavy_nonceVector[MAX_GPUS];
 
-extern uint32_t *heavy_heftyHashes[8];
+extern uint32_t *heavy_heftyHashes[MAX_GPUS];
 
 /* Combines top 64-bits from each hash into a single hash */
 static void combine_hashes(uint32_t *out, const uint32_t *hash1, const uint32_t *hash2, const uint32_t *hash3, const uint32_t *hash4)
@@ -127,7 +127,7 @@ struct check_nonce_for_remove
     }
 };
 
-static bool init[8] = {0,0,0,0,0,0,0,0};
+static bool init[MAX_GPUS] = { 0 };
 
 __host__
 int scanhash_heavy(int thr_id, uint32_t *pdata,
diff --git a/lyra2/lyra2RE.cu b/lyra2/lyra2RE.cu
index 85875ad..cfc9952 100644
--- a/lyra2/lyra2RE.cu
+++ b/lyra2/lyra2RE.cu
@@ -9,7 +9,7 @@ extern "C" {
 #include "miner.h"
 #include "cuda_helper.h"
 
-static _ALIGN(64) uint64_t *d_hash[8];
+static _ALIGN(64) uint64_t *d_hash[MAX_GPUS];
 
 extern void blake256_cpu_init(int thr_id, int threads);
 extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order);
@@ -55,7 +55,7 @@ extern "C" void lyra2_hash(void *state, const void *input)
 	memcpy(state, hashA, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_lyra2(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/miner.h b/miner.h
index 245e25c..fcddaf9 100644
--- a/miner.h
+++ b/miner.h
@@ -485,9 +485,10 @@ extern uint32_t opt_work_size;
 extern uint64_t global_hashrate;
 extern double   global_diff;
 
-extern char* device_name[8];
-extern short device_map[8];
-extern long  device_sm[8];
+#define MAX_GPUS 16
+extern char* device_name[MAX_GPUS];
+extern short device_map[MAX_GPUS];
+extern long  device_sm[MAX_GPUS];
 
 #define CL_N    "\x1B[0m"
 #define CL_RED  "\x1B[31m"
diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp
index a9bf54e..0b1fc28 100644
--- a/myriadgroestl.cpp
+++ b/myriadgroestl.cpp
@@ -32,7 +32,7 @@ extern "C" void myriadhash(void *state, const void *input)
     memcpy(state, hashB, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done)
diff --git a/nvml.cpp b/nvml.cpp
index afd511d..8be79b0 100644
--- a/nvml.cpp
+++ b/nvml.cpp
@@ -34,7 +34,7 @@ int cuda_num_devices();
 extern nvml_handle *hnvml;
 extern char driver_version[32];
 
-static uint32_t device_bus_ids[8] = { 0 };
+static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
 
 /*
  * Wrappers to emulate dlopen() on other systems like Windows
@@ -451,7 +451,7 @@ int nvml_destroy(nvml_handle *nvmlh)
 #ifdef WIN32
 #include "nvapi/nvapi_ccminer.h"
 
-static int nvapi_dev_map[8] = { 0 };
+static int nvapi_dev_map[MAX_GPUS] = { 0 };
 static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 };
 static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 };
 static NvU32 nvapi_dev_cnt = 0;
diff --git a/pentablake.cu b/pentablake.cu
index c10ce48..0440b03 100644
--- a/pentablake.cu
+++ b/pentablake.cu
@@ -49,9 +49,9 @@ static uint32_t __align__(32) c_Target[8];
 __constant__
 static uint64_t __align__(32) c_data[32];
 
-static uint32_t *d_hash[8];
-static uint32_t *d_resNounce[8];
-static uint32_t *h_resNounce[8];
+static uint32_t *d_hash[MAX_GPUS];
+static uint32_t *d_resNounce[MAX_GPUS];
+static uint32_t *h_resNounce[MAX_GPUS];
 static uint32_t extra_results[2] = { UINT32_MAX, UINT32_MAX };
 
 /* prefer uint32_t to prevent size conversions = speed +5/10 % */
@@ -362,7 +362,7 @@ void pentablake_cpu_setBlock_80(uint32_t *pdata, const uint32_t *ptarget)
 	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice));
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done)
diff --git a/quark/animecoin.cu b/quark/animecoin.cu
index a6d311a..aa4733a 100644
--- a/quark/animecoin.cu
+++ b/quark/animecoin.cu
@@ -10,13 +10,13 @@ extern "C"
 #include "miner.h"
 #include "cuda_helper.h"
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 // Speicher zur Generierung der Noncevektoren f�r die bedingten Hashes
-static uint32_t *d_animeNonces[8];
-static uint32_t *d_branch1Nonces[8];
-static uint32_t *d_branch2Nonces[8];
-static uint32_t *d_branch3Nonces[8];
+static uint32_t *d_animeNonces[MAX_GPUS];
+static uint32_t *d_branch1Nonces[MAX_GPUS];
+static uint32_t *d_branch2Nonces[MAX_GPUS];
+static uint32_t *d_branch3Nonces[MAX_GPUS];
 
 extern void quark_blake512_cpu_init(int thr_id, int threads);
 extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@@ -160,7 +160,7 @@ struct HashPredicate
 };
 */
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_anime(int thr_id, uint32_t *pdata,
     const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/quark/cuda_quark_compactionTest.cu b/quark/cuda_quark_compactionTest.cu
index cac7562..abffbab 100644
--- a/quark/cuda_quark_compactionTest.cu
+++ b/quark/cuda_quark_compactionTest.cu
@@ -4,11 +4,11 @@
 #include "cuda_helper.h"
 #include <sm_30_intrinsics.h>
 
-static uint32_t *d_tempBranch1Nonces[8];
-static uint32_t *d_numValid[8];
-static uint32_t *h_numValid[8];
+static uint32_t *d_tempBranch1Nonces[MAX_GPUS];
+static uint32_t *d_numValid[MAX_GPUS];
+static uint32_t *h_numValid[MAX_GPUS];
 
-static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
+static uint32_t *d_partSum[2][MAX_GPUS]; // für bis zu vier partielle Summen
 
 // True/False tester
 typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash);
@@ -24,7 +24,8 @@ __device__ uint32_t QuarkFalseTest(uint32_t *inpHash)
 }
 
 __device__ cuda_compactTestFunction_t d_QuarkTrueFunction = QuarkTrueTest, d_QuarkFalseFunction = QuarkFalseTest;
-cuda_compactTestFunction_t h_QuarkTrueFunction[8], h_QuarkFalseFunction[8];
+
+cuda_compactTestFunction_t h_QuarkTrueFunction[MAX_GPUS], h_QuarkFalseFunction[MAX_GPUS];
 
 // Setup-Funktionen
 __host__ void quark_compactTest_cpu_init(int thr_id, int threads)
diff --git a/quark/quarkcoin.cu b/quark/quarkcoin.cu
index f52337f..c98daa2 100644
--- a/quark/quarkcoin.cu
+++ b/quark/quarkcoin.cu
@@ -12,13 +12,13 @@ extern "C"
 
 #include "cuda_helper.h"
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 // Speicher zur Generierung der Noncevektoren für die bedingten Hashes
-static uint32_t *d_quarkNonces[8];
-static uint32_t *d_branch1Nonces[8];
-static uint32_t *d_branch2Nonces[8];
-static uint32_t *d_branch3Nonces[8];
+static uint32_t *d_quarkNonces[MAX_GPUS];
+static uint32_t *d_branch1Nonces[MAX_GPUS];
+static uint32_t *d_branch2Nonces[MAX_GPUS];
+static uint32_t *d_branch3Nonces[MAX_GPUS];
 
 extern void quark_blake512_cpu_init(int thr_id, int threads);
 extern void quark_blake512_cpu_setBlock_80(void *pdata);
@@ -130,7 +130,7 @@ extern "C" void quarkhash(void *state, const void *input)
     memcpy(state, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
     const uint32_t *ptarget, uint32_t max_nonce,
@@ -149,7 +149,7 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
 		cudaSetDevice(device_map[thr_id]);
 
 		// Konstanten kopieren, Speicher belegen
-		cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput);
+		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
 
 		quark_blake512_cpu_init(thr_id, throughput);
 		quark_groestl512_cpu_init(thr_id, throughput);
diff --git a/qubit/deep.cu b/qubit/deep.cu
index f2c5957..7245528 100644
--- a/qubit/deep.cu
+++ b/qubit/deep.cu
@@ -14,7 +14,7 @@ extern "C" {
 
 #include "cuda_helper.h"
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void qubit_luffa512_cpu_init(int thr_id, int threads);
 extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
@@ -52,7 +52,7 @@ extern "C" void deephash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_deep(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/qubit/doom.cu b/qubit/doom.cu
index df30b04..c76b59a 100644
--- a/qubit/doom.cu
+++ b/qubit/doom.cu
@@ -10,7 +10,7 @@ extern "C" {
 
 #include "cuda_helper.h"
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void qubit_luffa512_cpu_init(int thr_id, int threads);
 extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
@@ -32,7 +32,7 @@ extern void doomhash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/qubit/qubit.cu b/qubit/qubit.cu
index f9c3461..fd478c7 100644
--- a/qubit/qubit.cu
+++ b/qubit/qubit.cu
@@ -14,7 +14,7 @@ extern "C" {
 
 #include "cuda_helper.h"
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void qubit_luffa512_cpu_init(int thr_id, int threads);
 extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
@@ -72,7 +72,7 @@ extern "C" void qubithash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/qubit/qubit_luffa512.cu b/qubit/qubit_luffa512.cu
index c655538..4d3d04c 100644
--- a/qubit/qubit_luffa512.cu
+++ b/qubit/qubit_luffa512.cu
@@ -32,8 +32,8 @@ typedef unsigned char BitSequence;
 __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
 __constant__ uint32_t c_Target[8];
 
-static uint32_t *h_resNounce[8];
-static uint32_t *d_resNounce[8];
+static uint32_t *h_resNounce[MAX_GPUS];
+static uint32_t *d_resNounce[MAX_GPUS];
 
 #define NBN 1 /* max results, could be 2, see blake32.cu */
 #if NBN > 1
diff --git a/x11/cuda_x11_simd512.cu b/x11/cuda_x11_simd512.cu
index 646fc2e..9069065 100644
--- a/x11/cuda_x11_simd512.cu
+++ b/x11/cuda_x11_simd512.cu
@@ -10,8 +10,8 @@
 #include "miner.h"
 #include "cuda_helper.h"
 
-uint32_t *d_state[8];
-uint4 *d_temp4[8];
+uint32_t *d_state[MAX_GPUS];
+uint4 *d_temp4[MAX_GPUS];
 
 // texture bound to d_temp4[thr_id], for read access in Compaction kernel
 texture<uint4, 1, cudaReadModeElementType> texRef1D_128;
diff --git a/x11/fresh.cu b/x11/fresh.cu
index 408aa36..4b780ad 100644
--- a/x11/fresh.cu
+++ b/x11/fresh.cu
@@ -12,7 +12,7 @@ extern "C" {
 // to test gpu hash on a null buffer
 #define NULLTEST 0
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void x11_shavite512_cpu_init(int thr_id, int threads);
 extern void x11_shavite512_setBlock_80(void *pdata);
@@ -68,7 +68,7 @@ extern "C" void fresh_hash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/x11/s3.cu b/x11/s3.cu
index 0b495ac..a08ecbf 100644
--- a/x11/s3.cu
+++ b/x11/s3.cu
@@ -13,7 +13,7 @@ extern "C" {
 
 #include <stdint.h>
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void x11_shavite512_cpu_init(int thr_id, int threads);
 extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
@@ -49,7 +49,7 @@ extern "C" void s3hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 /* Main S3 entry point */
 extern "C" int scanhash_s3(int thr_id, uint32_t *pdata,
diff --git a/x11/x11.cu b/x11/x11.cu
index 5a561ee..46c0d58 100644
--- a/x11/x11.cu
+++ b/x11/x11.cu
@@ -20,7 +20,7 @@ extern "C"
 #include <stdio.h>
 #include <memory.h>
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void quark_blake512_cpu_init(int thr_id, int threads);
 extern void quark_blake512_cpu_setBlock_80(void *pdata);
@@ -125,7 +125,7 @@ extern "C" void x11hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
     const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/x13/x13.cu b/x13/x13.cu
index 1a0c3e9..17523e1 100644
--- a/x13/x13.cu
+++ b/x13/x13.cu
@@ -23,7 +23,7 @@ extern "C"
 
 #include "cuda_helper.h"
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 
 extern void quark_blake512_cpu_init(int thr_id, int threads);
@@ -144,7 +144,7 @@ extern "C" void x13hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
     const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/x15/cuda_x15_whirlpool.cu b/x15/cuda_x15_whirlpool.cu
index 3e84818..1774f6d 100644
--- a/x15/cuda_x15_whirlpool.cu
+++ b/x15/cuda_x15_whirlpool.cu
@@ -14,8 +14,8 @@
 __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
 __constant__ uint32_t pTarget[8];
 
-uint32_t *d_wnounce[8];
-uint32_t *d_WNonce[8];
+uint32_t *d_wnounce[MAX_GPUS];
+uint32_t *d_WNonce[MAX_GPUS];
 
 #define USE_ALL_TABLES 1
 
diff --git a/x15/whirlpool.cu b/x15/whirlpool.cu
index 5a7542b..e973f91 100644
--- a/x15/whirlpool.cu
+++ b/x15/whirlpool.cu
@@ -9,7 +9,7 @@ extern "C"
 
 #include "cuda_helper.h"
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode);
 extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@@ -49,7 +49,7 @@ extern "C" void wcoinhash(void *state, const void *input)
 	memcpy(state, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_whc(int thr_id, uint32_t *pdata,
     const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/x15/x14.cu b/x15/x14.cu
index d14151c..aa0022d 100644
--- a/x15/x14.cu
+++ b/x15/x14.cu
@@ -27,7 +27,7 @@ extern "C" {
 #include "cuda_helper.h"
 
 // Memory for the hash functions
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void quark_blake512_cpu_init(int thr_id, int threads);
 extern void quark_blake512_cpu_setBlock_80(void *pdata);
@@ -155,7 +155,7 @@ extern "C" void x14hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/x15/x15.cu b/x15/x15.cu
index c714bd3..49ee6e8 100644
--- a/x15/x15.cu
+++ b/x15/x15.cu
@@ -28,7 +28,7 @@ extern "C" {
 #include "cuda_helper.h"
 
 // Memory for the hash functions
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void quark_blake512_cpu_init(int thr_id, int threads);
 extern void quark_blake512_cpu_setBlock_80(void *pdata);
@@ -165,7 +165,7 @@ extern "C" void x15hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,
diff --git a/x17/x17.cu b/x17/x17.cu
index 32b6dc7..598c0d7 100644
--- a/x17/x17.cu
+++ b/x17/x17.cu
@@ -31,7 +31,7 @@ extern "C"
 #include "miner.h"
 #include "cuda_helper.h"
 
-static uint32_t *d_hash[8];
+static uint32_t *d_hash[MAX_GPUS];
 
 extern void quark_blake512_cpu_init(int thr_id, int threads);
 extern void quark_blake512_cpu_setBlock_80(void *pdata);
@@ -184,7 +184,7 @@ extern "C" void x17hash(void *output, const void *input)
 	memcpy(output, hash, 32);
 }
 
-static bool init[8] = { 0 };
+static bool init[MAX_GPUS] = { 0 };
 
 extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce,