Browse Source

Handle a maximum of 16 GPUs (vs. 8 before)

Some cards have 2 GPUs on board...
master
Tanguy Pruvot 10 years ago
parent
commit
cafd4477d7
  1. 6
      Algo256/blake256.cu
  2. 4
      Algo256/cuda_fugue256.cu
  3. 4
      Algo256/cuda_groestl256.cu
  4. 4
      Algo256/cuda_keccak256.cu
  5. 4
      Algo256/keccak256.cu
  6. 11
      JHA/cuda_jha_compactionTest.cu
  7. 12
      JHA/jackpotcoin.cu
  8. 11
      ccminer.cpp
  9. 6
      cuda_checkhash.cu
  10. 2
      cuda_groestlcoin.cu
  11. 8
      cuda_helper.h
  12. 4
      cuda_myriadgroestl.cu
  13. 4
      cuda_nist5.cu
  14. 4
      fuguecoin.cpp
  15. 2
      groestlcoin.cpp
  16. 6
      heavy/cuda_blake512.cu
  17. 12
      heavy/cuda_combine.cu
  18. 6
      heavy/cuda_groestl512.cu
  19. 2
      heavy/cuda_hefty1.cu
  20. 10
      heavy/cuda_keccak512.cu
  21. 6
      heavy/cuda_sha256.cu
  22. 14
      heavy/heavy.cu
  23. 4
      lyra2/lyra2RE.cu
  24. 7
      miner.h
  25. 2
      myriadgroestl.cpp
  26. 4
      nvml.cpp
  27. 8
      pentablake.cu
  28. 12
      quark/animecoin.cu
  29. 11
      quark/cuda_quark_compactionTest.cu
  30. 14
      quark/quarkcoin.cu
  31. 4
      qubit/deep.cu
  32. 4
      qubit/doom.cu
  33. 4
      qubit/qubit.cu
  34. 4
      qubit/qubit_luffa512.cu
  35. 4
      x11/cuda_x11_simd512.cu
  36. 4
      x11/fresh.cu
  37. 4
      x11/s3.cu
  38. 4
      x11/x11.cu
  39. 4
      x13/x13.cu
  40. 4
      x15/cuda_x15_whirlpool.cu
  41. 4
      x15/whirlpool.cu
  42. 4
      x15/x14.cu
  43. 4
      x15/x15.cu
  44. 4
      x17/x17.cu

6
Algo256/blake256.cu

@ -49,8 +49,8 @@ extern "C" uint32_t crc32_u32t(const uint32_t *buf, size_t size);
#endif #endif
/* 8 adapters max */ /* 8 adapters max */
static uint32_t *d_resNonce[8]; static uint32_t *d_resNonce[MAX_GPUS];
static uint32_t *h_resNonce[8]; static uint32_t *h_resNonce[MAX_GPUS];
/* max count of found nonces in one call */ /* max count of found nonces in one call */
#define NBN 2 #define NBN 2
@ -377,7 +377,7 @@ void blake256_cpu_setBlock_16(uint32_t *penddata, const uint32_t *midstate, cons
} }
#endif #endif
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *ptarget, extern "C" int scanhash_blake256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done, int8_t blakerounds=14) uint32_t max_nonce, unsigned long *hashes_done, int8_t blakerounds=14)

4
Algo256/cuda_fugue256.cu

@ -8,8 +8,8 @@
#define USE_SHARED 1 #define USE_SHARED 1
uint32_t *d_fugue256_hashoutput[8]; uint32_t *d_fugue256_hashoutput[MAX_GPUS];
uint32_t *d_resultNonce[8]; uint32_t *d_resultNonce[MAX_GPUS];
__constant__ uint32_t GPUstate[30]; // Single GPU __constant__ uint32_t GPUstate[30]; // Single GPU
__constant__ uint32_t pTarget[8]; // Single GPU __constant__ uint32_t pTarget[8]; // Single GPU

4
Algo256/cuda_groestl256.cu

@ -2,8 +2,8 @@
#include "cuda_helper.h" #include "cuda_helper.h"
uint32_t *d_gnounce[8]; uint32_t *d_gnounce[MAX_GPUS];
uint32_t *d_GNonce[8]; uint32_t *d_GNonce[MAX_GPUS];
__constant__ uint32_t pTarget[8]; __constant__ uint32_t pTarget[8];

4
Algo256/cuda_keccak256.cu

@ -22,8 +22,8 @@ static const uint64_t host_keccak_round_constants[24] = {
0x0000000080000001ull, 0x8000000080008008ull 0x0000000080000001ull, 0x8000000080008008ull
}; };
uint32_t *d_nounce[8]; uint32_t *d_nounce[MAX_GPUS];
uint32_t *d_KNonce[8]; uint32_t *d_KNonce[MAX_GPUS];
__constant__ uint32_t pTarget[8]; __constant__ uint32_t pTarget[8];
__constant__ uint64_t keccak_round_constants[24]; __constant__ uint64_t keccak_round_constants[24];

4
Algo256/keccak256.cu

@ -14,7 +14,7 @@ extern "C"
#include "cuda_helper.h" #include "cuda_helper.h"
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void keccak256_cpu_init(int thr_id, int threads); extern void keccak256_cpu_init(int thr_id, int threads);
extern void keccak256_setBlock_80(void *pdata,const void *ptarget); extern void keccak256_setBlock_80(void *pdata,const void *ptarget);
@ -34,7 +34,7 @@ extern "C" void keccak256_hash(void *state, const void *input)
memcpy(state, hash, 32); memcpy(state, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata, extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

11
JHA/cuda_jha_compactionTest.cu

@ -4,11 +4,11 @@
#include "cuda_helper.h" #include "cuda_helper.h"
#include <sm_30_intrinsics.h> #include <sm_30_intrinsics.h>
static uint32_t *d_tempBranch1Nonces[8]; static uint32_t *d_tempBranch1Nonces[MAX_GPUS];
static uint32_t *d_numValid[8]; static uint32_t *d_numValid[MAX_GPUS];
static uint32_t *h_numValid[8]; static uint32_t *h_numValid[MAX_GPUS];
static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen static uint32_t *d_partSum[2][MAX_GPUS]; // für bis zu vier partielle Summen
// True/False tester // True/False tester
typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash); typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash);
@ -26,7 +26,8 @@ __device__ uint32_t JackpotFalseTest(uint32_t *inpHash)
} }
__device__ cuda_compactTestFunction_t d_JackpotTrueFunction = JackpotTrueTest, d_JackpotFalseFunction = JackpotFalseTest; __device__ cuda_compactTestFunction_t d_JackpotTrueFunction = JackpotTrueTest, d_JackpotFalseFunction = JackpotFalseTest;
cuda_compactTestFunction_t h_JackpotTrueFunction[8], h_JackpotFalseFunction[8];
cuda_compactTestFunction_t h_JackpotTrueFunction[MAX_GPUS], h_JackpotFalseFunction[MAX_GPUS];
// Setup-Funktionen // Setup-Funktionen
__host__ void jackpot_compactTest_cpu_init(int thr_id, int threads) __host__ void jackpot_compactTest_cpu_init(int thr_id, int threads)

12
JHA/jackpotcoin.cu

@ -10,7 +10,7 @@ extern "C"
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void jackpot_keccak512_cpu_init(int thr_id, int threads); extern void jackpot_keccak512_cpu_init(int thr_id, int threads);
extern void jackpot_keccak512_cpu_setBlock(void *pdata, size_t inlen); extern void jackpot_keccak512_cpu_setBlock(void *pdata, size_t inlen);
@ -37,10 +37,10 @@ extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t st
extern uint32_t cuda_check_hash_branch(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order); extern uint32_t cuda_check_hash_branch(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes // Speicher zur Generierung der Noncevektoren für die bedingten Hashes
static uint32_t *d_jackpotNonces[8]; static uint32_t *d_jackpotNonces[MAX_GPUS];
static uint32_t *d_branch1Nonces[8]; static uint32_t *d_branch1Nonces[MAX_GPUS];
static uint32_t *d_branch2Nonces[8]; static uint32_t *d_branch2Nonces[MAX_GPUS];
static uint32_t *d_branch3Nonces[8]; static uint32_t *d_branch3Nonces[MAX_GPUS];
// Original jackpothash Funktion aus einem miner Quelltext // Original jackpothash Funktion aus einem miner Quelltext
extern "C" unsigned int jackpothash(void *state, const void *input) extern "C" unsigned int jackpothash(void *state, const void *input)
@ -85,7 +85,7 @@ extern "C" unsigned int jackpothash(void *state, const void *input)
return round; return round;
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata, extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

11
ccminer.cpp

@ -214,9 +214,9 @@ bool opt_trust_pool = false;
uint16_t opt_vote = 9999; uint16_t opt_vote = 9999;
int num_cpus; int num_cpus;
int active_gpus; int active_gpus;
char * device_name[8]; char * device_name[MAX_GPUS];
short device_map[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; short device_map[MAX_GPUS] = { 0 };
long device_sm[8] = { 0 }; long device_sm[MAX_GPUS] = { 0 };
char *rpc_user = NULL; char *rpc_user = NULL;
static char *rpc_url; static char *rpc_url;
static char *rpc_userpass; static char *rpc_userpass;
@ -2223,6 +2223,11 @@ int main(int argc, char *argv[])
if (num_cpus < 1) if (num_cpus < 1)
num_cpus = 1; num_cpus = 1;
// default thread to device map
for (i = 0; i < MAX_GPUS; i++) {
device_map[i] = i;
}
// number of gpus // number of gpus
active_gpus = cuda_num_devices(); active_gpus = cuda_num_devices();
cuda_devicenames(); cuda_devicenames();

6
cuda_checkhash.cu

@ -10,9 +10,9 @@
__constant__ uint32_t pTarget[8]; // 32 bytes __constant__ uint32_t pTarget[8]; // 32 bytes
// store 8 device arrays of 8 nonces // store MAX_GPUS device arrays of 8 nonces
static uint32_t* h_resNonces[8]; static uint32_t* h_resNonces[MAX_GPUS];
static uint32_t* d_resNonces[8]; static uint32_t* d_resNonces[MAX_GPUS];
__host__ __host__
void cuda_check_cpu_init(int thr_id, int threads) void cuda_check_cpu_init(int thr_id, int threads)

2
cuda_groestlcoin.cu

@ -8,7 +8,7 @@
// globaler Speicher für alle HeftyHashes aller Threads // globaler Speicher für alle HeftyHashes aller Threads
__constant__ uint32_t pTarget[8]; // Single GPU __constant__ uint32_t pTarget[8]; // Single GPU
extern uint32_t *d_resultNonce[8]; extern uint32_t *d_resultNonce[MAX_GPUS];
__constant__ uint32_t groestlcoin_gpu_msg[32]; __constant__ uint32_t groestlcoin_gpu_msg[32];

8
cuda_helper.h

@ -13,8 +13,12 @@
#include <stdint.h> #include <stdint.h>
extern "C" short device_map[8]; #ifndef MAX_GPUS
extern "C" long device_sm[8]; #define MAX_GPUS 16
#endif
extern "C" short device_map[MAX_GPUS];
extern "C" long device_sm[MAX_GPUS];
// common functions // common functions
extern void cuda_check_cpu_init(int thr_id, int threads); extern void cuda_check_cpu_init(int thr_id, int threads);

4
cuda_myriadgroestl.cu

@ -15,8 +15,8 @@
// globaler Speicher für alle HeftyHashes aller Threads // globaler Speicher für alle HeftyHashes aller Threads
__constant__ uint32_t pTarget[8]; // Single GPU __constant__ uint32_t pTarget[8]; // Single GPU
uint32_t *d_outputHashes[8]; uint32_t *d_outputHashes[MAX_GPUS];
extern uint32_t *d_resultNonce[8]; extern uint32_t *d_resultNonce[MAX_GPUS];
__constant__ uint32_t myriadgroestl_gpu_msg[32]; __constant__ uint32_t myriadgroestl_gpu_msg[32];

4
cuda_nist5.cu

@ -11,7 +11,7 @@ extern "C"
#include "cuda_helper.h" #include "cuda_helper.h"
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
@ -64,7 +64,7 @@ extern "C" void nist5hash(void *state, const void *input)
memcpy(state, hash, 32); memcpy(state, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata, extern "C" int scanhash_nist5(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

4
fuguecoin.cpp

@ -14,13 +14,13 @@ extern "C" void my_fugue256_close(void *cc, void *dst);
extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
// vorbereitete Kontexte nach den ersten 80 Bytes // vorbereitete Kontexte nach den ersten 80 Bytes
sph_fugue256_context ctx_fugue_const[8]; // sph_fugue256_context ctx_fugue_const[MAX_GPUS];
#define SWAP32(x) \ #define SWAP32(x) \
((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \ ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) | \
(((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu)) (((x) >> 8) & 0x0000ff00u) | (((x) >> 24) & 0x000000ffu))
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *ptarget, extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done) uint32_t max_nonce, unsigned long *hashes_done)

2
groestlcoin.cpp

@ -58,7 +58,7 @@ extern "C" void groestlhash(void *state, const void *input)
memcpy(state, hashB, 32); memcpy(state, hashB, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget, extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done) uint32_t max_nonce, unsigned long *hashes_done)

6
heavy/cuda_blake512.cu

@ -4,11 +4,11 @@
#include "cuda_helper.h" #include "cuda_helper.h"
// globaler Speicher für alle HeftyHashes aller Threads // globaler Speicher für alle HeftyHashes aller Threads
extern uint32_t *heavy_heftyHashes[8]; extern uint32_t *heavy_heftyHashes[MAX_GPUS];
extern uint32_t *heavy_nonceVector[8]; extern uint32_t *heavy_nonceVector[MAX_GPUS];
// globaler Speicher für unsere Ergebnisse // globaler Speicher für unsere Ergebnisse
uint32_t *d_hash5output[8]; uint32_t *d_hash5output[MAX_GPUS];
// die Message (112 bzw. 116 Bytes) mit Padding zur Berechnung auf der GPU // die Message (112 bzw. 116 Bytes) mit Padding zur Berechnung auf der GPU
__constant__ uint64_t c_PaddedMessage[16]; // padded message (80/84+32 bytes + padding) __constant__ uint64_t c_PaddedMessage[16]; // padded message (80/84+32 bytes + padding)

12
heavy/cuda_combine.cu

@ -3,13 +3,13 @@
#include "cuda_helper.h" #include "cuda_helper.h"
// globaler Speicher für unsere Ergebnisse // globaler Speicher für unsere Ergebnisse
static uint32_t *d_hashoutput[8]; static uint32_t *d_hashoutput[MAX_GPUS];
extern uint32_t *d_hash2output[8]; extern uint32_t *d_hash2output[MAX_GPUS];
extern uint32_t *d_hash3output[8]; extern uint32_t *d_hash3output[MAX_GPUS];
extern uint32_t *d_hash4output[8]; extern uint32_t *d_hash4output[MAX_GPUS];
extern uint32_t *d_hash5output[8]; extern uint32_t *d_hash5output[MAX_GPUS];
extern uint32_t *heavy_nonceVector[8]; extern uint32_t *heavy_nonceVector[MAX_GPUS];
/* Combines top 64-bits from each hash into a single hash */ /* Combines top 64-bits from each hash into a single hash */
__device__ __device__

6
heavy/cuda_groestl512.cu

@ -4,11 +4,11 @@
#include "cuda_helper.h" #include "cuda_helper.h"
// globaler Speicher für alle HeftyHashes aller Threads // globaler Speicher für alle HeftyHashes aller Threads
extern uint32_t *heavy_heftyHashes[8]; extern uint32_t *heavy_heftyHashes[MAX_GPUS];
extern uint32_t *heavy_nonceVector[8]; extern uint32_t *heavy_nonceVector[MAX_GPUS];
// globaler Speicher für unsere Ergebnisse // globaler Speicher für unsere Ergebnisse
uint32_t *d_hash4output[8]; uint32_t *d_hash4output[MAX_GPUS];
__constant__ uint32_t groestl_gpu_state[32]; __constant__ uint32_t groestl_gpu_state[32];
__constant__ uint32_t groestl_gpu_msg[32]; __constant__ uint32_t groestl_gpu_msg[32];

2
heavy/cuda_hefty1.cu

@ -8,7 +8,7 @@
#define USE_SHARED 1 #define USE_SHARED 1
// globaler Speicher für alle HeftyHashes aller Threads // globaler Speicher für alle HeftyHashes aller Threads
uint32_t *heavy_heftyHashes[8]; uint32_t *heavy_heftyHashes[MAX_GPUS];
/* Hash-Tabellen */ /* Hash-Tabellen */
__constant__ uint32_t hefty_gpu_constantTable[64]; __constant__ uint32_t hefty_gpu_constantTable[64];

10
heavy/cuda_keccak512.cu

@ -4,13 +4,13 @@
#include "cuda_helper.h" #include "cuda_helper.h"
// globaler Speicher für alle HeftyHashes aller Threads // globaler Speicher für alle HeftyHashes aller Threads
extern uint32_t *heavy_heftyHashes[8]; extern uint32_t *heavy_heftyHashes[MAX_GPUS];
extern uint32_t *heavy_nonceVector[8]; extern uint32_t *heavy_nonceVector[MAX_GPUS];
// globaler Speicher für unsere Ergebnisse // globaler Speicher für unsere Ergebnisse
uint32_t *d_hash3output[8]; uint32_t *d_hash3output[MAX_GPUS];
extern uint32_t *d_hash4output[8]; extern uint32_t *d_hash4output[MAX_GPUS];
extern uint32_t *d_hash5output[8]; extern uint32_t *d_hash5output[MAX_GPUS];
// der Keccak512 State nach der ersten Runde (72 Bytes) // der Keccak512 State nach der ersten Runde (72 Bytes)
__constant__ uint64_t c_State[25]; __constant__ uint64_t c_State[25];

6
heavy/cuda_sha256.cu

@ -4,11 +4,11 @@
#include "cuda_helper.h" #include "cuda_helper.h"
// globaler Speicher für alle HeftyHashes aller Threads // globaler Speicher für alle HeftyHashes aller Threads
extern uint32_t *heavy_heftyHashes[8]; extern uint32_t *heavy_heftyHashes[MAX_GPUS];
extern uint32_t *heavy_nonceVector[8]; extern uint32_t *heavy_nonceVector[MAX_GPUS];
// globaler Speicher für unsere Ergebnisse // globaler Speicher für unsere Ergebnisse
uint32_t *d_hash2output[8]; uint32_t *d_hash2output[MAX_GPUS];
/* Hash-Tabellen */ /* Hash-Tabellen */

14
heavy/heavy.cu

@ -16,18 +16,18 @@ extern "C" {
#include "heavy/heavy.h" #include "heavy/heavy.h"
#include "cuda_helper.h" #include "cuda_helper.h"
extern uint32_t *d_hash2output[8]; extern uint32_t *d_hash2output[MAX_GPUS];
extern uint32_t *d_hash3output[8]; extern uint32_t *d_hash3output[MAX_GPUS];
extern uint32_t *d_hash4output[8]; extern uint32_t *d_hash4output[MAX_GPUS];
extern uint32_t *d_hash5output[8]; extern uint32_t *d_hash5output[MAX_GPUS];
#define HEAVYCOIN_BLKHDR_SZ 84 #define HEAVYCOIN_BLKHDR_SZ 84
#define MNR_BLKHDR_SZ 80 #define MNR_BLKHDR_SZ 80
// nonce-array für die threads // nonce-array für die threads
uint32_t *heavy_nonceVector[8]; uint32_t *heavy_nonceVector[MAX_GPUS];
extern uint32_t *heavy_heftyHashes[8]; extern uint32_t *heavy_heftyHashes[MAX_GPUS];
/* Combines top 64-bits from each hash into a single hash */ /* Combines top 64-bits from each hash into a single hash */
static void combine_hashes(uint32_t *out, const uint32_t *hash1, const uint32_t *hash2, const uint32_t *hash3, const uint32_t *hash4) static void combine_hashes(uint32_t *out, const uint32_t *hash1, const uint32_t *hash2, const uint32_t *hash3, const uint32_t *hash4)
@ -127,7 +127,7 @@ struct check_nonce_for_remove
} }
}; };
static bool init[8] = {0,0,0,0,0,0,0,0}; static bool init[MAX_GPUS] = { 0 };
__host__ __host__
int scanhash_heavy(int thr_id, uint32_t *pdata, int scanhash_heavy(int thr_id, uint32_t *pdata,

4
lyra2/lyra2RE.cu

@ -9,7 +9,7 @@ extern "C" {
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
static _ALIGN(64) uint64_t *d_hash[8]; static _ALIGN(64) uint64_t *d_hash[MAX_GPUS];
extern void blake256_cpu_init(int thr_id, int threads); extern void blake256_cpu_init(int thr_id, int threads);
extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order); extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const uint32_t startNonce, uint64_t *Hash, int order);
@ -55,7 +55,7 @@ extern "C" void lyra2_hash(void *state, const void *input)
memcpy(state, hashA, 32); memcpy(state, hashA, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_lyra2(int thr_id, uint32_t *pdata, extern "C" int scanhash_lyra2(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

7
miner.h

@ -485,9 +485,10 @@ extern uint32_t opt_work_size;
extern uint64_t global_hashrate; extern uint64_t global_hashrate;
extern double global_diff; extern double global_diff;
extern char* device_name[8]; #define MAX_GPUS 16
extern short device_map[8]; extern char* device_name[MAX_GPUS];
extern long device_sm[8]; extern short device_map[MAX_GPUS];
extern long device_sm[MAX_GPUS];
#define CL_N "\x1B[0m" #define CL_N "\x1B[0m"
#define CL_RED "\x1B[31m" #define CL_RED "\x1B[31m"

2
myriadgroestl.cpp

@ -32,7 +32,7 @@ extern "C" void myriadhash(void *state, const void *input)
memcpy(state, hashB, 32); memcpy(state, hashB, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget, extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done) uint32_t max_nonce, unsigned long *hashes_done)

4
nvml.cpp

@ -34,7 +34,7 @@ int cuda_num_devices();
extern nvml_handle *hnvml; extern nvml_handle *hnvml;
extern char driver_version[32]; extern char driver_version[32];
static uint32_t device_bus_ids[8] = { 0 }; static uint32_t device_bus_ids[MAX_GPUS] = { 0 };
/* /*
* Wrappers to emulate dlopen() on other systems like Windows * Wrappers to emulate dlopen() on other systems like Windows
@ -451,7 +451,7 @@ int nvml_destroy(nvml_handle *nvmlh)
#ifdef WIN32 #ifdef WIN32
#include "nvapi/nvapi_ccminer.h" #include "nvapi/nvapi_ccminer.h"
static int nvapi_dev_map[8] = { 0 }; static int nvapi_dev_map[MAX_GPUS] = { 0 };
static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 }; static NvDisplayHandle hDisplay_a[NVAPI_MAX_PHYSICAL_GPUS * 2] = { 0 };
static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 }; static NvPhysicalGpuHandle phys[NVAPI_MAX_PHYSICAL_GPUS] = { 0 };
static NvU32 nvapi_dev_cnt = 0; static NvU32 nvapi_dev_cnt = 0;

8
pentablake.cu

@ -49,9 +49,9 @@ static uint32_t __align__(32) c_Target[8];
__constant__ __constant__
static uint64_t __align__(32) c_data[32]; static uint64_t __align__(32) c_data[32];
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
static uint32_t *d_resNounce[8]; static uint32_t *d_resNounce[MAX_GPUS];
static uint32_t *h_resNounce[8]; static uint32_t *h_resNounce[MAX_GPUS];
static uint32_t extra_results[2] = { UINT32_MAX, UINT32_MAX }; static uint32_t extra_results[2] = { UINT32_MAX, UINT32_MAX };
/* prefer uint32_t to prevent size conversions = speed +5/10 % */ /* prefer uint32_t to prevent size conversions = speed +5/10 % */
@ -362,7 +362,7 @@ void pentablake_cpu_setBlock_80(uint32_t *pdata, const uint32_t *ptarget)
CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice)); CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_Target, ptarget, 32, 0, cudaMemcpyHostToDevice));
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *ptarget, extern "C" int scanhash_pentablake(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done) uint32_t max_nonce, unsigned long *hashes_done)

12
quark/animecoin.cu

@ -10,13 +10,13 @@ extern "C"
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes // Speicher zur Generierung der Noncevektoren für die bedingten Hashes
static uint32_t *d_animeNonces[8]; static uint32_t *d_animeNonces[MAX_GPUS];
static uint32_t *d_branch1Nonces[8]; static uint32_t *d_branch1Nonces[MAX_GPUS];
static uint32_t *d_branch2Nonces[8]; static uint32_t *d_branch2Nonces[MAX_GPUS];
static uint32_t *d_branch3Nonces[8]; static uint32_t *d_branch3Nonces[MAX_GPUS];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void quark_blake512_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -160,7 +160,7 @@ struct HashPredicate
}; };
*/ */
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_anime(int thr_id, uint32_t *pdata, extern "C" int scanhash_anime(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

11
quark/cuda_quark_compactionTest.cu

@ -4,11 +4,11 @@
#include "cuda_helper.h" #include "cuda_helper.h"
#include <sm_30_intrinsics.h> #include <sm_30_intrinsics.h>
static uint32_t *d_tempBranch1Nonces[8]; static uint32_t *d_tempBranch1Nonces[MAX_GPUS];
static uint32_t *d_numValid[8]; static uint32_t *d_numValid[MAX_GPUS];
static uint32_t *h_numValid[8]; static uint32_t *h_numValid[MAX_GPUS];
static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen static uint32_t *d_partSum[2][MAX_GPUS]; // für bis zu vier partielle Summen
// True/False tester // True/False tester
typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash); typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash);
@ -24,7 +24,8 @@ __device__ uint32_t QuarkFalseTest(uint32_t *inpHash)
} }
__device__ cuda_compactTestFunction_t d_QuarkTrueFunction = QuarkTrueTest, d_QuarkFalseFunction = QuarkFalseTest; __device__ cuda_compactTestFunction_t d_QuarkTrueFunction = QuarkTrueTest, d_QuarkFalseFunction = QuarkFalseTest;
cuda_compactTestFunction_t h_QuarkTrueFunction[8], h_QuarkFalseFunction[8];
cuda_compactTestFunction_t h_QuarkTrueFunction[MAX_GPUS], h_QuarkFalseFunction[MAX_GPUS];
// Setup-Funktionen // Setup-Funktionen
__host__ void quark_compactTest_cpu_init(int thr_id, int threads) __host__ void quark_compactTest_cpu_init(int thr_id, int threads)

14
quark/quarkcoin.cu

@ -12,13 +12,13 @@ extern "C"
#include "cuda_helper.h" #include "cuda_helper.h"
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes // Speicher zur Generierung der Noncevektoren für die bedingten Hashes
static uint32_t *d_quarkNonces[8]; static uint32_t *d_quarkNonces[MAX_GPUS];
static uint32_t *d_branch1Nonces[8]; static uint32_t *d_branch1Nonces[MAX_GPUS];
static uint32_t *d_branch2Nonces[8]; static uint32_t *d_branch2Nonces[MAX_GPUS];
static uint32_t *d_branch3Nonces[8]; static uint32_t *d_branch3Nonces[MAX_GPUS];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
@ -130,7 +130,7 @@ extern "C" void quarkhash(void *state, const void *input)
memcpy(state, hash, 32); memcpy(state, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_quark(int thr_id, uint32_t *pdata, extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,
@ -149,7 +149,7 @@ extern "C" int scanhash_quark(int thr_id, uint32_t *pdata,
cudaSetDevice(device_map[thr_id]); cudaSetDevice(device_map[thr_id]);
// Konstanten kopieren, Speicher belegen // Konstanten kopieren, Speicher belegen
cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput); CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
quark_blake512_cpu_init(thr_id, throughput); quark_blake512_cpu_init(thr_id, throughput);
quark_groestl512_cpu_init(thr_id, throughput); quark_groestl512_cpu_init(thr_id, throughput);

4
qubit/deep.cu

@ -14,7 +14,7 @@ extern "C" {
#include "cuda_helper.h" #include "cuda_helper.h"
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void qubit_luffa512_cpu_init(int thr_id, int threads); extern void qubit_luffa512_cpu_init(int thr_id, int threads);
extern void qubit_luffa512_cpu_setBlock_80(void *pdata); extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
@ -52,7 +52,7 @@ extern "C" void deephash(void *state, const void *input)
memcpy(state, hash, 32); memcpy(state, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_deep(int thr_id, uint32_t *pdata, extern "C" int scanhash_deep(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

4
qubit/doom.cu

@ -10,7 +10,7 @@ extern "C" {
#include "cuda_helper.h" #include "cuda_helper.h"
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void qubit_luffa512_cpu_init(int thr_id, int threads); extern void qubit_luffa512_cpu_init(int thr_id, int threads);
extern void qubit_luffa512_cpu_setBlock_80(void *pdata); extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
@ -32,7 +32,7 @@ extern void doomhash(void *state, const void *input)
memcpy(state, hash, 32); memcpy(state, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_doom(int thr_id, uint32_t *pdata, extern "C" int scanhash_doom(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

4
qubit/qubit.cu

@ -14,7 +14,7 @@ extern "C" {
#include "cuda_helper.h" #include "cuda_helper.h"
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void qubit_luffa512_cpu_init(int thr_id, int threads); extern void qubit_luffa512_cpu_init(int thr_id, int threads);
extern void qubit_luffa512_cpu_setBlock_80(void *pdata); extern void qubit_luffa512_cpu_setBlock_80(void *pdata);
@ -72,7 +72,7 @@ extern "C" void qubithash(void *state, const void *input)
memcpy(state, hash, 32); memcpy(state, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata, extern "C" int scanhash_qubit(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

4
qubit/qubit_luffa512.cu

@ -32,8 +32,8 @@ typedef unsigned char BitSequence;
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
__constant__ uint32_t c_Target[8]; __constant__ uint32_t c_Target[8];
static uint32_t *h_resNounce[8]; static uint32_t *h_resNounce[MAX_GPUS];
static uint32_t *d_resNounce[8]; static uint32_t *d_resNounce[MAX_GPUS];
#define NBN 1 /* max results, could be 2, see blake32.cu */ #define NBN 1 /* max results, could be 2, see blake32.cu */
#if NBN > 1 #if NBN > 1

4
x11/cuda_x11_simd512.cu

@ -10,8 +10,8 @@
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
uint32_t *d_state[8]; uint32_t *d_state[MAX_GPUS];
uint4 *d_temp4[8]; uint4 *d_temp4[MAX_GPUS];
// texture bound to d_temp4[thr_id], for read access in Compaction kernel // texture bound to d_temp4[thr_id], for read access in Compaction kernel
texture<uint4, 1, cudaReadModeElementType> texRef1D_128; texture<uint4, 1, cudaReadModeElementType> texRef1D_128;

4
x11/fresh.cu

@ -12,7 +12,7 @@ extern "C" {
// to test gpu hash on a null buffer // to test gpu hash on a null buffer
#define NULLTEST 0 #define NULLTEST 0
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_setBlock_80(void *pdata); extern void x11_shavite512_setBlock_80(void *pdata);
@ -68,7 +68,7 @@ extern "C" void fresh_hash(void *state, const void *input)
memcpy(state, hash, 32); memcpy(state, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata, extern "C" int scanhash_fresh(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

4
x11/s3.cu

@ -13,7 +13,7 @@ extern "C" {
#include <stdint.h> #include <stdint.h>
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void x11_shavite512_cpu_init(int thr_id, int threads); extern void x11_shavite512_cpu_init(int thr_id, int threads);
extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order); extern void x11_shavite512_cpu_hash_80(int thr_id, int threads, uint32_t startNounce, uint32_t *d_hash, int order);
@ -49,7 +49,7 @@ extern "C" void s3hash(void *output, const void *input)
memcpy(output, hash, 32); memcpy(output, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
/* Main S3 entry point */ /* Main S3 entry point */
extern "C" int scanhash_s3(int thr_id, uint32_t *pdata, extern "C" int scanhash_s3(int thr_id, uint32_t *pdata,

4
x11/x11.cu

@ -20,7 +20,7 @@ extern "C"
#include <stdio.h> #include <stdio.h>
#include <memory.h> #include <memory.h>
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
@ -125,7 +125,7 @@ extern "C" void x11hash(void *output, const void *input)
memcpy(output, hash, 32); memcpy(output, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_x11(int thr_id, uint32_t *pdata, extern "C" int scanhash_x11(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

4
x13/x13.cu

@ -23,7 +23,7 @@ extern "C"
#include "cuda_helper.h" #include "cuda_helper.h"
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
@ -144,7 +144,7 @@ extern "C" void x13hash(void *output, const void *input)
memcpy(output, hash, 32); memcpy(output, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_x13(int thr_id, uint32_t *pdata, extern "C" int scanhash_x13(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

4
x15/cuda_x15_whirlpool.cu

@ -14,8 +14,8 @@
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding) __constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
__constant__ uint32_t pTarget[8]; __constant__ uint32_t pTarget[8];
uint32_t *d_wnounce[8]; uint32_t *d_wnounce[MAX_GPUS];
uint32_t *d_WNonce[8]; uint32_t *d_WNonce[MAX_GPUS];
#define USE_ALL_TABLES 1 #define USE_ALL_TABLES 1

4
x15/whirlpool.cu

@ -9,7 +9,7 @@ extern "C"
#include "cuda_helper.h" #include "cuda_helper.h"
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode); extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode);
extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); extern void x15_whirlpool_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -49,7 +49,7 @@ extern "C" void wcoinhash(void *state, const void *input)
memcpy(state, hash, 32); memcpy(state, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_whc(int thr_id, uint32_t *pdata, extern "C" int scanhash_whc(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

4
x15/x14.cu

@ -27,7 +27,7 @@ extern "C" {
#include "cuda_helper.h" #include "cuda_helper.h"
// Memory for the hash functions // Memory for the hash functions
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
@ -155,7 +155,7 @@ extern "C" void x14hash(void *output, const void *input)
memcpy(output, hash, 32); memcpy(output, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_x14(int thr_id, uint32_t *pdata, extern "C" int scanhash_x14(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

4
x15/x15.cu

@ -28,7 +28,7 @@ extern "C" {
#include "cuda_helper.h" #include "cuda_helper.h"
// Memory for the hash functions // Memory for the hash functions
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
@ -165,7 +165,7 @@ extern "C" void x15hash(void *output, const void *input)
memcpy(output, hash, 32); memcpy(output, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_x15(int thr_id, uint32_t *pdata, extern "C" int scanhash_x15(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

4
x17/x17.cu

@ -31,7 +31,7 @@ extern "C"
#include "miner.h" #include "miner.h"
#include "cuda_helper.h" #include "cuda_helper.h"
static uint32_t *d_hash[8]; static uint32_t *d_hash[MAX_GPUS];
extern void quark_blake512_cpu_init(int thr_id, int threads); extern void quark_blake512_cpu_init(int thr_id, int threads);
extern void quark_blake512_cpu_setBlock_80(void *pdata); extern void quark_blake512_cpu_setBlock_80(void *pdata);
@ -184,7 +184,7 @@ extern "C" void x17hash(void *output, const void *input)
memcpy(output, hash, 32); memcpy(output, hash, 32);
} }
static bool init[8] = { 0 }; static bool init[MAX_GPUS] = { 0 };
extern "C" int scanhash_x17(int thr_id, uint32_t *pdata, extern "C" int scanhash_x17(int thr_id, uint32_t *pdata,
const uint32_t *ptarget, uint32_t max_nonce, const uint32_t *ptarget, uint32_t max_nonce,

Loading…
Cancel
Save