Browse Source

algos: free allocated mem for algo switch

All can be freed propertly now, except script (reset) and lyra2 (leak)
master
Tanguy Pruvot 9 years ago
parent
commit
922c2a5cd7
  1. 4
      Algo256/bmw.cu
  2. 6
      Algo256/cuda_bmw.cu
  3. 27
      Algo256/cuda_groestl256.cu
  4. 17
      JHA/cuda_jha_compactionTest.cu
  5. 6
      JHA/jackpotcoin.cu
  6. 2
      cuda_nist5.cu
  7. 7
      cuda_skeincoin.cu
  8. 16
      heavy/cuda_blake512.cu
  9. 8
      heavy/cuda_combine.cu
  10. 43
      heavy/cuda_groestl512.cu
  11. 11
      heavy/cuda_hefty1.cu
  12. 10
      heavy/cuda_keccak512.cu
  13. 13
      heavy/cuda_sha256.cu
  14. 10
      heavy/heavy.cu
  15. 6
      heavy/heavy.h
  16. 3
      lyra2/lyra2RE.cu
  17. 3
      lyra2/lyra2REv2.cu
  18. 2
      neoscrypt/cuda_neoscrypt.cu
  19. 4
      pentablake.cu
  20. 8
      quark/cuda_quark_groestl512.cu
  21. 32
      quark/cuda_quark_groestl512_sm20.cu
  22. 2
      quark/quarkcoin.cu
  23. 9
      qubit/qubit.cu
  24. 24
      scrypt-jane.cpp
  25. 27
      scrypt.cpp
  26. 14
      scrypt/blake.cu
  27. 5
      scrypt/salsa_kernel.cu
  28. 10
      skein.cu
  29. 12
      skein2.cpp
  30. 4
      x11/c11.cu
  31. 7
      x11/cuda_x11_simd512.cu
  32. 2
      x11/fresh.cu
  33. 2
      x11/s3.cu
  34. 5
      x11/x11.cu
  35. 31
      x13/cuda_x13_fugue512.cu
  36. 8
      x13/x13.cu
  37. 14
      x15/x14.cu
  38. 7
      x15/x15.cu
  39. 7
      x17/x17.cu
  40. 2
      zr5.cu

4
Algo256/bmw.cu

@ -13,6 +13,7 @@ extern "C" { @@ -13,6 +13,7 @@ extern "C" {
static uint32_t *d_hash[MAX_GPUS];
extern void bmw256_midstate_init(int thr_id, uint32_t threads);
extern void bmw256_midstate_free(int thr_id);
extern void bmw256_setBlock_80(int thr_id, void *pdata);
extern void bmw256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int swap);
@ -111,8 +112,9 @@ extern "C" void free_bmw(int thr_id) @@ -111,8 +112,9 @@ extern "C" void free_bmw(int thr_id)
cudaSetDevice(device_map[thr_id]);
cudaFree(d_hash[thr_id]);
bmw256_midstate_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;
cudaDeviceSynchronize();

6
Algo256/cuda_bmw.cu

@ -372,3 +372,9 @@ void bmw256_midstate_init(int thr_id, uint32_t threads) @@ -372,3 +372,9 @@ void bmw256_midstate_init(int thr_id, uint32_t threads)
{
cudaMalloc(&d_midstate[thr_id], sizeof(sph_bmw256_context));
}
__host__
void bmw256_midstate_free(int thr_id)
{
cudaFree(d_midstate[thr_id]);
}

27
Algo256/cuda_groestl256.cu

@ -7,6 +7,7 @@ @@ -7,6 +7,7 @@
static uint32_t *h_GNonces[MAX_GPUS];
static uint32_t *d_GNonces[MAX_GPUS];
static unsigned int* d_textures[MAX_GPUS][8];
__constant__ uint32_t pTarget[8];
@ -249,28 +250,31 @@ void groestl256_gpu_hash32(uint32_t threads, uint32_t startNounce, uint64_t *out @@ -249,28 +250,31 @@ void groestl256_gpu_hash32(uint32_t threads, uint32_t startNounce, uint64_t *out
}
}
#define texDef(texname, texmem, texsource, texsize) \
#define texDef(id, texname, texmem, texsource, texsize) { \
unsigned int *texmem; \
cudaMalloc(&texmem, texsize); \
d_textures[thr_id][id] = texmem; \
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
texname.normalized = 0; \
texname.filterMode = cudaFilterModePoint; \
texname.addressMode[0] = cudaAddressModeClamp; \
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); \
} \
}
__host__
void groestl256_cpu_init(int thr_id, uint32_t threads)
{
// Texturen mit obigem Makro initialisieren
texDef(t0up2, d_T0up, T0up_cpu, sizeof(uint32_t) * 256);
texDef(t0dn2, d_T0dn, T0dn_cpu, sizeof(uint32_t) * 256);
texDef(t1up2, d_T1up, T1up_cpu, sizeof(uint32_t) * 256);
texDef(t1dn2, d_T1dn, T1dn_cpu, sizeof(uint32_t) * 256);
texDef(t2up2, d_T2up, T2up_cpu, sizeof(uint32_t) * 256);
texDef(t2dn2, d_T2dn, T2dn_cpu, sizeof(uint32_t) * 256);
texDef(t3up2, d_T3up, T3up_cpu, sizeof(uint32_t) * 256);
texDef(t3dn2, d_T3dn, T3dn_cpu, sizeof(uint32_t) * 256);
texDef(0, t0up2, d_T0up, T0up_cpu, sizeof(uint32_t) * 256);
texDef(1, t0dn2, d_T0dn, T0dn_cpu, sizeof(uint32_t) * 256);
texDef(2, t1up2, d_T1up, T1up_cpu, sizeof(uint32_t) * 256);
texDef(3, t1dn2, d_T1dn, T1dn_cpu, sizeof(uint32_t) * 256);
texDef(4, t2up2, d_T2up, T2up_cpu, sizeof(uint32_t) * 256);
texDef(5, t2dn2, d_T2dn, T2dn_cpu, sizeof(uint32_t) * 256);
texDef(6, t3up2, d_T3up, T3up_cpu, sizeof(uint32_t) * 256);
texDef(7, t3dn2, d_T3dn, T3dn_cpu, sizeof(uint32_t) * 256);
cudaMalloc(&d_GNonces[thr_id], 2*sizeof(uint32_t));
cudaMallocHost(&h_GNonces[thr_id], 2*sizeof(uint32_t));
@ -279,6 +283,9 @@ void groestl256_cpu_init(int thr_id, uint32_t threads) @@ -279,6 +283,9 @@ void groestl256_cpu_init(int thr_id, uint32_t threads)
__host__
void groestl256_cpu_free(int thr_id)
{
for (int i=0; i<8; i++)
cudaFree(d_textures[thr_id][i]);
cudaFree(d_GNonces[thr_id]);
cudaFreeHost(h_GNonces[thr_id]);
}

17
JHA/cuda_jha_compactionTest.cu

@ -33,8 +33,9 @@ __device__ cuda_compactTestFunction_t d_JackpotTrueFunction = JackpotTrueTest, d @@ -33,8 +33,9 @@ __device__ cuda_compactTestFunction_t d_JackpotTrueFunction = JackpotTrueTest, d
cuda_compactTestFunction_t h_JackpotTrueFunction[MAX_GPUS], h_JackpotFalseFunction[MAX_GPUS];
// Setup-Funktionen
__host__ void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads)
// Setup-Function
__host__
void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads)
{
cudaMemcpyFromSymbol(&h_JackpotTrueFunction[thr_id], d_JackpotTrueFunction, sizeof(cuda_compactTestFunction_t));
cudaMemcpyFromSymbol(&h_JackpotFalseFunction[thr_id], d_JackpotFalseFunction, sizeof(cuda_compactTestFunction_t));
@ -51,6 +52,18 @@ __host__ void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads) @@ -51,6 +52,18 @@ __host__ void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads)
cudaMalloc(&d_partSum[1][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
}
__host__
void jackpot_compactTest_cpu_free(int thr_id)
{
cudaFree(d_tempBranch1Nonces[thr_id]);
cudaFree(d_numValid[thr_id]);
cudaFree(d_partSum[0][thr_id]);
cudaFree(d_partSum[1][thr_id]);
cudaFreeHost(h_numValid[thr_id]);
}
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 300
/**
* __shfl_up() calculates a source lane ID by subtracting delta from the caller's lane ID, and clamping to the range 0..width-1

6
JHA/jackpotcoin.cu

@ -21,6 +21,7 @@ extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st @@ -21,6 +21,7 @@ extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_free(int thr_id);
extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -29,6 +30,7 @@ extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); @@ -29,6 +30,7 @@ extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads);
extern void jackpot_compactTest_cpu_free(int thr_id);
extern void jackpot_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
uint32_t *d_nonces1, uint32_t *nrm1,
uint32_t *d_nonces2, uint32_t *nrm2,
@ -264,9 +266,11 @@ extern "C" void free_jackpot(int thr_id) @@ -264,9 +266,11 @@ extern "C" void free_jackpot(int thr_id)
cudaFree(d_branch1Nonces[thr_id]);
cudaFree(d_branch2Nonces[thr_id]);
cudaFree(d_branch3Nonces[thr_id]);
cudaFree(d_jackpotNonces[thr_id]);
quark_groestl512_cpu_free(thr_id);
jackpot_compactTest_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;

2
cuda_nist5.cu

@ -19,6 +19,7 @@ extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t st @@ -19,6 +19,7 @@ extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t st
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_free(int thr_id);
extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -173,6 +174,7 @@ extern "C" void free_nist5(int thr_id) @@ -173,6 +174,7 @@ extern "C" void free_nist5(int thr_id)
cudaFree(d_hash[thr_id]);
quark_groestl512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;

7
cuda_skeincoin.cu

@ -708,6 +708,12 @@ __host__ @@ -708,6 +708,12 @@ __host__
void skeincoin_init(int thr_id)
{
cuda_get_arch(thr_id);
CUDA_SAFE_CALL(cudaMalloc(&d_found[thr_id], 2 * sizeof(uint32_t)));
}
__host__
void skeincoin_free(int thr_id) {
cudaFree(d_found[thr_id]);
}
__host__
@ -716,7 +722,6 @@ void skeincoin_setBlock_80(int thr_id, void *pdata) @@ -716,7 +722,6 @@ void skeincoin_setBlock_80(int thr_id, void *pdata)
uint64_t message[16];
memcpy(&message[0], pdata, 80);
CUDA_SAFE_CALL(cudaMalloc(&(d_found[thr_id]), 2 * sizeof(uint32_t)));
cudaMemcpyToSymbol(c_message16, &message[8], 16, 0, cudaMemcpyHostToDevice);
precalc(message);

16
heavy/cuda_blake512.cu

@ -191,8 +191,9 @@ template <int BLOCKSIZE> __global__ void blake512_gpu_hash(uint32_t threads, uin @@ -191,8 +191,9 @@ template <int BLOCKSIZE> __global__ void blake512_gpu_hash(uint32_t threads, uin
// ---------------------------- END CUDA blake512 functions ------------------------------------
// Setup-Funktionen
__host__ void blake512_cpu_init(int thr_id, uint32_t threads)
// Setup Function
__host__
void blake512_cpu_init(int thr_id, uint32_t threads)
{
// Kopiere die Hash-Tabellen in den GPU-Speicher
cudaMemcpyToSymbol( c_sigma,
@ -211,12 +212,19 @@ __host__ void blake512_cpu_init(int thr_id, uint32_t threads) @@ -211,12 +212,19 @@ __host__ void blake512_cpu_init(int thr_id, uint32_t threads)
0, cudaMemcpyHostToDevice);
// Speicher für alle Ergebnisse belegen
CUDA_SAFE_CALL(cudaMalloc(&d_hash5output[thr_id], 16 * sizeof(uint32_t) * threads));
CUDA_SAFE_CALL(cudaMalloc(&d_hash5output[thr_id], (size_t) 64 * threads));
}
__host__
void blake512_cpu_free(int thr_id)
{
cudaFree(d_hash5output[thr_id]);
}
static int BLOCKSIZE = 84;
__host__ void blake512_cpu_setBlock(void *pdata, int len)
__host__
void blake512_cpu_setBlock(void *pdata, int len)
// data muss 84-Byte haben!
// heftyHash hat 32-Byte
{

8
heavy/cuda_combine.cu

@ -124,7 +124,13 @@ __host__ @@ -124,7 +124,13 @@ __host__
void combine_cpu_init(int thr_id, uint32_t threads)
{
// Speicher für alle Ergebnisse belegen
CUDA_SAFE_CALL(cudaMalloc(&d_hashoutput[thr_id], 8 * sizeof(uint32_t) * threads));
CUDA_SAFE_CALL(cudaMalloc(&d_hashoutput[thr_id], (size_t) 32 * threads));
}
__host__
void combine_cpu_free(int thr_id)
{
cudaFree(d_hashoutput[thr_id]);
}
__host__

43
heavy/cuda_groestl512.cu

@ -9,6 +9,7 @@ @@ -9,6 +9,7 @@
// globaler Speicher für alle HeftyHashes aller Threads
extern uint32_t *heavy_heftyHashes[MAX_GPUS];
extern uint32_t *heavy_nonceVector[MAX_GPUS];
static unsigned int *d_textures[MAX_GPUS][8];
// globaler Speicher für unsere Ergebnisse
uint32_t *d_hash4output[MAX_GPUS];
@ -730,36 +731,50 @@ template <int BLOCKSIZE> __global__ void groestl512_gpu_hash(uint32_t threads, u @@ -730,36 +731,50 @@ template <int BLOCKSIZE> __global__ void groestl512_gpu_hash(uint32_t threads, u
}
}
#define texDef(texname, texmem, texsource, texsize) \
#define texDef(id, texname, texmem, texsource, texsize) { \
unsigned int *texmem; \
cudaMalloc(&texmem, texsize); \
d_textures[thr_id][id] = texmem; \
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
texname.normalized = 0; \
texname.filterMode = cudaFilterModePoint; \
texname.addressMode[0] = cudaAddressModeClamp; \
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); \
} \
}
// Setup-Funktionen
__host__ void groestl512_cpu_init(int thr_id, uint32_t threads)
// Setup Function
__host__
void groestl512_cpu_init(int thr_id, uint32_t threads)
{
// Texturen mit obigem Makro initialisieren
texDef(t0up, d_T0up, T0up_cpu, sizeof(uint32_t)*256);
texDef(t0dn, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256);
texDef(t1up, d_T1up, T1up_cpu, sizeof(uint32_t)*256);
texDef(t1dn, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256);
texDef(t2up, d_T2up, T2up_cpu, sizeof(uint32_t)*256);
texDef(t2dn, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256);
texDef(t3up, d_T3up, T3up_cpu, sizeof(uint32_t)*256);
texDef(t3dn, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256);
texDef(0, t0up, d_T0up, T0up_cpu, sizeof(uint32_t)*256);
texDef(1, t0dn, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256);
texDef(2, t1up, d_T1up, T1up_cpu, sizeof(uint32_t)*256);
texDef(3, t1dn, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256);
texDef(4, t2up, d_T2up, T2up_cpu, sizeof(uint32_t)*256);
texDef(5, t2dn, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256);
texDef(6, t3up, d_T3up, T3up_cpu, sizeof(uint32_t)*256);
texDef(7, t3dn, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256);
// Speicher für alle Ergebnisse belegen
cudaMalloc(&d_hash4output[thr_id], 16 * sizeof(uint32_t) * threads);
cudaMalloc(&d_hash4output[thr_id], (size_t) 64 * threads);
}
__host__
void groestl512_cpu_free(int thr_id)
{
for (int i=0; i <8; i++)
cudaFree(d_textures[thr_id][i]);
cudaFree(d_hash4output[thr_id]);
}
static int BLOCKSIZE = 84;
__host__ void groestl512_cpu_setBlock(void *data, int len)
__host__
void groestl512_cpu_setBlock(void *data, int len)
// data muss 80/84-Byte haben!
// heftyHash hat 32-Byte
{

11
heavy/cuda_hefty1.cu

@ -31,7 +31,8 @@ uint32_t hefty_cpu_hashTable[] = { @@ -31,7 +31,8 @@ uint32_t hefty_cpu_hashTable[] = {
0x510e527fUL,
0x9b05688cUL,
0x1f83d9abUL,
0x5be0cd19UL };
0x5be0cd19UL
};
uint32_t hefty_cpu_constantTable[] = {
0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
@ -316,7 +317,13 @@ void hefty_cpu_init(int thr_id, uint32_t threads) @@ -316,7 +317,13 @@ void hefty_cpu_init(int thr_id, uint32_t threads)
sizeof(uint32_t) * 64 );
// Speicher für alle Hefty1 hashes belegen
CUDA_SAFE_CALL(cudaMalloc(&heavy_heftyHashes[thr_id], 8 * sizeof(uint32_t) * threads));
CUDA_SAFE_CALL(cudaMalloc(&heavy_heftyHashes[thr_id], (size_t) 32 * threads));
}
__host__
void hefty_cpu_free(int thr_id)
{
cudaFree(heavy_heftyHashes[thr_id]);
}
__host__

10
heavy/cuda_keccak512.cu

@ -185,7 +185,7 @@ template <int BLOCKSIZE> __global__ void keccak512_gpu_hash(uint32_t threads, ui @@ -185,7 +185,7 @@ template <int BLOCKSIZE> __global__ void keccak512_gpu_hash(uint32_t threads, ui
// ---------------------------- END CUDA keccak512 functions ------------------------------------
__host__
__host__
void keccak512_cpu_init(int thr_id, uint32_t threads)
{
// Kopiere die Hash-Tabellen in den GPU-Speicher
@ -195,7 +195,13 @@ void keccak512_cpu_init(int thr_id, uint32_t threads) @@ -195,7 +195,13 @@ void keccak512_cpu_init(int thr_id, uint32_t threads)
0, cudaMemcpyHostToDevice);
// Speicher für alle Ergebnisse belegen
cudaMalloc(&d_hash3output[thr_id], 16 * sizeof(uint32_t) * threads);
cudaMalloc(&d_hash3output[thr_id], (size_t) 64 * threads);
}
__host__
void keccak512_cpu_free(int thr_id)
{
cudaFree(d_hash3output[thr_id]);
}
// ----------------BEGIN keccak512 CPU version from scrypt-jane code --------------------

13
heavy/cuda_sha256.cu

@ -160,8 +160,9 @@ template <int BLOCKSIZE> __global__ void sha256_gpu_hash(uint32_t threads, uint3 @@ -160,8 +160,9 @@ template <int BLOCKSIZE> __global__ void sha256_gpu_hash(uint32_t threads, uint3
}
}
// Setup-Funktionen
__host__ void sha256_cpu_init(int thr_id, uint32_t threads)
// Setup Function
__host__
void sha256_cpu_init(int thr_id, uint32_t threads)
{
// Kopiere die Hash-Tabellen in den GPU-Speicher
cudaMemcpyToSymbol( sha256_gpu_constantTable,
@ -169,7 +170,13 @@ __host__ void sha256_cpu_init(int thr_id, uint32_t threads) @@ -169,7 +170,13 @@ __host__ void sha256_cpu_init(int thr_id, uint32_t threads)
sizeof(uint32_t) * 64 );
// Speicher für alle Ergebnisse belegen
cudaMalloc(&d_hash2output[thr_id], 8 * sizeof(uint32_t) * threads);
cudaMalloc(&d_hash2output[thr_id], (size_t) 8 * sizeof(uint32_t) * threads);
}
__host__
void sha256_cpu_free(int thr_id)
{
cudaFree(d_hash2output[thr_id]);
}
static int BLOCKSIZE = 84;

10
heavy/heavy.cu

@ -178,7 +178,7 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l @@ -178,7 +178,7 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
{
uint16_t *ext = (uint16_t *)&pdata[20];
if (opt_vote > maxvote) {
if (opt_vote > maxvote && !opt_benchmark) {
applog(LOG_WARNING, "Your block reward vote (%hu) exceeds "
"the maxvote reported by the pool (%hu).",
opt_vote, maxvote);
@ -310,12 +310,18 @@ extern "C" void free_heavy(int thr_id) @@ -310,12 +310,18 @@ extern "C" void free_heavy(int thr_id)
cudaFree(heavy_nonceVector[thr_id]);
// todo: free sub algos vectors
blake512_cpu_free(thr_id);
groestl512_cpu_free(thr_id);
hefty_cpu_free(thr_id);
keccak512_cpu_free(thr_id);
sha256_cpu_free(thr_id);
combine_cpu_free(thr_id);
init[thr_id] = false;
cudaDeviceSynchronize();
}
__host__
void heavycoin_hash(uchar* output, const uchar* input, int len)
{

6
heavy/heavy.h

@ -4,27 +4,33 @@ @@ -4,27 +4,33 @@
void blake512_cpu_init(int thr_id, uint32_t threads);
void blake512_cpu_setBlock(void *pdata, int len);
void blake512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
void blake512_cpu_free(int thr_id);
void groestl512_cpu_init(int thr_id, uint32_t threads);
void groestl512_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy);
void groestl512_cpu_setBlock(void *data, int len);
void groestl512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
void groestl512_cpu_free(int thr_id);
void hefty_cpu_hash(int thr_id, uint32_t threads, int startNounce);
void hefty_cpu_setBlock(int thr_id, uint32_t threads, void *data, int len);
void hefty_cpu_init(int thr_id, uint32_t threads);
void hefty_cpu_free(int thr_id);
void keccak512_cpu_init(int thr_id, uint32_t threads);
void keccak512_cpu_setBlock(void *data, int len);
void keccak512_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy);
void keccak512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
void keccak512_cpu_free(int thr_id);
void sha256_cpu_init(int thr_id, uint32_t threads);
void sha256_cpu_setBlock(void *data, int len);
void sha256_cpu_hash(int thr_id, uint32_t threads, int startNounce);
void sha256_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy);
void sha256_cpu_free(int thr_id);
void combine_cpu_init(int thr_id, uint32_t threads);
void combine_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *hash);
void combine_cpu_free(int thr_id);
#endif

3
lyra2/lyra2RE.cu

@ -17,6 +17,7 @@ extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const @@ -17,6 +17,7 @@ extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const
extern void blake256_cpu_setBlock_80(uint32_t *pdata);
extern void keccak256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
extern void keccak256_cpu_init(int thr_id, uint32_t threads);
extern void keccak256_cpu_free(int thr_id);
extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
extern void skein256_cpu_init(int thr_id, uint32_t threads);
@ -174,7 +175,9 @@ extern "C" void free_lyra2(int thr_id) @@ -174,7 +175,9 @@ extern "C" void free_lyra2(int thr_id)
cudaFree(d_hash[thr_id]);
keccak256_cpu_free(thr_id);
groestl256_cpu_free(thr_id);
init[thr_id] = false;
cudaDeviceSynchronize();

3
lyra2/lyra2REv2.cu

@ -19,6 +19,7 @@ extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const @@ -19,6 +19,7 @@ extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const
extern void blake256_cpu_setBlock_80(uint32_t *pdata);
extern void keccak256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
extern void keccak256_cpu_init(int thr_id, uint32_t threads);
extern void keccak256_cpu_free(int thr_id);
extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
extern void skein256_cpu_init(int thr_id, uint32_t threads);
extern void cubehash256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_hash, int order);
@ -183,6 +184,8 @@ extern "C" void free_lyra2v2(int thr_id) @@ -183,6 +184,8 @@ extern "C" void free_lyra2v2(int thr_id)
cudaFree(d_matrix[thr_id]);
bmw256_cpu_free(thr_id);
keccak256_cpu_free(thr_id);
init[thr_id] = false;
cudaDeviceSynchronize();

2
neoscrypt/cuda_neoscrypt.cu

@ -730,7 +730,7 @@ void neoscrypt_cpu_init(int thr_id, uint32_t threads) @@ -730,7 +730,7 @@ void neoscrypt_cpu_init(int thr_id, uint32_t threads)
{
cuda_get_arch(thr_id);
cudaMalloc(&d_NNonce[thr_id], sizeof(uint32_t));
CUDA_SAFE_CALL(cudaMalloc(&d_buffer[thr_id], threads * 256 * SHIFT));
CUDA_SAFE_CALL(cudaMalloc(&d_buffer[thr_id], (size_t) 256 * SHIFT * threads));
cudaMemcpyToSymbol(W, &d_buffer[thr_id], sizeof(uint4*), 0, cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(BLAKE2S_SIGMA, BLAKE2S_SIGMA_host, sizeof(BLAKE2S_SIGMA_host), 0, cudaMemcpyHostToDevice);
}

4
pentablake.cu

@ -381,7 +381,7 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n @@ -381,7 +381,7 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n
if (active_gpus > 1) {
cudaSetDevice(device_map[thr_id]);
}
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput));
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));
CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t)));
CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], 2*sizeof(uint32_t)));
@ -452,7 +452,7 @@ void free_pentablake(int thr_id) @@ -452,7 +452,7 @@ void free_pentablake(int thr_id)
cudaSetDevice(device_map[thr_id]);
cudaFree(d_hash[thr_id]);
cudaFree(h_resNounce[thr_id]);
cudaFreeHost(h_resNounce[thr_id]);
cudaFree(d_resNounce[thr_id]);
init[thr_id] = false;

8
quark/cuda_quark_groestl512.cu

@ -141,6 +141,14 @@ void quark_groestl512_cpu_init(int thr_id, uint32_t threads) @@ -141,6 +141,14 @@ void quark_groestl512_cpu_init(int thr_id, uint32_t threads)
quark_groestl512_sm20_init(thr_id, threads);
}
__host__
void quark_groestl512_cpu_free(int thr_id)
{
int dev_id = device_map[thr_id];
if (device_sm[dev_id] < 300 || cuda_arch[dev_id] < 300)
quark_groestl512_sm20_free(thr_id);
}
__host__
void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
{

32
quark/cuda_quark_groestl512_sm20.cu

@ -5,6 +5,8 @@ @@ -5,6 +5,8 @@
#define MAXWELL_OR_FERMI 0
#define USE_SHARED 1
static unsigned int *d_textures[MAX_GPUS][8];
// #define SPH_C32(x) ((uint32_t)(x ## U))
// #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
@ -274,28 +276,38 @@ void quark_groestl512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32 @@ -274,28 +276,38 @@ void quark_groestl512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32
#endif
}
#define texDef(texname, texmem, texsource, texsize) \
#define texDef(id, texname, texmem, texsource, texsize) { \
unsigned int *texmem; \
cudaMalloc(&texmem, texsize); \
d_textures[thr_id][id] = texmem; \
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
texname.normalized = 0; \
texname.filterMode = cudaFilterModePoint; \
texname.addressMode[0] = cudaAddressModeClamp; \
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); \
} \
}
__host__
void quark_groestl512_sm20_init(int thr_id, uint32_t threads)
{
// Texturen mit obigem Makro initialisieren
texDef(t0up1, d_T0up, T0up_cpu, sizeof(uint32_t)*256);
texDef(t0dn1, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256);
texDef(t1up1, d_T1up, T1up_cpu, sizeof(uint32_t)*256);
texDef(t1dn1, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256);
texDef(t2up1, d_T2up, T2up_cpu, sizeof(uint32_t)*256);
texDef(t2dn1, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256);
texDef(t3up1, d_T3up, T3up_cpu, sizeof(uint32_t)*256);
texDef(t3dn1, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256);
texDef(0, t0up1, d_T0up, T0up_cpu, sizeof(uint32_t)*256);
texDef(1, t0dn1, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256);
texDef(2, t1up1, d_T1up, T1up_cpu, sizeof(uint32_t)*256);
texDef(3, t1dn1, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256);
texDef(4, t2up1, d_T2up, T2up_cpu, sizeof(uint32_t)*256);
texDef(5, t2dn1, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256);
texDef(6, t3up1, d_T3up, T3up_cpu, sizeof(uint32_t)*256);
texDef(7, t3dn1, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256);
}
__host__
void quark_groestl512_sm20_free(int thr_id)
{
for (int i=0; i<8; i++)
cudaFree(d_textures[thr_id][i]);
}
__host__

2
quark/quarkcoin.cu

@ -30,6 +30,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star @@ -30,6 +30,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_free(int thr_id);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -270,6 +271,7 @@ extern "C" void free_quark(int thr_id) @@ -270,6 +271,7 @@ extern "C" void free_quark(int thr_id)
cudaFree(d_branch2Nonces[thr_id]);
cudaFree(d_branch3Nonces[thr_id]);
quark_groestl512_cpu_free(thr_id);
quark_compactTest_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);

9
qubit/qubit.cu

@ -28,14 +28,11 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st @@ -28,14 +28,11 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_free(int thr_id);
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes,
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
int order);
extern "C" void qubithash(void *state, const void *input)
{
@ -96,7 +93,7 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce, @@ -96,7 +93,7 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce,
x11_simd512_cpu_init(thr_id, throughput);
x11_echo512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
cuda_check_cpu_init(thr_id, throughput);
@ -167,6 +164,8 @@ extern "C" void free_qubit(int thr_id) @@ -167,6 +164,8 @@ extern "C" void free_qubit(int thr_id)
cudaFree(d_hash[thr_id]);
x11_simd512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;

24
scrypt-jane.cpp

@ -426,10 +426,17 @@ unsigned char GetNfactor(unsigned int nTimestamp) @@ -426,10 +426,17 @@ unsigned char GetNfactor(unsigned int nTimestamp)
return Nfactor;
}
static bool init[MAX_GPUS] = { 0 };
// cleanup
void free_scrypt_jane(int thr_id)
{
// todo ?
int dev_id = device_map[thr_id];
cudaSetDevice(dev_id);
cudaDeviceReset(); // well, simple way to free ;)
init[thr_id] = false;
}
#define bswap_32x4(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) \
@ -467,7 +474,18 @@ int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsi @@ -467,7 +474,18 @@ int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsi
s_Nfactor = Nfactor;
}
int throughput = cuda_throughput(thr_id);
static __thread int throughput = 0;
if(!init[thr_id]) {
int dev_id = device_map[thr_id];
cudaSetDevice(dev_id);
cudaDeviceReset();
cudaSetDevice(dev_id);
throughput = cuda_throughput(thr_id);
applog(LOG_INFO, "GPU #%d: cuda throughput is %d", dev_id, throughput);
init[thr_id] = true;
}
if(throughput == 0)
return -1;
@ -602,7 +620,7 @@ int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsi @@ -602,7 +620,7 @@ int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsi
if (memcmp(thash, &hash[cur][8*i], 32) == 0)
{
bn_store_hash_target_ratio(thash, ptarget, work);
work_set_target_ratio(work, thash);
*hashes_done = n - pdata[19];
pdata[19] = tmp_nonce;
scrypt_free(&Vbuf);

27
scrypt.cpp

@ -685,10 +685,18 @@ static int lastFactor = 0; @@ -685,10 +685,18 @@ static int lastFactor = 0;
static void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad);
static bool init[MAX_GPUS] = { 0 };
// cleanup
void free_scrypt(int thr_id)
{
// todo ?
int dev_id = device_map[thr_id];
// trivial way to free all...
cudaSetDevice(dev_id);
cudaDeviceReset();
init[thr_id] = false;
}
// Scrypt proof of work algorithm
@ -701,9 +709,20 @@ int scanhash_scrypt(int thr_id, struct work *work, uint32_t max_nonce, unsigned @@ -701,9 +709,20 @@ int scanhash_scrypt(int thr_id, struct work *work, uint32_t max_nonce, unsigned
int result = 0;
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
int throughput = cuda_throughput(thr_id);
static __thread int throughput = 0;
if (!init[thr_id]) {
int dev_id = device_map[thr_id];
cudaSetDevice(dev_id);
cudaDeviceReset();
cudaSetDevice(dev_id);
throughput = cuda_throughput(thr_id);
applog(LOG_INFO, "GPU #%d: cuda throughput is %d", dev_id, throughput);
init[thr_id] = true;
}
if(throughput == 0)
if (throughput == 0)
return -1;
gettimeofday(tv_start, NULL);
@ -912,7 +931,7 @@ int scanhash_scrypt(int thr_id, struct work *work, uint32_t max_nonce, unsigned @@ -912,7 +931,7 @@ int scanhash_scrypt(int thr_id, struct work *work, uint32_t max_nonce, unsigned
device_map[thr_id], device_name[thr_id], i, cur);
} else {
*hashes_done = n - pdata[19];
bn_store_hash_target_ratio(refhash, ptarget, work);
work_set_target_ratio(work, refhash);
pdata[19] = nonce[cur] + i;
result = 1;
goto byebye;

14
scrypt/blake.cu

@ -404,10 +404,10 @@ void cuda_blake256_hash( uint64_t *g_out, uint32_t nonce, uint32_t *g_good, bool @@ -404,10 +404,10 @@ void cuda_blake256_hash( uint64_t *g_out, uint32_t nonce, uint32_t *g_good, bool
static std::map<int, uint32_t *> context_good[2];
static bool init[MAX_GPUS] = { 0 };
bool default_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
{
static bool init[MAX_GPUS] = { 0 };
if (!init[thr_id])
{
// allocate pinned host memory for good hashes
@ -441,3 +441,13 @@ void default_do_blake256(dim3 grid, dim3 threads, int thr_id, int stream, uint32 @@ -441,3 +441,13 @@ void default_do_blake256(dim3 grid, dim3 threads, int thr_id, int stream, uint32
cudaMemcpyDeviceToHost, context_streams[stream][thr_id]));
}
}
void default_free_blake256(int thr_id)
{
if (init[thr_id]) {
cudaFree(context_good[0][thr_id]);
cudaFree(context_good[1][thr_id]);
init[thr_id] = false;
}
}

5
scrypt/salsa_kernel.cu

@ -144,9 +144,8 @@ int cuda_throughput(int thr_id) @@ -144,9 +144,8 @@ int cuda_throughput(int thr_id)
cuCtxCreate( &ctx, CU_CTX_SCHED_YIELD, device_map[thr_id] );
cuCtxSetCurrent(ctx);
#else
checkCudaErrors(cudaSetDeviceFlags(cudaDeviceScheduleYield));
checkCudaErrors(cudaSetDevice(device_map[thr_id]));
// checkCudaErrors(cudaFree(0));
checkCudaErrors(cudaSetDeviceFlags(cudaDeviceScheduleYield));
#endif
KernelInterface *kernel;
@ -256,7 +255,7 @@ inline int _ConvertSMVer2Cores(int major, int minor) @@ -256,7 +255,7 @@ inline int _ConvertSMVer2Cores(int major, int minor)
}
// If we don't find the values, we default use the previous one to run properly
applog(LOG_WARNING, "MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM", major, minor, 128);
applog(LOG_WARNING, "MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM", major, minor, 128);
return 128;
}

10
skein.cu

@ -11,12 +11,14 @@ @@ -11,12 +11,14 @@
#include <openssl/sha.h>
static uint32_t *d_hash[MAX_GPUS];
static __thread bool sm5 = true;
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void skein512_cpu_setBlock_80(void *pdata);
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
extern void skeincoin_init(int thr_id);
extern void skeincoin_free(int thr_id);
extern void skeincoin_setBlock_80(int thr_id, void *pdata);
extern uint32_t skeincoin_hash_sm5(int thr_id, uint32_t threads, uint32_t startNounce, int swap, uint64_t target64, uint32_t *secNonce);
@ -355,7 +357,7 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no @@ -355,7 +357,7 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
const uint32_t first_nonce = pdata[19];
const int swap = 1;
bool sm5 = (device_sm[device_map[thr_id]] >= 500);
sm5 = (device_sm[device_map[thr_id]] >= 500);
bool checkSecnonce = (have_stratum || have_longpoll) && !sm5;
uint32_t throughput = device_intensity(thr_id, __func__, 1U << 20);
@ -473,9 +475,11 @@ extern "C" void free_skeincoin(int thr_id) @@ -473,9 +475,11 @@ extern "C" void free_skeincoin(int thr_id)
cudaSetDevice(device_map[thr_id]);
cudaFree(d_hash[thr_id]);
if (!sm5) {
cudaFree(d_hash[thr_id]);
cuda_check_cpu_free(thr_id);
}
cuda_check_cpu_free(thr_id);
init[thr_id] = false;
cudaDeviceSynchronize();

12
skein2.cpp

@ -16,6 +16,7 @@ extern void quark_skein512_cpu_init(int thr_id, uint32_t threads); @@ -16,6 +16,7 @@ extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void skein512_cpu_setBlock_80(void *pdata);
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
void skein2hash(void *output, const void *input)
@ -38,6 +39,7 @@ static bool init[MAX_GPUS] = { 0 }; @@ -38,6 +39,7 @@ static bool init[MAX_GPUS] = { 0 };
int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
{
int dev_id = device_map[thr_id];
uint32_t *pdata = work->data;
uint32_t *ptarget = work->target;
const uint32_t first_nonce = pdata[19];
@ -50,9 +52,9 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned @@ -50,9 +52,9 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
if (!init[thr_id])
{
cudaSetDevice(device_map[thr_id]);
cudaSetDevice(dev_id);
cudaMalloc(&d_hash[thr_id], throughput * 64U);
cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput);
quark_skein512_cpu_init(thr_id, throughput);
cuda_check_cpu_init(thr_id, throughput);
@ -92,7 +94,7 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned @@ -92,7 +94,7 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
work_set_target_ratio(work, vhash64);
if (secNonce != 0) {
if (!opt_quiet)
applog(LOG_BLUE, "GPU #%d: found second nonce %08x !", device_map[thr_id], swab32(secNonce));
applog(LOG_BLUE, "GPU #%d: found second nonce %08x !", dev_id, swab32(secNonce));
endiandata[19] = secNonce;
skein2hash(vhash64, endiandata);
@ -104,7 +106,7 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned @@ -104,7 +106,7 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
pdata[19] = swab32(foundNonce);
return res;
} else {
applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce);
applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", dev_id, foundNonce);
}
}
@ -135,4 +137,4 @@ void free_skein2(int thr_id) @@ -135,4 +137,4 @@ void free_skein2(int thr_id)
init[thr_id] = false;
cudaDeviceSynchronize();
}
}

4
x11/c11.cu

@ -32,6 +32,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star @@ -32,6 +32,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_free(int thr_id);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -50,6 +51,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st @@ -50,6 +51,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_free(int thr_id);
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -255,6 +257,8 @@ extern "C" void free_c11(int thr_id) @@ -255,6 +257,8 @@ extern "C" void free_c11(int thr_id)
cudaSetDevice(device_map[thr_id]);
cudaFree(d_hash[thr_id]);
quark_groestl512_cpu_free(thr_id);
x11_simd512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;

7
x11/cuda_x11_simd512.cu

@ -672,6 +672,13 @@ int x11_simd512_cpu_init(int thr_id, uint32_t threads) @@ -672,6 +672,13 @@ int x11_simd512_cpu_init(int thr_id, uint32_t threads)
return 0;
}
__host__
void x11_simd512_cpu_free(int thr_id)
{
cudaFree(d_temp4[thr_id]);
cudaFree(d_state[thr_id]);
}
__host__
void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
{

2
x11/fresh.cu

@ -21,6 +21,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st @@ -21,6 +21,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_free(int thr_id);
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -163,6 +164,7 @@ extern "C" void free_fresh(int thr_id) @@ -163,6 +164,7 @@ extern "C" void free_fresh(int thr_id)
cudaSetDevice(device_map[thr_id]);
cudaFree(d_hash[thr_id]);
x11_simd512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;

2
x11/s3.cu

@ -21,6 +21,7 @@ extern void x11_shavite512_setBlock_80(void *pdata); @@ -21,6 +21,7 @@ extern void x11_shavite512_setBlock_80(void *pdata);
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_free(int thr_id);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -146,6 +147,7 @@ extern "C" void free_s3(int thr_id) @@ -146,6 +147,7 @@ extern "C" void free_s3(int thr_id)
cudaSetDevice(device_map[thr_id]);
cudaFree(d_hash[thr_id]);
x11_simd512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;

5
x11/x11.cu

@ -32,6 +32,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star @@ -32,6 +32,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_free(int thr_id);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -50,6 +51,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st @@ -50,6 +51,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_free(int thr_id);
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -254,6 +256,9 @@ extern "C" void free_x11(int thr_id) @@ -254,6 +256,9 @@ extern "C" void free_x11(int thr_id)
cudaFree(d_hash[thr_id]);
quark_groestl512_cpu_free(thr_id);
x11_simd512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;

31
x13/cuda_x13_fugue512.cu

@ -38,6 +38,9 @@ @@ -38,6 +38,9 @@
* @author phm <phm@inbox.com>
*/
// store allocated textures device addresses
static unsigned int* d_textures[MAX_GPUS][4];
#define mixtab0(x) (*((uint32_t*)mixtabs + ( (x))))
#define mixtab1(x) (*((uint32_t*)mixtabs + (256+(x))))
#define mixtab2(x) (*((uint32_t*)mixtabs + (512+(x))))
@ -657,25 +660,37 @@ __global__ void x13_fugue512_gpu_hash_64(uint32_t threads, uint32_t startNounce, @@ -657,25 +660,37 @@ __global__ void x13_fugue512_gpu_hash_64(uint32_t threads, uint32_t startNounce,
}
}
#define texDef(texname, texmem, texsource, texsize) \
#define texDef(id, texname, texmem, texsource, texsize) { \
unsigned int *texmem; \
cudaMalloc(&texmem, texsize); \
d_textures[thr_id][id] = texmem; \
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
texname.normalized = 0; \
texname.filterMode = cudaFilterModePoint; \
texname.addressMode[0] = cudaAddressModeClamp; \
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); }
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); \
} \
}
__host__
void x13_fugue512_cpu_init(int thr_id, uint32_t threads)
{
texDef(0, mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256);
texDef(1, mixTab1Tex, mixTab1m, mixtab1_cpu, sizeof(uint32_t)*256);
texDef(2, mixTab2Tex, mixTab2m, mixtab2_cpu, sizeof(uint32_t)*256);
texDef(3, mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256);
}
__host__ void x13_fugue512_cpu_init(int thr_id, uint32_t threads)
__host__
void x13_fugue512_cpu_free(int thr_id)
{
texDef(mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256);
texDef(mixTab1Tex, mixTab1m, mixtab1_cpu, sizeof(uint32_t)*256);
texDef(mixTab2Tex, mixTab2m, mixtab2_cpu, sizeof(uint32_t)*256);
texDef(mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256);
for (int i=0; i<4; i++)
cudaFree(d_textures[thr_id][i]);
}
__host__ void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
__host__
void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
{
const uint32_t threadsperblock = 256;

8
x13/x13.cu

@ -34,6 +34,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star @@ -34,6 +34,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_free(int thr_id);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -52,6 +53,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st @@ -52,6 +53,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_free(int thr_id);
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -61,7 +63,7 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star @@ -61,7 +63,7 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_free(int thr_id);
// X13 CPU Hash
extern "C" void x13hash(void *output, const void *input)
@ -256,6 +258,10 @@ extern "C" void free_x13(int thr_id) @@ -256,6 +258,10 @@ extern "C" void free_x13(int thr_id)
cudaFree(d_hash[thr_id]);
quark_groestl512_cpu_free(thr_id);
x11_simd512_cpu_free(thr_id);
x13_fugue512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;

14
x15/x14.cu

@ -38,6 +38,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star @@ -38,6 +38,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_free(int thr_id);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -56,6 +57,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st @@ -56,6 +57,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_free(int thr_id);
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -65,6 +67,7 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star @@ -65,6 +67,7 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_free(int thr_id);
extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -185,9 +188,10 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce, @@ -185,9 +188,10 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
x13_fugue512_cpu_init(thr_id, throughput);
x14_shabal512_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
cuda_check_cpu_init(thr_id, throughput);
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
init[thr_id] = true;
}
@ -260,8 +264,12 @@ extern "C" void free_x14(int thr_id) @@ -260,8 +264,12 @@ extern "C" void free_x14(int thr_id)
cudaFree(d_hash[thr_id]);
quark_groestl512_cpu_free(thr_id);
x11_simd512_cpu_free(thr_id);
x13_fugue512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;
cudaDeviceSynchronize();
}
}

7
x15/x15.cu

@ -39,6 +39,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star @@ -39,6 +39,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_free(int thr_id);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -57,6 +58,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st @@ -57,6 +58,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_free(int thr_id);
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -66,6 +68,7 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star @@ -66,6 +68,7 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_free(int thr_id);
extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -275,6 +278,10 @@ extern "C" void free_x15(int thr_id) @@ -275,6 +278,10 @@ extern "C" void free_x15(int thr_id)
cudaFree(d_hash[thr_id]);
quark_groestl512_cpu_free(thr_id);
x11_simd512_cpu_free(thr_id);
x13_fugue512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;

7
x17/x17.cu

@ -42,6 +42,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star @@ -42,6 +42,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_free(int thr_id);
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -60,6 +61,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st @@ -60,6 +61,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x11_simd512_cpu_free(int thr_id);
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -69,6 +71,7 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star @@ -69,6 +71,7 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void x13_fugue512_cpu_free(int thr_id);
extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -297,6 +300,10 @@ extern "C" void free_x17(int thr_id) @@ -297,6 +300,10 @@ extern "C" void free_x17(int thr_id)
cudaFree(d_hash[thr_id]);
quark_groestl512_cpu_free(thr_id);
x11_simd512_cpu_free(thr_id);
x13_fugue512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;

2
zr5.cu

@ -319,6 +319,7 @@ extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st @@ -319,6 +319,7 @@ extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
extern void quark_groestl512_cpu_free(int thr_id);
extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
@ -491,6 +492,7 @@ extern "C" void free_zr5(int thr_id) @@ -491,6 +492,7 @@ extern "C" void free_zr5(int thr_id)
cudaFree(d_txs[thr_id]);
quark_groestl512_cpu_free(thr_id);
cuda_check_cpu_free(thr_id);
init[thr_id] = false;

Loading…
Cancel
Save