algos: free allocated mem for algo switch
All can be freed propertly now, except script (reset) and lyra2 (leak)
This commit is contained in:
parent
ee93927fac
commit
922c2a5cd7
@ -13,6 +13,7 @@ extern "C" {
|
|||||||
static uint32_t *d_hash[MAX_GPUS];
|
static uint32_t *d_hash[MAX_GPUS];
|
||||||
|
|
||||||
extern void bmw256_midstate_init(int thr_id, uint32_t threads);
|
extern void bmw256_midstate_init(int thr_id, uint32_t threads);
|
||||||
|
extern void bmw256_midstate_free(int thr_id);
|
||||||
extern void bmw256_setBlock_80(int thr_id, void *pdata);
|
extern void bmw256_setBlock_80(int thr_id, void *pdata);
|
||||||
extern void bmw256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int swap);
|
extern void bmw256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_outputHash, int swap);
|
||||||
|
|
||||||
@ -111,8 +112,9 @@ extern "C" void free_bmw(int thr_id)
|
|||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
bmw256_midstate_free(thr_id);
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
|
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
@ -372,3 +372,9 @@ void bmw256_midstate_init(int thr_id, uint32_t threads)
|
|||||||
{
|
{
|
||||||
cudaMalloc(&d_midstate[thr_id], sizeof(sph_bmw256_context));
|
cudaMalloc(&d_midstate[thr_id], sizeof(sph_bmw256_context));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void bmw256_midstate_free(int thr_id)
|
||||||
|
{
|
||||||
|
cudaFree(d_midstate[thr_id]);
|
||||||
|
}
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
|
|
||||||
static uint32_t *h_GNonces[MAX_GPUS];
|
static uint32_t *h_GNonces[MAX_GPUS];
|
||||||
static uint32_t *d_GNonces[MAX_GPUS];
|
static uint32_t *d_GNonces[MAX_GPUS];
|
||||||
|
static unsigned int* d_textures[MAX_GPUS][8];
|
||||||
|
|
||||||
__constant__ uint32_t pTarget[8];
|
__constant__ uint32_t pTarget[8];
|
||||||
|
|
||||||
@ -249,28 +250,31 @@ void groestl256_gpu_hash32(uint32_t threads, uint32_t startNounce, uint64_t *out
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define texDef(texname, texmem, texsource, texsize) \
|
#define texDef(id, texname, texmem, texsource, texsize) { \
|
||||||
unsigned int *texmem; \
|
unsigned int *texmem; \
|
||||||
cudaMalloc(&texmem, texsize); \
|
cudaMalloc(&texmem, texsize); \
|
||||||
|
d_textures[thr_id][id] = texmem; \
|
||||||
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
|
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
|
||||||
texname.normalized = 0; \
|
texname.normalized = 0; \
|
||||||
texname.filterMode = cudaFilterModePoint; \
|
texname.filterMode = cudaFilterModePoint; \
|
||||||
texname.addressMode[0] = cudaAddressModeClamp; \
|
texname.addressMode[0] = cudaAddressModeClamp; \
|
||||||
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
|
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
|
||||||
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \
|
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
void groestl256_cpu_init(int thr_id, uint32_t threads)
|
void groestl256_cpu_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
// Texturen mit obigem Makro initialisieren
|
// Texturen mit obigem Makro initialisieren
|
||||||
texDef(t0up2, d_T0up, T0up_cpu, sizeof(uint32_t) * 256);
|
texDef(0, t0up2, d_T0up, T0up_cpu, sizeof(uint32_t) * 256);
|
||||||
texDef(t0dn2, d_T0dn, T0dn_cpu, sizeof(uint32_t) * 256);
|
texDef(1, t0dn2, d_T0dn, T0dn_cpu, sizeof(uint32_t) * 256);
|
||||||
texDef(t1up2, d_T1up, T1up_cpu, sizeof(uint32_t) * 256);
|
texDef(2, t1up2, d_T1up, T1up_cpu, sizeof(uint32_t) * 256);
|
||||||
texDef(t1dn2, d_T1dn, T1dn_cpu, sizeof(uint32_t) * 256);
|
texDef(3, t1dn2, d_T1dn, T1dn_cpu, sizeof(uint32_t) * 256);
|
||||||
texDef(t2up2, d_T2up, T2up_cpu, sizeof(uint32_t) * 256);
|
texDef(4, t2up2, d_T2up, T2up_cpu, sizeof(uint32_t) * 256);
|
||||||
texDef(t2dn2, d_T2dn, T2dn_cpu, sizeof(uint32_t) * 256);
|
texDef(5, t2dn2, d_T2dn, T2dn_cpu, sizeof(uint32_t) * 256);
|
||||||
texDef(t3up2, d_T3up, T3up_cpu, sizeof(uint32_t) * 256);
|
texDef(6, t3up2, d_T3up, T3up_cpu, sizeof(uint32_t) * 256);
|
||||||
texDef(t3dn2, d_T3dn, T3dn_cpu, sizeof(uint32_t) * 256);
|
texDef(7, t3dn2, d_T3dn, T3dn_cpu, sizeof(uint32_t) * 256);
|
||||||
|
|
||||||
cudaMalloc(&d_GNonces[thr_id], 2*sizeof(uint32_t));
|
cudaMalloc(&d_GNonces[thr_id], 2*sizeof(uint32_t));
|
||||||
cudaMallocHost(&h_GNonces[thr_id], 2*sizeof(uint32_t));
|
cudaMallocHost(&h_GNonces[thr_id], 2*sizeof(uint32_t));
|
||||||
@ -279,6 +283,9 @@ void groestl256_cpu_init(int thr_id, uint32_t threads)
|
|||||||
__host__
|
__host__
|
||||||
void groestl256_cpu_free(int thr_id)
|
void groestl256_cpu_free(int thr_id)
|
||||||
{
|
{
|
||||||
|
for (int i=0; i<8; i++)
|
||||||
|
cudaFree(d_textures[thr_id][i]);
|
||||||
|
|
||||||
cudaFree(d_GNonces[thr_id]);
|
cudaFree(d_GNonces[thr_id]);
|
||||||
cudaFreeHost(h_GNonces[thr_id]);
|
cudaFreeHost(h_GNonces[thr_id]);
|
||||||
}
|
}
|
||||||
|
@ -33,8 +33,9 @@ __device__ cuda_compactTestFunction_t d_JackpotTrueFunction = JackpotTrueTest, d
|
|||||||
|
|
||||||
cuda_compactTestFunction_t h_JackpotTrueFunction[MAX_GPUS], h_JackpotFalseFunction[MAX_GPUS];
|
cuda_compactTestFunction_t h_JackpotTrueFunction[MAX_GPUS], h_JackpotFalseFunction[MAX_GPUS];
|
||||||
|
|
||||||
// Setup-Funktionen
|
// Setup-Function
|
||||||
__host__ void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads)
|
__host__
|
||||||
|
void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
cudaMemcpyFromSymbol(&h_JackpotTrueFunction[thr_id], d_JackpotTrueFunction, sizeof(cuda_compactTestFunction_t));
|
cudaMemcpyFromSymbol(&h_JackpotTrueFunction[thr_id], d_JackpotTrueFunction, sizeof(cuda_compactTestFunction_t));
|
||||||
cudaMemcpyFromSymbol(&h_JackpotFalseFunction[thr_id], d_JackpotFalseFunction, sizeof(cuda_compactTestFunction_t));
|
cudaMemcpyFromSymbol(&h_JackpotFalseFunction[thr_id], d_JackpotFalseFunction, sizeof(cuda_compactTestFunction_t));
|
||||||
@ -51,6 +52,18 @@ __host__ void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads)
|
|||||||
cudaMalloc(&d_partSum[1][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
|
cudaMalloc(&d_partSum[1][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void jackpot_compactTest_cpu_free(int thr_id)
|
||||||
|
{
|
||||||
|
cudaFree(d_tempBranch1Nonces[thr_id]);
|
||||||
|
cudaFree(d_numValid[thr_id]);
|
||||||
|
|
||||||
|
cudaFree(d_partSum[0][thr_id]);
|
||||||
|
cudaFree(d_partSum[1][thr_id]);
|
||||||
|
|
||||||
|
cudaFreeHost(h_numValid[thr_id]);
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 300
|
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 300
|
||||||
/**
|
/**
|
||||||
* __shfl_up() calculates a source lane ID by subtracting delta from the caller's lane ID, and clamping to the range 0..width-1
|
* __shfl_up() calculates a source lane ID by subtracting delta from the caller's lane ID, and clamping to the range 0..width-1
|
||||||
|
@ -21,6 +21,7 @@ extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
|
|||||||
|
|
||||||
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void quark_groestl512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -29,6 +30,7 @@ extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
|||||||
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
|
||||||
extern void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads);
|
extern void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads);
|
||||||
|
extern void jackpot_compactTest_cpu_free(int thr_id);
|
||||||
extern void jackpot_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
|
extern void jackpot_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
|
||||||
uint32_t *d_nonces1, uint32_t *nrm1,
|
uint32_t *d_nonces1, uint32_t *nrm1,
|
||||||
uint32_t *d_nonces2, uint32_t *nrm2,
|
uint32_t *d_nonces2, uint32_t *nrm2,
|
||||||
@ -264,9 +266,11 @@ extern "C" void free_jackpot(int thr_id)
|
|||||||
cudaFree(d_branch1Nonces[thr_id]);
|
cudaFree(d_branch1Nonces[thr_id]);
|
||||||
cudaFree(d_branch2Nonces[thr_id]);
|
cudaFree(d_branch2Nonces[thr_id]);
|
||||||
cudaFree(d_branch3Nonces[thr_id]);
|
cudaFree(d_branch3Nonces[thr_id]);
|
||||||
|
|
||||||
cudaFree(d_jackpotNonces[thr_id]);
|
cudaFree(d_jackpotNonces[thr_id]);
|
||||||
|
|
||||||
|
quark_groestl512_cpu_free(thr_id);
|
||||||
|
jackpot_compactTest_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@ extern void quark_blake512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t st
|
|||||||
|
|
||||||
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void quark_groestl512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -173,6 +174,7 @@ extern "C" void free_nist5(int thr_id)
|
|||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
|
quark_groestl512_cpu_free(thr_id);
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
|
@ -708,6 +708,12 @@ __host__
|
|||||||
void skeincoin_init(int thr_id)
|
void skeincoin_init(int thr_id)
|
||||||
{
|
{
|
||||||
cuda_get_arch(thr_id);
|
cuda_get_arch(thr_id);
|
||||||
|
CUDA_SAFE_CALL(cudaMalloc(&d_found[thr_id], 2 * sizeof(uint32_t)));
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void skeincoin_free(int thr_id) {
|
||||||
|
cudaFree(d_found[thr_id]);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
@ -716,7 +722,6 @@ void skeincoin_setBlock_80(int thr_id, void *pdata)
|
|||||||
uint64_t message[16];
|
uint64_t message[16];
|
||||||
memcpy(&message[0], pdata, 80);
|
memcpy(&message[0], pdata, 80);
|
||||||
|
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&(d_found[thr_id]), 2 * sizeof(uint32_t)));
|
|
||||||
cudaMemcpyToSymbol(c_message16, &message[8], 16, 0, cudaMemcpyHostToDevice);
|
cudaMemcpyToSymbol(c_message16, &message[8], 16, 0, cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
precalc(message);
|
precalc(message);
|
||||||
|
@ -191,8 +191,9 @@ template <int BLOCKSIZE> __global__ void blake512_gpu_hash(uint32_t threads, uin
|
|||||||
|
|
||||||
// ---------------------------- END CUDA blake512 functions ------------------------------------
|
// ---------------------------- END CUDA blake512 functions ------------------------------------
|
||||||
|
|
||||||
// Setup-Funktionen
|
// Setup Function
|
||||||
__host__ void blake512_cpu_init(int thr_id, uint32_t threads)
|
__host__
|
||||||
|
void blake512_cpu_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
// Kopiere die Hash-Tabellen in den GPU-Speicher
|
// Kopiere die Hash-Tabellen in den GPU-Speicher
|
||||||
cudaMemcpyToSymbol( c_sigma,
|
cudaMemcpyToSymbol( c_sigma,
|
||||||
@ -211,12 +212,19 @@ __host__ void blake512_cpu_init(int thr_id, uint32_t threads)
|
|||||||
0, cudaMemcpyHostToDevice);
|
0, cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
// Speicher für alle Ergebnisse belegen
|
// Speicher für alle Ergebnisse belegen
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash5output[thr_id], 16 * sizeof(uint32_t) * threads));
|
CUDA_SAFE_CALL(cudaMalloc(&d_hash5output[thr_id], (size_t) 64 * threads));
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void blake512_cpu_free(int thr_id)
|
||||||
|
{
|
||||||
|
cudaFree(d_hash5output[thr_id]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int BLOCKSIZE = 84;
|
static int BLOCKSIZE = 84;
|
||||||
|
|
||||||
__host__ void blake512_cpu_setBlock(void *pdata, int len)
|
__host__
|
||||||
|
void blake512_cpu_setBlock(void *pdata, int len)
|
||||||
// data muss 84-Byte haben!
|
// data muss 84-Byte haben!
|
||||||
// heftyHash hat 32-Byte
|
// heftyHash hat 32-Byte
|
||||||
{
|
{
|
||||||
|
@ -124,7 +124,13 @@ __host__
|
|||||||
void combine_cpu_init(int thr_id, uint32_t threads)
|
void combine_cpu_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
// Speicher für alle Ergebnisse belegen
|
// Speicher für alle Ergebnisse belegen
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_hashoutput[thr_id], 8 * sizeof(uint32_t) * threads));
|
CUDA_SAFE_CALL(cudaMalloc(&d_hashoutput[thr_id], (size_t) 32 * threads));
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void combine_cpu_free(int thr_id)
|
||||||
|
{
|
||||||
|
cudaFree(d_hashoutput[thr_id]);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
|
@ -9,6 +9,7 @@
|
|||||||
// globaler Speicher für alle HeftyHashes aller Threads
|
// globaler Speicher für alle HeftyHashes aller Threads
|
||||||
extern uint32_t *heavy_heftyHashes[MAX_GPUS];
|
extern uint32_t *heavy_heftyHashes[MAX_GPUS];
|
||||||
extern uint32_t *heavy_nonceVector[MAX_GPUS];
|
extern uint32_t *heavy_nonceVector[MAX_GPUS];
|
||||||
|
static unsigned int *d_textures[MAX_GPUS][8];
|
||||||
|
|
||||||
// globaler Speicher für unsere Ergebnisse
|
// globaler Speicher für unsere Ergebnisse
|
||||||
uint32_t *d_hash4output[MAX_GPUS];
|
uint32_t *d_hash4output[MAX_GPUS];
|
||||||
@ -730,36 +731,50 @@ template <int BLOCKSIZE> __global__ void groestl512_gpu_hash(uint32_t threads, u
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define texDef(texname, texmem, texsource, texsize) \
|
#define texDef(id, texname, texmem, texsource, texsize) { \
|
||||||
unsigned int *texmem; \
|
unsigned int *texmem; \
|
||||||
cudaMalloc(&texmem, texsize); \
|
cudaMalloc(&texmem, texsize); \
|
||||||
|
d_textures[thr_id][id] = texmem; \
|
||||||
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
|
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
|
||||||
texname.normalized = 0; \
|
texname.normalized = 0; \
|
||||||
texname.filterMode = cudaFilterModePoint; \
|
texname.filterMode = cudaFilterModePoint; \
|
||||||
texname.addressMode[0] = cudaAddressModeClamp; \
|
texname.addressMode[0] = cudaAddressModeClamp; \
|
||||||
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
|
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
|
||||||
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \
|
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
// Setup-Funktionen
|
// Setup Function
|
||||||
__host__ void groestl512_cpu_init(int thr_id, uint32_t threads)
|
__host__
|
||||||
|
void groestl512_cpu_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
// Texturen mit obigem Makro initialisieren
|
// Texturen mit obigem Makro initialisieren
|
||||||
texDef(t0up, d_T0up, T0up_cpu, sizeof(uint32_t)*256);
|
texDef(0, t0up, d_T0up, T0up_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t0dn, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256);
|
texDef(1, t0dn, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t1up, d_T1up, T1up_cpu, sizeof(uint32_t)*256);
|
texDef(2, t1up, d_T1up, T1up_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t1dn, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256);
|
texDef(3, t1dn, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t2up, d_T2up, T2up_cpu, sizeof(uint32_t)*256);
|
texDef(4, t2up, d_T2up, T2up_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t2dn, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256);
|
texDef(5, t2dn, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t3up, d_T3up, T3up_cpu, sizeof(uint32_t)*256);
|
texDef(6, t3up, d_T3up, T3up_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t3dn, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256);
|
texDef(7, t3dn, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256);
|
||||||
|
|
||||||
// Speicher für alle Ergebnisse belegen
|
// Speicher für alle Ergebnisse belegen
|
||||||
cudaMalloc(&d_hash4output[thr_id], 16 * sizeof(uint32_t) * threads);
|
cudaMalloc(&d_hash4output[thr_id], (size_t) 64 * threads);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void groestl512_cpu_free(int thr_id)
|
||||||
|
{
|
||||||
|
for (int i=0; i <8; i++)
|
||||||
|
cudaFree(d_textures[thr_id][i]);
|
||||||
|
|
||||||
|
cudaFree(d_hash4output[thr_id]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int BLOCKSIZE = 84;
|
static int BLOCKSIZE = 84;
|
||||||
|
|
||||||
__host__ void groestl512_cpu_setBlock(void *data, int len)
|
__host__
|
||||||
|
void groestl512_cpu_setBlock(void *data, int len)
|
||||||
// data muss 80/84-Byte haben!
|
// data muss 80/84-Byte haben!
|
||||||
// heftyHash hat 32-Byte
|
// heftyHash hat 32-Byte
|
||||||
{
|
{
|
||||||
|
@ -31,7 +31,8 @@ uint32_t hefty_cpu_hashTable[] = {
|
|||||||
0x510e527fUL,
|
0x510e527fUL,
|
||||||
0x9b05688cUL,
|
0x9b05688cUL,
|
||||||
0x1f83d9abUL,
|
0x1f83d9abUL,
|
||||||
0x5be0cd19UL };
|
0x5be0cd19UL
|
||||||
|
};
|
||||||
|
|
||||||
uint32_t hefty_cpu_constantTable[] = {
|
uint32_t hefty_cpu_constantTable[] = {
|
||||||
0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
|
0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
|
||||||
@ -316,7 +317,13 @@ void hefty_cpu_init(int thr_id, uint32_t threads)
|
|||||||
sizeof(uint32_t) * 64 );
|
sizeof(uint32_t) * 64 );
|
||||||
|
|
||||||
// Speicher für alle Hefty1 hashes belegen
|
// Speicher für alle Hefty1 hashes belegen
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&heavy_heftyHashes[thr_id], 8 * sizeof(uint32_t) * threads));
|
CUDA_SAFE_CALL(cudaMalloc(&heavy_heftyHashes[thr_id], (size_t) 32 * threads));
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void hefty_cpu_free(int thr_id)
|
||||||
|
{
|
||||||
|
cudaFree(heavy_heftyHashes[thr_id]);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
|
@ -185,7 +185,7 @@ template <int BLOCKSIZE> __global__ void keccak512_gpu_hash(uint32_t threads, ui
|
|||||||
|
|
||||||
// ---------------------------- END CUDA keccak512 functions ------------------------------------
|
// ---------------------------- END CUDA keccak512 functions ------------------------------------
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
void keccak512_cpu_init(int thr_id, uint32_t threads)
|
void keccak512_cpu_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
// Kopiere die Hash-Tabellen in den GPU-Speicher
|
// Kopiere die Hash-Tabellen in den GPU-Speicher
|
||||||
@ -195,7 +195,13 @@ void keccak512_cpu_init(int thr_id, uint32_t threads)
|
|||||||
0, cudaMemcpyHostToDevice);
|
0, cudaMemcpyHostToDevice);
|
||||||
|
|
||||||
// Speicher für alle Ergebnisse belegen
|
// Speicher für alle Ergebnisse belegen
|
||||||
cudaMalloc(&d_hash3output[thr_id], 16 * sizeof(uint32_t) * threads);
|
cudaMalloc(&d_hash3output[thr_id], (size_t) 64 * threads);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void keccak512_cpu_free(int thr_id)
|
||||||
|
{
|
||||||
|
cudaFree(d_hash3output[thr_id]);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ----------------BEGIN keccak512 CPU version from scrypt-jane code --------------------
|
// ----------------BEGIN keccak512 CPU version from scrypt-jane code --------------------
|
||||||
|
@ -160,8 +160,9 @@ template <int BLOCKSIZE> __global__ void sha256_gpu_hash(uint32_t threads, uint3
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setup-Funktionen
|
// Setup Function
|
||||||
__host__ void sha256_cpu_init(int thr_id, uint32_t threads)
|
__host__
|
||||||
|
void sha256_cpu_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
// Kopiere die Hash-Tabellen in den GPU-Speicher
|
// Kopiere die Hash-Tabellen in den GPU-Speicher
|
||||||
cudaMemcpyToSymbol( sha256_gpu_constantTable,
|
cudaMemcpyToSymbol( sha256_gpu_constantTable,
|
||||||
@ -169,7 +170,13 @@ __host__ void sha256_cpu_init(int thr_id, uint32_t threads)
|
|||||||
sizeof(uint32_t) * 64 );
|
sizeof(uint32_t) * 64 );
|
||||||
|
|
||||||
// Speicher für alle Ergebnisse belegen
|
// Speicher für alle Ergebnisse belegen
|
||||||
cudaMalloc(&d_hash2output[thr_id], 8 * sizeof(uint32_t) * threads);
|
cudaMalloc(&d_hash2output[thr_id], (size_t) 8 * sizeof(uint32_t) * threads);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void sha256_cpu_free(int thr_id)
|
||||||
|
{
|
||||||
|
cudaFree(d_hash2output[thr_id]);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int BLOCKSIZE = 84;
|
static int BLOCKSIZE = 84;
|
||||||
|
@ -178,7 +178,7 @@ int scanhash_heavy(int thr_id, struct work *work, uint32_t max_nonce, unsigned l
|
|||||||
{
|
{
|
||||||
uint16_t *ext = (uint16_t *)&pdata[20];
|
uint16_t *ext = (uint16_t *)&pdata[20];
|
||||||
|
|
||||||
if (opt_vote > maxvote) {
|
if (opt_vote > maxvote && !opt_benchmark) {
|
||||||
applog(LOG_WARNING, "Your block reward vote (%hu) exceeds "
|
applog(LOG_WARNING, "Your block reward vote (%hu) exceeds "
|
||||||
"the maxvote reported by the pool (%hu).",
|
"the maxvote reported by the pool (%hu).",
|
||||||
opt_vote, maxvote);
|
opt_vote, maxvote);
|
||||||
@ -310,12 +310,18 @@ extern "C" void free_heavy(int thr_id)
|
|||||||
|
|
||||||
cudaFree(heavy_nonceVector[thr_id]);
|
cudaFree(heavy_nonceVector[thr_id]);
|
||||||
|
|
||||||
// todo: free sub algos vectors
|
blake512_cpu_free(thr_id);
|
||||||
|
groestl512_cpu_free(thr_id);
|
||||||
|
hefty_cpu_free(thr_id);
|
||||||
|
keccak512_cpu_free(thr_id);
|
||||||
|
sha256_cpu_free(thr_id);
|
||||||
|
combine_cpu_free(thr_id);
|
||||||
|
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
void heavycoin_hash(uchar* output, const uchar* input, int len)
|
void heavycoin_hash(uchar* output, const uchar* input, int len)
|
||||||
{
|
{
|
||||||
|
@ -4,27 +4,33 @@
|
|||||||
void blake512_cpu_init(int thr_id, uint32_t threads);
|
void blake512_cpu_init(int thr_id, uint32_t threads);
|
||||||
void blake512_cpu_setBlock(void *pdata, int len);
|
void blake512_cpu_setBlock(void *pdata, int len);
|
||||||
void blake512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
|
void blake512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
|
||||||
|
void blake512_cpu_free(int thr_id);
|
||||||
|
|
||||||
void groestl512_cpu_init(int thr_id, uint32_t threads);
|
void groestl512_cpu_init(int thr_id, uint32_t threads);
|
||||||
void groestl512_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy);
|
void groestl512_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy);
|
||||||
void groestl512_cpu_setBlock(void *data, int len);
|
void groestl512_cpu_setBlock(void *data, int len);
|
||||||
void groestl512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
|
void groestl512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
|
||||||
|
void groestl512_cpu_free(int thr_id);
|
||||||
|
|
||||||
void hefty_cpu_hash(int thr_id, uint32_t threads, int startNounce);
|
void hefty_cpu_hash(int thr_id, uint32_t threads, int startNounce);
|
||||||
void hefty_cpu_setBlock(int thr_id, uint32_t threads, void *data, int len);
|
void hefty_cpu_setBlock(int thr_id, uint32_t threads, void *data, int len);
|
||||||
void hefty_cpu_init(int thr_id, uint32_t threads);
|
void hefty_cpu_init(int thr_id, uint32_t threads);
|
||||||
|
void hefty_cpu_free(int thr_id);
|
||||||
|
|
||||||
void keccak512_cpu_init(int thr_id, uint32_t threads);
|
void keccak512_cpu_init(int thr_id, uint32_t threads);
|
||||||
void keccak512_cpu_setBlock(void *data, int len);
|
void keccak512_cpu_setBlock(void *data, int len);
|
||||||
void keccak512_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy);
|
void keccak512_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy);
|
||||||
void keccak512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
|
void keccak512_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce);
|
||||||
|
void keccak512_cpu_free(int thr_id);
|
||||||
|
|
||||||
void sha256_cpu_init(int thr_id, uint32_t threads);
|
void sha256_cpu_init(int thr_id, uint32_t threads);
|
||||||
void sha256_cpu_setBlock(void *data, int len);
|
void sha256_cpu_setBlock(void *data, int len);
|
||||||
void sha256_cpu_hash(int thr_id, uint32_t threads, int startNounce);
|
void sha256_cpu_hash(int thr_id, uint32_t threads, int startNounce);
|
||||||
void sha256_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy);
|
void sha256_cpu_copyHeftyHash(int thr_id, uint32_t threads, void *heftyHashes, int copy);
|
||||||
|
void sha256_cpu_free(int thr_id);
|
||||||
|
|
||||||
void combine_cpu_init(int thr_id, uint32_t threads);
|
void combine_cpu_init(int thr_id, uint32_t threads);
|
||||||
void combine_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *hash);
|
void combine_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *hash);
|
||||||
|
void combine_cpu_free(int thr_id);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -17,6 +17,7 @@ extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const
|
|||||||
extern void blake256_cpu_setBlock_80(uint32_t *pdata);
|
extern void blake256_cpu_setBlock_80(uint32_t *pdata);
|
||||||
extern void keccak256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
|
extern void keccak256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
|
||||||
extern void keccak256_cpu_init(int thr_id, uint32_t threads);
|
extern void keccak256_cpu_init(int thr_id, uint32_t threads);
|
||||||
|
extern void keccak256_cpu_free(int thr_id);
|
||||||
extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
|
extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
|
||||||
extern void skein256_cpu_init(int thr_id, uint32_t threads);
|
extern void skein256_cpu_init(int thr_id, uint32_t threads);
|
||||||
|
|
||||||
@ -174,7 +175,9 @@ extern "C" void free_lyra2(int thr_id)
|
|||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
|
keccak256_cpu_free(thr_id);
|
||||||
groestl256_cpu_free(thr_id);
|
groestl256_cpu_free(thr_id);
|
||||||
|
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
@ -19,6 +19,7 @@ extern void blake256_cpu_hash_80(const int thr_id, const uint32_t threads, const
|
|||||||
extern void blake256_cpu_setBlock_80(uint32_t *pdata);
|
extern void blake256_cpu_setBlock_80(uint32_t *pdata);
|
||||||
extern void keccak256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
|
extern void keccak256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
|
||||||
extern void keccak256_cpu_init(int thr_id, uint32_t threads);
|
extern void keccak256_cpu_init(int thr_id, uint32_t threads);
|
||||||
|
extern void keccak256_cpu_free(int thr_id);
|
||||||
extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
|
extern void skein256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNonce, uint64_t *d_outputHash, int order);
|
||||||
extern void skein256_cpu_init(int thr_id, uint32_t threads);
|
extern void skein256_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void cubehash256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_hash, int order);
|
extern void cubehash256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_hash, int order);
|
||||||
@ -183,6 +184,8 @@ extern "C" void free_lyra2v2(int thr_id)
|
|||||||
cudaFree(d_matrix[thr_id]);
|
cudaFree(d_matrix[thr_id]);
|
||||||
|
|
||||||
bmw256_cpu_free(thr_id);
|
bmw256_cpu_free(thr_id);
|
||||||
|
keccak256_cpu_free(thr_id);
|
||||||
|
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
@ -730,7 +730,7 @@ void neoscrypt_cpu_init(int thr_id, uint32_t threads)
|
|||||||
{
|
{
|
||||||
cuda_get_arch(thr_id);
|
cuda_get_arch(thr_id);
|
||||||
cudaMalloc(&d_NNonce[thr_id], sizeof(uint32_t));
|
cudaMalloc(&d_NNonce[thr_id], sizeof(uint32_t));
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_buffer[thr_id], threads * 256 * SHIFT));
|
CUDA_SAFE_CALL(cudaMalloc(&d_buffer[thr_id], (size_t) 256 * SHIFT * threads));
|
||||||
cudaMemcpyToSymbol(W, &d_buffer[thr_id], sizeof(uint4*), 0, cudaMemcpyHostToDevice);
|
cudaMemcpyToSymbol(W, &d_buffer[thr_id], sizeof(uint4*), 0, cudaMemcpyHostToDevice);
|
||||||
cudaMemcpyToSymbol(BLAKE2S_SIGMA, BLAKE2S_SIGMA_host, sizeof(BLAKE2S_SIGMA_host), 0, cudaMemcpyHostToDevice);
|
cudaMemcpyToSymbol(BLAKE2S_SIGMA, BLAKE2S_SIGMA_host, sizeof(BLAKE2S_SIGMA_host), 0, cudaMemcpyHostToDevice);
|
||||||
}
|
}
|
||||||
|
@ -381,7 +381,7 @@ extern "C" int scanhash_pentablake(int thr_id, struct work *work, uint32_t max_n
|
|||||||
if (active_gpus > 1) {
|
if (active_gpus > 1) {
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
}
|
}
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 64 * throughput));
|
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));
|
||||||
CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t)));
|
CUDA_SAFE_CALL(cudaMallocHost(&h_resNounce[thr_id], 2*sizeof(uint32_t)));
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], 2*sizeof(uint32_t)));
|
CUDA_SAFE_CALL(cudaMalloc(&d_resNounce[thr_id], 2*sizeof(uint32_t)));
|
||||||
|
|
||||||
@ -452,7 +452,7 @@ void free_pentablake(int thr_id)
|
|||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
cudaFree(h_resNounce[thr_id]);
|
cudaFreeHost(h_resNounce[thr_id]);
|
||||||
cudaFree(d_resNounce[thr_id]);
|
cudaFree(d_resNounce[thr_id]);
|
||||||
|
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
@ -141,6 +141,14 @@ void quark_groestl512_cpu_init(int thr_id, uint32_t threads)
|
|||||||
quark_groestl512_sm20_init(thr_id, threads);
|
quark_groestl512_sm20_init(thr_id, threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void quark_groestl512_cpu_free(int thr_id)
|
||||||
|
{
|
||||||
|
int dev_id = device_map[thr_id];
|
||||||
|
if (device_sm[dev_id] < 300 || cuda_arch[dev_id] < 300)
|
||||||
|
quark_groestl512_sm20_free(thr_id);
|
||||||
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
||||||
{
|
{
|
||||||
|
@ -5,6 +5,8 @@
|
|||||||
#define MAXWELL_OR_FERMI 0
|
#define MAXWELL_OR_FERMI 0
|
||||||
#define USE_SHARED 1
|
#define USE_SHARED 1
|
||||||
|
|
||||||
|
static unsigned int *d_textures[MAX_GPUS][8];
|
||||||
|
|
||||||
// #define SPH_C32(x) ((uint32_t)(x ## U))
|
// #define SPH_C32(x) ((uint32_t)(x ## U))
|
||||||
// #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
|
// #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
|
||||||
|
|
||||||
@ -274,28 +276,38 @@ void quark_groestl512_gpu_hash_64(uint32_t threads, uint32_t startNounce, uint32
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#define texDef(texname, texmem, texsource, texsize) \
|
#define texDef(id, texname, texmem, texsource, texsize) { \
|
||||||
unsigned int *texmem; \
|
unsigned int *texmem; \
|
||||||
cudaMalloc(&texmem, texsize); \
|
cudaMalloc(&texmem, texsize); \
|
||||||
|
d_textures[thr_id][id] = texmem; \
|
||||||
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
|
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
|
||||||
texname.normalized = 0; \
|
texname.normalized = 0; \
|
||||||
texname.filterMode = cudaFilterModePoint; \
|
texname.filterMode = cudaFilterModePoint; \
|
||||||
texname.addressMode[0] = cudaAddressModeClamp; \
|
texname.addressMode[0] = cudaAddressModeClamp; \
|
||||||
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
|
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
|
||||||
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \
|
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
void quark_groestl512_sm20_init(int thr_id, uint32_t threads)
|
void quark_groestl512_sm20_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
// Texturen mit obigem Makro initialisieren
|
// Texturen mit obigem Makro initialisieren
|
||||||
texDef(t0up1, d_T0up, T0up_cpu, sizeof(uint32_t)*256);
|
texDef(0, t0up1, d_T0up, T0up_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t0dn1, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256);
|
texDef(1, t0dn1, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t1up1, d_T1up, T1up_cpu, sizeof(uint32_t)*256);
|
texDef(2, t1up1, d_T1up, T1up_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t1dn1, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256);
|
texDef(3, t1dn1, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t2up1, d_T2up, T2up_cpu, sizeof(uint32_t)*256);
|
texDef(4, t2up1, d_T2up, T2up_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t2dn1, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256);
|
texDef(5, t2dn1, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t3up1, d_T3up, T3up_cpu, sizeof(uint32_t)*256);
|
texDef(6, t3up1, d_T3up, T3up_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(t3dn1, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256);
|
texDef(7, t3dn1, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void quark_groestl512_sm20_free(int thr_id)
|
||||||
|
{
|
||||||
|
for (int i=0; i<8; i++)
|
||||||
|
cudaFree(d_textures[thr_id][i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
|
@ -30,6 +30,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
|
|||||||
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void quark_groestl512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -270,6 +271,7 @@ extern "C" void free_quark(int thr_id)
|
|||||||
cudaFree(d_branch2Nonces[thr_id]);
|
cudaFree(d_branch2Nonces[thr_id]);
|
||||||
cudaFree(d_branch3Nonces[thr_id]);
|
cudaFree(d_branch3Nonces[thr_id]);
|
||||||
|
|
||||||
|
quark_groestl512_cpu_free(thr_id);
|
||||||
quark_compactTest_cpu_free(thr_id);
|
quark_compactTest_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
|
@ -28,14 +28,11 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
|
|||||||
|
|
||||||
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x11_simd512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
|
||||||
extern void quark_compactTest_cpu_init(int thr_id, uint32_t threads);
|
|
||||||
extern void quark_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes,
|
|
||||||
uint32_t *d_noncesTrue, size_t *nrmTrue, uint32_t *d_noncesFalse, size_t *nrmFalse,
|
|
||||||
int order);
|
|
||||||
|
|
||||||
extern "C" void qubithash(void *state, const void *input)
|
extern "C" void qubithash(void *state, const void *input)
|
||||||
{
|
{
|
||||||
@ -96,7 +93,7 @@ extern "C" int scanhash_qubit(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
x11_simd512_cpu_init(thr_id, throughput);
|
x11_simd512_cpu_init(thr_id, throughput);
|
||||||
x11_echo512_cpu_init(thr_id, throughput);
|
x11_echo512_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
|
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
|
||||||
|
|
||||||
cuda_check_cpu_init(thr_id, throughput);
|
cuda_check_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
@ -167,6 +164,8 @@ extern "C" void free_qubit(int thr_id)
|
|||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
|
x11_simd512_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
|
@ -426,10 +426,17 @@ unsigned char GetNfactor(unsigned int nTimestamp)
|
|||||||
return Nfactor;
|
return Nfactor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
// cleanup
|
// cleanup
|
||||||
void free_scrypt_jane(int thr_id)
|
void free_scrypt_jane(int thr_id)
|
||||||
{
|
{
|
||||||
// todo ?
|
int dev_id = device_map[thr_id];
|
||||||
|
|
||||||
|
cudaSetDevice(dev_id);
|
||||||
|
cudaDeviceReset(); // well, simple way to free ;)
|
||||||
|
|
||||||
|
init[thr_id] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define bswap_32x4(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) \
|
#define bswap_32x4(x) ((((x) << 24) & 0xff000000u) | (((x) << 8) & 0x00ff0000u) \
|
||||||
@ -467,7 +474,18 @@ int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
|||||||
s_Nfactor = Nfactor;
|
s_Nfactor = Nfactor;
|
||||||
}
|
}
|
||||||
|
|
||||||
int throughput = cuda_throughput(thr_id);
|
static __thread int throughput = 0;
|
||||||
|
if(!init[thr_id]) {
|
||||||
|
int dev_id = device_map[thr_id];
|
||||||
|
|
||||||
|
cudaSetDevice(dev_id);
|
||||||
|
cudaDeviceReset();
|
||||||
|
cudaSetDevice(dev_id);
|
||||||
|
throughput = cuda_throughput(thr_id);
|
||||||
|
applog(LOG_INFO, "GPU #%d: cuda throughput is %d", dev_id, throughput);
|
||||||
|
|
||||||
|
init[thr_id] = true;
|
||||||
|
}
|
||||||
|
|
||||||
if(throughput == 0)
|
if(throughput == 0)
|
||||||
return -1;
|
return -1;
|
||||||
@ -602,7 +620,7 @@ int scanhash_scrypt_jane(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
|||||||
|
|
||||||
if (memcmp(thash, &hash[cur][8*i], 32) == 0)
|
if (memcmp(thash, &hash[cur][8*i], 32) == 0)
|
||||||
{
|
{
|
||||||
bn_store_hash_target_ratio(thash, ptarget, work);
|
work_set_target_ratio(work, thash);
|
||||||
*hashes_done = n - pdata[19];
|
*hashes_done = n - pdata[19];
|
||||||
pdata[19] = tmp_nonce;
|
pdata[19] = tmp_nonce;
|
||||||
scrypt_free(&Vbuf);
|
scrypt_free(&Vbuf);
|
||||||
|
27
scrypt.cpp
27
scrypt.cpp
@ -685,10 +685,18 @@ static int lastFactor = 0;
|
|||||||
|
|
||||||
static void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad);
|
static void computeGold(uint32_t* const input, uint32_t *reference, uchar *scratchpad);
|
||||||
|
|
||||||
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
// cleanup
|
// cleanup
|
||||||
void free_scrypt(int thr_id)
|
void free_scrypt(int thr_id)
|
||||||
{
|
{
|
||||||
// todo ?
|
int dev_id = device_map[thr_id];
|
||||||
|
|
||||||
|
// trivial way to free all...
|
||||||
|
cudaSetDevice(dev_id);
|
||||||
|
cudaDeviceReset();
|
||||||
|
|
||||||
|
init[thr_id] = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scrypt proof of work algorithm
|
// Scrypt proof of work algorithm
|
||||||
@ -701,9 +709,20 @@ int scanhash_scrypt(int thr_id, struct work *work, uint32_t max_nonce, unsigned
|
|||||||
int result = 0;
|
int result = 0;
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
int throughput = cuda_throughput(thr_id);
|
static __thread int throughput = 0;
|
||||||
|
|
||||||
if(throughput == 0)
|
if (!init[thr_id]) {
|
||||||
|
int dev_id = device_map[thr_id];
|
||||||
|
cudaSetDevice(dev_id);
|
||||||
|
cudaDeviceReset();
|
||||||
|
cudaSetDevice(dev_id);
|
||||||
|
throughput = cuda_throughput(thr_id);
|
||||||
|
applog(LOG_INFO, "GPU #%d: cuda throughput is %d", dev_id, throughput);
|
||||||
|
|
||||||
|
init[thr_id] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (throughput == 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
gettimeofday(tv_start, NULL);
|
gettimeofday(tv_start, NULL);
|
||||||
@ -912,7 +931,7 @@ int scanhash_scrypt(int thr_id, struct work *work, uint32_t max_nonce, unsigned
|
|||||||
device_map[thr_id], device_name[thr_id], i, cur);
|
device_map[thr_id], device_name[thr_id], i, cur);
|
||||||
} else {
|
} else {
|
||||||
*hashes_done = n - pdata[19];
|
*hashes_done = n - pdata[19];
|
||||||
bn_store_hash_target_ratio(refhash, ptarget, work);
|
work_set_target_ratio(work, refhash);
|
||||||
pdata[19] = nonce[cur] + i;
|
pdata[19] = nonce[cur] + i;
|
||||||
result = 1;
|
result = 1;
|
||||||
goto byebye;
|
goto byebye;
|
||||||
|
@ -404,10 +404,10 @@ void cuda_blake256_hash( uint64_t *g_out, uint32_t nonce, uint32_t *g_good, bool
|
|||||||
|
|
||||||
static std::map<int, uint32_t *> context_good[2];
|
static std::map<int, uint32_t *> context_good[2];
|
||||||
|
|
||||||
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
bool default_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
bool default_prepare_blake256(int thr_id, const uint32_t host_pdata[20], const uint32_t host_ptarget[8])
|
||||||
{
|
{
|
||||||
static bool init[MAX_GPUS] = { 0 };
|
|
||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
// allocate pinned host memory for good hashes
|
// allocate pinned host memory for good hashes
|
||||||
@ -441,3 +441,13 @@ void default_do_blake256(dim3 grid, dim3 threads, int thr_id, int stream, uint32
|
|||||||
cudaMemcpyDeviceToHost, context_streams[stream][thr_id]));
|
cudaMemcpyDeviceToHost, context_streams[stream][thr_id]));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void default_free_blake256(int thr_id)
|
||||||
|
{
|
||||||
|
if (init[thr_id]) {
|
||||||
|
cudaFree(context_good[0][thr_id]);
|
||||||
|
cudaFree(context_good[1][thr_id]);
|
||||||
|
init[thr_id] = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -144,9 +144,8 @@ int cuda_throughput(int thr_id)
|
|||||||
cuCtxCreate( &ctx, CU_CTX_SCHED_YIELD, device_map[thr_id] );
|
cuCtxCreate( &ctx, CU_CTX_SCHED_YIELD, device_map[thr_id] );
|
||||||
cuCtxSetCurrent(ctx);
|
cuCtxSetCurrent(ctx);
|
||||||
#else
|
#else
|
||||||
checkCudaErrors(cudaSetDeviceFlags(cudaDeviceScheduleYield));
|
|
||||||
checkCudaErrors(cudaSetDevice(device_map[thr_id]));
|
checkCudaErrors(cudaSetDevice(device_map[thr_id]));
|
||||||
// checkCudaErrors(cudaFree(0));
|
checkCudaErrors(cudaSetDeviceFlags(cudaDeviceScheduleYield));
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
KernelInterface *kernel;
|
KernelInterface *kernel;
|
||||||
@ -256,7 +255,7 @@ inline int _ConvertSMVer2Cores(int major, int minor)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// If we don't find the values, we default use the previous one to run properly
|
// If we don't find the values, we default use the previous one to run properly
|
||||||
applog(LOG_WARNING, "MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM", major, minor, 128);
|
applog(LOG_WARNING, "MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM", major, minor, 128);
|
||||||
return 128;
|
return 128;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
10
skein.cu
10
skein.cu
@ -11,12 +11,14 @@
|
|||||||
#include <openssl/sha.h>
|
#include <openssl/sha.h>
|
||||||
|
|
||||||
static uint32_t *d_hash[MAX_GPUS];
|
static uint32_t *d_hash[MAX_GPUS];
|
||||||
|
static __thread bool sm5 = true;
|
||||||
|
|
||||||
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void skein512_cpu_setBlock_80(void *pdata);
|
extern void skein512_cpu_setBlock_80(void *pdata);
|
||||||
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
|
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
|
||||||
|
|
||||||
extern void skeincoin_init(int thr_id);
|
extern void skeincoin_init(int thr_id);
|
||||||
|
extern void skeincoin_free(int thr_id);
|
||||||
extern void skeincoin_setBlock_80(int thr_id, void *pdata);
|
extern void skeincoin_setBlock_80(int thr_id, void *pdata);
|
||||||
extern uint32_t skeincoin_hash_sm5(int thr_id, uint32_t threads, uint32_t startNounce, int swap, uint64_t target64, uint32_t *secNonce);
|
extern uint32_t skeincoin_hash_sm5(int thr_id, uint32_t threads, uint32_t startNounce, int swap, uint64_t target64, uint32_t *secNonce);
|
||||||
|
|
||||||
@ -355,7 +357,7 @@ extern "C" int scanhash_skeincoin(int thr_id, struct work* work, uint32_t max_no
|
|||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
const int swap = 1;
|
const int swap = 1;
|
||||||
|
|
||||||
bool sm5 = (device_sm[device_map[thr_id]] >= 500);
|
sm5 = (device_sm[device_map[thr_id]] >= 500);
|
||||||
bool checkSecnonce = (have_stratum || have_longpoll) && !sm5;
|
bool checkSecnonce = (have_stratum || have_longpoll) && !sm5;
|
||||||
|
|
||||||
uint32_t throughput = device_intensity(thr_id, __func__, 1U << 20);
|
uint32_t throughput = device_intensity(thr_id, __func__, 1U << 20);
|
||||||
@ -473,9 +475,11 @@ extern "C" void free_skeincoin(int thr_id)
|
|||||||
|
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
if (!sm5) {
|
||||||
|
cudaFree(d_hash[thr_id]);
|
||||||
|
cuda_check_cpu_free(thr_id);
|
||||||
|
}
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
12
skein2.cpp
12
skein2.cpp
@ -16,6 +16,7 @@ extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
|||||||
extern void skein512_cpu_setBlock_80(void *pdata);
|
extern void skein512_cpu_setBlock_80(void *pdata);
|
||||||
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
|
extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_hash, int swap);
|
||||||
|
|
||||||
|
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
|
||||||
void skein2hash(void *output, const void *input)
|
void skein2hash(void *output, const void *input)
|
||||||
@ -38,6 +39,7 @@ static bool init[MAX_GPUS] = { 0 };
|
|||||||
|
|
||||||
int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
|
int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned long *hashes_done)
|
||||||
{
|
{
|
||||||
|
int dev_id = device_map[thr_id];
|
||||||
uint32_t *pdata = work->data;
|
uint32_t *pdata = work->data;
|
||||||
uint32_t *ptarget = work->target;
|
uint32_t *ptarget = work->target;
|
||||||
const uint32_t first_nonce = pdata[19];
|
const uint32_t first_nonce = pdata[19];
|
||||||
@ -50,9 +52,9 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
|
|||||||
|
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
{
|
{
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(dev_id);
|
||||||
|
|
||||||
cudaMalloc(&d_hash[thr_id], throughput * 64U);
|
cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput);
|
||||||
|
|
||||||
quark_skein512_cpu_init(thr_id, throughput);
|
quark_skein512_cpu_init(thr_id, throughput);
|
||||||
cuda_check_cpu_init(thr_id, throughput);
|
cuda_check_cpu_init(thr_id, throughput);
|
||||||
@ -92,7 +94,7 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
|
|||||||
work_set_target_ratio(work, vhash64);
|
work_set_target_ratio(work, vhash64);
|
||||||
if (secNonce != 0) {
|
if (secNonce != 0) {
|
||||||
if (!opt_quiet)
|
if (!opt_quiet)
|
||||||
applog(LOG_BLUE, "GPU #%d: found second nonce %08x !", device_map[thr_id], swab32(secNonce));
|
applog(LOG_BLUE, "GPU #%d: found second nonce %08x !", dev_id, swab32(secNonce));
|
||||||
|
|
||||||
endiandata[19] = secNonce;
|
endiandata[19] = secNonce;
|
||||||
skein2hash(vhash64, endiandata);
|
skein2hash(vhash64, endiandata);
|
||||||
@ -104,7 +106,7 @@ int scanhash_skein2(int thr_id, struct work* work, uint32_t max_nonce, unsigned
|
|||||||
pdata[19] = swab32(foundNonce);
|
pdata[19] = swab32(foundNonce);
|
||||||
return res;
|
return res;
|
||||||
} else {
|
} else {
|
||||||
applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce);
|
applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", dev_id, foundNonce);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -135,4 +137,4 @@ void free_skein2(int thr_id)
|
|||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
}
|
}
|
||||||
|
@ -32,6 +32,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
|
|||||||
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void quark_groestl512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -50,6 +51,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
|
|||||||
|
|
||||||
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x11_simd512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -255,6 +257,8 @@ extern "C" void free_c11(int thr_id)
|
|||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
quark_groestl512_cpu_free(thr_id);
|
||||||
|
x11_simd512_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
@ -672,6 +672,13 @@ int x11_simd512_cpu_init(int thr_id, uint32_t threads)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void x11_simd512_cpu_free(int thr_id)
|
||||||
|
{
|
||||||
|
cudaFree(d_temp4[thr_id]);
|
||||||
|
cudaFree(d_state[thr_id]);
|
||||||
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
||||||
{
|
{
|
||||||
|
@ -21,6 +21,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
|
|||||||
|
|
||||||
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x11_simd512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -163,6 +164,7 @@ extern "C" void free_fresh(int thr_id)
|
|||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
x11_simd512_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
@ -21,6 +21,7 @@ extern void x11_shavite512_setBlock_80(void *pdata);
|
|||||||
|
|
||||||
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x11_simd512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -146,6 +147,7 @@ extern "C" void free_s3(int thr_id)
|
|||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
x11_simd512_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
@ -32,6 +32,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
|
|||||||
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_doublegroestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void quark_groestl512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -50,6 +51,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
|
|||||||
|
|
||||||
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x11_simd512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -254,6 +256,9 @@ extern "C" void free_x11(int thr_id)
|
|||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
|
quark_groestl512_cpu_free(thr_id);
|
||||||
|
x11_simd512_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
|
@ -38,6 +38,9 @@
|
|||||||
* @author phm <phm@inbox.com>
|
* @author phm <phm@inbox.com>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
// store allocated textures device addresses
|
||||||
|
static unsigned int* d_textures[MAX_GPUS][4];
|
||||||
|
|
||||||
#define mixtab0(x) (*((uint32_t*)mixtabs + ( (x))))
|
#define mixtab0(x) (*((uint32_t*)mixtabs + ( (x))))
|
||||||
#define mixtab1(x) (*((uint32_t*)mixtabs + (256+(x))))
|
#define mixtab1(x) (*((uint32_t*)mixtabs + (256+(x))))
|
||||||
#define mixtab2(x) (*((uint32_t*)mixtabs + (512+(x))))
|
#define mixtab2(x) (*((uint32_t*)mixtabs + (512+(x))))
|
||||||
@ -657,25 +660,37 @@ __global__ void x13_fugue512_gpu_hash_64(uint32_t threads, uint32_t startNounce,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define texDef(texname, texmem, texsource, texsize) \
|
#define texDef(id, texname, texmem, texsource, texsize) { \
|
||||||
unsigned int *texmem; \
|
unsigned int *texmem; \
|
||||||
cudaMalloc(&texmem, texsize); \
|
cudaMalloc(&texmem, texsize); \
|
||||||
|
d_textures[thr_id][id] = texmem; \
|
||||||
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
|
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \
|
||||||
texname.normalized = 0; \
|
texname.normalized = 0; \
|
||||||
texname.filterMode = cudaFilterModePoint; \
|
texname.filterMode = cudaFilterModePoint; \
|
||||||
texname.addressMode[0] = cudaAddressModeClamp; \
|
texname.addressMode[0] = cudaAddressModeClamp; \
|
||||||
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
|
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \
|
||||||
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); }
|
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); \
|
||||||
|
} \
|
||||||
__host__ void x13_fugue512_cpu_init(int thr_id, uint32_t threads)
|
|
||||||
{
|
|
||||||
texDef(mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256);
|
|
||||||
texDef(mixTab1Tex, mixTab1m, mixtab1_cpu, sizeof(uint32_t)*256);
|
|
||||||
texDef(mixTab2Tex, mixTab2m, mixtab2_cpu, sizeof(uint32_t)*256);
|
|
||||||
texDef(mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
__host__
|
||||||
|
void x13_fugue512_cpu_init(int thr_id, uint32_t threads)
|
||||||
|
{
|
||||||
|
texDef(0, mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256);
|
||||||
|
texDef(1, mixTab1Tex, mixTab1m, mixtab1_cpu, sizeof(uint32_t)*256);
|
||||||
|
texDef(2, mixTab2Tex, mixTab2m, mixtab2_cpu, sizeof(uint32_t)*256);
|
||||||
|
texDef(3, mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void x13_fugue512_cpu_free(int thr_id)
|
||||||
|
{
|
||||||
|
for (int i=0; i<4; i++)
|
||||||
|
cudaFree(d_textures[thr_id][i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__
|
||||||
|
void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order)
|
||||||
{
|
{
|
||||||
const uint32_t threadsperblock = 256;
|
const uint32_t threadsperblock = 256;
|
||||||
|
|
||||||
|
@ -34,6 +34,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
|
|||||||
|
|
||||||
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void quark_groestl512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -52,6 +53,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
|
|||||||
|
|
||||||
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x11_simd512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -61,7 +63,7 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
|
|||||||
|
|
||||||
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
|
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x13_fugue512_cpu_free(int thr_id);
|
||||||
|
|
||||||
// X13 CPU Hash
|
// X13 CPU Hash
|
||||||
extern "C" void x13hash(void *output, const void *input)
|
extern "C" void x13hash(void *output, const void *input)
|
||||||
@ -256,6 +258,10 @@ extern "C" void free_x13(int thr_id)
|
|||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
|
quark_groestl512_cpu_free(thr_id);
|
||||||
|
x11_simd512_cpu_free(thr_id);
|
||||||
|
x13_fugue512_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
|
14
x15/x14.cu
14
x15/x14.cu
@ -38,6 +38,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
|
|||||||
|
|
||||||
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void quark_groestl512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -56,6 +57,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
|
|||||||
|
|
||||||
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x11_simd512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -65,6 +67,7 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
|
|||||||
|
|
||||||
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
|
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x13_fugue512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
|
extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -185,9 +188,10 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
x13_fugue512_cpu_init(thr_id, throughput);
|
x13_fugue512_cpu_init(thr_id, throughput);
|
||||||
x14_shabal512_cpu_init(thr_id, throughput);
|
x14_shabal512_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput), 0);
|
|
||||||
|
|
||||||
cuda_check_cpu_init(thr_id, throughput);
|
cuda_check_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
|
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
|
||||||
|
|
||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -260,8 +264,12 @@ extern "C" void free_x14(int thr_id)
|
|||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
|
quark_groestl512_cpu_free(thr_id);
|
||||||
|
x11_simd512_cpu_free(thr_id);
|
||||||
|
x13_fugue512_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
}
|
}
|
||||||
|
@ -39,6 +39,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
|
|||||||
|
|
||||||
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void quark_groestl512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -57,6 +58,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
|
|||||||
|
|
||||||
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x11_simd512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -66,6 +68,7 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
|
|||||||
|
|
||||||
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
|
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x13_fugue512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
|
extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -275,6 +278,10 @@ extern "C" void free_x15(int thr_id)
|
|||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
|
quark_groestl512_cpu_free(thr_id);
|
||||||
|
x11_simd512_cpu_free(thr_id);
|
||||||
|
x13_fugue512_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
|
@ -42,6 +42,7 @@ extern void quark_bmw512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
|
|||||||
|
|
||||||
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void quark_groestl512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_skein512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -60,6 +61,7 @@ extern void x11_shavite512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
|
|||||||
|
|
||||||
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
extern int x11_simd512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_simd512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x11_simd512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
extern void x11_echo512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x11_echo512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -69,6 +71,7 @@ extern void x13_hamsi512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t star
|
|||||||
|
|
||||||
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
|
extern void x13_fugue512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x13_fugue512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x13_fugue512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
|
extern void x14_shabal512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -297,6 +300,10 @@ extern "C" void free_x17(int thr_id)
|
|||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
|
quark_groestl512_cpu_free(thr_id);
|
||||||
|
x11_simd512_cpu_free(thr_id);
|
||||||
|
x13_fugue512_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
|
2
zr5.cu
2
zr5.cu
@ -319,6 +319,7 @@ extern void quark_blake512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
|
|||||||
|
|
||||||
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_groestl512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_groestl512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void quark_groestl512_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_jh512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void quark_jh512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -491,6 +492,7 @@ extern "C" void free_zr5(int thr_id)
|
|||||||
|
|
||||||
cudaFree(d_txs[thr_id]);
|
cudaFree(d_txs[thr_id]);
|
||||||
|
|
||||||
|
quark_groestl512_cpu_free(thr_id);
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
init[thr_id] = false;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user