|
|
@ -9,6 +9,7 @@ |
|
|
|
// globaler Speicher für alle HeftyHashes aller Threads |
|
|
|
// globaler Speicher für alle HeftyHashes aller Threads |
|
|
|
extern uint32_t *heavy_heftyHashes[MAX_GPUS]; |
|
|
|
extern uint32_t *heavy_heftyHashes[MAX_GPUS]; |
|
|
|
extern uint32_t *heavy_nonceVector[MAX_GPUS]; |
|
|
|
extern uint32_t *heavy_nonceVector[MAX_GPUS]; |
|
|
|
|
|
|
|
static unsigned int *d_textures[MAX_GPUS][8]; |
|
|
|
|
|
|
|
|
|
|
|
// globaler Speicher für unsere Ergebnisse |
|
|
|
// globaler Speicher für unsere Ergebnisse |
|
|
|
uint32_t *d_hash4output[MAX_GPUS]; |
|
|
|
uint32_t *d_hash4output[MAX_GPUS]; |
|
|
@ -730,36 +731,50 @@ template <int BLOCKSIZE> __global__ void groestl512_gpu_hash(uint32_t threads, u |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#define texDef(texname, texmem, texsource, texsize) \ |
|
|
|
#define texDef(id, texname, texmem, texsource, texsize) { \ |
|
|
|
unsigned int *texmem; \ |
|
|
|
unsigned int *texmem; \ |
|
|
|
cudaMalloc(&texmem, texsize); \ |
|
|
|
cudaMalloc(&texmem, texsize); \ |
|
|
|
|
|
|
|
d_textures[thr_id][id] = texmem; \ |
|
|
|
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \ |
|
|
|
cudaMemcpy(texmem, texsource, texsize, cudaMemcpyHostToDevice); \ |
|
|
|
texname.normalized = 0; \ |
|
|
|
texname.normalized = 0; \ |
|
|
|
texname.filterMode = cudaFilterModePoint; \ |
|
|
|
texname.filterMode = cudaFilterModePoint; \ |
|
|
|
texname.addressMode[0] = cudaAddressModeClamp; \ |
|
|
|
texname.addressMode[0] = cudaAddressModeClamp; \ |
|
|
|
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \ |
|
|
|
{ cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<unsigned int>(); \ |
|
|
|
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); } \ |
|
|
|
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); \ |
|
|
|
|
|
|
|
} \ |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// Setup-Funktionen |
|
|
|
// Setup Function |
|
|
|
__host__ void groestl512_cpu_init(int thr_id, uint32_t threads) |
|
|
|
__host__ |
|
|
|
|
|
|
|
void groestl512_cpu_init(int thr_id, uint32_t threads) |
|
|
|
{ |
|
|
|
{ |
|
|
|
// Texturen mit obigem Makro initialisieren |
|
|
|
// Texturen mit obigem Makro initialisieren |
|
|
|
texDef(t0up, d_T0up, T0up_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(0, t0up, d_T0up, T0up_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(t0dn, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(1, t0dn, d_T0dn, T0dn_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(t1up, d_T1up, T1up_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(2, t1up, d_T1up, T1up_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(t1dn, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(3, t1dn, d_T1dn, T1dn_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(t2up, d_T2up, T2up_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(4, t2up, d_T2up, T2up_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(t2dn, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(5, t2dn, d_T2dn, T2dn_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(t3up, d_T3up, T3up_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(6, t3up, d_T3up, T3up_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(t3dn, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256); |
|
|
|
texDef(7, t3dn, d_T3dn, T3dn_cpu, sizeof(uint32_t)*256); |
|
|
|
|
|
|
|
|
|
|
|
// Speicher für alle Ergebnisse belegen |
|
|
|
// Speicher für alle Ergebnisse belegen |
|
|
|
cudaMalloc(&d_hash4output[thr_id], 16 * sizeof(uint32_t) * threads); |
|
|
|
cudaMalloc(&d_hash4output[thr_id], (size_t) 64 * threads); |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__host__ |
|
|
|
|
|
|
|
void groestl512_cpu_free(int thr_id) |
|
|
|
|
|
|
|
{ |
|
|
|
|
|
|
|
for (int i=0; i <8; i++) |
|
|
|
|
|
|
|
cudaFree(d_textures[thr_id][i]); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
cudaFree(d_hash4output[thr_id]); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
static int BLOCKSIZE = 84; |
|
|
|
static int BLOCKSIZE = 84; |
|
|
|
|
|
|
|
|
|
|
|
__host__ void groestl512_cpu_setBlock(void *data, int len) |
|
|
|
__host__ |
|
|
|
|
|
|
|
void groestl512_cpu_setBlock(void *data, int len) |
|
|
|
// data muss 80/84-Byte haben! |
|
|
|
// data muss 80/84-Byte haben! |
|
|
|
// heftyHash hat 32-Byte |
|
|
|
// heftyHash hat 32-Byte |
|
|
|
{ |
|
|
|
{ |
|
|
|