mirror of
https://github.com/GOSTSec/ccminer
synced 2025-08-26 13:51:51 +00:00
sources: switch to UTF-8
This commit is contained in:
parent
a586cee493
commit
b4e690b486
@ -14,7 +14,7 @@ static uint32_t *d_tempBranch1Nonces[8];
|
|||||||
static uint32_t *d_numValid[8];
|
static uint32_t *d_numValid[8];
|
||||||
static uint32_t *h_numValid[8];
|
static uint32_t *h_numValid[8];
|
||||||
|
|
||||||
static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
|
static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
|
||||||
|
|
||||||
// aus heavy.cu
|
// aus heavy.cu
|
||||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
||||||
@ -93,7 +93,7 @@ __global__ void jackpot_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t
|
|||||||
inpHash = &inpHashes[id<<4];
|
inpHash = &inpHashes[id<<4];
|
||||||
}else
|
}else
|
||||||
{
|
{
|
||||||
// Nonce-Liste verfügbar
|
// Nonce-Liste verfügbar
|
||||||
int nonce = d_validNonceTable[id] - startNounce;
|
int nonce = d_validNonceTable[id] - startNounce;
|
||||||
inpHash = &inpHashes[nonce<<4];
|
inpHash = &inpHashes[nonce<<4];
|
||||||
}
|
}
|
||||||
@ -210,7 +210,7 @@ __global__ void jackpot_compactTest_gpu_SCATTER(uint32_t *sum, uint32_t *outp, c
|
|||||||
inpHash = &inpHashes[id<<4];
|
inpHash = &inpHashes[id<<4];
|
||||||
}else
|
}else
|
||||||
{
|
{
|
||||||
// Nonce-Liste verfügbar
|
// Nonce-Liste verfügbar
|
||||||
int nonce = d_validNonceTable[id] - startNounce;
|
int nonce = d_validNonceTable[id] - startNounce;
|
||||||
actNounce = nonce;
|
actNounce = nonce;
|
||||||
inpHash = &inpHashes[nonce<<4];
|
inpHash = &inpHashes[nonce<<4];
|
||||||
@ -345,7 +345,7 @@ __host__ void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t
|
|||||||
int order)
|
int order)
|
||||||
{
|
{
|
||||||
// Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind
|
// Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind
|
||||||
// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
|
// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
|
||||||
|
|
||||||
jackpot_compactTest_cpu_dualCompaction(thr_id, threads,
|
jackpot_compactTest_cpu_dualCompaction(thr_id, threads,
|
||||||
h_numValid[thr_id], d_nonces1, d_nonces2,
|
h_numValid[thr_id], d_nonces1, d_nonces2,
|
||||||
|
@ -127,7 +127,7 @@ __global__ void jackpot_keccak512_gpu_hash(int threads, uint32_t startNounce, ui
|
|||||||
for (int i=0; i<25; i++)
|
for (int i=0; i<25; i++)
|
||||||
keccak_gpu_state[i] = c_State[i];
|
keccak_gpu_state[i] = c_State[i];
|
||||||
|
|
||||||
// den Block einmal gut durchschütteln
|
// den Block einmal gut durchschütteln
|
||||||
keccak_block(keccak_gpu_state, message, c_keccak_round_constants);
|
keccak_block(keccak_gpu_state, message, c_keccak_round_constants);
|
||||||
|
|
||||||
// das Hash erzeugen
|
// das Hash erzeugen
|
||||||
@ -533,7 +533,7 @@ __host__ void jackpot_keccak512_cpu_hash(int thr_id, int threads, uint32_t start
|
|||||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||||
dim3 block(threadsperblock);
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
// Größe des dynamischen Shared Memory Bereichs
|
// Größe des dynamischen Shared Memory Bereichs
|
||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
jackpot_keccak512_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash);
|
jackpot_keccak512_gpu_hash<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash);
|
||||||
|
@ -12,7 +12,7 @@ extern "C"
|
|||||||
// aus cpu-miner.c
|
// aus cpu-miner.c
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
|
|
||||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||||
static uint32_t *d_hash[8];
|
static uint32_t *d_hash[8];
|
||||||
|
|
||||||
extern void jackpot_keccak512_cpu_init(int thr_id, int threads);
|
extern void jackpot_keccak512_cpu_init(int thr_id, int threads);
|
||||||
@ -41,7 +41,7 @@ extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t st
|
|||||||
uint32_t *d_nonces2, size_t *nrm2,
|
uint32_t *d_nonces2, size_t *nrm2,
|
||||||
int order);
|
int order);
|
||||||
|
|
||||||
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes
|
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes
|
||||||
static uint32_t *d_jackpotNonces[8];
|
static uint32_t *d_jackpotNonces[8];
|
||||||
static uint32_t *d_branch1Nonces[8];
|
static uint32_t *d_branch1Nonces[8];
|
||||||
static uint32_t *d_branch2Nonces[8];
|
static uint32_t *d_branch2Nonces[8];
|
||||||
@ -142,7 +142,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
|||||||
|
|
||||||
size_t nrm1, nrm2, nrm3;
|
size_t nrm1, nrm2, nrm3;
|
||||||
|
|
||||||
// Runde 1 (ohne Gröstl)
|
// Runde 1 (ohne Gröstl)
|
||||||
|
|
||||||
jackpot_compactTest_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], NULL,
|
jackpot_compactTest_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], NULL,
|
||||||
d_branch1Nonces[thr_id], &nrm1,
|
d_branch1Nonces[thr_id], &nrm1,
|
||||||
@ -165,7 +165,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
|||||||
|
|
||||||
// Runde 3 (komplett)
|
// Runde 3 (komplett)
|
||||||
|
|
||||||
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
||||||
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
||||||
d_branch1Nonces[thr_id], &nrm1,
|
d_branch1Nonces[thr_id], &nrm1,
|
||||||
d_branch2Nonces[thr_id], &nrm2,
|
d_branch2Nonces[thr_id], &nrm2,
|
||||||
@ -176,7 +176,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
|||||||
quark_skein512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
|
quark_skein512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
}
|
}
|
||||||
|
|
||||||
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
||||||
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
||||||
d_branch1Nonces[thr_id], &nrm1,
|
d_branch1Nonces[thr_id], &nrm1,
|
||||||
d_branch2Nonces[thr_id], &nrm2,
|
d_branch2Nonces[thr_id], &nrm2,
|
||||||
@ -189,7 +189,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
|||||||
|
|
||||||
// Runde 3 (komplett)
|
// Runde 3 (komplett)
|
||||||
|
|
||||||
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
||||||
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
||||||
d_branch1Nonces[thr_id], &nrm1,
|
d_branch1Nonces[thr_id], &nrm1,
|
||||||
d_branch2Nonces[thr_id], &nrm2,
|
d_branch2Nonces[thr_id], &nrm2,
|
||||||
@ -200,7 +200,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
|||||||
quark_skein512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
|
quark_skein512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
}
|
}
|
||||||
|
|
||||||
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
||||||
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
||||||
d_branch1Nonces[thr_id], &nrm1,
|
d_branch1Nonces[thr_id], &nrm1,
|
||||||
d_branch2Nonces[thr_id], &nrm2,
|
d_branch2Nonces[thr_id], &nrm2,
|
||||||
@ -219,7 +219,7 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
|||||||
uint32_t vhash64[8];
|
uint32_t vhash64[8];
|
||||||
be32enc(&endiandata[19], foundNonce);
|
be32enc(&endiandata[19], foundNonce);
|
||||||
|
|
||||||
// diese jackpothash Funktion gibt die Zahl der Runden zurück
|
// diese jackpothash Funktion gibt die Zahl der Runden zurück
|
||||||
rounds = jackpothash(vhash64, endiandata);
|
rounds = jackpothash(vhash64, endiandata);
|
||||||
|
|
||||||
if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) {
|
if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) {
|
||||||
|
@ -734,7 +734,7 @@ void fugue256_cpu_init(int thr_id, int threads)
|
|||||||
texDef(mixTab2Tex, mixTab2m, mixtab2_cpu, sizeof(uint32_t)*256);
|
texDef(mixTab2Tex, mixTab2m, mixtab2_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256);
|
texDef(mixTab3Tex, mixTab3m, mixtab3_cpu, sizeof(uint32_t)*256);
|
||||||
|
|
||||||
// Speicher für alle Ergebnisse belegen
|
// Speicher für alle Ergebnisse belegen
|
||||||
cudaMalloc(&d_fugue256_hashoutput[thr_id], 8 * sizeof(uint32_t) * threads);
|
cudaMalloc(&d_fugue256_hashoutput[thr_id], 8 * sizeof(uint32_t) * threads);
|
||||||
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
||||||
}
|
}
|
||||||
@ -760,15 +760,15 @@ __host__ void fugue256_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
|||||||
__host__ void fugue256_cpu_hash(int thr_id, int threads, int startNounce, void *outputHashes, uint32_t *nounce)
|
__host__ void fugue256_cpu_hash(int thr_id, int threads, int startNounce, void *outputHashes, uint32_t *nounce)
|
||||||
{
|
{
|
||||||
#if USE_SHARED
|
#if USE_SHARED
|
||||||
const int threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN
|
const int threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN
|
||||||
#else
|
#else
|
||||||
const int threadsperblock = 512; // so einstellen wie gewünscht ;-)
|
const int threadsperblock = 512; // so einstellen wie gewünscht ;-)
|
||||||
#endif
|
#endif
|
||||||
// berechne wie viele Thread Blocks wir brauchen
|
// berechne wie viele Thread Blocks wir brauchen
|
||||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||||
dim3 block(threadsperblock);
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
// Größe des dynamischen Shared Memory Bereichs
|
// Größe des dynamischen Shared Memory Bereichs
|
||||||
#if USE_SHARED
|
#if USE_SHARED
|
||||||
size_t shared_size = 4 * 256 * sizeof(uint32_t);
|
size_t shared_size = 4 * 256 * sizeof(uint32_t);
|
||||||
#else
|
#else
|
||||||
|
@ -15,13 +15,13 @@ extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int t
|
|||||||
// diese Struktur wird in der Init Funktion angefordert
|
// diese Struktur wird in der Init Funktion angefordert
|
||||||
static cudaDeviceProp props[8];
|
static cudaDeviceProp props[8];
|
||||||
|
|
||||||
// globaler Speicher für alle HeftyHashes aller Threads
|
// globaler Speicher für alle HeftyHashes aller Threads
|
||||||
__constant__ uint32_t pTarget[8]; // Single GPU
|
__constant__ uint32_t pTarget[8]; // Single GPU
|
||||||
extern uint32_t *d_resultNonce[8];
|
extern uint32_t *d_resultNonce[8];
|
||||||
|
|
||||||
__constant__ uint32_t groestlcoin_gpu_msg[32];
|
__constant__ uint32_t groestlcoin_gpu_msg[32];
|
||||||
|
|
||||||
// 64 Register Variante für Compute 3.0
|
// 64 Register Variante für Compute 3.0
|
||||||
#include "groestl_functions_quad.cu"
|
#include "groestl_functions_quad.cu"
|
||||||
#include "bitslice_transformations_quad.cu"
|
#include "bitslice_transformations_quad.cu"
|
||||||
|
|
||||||
@ -104,7 +104,7 @@ __host__ void groestlcoin_cpu_init(int thr_id, int threads)
|
|||||||
|
|
||||||
cudaGetDeviceProperties(&props[thr_id], device_map[thr_id]);
|
cudaGetDeviceProperties(&props[thr_id], device_map[thr_id]);
|
||||||
|
|
||||||
// Speicher für Gewinner-Nonce belegen
|
// Speicher für Gewinner-Nonce belegen
|
||||||
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -121,8 +121,8 @@ __host__ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
|||||||
msgBlock[20] = 0x80;
|
msgBlock[20] = 0x80;
|
||||||
msgBlock[31] = 0x01000000;
|
msgBlock[31] = 0x01000000;
|
||||||
|
|
||||||
// groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
|
// groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
|
||||||
// auf der GPU ausgeführt)
|
// auf der GPU ausgeführt)
|
||||||
|
|
||||||
// Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch)
|
// Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch)
|
||||||
cudaMemcpyToSymbol( groestlcoin_gpu_msg,
|
cudaMemcpyToSymbol( groestlcoin_gpu_msg,
|
||||||
@ -147,7 +147,7 @@ __host__ void groestlcoin_cpu_hash(int thr_id, int threads, uint32_t startNounce
|
|||||||
dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
|
dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
|
||||||
dim3 block(threadsperblock);
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
// Größe des dynamischen Shared Memory Bereichs
|
// Größe des dynamischen Shared Memory Bereichs
|
||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
|
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
|
||||||
|
@ -14,7 +14,7 @@ extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int t
|
|||||||
// diese Struktur wird in der Init Funktion angefordert
|
// diese Struktur wird in der Init Funktion angefordert
|
||||||
static cudaDeviceProp props[8];
|
static cudaDeviceProp props[8];
|
||||||
|
|
||||||
// globaler Speicher für alle HeftyHashes aller Threads
|
// globaler Speicher für alle HeftyHashes aller Threads
|
||||||
__constant__ uint32_t pTarget[8]; // Single GPU
|
__constant__ uint32_t pTarget[8]; // Single GPU
|
||||||
uint32_t *d_outputHashes[8];
|
uint32_t *d_outputHashes[8];
|
||||||
extern uint32_t *d_resultNonce[8];
|
extern uint32_t *d_resultNonce[8];
|
||||||
@ -49,7 +49,7 @@ uint32_t myr_sha256_cpu_w2Table[] = {
|
|||||||
0x69bc7ac4, 0xbd11375b, 0xe3ba71e5, 0x3b209ff2, 0x18feee17, 0xe25ad9e7, 0x13375046, 0x0515089d,
|
0x69bc7ac4, 0xbd11375b, 0xe3ba71e5, 0x3b209ff2, 0x18feee17, 0xe25ad9e7, 0x13375046, 0x0515089d,
|
||||||
0x4f0d0f04, 0x2627484e, 0x310128d2, 0xc668b434, 0x420841cc, 0x62d311b8, 0xe59ba771, 0x85a7a484 };
|
0x4f0d0f04, 0x2627484e, 0x310128d2, 0xc668b434, 0x420841cc, 0x62d311b8, 0xe59ba771, 0x85a7a484 };
|
||||||
|
|
||||||
// 64 Register Variante für Compute 3.0
|
// 64 Register Variante für Compute 3.0
|
||||||
#include "groestl_functions_quad.cu"
|
#include "groestl_functions_quad.cu"
|
||||||
#include "bitslice_transformations_quad.cu"
|
#include "bitslice_transformations_quad.cu"
|
||||||
|
|
||||||
@ -326,10 +326,10 @@ __host__ void myriadgroestl_cpu_init(int thr_id, int threads)
|
|||||||
|
|
||||||
cudaGetDeviceProperties(&props[thr_id], device_map[thr_id]);
|
cudaGetDeviceProperties(&props[thr_id], device_map[thr_id]);
|
||||||
|
|
||||||
// Speicher für Gewinner-Nonce belegen
|
// Speicher für Gewinner-Nonce belegen
|
||||||
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
||||||
|
|
||||||
// Speicher für temporäreHashes
|
// Speicher für temporäreHashes
|
||||||
cudaMalloc(&d_outputHashes[thr_id], 16*sizeof(uint32_t)*threads);
|
cudaMalloc(&d_outputHashes[thr_id], 16*sizeof(uint32_t)*threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -346,8 +346,8 @@ __host__ void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn
|
|||||||
msgBlock[20] = 0x80;
|
msgBlock[20] = 0x80;
|
||||||
msgBlock[31] = 0x01000000;
|
msgBlock[31] = 0x01000000;
|
||||||
|
|
||||||
// groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
|
// groestl512 braucht hierfür keinen CPU-Code (die einzige Runde wird
|
||||||
// auf der GPU ausgeführt)
|
// auf der GPU ausgeführt)
|
||||||
|
|
||||||
// Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch)
|
// Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch)
|
||||||
cudaMemcpyToSymbol( myriadgroestl_gpu_msg,
|
cudaMemcpyToSymbol( myriadgroestl_gpu_msg,
|
||||||
@ -368,7 +368,7 @@ __host__ void myriadgroestl_cpu_hash(int thr_id, int threads, uint32_t startNoun
|
|||||||
// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
|
// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
|
||||||
const int factor=4;
|
const int factor=4;
|
||||||
|
|
||||||
// Größe des dynamischen Shared Memory Bereichs
|
// Größe des dynamischen Shared Memory Bereichs
|
||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
|
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
|
||||||
|
@ -12,7 +12,7 @@ extern "C"
|
|||||||
// aus cpu-miner.c
|
// aus cpu-miner.c
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
|
|
||||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||||
static uint32_t *d_hash[8];
|
static uint32_t *d_hash[8];
|
||||||
|
|
||||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||||
|
@ -259,10 +259,10 @@ __global__ void x11_cubehash512_gpu_hash_64(int threads, uint32_t startNounce, u
|
|||||||
uint32_t x[2][2][2][2][2];
|
uint32_t x[2][2][2][2][2];
|
||||||
Init(x);
|
Init(x);
|
||||||
|
|
||||||
// erste Hälfte des Hashes (32 bytes)
|
// erste Hälfte des Hashes (32 bytes)
|
||||||
Update32(x, (const BitSequence*)Hash);
|
Update32(x, (const BitSequence*)Hash);
|
||||||
|
|
||||||
// zweite Hälfte des Hashes (32 bytes)
|
// zweite Hälfte des Hashes (32 bytes)
|
||||||
Update32(x, (const BitSequence*)(Hash+8));
|
Update32(x, (const BitSequence*)(Hash+8));
|
||||||
|
|
||||||
// Padding Block
|
// Padding Block
|
||||||
@ -290,7 +290,7 @@ __host__ void x11_cubehash512_cpu_hash_64(int thr_id, int threads, uint32_t star
|
|||||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||||
dim3 block(threadsperblock);
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
// Größe des dynamischen Shared Memory Bereichs
|
// Größe des dynamischen Shared Memory Bereichs
|
||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
x11_cubehash512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
|
x11_cubehash512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
|
||||||
|
@ -33,7 +33,7 @@ __device__ __forceinline__ void cuda_echo_round(
|
|||||||
uint32_t &k0, uint32_t &k1, uint32_t &k2, uint32_t &k3,
|
uint32_t &k0, uint32_t &k1, uint32_t &k2, uint32_t &k3,
|
||||||
uint32_t *W, int round)
|
uint32_t *W, int round)
|
||||||
{
|
{
|
||||||
// W hat 16*4 als Abmaße
|
// W hat 16*4 als Abmaße
|
||||||
|
|
||||||
// Big Sub Words
|
// Big Sub Words
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
@ -76,10 +76,10 @@ __device__ __forceinline__ void cuda_echo_round(
|
|||||||
|
|
||||||
// Mix Columns
|
// Mix Columns
|
||||||
#pragma unroll 4
|
#pragma unroll 4
|
||||||
for(int i=0;i<4;i++) // Schleife über je 2*uint32_t
|
for(int i=0;i<4;i++) // Schleife über je 2*uint32_t
|
||||||
{
|
{
|
||||||
#pragma unroll 4
|
#pragma unroll 4
|
||||||
for(int j=0;j<4;j++) // Schleife über die elemnte
|
for(int j=0;j<4;j++) // Schleife über die elemnte
|
||||||
{
|
{
|
||||||
int idx = j<<2; // j*4
|
int idx = j<<2; // j*4
|
||||||
|
|
||||||
@ -138,7 +138,7 @@ __global__ void x11_echo512_gpu_hash_64(int threads, uint32_t startNounce, uint6
|
|||||||
W[i + 3] = 0;
|
W[i + 3] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// kopiere 32-byte großen hash
|
// kopiere 32-byte großen hash
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for(int i=0;i<16;i++)
|
for(int i=0;i<16;i++)
|
||||||
W[i+32] = Hash[i];
|
W[i+32] = Hash[i];
|
||||||
@ -198,7 +198,7 @@ __host__ void x11_echo512_cpu_hash_64(int thr_id, int threads, uint32_t startNou
|
|||||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||||
dim3 block(threadsperblock);
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
// Größe des dynamischen Shared Memory Bereichs
|
// Größe des dynamischen Shared Memory Bereichs
|
||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
||||||
|
@ -368,7 +368,7 @@ __host__ void x11_luffa512_cpu_hash_64(int thr_id, int threads, uint32_t startNo
|
|||||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||||
dim3 block(threadsperblock);
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
// Größe des dynamischen Shared Memory Bereichs
|
// Größe des dynamischen Shared Memory Bereichs
|
||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
x11_luffa512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
|
x11_luffa512_gpu_hash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector);
|
||||||
|
@ -1319,7 +1319,7 @@ __global__ void x11_shavite512_gpu_hash_64(int threads, uint32_t startNounce, ui
|
|||||||
// nachricht laden
|
// nachricht laden
|
||||||
uint32_t msg[32];
|
uint32_t msg[32];
|
||||||
|
|
||||||
// fülle die Nachricht mit 64-byte (vorheriger Hash)
|
// fülle die Nachricht mit 64-byte (vorheriger Hash)
|
||||||
#pragma unroll 16
|
#pragma unroll 16
|
||||||
for(int i=0;i<16;i++)
|
for(int i=0;i<16;i++)
|
||||||
msg[i] = Hash[i];
|
msg[i] = Hash[i];
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
// Parallelisierung:
|
// Parallelisierung:
|
||||||
//
|
//
|
||||||
// FFT_8 wird 2 mal 8-fach parallel ausgeführt (in FFT_64)
|
// FFT_8 wird 2 mal 8-fach parallel ausgeführt (in FFT_64)
|
||||||
// und 1 mal 16-fach parallel (in FFT_128_full)
|
// und 1 mal 16-fach parallel (in FFT_128_full)
|
||||||
//
|
//
|
||||||
// STEP8_IF und STEP8_MAJ beinhalten je zwei 8-fach parallele Operationen
|
// STEP8_IF und STEP8_MAJ beinhalten je zwei 8-fach parallele Operationen
|
||||||
@ -573,7 +573,7 @@ x11_simd512_gpu_expand_64(int threads, uint32_t startNounce, uint64_t *g_hash, u
|
|||||||
for (int i=0; i<2; i++)
|
for (int i=0; i<2; i++)
|
||||||
Hash[i] = inpHash[8*i+(threadIdx.x&7)];
|
Hash[i] = inpHash[8*i+(threadIdx.x&7)];
|
||||||
|
|
||||||
// Puffer für expandierte Nachricht
|
// Puffer für expandierte Nachricht
|
||||||
uint4 *temp4 = &g_temp4[64 * hashPosition];
|
uint4 *temp4 = &g_temp4[64 * hashPosition];
|
||||||
|
|
||||||
Expansion(Hash, temp4);
|
Expansion(Hash, temp4);
|
||||||
@ -630,7 +630,7 @@ __host__ void x11_simd512_cpu_init(int thr_id, int threads)
|
|||||||
cudaMalloc( &d_state[thr_id], 32*sizeof(int)*threads );
|
cudaMalloc( &d_state[thr_id], 32*sizeof(int)*threads );
|
||||||
cudaMalloc( &d_temp4[thr_id], 64*sizeof(uint4)*threads );
|
cudaMalloc( &d_temp4[thr_id], 64*sizeof(uint4)*threads );
|
||||||
|
|
||||||
// Textur für 128 Bit Zugriffe
|
// Textur für 128 Bit Zugriffe
|
||||||
cudaChannelFormatDesc channelDesc128 = cudaCreateChannelDesc<uint4>();
|
cudaChannelFormatDesc channelDesc128 = cudaCreateChannelDesc<uint4>();
|
||||||
texRef1D_128.normalized = 0;
|
texRef1D_128.normalized = 0;
|
||||||
texRef1D_128.filterMode = cudaFilterModePoint;
|
texRef1D_128.filterMode = cudaFilterModePoint;
|
||||||
@ -651,7 +651,7 @@ __host__ void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNou
|
|||||||
{
|
{
|
||||||
const int threadsperblock = TPB;
|
const int threadsperblock = TPB;
|
||||||
|
|
||||||
// Größe des dynamischen Shared Memory Bereichs
|
// Größe des dynamischen Shared Memory Bereichs
|
||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
// berechne wie viele Thread Blocks wir brauchen
|
// berechne wie viele Thread Blocks wir brauchen
|
||||||
@ -662,7 +662,7 @@ __host__ void x11_simd512_cpu_hash_64(int thr_id, int threads, uint32_t startNou
|
|||||||
|
|
||||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||||
|
|
||||||
// künstlich die Occupancy limitieren, um das totale Erschöpfen des Texture Cache zu vermeiden
|
// künstlich die Occupancy limitieren, um das totale Erschöpfen des Texture Cache zu vermeiden
|
||||||
x11_simd512_gpu_compress1_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector, d_temp4[thr_id], d_state[thr_id]);
|
x11_simd512_gpu_compress1_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector, d_temp4[thr_id], d_state[thr_id]);
|
||||||
x11_simd512_gpu_compress2_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector, d_temp4[thr_id], d_state[thr_id]);
|
x11_simd512_gpu_compress2_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector, d_temp4[thr_id], d_state[thr_id]);
|
||||||
|
|
||||||
|
@ -24,7 +24,7 @@ extern "C"
|
|||||||
// aus cpu-miner.c
|
// aus cpu-miner.c
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
|
|
||||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||||
static uint32_t *d_hash[8];
|
static uint32_t *d_hash[8];
|
||||||
|
|
||||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||||
|
@ -686,7 +686,7 @@ __host__ void x13_fugue512_cpu_hash_64(int thr_id, int threads, uint32_t startNo
|
|||||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||||
dim3 block(threadsperblock);
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
// Größe des dynamischen Shared Memory Bereichs
|
// Größe des dynamischen Shared Memory Bereichs
|
||||||
size_t shared_size = 4 * 256 * sizeof(uint32_t);
|
size_t shared_size = 4 * 256 * sizeof(uint32_t);
|
||||||
|
|
||||||
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
||||||
|
@ -733,7 +733,7 @@ __host__ void x13_hamsi512_cpu_hash_64(int thr_id, int threads, uint32_t startNo
|
|||||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
||||||
dim3 block(threadsperblock);
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
// Größe des dynamischen Shared Memory Bereichs
|
// Größe des dynamischen Shared Memory Bereichs
|
||||||
size_t shared_size = 0;
|
size_t shared_size = 0;
|
||||||
|
|
||||||
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
||||||
|
@ -27,7 +27,7 @@ extern "C"
|
|||||||
// aus cpu-miner.c
|
// aus cpu-miner.c
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
|
|
||||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||||
static uint32_t *d_hash[8];
|
static uint32_t *d_hash[8];
|
||||||
|
|
||||||
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
extern void quark_blake512_cpu_init(int thr_id, int threads);
|
||||||
|
@ -11,7 +11,7 @@ extern "C"
|
|||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
extern bool opt_benchmark;
|
extern bool opt_benchmark;
|
||||||
|
|
||||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||||
static uint32_t *d_hash[8];
|
static uint32_t *d_hash[8];
|
||||||
|
|
||||||
extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode);
|
extern void x15_whirlpool_cpu_init(int thr_id, int threads, int mode);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user