mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-22 12:34:17 +00:00
Speed optimization: do MUCH LESS Groestl in Jackpot, throw away 75% of the hashes. More speed ;-)
This commit is contained in:
parent
48996faf0f
commit
be044f31a1
@ -14,69 +14,54 @@ extern int device_map[8];
|
|||||||
static cudaDeviceProp props[8];
|
static cudaDeviceProp props[8];
|
||||||
|
|
||||||
static uint32_t *d_tempBranch1Nonces[8];
|
static uint32_t *d_tempBranch1Nonces[8];
|
||||||
static uint32_t *d_tempBranch2Nonces[8];
|
static uint32_t *d_numValid[8];
|
||||||
static size_t *d_numValid[8];
|
static uint32_t *h_numValid[8];
|
||||||
static size_t *h_numValid[8];
|
|
||||||
|
|
||||||
static uint32_t *d_partSum1[8], *d_partSum2[8]; // 2x partielle summen
|
static uint32_t *d_partSum[2][8]; // für bis zu vier partielle Summen
|
||||||
static uint32_t *d_validTemp1[8], *d_validTemp2[8];
|
|
||||||
|
|
||||||
// Zwischenspeicher
|
|
||||||
static uint32_t *d_tempBranchAllNonces[8];
|
|
||||||
|
|
||||||
// aus heavy.cu
|
// aus heavy.cu
|
||||||
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
|
||||||
|
|
||||||
|
// True/False tester
|
||||||
|
typedef uint32_t(*cuda_compactTestFunction_t)(uint32_t *inpHash);
|
||||||
|
|
||||||
|
__device__ uint32_t JackpotTrueTest(uint32_t *inpHash)
|
||||||
|
{
|
||||||
|
uint32_t tmp = inpHash[0] & 0x01;
|
||||||
|
return (tmp == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ uint32_t JackpotFalseTest(uint32_t *inpHash)
|
||||||
|
{
|
||||||
|
uint32_t tmp = inpHash[0] & 0x01;
|
||||||
|
return (tmp == 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
__device__ cuda_compactTestFunction_t d_JackpotTrueFunction = JackpotTrueTest, d_JackpotFalseFunction = JackpotFalseTest;
|
||||||
|
cuda_compactTestFunction_t h_JackpotTrueFunction[8], h_JackpotFalseFunction[8];
|
||||||
|
|
||||||
// Setup-Funktionen
|
// Setup-Funktionen
|
||||||
__host__ void jackpot_compactTest_cpu_init(int thr_id, int threads)
|
__host__ void jackpot_compactTest_cpu_init(int thr_id, int threads)
|
||||||
{
|
{
|
||||||
cudaGetDeviceProperties(&props[thr_id], device_map[thr_id]);
|
cudaGetDeviceProperties(&props[thr_id], device_map[thr_id]);
|
||||||
|
|
||||||
|
cudaMemcpyFromSymbol(&h_JackpotTrueFunction[thr_id], d_JackpotTrueFunction, sizeof(cuda_compactTestFunction_t));
|
||||||
|
cudaMemcpyFromSymbol(&h_JackpotFalseFunction[thr_id], d_JackpotFalseFunction, sizeof(cuda_compactTestFunction_t));
|
||||||
|
|
||||||
// wir brauchen auch Speicherplatz auf dem Device
|
// wir brauchen auch Speicherplatz auf dem Device
|
||||||
cudaMalloc(&d_tempBranchAllNonces[thr_id], sizeof(uint32_t) * threads);
|
cudaMalloc(&d_tempBranch1Nonces[thr_id], sizeof(uint32_t) * threads * 2);
|
||||||
cudaMalloc(&d_tempBranch1Nonces[thr_id], sizeof(uint32_t) * threads);
|
cudaMalloc(&d_numValid[thr_id], 2*sizeof(uint32_t));
|
||||||
cudaMalloc(&d_tempBranch2Nonces[thr_id], sizeof(uint32_t) * threads);
|
cudaMallocHost(&h_numValid[thr_id], 2*sizeof(uint32_t));
|
||||||
cudaMalloc(&d_numValid[thr_id], 2*sizeof(size_t));
|
|
||||||
cudaMallocHost(&h_numValid[thr_id], 2*sizeof(size_t));
|
|
||||||
|
|
||||||
uint32_t s1;
|
uint32_t s1;
|
||||||
s1 = threads / 256;
|
s1 = (threads / 256) * 2;
|
||||||
|
|
||||||
cudaMalloc(&d_partSum1[thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
|
cudaMalloc(&d_partSum[0][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
|
||||||
cudaMalloc(&d_partSum2[thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
|
cudaMalloc(&d_partSum[1][thr_id], sizeof(uint32_t) * s1); // BLOCKSIZE (Threads/Block)
|
||||||
|
|
||||||
cudaMalloc(&d_validTemp1[thr_id], sizeof(uint32_t) * threads); // BLOCKSIZE (Threads/Block)
|
|
||||||
cudaMalloc(&d_validTemp2[thr_id], sizeof(uint32_t) * threads); // BLOCKSIZE (Threads/Block)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Die Testfunktion (zum Erstellen der TestMap)
|
|
||||||
__global__ void jackpot_compactTest_gpu_TEST_64(int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_noncesFull,
|
|
||||||
uint32_t *d_nonces1, uint32_t *d_nonces2,
|
|
||||||
uint32_t *d_validT1, uint32_t *d_validT2)
|
|
||||||
{
|
|
||||||
int thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
|
||||||
if (thread < threads)
|
|
||||||
{
|
|
||||||
// bestimme den aktuellen Zähler
|
|
||||||
uint32_t nounce = startNounce + thread;
|
|
||||||
uint32_t *inpHash = &inpHashes[16 * thread];
|
|
||||||
|
|
||||||
uint32_t tmp = inpHash[0] & 0x01;
|
|
||||||
uint32_t val1 = (tmp == 1);
|
|
||||||
uint32_t val2 = (tmp == 0);
|
|
||||||
|
|
||||||
d_nonces1[thread] = val1;
|
|
||||||
d_validT1[thread] = val1;
|
|
||||||
d_nonces2[thread] = val2;
|
|
||||||
d_validT2[thread] = val2;
|
|
||||||
d_noncesFull[thread] = nounce;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Die Summenfunktion (vom NVIDIA SDK)
|
// Die Summenfunktion (vom NVIDIA SDK)
|
||||||
__global__ void jackpot_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *partial_sums=NULL)
|
__global__ void jackpot_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t *partial_sums=NULL, cuda_compactTestFunction_t testFunc=NULL, int threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
|
||||||
{
|
{
|
||||||
extern __shared__ uint32_t sums[];
|
extern __shared__ uint32_t sums[];
|
||||||
int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
|
int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
|
||||||
@ -86,10 +71,38 @@ __global__ void jackpot_compactTest_gpu_SCAN(uint32_t *data, int width, uint32_t
|
|||||||
//int warp_id = threadIdx.x / warpSize;
|
//int warp_id = threadIdx.x / warpSize;
|
||||||
int warp_id = threadIdx.x / width;
|
int warp_id = threadIdx.x / width;
|
||||||
|
|
||||||
|
sums[lane_id] = 0;
|
||||||
|
|
||||||
// Below is the basic structure of using a shfl instruction
|
// Below is the basic structure of using a shfl instruction
|
||||||
// for a scan.
|
// for a scan.
|
||||||
// Record "value" as a variable - we accumulate it along the way
|
// Record "value" as a variable - we accumulate it along the way
|
||||||
uint32_t value = data[id];
|
uint32_t value;
|
||||||
|
if(testFunc != NULL)
|
||||||
|
{
|
||||||
|
if (id < threads)
|
||||||
|
{
|
||||||
|
uint32_t *inpHash;
|
||||||
|
if(d_validNonceTable == NULL)
|
||||||
|
{
|
||||||
|
// keine Nonce-Liste
|
||||||
|
inpHash = &inpHashes[id<<4];
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
// Nonce-Liste verfügbar
|
||||||
|
int nonce = d_validNonceTable[id] - startNounce;
|
||||||
|
inpHash = &inpHashes[nonce<<4];
|
||||||
|
}
|
||||||
|
value = (*testFunc)(inpHash);
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
value = 0;
|
||||||
|
}
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
value = data[id];
|
||||||
|
}
|
||||||
|
|
||||||
|
__syncthreads();
|
||||||
|
|
||||||
// Now accumulate in log steps up the chain
|
// Now accumulate in log steps up the chain
|
||||||
// compute sums, with another thread's value who is
|
// compute sums, with another thread's value who is
|
||||||
@ -177,39 +190,137 @@ __global__ void jackpot_compactTest_gpu_ADD(uint32_t *data, uint32_t *partial_su
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Der Scatter
|
// Der Scatter
|
||||||
__global__ void jackpot_compactTest_gpu_SCATTER(uint32_t *data, uint32_t *valid, uint32_t *sum, uint32_t *outp)
|
__global__ void jackpot_compactTest_gpu_SCATTER(uint32_t *sum, uint32_t *outp, cuda_compactTestFunction_t testFunc, int threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
|
||||||
{
|
{
|
||||||
int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
|
int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
|
||||||
if( valid[id] )
|
uint32_t actNounce = id;
|
||||||
|
uint32_t value;
|
||||||
|
if (id < threads)
|
||||||
|
{
|
||||||
|
// uint32_t nounce = startNounce + id;
|
||||||
|
uint32_t *inpHash;
|
||||||
|
if(d_validNonceTable == NULL)
|
||||||
|
{
|
||||||
|
// keine Nonce-Liste
|
||||||
|
inpHash = &inpHashes[id<<4];
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
// Nonce-Liste verfügbar
|
||||||
|
int nonce = d_validNonceTable[id] - startNounce;
|
||||||
|
actNounce = nonce;
|
||||||
|
inpHash = &inpHashes[nonce<<4];
|
||||||
|
}
|
||||||
|
|
||||||
|
value = (*testFunc)(inpHash);
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
value = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( value )
|
||||||
{
|
{
|
||||||
int idx = sum[id];
|
int idx = sum[id];
|
||||||
if(idx > 0)
|
if(idx > 0)
|
||||||
outp[idx-1] = data[id];
|
outp[idx-1] = startNounce + actNounce;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
////// ACHTUNG: Diese funktion geht aktuell nur mit threads > 65536 (Am besten 256 * 1024 oder 256*2048)
|
__host__ static uint32_t jackpot_compactTest_roundUpExp(uint32_t val)
|
||||||
__host__ void jackpot_compactTest_cpu_dualCompaction(int thr_id, int threads, size_t *nrm,
|
|
||||||
uint32_t *d_nonces1, uint32_t *d_nonces2)
|
|
||||||
{
|
{
|
||||||
|
if(val == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
uint32_t mask = 0x80000000;
|
||||||
|
while( (val & mask) == 0 ) mask = mask >> 1;
|
||||||
|
|
||||||
|
if( (val & (~mask)) != 0 )
|
||||||
|
return mask << 1;
|
||||||
|
|
||||||
|
return mask;
|
||||||
|
}
|
||||||
|
|
||||||
|
__host__ void jackpot_compactTest_cpu_singleCompaction(int thr_id, int threads, uint32_t *nrm,
|
||||||
|
uint32_t *d_nonces1, cuda_compactTestFunction_t function,
|
||||||
|
uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable)
|
||||||
|
{
|
||||||
|
int orgThreads = threads;
|
||||||
|
threads = (int)jackpot_compactTest_roundUpExp((uint32_t)threads);
|
||||||
|
// threadsPerBlock ausrechnen
|
||||||
|
int blockSize = 256;
|
||||||
|
int nSummen = threads / blockSize;
|
||||||
|
|
||||||
|
int thr1 = (threads+blockSize-1) / blockSize;
|
||||||
|
int thr2 = threads / (blockSize*blockSize);
|
||||||
|
int blockSize2 = (nSummen < blockSize) ? nSummen : blockSize;
|
||||||
|
int thr3 = (nSummen + blockSize2-1) / blockSize2;
|
||||||
|
|
||||||
|
bool callThrid = (thr2 > 0) ? true : false;
|
||||||
|
|
||||||
|
// Erster Initialscan
|
||||||
|
jackpot_compactTest_gpu_SCAN<<<thr1,blockSize, 32*sizeof(uint32_t)>>>(
|
||||||
|
d_tempBranch1Nonces[thr_id], 32, d_partSum[0][thr_id], function, orgThreads, startNounce, inpHashes, d_validNonceTable);
|
||||||
|
|
||||||
|
// weitere Scans
|
||||||
|
if(callThrid)
|
||||||
|
{
|
||||||
|
jackpot_compactTest_gpu_SCAN<<<thr2,blockSize, 32*sizeof(uint32_t)>>>(d_partSum[0][thr_id], 32, d_partSum[1][thr_id]);
|
||||||
|
jackpot_compactTest_gpu_SCAN<<<1, thr2, 32*sizeof(uint32_t)>>>(d_partSum[1][thr_id], (thr2>32) ? 32 : thr2);
|
||||||
|
}else
|
||||||
|
{
|
||||||
|
jackpot_compactTest_gpu_SCAN<<<thr3,blockSize2, 32*sizeof(uint32_t)>>>(d_partSum[0][thr_id], (blockSize2>32) ? 32 : blockSize2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sync + Anzahl merken
|
||||||
|
cudaStreamSynchronize(NULL);
|
||||||
|
|
||||||
|
if(callThrid)
|
||||||
|
cudaMemcpy(nrm, &(d_partSum[1][thr_id])[thr2-1], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||||
|
else
|
||||||
|
cudaMemcpy(nrm, &(d_partSum[0][thr_id])[nSummen-1], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||||
|
|
||||||
|
|
||||||
|
// Addieren
|
||||||
|
if(callThrid)
|
||||||
|
{
|
||||||
|
jackpot_compactTest_gpu_ADD<<<thr2-1, blockSize>>>(d_partSum[0][thr_id]+blockSize, d_partSum[1][thr_id], blockSize*thr2);
|
||||||
|
}
|
||||||
|
jackpot_compactTest_gpu_ADD<<<thr1-1, blockSize>>>(d_tempBranch1Nonces[thr_id]+blockSize, d_partSum[0][thr_id], threads);
|
||||||
|
|
||||||
|
// Scatter
|
||||||
|
jackpot_compactTest_gpu_SCATTER<<<thr1,blockSize,0>>>(d_tempBranch1Nonces[thr_id], d_nonces1,
|
||||||
|
function, orgThreads, startNounce, inpHashes, d_validNonceTable);
|
||||||
|
|
||||||
|
// Sync
|
||||||
|
cudaStreamSynchronize(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
////// ACHTUNG: Diese funktion geht aktuell nur mit threads > 65536 (Am besten 256 * 1024 oder 256*2048)
|
||||||
|
__host__ void jackpot_compactTest_cpu_dualCompaction(int thr_id, int threads, uint32_t *nrm,
|
||||||
|
uint32_t *d_nonces1, uint32_t *d_nonces2,
|
||||||
|
uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable)
|
||||||
|
{
|
||||||
|
jackpot_compactTest_cpu_singleCompaction(thr_id, threads, &nrm[0], d_nonces1, h_JackpotTrueFunction[thr_id], startNounce, inpHashes, d_validNonceTable);
|
||||||
|
jackpot_compactTest_cpu_singleCompaction(thr_id, threads, &nrm[1], d_nonces2, h_JackpotFalseFunction[thr_id], startNounce, inpHashes, d_validNonceTable);
|
||||||
|
|
||||||
|
/*
|
||||||
// threadsPerBlock ausrechnen
|
// threadsPerBlock ausrechnen
|
||||||
int blockSize = 256;
|
int blockSize = 256;
|
||||||
int thr1 = threads / blockSize;
|
int thr1 = threads / blockSize;
|
||||||
int thr2 = threads / (blockSize*blockSize);
|
int thr2 = threads / (blockSize*blockSize);
|
||||||
|
|
||||||
// 1
|
// 1
|
||||||
jackpot_compactTest_gpu_SCAN<<<thr1,blockSize, 8*sizeof(uint32_t)>>>(d_tempBranch1Nonces[thr_id], 32, d_partSum1[thr_id]);
|
jackpot_compactTest_gpu_SCAN<<<thr1,blockSize, 32*sizeof(uint32_t)>>>(d_tempBranch1Nonces[thr_id], 32, d_partSum1[thr_id], h_JackpotTrueFunction[thr_id], threads, startNounce, inpHashes);
|
||||||
jackpot_compactTest_gpu_SCAN<<<thr2,blockSize, 8*sizeof(uint32_t)>>>(d_partSum1[thr_id], 32, d_partSum2[thr_id]);
|
jackpot_compactTest_gpu_SCAN<<<thr2,blockSize, 32*sizeof(uint32_t)>>>(d_partSum1[thr_id], 32, d_partSum2[thr_id]);
|
||||||
jackpot_compactTest_gpu_SCAN<<<1, thr2, 8*sizeof(uint32_t)>>>(d_partSum2[thr_id], (thr2>32) ? 32 : thr2);
|
jackpot_compactTest_gpu_SCAN<<<1, thr2, 32*sizeof(uint32_t)>>>(d_partSum2[thr_id], (thr2>32) ? 32 : thr2);
|
||||||
cudaStreamSynchronize(NULL);
|
cudaStreamSynchronize(NULL);
|
||||||
cudaMemcpy(&nrm[0], &(d_partSum2[thr_id])[thr2-1], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
cudaMemcpy(&nrm[0], &(d_partSum2[thr_id])[thr2-1], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||||
jackpot_compactTest_gpu_ADD<<<thr2-1, blockSize>>>(d_partSum1[thr_id]+blockSize, d_partSum2[thr_id], blockSize*thr2);
|
jackpot_compactTest_gpu_ADD<<<thr2-1, blockSize>>>(d_partSum1[thr_id]+blockSize, d_partSum2[thr_id], blockSize*thr2);
|
||||||
jackpot_compactTest_gpu_ADD<<<thr1-1, blockSize>>>(d_tempBranch1Nonces[thr_id]+blockSize, d_partSum1[thr_id], threads);
|
jackpot_compactTest_gpu_ADD<<<thr1-1, blockSize>>>(d_tempBranch1Nonces[thr_id]+blockSize, d_partSum1[thr_id], threads);
|
||||||
|
|
||||||
// 2
|
// 2
|
||||||
jackpot_compactTest_gpu_SCAN<<<thr1,blockSize, 8*sizeof(uint32_t)>>>(d_tempBranch2Nonces[thr_id], 32, d_partSum1[thr_id]);
|
jackpot_compactTest_gpu_SCAN<<<thr1,blockSize, 32*sizeof(uint32_t)>>>(d_tempBranch2Nonces[thr_id], 32, d_partSum1[thr_id], h_JackpotFalseFunction[thr_id], threads, startNounce, inpHashes);
|
||||||
jackpot_compactTest_gpu_SCAN<<<thr2,blockSize, 8*sizeof(uint32_t)>>>(d_partSum1[thr_id], 32, d_partSum2[thr_id]);
|
jackpot_compactTest_gpu_SCAN<<<thr2,blockSize, 32*sizeof(uint32_t)>>>(d_partSum1[thr_id], 32, d_partSum2[thr_id]);
|
||||||
jackpot_compactTest_gpu_SCAN<<<1, thr2, 8*sizeof(uint32_t)>>>(d_partSum2[thr_id], (thr2>32) ? 32 : thr2);
|
jackpot_compactTest_gpu_SCAN<<<1, thr2, 32*sizeof(uint32_t)>>>(d_partSum2[thr_id], (thr2>32) ? 32 : thr2);
|
||||||
cudaStreamSynchronize(NULL);
|
cudaStreamSynchronize(NULL);
|
||||||
cudaMemcpy(&nrm[1], &(d_partSum2[thr_id])[thr2-1], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
cudaMemcpy(&nrm[1], &(d_partSum2[thr_id])[thr2-1], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||||
jackpot_compactTest_gpu_ADD<<<thr2-1, blockSize>>>(d_partSum1[thr_id]+blockSize, d_partSum2[thr_id], blockSize*thr2);
|
jackpot_compactTest_gpu_ADD<<<thr2-1, blockSize>>>(d_partSum1[thr_id]+blockSize, d_partSum2[thr_id], blockSize*thr2);
|
||||||
@ -217,39 +328,25 @@ __host__ void jackpot_compactTest_cpu_dualCompaction(int thr_id, int threads, si
|
|||||||
|
|
||||||
// Hier ist noch eine Besonderheit: in d_tempBranch1Nonces sind die element von 1...nrm1 die Interessanten
|
// Hier ist noch eine Besonderheit: in d_tempBranch1Nonces sind die element von 1...nrm1 die Interessanten
|
||||||
// Schritt 3: Scatter
|
// Schritt 3: Scatter
|
||||||
jackpot_compactTest_gpu_SCATTER<<<thr1,blockSize,0>>>(d_tempBranchAllNonces[thr_id], d_validTemp1[thr_id], d_tempBranch1Nonces[thr_id], d_nonces1);
|
jackpot_compactTest_gpu_SCATTER<<<thr1,blockSize,0>>>(d_tempBranch1Nonces[thr_id], d_nonces1, h_JackpotTrueFunction[thr_id], threads, startNounce, inpHashes);
|
||||||
jackpot_compactTest_gpu_SCATTER<<<thr1,blockSize,0>>>(d_tempBranchAllNonces[thr_id], d_validTemp2[thr_id], d_tempBranch2Nonces[thr_id], d_nonces2);
|
jackpot_compactTest_gpu_SCATTER<<<thr1,blockSize,0>>>(d_tempBranch2Nonces[thr_id], d_nonces2, h_JackpotFalseFunction[thr_id], threads, startNounce, inpHashes);
|
||||||
cudaStreamSynchronize(NULL);
|
cudaStreamSynchronize(NULL);
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
__host__ void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
|
||||||
uint32_t *d_nonces1, size_t *nrm1,
|
uint32_t *d_nonces1, size_t *nrm1,
|
||||||
uint32_t *d_nonces2, size_t *nrm2,
|
uint32_t *d_nonces2, size_t *nrm2,
|
||||||
int order)
|
int order)
|
||||||
{
|
{
|
||||||
// Compute 3.x und 5.x Geräte am besten mit 768 Threads ansteuern,
|
// Wenn validNonceTable genutzt wird, dann werden auch nur die Nonces betrachtet, die dort enthalten sind
|
||||||
// alle anderen mit 512 Threads.
|
// "threads" ist in diesem Fall auf die Länge dieses Array's zu setzen!
|
||||||
//int threadsperblock = (props[thr_id].major >= 3) ? 768 : 512;
|
|
||||||
int threadsperblock = 256;
|
|
||||||
|
|
||||||
// berechne wie viele Thread Blocks wir brauchen
|
|
||||||
dim3 grid((threads + threadsperblock-1)/threadsperblock);
|
|
||||||
dim3 block(threadsperblock);
|
|
||||||
|
|
||||||
size_t shared_size = 0;
|
|
||||||
|
|
||||||
// fprintf(stderr, "threads=%d, %d blocks, %d threads per block, %d bytes shared\n", threads, grid.x, block.x, shared_size);
|
|
||||||
|
|
||||||
// Schritt 1: Prüfen der Bedingung und Speicherung in d_tempBranch1/2Nonces
|
|
||||||
jackpot_compactTest_gpu_TEST_64<<<grid, block, shared_size>>>(threads, startNounce, inpHashes, d_tempBranchAllNonces[thr_id],
|
|
||||||
d_tempBranch1Nonces[thr_id], d_tempBranch2Nonces[thr_id],
|
|
||||||
d_validTemp1[thr_id], d_validTemp2[thr_id]);
|
|
||||||
|
|
||||||
// Strategisches Sleep Kommando zur Senkung der CPU Last
|
|
||||||
jackpot_compactTest_cpu_dualCompaction(thr_id, threads,
|
jackpot_compactTest_cpu_dualCompaction(thr_id, threads,
|
||||||
h_numValid[thr_id], d_nonces1, d_nonces2);
|
h_numValid[thr_id], d_nonces1, d_nonces2,
|
||||||
|
startNounce, inpHashes, d_validNonceTable);
|
||||||
|
|
||||||
cudaStreamSynchronize(NULL); // Das original braucht zwar etwas CPU-Last, ist an dieser Stelle aber evtl besser
|
cudaStreamSynchronize(NULL); // Das original braucht zwar etwas CPU-Last, ist an dieser Stelle aber evtl besser
|
||||||
*nrm1 = h_numValid[thr_id][0];
|
*nrm1 = (size_t)h_numValid[thr_id][0];
|
||||||
*nrm2 = h_numValid[thr_id][1];
|
*nrm2 = (size_t)h_numValid[thr_id][1];
|
||||||
}
|
}
|
||||||
|
@ -13,7 +13,6 @@ extern "C"
|
|||||||
|
|
||||||
// aus cpu-miner.c
|
// aus cpu-miner.c
|
||||||
extern int device_map[8];
|
extern int device_map[8];
|
||||||
extern bool opt_benchmark;
|
|
||||||
|
|
||||||
// Speicher für Input/Output der verketteten Hashfunktionen
|
// Speicher für Input/Output der verketteten Hashfunktionen
|
||||||
static uint32_t *d_hash[8];
|
static uint32_t *d_hash[8];
|
||||||
@ -39,7 +38,7 @@ extern void quark_check_cpu_setTarget(const void *ptarget);
|
|||||||
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
extern uint32_t quark_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||||
|
|
||||||
extern void jackpot_compactTest_cpu_init(int thr_id, int threads);
|
extern void jackpot_compactTest_cpu_init(int thr_id, int threads);
|
||||||
extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes,
|
extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
|
||||||
uint32_t *d_nonces1, size_t *nrm1,
|
uint32_t *d_nonces1, size_t *nrm1,
|
||||||
uint32_t *d_nonces2, size_t *nrm2,
|
uint32_t *d_nonces2, size_t *nrm2,
|
||||||
int order);
|
int order);
|
||||||
@ -48,6 +47,7 @@ extern void jackpot_compactTest_cpu_hash_64(int thr_id, int threads, uint32_t st
|
|||||||
static uint32_t *d_jackpotNonces[8];
|
static uint32_t *d_jackpotNonces[8];
|
||||||
static uint32_t *d_branch1Nonces[8];
|
static uint32_t *d_branch1Nonces[8];
|
||||||
static uint32_t *d_branch2Nonces[8];
|
static uint32_t *d_branch2Nonces[8];
|
||||||
|
static uint32_t *d_branch3Nonces[8];
|
||||||
|
|
||||||
// Original jackpothash Funktion aus einem miner Quelltext
|
// Original jackpothash Funktion aus einem miner Quelltext
|
||||||
inline unsigned int jackpothash(void *state, const void *input)
|
inline unsigned int jackpothash(void *state, const void *input)
|
||||||
@ -93,6 +93,8 @@ inline unsigned int jackpothash(void *state, const void *input)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
extern bool opt_benchmark;
|
||||||
|
|
||||||
extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
||||||
const uint32_t *ptarget, uint32_t max_nonce,
|
const uint32_t *ptarget, uint32_t max_nonce,
|
||||||
unsigned long *hashes_done)
|
unsigned long *hashes_done)
|
||||||
@ -105,7 +107,8 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
|||||||
|
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
|
||||||
const int throughput = 256*4096; // 100;
|
const int throughput = 256*4096*4; // 100;
|
||||||
|
//const int throughput = 256*256*2+100; // 100;
|
||||||
|
|
||||||
static bool init[8] = {0,0,0,0,0,0,0,0};
|
static bool init[8] = {0,0,0,0,0,0,0,0};
|
||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
@ -121,9 +124,10 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
|||||||
quark_jh512_cpu_init(thr_id, throughput);
|
quark_jh512_cpu_init(thr_id, throughput);
|
||||||
quark_skein512_cpu_init(thr_id, throughput);
|
quark_skein512_cpu_init(thr_id, throughput);
|
||||||
quark_check_cpu_init(thr_id, throughput);
|
quark_check_cpu_init(thr_id, throughput);
|
||||||
cudaMalloc(&d_jackpotNonces[thr_id], sizeof(uint32_t)*throughput);
|
cudaMalloc(&d_jackpotNonces[thr_id], sizeof(uint32_t)*throughput*2);
|
||||||
cudaMalloc(&d_branch1Nonces[thr_id], sizeof(uint32_t)*throughput);
|
cudaMalloc(&d_branch1Nonces[thr_id], sizeof(uint32_t)*throughput*2);
|
||||||
cudaMalloc(&d_branch2Nonces[thr_id], sizeof(uint32_t)*throughput);
|
cudaMalloc(&d_branch2Nonces[thr_id], sizeof(uint32_t)*throughput*2);
|
||||||
|
cudaMalloc(&d_branch3Nonces[thr_id], sizeof(uint32_t)*throughput*2);
|
||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -140,35 +144,77 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
|||||||
// erstes Keccak512 Hash mit CUDA
|
// erstes Keccak512 Hash mit CUDA
|
||||||
jackpot_keccak512_cpu_hash(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
jackpot_keccak512_cpu_hash(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
|
|
||||||
for (int round=0; round < 3; round++)
|
size_t nrm1, nrm2, nrm3;
|
||||||
{
|
|
||||||
size_t nrm1, nrm2;
|
|
||||||
|
|
||||||
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
// Runde 1 (ohne Gröstl)
|
||||||
jackpot_compactTest_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id],
|
|
||||||
|
jackpot_compactTest_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id], NULL,
|
||||||
d_branch1Nonces[thr_id], &nrm1,
|
d_branch1Nonces[thr_id], &nrm1,
|
||||||
d_branch2Nonces[thr_id], &nrm2,
|
d_branch3Nonces[thr_id], &nrm3,
|
||||||
order++);
|
order++);
|
||||||
|
|
||||||
if (nrm1+nrm2 == throughput) {
|
// verfolge den skein-pfad weiter
|
||||||
quark_groestl512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
|
quark_skein512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
quark_skein512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
|
|
||||||
}
|
|
||||||
|
|
||||||
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
// noch schnell Blake & JH
|
||||||
jackpot_compactTest_cpu_hash_64(thr_id, throughput, pdata[19], d_hash[thr_id],
|
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
||||||
d_branch1Nonces[thr_id], &nrm1,
|
d_branch1Nonces[thr_id], &nrm1,
|
||||||
d_branch2Nonces[thr_id], &nrm2,
|
d_branch2Nonces[thr_id], &nrm2,
|
||||||
order++);
|
order++);
|
||||||
|
|
||||||
if (nrm1+nrm2 == throughput) {
|
if (nrm1+nrm2 == nrm3) {
|
||||||
quark_blake512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
|
quark_blake512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
quark_jh512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
|
quark_jh512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Runde 2 (ohne Gröstl)
|
||||||
|
|
||||||
|
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
||||||
|
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
||||||
|
d_branch1Nonces[thr_id], &nrm1,
|
||||||
|
d_branch3Nonces[thr_id], &nrm3,
|
||||||
|
order++);
|
||||||
|
|
||||||
|
// verfolge den skein-pfad weiter
|
||||||
|
quark_skein512_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
|
|
||||||
|
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
||||||
|
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
||||||
|
d_branch1Nonces[thr_id], &nrm1,
|
||||||
|
d_branch2Nonces[thr_id], &nrm2,
|
||||||
|
order++);
|
||||||
|
|
||||||
|
if (nrm1+nrm2 == nrm3) {
|
||||||
|
quark_blake512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
|
quark_jh512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runde 3 (komplett)
|
||||||
|
|
||||||
|
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
||||||
|
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
||||||
|
d_branch1Nonces[thr_id], &nrm1,
|
||||||
|
d_branch2Nonces[thr_id], &nrm2,
|
||||||
|
order++);
|
||||||
|
|
||||||
|
if (nrm1+nrm2 == nrm3) {
|
||||||
|
quark_groestl512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
|
quark_skein512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
|
}
|
||||||
|
|
||||||
|
// jackpotNonces in branch1/2 aufsplitten gemäss if (hash[0] & 0x01)
|
||||||
|
jackpot_compactTest_cpu_hash_64(thr_id, nrm3, pdata[19], d_hash[thr_id], d_branch3Nonces[thr_id],
|
||||||
|
d_branch1Nonces[thr_id], &nrm1,
|
||||||
|
d_branch2Nonces[thr_id], &nrm2,
|
||||||
|
order++);
|
||||||
|
|
||||||
|
if (nrm1+nrm2 == nrm3) {
|
||||||
|
quark_blake512_cpu_hash_64(thr_id, nrm1, pdata[19], d_branch1Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
|
quark_jh512_cpu_hash_64(thr_id, nrm2, pdata[19], d_branch2Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scan nach Gewinner Hashes auf der GPU
|
// Scan nach Gewinner Hashes auf der GPU
|
||||||
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
uint32_t foundNonce = quark_check_cpu_hash_64(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
if (foundNonce != 0xffffffff)
|
if (foundNonce != 0xffffffff)
|
||||||
{
|
{
|
||||||
uint32_t vhash64[8];
|
uint32_t vhash64[8];
|
||||||
@ -180,7 +226,8 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
|||||||
if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) {
|
if ((vhash64[7]<=Htarg) && fulltest(vhash64, ptarget)) {
|
||||||
|
|
||||||
pdata[19] = foundNonce;
|
pdata[19] = foundNonce;
|
||||||
*hashes_done = (foundNonce - first_nonce + 1);
|
*hashes_done = (foundNonce - first_nonce + 1)/4;
|
||||||
|
//applog(LOG_INFO, "GPU #%d: result for nonce $%08X does validate on CPU (%d rounds)!", thr_id, foundNonce, rounds);
|
||||||
return 1;
|
return 1;
|
||||||
} else {
|
} else {
|
||||||
applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU (%d rounds)!", thr_id, foundNonce, rounds);
|
applog(LOG_INFO, "GPU #%d: result for nonce $%08X does not validate on CPU (%d rounds)!", thr_id, foundNonce, rounds);
|
||||||
@ -191,6 +238,6 @@ extern "C" int scanhash_jackpot(int thr_id, uint32_t *pdata,
|
|||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = (pdata[19] - first_nonce + 1);
|
*hashes_done = (pdata[19] - first_nonce + 1)/4;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -40,6 +40,7 @@ ccminer_CPPFLAGS = -msse2 @LIBCURL_CPPFLAGS@ @OPENMP_CFLAGS@ $(PTHREAD_FLAGS) -f
|
|||||||
.cu.o:
|
.cu.o:
|
||||||
$(NVCC) @CFLAGS@ -I . -Xptxas "-abi=no -v" -gencode=arch=compute_20,code=\"sm_20,compute_20\" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
|
$(NVCC) @CFLAGS@ -I . -Xptxas "-abi=no -v" -gencode=arch=compute_20,code=\"sm_20,compute_20\" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
|
||||||
|
|
||||||
|
# ABI requiring code modules
|
||||||
# this module doesn't compile with Compute 2.0 unfortunately
|
# this module doesn't compile with Compute 2.0 unfortunately
|
||||||
JHA/cuda_jha_compactionTest.o: JHA/cuda_jha_compactionTest.cu
|
JHA/cuda_jackpot_compactionTest.o: JHA/cuda_jackpot_compactionTest.cu
|
||||||
$(NVCC) @CFLAGS@ -I . -Xptxas "-abi=no -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
|
$(NVCC) -I . -I cudpp-2.1/include @CFLAGS@ -Xptxas "-abi=yes -v" -gencode=arch=compute_30,code=\"sm_30,compute_30\" -gencode=arch=compute_35,code=\"sm_35,compute_35\" --maxrregcount=80 --ptxas-options=-v $(JANSSON_INCLUDES) -o $@ -c $<
|
||||||
|
130
ccminer.vcxproj
130
ccminer.vcxproj
@ -103,7 +103,6 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
|
|||||||
<CodeGeneration>compute_35,sm_35</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35</CodeGeneration>
|
||||||
<Include>
|
<Include>
|
||||||
</Include>
|
</Include>
|
||||||
<AdditionalOptions>-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
@ -135,7 +134,6 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
|
|||||||
<CodeGeneration>compute_35,sm_35</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35</CodeGeneration>
|
||||||
<Include>
|
<Include>
|
||||||
</Include>
|
</Include>
|
||||||
<AdditionalOptions>-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
@ -171,7 +169,6 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
|
|||||||
<CodeGeneration>compute_35,sm_35</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35</CodeGeneration>
|
||||||
<Include>
|
<Include>
|
||||||
</Include>
|
</Include>
|
||||||
<AdditionalOptions>-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
@ -207,7 +204,6 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
|
|||||||
<CodeGeneration>compute_35,sm_35</CodeGeneration>
|
<CodeGeneration>compute_35,sm_35</CodeGeneration>
|
||||||
<Include>
|
<Include>
|
||||||
</Include>
|
</Include>
|
||||||
<AdditionalOptions>-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
|
||||||
</CudaCompile>
|
</CudaCompile>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
@ -291,24 +287,114 @@ copy "$(CudaToolkitBinDir)\cudart*.dll" "$(OutDir)"</Command>
|
|||||||
<ClInclude Include="uint256.h" />
|
<ClInclude Include="uint256.h" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CudaCompile Include="cuda_fugue256.cu" />
|
<CudaCompile Include="cuda_fugue256.cu">
|
||||||
<CudaCompile Include="cuda_groestlcoin.cu" />
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<CudaCompile Include="cuda_myriadgroestl.cu" />
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<CudaCompile Include="heavy\cuda_blake512.cu" />
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<CudaCompile Include="heavy\cuda_combine.cu" />
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<CudaCompile Include="heavy\cuda_groestl512.cu" />
|
</CudaCompile>
|
||||||
<CudaCompile Include="heavy\cuda_hefty1.cu" />
|
<CudaCompile Include="cuda_groestlcoin.cu">
|
||||||
<CudaCompile Include="heavy\cuda_keccak512.cu" />
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<CudaCompile Include="heavy\cuda_sha256.cu" />
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<CudaCompile Include="heavy\heavy.cu" />
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<CudaCompile Include="JHA\cuda_jha_compactionTest.cu" />
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<CudaCompile Include="JHA\cuda_jha_keccak512.cu" />
|
</CudaCompile>
|
||||||
<CudaCompile Include="JHA\jackpotcoin.cu" />
|
<CudaCompile Include="cuda_myriadgroestl.cu">
|
||||||
<CudaCompile Include="quark\cuda_jh512.cu" />
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<CudaCompile Include="quark\cuda_quark_blake512.cu" />
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<CudaCompile Include="quark\cuda_quark_checkhash.cu" />
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<CudaCompile Include="quark\cuda_quark_groestl512.cu" />
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
<CudaCompile Include="quark\cuda_skein512.cu" />
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\cuda_blake512.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\cuda_combine.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\cuda_groestl512.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\cuda_hefty1.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\cuda_keccak512.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\cuda_sha256.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="heavy\heavy.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="JHA\cuda_jha_compactionTest.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=yes -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="JHA\cuda_jha_keccak512.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="JHA\jackpotcoin.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="quark\cuda_jh512.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="quark\cuda_quark_blake512.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="quark\cuda_quark_checkhash.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="quark\cuda_quark_groestl512.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
|
<CudaCompile Include="quark\cuda_skein512.cu">
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Release|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
<AdditionalOptions Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">-Xptxas "-abi=no -v" %(AdditionalOptions)</AdditionalOptions>
|
||||||
|
</CudaCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
|
Loading…
x
Reference in New Issue
Block a user