1
0
mirror of https://github.com/GOSTSec/ccminer synced 2025-02-04 19:04:34 +00:00

quark: static shared memory allocation for SM3+

Ported from a change KlausT committed on 4 Jan; adds a few kH/s.
This commit is contained in:
Tanguy Pruvot 2015-11-06 15:03:36 +01:00
parent 64e14b7d82
commit d7c2168f2b
2 changed files with 6 additions and 6 deletions

View File

@ -228,7 +228,7 @@ features.
>>> RELEASE HISTORY <<< >>> RELEASE HISTORY <<<
Nov. 05th 2015 v1.7 Nov. 06th 2015 v1.7
Improve old devices compatibility (x11, lyra2, qubit...) Improve old devices compatibility (x11, lyra2, qubit...)
Add windows support for SM 2.1 and drop SM 3.5 (x86) Add windows support for SM 2.1 and drop SM 3.5 (x86)
Improve lyra2 (v1/v2) cuda implementations Improve lyra2 (v1/v2) cuda implementations

View File

@ -79,7 +79,7 @@ void quark_compactTest_gpu_SCAN(uint32_t *data, const int width, uint32_t *parti
uint32_t threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL) uint32_t threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
{ {
#if __CUDA_ARCH__ >= 300 #if __CUDA_ARCH__ >= 300
extern __shared__ uint32_t sums[]; __shared__ uint32_t sums[32];
int id = ((blockIdx.x * blockDim.x) + threadIdx.x); int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
//int lane_id = id % warpSize; //int lane_id = id % warpSize;
int lane_id = id % width; int lane_id = id % width;
@ -267,15 +267,15 @@ void quark_compactTest_cpu_singleCompaction(int thr_id, uint32_t threads, uint32
bool callThrid = (thr2 > 0) ? true : false; bool callThrid = (thr2 > 0) ? true : false;
// Erster Initialscan // Erster Initialscan
quark_compactTest_gpu_SCAN<<<thr1,blockSize, 32*sizeof(uint32_t)>>>( quark_compactTest_gpu_SCAN <<<thr1,blockSize>>>(
d_tempBranch1Nonces[thr_id], 32, d_partSum[0][thr_id], function, orgThreads, startNounce, inpHashes, d_validNonceTable); d_tempBranch1Nonces[thr_id], 32, d_partSum[0][thr_id], function, orgThreads, startNounce, inpHashes, d_validNonceTable);
// weitere Scans // weitere Scans
if(callThrid) { if(callThrid) {
quark_compactTest_gpu_SCAN<<<thr2,blockSize, 32*sizeof(uint32_t)>>>(d_partSum[0][thr_id], 32, d_partSum[1][thr_id]); quark_compactTest_gpu_SCAN<<<thr2,blockSize>>>(d_partSum[0][thr_id], 32, d_partSum[1][thr_id]);
quark_compactTest_gpu_SCAN<<<1, thr2, 32*sizeof(uint32_t)>>>(d_partSum[1][thr_id], (thr2>32) ? 32 : thr2); quark_compactTest_gpu_SCAN<<<1, thr2>>>(d_partSum[1][thr_id], (thr2>32) ? 32 : thr2);
} else { } else {
quark_compactTest_gpu_SCAN<<<thr3,blockSize2, 32*sizeof(uint32_t)>>>(d_partSum[0][thr_id], (blockSize2>32) ? 32 : blockSize2); quark_compactTest_gpu_SCAN<<<thr3,blockSize2>>>(d_partSum[0][thr_id], (blockSize2>32) ? 32 : blockSize2);
} }
// Sync + Anzahl merken // Sync + Anzahl merken