mirror of
https://github.com/GOSTSec/ccminer
synced 2025-02-04 19:04:34 +00:00
quark: static shared memory allocation for SM3+
Taken from KlausT's commit of 4 Jan; adds a few kH/s.
This commit is contained in:
parent
64e14b7d82
commit
d7c2168f2b
@ -228,7 +228,7 @@ features.
|
|||||||
|
|
||||||
>>> RELEASE HISTORY <<<
|
>>> RELEASE HISTORY <<<
|
||||||
|
|
||||||
Nov. 05th 2015 v1.7
|
Nov. 06th 2015 v1.7
|
||||||
Improve old devices compatibility (x11, lyra2, qubit...)
|
Improve old devices compatibility (x11, lyra2, qubit...)
|
||||||
Add windows support for SM 2.1 and drop SM 3.5 (x86)
|
Add windows support for SM 2.1 and drop SM 3.5 (x86)
|
||||||
Improve lyra2 (v1/v2) cuda implementations
|
Improve lyra2 (v1/v2) cuda implementations
|
||||||
|
@ -79,7 +79,7 @@ void quark_compactTest_gpu_SCAN(uint32_t *data, const int width, uint32_t *parti
|
|||||||
uint32_t threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
|
uint32_t threads=0, uint32_t startNounce=0, uint32_t *inpHashes=NULL, uint32_t *d_validNonceTable=NULL)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if __CUDA_ARCH__ >= 300
|
||||||
extern __shared__ uint32_t sums[];
|
__shared__ uint32_t sums[32];
|
||||||
int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
|
int id = ((blockIdx.x * blockDim.x) + threadIdx.x);
|
||||||
//int lane_id = id % warpSize;
|
//int lane_id = id % warpSize;
|
||||||
int lane_id = id % width;
|
int lane_id = id % width;
|
||||||
@ -267,15 +267,15 @@ void quark_compactTest_cpu_singleCompaction(int thr_id, uint32_t threads, uint32
|
|||||||
bool callThrid = (thr2 > 0) ? true : false;
|
bool callThrid = (thr2 > 0) ? true : false;
|
||||||
|
|
||||||
// Erster Initialscan
|
// Erster Initialscan
|
||||||
quark_compactTest_gpu_SCAN<<<thr1,blockSize, 32*sizeof(uint32_t)>>>(
|
quark_compactTest_gpu_SCAN <<<thr1,blockSize>>>(
|
||||||
d_tempBranch1Nonces[thr_id], 32, d_partSum[0][thr_id], function, orgThreads, startNounce, inpHashes, d_validNonceTable);
|
d_tempBranch1Nonces[thr_id], 32, d_partSum[0][thr_id], function, orgThreads, startNounce, inpHashes, d_validNonceTable);
|
||||||
|
|
||||||
// weitere Scans
|
// weitere Scans
|
||||||
if(callThrid) {
|
if(callThrid) {
|
||||||
quark_compactTest_gpu_SCAN<<<thr2,blockSize, 32*sizeof(uint32_t)>>>(d_partSum[0][thr_id], 32, d_partSum[1][thr_id]);
|
quark_compactTest_gpu_SCAN<<<thr2,blockSize>>>(d_partSum[0][thr_id], 32, d_partSum[1][thr_id]);
|
||||||
quark_compactTest_gpu_SCAN<<<1, thr2, 32*sizeof(uint32_t)>>>(d_partSum[1][thr_id], (thr2>32) ? 32 : thr2);
|
quark_compactTest_gpu_SCAN<<<1, thr2>>>(d_partSum[1][thr_id], (thr2>32) ? 32 : thr2);
|
||||||
} else {
|
} else {
|
||||||
quark_compactTest_gpu_SCAN<<<thr3,blockSize2, 32*sizeof(uint32_t)>>>(d_partSum[0][thr_id], (blockSize2>32) ? 32 : blockSize2);
|
quark_compactTest_gpu_SCAN<<<thr3,blockSize2>>>(d_partSum[0][thr_id], (blockSize2>32) ? 32 : blockSize2);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sync + Anzahl merken
|
// Sync + Anzahl merken
|
||||||
|
Loading…
x
Reference in New Issue
Block a user