mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-22 12:34:17 +00:00
xmr: dont use shared mem hack, windows dont like
This commit is contained in:
parent
bd030db5d1
commit
588c7ba361
@ -14,6 +14,7 @@ struct uint3 blockDim;
|
|||||||
#define atomicExch(p,y) (*p) = y
|
#define atomicExch(p,y) (*p) = y
|
||||||
#define __funnelshift_r(a,b,c) 1
|
#define __funnelshift_r(a,b,c) 1
|
||||||
#define __syncthreads()
|
#define __syncthreads()
|
||||||
|
#define __threadfence_block()
|
||||||
#define asm(x)
|
#define asm(x)
|
||||||
#define __shfl(a,b,c) 1
|
#define __shfl(a,b,c) 1
|
||||||
#define __umul64hi(a,b) a*b
|
#define __umul64hi(a,b) a*b
|
||||||
|
@ -32,12 +32,14 @@ void cryptonight_core_gpu_phase1(const uint32_t threads, uint32_t * long_state,
|
|||||||
for (uint32_t i = 0; i < 40U; i += 4U)
|
for (uint32_t i = 0; i < 40U; i += 4U)
|
||||||
AS_UINT4(&key[i]) = AS_UINT4(&ctx_key[i]);
|
AS_UINT4(&key[i]) = AS_UINT4(&ctx_key[i]);
|
||||||
|
|
||||||
__threadfence_block();
|
__syncthreads();
|
||||||
|
|
||||||
for(uint32_t i = 0; i < LONG_LOOPS32; i += 32U) {
|
for(uint32_t i = 0; i < LONG_LOOPS32; i += 32U) {
|
||||||
cn_aes_pseudo_round_mut(sharedMemory, (uint32_t*) &text, key);
|
cn_aes_pseudo_round_mut(sharedMemory, (uint32_t*) &text, key);
|
||||||
AS_UL2(&long_state[long_oft + i]) = text;
|
AS_UL2(&long_state[long_oft + i]) = text;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
__syncthreads();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -79,8 +81,8 @@ void cryptonight_core_gpu_phase2(const uint32_t threads, const uint32_t bfactor,
|
|||||||
{
|
{
|
||||||
__shared__ __align__(16) uint32_t sharedMemory[1024];
|
__shared__ __align__(16) uint32_t sharedMemory[1024];
|
||||||
|
|
||||||
// cn_aes_gpu_init(sharedMemory);
|
cn_aes_gpu_init(sharedMemory);
|
||||||
// __syncthreads();
|
__syncthreads();
|
||||||
|
|
||||||
const uint32_t thread = blockDim.x * blockIdx.x + threadIdx.x;
|
const uint32_t thread = blockDim.x * blockIdx.x + threadIdx.x;
|
||||||
|
|
||||||
@ -125,7 +127,7 @@ void cryptonight_core_gpu_phase3(const uint32_t threads, const uint32_t * __rest
|
|||||||
{
|
{
|
||||||
__shared__ __align__(16) uint32_t sharedMemory[1024];
|
__shared__ __align__(16) uint32_t sharedMemory[1024];
|
||||||
|
|
||||||
//cn_aes_gpu_init(sharedMemory);
|
cn_aes_gpu_init(sharedMemory);
|
||||||
|
|
||||||
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x) >> 3U;
|
const uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x) >> 3U;
|
||||||
const uint32_t sub = (threadIdx.x & 7U) << 2U;
|
const uint32_t sub = (threadIdx.x & 7U) << 2U;
|
||||||
@ -144,7 +146,7 @@ void cryptonight_core_gpu_phase3(const uint32_t threads, const uint32_t * __rest
|
|||||||
for (uint32_t i = 0; i < 40U; i += 4U)
|
for (uint32_t i = 0; i < 40U; i += 4U)
|
||||||
AS_UL2(&key[i]) = AS_UL2(&ctx_key[i]);
|
AS_UL2(&key[i]) = AS_UL2(&ctx_key[i]);
|
||||||
|
|
||||||
//__syncthreads();
|
__syncthreads();
|
||||||
for(uint32_t i = 0; i < LONG_LOOPS32; i += 32U)
|
for(uint32_t i = 0; i < LONG_LOOPS32; i += 32U)
|
||||||
{
|
{
|
||||||
ulonglong2 st = AS_UL2(&long_state[long_oft + i]);
|
ulonglong2 st = AS_UL2(&long_state[long_oft + i]);
|
||||||
@ -153,13 +155,15 @@ void cryptonight_core_gpu_phase3(const uint32_t threads, const uint32_t * __rest
|
|||||||
}
|
}
|
||||||
|
|
||||||
AS_UL2(&ctx_state[st_oft]) = text;
|
AS_UL2(&ctx_state[st_oft]) = text;
|
||||||
|
} else {
|
||||||
|
__syncthreads();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern int device_bfactor[MAX_GPUS];
|
extern int device_bfactor[MAX_GPUS];
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
void cryptonight_core_cpu_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint64_t *d_ctx_state,
|
void cryptonight_core_cuda(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint64_t *d_ctx_state,
|
||||||
uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2)
|
uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2)
|
||||||
{
|
{
|
||||||
dim3 grid(blocks);
|
dim3 grid(blocks);
|
||||||
@ -174,20 +178,19 @@ void cryptonight_core_cpu_hash(int thr_id, int blocks, int threads, uint32_t *d_
|
|||||||
|
|
||||||
const int bsleep = bfactor ? 100 : 0;
|
const int bsleep = bfactor ? 100 : 0;
|
||||||
const int dev_id = device_map[thr_id];
|
const int dev_id = device_map[thr_id];
|
||||||
int i;
|
|
||||||
|
|
||||||
cryptonight_core_gpu_phase1 <<<grid, block8, 4096>>> (throughput, d_long_state, (uint32_t*)d_ctx_state, d_ctx_key1);
|
cryptonight_core_gpu_phase1 <<<grid, block8>>> (throughput, d_long_state, (uint32_t*)d_ctx_state, d_ctx_key1);
|
||||||
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
||||||
if(partcount > 1) usleep(bsleep);
|
if(partcount > 1) usleep(bsleep);
|
||||||
|
|
||||||
for(i = 0; i < partcount; i++)
|
for (uint32_t i = 0; i < partcount; i++)
|
||||||
{
|
{
|
||||||
dim3 b = device_sm[dev_id] >= 300 ? block4 : block;
|
dim3 b = device_sm[dev_id] >= 300 ? block4 : block;
|
||||||
cryptonight_core_gpu_phase2 <<<grid, b, 4096>>> (throughput, bfactor, i, d_long_state, d_ctx_a, d_ctx_b);
|
cryptonight_core_gpu_phase2 <<<grid, b>>> (throughput, bfactor, i, d_long_state, d_ctx_a, d_ctx_b);
|
||||||
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
||||||
if(partcount > 1) usleep(bsleep);
|
if(partcount > 1) usleep(bsleep);
|
||||||
}
|
}
|
||||||
|
|
||||||
cryptonight_core_gpu_phase3 <<<grid, block8, 4096>>> (throughput, d_long_state, (uint32_t*)d_ctx_state, d_ctx_key2);
|
cryptonight_core_gpu_phase3 <<<grid, block8>>> (throughput, d_long_state, (uint32_t*)d_ctx_state, d_ctx_key2);
|
||||||
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
exit_if_cudaerror(thr_id, __FUNCTION__, __LINE__);
|
||||||
}
|
}
|
||||||
|
@ -92,7 +92,7 @@ extern "C" int scanhash_cryptonight(int thr_id, struct work* work, uint32_t max_
|
|||||||
|
|
||||||
cryptonight_extra_cpu_setData(thr_id, pdata, ptarget);
|
cryptonight_extra_cpu_setData(thr_id, pdata, ptarget);
|
||||||
cryptonight_extra_cpu_prepare(thr_id, throughput, nonce, d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id]);
|
cryptonight_extra_cpu_prepare(thr_id, throughput, nonce, d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id]);
|
||||||
cryptonight_core_cpu_hash(thr_id, cn_blocks, cn_threads, d_long_state[thr_id], d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id]);
|
cryptonight_core_cuda(thr_id, cn_blocks, cn_threads, d_long_state[thr_id], d_ctx_state[thr_id], d_ctx_a[thr_id], d_ctx_b[thr_id], d_ctx_key1[thr_id], d_ctx_key2[thr_id]);
|
||||||
cryptonight_extra_cpu_final(thr_id, throughput, nonce, resNonces, d_ctx_state[thr_id]);
|
cryptonight_extra_cpu_final(thr_id, throughput, nonce, resNonces, d_ctx_state[thr_id]);
|
||||||
|
|
||||||
*hashes_done = nonce - first_nonce + throughput;
|
*hashes_done = nonce - first_nonce + throughput;
|
||||||
|
@ -14,6 +14,7 @@ struct uint3 blockDim;
|
|||||||
#define atomicExch(p,y) (*p) = y
|
#define atomicExch(p,y) (*p) = y
|
||||||
#define __funnelshift_r(a,b,c) 1
|
#define __funnelshift_r(a,b,c) 1
|
||||||
#define __syncthreads()
|
#define __syncthreads()
|
||||||
|
#define __threadfence_block()
|
||||||
#define asm(x)
|
#define asm(x)
|
||||||
#define __shfl(a,b,c) 1
|
#define __shfl(a,b,c) 1
|
||||||
#define __umul64hi(a,b) a*b
|
#define __umul64hi(a,b) a*b
|
||||||
@ -136,7 +137,7 @@ static inline void exit_if_cudaerror(int thr_id, const char *src, int line)
|
|||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
void cryptonight_core_cpu_hash(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint64_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2);
|
void cryptonight_core_cuda(int thr_id, int blocks, int threads, uint32_t *d_long_state, uint64_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2);
|
||||||
|
|
||||||
void cryptonight_extra_cpu_setData(int thr_id, const void *data, const void *pTargetIn);
|
void cryptonight_extra_cpu_setData(int thr_id, const void *data, const void *pTargetIn);
|
||||||
void cryptonight_extra_cpu_init(int thr_id, uint32_t threads);
|
void cryptonight_extra_cpu_init(int thr_id, uint32_t threads);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user