mirror of
https://github.com/GOSTSec/ccminer
synced 2025-02-05 19:34:18 +00:00
pluck: fix SM 3.0 compilation
This commit is contained in:
parent
ae8e863591
commit
e7567332f4
@ -94,6 +94,12 @@ static __forceinline__ __device__ void operator+= (uchar4 &a, uchar4 b) { a = a
|
|||||||
static __forceinline__ __device__ __host__ void operator+= (uint8 &a, const uint8 &b) { a = a + b; }
|
static __forceinline__ __device__ __host__ void operator+= (uint8 &a, const uint8 &b) { a = a + b; }
|
||||||
static __forceinline__ __device__ __host__ void operator+= (uint16 &a, const uint16 &b) { a = a + b; }
|
static __forceinline__ __device__ __host__ void operator+= (uint16 &a, const uint16 &b) { a = a + b; }
|
||||||
|
|
||||||
|
#if __CUDA_ARCH__ < 320
|
||||||
|
|
||||||
|
#define rotate ROTL32
|
||||||
|
#define rotateR ROTR32
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
static __forceinline__ __device__ uint32_t rotate(uint32_t vec4, uint32_t shift)
|
static __forceinline__ __device__ uint32_t rotate(uint32_t vec4, uint32_t shift)
|
||||||
{
|
{
|
||||||
@ -102,7 +108,6 @@ static __forceinline__ __device__ uint32_t rotate(uint32_t vec4, uint32_t shift)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static __forceinline__ __device__ uint32_t rotateR(uint32_t vec4, uint32_t shift)
|
static __forceinline__ __device__ uint32_t rotateR(uint32_t vec4, uint32_t shift)
|
||||||
{
|
{
|
||||||
uint32_t ret;
|
uint32_t ret;
|
||||||
@ -110,7 +115,6 @@ static __forceinline__ __device__ uint32_t rotateR(uint32_t vec4, uint32_t shift
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static __device__ __inline__ uint8 __ldg8(const uint8_t *ptr)
|
static __device__ __inline__ uint8 __ldg8(const uint8_t *ptr)
|
||||||
{
|
{
|
||||||
uint8 test;
|
uint8 test;
|
||||||
@ -119,7 +123,6 @@ static __device__ __inline__ uint8 __ldg8(const uint8_t *ptr)
|
|||||||
return (test);
|
return (test);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static __device__ __inline__ uint32_t __ldgtoint(const uint8_t *ptr)
|
static __device__ __inline__ uint32_t __ldgtoint(const uint8_t *ptr)
|
||||||
{
|
{
|
||||||
uint32_t test;
|
uint32_t test;
|
||||||
@ -204,7 +207,7 @@ static __device__ __inline__ uint32_t __ldgtoint_unaligned2(const uint8_t *ptr)
|
|||||||
return (test);
|
return (test);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
static __forceinline__ __device__ uint8 swapvec(const uint8 *buf)
|
static __forceinline__ __device__ uint8 swapvec(const uint8 *buf)
|
||||||
|
@ -34,9 +34,10 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <memory.h>
|
#include <memory.h>
|
||||||
|
|
||||||
#include "cuda_helper.h"
|
|
||||||
#include "cuda_vector.h"
|
#include "cuda_vector.h"
|
||||||
|
|
||||||
|
#include "miner.h"
|
||||||
|
|
||||||
uint32_t *d_PlNonce[MAX_GPUS];
|
uint32_t *d_PlNonce[MAX_GPUS];
|
||||||
|
|
||||||
__device__ uint8_t * hashbuffer;
|
__device__ uint8_t * hashbuffer;
|
||||||
@ -297,8 +298,8 @@ void pluck_gpu_hash0_v50(uint32_t threads, uint32_t startNonce)
|
|||||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
{
|
{
|
||||||
|
#if __CUDA_ARCH__ >= 320
|
||||||
const uint32_t nonce = startNonce + thread;
|
const uint32_t nonce = startNonce + thread;
|
||||||
|
|
||||||
uint32_t shift = SHIFT * thread;
|
uint32_t shift = SHIFT * thread;
|
||||||
((uint8*)(hashbuffer + shift))[0] = sha256_80(nonce);
|
((uint8*)(hashbuffer + shift))[0] = sha256_80(nonce);
|
||||||
((uint8*)(hashbuffer + shift))[1] = make_uint8(0, 0, 0, 0, 0, 0, 0, 0);
|
((uint8*)(hashbuffer + shift))[1] = make_uint8(0, 0, 0, 0, 0, 0, 0, 0);
|
||||||
@ -308,7 +309,7 @@ void pluck_gpu_hash0_v50(uint32_t threads, uint32_t startNonce)
|
|||||||
uint32_t randseed[16];
|
uint32_t randseed[16];
|
||||||
uint32_t randbuffer[16];
|
uint32_t randbuffer[16];
|
||||||
uint32_t joint[16];
|
uint32_t joint[16];
|
||||||
uint8 Buffbuffer[2];
|
// uint8 Buffbuffer[2];
|
||||||
|
|
||||||
((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]);
|
((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]);
|
||||||
((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]);
|
((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]);
|
||||||
@ -339,8 +340,8 @@ void pluck_gpu_hash0_v50(uint32_t threads, uint32_t startNonce)
|
|||||||
(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
|
(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
|
||||||
(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
|
(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // main loop
|
} // main loop
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -351,8 +352,8 @@ void pluck_gpu_hash_v50(uint32_t threads, uint32_t startNonce, uint32_t *nonceVe
|
|||||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
{
|
{
|
||||||
|
#if __CUDA_ARCH__ >= 320
|
||||||
const uint32_t nonce = startNonce + thread;
|
const uint32_t nonce = startNonce + thread;
|
||||||
|
|
||||||
uint32_t shift = SHIFT * thread;
|
uint32_t shift = SHIFT * thread;
|
||||||
|
|
||||||
for (int i = 5; i < HASH_MEMORY - 1; i++)
|
for (int i = 5; i < HASH_MEMORY - 1; i++)
|
||||||
@ -405,7 +406,7 @@ void pluck_gpu_hash_v50(uint32_t threads, uint32_t startNonce, uint32_t *nonceVe
|
|||||||
if (outbuf <= pTarget[7]) {
|
if (outbuf <= pTarget[7]) {
|
||||||
nonceVector[0] = nonce;
|
nonceVector[0] = nonce;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -415,6 +416,7 @@ void pluck_gpu_hash0(uint32_t threads, uint32_t startNonce)
|
|||||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
{
|
{
|
||||||
|
#if __CUDA_ARCH__ >= 320
|
||||||
const uint32_t nonce = startNonce + thread;
|
const uint32_t nonce = startNonce + thread;
|
||||||
|
|
||||||
uint32_t shift = SHIFT * thread;
|
uint32_t shift = SHIFT * thread;
|
||||||
@ -426,7 +428,7 @@ void pluck_gpu_hash0(uint32_t threads, uint32_t startNonce)
|
|||||||
uint32_t randseed[16];
|
uint32_t randseed[16];
|
||||||
uint32_t randbuffer[16];
|
uint32_t randbuffer[16];
|
||||||
uint32_t joint[16];
|
uint32_t joint[16];
|
||||||
uint8 Buffbuffer[2];
|
// uint8 Buffbuffer[2];
|
||||||
|
|
||||||
((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]);
|
((uint8*)randseed)[0] = __ldg8(&(hashbuffer + shift)[32 * i - 64]);
|
||||||
((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]);
|
((uint8*)randseed)[1] = __ldg8(&(hashbuffer + shift)[32 * i - 32]);
|
||||||
@ -457,9 +459,8 @@ void pluck_gpu_hash0(uint32_t threads, uint32_t startNonce)
|
|||||||
(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
|
(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
|
||||||
(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
|
(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // main loop
|
} // main loop
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -469,6 +470,7 @@ void pluck_gpu_hash(uint32_t threads, uint32_t startNonce, uint32_t *nonceVector
|
|||||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
if (thread < threads)
|
if (thread < threads)
|
||||||
{
|
{
|
||||||
|
#if __CUDA_ARCH__ >= 320
|
||||||
const uint32_t nonce = startNonce + thread;
|
const uint32_t nonce = startNonce + thread;
|
||||||
|
|
||||||
uint32_t shift = SHIFT * thread;
|
uint32_t shift = SHIFT * thread;
|
||||||
@ -508,7 +510,6 @@ void pluck_gpu_hash(uint32_t threads, uint32_t startNonce, uint32_t *nonceVector
|
|||||||
|
|
||||||
((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0];
|
((uint16*)randseed)[0] ^= ((uint16*)Buffbuffer)[0];
|
||||||
((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
|
((uint16*)randbuffer)[0] = xor_salsa8(((uint16*)randseed)[0]);
|
||||||
|
|
||||||
for (int j = 0; j < 32; j += 2)
|
for (int j = 0; j < 32; j += 2)
|
||||||
{
|
{
|
||||||
uint32_t rand = randbuffer[j / 2] % randmax;
|
uint32_t rand = randbuffer[j / 2] % randmax;
|
||||||
@ -518,7 +519,6 @@ void pluck_gpu_hash(uint32_t threads, uint32_t startNonce, uint32_t *nonceVector
|
|||||||
(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
|
(hashbuffer + shift)[rand + 2] = __ldg(&(hashbuffer + shift)[randmax + j + 2]);
|
||||||
(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
|
(hashbuffer + shift)[rand + 3] = __ldg(&(hashbuffer + shift)[randmax + j + 3]);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // main loop
|
} // main loop
|
||||||
|
|
||||||
uint32_t outbuf = __ldgtoint(&(hashbuffer + shift)[28]);
|
uint32_t outbuf = __ldgtoint(&(hashbuffer + shift)[28]);
|
||||||
@ -526,7 +526,7 @@ void pluck_gpu_hash(uint32_t threads, uint32_t startNonce, uint32_t *nonceVector
|
|||||||
if (outbuf <= pTarget[7]) {
|
if (outbuf <= pTarget[7]) {
|
||||||
nonceVector[0] = nonce;
|
nonceVector[0] = nonce;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -549,7 +549,10 @@ uint32_t pluck_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, int
|
|||||||
dim3 grid50((threads + 256 - 1) / 256);
|
dim3 grid50((threads + 256 - 1) / 256);
|
||||||
dim3 block50(256);
|
dim3 block50(256);
|
||||||
|
|
||||||
if (device_sm[device_map[thr_id]] >= 500) {
|
if (device_sm[device_map[thr_id]] <= 300) {
|
||||||
|
applog(LOG_ERR,"Sorry pluck not supported on SM 3.0 devices");
|
||||||
|
return 0;
|
||||||
|
} else if (device_sm[device_map[thr_id]] >= 500) {
|
||||||
pluck_gpu_hash0_v50 <<< grid50, block50 >>>(threads, startNounce);
|
pluck_gpu_hash0_v50 <<< grid50, block50 >>>(threads, startNounce);
|
||||||
pluck_gpu_hash_v50 <<< grid50, block50 >>>(threads, startNounce, d_PlNonce[thr_id]);
|
pluck_gpu_hash_v50 <<< grid50, block50 >>>(threads, startNounce, d_PlNonce[thr_id]);
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user