mirror of https://github.com/GOSTSec/ccminer (synced 2025-03-09 20:11:11 +00:00)

commit 9f2ed5135b (parent f982fd70d5)

    lbry maxwell and pascal update (up to 10% on pascal)

    Based on alexis78's work and sponsored by the LBRY.IO team (thanks).
    Release 1.8.2, use CUDA 8 for x86.
Makefile.am
@@ -52,7 +52,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \
 	sph/hamsi.c sph/hamsi_helper.c sph/streebog.c \
 	sph/shabal.c sph/whirlpool.c sph/sha2big.c sph/haval.c \
 	sph/ripemd.c sph/sph_sha2.c \
-	lbry/lbry.cu lbry/cuda_sha256_lbry.cu lbry/cuda_sha512_lbry.cu \
+	lbry/lbry.cu lbry/cuda_sha256_lbry.cu lbry/cuda_sha512_lbry.cu lbry/cuda_lbry_merged.cu \
 	qubit/qubit.cu qubit/qubit_luffa512.cu qubit/deep.cu qubit/luffa.cu \
 	x11/x11.cu x11/fresh.cu x11/cuda_x11_luffa512.cu x11/cuda_x11_cubehash512.cu \
 	x11/cuda_x11_shavite512.cu x11/cuda_x11_simd512.cu x11/cuda_x11_echo.cu \
README.txt
@@ -1,5 +1,5 @@
 
-ccMiner 1.8.2 (August 2016) "Veltor algo Thor's Riddle streebog"
+ccMiner 1.8.2 (Sept 2016) "Veltor and lbry boost"
 ---------------------------------------------------------------
 
 ***************************************************************
@@ -245,9 +245,10 @@ features.
 
 >>> RELEASE HISTORY <<<
 
-	Aug. 20th 2016 v1.8.2
+	Sep. 22th 2016 v1.8.2
+	lbry improvements by Alexis Provos
 	Prevent Windows hibernate while mining
-	veltor algo
+	veltor algo (basic implementation)
 
 	Aug. 10th 2016 v1.8.1
 	SIA Blake2-B Algo (getwork over stratum for Suprnova)
ccminer.cpp
@@ -2430,7 +2430,7 @@ static void *miner_thread(void *userdata)
 			pthread_mutex_lock(&stats_lock);
 			thr_hashrates[thr_id] = hashes_done / dtime;
 			thr_hashrates[thr_id] *= rate_factor;
-			if (loopcnt > 1) // ignore first (init time)
+			if (loopcnt > 2) // ignore first (init time)
 				stats_remember_speed(thr_id, hashes_done, thr_hashrates[thr_id], (uint8_t) rc, work.height);
 			pthread_mutex_unlock(&stats_lock);
 		}
ccminer.vcxproj
@@ -39,7 +39,7 @@
   </PropertyGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
   <ImportGroup Label="ExtensionSettings" Condition="'$(Platform)'=='Win32'">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.props" />
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 8.0.props" />
   </ImportGroup>
   <ImportGroup Label="ExtensionSettings" Condition="'$(Platform)'=='x64'">
     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.props" />
@@ -429,6 +429,7 @@
     <CudaCompile Include="Algo256\cuda_skein256.cu" />
     <CudaCompile Include="lbry\cuda_sha256_lbry.cu" />
     <CudaCompile Include="lbry\cuda_sha512_lbry.cu" />
+    <CudaCompile Include="lbry\cuda_lbry_merged.cu" />
     <CudaCompile Include="lbry\lbry.cu" />
     <CudaCompile Include="pentablake.cu">
       <MaxRegCount>80</MaxRegCount>
@@ -534,7 +535,7 @@
   </ItemGroup>
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets" Condition="'$(Platform)'=='Win32'">
-    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.targets" />
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 8.0.targets" />
   </ImportGroup>
   <ImportGroup Label="ExtensionTargets" Condition="'$(Platform)'=='x64'">
     <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 7.5.targets" />
cuda_helper.h
@@ -481,7 +481,7 @@ static __device__ __forceinline__ uint2 operator~ (uint2 a) { return make_uint2(
 static __device__ __forceinline__ void operator^= (uint2 &a, uint2 b) { a = a ^ b; }
 
 static __device__ __forceinline__ uint2 operator+ (uint2 a, uint2 b) {
-#ifdef __CUDA_ARCH__
+#if defined(__CUDA_ARCH__) && CUDA_VERSION < 7000
 	uint2 result;
 	asm("{ // uint2 a+b \n\t"
 		"add.cc.u32 %0, %2, %4; \n\t"
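Note on the cuda_helper.h change: the inline PTX add-with-carry is now kept only for toolkits older than CUDA 7.0, presumably because newer nvcc handles the 64-bit add well enough on its own. As a point of reference, a minimal sketch of what the plain (non-asm) path amounts to, assuming ccminer's usual packing of a 64-bit value into uint2 (x = low 32 bits, y = high 32 bits); the helper name below is illustrative, not from the repository:

// Illustrative only: 64-bit addition of two uint2-packed values without inline PTX.
__device__ __forceinline__ uint2 uint2_add64_plain(uint2 a, uint2 b)
{
	const uint64_t s = (((uint64_t)a.y << 32) | a.x) + (((uint64_t)b.y << 32) | b.x);
	return make_uint2((uint32_t)s, (uint32_t)(s >> 32));
}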
lbry/cuda_lbry_merged.cu (new file, 1055 lines)
(file diff suppressed because it is too large)
lbry/cuda_sha512_lbry.cu
@@ -1,20 +1,16 @@
 /**
  * sha-512 CUDA implementation.
- * Tanguy Pruvot and Provos Alexis - JUL 2016
+ * Tanguy Pruvot and Provos Alexis - Jul / Sep 2016
+ * Sponsored by LBRY.IO team
  */
 
 //#define USE_ROT_ASM_OPT 0
 #include <cuda_helper.h>
 #include <cuda_vector_uint2x4.h>
-#include "miner.h"
+#include <miner.h>
 
-static __constant__
-#if __CUDA_ARCH__ > 500
-_ALIGN(16)
-#else
-_ALIGN(8)
-#endif
-uint64_t K_512[80] = {
+static __constant__ _ALIGN(8) uint64_t K_512[80] = {
 	0x428A2F98D728AE22, 0x7137449123EF65CD, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC,
 	0x3956C25BF348B538, 0x59F111F1B605D019, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118,
 	0xD807AA98A3030242, 0x12835B0145706FBE, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2,
@@ -58,12 +54,12 @@ uint64_t ROR64_8(const uint64_t u64) {
 #define andor64(a,b,c) ((a & (b | c)) | (b & c))
 #define xandx64(e,f,g) (g ^ (e & (g ^ f)))
 
-static __device__ __forceinline__
-void sha512_step2(uint64_t* r, const uint64_t W, const uint64_t K, const int ord)
+__device__ __forceinline__
+static void sha512_step2(uint64_t *const r,const uint64_t W,const uint64_t K, const int ord)
 {
 	const uint64_t T1 = r[(15-ord) & 7] + K + W + bsg5_1(r[(12-ord) & 7]) + xandx64(r[(12-ord) & 7],r[(13-ord) & 7],r[(14-ord) & 7]);
 	r[(15-ord) & 7] = andor64(r[(8-ord) & 7],r[(9-ord) & 7],r[(10-ord) & 7]) + bsg5_0(r[(8-ord) & 7]) + T1;
-	r[(11-ord) & 7] += T1;
+	r[(11-ord) & 7]+= T1;
 }
 
 /**************************************************************************************************/
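For readers comparing sha512_step2 with the standard: it performs one SHA-512 round, but instead of shuffling the eight working variables a..h it rotates the indices into r[8] through the `ord` argument (bsg5_0/bsg5_1 are presumably the big-sigma helpers defined earlier in the file). A self-contained reference round in FIPS 180-4 form, for comparison only and not part of the commit:

#include <stdint.h>

static uint64_t rotr64(uint64_t x, unsigned n) { return (x >> n) | (x << (64 - n)); }

/* One plain SHA-512 round; s = {a,b,c,d,e,f,g,h}. */
static void sha512_round_ref(uint64_t s[8], const uint64_t W, const uint64_t K)
{
	const uint64_t S1  = rotr64(s[4], 14) ^ rotr64(s[4], 18) ^ rotr64(s[4], 41); /* bsg5_1(e) */
	const uint64_t Ch  = s[6] ^ (s[4] & (s[6] ^ s[5]));                          /* xandx64(e,f,g) */
	const uint64_t S0  = rotr64(s[0], 28) ^ rotr64(s[0], 34) ^ rotr64(s[0], 39); /* bsg5_0(a) */
	const uint64_t Maj = (s[0] & (s[1] | s[2])) | (s[1] & s[2]);                 /* andor64(a,b,c) */
	const uint64_t T1  = s[7] + S1 + Ch + K + W;
	const uint64_t T2  = S0 + Maj;
	for (int i = 7; i > 0; i--) s[i] = s[i - 1];  /* h<=g, g<=f, ..., b<=a */
	s[4] += T1;       /* e = d + T1 */
	s[0]  = T1 + T2;  /* a = T1 + T2 */
}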
@@ -76,7 +72,6 @@ void lbry_sha512_gpu_hash_32(const uint32_t threads, uint64_t *g_hash)
 		0x6A09E667F3BCC908, 0xBB67AE8584CAA73B, 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
 		0x510E527FADE682D1, 0x9B05688C2B3E6C1F, 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
 	};
-
 	uint64_t r[8];
 	uint64_t W[16];
 	if (thread < threads)
@@ -91,7 +86,7 @@ void lbry_sha512_gpu_hash_32(const uint32_t threads, uint64_t *g_hash)
 		W[4] = 0x8000000000000000; // end tag
 
 		#pragma unroll
-		for (int i = 5; i < 15; i++) W[i] = 0;
+		for (uint32_t i = 5; i < 15; i++) W[i] = 0;
 
 		W[15] = 0x100; // 256 bits
 
@@ -100,31 +95,32 @@ void lbry_sha512_gpu_hash_32(const uint32_t threads, uint64_t *g_hash)
 			sha512_step2(r, W[i], K_512[i], i&7);
 		}
 
-		#pragma unroll
-		for (int i = 16; i < 80; i+=16) {
+		#pragma unroll 5
+		for (uint32_t i = 16; i < 80; i+=16){
 			#pragma unroll
-			for (int j = 0; j<16; j++) {
+			for (uint32_t j = 0; j<16; j++){
 				W[(i + j) & 15] += W[((i + j) - 7) & 15] + ssg5_0(W[((i + j) - 15) & 15]) + ssg5_1(W[((i + j) - 2) & 15]);
 			}
 			#pragma unroll
-			for (int j = 0; j<16; j++) {
+			for (uint32_t j = 0; j<16; j++){
 				sha512_step2(r, W[j], K_512[i+j], (i+j)&7);
 			}
 		}
 
 		#pragma unroll 8
-		for (int i = 0; i < 8; i++)
+		for (uint32_t i = 0; i < 8; i++)
 			r[i] = cuda_swab64(r[i] + IV512[i]);
 
 		*(uint2x4*)&pHash[0] = *(uint2x4*)&r[0];
 		*(uint2x4*)&pHash[4] = *(uint2x4*)&r[4];
 
 	}
 }
 
 __host__
 void lbry_sha512_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash)
 {
-	const uint32_t threadsperblock = 512;
+	const uint32_t threadsperblock = 256;
 
 	dim3 grid((threads + threadsperblock-1)/threadsperblock);
 	dim3 block(threadsperblock);
lbry/lbry.cu (61 changed lines)
@@ -1,8 +1,9 @@
 /**
- * Lbry CUDA Implementation
+ * Lbry Algo (sha-256 / sha-512 / ripemd)
  *
- * by tpruvot@github - July 2016
+ * tpruvot and Provos Alexis - Jul / Sep 2016
  *
+ * Sponsored by LBRY.IO team
  */
 
 #include <string.h>
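The new header comment's "sha-256 / sha-512 / ripemd" refers to LBRY's proof-of-work chain, which both the split kernels (lbry_sha256d_hash_112, lbry_sha512_hash_32, lbry_sha256d_hash_final) and the new merged kernel implement: double SHA-256 over the 112-byte block header, SHA-512 of that 32-byte digest, RIPEMD-160 over each 32-byte half of the 64-byte result, then double SHA-256 over the two concatenated 20-byte digests. A rough CPU outline using the sph primitives already listed in Makefile.am (an illustrative sketch with assumed header paths, not the file's lbry_hash verbatim):

#include <stdint.h>
#include <string.h>
#include "sph/sph_sha2.h"
#include "sph/sph_ripemd.h"

/* Outline of the LBRY PoW chain: 112-byte header in, 32-byte hash out. */
static void lbry_hash_outline(void *output, const void *input)
{
	uint8_t h256[32], h512[64], rmd[40];
	sph_sha256_context sha256;
	sph_sha512_context sha512;
	sph_ripemd160_context ripemd;

	sph_sha256_init(&sha256); sph_sha256(&sha256, input, 112); sph_sha256_close(&sha256, h256);
	sph_sha256_init(&sha256); sph_sha256(&sha256, h256, 32);   sph_sha256_close(&sha256, h256);

	sph_sha512_init(&sha512); sph_sha512(&sha512, h256, 32);   sph_sha512_close(&sha512, h512);

	sph_ripemd160_init(&ripemd); sph_ripemd160(&ripemd, h512, 32);      sph_ripemd160_close(&ripemd, rmd);
	sph_ripemd160_init(&ripemd); sph_ripemd160(&ripemd, h512 + 32, 32); sph_ripemd160_close(&ripemd, rmd + 20);

	sph_sha256_init(&sha256); sph_sha256(&sha256, rmd, 40);  sph_sha256_close(&sha256, h256);
	sph_sha256_init(&sha256); sph_sha256(&sha256, h256, 32); sph_sha256_close(&sha256, h256);

	memcpy(output, h256, 32);
}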
@@ -64,11 +65,14 @@ extern "C" void lbry_hash(void* output, const void* input)
 
 extern void lbry_sha256_init(int thr_id);
 extern void lbry_sha256_free(int thr_id);
-extern void lbry_sha256_setBlock_112(uint32_t *pdata, uint32_t *ptarget);
+extern void lbry_sha256_setBlock_112(uint32_t *pdata);
 extern void lbry_sha256d_hash_112(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_outputHash);
 extern void lbry_sha512_init(int thr_id);
 extern void lbry_sha512_hash_32(int thr_id, uint32_t threads, uint32_t *d_hash);
-extern int lbry_sha256d_hash_final(int thr_id, uint32_t threads, uint32_t *d_inputHash, uint32_t *d_resNonce);
+extern void lbry_sha256d_hash_final(int thr_id, uint32_t threads, uint32_t *d_inputHash, uint32_t *d_resNonce, const uint64_t target64);
 
+extern void lbry_sha256_setBlock_112_merged(uint32_t *pdata);
+extern void lbry_merged(int thr_id,uint32_t startNonce, uint32_t threads, uint32_t *d_resNonce, const uint64_t target64);
+
 static __inline uint32_t swab32_if(uint32_t val, bool iftrue) {
 	return iftrue ? swab32(val) : val;
@@ -97,7 +101,7 @@ extern "C" int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce,
 	if (device_sm[dev_id] < 350) intensity = 18;
 
 	uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
-	//if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
+	if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce);
 
 	if (opt_benchmark) {
 		ptarget[7] = 0xf;
@@ -107,17 +111,16 @@ extern "C" int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce,
 		cudaSetDevice(dev_id);
 		if (opt_cudaschedule == -1 && gpu_threads == 1) {
 			cudaDeviceReset();
-			// reduce cpu usage (linux)
+			// reduce cpu usage
 			cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
 			cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
 			CUDA_LOG_ERROR();
 		}
 
-		CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));
-		CUDA_SAFE_CALL(cudaMalloc(&d_resNonce[thr_id], 2 * sizeof(uint32_t)));
+		if(device_sm[dev_id] <= 500)
+			CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 8 * sizeof(uint64_t) * throughput));
 
-		lbry_sha256_init(thr_id);
-		lbry_sha512_init(thr_id);
+		CUDA_SAFE_CALL(cudaMalloc(&d_resNonce[thr_id], 2 * sizeof(uint32_t)));
 		CUDA_LOG_ERROR();
 
 		init[thr_id] = true;
@@ -127,29 +130,29 @@ extern "C" int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce,
 		be32enc(&endiandata[i], pdata[i]);
 	}
 
-	lbry_sha256_setBlock_112(endiandata, ptarget);
+	if(device_sm[dev_id] <= 500)
+		lbry_sha256_setBlock_112(endiandata);
+	else
+		lbry_sha256_setBlock_112_merged(endiandata);
 
 	cudaMemset(d_resNonce[thr_id], 0xFF, 2 * sizeof(uint32_t));
 
 	do {
 		// Hash with CUDA
-		lbry_sha256d_hash_112(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id]);
-		lbry_sha512_hash_32(thr_id, throughput, d_hash[thr_id]);
-
-		uint32_t resNonces[2] = { UINT32_MAX, UINT32_MAX };
-		int err = lbry_sha256d_hash_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id]);
-		if (err) {
-			// reinit
-			free_lbry(thr_id);
-			return -1;
+		if(device_sm[dev_id] <= 500){
+			lbry_sha256d_hash_112(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id]);
+			lbry_sha512_hash_32(thr_id, throughput, d_hash[thr_id]);
+			lbry_sha256d_hash_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id], *(uint64_t*)&ptarget[6]);
+		}else{
+			lbry_merged(thr_id,pdata[LBC_NONCE_OFT32], throughput, d_resNonce[thr_id], *(uint64_t*)&ptarget[6]);
 		}
+
+		uint32_t resNonces[2] = { UINT32_MAX, UINT32_MAX };
 		cudaMemcpy(resNonces, d_resNonce[thr_id], 2 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
 
-		*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce + throughput;
-
 		if (resNonces[0] != UINT32_MAX)
 		{
 			const uint32_t startNonce = pdata[LBC_NONCE_OFT32];
 
 			resNonces[0] += startNonce;
 
 			endiandata[LBC_NONCE_OFT32] = swab32_if(resNonces[0], !swap);
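Net effect of the hunk above: the single fixed pipeline becomes an SM-gated dispatch. Maxwell and older cards (SM <= 5.0) keep the three-kernel chain through the intermediate d_hash buffer, while newer cards run the new lbry_merged kernel, which takes only the start nonce, the result buffer and the 64-bit target and never touches d_hash (now only allocated for SM <= 5.0). Condensed from the diff, per scan iteration:

if (device_sm[dev_id] <= 500) {
	lbry_sha256d_hash_112(thr_id, throughput, pdata[LBC_NONCE_OFT32], d_hash[thr_id]);
	lbry_sha512_hash_32(thr_id, throughput, d_hash[thr_id]);
	lbry_sha256d_hash_final(thr_id, throughput, d_hash[thr_id], d_resNonce[thr_id], *(uint64_t*)&ptarget[6]);
} else {
	lbry_merged(thr_id, pdata[LBC_NONCE_OFT32], throughput, d_resNonce[thr_id], *(uint64_t*)&ptarget[6]);
}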
@@ -157,12 +160,13 @@ extern "C" int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce,
 
 			if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
 				int res = 1;
+				*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce + throughput;
 				work->nonces[0] = swab32_if(resNonces[0], swap);
 				work_set_target_ratio(work, vhash);
 				if (resNonces[1] != UINT32_MAX) {
 					resNonces[1] += startNonce;
 					if (opt_debug)
-						gpulog(LOG_BLUE, thr_id, "found second nonce %08x", resNonces[1]);
+						gpulog(LOG_BLUE, thr_id, "Found second nonce %08x", swab32(resNonces[1]));
 					endiandata[LBC_NONCE_OFT32] = swab32_if(resNonces[1], !swap);
 					lbry_hash(vhash, endiandata);
 					work->nonces[1] = swab32_if(resNonces[1], swap);
@@ -189,22 +193,23 @@ extern "C" int scanhash_lbry(int thr_id, struct work *work, uint32_t max_nonce,
 
 	} while (!work_restart[thr_id].restart);
 
-	*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce + 1;
+	*hashes_done = pdata[LBC_NONCE_OFT32] - first_nonce;
 
 	return 0;
 }
 
 // cleanup
-extern "C" void free_lbry(int thr_id)
+void free_lbry(int thr_id)
 {
 	if (!init[thr_id])
 		return;
 
 	cudaThreadSynchronize();
 
-	cudaFree(d_hash[thr_id]);
+	if(device_sm[device_map[thr_id]]<=500)
+		cudaFree(d_hash[thr_id]);
+
 	cudaFree(d_resNonce[thr_id]);
-	lbry_sha256_free(thr_id);
 
 	init[thr_id] = false;
 