mirror of
https://github.com/GOSTSec/ccminer
synced 2025-08-30 07:42:11 +00:00
warn on cuda errors + various small changes
The full benchmark can now be launched with "ccminer --benchmark" add a new helper function which log a warning with last cuda error (not shown with the quiet option) : CUDA_LOG_ERROR(); it can be used where miner.h is included (.c/.cpp/.cu) fix x14 (in ccminer.cpp), a break was missing in switch..case
This commit is contained in:
parent
8fbfe2cfda
commit
9dfa757dc7
@ -10,7 +10,13 @@ extern "C"
|
|||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
|
|
||||||
static uint32_t *d_hash[MAX_GPUS];
|
static uint32_t *d_hash[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
|
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes
|
||||||
|
static uint32_t *d_jackpotNonces[MAX_GPUS] = { 0 };
|
||||||
|
static uint32_t *d_branch1Nonces[MAX_GPUS] = { 0 };
|
||||||
|
static uint32_t *d_branch2Nonces[MAX_GPUS] = { 0 };
|
||||||
|
static uint32_t *d_branch3Nonces[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
extern void jackpot_keccak512_cpu_init(int thr_id, uint32_t threads);
|
extern void jackpot_keccak512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void jackpot_keccak512_cpu_setBlock(void *pdata, size_t inlen);
|
extern void jackpot_keccak512_cpu_setBlock(void *pdata, size_t inlen);
|
||||||
@ -32,35 +38,28 @@ extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t st
|
|||||||
extern void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads);
|
extern void jackpot_compactTest_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void jackpot_compactTest_cpu_free(int thr_id);
|
extern void jackpot_compactTest_cpu_free(int thr_id);
|
||||||
extern void jackpot_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
|
extern void jackpot_compactTest_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *inpHashes, uint32_t *d_validNonceTable,
|
||||||
uint32_t *d_nonces1, uint32_t *nrm1,
|
uint32_t *d_nonces1, uint32_t *nrm1, uint32_t *d_nonces2, uint32_t *nrm2, int order);
|
||||||
uint32_t *d_nonces2, uint32_t *nrm2,
|
|
||||||
int order);
|
|
||||||
|
|
||||||
extern uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
extern uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order);
|
||||||
|
|
||||||
// Speicher zur Generierung der Noncevektoren für die bedingten Hashes
|
|
||||||
static uint32_t *d_jackpotNonces[MAX_GPUS];
|
|
||||||
static uint32_t *d_branch1Nonces[MAX_GPUS];
|
|
||||||
static uint32_t *d_branch2Nonces[MAX_GPUS];
|
|
||||||
static uint32_t *d_branch3Nonces[MAX_GPUS];
|
|
||||||
|
|
||||||
// Original jackpothash Funktion aus einem miner Quelltext
|
// Original jackpothash Funktion aus einem miner Quelltext
|
||||||
extern "C" unsigned int jackpothash(void *state, const void *input)
|
extern "C" unsigned int jackpothash(void *state, const void *input)
|
||||||
{
|
{
|
||||||
|
uint32_t hash[16];
|
||||||
|
unsigned int rnd;
|
||||||
|
|
||||||
sph_blake512_context ctx_blake;
|
sph_blake512_context ctx_blake;
|
||||||
sph_groestl512_context ctx_groestl;
|
sph_groestl512_context ctx_groestl;
|
||||||
sph_jh512_context ctx_jh;
|
sph_jh512_context ctx_jh;
|
||||||
sph_keccak512_context ctx_keccak;
|
sph_keccak512_context ctx_keccak;
|
||||||
sph_skein512_context ctx_skein;
|
sph_skein512_context ctx_skein;
|
||||||
|
|
||||||
uint32_t hash[16];
|
|
||||||
|
|
||||||
sph_keccak512_init(&ctx_keccak);
|
sph_keccak512_init(&ctx_keccak);
|
||||||
sph_keccak512 (&ctx_keccak, input, 80);
|
sph_keccak512 (&ctx_keccak, input, 80);
|
||||||
sph_keccak512_close(&ctx_keccak, hash);
|
sph_keccak512_close(&ctx_keccak, hash);
|
||||||
|
|
||||||
unsigned int round;
|
for (rnd = 0; rnd < 3; rnd++)
|
||||||
for (round = 0; round < 3; round++) {
|
{
|
||||||
if (hash[0] & 0x01) {
|
if (hash[0] & 0x01) {
|
||||||
sph_groestl512_init(&ctx_groestl);
|
sph_groestl512_init(&ctx_groestl);
|
||||||
sph_groestl512 (&ctx_groestl, (&hash), 64);
|
sph_groestl512 (&ctx_groestl, (&hash), 64);
|
||||||
@ -71,6 +70,7 @@ extern "C" unsigned int jackpothash(void *state, const void *input)
|
|||||||
sph_skein512 (&ctx_skein, (&hash), 64);
|
sph_skein512 (&ctx_skein, (&hash), 64);
|
||||||
sph_skein512_close(&ctx_skein, (&hash));
|
sph_skein512_close(&ctx_skein, (&hash));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hash[0] & 0x01) {
|
if (hash[0] & 0x01) {
|
||||||
sph_blake512_init(&ctx_blake);
|
sph_blake512_init(&ctx_blake);
|
||||||
sph_blake512 (&ctx_blake, (&hash), 64);
|
sph_blake512 (&ctx_blake, (&hash), 64);
|
||||||
@ -84,7 +84,7 @@ extern "C" unsigned int jackpothash(void *state, const void *input)
|
|||||||
}
|
}
|
||||||
memcpy(state, hash, 32);
|
memcpy(state, hash, 32);
|
||||||
|
|
||||||
return round;
|
return rnd;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool init[MAX_GPUS] = { 0 };
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
@ -106,7 +106,7 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
|
|||||||
{
|
{
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
|
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));
|
||||||
|
|
||||||
jackpot_keccak512_cpu_init(thr_id, throughput);
|
jackpot_keccak512_cpu_init(thr_id, throughput);
|
||||||
jackpot_compactTest_cpu_init(thr_id, throughput);
|
jackpot_compactTest_cpu_init(thr_id, throughput);
|
||||||
@ -117,11 +117,11 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
|
|||||||
|
|
||||||
cuda_check_cpu_init(thr_id, throughput);
|
cuda_check_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
cudaMalloc(&d_branch1Nonces[thr_id], sizeof(uint32_t)*throughput*2);
|
cudaMalloc(&d_branch1Nonces[thr_id], (size_t) sizeof(uint32_t)*throughput*2);
|
||||||
cudaMalloc(&d_branch2Nonces[thr_id], sizeof(uint32_t)*throughput*2);
|
cudaMalloc(&d_branch2Nonces[thr_id], (size_t) sizeof(uint32_t)*throughput*2);
|
||||||
cudaMalloc(&d_branch3Nonces[thr_id], sizeof(uint32_t)*throughput*2);
|
cudaMalloc(&d_branch3Nonces[thr_id], (size_t) sizeof(uint32_t)*throughput*2);
|
||||||
|
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_jackpotNonces[thr_id], sizeof(uint32_t)*throughput*2));
|
CUDA_SAFE_CALL(cudaMalloc(&d_jackpotNonces[thr_id], (size_t) sizeof(uint32_t)*throughput*2));
|
||||||
|
|
||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
}
|
}
|
||||||
@ -211,19 +211,22 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
|
|||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||||
|
|
||||||
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
uint32_t foundNonce = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
|
uint32_t foundNonce = cuda_check_hash_branch(thr_id, nrm3, pdata[19], d_branch3Nonces[thr_id], d_hash[thr_id], order++);
|
||||||
if (foundNonce != 0xffffffff)
|
if (foundNonce != UINT32_MAX)
|
||||||
{
|
{
|
||||||
uint32_t vhash64[8];
|
uint32_t vhash64[8];
|
||||||
be32enc(&endiandata[19], foundNonce);
|
be32enc(&endiandata[19], foundNonce);
|
||||||
|
|
||||||
// diese jackpothash Funktion gibt die Zahl der Runden zurück
|
// jackpothash function gibt die Zahl der Runden zurück
|
||||||
jackpothash(vhash64, endiandata);
|
jackpothash(vhash64, endiandata);
|
||||||
|
|
||||||
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
|
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
|
||||||
int res = 1;
|
int res = 1;
|
||||||
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
|
|
||||||
work_set_target_ratio(work, vhash64);
|
work_set_target_ratio(work, vhash64);
|
||||||
|
#if 0
|
||||||
|
uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], 1);
|
||||||
if (secNonce != 0) {
|
if (secNonce != 0) {
|
||||||
be32enc(&endiandata[19], secNonce);
|
be32enc(&endiandata[19], secNonce);
|
||||||
nist5hash(vhash64, endiandata);
|
nist5hash(vhash64, endiandata);
|
||||||
@ -232,6 +235,7 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
|
|||||||
pdata[21] = secNonce;
|
pdata[21] = secNonce;
|
||||||
res++;
|
res++;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
pdata[19] = foundNonce;
|
pdata[19] = foundNonce;
|
||||||
return res;
|
return res;
|
||||||
} else {
|
} else {
|
||||||
@ -250,6 +254,8 @@ extern "C" int scanhash_jackpot(int thr_id, struct work *work, uint32_t max_nonc
|
|||||||
|
|
||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -259,9 +265,7 @@ extern "C" void free_jackpot(int thr_id)
|
|||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaThreadSynchronize();
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
|
||||||
|
|
||||||
cudaFree(d_branch1Nonces[thr_id]);
|
cudaFree(d_branch1Nonces[thr_id]);
|
||||||
cudaFree(d_branch2Nonces[thr_id]);
|
cudaFree(d_branch2Nonces[thr_id]);
|
||||||
@ -271,8 +275,12 @@ extern "C" void free_jackpot(int thr_id)
|
|||||||
quark_groestl512_cpu_free(thr_id);
|
quark_groestl512_cpu_free(thr_id);
|
||||||
jackpot_compactTest_cpu_free(thr_id);
|
jackpot_compactTest_cpu_free(thr_id);
|
||||||
|
|
||||||
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
|
||||||
|
init[thr_id] = false;
|
||||||
}
|
}
|
@ -231,7 +231,7 @@ features.
|
|||||||
Under Dev... v1.7
|
Under Dev... v1.7
|
||||||
Restore whirlpool algo (and whirlcoin variant)
|
Restore whirlpool algo (and whirlcoin variant)
|
||||||
Prepare algo switch ability
|
Prepare algo switch ability
|
||||||
Add --benchmark -a auto to run a multi algo benchmark
|
Add --benchmark -a all to run a benchmark for all algos
|
||||||
Add --cuda-schedule parameter
|
Add --cuda-schedule parameter
|
||||||
Add --show-diff parameter, which display shares diff,
|
Add --show-diff parameter, which display shares diff,
|
||||||
and is able to detect real solved blocks on pools.
|
and is able to detect real solved blocks on pools.
|
||||||
|
4
api.cpp
4
api.cpp
@ -1002,5 +1002,9 @@ void api_set_throughput(int thr_id, uint32_t throughput)
|
|||||||
if (i && (1U << i) < throughput) {
|
if (i && (1U << i) < throughput) {
|
||||||
cgpu->intensity += ((float) (throughput-(1U << i)) / (1U << i));
|
cgpu->intensity += ((float) (throughput-(1U << i)) / (1U << i));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// to display in bench results
|
||||||
|
if (opt_benchmark)
|
||||||
|
bench_set_throughput(thr_id, throughput);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
56
bench.cpp
56
bench.cpp
@ -21,7 +21,7 @@ static pthread_barrier_t algo_barr;
|
|||||||
static pthread_mutex_t bench_lock = PTHREAD_MUTEX_INITIALIZER;
|
static pthread_mutex_t bench_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
|
||||||
extern double thr_hashrates[MAX_GPUS];
|
extern double thr_hashrates[MAX_GPUS];
|
||||||
extern enum sha_algos opt_algo;
|
extern volatile enum sha_algos opt_algo;
|
||||||
|
|
||||||
void bench_init(int threads)
|
void bench_init(int threads)
|
||||||
{
|
{
|
||||||
@ -41,6 +41,48 @@ void bench_free()
|
|||||||
pthread_barrier_destroy(&algo_barr);
|
pthread_barrier_destroy(&algo_barr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// required to switch algos
|
||||||
|
void algo_free_all(int thr_id)
|
||||||
|
{
|
||||||
|
// only initialized algos will be freed
|
||||||
|
free_blake256(thr_id);
|
||||||
|
free_bmw(thr_id);
|
||||||
|
free_c11(thr_id);
|
||||||
|
free_deep(thr_id);
|
||||||
|
free_keccak256(thr_id);
|
||||||
|
free_fresh(thr_id);
|
||||||
|
free_fugue256(thr_id);
|
||||||
|
free_groestlcoin(thr_id);
|
||||||
|
free_heavy(thr_id);
|
||||||
|
free_jackpot(thr_id);
|
||||||
|
free_luffa(thr_id);
|
||||||
|
free_lyra2(thr_id);
|
||||||
|
free_lyra2v2(thr_id);
|
||||||
|
free_myriad(thr_id);
|
||||||
|
free_neoscrypt(thr_id);
|
||||||
|
free_nist5(thr_id);
|
||||||
|
free_pentablake(thr_id);
|
||||||
|
free_quark(thr_id);
|
||||||
|
free_qubit(thr_id);
|
||||||
|
free_skeincoin(thr_id);
|
||||||
|
free_skein2(thr_id);
|
||||||
|
free_s3(thr_id);
|
||||||
|
free_whirl(thr_id);
|
||||||
|
free_whirlx(thr_id);
|
||||||
|
free_x11(thr_id);
|
||||||
|
free_x13(thr_id);
|
||||||
|
free_x14(thr_id);
|
||||||
|
free_x15(thr_id);
|
||||||
|
free_x17(thr_id);
|
||||||
|
free_zr5(thr_id);
|
||||||
|
//free_sha256d(thr_id);
|
||||||
|
free_scrypt(thr_id);
|
||||||
|
free_scrypt_jane(thr_id);
|
||||||
|
|
||||||
|
// warn on cuda error
|
||||||
|
CUDA_LOG_ERROR();
|
||||||
|
}
|
||||||
|
|
||||||
// benchmark all algos (called once per mining thread)
|
// benchmark all algos (called once per mining thread)
|
||||||
bool bench_algo_switch_next(int thr_id)
|
bool bench_algo_switch_next(int thr_id)
|
||||||
{
|
{
|
||||||
@ -60,6 +102,11 @@ bool bench_algo_switch_next(int thr_id)
|
|||||||
if (algo == ALGO_SCRYPT) algo++;
|
if (algo == ALGO_SCRYPT) algo++;
|
||||||
if (algo == ALGO_SCRYPT_JANE) algo++;
|
if (algo == ALGO_SCRYPT_JANE) algo++;
|
||||||
|
|
||||||
|
// free current algo memory and track mem usage
|
||||||
|
mused = cuda_available_memory(thr_id);
|
||||||
|
algo_free_all(thr_id);
|
||||||
|
mfree = cuda_available_memory(thr_id);
|
||||||
|
|
||||||
// we need to wait completion on all cards before the switch
|
// we need to wait completion on all cards before the switch
|
||||||
if (opt_n_threads > 1) {
|
if (opt_n_threads > 1) {
|
||||||
pthread_barrier_wait(&miner_barr);
|
pthread_barrier_wait(&miner_barr);
|
||||||
@ -70,15 +117,12 @@ bool bench_algo_switch_next(int thr_id)
|
|||||||
format_hashrate(hashrate, rate);
|
format_hashrate(hashrate, rate);
|
||||||
gpulog(LOG_NOTICE, thr_id, "%s hashrate = %s", algo_names[prev_algo], rate);
|
gpulog(LOG_NOTICE, thr_id, "%s hashrate = %s", algo_names[prev_algo], rate);
|
||||||
|
|
||||||
// free current algo memory and track mem usage
|
|
||||||
mused = cuda_available_memory(thr_id);
|
|
||||||
miner_free_device(thr_id);
|
|
||||||
mfree = cuda_available_memory(thr_id);
|
|
||||||
|
|
||||||
// check if there is memory leak
|
// check if there is memory leak
|
||||||
if (device_mem_free[thr_id] > mfree) {
|
if (device_mem_free[thr_id] > mfree) {
|
||||||
gpulog(LOG_WARNING, thr_id, "memory leak detected in %s ! %d MB free",
|
gpulog(LOG_WARNING, thr_id, "memory leak detected in %s ! %d MB free",
|
||||||
algo_names[prev_algo], mfree);
|
algo_names[prev_algo], mfree);
|
||||||
|
cuda_reset_device(thr_id, NULL); // force to free the leak
|
||||||
|
mfree = cuda_available_memory(thr_id);
|
||||||
}
|
}
|
||||||
// store used memory per algo
|
// store used memory per algo
|
||||||
algo_mem_used[thr_id][opt_algo] = device_mem_free[thr_id] - mused;
|
algo_mem_used[thr_id][opt_algo] = device_mem_free[thr_id] - mused;
|
||||||
|
59
ccminer.cpp
59
ccminer.cpp
@ -108,7 +108,7 @@ int opt_timeout = 300; // curl
|
|||||||
int opt_scantime = 10;
|
int opt_scantime = 10;
|
||||||
static json_t *opt_config;
|
static json_t *opt_config;
|
||||||
static const bool opt_time = true;
|
static const bool opt_time = true;
|
||||||
enum sha_algos opt_algo = ALGO_X11;
|
volatile enum sha_algos opt_algo = ALGO_AUTO;
|
||||||
int opt_n_threads = 0;
|
int opt_n_threads = 0;
|
||||||
int gpu_threads = 1;
|
int gpu_threads = 1;
|
||||||
int64_t opt_affinity = -1L;
|
int64_t opt_affinity = -1L;
|
||||||
@ -1435,49 +1435,6 @@ static bool wanna_mine(int thr_id)
|
|||||||
return state;
|
return state;
|
||||||
}
|
}
|
||||||
|
|
||||||
// required to switch algos
|
|
||||||
void miner_free_device(int thr_id)
|
|
||||||
{
|
|
||||||
// todo: some kind of algo "registration"
|
|
||||||
// to call automatically if needed
|
|
||||||
free_blake256(thr_id);
|
|
||||||
free_bmw(thr_id);
|
|
||||||
free_c11(thr_id);
|
|
||||||
free_deep(thr_id);
|
|
||||||
free_keccak256(thr_id);
|
|
||||||
free_fresh(thr_id);
|
|
||||||
free_fugue256(thr_id);
|
|
||||||
free_groestlcoin(thr_id);
|
|
||||||
free_heavy(thr_id);
|
|
||||||
free_jackpot(thr_id);
|
|
||||||
free_luffa(thr_id);
|
|
||||||
free_lyra2(thr_id);
|
|
||||||
free_lyra2v2(thr_id);
|
|
||||||
free_myriad(thr_id);
|
|
||||||
free_neoscrypt(thr_id);
|
|
||||||
free_nist5(thr_id);
|
|
||||||
free_pentablake(thr_id);
|
|
||||||
free_quark(thr_id);
|
|
||||||
free_qubit(thr_id);
|
|
||||||
free_skeincoin(thr_id);
|
|
||||||
free_skein2(thr_id);
|
|
||||||
free_s3(thr_id);
|
|
||||||
free_whirl(thr_id);
|
|
||||||
free_whirlx(thr_id);
|
|
||||||
free_x11(thr_id);
|
|
||||||
free_x13(thr_id);
|
|
||||||
free_x14(thr_id);
|
|
||||||
free_x15(thr_id);
|
|
||||||
free_x17(thr_id);
|
|
||||||
free_zr5(thr_id);
|
|
||||||
//free_sha256d(thr_id);
|
|
||||||
free_scrypt(thr_id);
|
|
||||||
free_scrypt_jane(thr_id);
|
|
||||||
|
|
||||||
// reset remains of error..
|
|
||||||
cudaGetLastError();
|
|
||||||
}
|
|
||||||
|
|
||||||
static void *miner_thread(void *userdata)
|
static void *miner_thread(void *userdata)
|
||||||
{
|
{
|
||||||
struct thr_info *mythr = (struct thr_info *)userdata;
|
struct thr_info *mythr = (struct thr_info *)userdata;
|
||||||
@ -1634,7 +1591,7 @@ static void *miner_thread(void *userdata)
|
|||||||
|
|
||||||
pthread_mutex_unlock(&g_work_lock);
|
pthread_mutex_unlock(&g_work_lock);
|
||||||
|
|
||||||
// --benchmark [-a auto]
|
// --benchmark [-a all]
|
||||||
if (opt_benchmark && bench_algo >= 0) {
|
if (opt_benchmark && bench_algo >= 0) {
|
||||||
//gpulog(LOG_DEBUG, thr_id, "loop %d", loopcnt);
|
//gpulog(LOG_DEBUG, thr_id, "loop %d", loopcnt);
|
||||||
if (loopcnt >= 3) {
|
if (loopcnt >= 3) {
|
||||||
@ -1756,6 +1713,7 @@ static void *miner_thread(void *userdata)
|
|||||||
break;
|
break;
|
||||||
case ALGO_KECCAK:
|
case ALGO_KECCAK:
|
||||||
case ALGO_JACKPOT:
|
case ALGO_JACKPOT:
|
||||||
|
case ALGO_X14:
|
||||||
case ALGO_X15:
|
case ALGO_X15:
|
||||||
minmax = 0x300000;
|
minmax = 0x300000;
|
||||||
break;
|
break;
|
||||||
@ -1800,8 +1758,10 @@ static void *miner_thread(void *userdata)
|
|||||||
hashes_done = 0;
|
hashes_done = 0;
|
||||||
gettimeofday(&tv_start, NULL);
|
gettimeofday(&tv_start, NULL);
|
||||||
|
|
||||||
|
// check (and reset) previous errors
|
||||||
cudaGetLastError(); // reset previous errors
|
cudaError_t err = cudaGetLastError();
|
||||||
|
if (err != cudaSuccess && !opt_quiet)
|
||||||
|
gpulog(LOG_WARNING, thr_id, "%s", cudaGetErrorString(err));
|
||||||
|
|
||||||
/* scan nonces for a proof-of-work hash */
|
/* scan nonces for a proof-of-work hash */
|
||||||
switch (opt_algo) {
|
switch (opt_algo) {
|
||||||
@ -1904,6 +1864,7 @@ static void *miner_thread(void *userdata)
|
|||||||
break;
|
break;
|
||||||
case ALGO_X14:
|
case ALGO_X14:
|
||||||
rc = scanhash_x14(thr_id, &work, max_nonce, &hashes_done);
|
rc = scanhash_x14(thr_id, &work, max_nonce, &hashes_done);
|
||||||
|
break;
|
||||||
case ALGO_X15:
|
case ALGO_X15:
|
||||||
rc = scanhash_x15(thr_id, &work, max_nonce, &hashes_done);
|
rc = scanhash_x15(thr_id, &work, max_nonce, &hashes_done);
|
||||||
break;
|
break;
|
||||||
@ -2388,7 +2349,9 @@ void parse_arg(int key, char *arg)
|
|||||||
}
|
}
|
||||||
if (i == ALGO_COUNT) {
|
if (i == ALGO_COUNT) {
|
||||||
// some aliases...
|
// some aliases...
|
||||||
if (!strcasecmp("flax", arg))
|
if (!strcasecmp("all", arg))
|
||||||
|
i = opt_algo = ALGO_AUTO;
|
||||||
|
else if (!strcasecmp("flax", arg))
|
||||||
i = opt_algo = ALGO_C11;
|
i = opt_algo = ALGO_C11;
|
||||||
else if (!strcasecmp("diamond", arg))
|
else if (!strcasecmp("diamond", arg))
|
||||||
i = opt_algo = ALGO_DMD_GR;
|
i = opt_algo = ALGO_DMD_GR;
|
||||||
|
11
cuda.cpp
11
cuda.cpp
@ -156,8 +156,7 @@ uint32_t cuda_default_throughput(int thr_id, uint32_t defcount)
|
|||||||
uint32_t throughput = gpus_intensity[thr_id] ? gpus_intensity[thr_id] : defcount;
|
uint32_t throughput = gpus_intensity[thr_id] ? gpus_intensity[thr_id] : defcount;
|
||||||
if (gpu_threads > 1 && throughput == defcount) throughput /= (gpu_threads-1);
|
if (gpu_threads > 1 && throughput == defcount) throughput /= (gpu_threads-1);
|
||||||
api_set_throughput(thr_id, throughput);
|
api_set_throughput(thr_id, throughput);
|
||||||
bench_set_throughput(thr_id, throughput);
|
//gpulog(LOG_INFO, thr_id, "throughput %u", throughput);
|
||||||
//if (opt_debug) applog(LOG_DEBUG, "GPU %d-%d: throughput %u", dev_id, thr_id, throughput);
|
|
||||||
return throughput;
|
return throughput;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -196,6 +195,14 @@ int cuda_available_memory(int thr_id)
|
|||||||
return (int) (mfree / (1024 * 1024));
|
return (int) (mfree / (1024 * 1024));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check (and reset) last cuda error, and report it in logs
|
||||||
|
void cuda_log_lasterror(int thr_id, const char* func, int line)
|
||||||
|
{
|
||||||
|
cudaError_t err = cudaGetLastError();
|
||||||
|
if (err != cudaSuccess && !opt_quiet)
|
||||||
|
gpulog(LOG_WARNING, thr_id, "%s:%d %s", func, line, cudaGetErrorString(err));
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
} /* extern "C" */
|
} /* extern "C" */
|
||||||
#endif
|
#endif
|
||||||
|
@ -147,6 +147,7 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
|
|||||||
{
|
{
|
||||||
int res = 1;
|
int res = 1;
|
||||||
work_set_target_ratio(work, vhash64);
|
work_set_target_ratio(work, vhash64);
|
||||||
|
pdata[19] = foundNonces[0];
|
||||||
// check if there was another one...
|
// check if there was another one...
|
||||||
if (foundNonces[1] != 0)
|
if (foundNonces[1] != 0)
|
||||||
{
|
{
|
||||||
@ -155,9 +156,9 @@ extern "C" int scanhash_lyra2v2(int thr_id, struct work* work, uint32_t max_nonc
|
|||||||
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio)
|
if (bn_hash_target_ratio(vhash64, ptarget) > work->shareratio)
|
||||||
work_set_target_ratio(work, vhash64);
|
work_set_target_ratio(work, vhash64);
|
||||||
pdata[21] = foundNonces[1];
|
pdata[21] = foundNonces[1];
|
||||||
|
//xchg(pdata[19], pdata[21]);
|
||||||
res++;
|
res++;
|
||||||
}
|
}
|
||||||
pdata[19] = foundNonces[0];
|
|
||||||
MyStreamSynchronize(NULL, 0, device_map[thr_id]);
|
MyStreamSynchronize(NULL, 0, device_map[thr_id]);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
5
miner.h
5
miner.h
@ -493,6 +493,9 @@ int cuda_available_memory(int thr_id);
|
|||||||
uint32_t cuda_default_throughput(int thr_id, uint32_t defcount);
|
uint32_t cuda_default_throughput(int thr_id, uint32_t defcount);
|
||||||
#define device_intensity(t,f,d) cuda_default_throughput(t,d)
|
#define device_intensity(t,f,d) cuda_default_throughput(t,d)
|
||||||
|
|
||||||
|
void cuda_log_lasterror(int thr_id, const char* func, int line);
|
||||||
|
#define CUDA_LOG_ERROR() cuda_log_lasterror(thr_id, __func__, __LINE__)
|
||||||
|
|
||||||
#define CL_N "\x1B[0m"
|
#define CL_N "\x1B[0m"
|
||||||
#define CL_RED "\x1B[31m"
|
#define CL_RED "\x1B[31m"
|
||||||
#define CL_GRN "\x1B[32m"
|
#define CL_GRN "\x1B[32m"
|
||||||
@ -553,7 +556,7 @@ void bench_free();
|
|||||||
bool bench_algo_switch_next(int thr_id);
|
bool bench_algo_switch_next(int thr_id);
|
||||||
void bench_set_throughput(int thr_id, uint32_t throughput);
|
void bench_set_throughput(int thr_id, uint32_t throughput);
|
||||||
void bench_display_results();
|
void bench_display_results();
|
||||||
|
void algo_free_all(int thr_id);
|
||||||
|
|
||||||
struct stratum_job {
|
struct stratum_job {
|
||||||
char *job_id;
|
char *job_id;
|
||||||
|
@ -20,6 +20,8 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign
|
|||||||
int intensity = is_windows() ? 18 : 19;
|
int intensity = is_windows() ? 18 : 19;
|
||||||
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
|
uint32_t throughput = cuda_default_throughput(thr_id, 1U << intensity);
|
||||||
throughput = throughput / 32; /* set for max intensity ~= 20 */
|
throughput = throughput / 32; /* set for max intensity ~= 20 */
|
||||||
|
api_set_throughput(thr_id, throughput);
|
||||||
|
|
||||||
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce + 1);
|
if (init[thr_id]) throughput = min(throughput, max_nonce - first_nonce + 1);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
|
@ -48,7 +48,7 @@ extern "C" int scanhash_luffa(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
if (opt_cudaschedule == -1) // to reduce cpu usage...
|
if (opt_cudaschedule == -1) // to reduce cpu usage...
|
||||||
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
|
||||||
|
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], throughput * 64));
|
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput));
|
||||||
|
|
||||||
qubit_luffa512_cpu_init(thr_id, throughput);
|
qubit_luffa512_cpu_init(thr_id, throughput);
|
||||||
cuda_check_cpu_init(thr_id, throughput);
|
cuda_check_cpu_init(thr_id, throughput);
|
||||||
@ -103,12 +103,12 @@ extern "C" void free_luffa(int thr_id)
|
|||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaDeviceSynchronize();
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaThreadSynchronize();
|
||||||
|
init[thr_id] = false;
|
||||||
}
|
}
|
@ -395,6 +395,9 @@ unsigned char GetNfactor(unsigned int nTimestamp)
|
|||||||
} else if (!strcmp(jane_params, "RAD") || !strcasecmp(jane_params, "RadioactiveCoin")) {
|
} else if (!strcmp(jane_params, "RAD") || !strcasecmp(jane_params, "RadioactiveCoin")) {
|
||||||
// InternetCoin:1389196388, minN: 4, maxN: 30
|
// InternetCoin:1389196388, minN: 4, maxN: 30
|
||||||
Ntimestamp = 1389196388; minN= 4; maxN= 30;
|
Ntimestamp = 1389196388; minN= 4; maxN= 30;
|
||||||
|
} else if (!strcmp(jane_params, "LEO") || !strcasecmp(jane_params, "LEOCoin")) {
|
||||||
|
// LEOCoin:1402845776, minN: 4, maxN: 30
|
||||||
|
Ntimestamp = 1402845776; minN= 4; maxN= 30;
|
||||||
} else {
|
} else {
|
||||||
if (sscanf(jane_params, "%u,%u,%u", &Ntimestamp, &minN, &maxN) != 3)
|
if (sscanf(jane_params, "%u,%u,%u", &Ntimestamp, &minN, &maxN) != 3)
|
||||||
if (sscanf(jane_params, "%u", &Nfactor) == 1) return Nfactor; // skip bounding against minN, maxN
|
if (sscanf(jane_params, "%u", &Nfactor) == 1) return Nfactor; // skip bounding against minN, maxN
|
||||||
|
10
x13/x13.cu
10
x13/x13.cu
@ -208,6 +208,8 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
|
|||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||||
|
|
||||||
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
||||||
if (foundNonce != UINT32_MAX)
|
if (foundNonce != UINT32_MAX)
|
||||||
{
|
{
|
||||||
@ -245,6 +247,9 @@ extern "C" int scanhash_x13(int thr_id, struct work* work, uint32_t max_nonce, u
|
|||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce + 1;
|
||||||
|
|
||||||
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -254,7 +259,7 @@ extern "C" void free_x13(int thr_id)
|
|||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaThreadSynchronize();
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
@ -263,7 +268,8 @@ extern "C" void free_x13(int thr_id)
|
|||||||
x13_fugue512_cpu_free(thr_id);
|
x13_fugue512_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
init[thr_id] = false;
|
||||||
}
|
}
|
@ -14,8 +14,8 @@
|
|||||||
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
|
__constant__ uint64_t c_PaddedMessage80[16]; // padded message (80 bytes + padding)
|
||||||
__constant__ uint32_t pTarget[8];
|
__constant__ uint32_t pTarget[8];
|
||||||
|
|
||||||
static uint32_t *d_wnounce[MAX_GPUS];
|
static uint32_t *h_wnounce[MAX_GPUS] = { 0 };
|
||||||
static uint32_t *d_WNonce[MAX_GPUS];
|
static uint32_t *d_WNonce[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
#define USE_ALL_TABLES 1
|
#define USE_ALL_TABLES 1
|
||||||
|
|
||||||
@ -2575,14 +2575,14 @@ extern void x15_whirlpool_cpu_init(int thr_id, uint32_t threads, int mode)
|
|||||||
}
|
}
|
||||||
|
|
||||||
cudaMalloc(&d_WNonce[thr_id], sizeof(uint32_t));
|
cudaMalloc(&d_WNonce[thr_id], sizeof(uint32_t));
|
||||||
cudaMallocHost(&d_wnounce[thr_id], sizeof(uint32_t));
|
cudaMallocHost(&h_wnounce[thr_id], sizeof(uint32_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
extern void x15_whirlpool_cpu_free(int thr_id)
|
extern void x15_whirlpool_cpu_free(int thr_id)
|
||||||
{
|
{
|
||||||
cudaFree(d_WNonce[thr_id]);
|
cudaFree(d_WNonce[thr_id]);
|
||||||
cudaFreeHost(d_wnounce[thr_id]);
|
cudaFreeHost(h_wnounce[thr_id]);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
@ -2613,9 +2613,9 @@ extern uint32_t whirlpool512_cpu_finalhash_64(int thr_id, uint32_t threads, uint
|
|||||||
oldwhirlpool_gpu_finalhash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector,d_WNonce[thr_id]);
|
oldwhirlpool_gpu_finalhash_64<<<grid, block, shared_size>>>(threads, startNounce, (uint64_t*)d_hash, d_nonceVector,d_WNonce[thr_id]);
|
||||||
|
|
||||||
MyStreamSynchronize(NULL, order, thr_id);
|
MyStreamSynchronize(NULL, order, thr_id);
|
||||||
cudaMemcpy(d_wnounce[thr_id], d_WNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
cudaMemcpy(h_wnounce[thr_id], d_WNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||||
|
|
||||||
result = *d_wnounce[thr_id];
|
result = *h_wnounce[thr_id];
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
21
x15/x14.cu
21
x15/x14.cu
@ -27,7 +27,7 @@ extern "C" {
|
|||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
|
|
||||||
// Memory for the hash functions
|
// Memory for the hash functions
|
||||||
static uint32_t *d_hash[MAX_GPUS];
|
static uint32_t *d_hash[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
extern void quark_blake512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_blake512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata);
|
extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata);
|
||||||
@ -190,7 +190,9 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
|
|
||||||
cuda_check_cpu_init(thr_id, throughput);
|
cuda_check_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
CUDA_CALL_OR_RET_X(cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput), 0);
|
cudaMalloc(&d_hash[thr_id], (size_t) 64 * throughput);
|
||||||
|
|
||||||
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
}
|
}
|
||||||
@ -217,9 +219,12 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
x13_fugue512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
x14_shabal512_cpu_hash_64(thr_id, throughput, pdata[19], NULL, d_hash[thr_id], order++);
|
||||||
|
|
||||||
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||||
|
|
||||||
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
uint32_t foundNonce = cuda_check_hash(thr_id, throughput, pdata[19], d_hash[thr_id]);
|
||||||
|
|
||||||
if (foundNonce != UINT32_MAX)
|
if (foundNonce != UINT32_MAX)
|
||||||
{
|
{
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
@ -250,6 +255,8 @@ extern "C" int scanhash_x14(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||||
|
|
||||||
|
CUDA_LOG_ERROR();
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce + 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -260,17 +267,17 @@ extern "C" void free_x14(int thr_id)
|
|||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaThreadSynchronize();
|
||||||
cudaDeviceSynchronize();
|
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
|
||||||
|
|
||||||
quark_groestl512_cpu_free(thr_id);
|
quark_groestl512_cpu_free(thr_id);
|
||||||
x11_simd512_cpu_free(thr_id);
|
x11_simd512_cpu_free(thr_id);
|
||||||
x13_fugue512_cpu_free(thr_id);
|
x13_fugue512_cpu_free(thr_id);
|
||||||
|
|
||||||
|
cudaFree(d_hash[thr_id]);
|
||||||
|
d_hash[thr_id] = NULL;
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
init[thr_id] = false;
|
||||||
}
|
}
|
||||||
|
@ -28,7 +28,7 @@ extern "C" {
|
|||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
|
|
||||||
// Memory for the hash functions
|
// Memory for the hash functions
|
||||||
static uint32_t *d_hash[MAX_GPUS];
|
static uint32_t *d_hash[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
extern void quark_blake512_cpu_init(int thr_id, uint32_t threads);
|
extern void quark_blake512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata);
|
extern void quark_blake512_cpu_setBlock_80(int thr_id, uint32_t *pdata);
|
||||||
@ -264,7 +264,6 @@ extern "C" int scanhash_x15(int thr_id, struct work* work, uint32_t max_nonce,
|
|||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
*hashes_done = pdata[19] - first_nonce + 1;
|
||||||
|
|
||||||
x15_whirlpool_cpu_free(thr_id);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -274,16 +273,17 @@ extern "C" void free_x15(int thr_id)
|
|||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaThreadSynchronize();
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
quark_groestl512_cpu_free(thr_id);
|
quark_groestl512_cpu_free(thr_id);
|
||||||
x11_simd512_cpu_free(thr_id);
|
x11_simd512_cpu_free(thr_id);
|
||||||
x13_fugue512_cpu_free(thr_id);
|
x13_fugue512_cpu_free(thr_id);
|
||||||
|
x15_whirlpool_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
init[thr_id] = false;
|
||||||
}
|
}
|
||||||
|
@ -78,6 +78,7 @@ extern void x14_shabal512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t sta
|
|||||||
|
|
||||||
extern void x15_whirlpool_cpu_init(int thr_id, uint32_t threads, int flag);
|
extern void x15_whirlpool_cpu_init(int thr_id, uint32_t threads, int flag);
|
||||||
extern void x15_whirlpool_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x15_whirlpool_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
|
extern void x15_whirlpool_cpu_free(int thr_id);
|
||||||
|
|
||||||
extern void x17_sha512_cpu_init(int thr_id, uint32_t threads);
|
extern void x17_sha512_cpu_init(int thr_id, uint32_t threads);
|
||||||
extern void x17_sha512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
extern void x17_sha512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order);
|
||||||
@ -296,16 +297,17 @@ extern "C" void free_x17(int thr_id)
|
|||||||
if (!init[thr_id])
|
if (!init[thr_id])
|
||||||
return;
|
return;
|
||||||
|
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaThreadSynchronize();
|
||||||
|
|
||||||
cudaFree(d_hash[thr_id]);
|
cudaFree(d_hash[thr_id]);
|
||||||
|
|
||||||
quark_groestl512_cpu_free(thr_id);
|
quark_groestl512_cpu_free(thr_id);
|
||||||
x11_simd512_cpu_free(thr_id);
|
x11_simd512_cpu_free(thr_id);
|
||||||
x13_fugue512_cpu_free(thr_id);
|
x13_fugue512_cpu_free(thr_id);
|
||||||
|
x15_whirlpool_cpu_free(thr_id);
|
||||||
|
|
||||||
cuda_check_cpu_free(thr_id);
|
cuda_check_cpu_free(thr_id);
|
||||||
init[thr_id] = false;
|
|
||||||
|
|
||||||
cudaDeviceSynchronize();
|
cudaDeviceSynchronize();
|
||||||
|
init[thr_id] = false;
|
||||||
}
|
}
|
Loading…
x
Reference in New Issue
Block a user