Various algos cleanup + lyra2 sec nonce fix
This commit is contained in:
parent
34fd408440
commit
03c3b7d341
@ -9,7 +9,7 @@
|
|||||||
#define USE_SHARED 1
|
#define USE_SHARED 1
|
||||||
|
|
||||||
uint32_t *d_fugue256_hashoutput[MAX_GPUS];
|
uint32_t *d_fugue256_hashoutput[MAX_GPUS];
|
||||||
uint32_t *d_resultNonce[MAX_GPUS];
|
static uint32_t *d_resultNonce[MAX_GPUS];
|
||||||
|
|
||||||
__constant__ uint32_t GPUstate[30]; // Single GPU
|
__constant__ uint32_t GPUstate[30]; // Single GPU
|
||||||
__constant__ uint32_t pTarget[8]; // Single GPU
|
__constant__ uint32_t pTarget[8]; // Single GPU
|
||||||
@ -718,10 +718,9 @@ fugue256_gpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outp
|
|||||||
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); }
|
cudaBindTexture(NULL, &texname, texmem, &channelDesc, texsize ); }
|
||||||
|
|
||||||
|
|
||||||
|
__host__
|
||||||
void fugue256_cpu_init(int thr_id, uint32_t threads)
|
void fugue256_cpu_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
cudaSetDevice(device_map[thr_id]);
|
|
||||||
|
|
||||||
// Kopiere die Hash-Tabellen in den GPU-Speicher
|
// Kopiere die Hash-Tabellen in den GPU-Speicher
|
||||||
texDef(mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256);
|
texDef(mixTab0Tex, mixTab0m, mixtab0_cpu, sizeof(uint32_t)*256);
|
||||||
texDef(mixTab1Tex, mixTab1m, mixtab1_cpu, sizeof(uint32_t)*256);
|
texDef(mixTab1Tex, mixTab1m, mixtab1_cpu, sizeof(uint32_t)*256);
|
||||||
@ -733,25 +732,23 @@ void fugue256_cpu_init(int thr_id, uint32_t threads)
|
|||||||
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void fugue256_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
__host__
|
||||||
|
void fugue256_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
||||||
{
|
{
|
||||||
// CPU-Vorbereitungen treffen
|
// CPU-Vorbereitungen treffen
|
||||||
sph_fugue256_context ctx_fugue_const;
|
sph_fugue256_context ctx_fugue_const;
|
||||||
sph_fugue256_init(&ctx_fugue_const);
|
sph_fugue256_init(&ctx_fugue_const);
|
||||||
sph_fugue256 (&ctx_fugue_const, data, 80); // State speichern
|
sph_fugue256 (&ctx_fugue_const, data, 80); // State speichern
|
||||||
|
|
||||||
cudaMemcpyToSymbol( GPUstate,
|
cudaMemcpyToSymbol(GPUstate, ctx_fugue_const.S, sizeof(uint32_t) * 30);
|
||||||
ctx_fugue_const.S,
|
|
||||||
sizeof(uint32_t) * 30 );
|
|
||||||
|
|
||||||
cudaMemcpyToSymbol( pTarget,
|
cudaMemcpyToSymbol(pTarget, pTargetIn, sizeof(uint32_t) * 8);
|
||||||
pTargetIn,
|
|
||||||
sizeof(uint32_t) * 8 );
|
|
||||||
|
|
||||||
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
|
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void fugue256_cpu_hash(int thr_id, uint32_t threads, int startNounce, void *outputHashes, uint32_t *nounce)
|
__host__
|
||||||
|
void fugue256_cpu_hash(int thr_id, uint32_t threads, int startNounce, void *outputHashes, uint32_t *nounce)
|
||||||
{
|
{
|
||||||
#if USE_SHARED
|
#if USE_SHARED
|
||||||
const uint32_t threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN
|
const uint32_t threadsperblock = 256; // Alignment mit mixtab Grösse. NICHT ÄNDERN
|
||||||
|
@ -280,8 +280,8 @@ void groestl256_cpu_init(int thr_id, uint32_t threads)
|
|||||||
__host__
|
__host__
|
||||||
uint32_t groestl256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
|
uint32_t groestl256_cpu_hash_32(int thr_id, uint32_t threads, uint32_t startNounce, uint64_t *d_outputHash, int order)
|
||||||
{
|
{
|
||||||
uint32_t result = 0xffffffff;
|
uint32_t result = UINT32_MAX;
|
||||||
cudaMemset(d_GNonces[thr_id], 0xff, sizeof(uint32_t));
|
cudaMemset(d_GNonces[thr_id], 0xff, 2*sizeof(uint32_t));
|
||||||
const uint32_t threadsperblock = 256;
|
const uint32_t threadsperblock = 256;
|
||||||
|
|
||||||
// berechne wie viele Thread Blocks wir brauchen
|
// berechne wie viele Thread Blocks wir brauchen
|
||||||
@ -308,7 +308,10 @@ __host__
|
|||||||
uint32_t groestl256_getSecNonce(int thr_id, int num)
|
uint32_t groestl256_getSecNonce(int thr_id, int num)
|
||||||
{
|
{
|
||||||
uint32_t results[2];
|
uint32_t results[2];
|
||||||
|
memset(results, 0xFF, sizeof(results));
|
||||||
cudaMemcpy(results, d_GNonces[thr_id], sizeof(results), cudaMemcpyDeviceToHost);
|
cudaMemcpy(results, d_GNonces[thr_id], sizeof(results), cudaMemcpyDeviceToHost);
|
||||||
|
if (results[1] == results[0])
|
||||||
|
return UINT32_MAX;
|
||||||
return results[num];
|
return results[num];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,10 +23,9 @@ extern uint32_t keccak256_cpu_hash_80(int thr_id, uint32_t threads, uint32_t sta
|
|||||||
// CPU Hash
|
// CPU Hash
|
||||||
extern "C" void keccak256_hash(void *state, const void *input)
|
extern "C" void keccak256_hash(void *state, const void *input)
|
||||||
{
|
{
|
||||||
|
uint32_t _ALIGN(64) hash[16];
|
||||||
sph_keccak_context ctx_keccak;
|
sph_keccak_context ctx_keccak;
|
||||||
|
|
||||||
uint32_t hash[16];
|
|
||||||
|
|
||||||
sph_keccak256_init(&ctx_keccak);
|
sph_keccak256_init(&ctx_keccak);
|
||||||
sph_keccak256 (&ctx_keccak, input, 80);
|
sph_keccak256 (&ctx_keccak, input, 80);
|
||||||
sph_keccak256_close(&ctx_keccak, (void*) hash);
|
sph_keccak256_close(&ctx_keccak, (void*) hash);
|
||||||
@ -50,8 +49,8 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
|
|||||||
if (!init[thr_id]) {
|
if (!init[thr_id]) {
|
||||||
cudaSetDevice(device_map[thr_id]);
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
|
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], throughput * 64));
|
||||||
keccak256_cpu_init(thr_id, (int) throughput);
|
keccak256_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
}
|
}
|
||||||
@ -65,16 +64,16 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
|
|||||||
do {
|
do {
|
||||||
int order = 0;
|
int order = 0;
|
||||||
|
|
||||||
uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, (int) throughput, pdata[19], d_hash[thr_id], order++);
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||||
|
|
||||||
|
uint32_t foundNonce = keccak256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
if (foundNonce != UINT32_MAX)
|
if (foundNonce != UINT32_MAX)
|
||||||
{
|
{
|
||||||
uint32_t Htarg = ptarget[7];
|
uint32_t _ALIGN(64) vhash64[8];
|
||||||
uint32_t vhash64[8];
|
|
||||||
be32enc(&endiandata[19], foundNonce);
|
be32enc(&endiandata[19], foundNonce);
|
||||||
keccak256_hash(vhash64, endiandata);
|
keccak256_hash(vhash64, endiandata);
|
||||||
|
|
||||||
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
|
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
|
||||||
*hashes_done = foundNonce - first_nonce + 1;
|
|
||||||
pdata[19] = foundNonce;
|
pdata[19] = foundNonce;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
@ -91,6 +90,5 @@ extern "C" int scanhash_keccak256(int thr_id, uint32_t *pdata,
|
|||||||
|
|
||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -8,10 +8,10 @@
|
|||||||
|
|
||||||
// globaler Speicher für alle HeftyHashes aller Threads
|
// globaler Speicher für alle HeftyHashes aller Threads
|
||||||
__constant__ uint32_t pTarget[8]; // Single GPU
|
__constant__ uint32_t pTarget[8]; // Single GPU
|
||||||
extern uint32_t *d_resultNonce[MAX_GPUS];
|
|
||||||
|
|
||||||
__constant__ uint32_t groestlcoin_gpu_msg[32];
|
__constant__ uint32_t groestlcoin_gpu_msg[32];
|
||||||
|
|
||||||
|
static uint32_t *d_resultNonce[MAX_GPUS];
|
||||||
|
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if __CUDA_ARCH__ >= 300
|
||||||
// 64 Registers Variant for Compute 3.0+
|
// 64 Registers Variant for Compute 3.0+
|
||||||
#include "quark/groestl_functions_quad.h"
|
#include "quark/groestl_functions_quad.h"
|
||||||
@ -30,7 +30,8 @@ void groestlcoin_gpu_hash_quad(uint32_t threads, uint32_t startNounce, uint32_t
|
|||||||
{
|
{
|
||||||
// GROESTL
|
// GROESTL
|
||||||
uint32_t paddedInput[8];
|
uint32_t paddedInput[8];
|
||||||
#pragma unroll 8
|
|
||||||
|
#pragma unroll 8
|
||||||
for(int k=0;k<8;k++) paddedInput[k] = groestlcoin_gpu_msg[4*k+threadIdx.x%4];
|
for(int k=0;k<8;k++) paddedInput[k] = groestlcoin_gpu_msg[4*k+threadIdx.x%4];
|
||||||
|
|
||||||
uint32_t nounce = startNounce + thread;
|
uint32_t nounce = startNounce + thread;
|
||||||
@ -68,7 +69,7 @@ void groestlcoin_gpu_hash_quad(uint32_t threads, uint32_t startNounce, uint32_t
|
|||||||
int i, position = -1;
|
int i, position = -1;
|
||||||
bool rc = true;
|
bool rc = true;
|
||||||
|
|
||||||
#pragma unroll 8
|
#pragma unroll 8
|
||||||
for (i = 7; i >= 0; i--) {
|
for (i = 7; i >= 0; i--) {
|
||||||
if (out_state[i] > pTarget[i]) {
|
if (out_state[i] > pTarget[i]) {
|
||||||
if(position < i) {
|
if(position < i) {
|
||||||
@ -92,16 +93,14 @@ void groestlcoin_gpu_hash_quad(uint32_t threads, uint32_t startNounce, uint32_t
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setup-Funktionen
|
__host__
|
||||||
__host__ void groestlcoin_cpu_init(int thr_id, uint32_t threads)
|
void groestlcoin_cpu_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
cudaSetDevice(device_map[thr_id]);
|
|
||||||
|
|
||||||
// Speicher für Gewinner-Nonce belegen
|
|
||||||
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
__host__
|
||||||
|
void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
||||||
{
|
{
|
||||||
// Nachricht expandieren und setzen
|
// Nachricht expandieren und setzen
|
||||||
uint32_t msgBlock[32];
|
uint32_t msgBlock[32];
|
||||||
@ -128,7 +127,8 @@ __host__ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
|||||||
sizeof(uint32_t) * 8 );
|
sizeof(uint32_t) * 8 );
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
|
__host__
|
||||||
|
void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
|
||||||
{
|
{
|
||||||
uint32_t threadsperblock = 256;
|
uint32_t threadsperblock = 256;
|
||||||
|
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
// globaler Speicher für alle HeftyHashes aller Threads
|
// globaler Speicher für alle HeftyHashes aller Threads
|
||||||
__constant__ uint32_t pTarget[8]; // Single GPU
|
__constant__ uint32_t pTarget[8]; // Single GPU
|
||||||
uint32_t *d_outputHashes[MAX_GPUS];
|
uint32_t *d_outputHashes[MAX_GPUS];
|
||||||
extern uint32_t *d_resultNonce[MAX_GPUS];
|
static uint32_t *d_resultNonce[MAX_GPUS];
|
||||||
|
|
||||||
__constant__ uint32_t myriadgroestl_gpu_msg[32];
|
__constant__ uint32_t myriadgroestl_gpu_msg[32];
|
||||||
|
|
||||||
@ -299,11 +299,10 @@ __global__ void
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setup-Funktionen
|
// Setup Function
|
||||||
__host__ void myriadgroestl_cpu_init(int thr_id, uint32_t threads)
|
__host__
|
||||||
|
void myriadgroestl_cpu_init(int thr_id, uint32_t threads)
|
||||||
{
|
{
|
||||||
cudaSetDevice(device_map[thr_id]);
|
|
||||||
|
|
||||||
cudaMemcpyToSymbol( myr_sha256_gpu_hashTable,
|
cudaMemcpyToSymbol( myr_sha256_gpu_hashTable,
|
||||||
myr_sha256_cpu_hashTable,
|
myr_sha256_cpu_hashTable,
|
||||||
sizeof(uint32_t) * 8 );
|
sizeof(uint32_t) * 8 );
|
||||||
@ -328,7 +327,8 @@ __host__ void myriadgroestl_cpu_init(int thr_id, uint32_t threads)
|
|||||||
cudaMalloc(&d_outputHashes[thr_id], 16*sizeof(uint32_t)*threads);
|
cudaMalloc(&d_outputHashes[thr_id], 16*sizeof(uint32_t)*threads);
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
__host__
|
||||||
|
void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
||||||
{
|
{
|
||||||
// Nachricht expandieren und setzen
|
// Nachricht expandieren und setzen
|
||||||
uint32_t msgBlock[32];
|
uint32_t msgBlock[32];
|
||||||
@ -355,7 +355,8 @@ __host__ void myriadgroestl_cpu_setBlock(int thr_id, void *data, void *pTargetIn
|
|||||||
sizeof(uint32_t) * 8 );
|
sizeof(uint32_t) * 8 );
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__ void myriadgroestl_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
|
__host__
|
||||||
|
void myriadgroestl_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
|
||||||
{
|
{
|
||||||
uint32_t threadsperblock = 256;
|
uint32_t threadsperblock = 256;
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
|
||||||
#include "uint256.h"
|
#include "uint256.h"
|
||||||
#include "sph/sph_fugue.h"
|
#include "sph/sph_fugue.h"
|
||||||
@ -22,7 +23,7 @@ extern "C" void my_fugue256_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
|||||||
|
|
||||||
static bool init[MAX_GPUS] = { 0 };
|
static bool init[MAX_GPUS] = { 0 };
|
||||||
|
|
||||||
extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
|
int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
|
||||||
uint32_t max_nonce, unsigned long *hashes_done)
|
uint32_t max_nonce, unsigned long *hashes_done)
|
||||||
{
|
{
|
||||||
uint32_t start_nonce = pdata[19]++;
|
uint32_t start_nonce = pdata[19]++;
|
||||||
@ -36,6 +37,8 @@ extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *pt
|
|||||||
// init
|
// init
|
||||||
if(!init[thr_id])
|
if(!init[thr_id])
|
||||||
{
|
{
|
||||||
|
cudaSetDevice(device_map[thr_id]);
|
||||||
|
|
||||||
fugue256_cpu_init(thr_id, throughput);
|
fugue256_cpu_init(thr_id, throughput);
|
||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
}
|
}
|
||||||
@ -50,10 +53,10 @@ extern "C" int scanhash_fugue256(int thr_id, uint32_t *pdata, const uint32_t *pt
|
|||||||
|
|
||||||
do {
|
do {
|
||||||
// GPU
|
// GPU
|
||||||
uint32_t foundNounce = 0xFFFFFFFF;
|
uint32_t foundNounce = UINT32_MAX;
|
||||||
fugue256_cpu_hash(thr_id, throughput, pdata[19], NULL, &foundNounce);
|
fugue256_cpu_hash(thr_id, throughput, pdata[19], NULL, &foundNounce);
|
||||||
|
|
||||||
if(foundNounce < 0xffffffff)
|
if (foundNounce < UINT32_MAX)
|
||||||
{
|
{
|
||||||
uint32_t hash[8];
|
uint32_t hash[8];
|
||||||
const uint32_t Htarg = ptarget[7];
|
const uint32_t Htarg = ptarget[7];
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <cuda_runtime.h>
|
||||||
#include <openssl/sha.h>
|
#include <openssl/sha.h>
|
||||||
|
|
||||||
#include "uint256.h"
|
#include "uint256.h"
|
||||||
@ -36,11 +37,11 @@ static bool init[MAX_GPUS] = { 0 };
|
|||||||
extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
|
extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
|
||||||
uint32_t max_nonce, unsigned long *hashes_done)
|
uint32_t max_nonce, unsigned long *hashes_done)
|
||||||
{
|
{
|
||||||
uint32_t start_nonce = pdata[19]++;
|
uint32_t start_nonce = pdata[19];
|
||||||
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
|
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 19); // 256*256*8
|
||||||
throughput = min(throughput, max_nonce - start_nonce);
|
throughput = min(throughput, max_nonce - start_nonce);
|
||||||
|
|
||||||
uint32_t *outputHash = (uint32_t*)malloc(throughput * 16 * sizeof(uint32_t));
|
uint32_t *outputHash = (uint32_t*)malloc(throughput * 64);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x000000ff;
|
((uint32_t*)ptarget)[7] = 0x000000ff;
|
||||||
@ -48,6 +49,7 @@ extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t
|
|||||||
// init
|
// init
|
||||||
if(!init[thr_id])
|
if(!init[thr_id])
|
||||||
{
|
{
|
||||||
|
cudaSetDevice(device_map[thr_id]);
|
||||||
groestlcoin_cpu_init(thr_id, throughput);
|
groestlcoin_cpu_init(thr_id, throughput);
|
||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
}
|
}
|
||||||
@ -62,27 +64,25 @@ extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t
|
|||||||
|
|
||||||
do {
|
do {
|
||||||
// GPU
|
// GPU
|
||||||
uint32_t foundNounce = 0xFFFFFFFF;
|
uint32_t foundNounce = UINT32_MAX;
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
|
*hashes_done = pdata[19] - start_nonce + throughput;
|
||||||
|
|
||||||
groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);
|
groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);
|
||||||
|
|
||||||
if(foundNounce < 0xffffffff)
|
if(foundNounce < UINT32_MAX)
|
||||||
{
|
{
|
||||||
uint32_t tmpHash[8];
|
uint32_t _ALIGN(64) tmpHash[8];
|
||||||
endiandata[19] = SWAP32(foundNounce);
|
endiandata[19] = SWAP32(foundNounce);
|
||||||
groestlhash(tmpHash, endiandata);
|
groestlhash(tmpHash, endiandata);
|
||||||
|
|
||||||
if (tmpHash[7] <= Htarg && fulltest(tmpHash, ptarget)) {
|
if (tmpHash[7] <= ptarget[7] && fulltest(tmpHash, ptarget)) {
|
||||||
pdata[19] = foundNounce;
|
pdata[19] = foundNounce;
|
||||||
*hashes_done = foundNounce - start_nonce + 1;
|
|
||||||
free(outputHash);
|
free(outputHash);
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNounce);
|
applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNounce);
|
||||||
}
|
}
|
||||||
|
|
||||||
foundNounce = 0xffffffff;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pdata[19] + throughput < pdata[19])
|
if (pdata[19] + throughput < pdata[19])
|
||||||
@ -91,7 +91,6 @@ extern "C" int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t
|
|||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - start_nonce + 1;
|
|
||||||
free(outputHash);
|
free(outputHash);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -79,7 +79,7 @@ extern "C" int scanhash_lyra2(int thr_id, uint32_t *pdata,
|
|||||||
skein256_cpu_init(thr_id, throughput);
|
skein256_cpu_init(thr_id, throughput);
|
||||||
groestl256_cpu_init(thr_id, throughput);
|
groestl256_cpu_init(thr_id, throughput);
|
||||||
|
|
||||||
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], 16 * sizeof(uint32_t) * throughput));
|
CUDA_SAFE_CALL(cudaMalloc(&d_hash[thr_id], throughput * 64));
|
||||||
|
|
||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
}
|
}
|
||||||
@ -95,23 +95,22 @@ extern "C" int scanhash_lyra2(int thr_id, uint32_t *pdata,
|
|||||||
int order = 0;
|
int order = 0;
|
||||||
uint32_t foundNonce;
|
uint32_t foundNonce;
|
||||||
|
|
||||||
|
*hashes_done = pdata[19] - first_nonce + throughput;
|
||||||
|
|
||||||
blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
blake256_cpu_hash_80(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
keccak256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
lyra2_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
skein256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + throughput;
|
|
||||||
|
|
||||||
foundNonce = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
foundNonce = groestl256_cpu_hash_32(thr_id, throughput, pdata[19], d_hash[thr_id], order++);
|
||||||
if (foundNonce != UINT32_MAX)
|
if (foundNonce != UINT32_MAX)
|
||||||
{
|
{
|
||||||
uint32_t _ALIGN(64) vhash64[8];
|
uint32_t _ALIGN(64) vhash64[8];
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
|
|
||||||
be32enc(&endiandata[19], foundNonce);
|
be32enc(&endiandata[19], foundNonce);
|
||||||
lyra2_hash(vhash64, endiandata);
|
lyra2_hash(vhash64, endiandata);
|
||||||
|
|
||||||
if (vhash64[7] <= Htarg && fulltest(vhash64, ptarget)) {
|
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) {
|
||||||
int res = 1;
|
int res = 1;
|
||||||
uint32_t secNonce = groestl256_getSecNonce(thr_id, 1);
|
uint32_t secNonce = groestl256_getSecNonce(thr_id, 1);
|
||||||
if (secNonce != UINT32_MAX)
|
if (secNonce != UINT32_MAX)
|
||||||
@ -136,6 +135,5 @@ extern "C" int scanhash_lyra2(int thr_id, uint32_t *pdata,
|
|||||||
|
|
||||||
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
} while (pdata[19] < max_nonce && !work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - first_nonce + 1;
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <cuda_runtime.h>
|
||||||
#include <openssl/sha.h>
|
#include <openssl/sha.h>
|
||||||
|
|
||||||
#include "uint256.h"
|
#include "uint256.h"
|
||||||
@ -41,7 +42,7 @@ extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptar
|
|||||||
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 17);
|
uint32_t throughput = device_intensity(thr_id, __func__, 1 << 17);
|
||||||
throughput = min(throughput, max_nonce - start_nonce);
|
throughput = min(throughput, max_nonce - start_nonce);
|
||||||
|
|
||||||
uint32_t *outputHash = (uint32_t*)malloc(throughput * 16 * sizeof(uint32_t));
|
uint32_t *outputHash = (uint32_t*)malloc(throughput * 64);
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
((uint32_t*)ptarget)[7] = 0x0000ff;
|
((uint32_t*)ptarget)[7] = 0x0000ff;
|
||||||
@ -49,14 +50,13 @@ extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptar
|
|||||||
// init
|
// init
|
||||||
if(!init[thr_id])
|
if(!init[thr_id])
|
||||||
{
|
{
|
||||||
#if BIG_DEBUG
|
cudaSetDevice(device_map[thr_id]);
|
||||||
#else
|
|
||||||
myriadgroestl_cpu_init(thr_id, throughput);
|
myriadgroestl_cpu_init(thr_id, throughput);
|
||||||
#endif
|
|
||||||
init[thr_id] = true;
|
init[thr_id] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t endiandata[32];
|
uint32_t _ALIGN(64) endiandata[32];
|
||||||
for (int kk=0; kk < 32; kk++)
|
for (int kk=0; kk < 32; kk++)
|
||||||
be32enc(&endiandata[kk], pdata[kk]);
|
be32enc(&endiandata[kk], pdata[kk]);
|
||||||
|
|
||||||
@ -66,26 +66,23 @@ extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptar
|
|||||||
do {
|
do {
|
||||||
// GPU
|
// GPU
|
||||||
uint32_t foundNounce = UINT32_MAX;
|
uint32_t foundNounce = UINT32_MAX;
|
||||||
const uint32_t Htarg = ptarget[7];
|
|
||||||
|
*hashes_done = pdata[19] - start_nonce + throughput;
|
||||||
|
|
||||||
myriadgroestl_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);
|
myriadgroestl_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);
|
||||||
|
|
||||||
if (foundNounce < UINT32_MAX)
|
if (foundNounce < UINT32_MAX)
|
||||||
{
|
{
|
||||||
uint32_t tmpHash[8];
|
uint32_t _ALIGN(64) tmpHash[8];
|
||||||
endiandata[19] = SWAP32(foundNounce);
|
endiandata[19] = SWAP32(foundNounce);
|
||||||
myriadhash(tmpHash, endiandata);
|
myriadhash(tmpHash, endiandata);
|
||||||
if (tmpHash[7] <= Htarg &&
|
if (tmpHash[7] <= ptarget[7] && fulltest(tmpHash, ptarget)) {
|
||||||
fulltest(tmpHash, ptarget)) {
|
pdata[19] = foundNounce;
|
||||||
pdata[19] = foundNounce;
|
free(outputHash);
|
||||||
*hashes_done = foundNounce - start_nonce + 1;
|
|
||||||
free(outputHash);
|
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNounce);
|
applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNounce);
|
||||||
}
|
}
|
||||||
|
|
||||||
foundNounce = 0xffffffff;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
|
if ((uint64_t) pdata[19] + throughput > (uint64_t) max_nonce) {
|
||||||
@ -96,7 +93,6 @@ extern "C" int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptar
|
|||||||
|
|
||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - start_nonce + 1;
|
|
||||||
free(outputHash);
|
free(outputHash);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user