mirror of
https://github.com/GOSTSec/ccminer
synced 2025-02-02 01:44:32 +00:00
groestl: same, remove useless host alloc
This commit is contained in:
parent
3fe4dda4c1
commit
374174c7c8
@ -5,6 +5,11 @@
|
|||||||
|
|
||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
|
|
||||||
|
#ifdef __INTELLISENSE__
|
||||||
|
#define __CUDA_ARCH__ 500
|
||||||
|
#define __byte_perm(x,y,n) x
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "miner.h"
|
#include "miner.h"
|
||||||
|
|
||||||
__constant__ uint32_t pTarget[8]; // Single GPU
|
__constant__ uint32_t pTarget[8]; // Single GPU
|
||||||
@ -85,9 +90,8 @@ void groestlcoin_gpu_hash_quad(uint32_t threads, uint32_t startNounce, uint32_t
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(rc == true)
|
if(rc && resNounce[0] > nounce)
|
||||||
if(resNounce[0] > nounce)
|
resNounce[0] = nounce;
|
||||||
resNounce[0] = nounce;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -111,9 +115,8 @@ void groestlcoin_cpu_free(int thr_id)
|
|||||||
__host__
|
__host__
|
||||||
void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
||||||
{
|
{
|
||||||
uint32_t msgBlock[32];
|
uint32_t msgBlock[32] = { 0 };
|
||||||
|
|
||||||
memset(msgBlock, 0, sizeof(uint32_t) * 32);
|
|
||||||
memcpy(&msgBlock[0], data, 80);
|
memcpy(&msgBlock[0], data, 80);
|
||||||
|
|
||||||
// Erweitere die Nachricht auf den Nachrichtenblock (padding)
|
// Erweitere die Nachricht auf den Nachrichtenblock (padding)
|
||||||
@ -125,18 +128,14 @@ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
|||||||
// auf der GPU ausgeführt)
|
// auf der GPU ausgeführt)
|
||||||
|
|
||||||
// Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch)
|
// Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch)
|
||||||
cudaMemcpyToSymbol( groestlcoin_gpu_msg,
|
cudaMemcpyToSymbol(groestlcoin_gpu_msg, msgBlock, 128);
|
||||||
msgBlock,
|
|
||||||
128);
|
|
||||||
|
|
||||||
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
|
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
|
||||||
cudaMemcpyToSymbol( pTarget,
|
cudaMemcpyToSymbol(pTarget, pTargetIn, 32);
|
||||||
pTargetIn,
|
|
||||||
sizeof(uint32_t) * 8 );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__host__
|
__host__
|
||||||
void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
|
void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *resNonce)
|
||||||
{
|
{
|
||||||
uint32_t threadsperblock = 256;
|
uint32_t threadsperblock = 256;
|
||||||
|
|
||||||
@ -144,13 +143,10 @@ void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, vo
|
|||||||
// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
|
// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
|
||||||
int factor = 4;
|
int factor = 4;
|
||||||
|
|
||||||
// berechne wie viele Thread Blocks wir brauchen
|
// berechne wie viele Thread Blocks wir brauchen
|
||||||
dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
|
dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
|
||||||
dim3 block(threadsperblock);
|
dim3 block(threadsperblock);
|
||||||
|
|
||||||
// Größe des dynamischen Shared Memory Bereichs
|
|
||||||
size_t shared_size = 0;
|
|
||||||
|
|
||||||
int dev_id = device_map[thr_id];
|
int dev_id = device_map[thr_id];
|
||||||
if (device_sm[dev_id] < 300 || cuda_arch[dev_id] < 300) {
|
if (device_sm[dev_id] < 300 || cuda_arch[dev_id] < 300) {
|
||||||
gpulog(LOG_ERR, thr_id, "Sorry, This algo is not supported by this GPU arch (SM 3.0 required)");
|
gpulog(LOG_ERR, thr_id, "Sorry, This algo is not supported by this GPU arch (SM 3.0 required)");
|
||||||
@ -158,10 +154,10 @@ void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, vo
|
|||||||
}
|
}
|
||||||
|
|
||||||
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
|
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
|
||||||
groestlcoin_gpu_hash_quad<<<grid, block, shared_size>>>(threads, startNounce, d_resultNonce[thr_id]);
|
groestlcoin_gpu_hash_quad <<<grid, block>>> (threads, startNounce, d_resultNonce[thr_id]);
|
||||||
|
|
||||||
// Strategisches Sleep Kommando zur Senkung der CPU Last
|
// Strategisches Sleep Kommando zur Senkung der CPU Last
|
||||||
MyStreamSynchronize(NULL, 0, thr_id);
|
// MyStreamSynchronize(NULL, 0, thr_id);
|
||||||
|
|
||||||
cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
cudaMemcpy(resNonce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,6 @@
|
|||||||
void groestlcoin_cpu_init(int thr_id, uint32_t threads);
|
void groestlcoin_cpu_init(int thr_id, uint32_t threads);
|
||||||
void groestlcoin_cpu_free(int thr_id);
|
void groestlcoin_cpu_free(int thr_id);
|
||||||
void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
|
void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
|
||||||
void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);
|
void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *resNonce);
|
||||||
|
|
||||||
#endif
|
#endif
|
@ -18,7 +18,7 @@
|
|||||||
|
|
||||||
// globaler Speicher für alle HeftyHashes aller Threads
|
// globaler Speicher für alle HeftyHashes aller Threads
|
||||||
__constant__ uint32_t pTarget[8]; // Single GPU
|
__constant__ uint32_t pTarget[8]; // Single GPU
|
||||||
uint32_t *d_outputHashes[MAX_GPUS];
|
static uint32_t *d_outputHashes[MAX_GPUS];
|
||||||
static uint32_t *d_resultNonce[MAX_GPUS];
|
static uint32_t *d_resultNonce[MAX_GPUS];
|
||||||
|
|
||||||
__constant__ uint32_t myriadgroestl_gpu_msg[32];
|
__constant__ uint32_t myriadgroestl_gpu_msg[32];
|
||||||
@ -225,8 +225,9 @@ __device__ void myriadgroestl_gpu_sha256(uint32_t *message)
|
|||||||
message[k] = SWAB32(hash[k]);
|
message[k] = SWAB32(hash[k]);
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void __launch_bounds__(256, 4)
|
__global__
|
||||||
myriadgroestl_gpu_hash_quad(uint32_t threads, uint32_t startNounce, uint32_t *hashBuffer)
|
__launch_bounds__(256, 4)
|
||||||
|
void myriadgroestl_gpu_hash_quad(uint32_t threads, uint32_t startNounce, uint32_t *hashBuffer)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if __CUDA_ARCH__ >= 300
|
||||||
// durch 4 dividieren, weil jeweils 4 Threads zusammen ein Hash berechnen
|
// durch 4 dividieren, weil jeweils 4 Threads zusammen ein Hash berechnen
|
||||||
@ -263,8 +264,8 @@ __global__ void __launch_bounds__(256, 4)
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
__global__ void
|
__global__
|
||||||
myriadgroestl_gpu_hash_quad2(uint32_t threads, uint32_t startNounce, uint32_t *resNounce, uint32_t *hashBuffer)
|
void myriadgroestl_gpu_hash_quad2(uint32_t threads, uint32_t startNounce, uint32_t *resNounce, uint32_t *hashBuffer)
|
||||||
{
|
{
|
||||||
#if __CUDA_ARCH__ >= 300
|
#if __CUDA_ARCH__ >= 300
|
||||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||||
@ -322,6 +323,9 @@ void myriadgroestl_cpu_init(int thr_id, uint32_t threads)
|
|||||||
myr_sha256_cpu_constantTable,
|
myr_sha256_cpu_constantTable,
|
||||||
sizeof(uint32_t) * 64 );
|
sizeof(uint32_t) * 64 );
|
||||||
|
|
||||||
|
// to check if the binary supports SM3+
|
||||||
|
cuda_get_arch(thr_id);
|
||||||
|
|
||||||
cudaMalloc(&d_outputHashes[thr_id], (size_t) 64 * threads);
|
cudaMalloc(&d_outputHashes[thr_id], (size_t) 64 * threads);
|
||||||
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
||||||
}
|
}
|
||||||
@ -379,7 +383,7 @@ void myriadgroestl_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce,
|
|||||||
myriadgroestl_gpu_hash_quad2 <<< grid2, block >>> (threads, startNounce, d_resultNonce[thr_id], d_outputHashes[thr_id]);
|
myriadgroestl_gpu_hash_quad2 <<< grid2, block >>> (threads, startNounce, d_resultNonce[thr_id], d_outputHashes[thr_id]);
|
||||||
|
|
||||||
// Strategisches Sleep Kommando zur Senkung der CPU Last
|
// Strategisches Sleep Kommando zur Senkung der CPU Last
|
||||||
MyStreamSynchronize(NULL, 0, thr_id);
|
//MyStreamSynchronize(NULL, 0, thr_id);
|
||||||
|
|
||||||
cudaMemcpy(resNounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
cudaMemcpy(resNounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||||
}
|
}
|
||||||
|
@ -36,8 +36,6 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
|||||||
uint32_t throughput = cuda_default_throughput(thr_id, 1 << 19); // 256*256*8
|
uint32_t throughput = cuda_default_throughput(thr_id, 1 << 19); // 256*256*8
|
||||||
if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce);
|
if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce);
|
||||||
|
|
||||||
uint32_t *outputHash = (uint32_t*)malloc((size_t) 64* throughput);
|
|
||||||
|
|
||||||
if (opt_benchmark)
|
if (opt_benchmark)
|
||||||
ptarget[7] = 0x001f;
|
ptarget[7] = 0x001f;
|
||||||
|
|
||||||
@ -66,7 +64,7 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
|||||||
*hashes_done = pdata[19] - start_nonce + throughput;
|
*hashes_done = pdata[19] - start_nonce + throughput;
|
||||||
|
|
||||||
// GPU hash
|
// GPU hash
|
||||||
groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);
|
groestlcoin_cpu_hash(thr_id, throughput, pdata[19], &foundNounce);
|
||||||
|
|
||||||
if (foundNounce < UINT32_MAX && bench_algo < 0)
|
if (foundNounce < UINT32_MAX && bench_algo < 0)
|
||||||
{
|
{
|
||||||
@ -77,9 +75,8 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
|||||||
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
|
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
|
||||||
work_set_target_ratio(work, vhash);
|
work_set_target_ratio(work, vhash);
|
||||||
pdata[19] = foundNounce;
|
pdata[19] = foundNounce;
|
||||||
free(outputHash);
|
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else if (vhash[7] > ptarget[7]) {
|
||||||
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce);
|
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -93,8 +90,6 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
|||||||
} while (!work_restart[thr_id].restart);
|
} while (!work_restart[thr_id].restart);
|
||||||
|
|
||||||
*hashes_done = pdata[19] - start_nonce;
|
*hashes_done = pdata[19] - start_nonce;
|
||||||
|
|
||||||
free(outputHash);
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user