mirror of
https://github.com/GOSTSec/ccminer
synced 2025-01-22 04:24:29 +00:00
groestl: same, remove useless host alloc
This commit is contained in:
parent
3fe4dda4c1
commit
374174c7c8
@ -5,6 +5,11 @@
|
||||
|
||||
#include "cuda_helper.h"
|
||||
|
||||
#ifdef __INTELLISENSE__
|
||||
#define __CUDA_ARCH__ 500
|
||||
#define __byte_perm(x,y,n) x
|
||||
#endif
|
||||
|
||||
#include "miner.h"
|
||||
|
||||
__constant__ uint32_t pTarget[8]; // Single GPU
|
||||
@ -85,9 +90,8 @@ void groestlcoin_gpu_hash_quad(uint32_t threads, uint32_t startNounce, uint32_t
|
||||
}
|
||||
}
|
||||
|
||||
if(rc == true)
|
||||
if(resNounce[0] > nounce)
|
||||
resNounce[0] = nounce;
|
||||
if(rc && resNounce[0] > nounce)
|
||||
resNounce[0] = nounce;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -111,9 +115,8 @@ void groestlcoin_cpu_free(int thr_id)
|
||||
__host__
|
||||
void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
||||
{
|
||||
uint32_t msgBlock[32];
|
||||
uint32_t msgBlock[32] = { 0 };
|
||||
|
||||
memset(msgBlock, 0, sizeof(uint32_t) * 32);
|
||||
memcpy(&msgBlock[0], data, 80);
|
||||
|
||||
// Erweitere die Nachricht auf den Nachrichtenblock (padding)
|
||||
@ -125,18 +128,14 @@ void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn)
|
||||
// auf der GPU ausgeführt)
|
||||
|
||||
// Blockheader setzen (korrekte Nonce und Hefty Hash fehlen da drin noch)
|
||||
cudaMemcpyToSymbol( groestlcoin_gpu_msg,
|
||||
msgBlock,
|
||||
128);
|
||||
cudaMemcpyToSymbol(groestlcoin_gpu_msg, msgBlock, 128);
|
||||
|
||||
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
|
||||
cudaMemcpyToSymbol( pTarget,
|
||||
pTargetIn,
|
||||
sizeof(uint32_t) * 8 );
|
||||
cudaMemcpyToSymbol(pTarget, pTargetIn, 32);
|
||||
}
|
||||
|
||||
__host__
|
||||
void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce)
|
||||
void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *resNonce)
|
||||
{
|
||||
uint32_t threadsperblock = 256;
|
||||
|
||||
@ -144,13 +143,10 @@ void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, vo
|
||||
// mit den Quad Funktionen brauchen wir jetzt 4 threads pro Hash, daher Faktor 4 bei der Blockzahl
|
||||
int factor = 4;
|
||||
|
||||
// berechne wie viele Thread Blocks wir brauchen
|
||||
// berechne wie viele Thread Blocks wir brauchen
|
||||
dim3 grid(factor*((threads + threadsperblock-1)/threadsperblock));
|
||||
dim3 block(threadsperblock);
|
||||
|
||||
// Größe des dynamischen Shared Memory Bereichs
|
||||
size_t shared_size = 0;
|
||||
|
||||
int dev_id = device_map[thr_id];
|
||||
if (device_sm[dev_id] < 300 || cuda_arch[dev_id] < 300) {
|
||||
gpulog(LOG_ERR, thr_id, "Sorry, This algo is not supported by this GPU arch (SM 3.0 required)");
|
||||
@ -158,10 +154,10 @@ void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, vo
|
||||
}
|
||||
|
||||
cudaMemset(d_resultNonce[thr_id], 0xFF, sizeof(uint32_t));
|
||||
groestlcoin_gpu_hash_quad<<<grid, block, shared_size>>>(threads, startNounce, d_resultNonce[thr_id]);
|
||||
groestlcoin_gpu_hash_quad <<<grid, block>>> (threads, startNounce, d_resultNonce[thr_id]);
|
||||
|
||||
// Strategisches Sleep Kommando zur Senkung der CPU Last
|
||||
MyStreamSynchronize(NULL, 0, thr_id);
|
||||
// MyStreamSynchronize(NULL, 0, thr_id);
|
||||
|
||||
cudaMemcpy(nounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(resNonce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||
}
|
||||
|
@ -4,6 +4,6 @@
|
||||
void groestlcoin_cpu_init(int thr_id, uint32_t threads);
|
||||
void groestlcoin_cpu_free(int thr_id);
|
||||
void groestlcoin_cpu_setBlock(int thr_id, void *data, void *pTargetIn);
|
||||
void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, void *outputHashes, uint32_t *nounce);
|
||||
void groestlcoin_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *resNonce);
|
||||
|
||||
#endif
|
@ -18,7 +18,7 @@
|
||||
|
||||
// globaler Speicher für alle HeftyHashes aller Threads
|
||||
__constant__ uint32_t pTarget[8]; // Single GPU
|
||||
uint32_t *d_outputHashes[MAX_GPUS];
|
||||
static uint32_t *d_outputHashes[MAX_GPUS];
|
||||
static uint32_t *d_resultNonce[MAX_GPUS];
|
||||
|
||||
__constant__ uint32_t myriadgroestl_gpu_msg[32];
|
||||
@ -225,8 +225,9 @@ __device__ void myriadgroestl_gpu_sha256(uint32_t *message)
|
||||
message[k] = SWAB32(hash[k]);
|
||||
}
|
||||
|
||||
__global__ void __launch_bounds__(256, 4)
|
||||
myriadgroestl_gpu_hash_quad(uint32_t threads, uint32_t startNounce, uint32_t *hashBuffer)
|
||||
__global__
|
||||
__launch_bounds__(256, 4)
|
||||
void myriadgroestl_gpu_hash_quad(uint32_t threads, uint32_t startNounce, uint32_t *hashBuffer)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
// durch 4 dividieren, weil jeweils 4 Threads zusammen ein Hash berechnen
|
||||
@ -263,8 +264,8 @@ __global__ void __launch_bounds__(256, 4)
|
||||
#endif
|
||||
}
|
||||
|
||||
__global__ void
|
||||
myriadgroestl_gpu_hash_quad2(uint32_t threads, uint32_t startNounce, uint32_t *resNounce, uint32_t *hashBuffer)
|
||||
__global__
|
||||
void myriadgroestl_gpu_hash_quad2(uint32_t threads, uint32_t startNounce, uint32_t *resNounce, uint32_t *hashBuffer)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 300
|
||||
uint32_t thread = (blockDim.x * blockIdx.x + threadIdx.x);
|
||||
@ -322,6 +323,9 @@ void myriadgroestl_cpu_init(int thr_id, uint32_t threads)
|
||||
myr_sha256_cpu_constantTable,
|
||||
sizeof(uint32_t) * 64 );
|
||||
|
||||
// to check if the binary supports SM3+
|
||||
cuda_get_arch(thr_id);
|
||||
|
||||
cudaMalloc(&d_outputHashes[thr_id], (size_t) 64 * threads);
|
||||
cudaMalloc(&d_resultNonce[thr_id], sizeof(uint32_t));
|
||||
}
|
||||
@ -379,7 +383,7 @@ void myriadgroestl_cpu_hash(int thr_id, uint32_t threads, uint32_t startNounce,
|
||||
myriadgroestl_gpu_hash_quad2 <<< grid2, block >>> (threads, startNounce, d_resultNonce[thr_id], d_outputHashes[thr_id]);
|
||||
|
||||
// Strategisches Sleep Kommando zur Senkung der CPU Last
|
||||
MyStreamSynchronize(NULL, 0, thr_id);
|
||||
//MyStreamSynchronize(NULL, 0, thr_id);
|
||||
|
||||
cudaMemcpy(resNounce, d_resultNonce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost);
|
||||
}
|
||||
|
@ -36,8 +36,6 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
||||
uint32_t throughput = cuda_default_throughput(thr_id, 1 << 19); // 256*256*8
|
||||
if (init[thr_id]) throughput = min(throughput, max_nonce - start_nonce);
|
||||
|
||||
uint32_t *outputHash = (uint32_t*)malloc((size_t) 64* throughput);
|
||||
|
||||
if (opt_benchmark)
|
||||
ptarget[7] = 0x001f;
|
||||
|
||||
@ -66,7 +64,7 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
||||
*hashes_done = pdata[19] - start_nonce + throughput;
|
||||
|
||||
// GPU hash
|
||||
groestlcoin_cpu_hash(thr_id, throughput, pdata[19], outputHash, &foundNounce);
|
||||
groestlcoin_cpu_hash(thr_id, throughput, pdata[19], &foundNounce);
|
||||
|
||||
if (foundNounce < UINT32_MAX && bench_algo < 0)
|
||||
{
|
||||
@ -77,9 +75,8 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
||||
if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
|
||||
work_set_target_ratio(work, vhash);
|
||||
pdata[19] = foundNounce;
|
||||
free(outputHash);
|
||||
return true;
|
||||
} else {
|
||||
} else if (vhash[7] > ptarget[7]) {
|
||||
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNounce);
|
||||
}
|
||||
}
|
||||
@ -93,8 +90,6 @@ int scanhash_groestlcoin(int thr_id, struct work *work, uint32_t max_nonce, unsi
|
||||
} while (!work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = pdata[19] - start_nonce;
|
||||
|
||||
free(outputHash);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user