3e44e52300
Blake256: squashed commit... Squashed commit of the following: commit c370208bc92ef16557f66e5391faf2b1ad47726f Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Wed Sep 3 13:53:01 2014 +0200 hashlog: prepare store of scanned range commit e2cf49a5e956f03deafd266d1a0dd087a2041c99 Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Wed Sep 3 12:54:13 2014 +0200 stratum: store server time offset in context commit 1a4391d7ff21397a128abf031f92733a8ac47437 Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Tue Sep 2 12:40:52 2014 +0200 hashlog: prevent double computing on jobs already done commit 049e57730116685755bd3ff214f0793cce7c773b Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Wed Sep 3 09:49:14 2014 +0200 tmp blake log commit 43d3e93e1a97e569ead2437f759c6b8423d30c0a Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Wed Sep 3 09:29:51 2014 +0200 blake: set a max throughput commit 7e595a36ea69027c8a28023399540a761e7686c3 Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Tue Sep 2 21:13:37 2014 +0200 blake: cleanup, remove d_hash buf, not in a chain host: only bencode if gpu hash was found commit de80c7e9d1448f15541d08c5dbbf372d5bfeba48 Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Tue Sep 2 12:40:44 2014 +0200 blake: remove unused parameter and fix index in d_hash that reduce the speed to 92MH/s but the next commit give us 30 more so, todo: merge the whole checkhash proc in gpu_hash and remove this d_hash buffer... commit 2d42ae6de586a6ae8cbfd01806a273fd5cc4b262 Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Tue Sep 2 05:09:31 2014 +0200 stratum: handle a small cache of submitted jobs Prevent to send duplicated shares on some pools like hashharder.. This cache keeps submitted job/nounces of the last 15 minutes so, remove exit on repeated duplicate shares, the submitted cache now handles this problem. 
Signed-off-by: Tanguy Pruvot <tanguy.pruvot@gmail.com> commit 1b8c3c12fa5bb83afbb02f9d5f60586939f36d86 Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Tue Sep 2 03:38:57 2014 +0200 debug: a new boolean to log or not json rpc data commit 1f99aae0ff621f4f85f119d811a3f1a8d2204f60 Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Mon Sep 1 18:49:23 2014 +0200 exit on repeated duplicate shares (to enhance) create a new function proper_exit() to do common stuff on exit... commit 530732458add6c4c3836606d028930f3581c0a5f Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Mon Sep 1 12:22:51 2014 +0200 blake: use a constant for threads, reduce mallocated d_hash size and clean a bit more... commit 0aeac878ef60840f3123354037cd56a89d2e94e6 Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Mon Sep 1 06:12:55 2014 +0200 blake: tune up and cleanup, ~100 MH/s on a normal 750Ti tested on linux and windows (x86 binary)... but there is a high number of duplicated shares... weird commit 4a52d0553b0076b984be480725fa67689c544647 Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Mon Sep 1 10:22:32 2014 +0200 debug: show json methods, hide hash/target if ok commit 1fb9becc1f2b6a15d8ccea4d8314df9ddf0af4ed Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Mon Sep 1 08:44:19 2014 +0200 cpu-miner: sort algos by name, show reject reason commit bfe96c49b0bf321ed0776cb1cf31c4fe8a0a8b8d Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Mon Aug 25 11:21:06 2014 +0200 release 1.4, update README... commit c17d11e37758c37762a7664a731fda6e9a5454b1 Author: Tanguy Pruvot <tanguy.pruvot@gmail.com> Date: Sun Aug 31 08:57:48 2014 +0200 add "blake" 256, 14 rounds (for NEOS blake, not BlakeCoin) also remove "missing" file, its old and not compatible with ubuntu 14.04 to test on windows blake: clean and optimize Release v1.4 with blake (NEOS)
99 lines
2.6 KiB
Plaintext
99 lines
2.6 KiB
Plaintext
#include <stdio.h>
#include <memory.h>

#include "cuda_helper.h"

// Hash target we test candidate hashes against
// (256 bits, stored as 8 uint32_t words; word 7 is most significant — see kernel).
__constant__ uint32_t pTarget[8];

// Per-GPU result slot, indexed by thr_id (up to 8 GPUs):
// the winning nonce found by the last check-kernel run, or 0xffffffff.
static uint32_t *d_resNounce[8];   // device-side slot (one uint32_t per thr_id)
static uint32_t *h_resNounce[8];   // page-locked host mirror (allocated with cudaMallocHost)

// from heavy.cu: stream synchronize helper that lowers host CPU load while waiting
extern cudaError_t MyStreamSynchronize(cudaStream_t stream, int situation, int thr_id);
// Compare each candidate hash against the constant target pTarget and record
// the smallest qualifying nonce (hash <= target) in resNounce[0].
//
// Launch layout: 1-D grid of 1-D blocks, one thread per candidate hash.
//   threads       total number of candidates
//   startNounce   nonce of candidate 0
//   g_nonceVector optional per-thread nonce table; NULL -> nonce = startNounce + thread
//   g_hash        candidate hashes, 16 uint32_t per entry (only the first 8
//                 words — the 256-bit hash — are examined)
//   resNounce     single-slot result, pre-set to 0xffffffff by the host wrapper
__global__ void cuda_check_gpu_hash_64(int threads, uint32_t startNounce, uint32_t *g_nonceVector, uint32_t *g_hash, uint32_t *resNounce)
{
	int thread = (blockDim.x * blockIdx.x + threadIdx.x);
	if (thread < threads)
	{
		// nonce handled by this thread
		uint32_t nounce = (g_nonceVector != NULL) ? g_nonceVector[thread] : (startNounce + thread);

		int hashPosition = nounce - startNounce;
		uint32_t *inpHash = &g_hash[16 * hashPosition];

		// hash <= target, both read as 256-bit numbers with word 7 most
		// significant: walk down from word 7 and stop at the first word
		// that differs; an exactly equal hash counts as a match.
		bool rc = true;
		for (int i = 7; i >= 0; i--) {
			uint32_t hw = inpHash[i];
			uint32_t tw = pTarget[i];
			if (hw != tw) {
				rc = (hw < tw);
				break;
			}
		}

		// Keep the smallest qualifying nonce.
		// NOTE(review): unsynchronized read-modify-write shared by all
		// threads (no atomicMin) — races can keep a larger qualifying
		// nonce; presumably acceptable upstream since any share works.
		if (rc == true)
			if (resNounce[0] > nounce)
				resNounce[0] = nounce;
	}
}
|
|
|
|
// Setup: one-time per-GPU allocation of the nonce result slot —
// a page-locked host uint32_t (fast D2H copies) plus its device twin.
// `threads` is unused; kept for signature compatibility with other inits.
__host__ void cuda_check_cpu_init(int thr_id, int threads)
{
	// check allocation results instead of silently keeping garbage pointers;
	// a failure here would otherwise surface later as an illegal-address error
	cudaError_t err = cudaMallocHost(&h_resNounce[thr_id], 1*sizeof(uint32_t));
	if (err == cudaSuccess)
		err = cudaMalloc(&d_resNounce[thr_id], 1*sizeof(uint32_t));
	if (err != cudaSuccess)
		fprintf(stderr, "cuda_check_cpu_init: GPU #%d allocation failed: %s\n",
		        thr_id, cudaGetErrorString(err));
}
|
|
|
|
// Set the share target: upload the 256-bit difficulty target into the
// device's __constant__ pTarget, read by every check kernel launched after.
__host__ void cuda_check_cpu_setTarget(const void *ptarget)
{
	const size_t target_bytes = 8 * sizeof(uint32_t);
	cudaMemcpyToSymbol(pTarget, ptarget, target_bytes, 0, cudaMemcpyHostToDevice);
}
|
|
|
|
// Host wrapper: run the target-check kernel over `threads` candidate hashes
// and return the smallest qualifying nonce, or 0xffffffff when none qualifies
// (or on any CUDA error). Requires cuda_check_cpu_init(thr_id, ...) first.
__host__ uint32_t cuda_check_cpu_hash_64(int thr_id, int threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order)
{
	uint32_t result = 0xffffffff;
	cudaMemset(d_resNounce[thr_id], 0xff, sizeof(uint32_t)); // "no nonce found" sentinel

	const int threadsperblock = 256;

	// ceil-div: enough blocks to cover every candidate
	dim3 grid((threads + threadsperblock-1)/threadsperblock);
	dim3 block(threadsperblock);

	// no dynamic shared memory needed
	size_t shared_size = 0;

	cuda_check_gpu_hash_64 <<<grid, block, shared_size>>>(threads, startNounce, d_nonceVector, d_inputHash, d_resNounce[thr_id]);

	// kernel launches don't return errors directly — fetch launch-config errors here
	cudaError_t err = cudaGetLastError();
	if (err != cudaSuccess) {
		fprintf(stderr, "cuda_check_cpu_hash_64: launch failed: %s\n", cudaGetErrorString(err));
		return result;
	}

	// strategic sleep to lower host CPU load while the GPU works
	MyStreamSynchronize(NULL, order, thr_id);

	// copy the result to the host (into page-locked memory, so it's fast);
	// only trust h_resNounce if the copy itself succeeded
	if (cudaMemcpy(h_resNounce[thr_id], d_resNounce[thr_id], sizeof(uint32_t), cudaMemcpyDeviceToHost) == cudaSuccess) {
		// be defensive about copy completion; cudaThreadSynchronize() is
		// deprecated, cudaDeviceSynchronize() is the drop-in replacement
		cudaDeviceSynchronize();
		result = *h_resNounce[thr_id];
	}

	return result;
}
|