diff --git a/Makefile.am b/Makefile.am index a8d19b5..8878096 100644 --- a/Makefile.am +++ b/Makefile.am @@ -42,7 +42,7 @@ ccminer_SOURCES = elist.h miner.h compat.h \ quark/quarkcoin.cu quark/animecoin.cu \ quark/cuda_quark_compactionTest.cu \ neoscrypt/neoscrypt.cpp neoscrypt/neoscrypt-cpu.c neoscrypt/cuda_neoscrypt.cu \ - cuda_nist5.cu pentablake.cu skein.cu skein2.cu zr5.cu \ + cuda_nist5.cu pentablake.cu skein.cu skein2.cpp zr5.cu \ sph/bmw.c sph/blake.c sph/groestl.c sph/jh.c sph/keccak.c sph/skein.c \ sph/cubehash.c sph/echo.c sph/luffa.c sph/sha2.c sph/shavite.c sph/simd.c \ sph/hamsi.c sph/hamsi_helper.c sph/sph_hamsi.h \ diff --git a/ccminer.vcxproj b/ccminer.vcxproj index 485da3d..0a8d629 100644 --- a/ccminer.vcxproj +++ b/ccminer.vcxproj @@ -270,6 +270,7 @@ + @@ -464,9 +465,6 @@ 64 - - 64 - true @@ -538,4 +536,4 @@ - \ No newline at end of file + diff --git a/ccminer.vcxproj.filters b/ccminer.vcxproj.filters index 9cc242c..c7042db 100644 --- a/ccminer.vcxproj.filters +++ b/ccminer.vcxproj.filters @@ -240,6 +240,9 @@ Source Files\neoscrypt + + Source Files\CUDA + @@ -601,9 +604,6 @@ Source Files\CUDA - - Source Files\CUDA - Source Files\CUDA\scrypt @@ -653,4 +653,4 @@ Ressources - \ No newline at end of file + diff --git a/cuda_checkhash.cu b/cuda_checkhash.cu index 4bbef59..a40f75e 100644 --- a/cuda_checkhash.cu +++ b/cuda_checkhash.cu @@ -17,15 +17,15 @@ static uint32_t* d_resNonces[MAX_GPUS]; __host__ void cuda_check_cpu_init(int thr_id, uint32_t threads) { - CUDA_CALL_OR_RET(cudaMallocHost(&h_resNonces[thr_id], 8*sizeof(uint32_t))); - CUDA_CALL_OR_RET(cudaMalloc(&d_resNonces[thr_id], 8*sizeof(uint32_t))); + CUDA_CALL_OR_RET(cudaMallocHost(&h_resNonces[thr_id], 32)); + CUDA_CALL_OR_RET(cudaMalloc(&d_resNonces[thr_id], 32)); } // Target Difficulty __host__ void cuda_check_cpu_setTarget(const void *ptarget) { - CUDA_SAFE_CALL(cudaMemcpyToSymbol(pTarget, ptarget, 8*sizeof(uint32_t), 0, cudaMemcpyHostToDevice)); + CUDA_SAFE_CALL(cudaMemcpyToSymbol(pTarget, ptarget, 32, 0, cudaMemcpyHostToDevice)); } /* --------------------------------------------------------------------------------------------- */ @@ -177,11 +177,11 @@ void cuda_check_hash_branch_64(uint32_t threads, uint32_t startNounce, uint32_t __host__ uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_inputHash, int order) { - uint32_t result = 0xffffffff; - cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t)); - const uint32_t threadsperblock = 256; + uint32_t result = UINT32_MAX; + cudaMemset(d_resNonces[thr_id], 0xff, sizeof(uint32_t)); + dim3 grid((threads + threadsperblock-1)/threadsperblock); dim3 block(threadsperblock); @@ -199,9 +199,9 @@ uint32_t cuda_check_hash_branch(int thr_id, uint32_t threads, uint32_t startNoun /* Function to get the compiled Shader Model version */ int cuda_arch[MAX_GPUS] = { 0 }; -__global__ -void nvcc_get_arch(int *d_version) +__global__ void nvcc_get_arch(int *d_version) { + *d_version = 0; #ifdef __CUDA_ARCH__ *d_version = __CUDA_ARCH__; #endif diff --git a/groestlcoin.cpp b/groestlcoin.cpp index 400b8ef..6956102 100644 --- a/groestlcoin.cpp +++ b/groestlcoin.cpp @@ -78,8 +78,8 @@ int scanhash_groestlcoin(int thr_id, uint32_t *pdata, const uint32_t *ptarget, } if ((uint64_t) pdata[19] + throughput > max_nonce) { + *hashes_done = pdata[19] - start_nonce + 1; pdata[19] = max_nonce; - *hashes_done = max_nonce - start_nonce + 1; break; } pdata[19] += throughput; diff --git a/myriadgroestl.cpp b/myriadgroestl.cpp index 5464819..4186334 100644 --- a/myriadgroestl.cpp +++ b/myriadgroestl.cpp @@ -35,7 +35,7 @@ int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) { uint32_t _ALIGN(64) endiandata[32]; - uint32_t start_nonce = pdata[19]++; + uint32_t start_nonce = pdata[19]; uint32_t throughput = device_intensity(thr_id, __func__, 1 << 17); throughput = min(throughput, max_nonce - start_nonce); @@ -82,8 +82,8 @@ int scanhash_myriad(int thr_id, uint32_t *pdata, const uint32_t *ptarget, } if ((uint64_t) pdata[19] + throughput > max_nonce) { + *hashes_done = pdata[19] - start_nonce; pdata[19] = max_nonce; - *hashes_done = max_nonce - start_nonce + 1; break; } pdata[19] += throughput; diff --git a/skein.cu b/skein.cu index 131812b..38b4bcc 100644 --- a/skein.cu +++ b/skein.cu @@ -396,12 +396,13 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata, const uint32_t *p int res = 1; uint8_t num = res; uint32_t secNonce = cuda_check_hash_suppl(thr_id, throughput, pdata[19], d_hash[thr_id], num); - while (secNonce != 0 && res < 6) + while (secNonce != 0 && res < 2) /* todo: up to 6 */ { endiandata[19] = swab32_if(secNonce, swap); skeincoinhash(vhash64, endiandata); if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { - pdata[19+res] = swab32_if(secNonce, !swap); + // todo: use 19 20 21... zr5 pok to adapt... + pdata[19+res*2] = swab32_if(secNonce, !swap); res++; } num++; @@ -416,10 +417,16 @@ extern "C" int scanhash_skeincoin(int thr_id, uint32_t *pdata, const uint32_t *p applog(LOG_WARNING, "GPU #%d: result for nonce %08x does not validate on CPU!", device_map[thr_id], foundNonce); } } + + if ((uint64_t) pdata[19] + throughput > max_nonce) { + *hashes_done = pdata[19] - first_nonce; + pdata[19] = max_nonce; + break; + } + pdata[19] += throughput; - } while (pdata[19] < max_nonce && !work_restart[thr_id].restart); + } while (!work_restart[thr_id].restart); - *hashes_done = pdata[19] - first_nonce + 1; return 0; } diff --git a/skein2.cu b/skein2.cpp similarity index 90% rename from skein2.cu rename to skein2.cpp index b264597..bdb170e 100644 --- a/skein2.cu +++ b/skein2.cpp @@ -2,6 +2,7 @@ * SKEIN512 80 + SKEIN512 64 (Woodcoin) * by tpruvot@github - 2015 */ +#include #include "sph/sph_skein.h" @@ -17,7 +18,7 @@ extern void skein512_cpu_hash_80(int thr_id, uint32_t threads, uint32_t startNou extern void quark_skein512_cpu_hash_64(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *d_nonceVector, uint32_t *d_hash, int order); -extern "C" void skein2hash(void *output, const void *input) +void skein2hash(void *output, const void *input) { uint32_t _ALIGN(64) hash[16]; sph_skein512_context ctx_skein; @@ -30,12 +31,12 @@ extern "C" void skein2hash(void *output, const void *input) sph_skein512(&ctx_skein, hash, 64); sph_skein512_close(&ctx_skein, hash); - memcpy(output, hash, 32); + memcpy(output, (void*) hash, 32); } static bool init[MAX_GPUS] = { 0 }; -extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata, const uint32_t *ptarget, +int scanhash_skein2(int thr_id, uint32_t *pdata, const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done) { const uint32_t first_nonce = pdata[19]; @@ -100,7 +101,9 @@ extern "C" int scanhash_skein2(int thr_id, uint32_t *pdata, const uint32_t *ptar } } - if (((uint64_t) throughput + pdata[19]) > max_nonce) { + if ((uint64_t) pdata[19] + throughput > max_nonce) { + *hashes_done = pdata[19] - first_nonce; + pdata[19] = max_nonce; break; }