Browse Source

neoscrypt: apply last VTC improvements

rewrote almost properly ;)
2upstream
Tanguy Pruvot 8 years ago
parent
commit
a4196b341d
  1. 5
      README.txt
  2. 2
      configure.ac
  3. 1697
      neoscrypt/cuda_neoscrypt.cu
  4. 41
      neoscrypt/neoscrypt.cpp
  5. 11
      quark/quarkcoin.cu

5
README.txt

@ -240,11 +240,12 @@ features.
>>> RELEASE HISTORY <<< >>> RELEASE HISTORY <<<
June 2016 v1.8.0 July 2016 v1.8.0
Pascal support with cuda 8 Pascal support with cuda 8
x11evo algo (XRE) x11evo algo (XRE)
Lyra2v2 and Decred hashrate improvements Lyra2v2, Neoscrypt and Decred improvements
Enhance windows NVAPI clock and power limits Enhance windows NVAPI clock and power limits
LED support for mining/shares activity on windows
May 18th 2016 v1.7.6 May 18th 2016 v1.7.6
Decred vote support Decred vote support

2
configure.ac

@ -1,4 +1,4 @@
AC_INIT([ccminer], [1.8-dev], [], [ccminer], [http://github.com/tpruvot/ccminer]) AC_INIT([ccminer], [1.8], [], [ccminer], [http://github.com/tpruvot/ccminer])
AC_PREREQ([2.59c]) AC_PREREQ([2.59c])
AC_CANONICAL_SYSTEM AC_CANONICAL_SYSTEM

1697
neoscrypt/cuda_neoscrypt.cu

File diff suppressed because it is too large Load Diff

41
neoscrypt/neoscrypt.cpp

@ -3,9 +3,10 @@
#include "neoscrypt/neoscrypt.h" #include "neoscrypt/neoscrypt.h"
extern void neoscrypt_setBlockTarget(uint32_t * data, const void *ptarget); extern void neoscrypt_setBlockTarget(uint32_t * data, const void *ptarget);
extern void neoscrypt_cpu_init(int thr_id, uint32_t threads);
extern void neoscrypt_cpu_free(int thr_id); extern void neoscrypt_init_2stream(int thr_id, uint32_t threads);
extern uint32_t neoscrypt_cpu_hash_k4(int thr_id, uint32_t threads, uint32_t startNounce, int have_stratum, int order); extern void neoscrypt_free_2stream(int thr_id);
extern void neoscrypt_hash_k4_2stream(int thr_id, uint32_t threads, uint32_t startNounce, uint32_t *result, bool stratum);
static bool init[MAX_GPUS] = { 0 }; static bool init[MAX_GPUS] = { 0 };
@ -35,12 +36,12 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign
cudaGetLastError(); // reset errors if device is not "reset" cudaGetLastError(); // reset errors if device is not "reset"
if (device_sm[dev_id] <= 300) { if (device_sm[dev_id] <= 300) {
applog(LOG_ERR, "Sorry neoscrypt is not supported on SM 3.0 devices"); gpulog(LOG_ERR, thr_id, "Sorry neoscrypt is not supported on SM 3.0 devices");
proper_exit(EXIT_CODE_CUDA_ERROR); proper_exit(EXIT_CODE_CUDA_ERROR);
} }
applog(LOG_INFO, "GPU #%d: Using %d cuda threads", dev_id, throughput); gpulog(LOG_INFO, thr_id, "Using %d cuda threads", throughput);
neoscrypt_cpu_init(thr_id, throughput); neoscrypt_init_2stream(thr_id, throughput);
init[thr_id] = true; init[thr_id] = true;
} }
@ -56,26 +57,28 @@ int scanhash_neoscrypt(int thr_id, struct work* work, uint32_t max_nonce, unsign
neoscrypt_setBlockTarget(endiandata,ptarget); neoscrypt_setBlockTarget(endiandata,ptarget);
do { do {
uint32_t foundNonce = neoscrypt_cpu_hash_k4(thr_id, throughput, pdata[19], have_stratum, 0); uint32_t foundNonces[2] = { UINT32_MAX, UINT32_MAX };
if (foundNonce != UINT32_MAX) neoscrypt_hash_k4_2stream(thr_id, throughput, pdata[19], foundNonces, have_stratum);
{
uint32_t _ALIGN(64) vhash64[8];
*hashes_done = pdata[19] - first_nonce + 1; *hashes_done = pdata[19] - first_nonce + throughput;
if (foundNonces[0] != UINT32_MAX)
{
uint32_t _ALIGN(64) vhash[8];
if (have_stratum) { if (have_stratum) {
be32enc(&endiandata[19], foundNonce); be32enc(&endiandata[19], foundNonces[0]);
} else { } else {
endiandata[19] = foundNonce; endiandata[19] = foundNonces[0];
} }
neoscrypt((uchar*)vhash64, (uchar*) endiandata, 0x80000620U); neoscrypt((uchar*)vhash, (uchar*) endiandata, 0x80000620U);
if (vhash64[7] <= ptarget[7] && fulltest(vhash64, ptarget)) { if (vhash[7] <= ptarget[7] && fulltest(vhash, ptarget)) {
work_set_target_ratio(work, vhash64); work_set_target_ratio(work, vhash);
pdata[19] = foundNonce; pdata[19] = foundNonces[0];
return 1; return 1;
} else { } else {
gpulog(LOG_WARNING, thr_id, "result for %08x does not validate on CPU!", foundNonce); gpulog(LOG_WARNING, thr_id, "nonce %08x does not validate on CPU!", foundNonces[0]);
} }
} }
@ -100,7 +103,7 @@ void free_neoscrypt(int thr_id)
cudaThreadSynchronize(); cudaThreadSynchronize();
neoscrypt_cpu_free(thr_id); neoscrypt_free_2stream(thr_id);
init[thr_id] = false; init[thr_id] = false;
cudaDeviceSynchronize(); cudaDeviceSynchronize();

11
quark/quarkcoin.cu

@ -294,6 +294,7 @@ extern "C" int scanhash_quark(int thr_id, struct work* work, uint32_t max_nonce,
// cleanup // cleanup
extern "C" void free_quark(int thr_id) extern "C" void free_quark(int thr_id)
{ {
int dev_id = device_map[thr_id];
if (!init[thr_id]) if (!init[thr_id])
return; return;
@ -301,9 +302,13 @@ extern "C" void free_quark(int thr_id)
cudaFree(d_hash[thr_id]); cudaFree(d_hash[thr_id]);
cudaFree(d_branch1Nonces[thr_id]); if (cuda_arch[dev_id] >= 300) {
cudaFree(d_branch2Nonces[thr_id]); cudaFree(d_branch1Nonces[thr_id]);
cudaFree(d_branch3Nonces[thr_id]); cudaFree(d_branch2Nonces[thr_id]);
cudaFree(d_branch3Nonces[thr_id]);
} else {
cudaFree(d_hash_br2[thr_id]);
}
quark_blake512_cpu_free(thr_id); quark_blake512_cpu_free(thr_id);
quark_groestl512_cpu_free(thr_id); quark_groestl512_cpu_free(thr_id);

Loading…
Cancel
Save