From edd9b816220aafd246a20536d9a674f4d8298724 Mon Sep 17 00:00:00 2001 From: ckolivas Date: Wed, 22 Aug 2012 10:07:20 +1000 Subject: [PATCH 1/7] Do not add time to dynamic opencl calculations over a getwork. --- cgminer.c | 1 + driver-opencl.c | 14 ++++++++++---- miner.h | 2 ++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/cgminer.c b/cgminer.c index c380a5b0..471ce3d1 100644 --- a/cgminer.c +++ b/cgminer.c @@ -4239,6 +4239,7 @@ void *miner_thread(void *userdata) if (api->free_work && likely(work->pool)) api->free_work(mythr, work); get_work(work, mythr, thr_id); + cgpu->new_work = true; gettimeofday(&tv_workstart, NULL); work->blk.nonce = 0; diff --git a/driver-opencl.c b/driver-opencl.c index 5ca659c8..500a9ead 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -1532,10 +1532,16 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, clFinish(clState->commandQueue); } - gettimeofday(&gpu->tv_gpumid, NULL); - if (!gpu->intervals) { - gpu->tv_gpustart.tv_sec = gpu->tv_gpumid.tv_sec; - gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec; + if (gpu->dynamic) { + gettimeofday(&gpu->tv_gpumid, NULL); + if (gpu->new_work) { + gpu->new_work = false; + gpu->intervals = 0; + } + if (!gpu->intervals) { + gpu->tv_gpustart.tv_sec = gpu->tv_gpumid.tv_sec; + gpu->tv_gpustart.tv_usec = gpu->tv_gpumid.tv_usec; + } } status = thrdata->queue_kernel_parameters(clState, &work->blk, globalThreads[0]); diff --git a/miner.h b/miner.h index a6719923..91fe7ce7 100644 --- a/miner.h +++ b/miner.h @@ -374,6 +374,8 @@ struct cgpu_info { int intervals, hit; #endif + bool new_work; + float temp; int cutofftemp; From f63aeab1fe02e6deaefc01b166c1f97dd800d35b Mon Sep 17 00:00:00 2001 From: ckolivas Date: Wed, 22 Aug 2012 10:33:40 +1000 Subject: [PATCH 2/7] gpu->hit should be reset on new work as well. --- driver-opencl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver-opencl.c b/driver-opencl.c index 500a9ead..a8744e24 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -1536,7 +1536,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, gettimeofday(&gpu->tv_gpumid, NULL); if (gpu->new_work) { gpu->new_work = false; - gpu->intervals = 0; + gpu->intervals = gpu->hit = 0; } if (!gpu->intervals) { gpu->tv_gpustart.tv_sec = gpu->tv_gpumid.tv_sec; From 61df3013a8f2f69ee9ed8b0dbca5edd065cd3d2d Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Wed, 22 Aug 2012 23:03:17 +1000 Subject: [PATCH 3/7] Ignore the submit_fail flag when deciding whether to recruit more curls or not since we have upper bounds on how many curls can be recruited, this test is redundant and can lead to problems. --- cgminer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cgminer.c b/cgminer.c index 471ce3d1..9162e6ac 100644 --- a/cgminer.c +++ b/cgminer.c @@ -2227,7 +2227,7 @@ retry: if (!pool->curls) recruit_curl(pool); else if (list_empty(&pool->curlring)) { - if (pool->submit_fail || pool->curls >= curl_limit) { + if (pool->curls >= curl_limit) { pthread_cond_wait(&pool->cr_cond, &pool->pool_lock); goto retry; } else From 86d5377e727ffcfe8ac0c78a282453aa35a5b498 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 23 Aug 2012 10:48:15 +1000 Subject: [PATCH 4/7] Use atomic ops to never miss a nonce on opencl kernels, including nonce==0, also allowing us to make the output buffer smaller. --- diablo120724.cl | 51 ++++++++++----- diakgcn120724.cl | 47 +++++++++++--- driver-opencl.c | 2 +- findnonce.c | 25 +++----- findnonce.h | 5 +- mkinstalldirs | 162 ----------------------------------------------- phatk120724.cl | 51 ++++++++++----- poclbm120724.cl | 65 ++++++++++--------- scrypt120724.cl | 13 ++-- 9 files changed, 158 insertions(+), 263 deletions(-) delete mode 100755 mkinstalldirs diff --git a/diablo120724.cl b/diablo120724.cl index 4b64c300..4687c5bc 100644 --- a/diablo120724.cl +++ b/diablo120724.cl @@ -62,7 +62,7 @@ void search( const uint c1_plus_k5, const uint b1_plus_k6, const uint state0, const uint state1, const uint state2, const uint state3, const uint state4, const uint state5, const uint state6, const uint state7, - __global uint * output) + volatile __global uint * output) { z ZA[930]; @@ -1242,33 +1242,50 @@ void search( ZA[924] = (ZCh(ZA[922], ZA[920], ZA[918]) + ZA[923]) + ZR26(ZA[922]); -#define FOUND (0x800) -#define NFLAG (0x7FF) +#define FOUND (0x0F) #if defined(VECTORS4) bool result = any(ZA[924] == 0x136032EDU); if (result) { - if (ZA[924].x == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x; - if (ZA[924].y == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y; - if (ZA[924].z == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce.z] = Znonce.z; - if (ZA[924].w == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce.w] = Znonce.w; + uint found; + + if (ZA[924].x == 0x136032EDU) { + found = atomic_add(&output[FOUND], 1); + output[found] = Znonce.x; + } + if (ZA[924].y == 0x136032EDU) { + found = atomic_add(&output[FOUND], 1); + output[found] = Znonce.y; + } + if (ZA[924].z == 0x136032EDU) { + found = atomic_add(&output[FOUND], 1); + output[found] = Znonce.z; + } + if (ZA[924].w == 0x136032EDU) { + found = atomic_add(&output[FOUND], 1); + output[found] = Znonce.w; + } } #elif defined(VECTORS2) bool result = any(ZA[924] == 0x136032EDU); if (result) { - if (ZA[924].x == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce.x] = Znonce.x; - if (ZA[924].y == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce.y] = Znonce.y; + uint found; + + if (ZA[924].x == 0x136032EDU) { + found = atomic_add(&output[FOUND], 1); + output[found] = Znonce.x; + } + if (ZA[924].y == 0x136032EDU) { + found = atomic_add(&output[FOUND], 1); + output[found] = Znonce.y; + } } #else - if (ZA[924] == 0x136032EDU) - output[FOUND] = output[NFLAG & Znonce] = Znonce; + if (ZA[924] == 0x136032EDU) { + uint found = atomic_add(&output[FOUND], 1); + output[found] = Znonce; + } #endif } diff --git a/diakgcn120724.cl b/diakgcn120724.cl index 7dd73fb9..d27674f6 100644 --- a/diakgcn120724.cl +++ b/diakgcn120724.cl @@ -48,7 +48,7 @@ __kernel const uint state0A, const uint state0B, const uint state1A, const uint state2A, const uint state3A, const uint state4A, const uint state5A, const uint state6A, const uint state7A, - __global uint * output) + volatile __global uint * output) { u V[8]; u W[16]; @@ -571,17 +571,46 @@ __kernel V[7] += V[3] + W[12] + ch(V[0], V[1], V[2]) + rotr26(V[0]); -#define FOUND (0x800) -#define NFLAG (0x7FF) +#define FOUND (0x0F) #ifdef VECTORS4 - if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) - output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : ((V[7].y == 0x136032edU) ? nonce.y : ((V[7].z == 0x136032edU) ? nonce.z : nonce.w)); + if ((V[7].x == 0x136032edU) ^ (V[7].y == 0x136032edU) ^ (V[7].z == 0x136032edU) ^ (V[7].w == 0x136032edU)) { + uint found; + + if (V[7].x == 0x136032edU) { + found = atomic_add(&output[FOUND], 1); + output[found] = nonce.x; + } + if (V[7].y == 0x136032edU) { + found = atomic_add(&output[FOUND], 1); + output[found] = nonce.y; + } + if (V[7].z == 0x136032edU) { + found = atomic_add(&output[FOUND], 1); + output[found] = nonce.z; + } + if (V[7].w == 0x136032edU) { + found = atomic_add(&output[FOUND], 1); + output[found] = nonce.w; + } + } #elif defined VECTORS2 - if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU)) - output[FOUND] = output[NFLAG & nonce.x] = (V[7].x == 0x136032edU) ? nonce.x : nonce.y; + if ((V[7].x == 0x136032edU) + (V[7].y == 0x136032edU)) { + uint found; + + if (V[7].x == 0x136032edU) { + found = atomic_add(&output[FOUND], 1); + output[found] = nonce.x; + } + if (V[7].y == 0x136032edU) { + found = atomic_add(&output[FOUND], 1); + output[found] = nonce.y; + } + } #else - if (V[7] == 0x136032edU) - output[FOUND] = output[NFLAG & nonce] = nonce; + if (V[7] == 0x136032edU) { + uint found = atomic_add(&output[FOUND], 1); + output[found] = nonce; + } #endif } diff --git a/driver-opencl.c b/driver-opencl.c index a8744e24..6883ada3 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -1511,7 +1511,7 @@ static int64_t opencl_scanhash(struct thr_info *thr, struct work *work, if (hashes > gpu->max_hashes) gpu->max_hashes = hashes; - /* MAXBUFFERS entry is used as a flag to say nonces exist */ + /* FOUND entry is used as a counter to say how many nonces exist */ if (thrdata->res[FOUND]) { /* Clear the buffer again */ status = clEnqueueWriteBuffer(clState->commandQueue, clState->outputBuffer, CL_FALSE, 0, diff --git a/findnonce.c b/findnonce.c index 9980a704..d557f17a 100644 --- a/findnonce.c +++ b/findnonce.c @@ -172,6 +172,7 @@ struct pc_data { struct work *work; uint32_t res[MAXBUFFERS]; pthread_t pth; + int found; }; static void send_sha_nonce(struct pc_data *pcd, cl_uint nonce) @@ -237,32 +238,22 @@ static void send_scrypt_nonce(struct pc_data *pcd, uint32_t nonce) static void *postcalc_hash(void *userdata) { struct pc_data *pcd = (struct pc_data *)userdata; - struct thr_info *thr = pcd->thr; - int entry = 0, nonces = 0; + unsigned int entry = 0; pthread_detach(pthread_self()); - for (entry = 0; entry < FOUND; entry++) { + for (entry = 0; entry < pcd->res[FOUND]; entry++) { uint32_t nonce = pcd->res[entry]; - if (nonce) { - applog(LOG_DEBUG, "OCL NONCE %u", nonce); - if (opt_scrypt) - send_scrypt_nonce(pcd, nonce); - else - send_sha_nonce(pcd, nonce); - nonces++; - } + applog(LOG_DEBUG, "OCL NONCE %u found in slot %d", nonce, entry); + if (opt_scrypt) + send_scrypt_nonce(pcd, nonce); + else + send_sha_nonce(pcd, nonce); } free(pcd); - if (unlikely(!nonces)) { - applog(LOG_DEBUG, "No nonces found! Error in OpenCL code?"); - hw_errors++; - thr->cgpu->hw_errors++; - } - return NULL; } diff --git a/findnonce.h b/findnonce.h index ce69569e..610f6f8d 100644 --- a/findnonce.h +++ b/findnonce.h @@ -4,10 +4,9 @@ #include "config.h" #define MAXTHREADS (0xFFFFFFFEULL) -#define MAXBUFFERS (0xFFF) +#define MAXBUFFERS (0x10) #define BUFFERSIZE (sizeof(uint32_t) * MAXBUFFERS) -#define FOUND (0x800) -/* #define NFLAG (0x7FF) Just for reference */ +#define FOUND (0x0F) #ifdef HAVE_OPENCL extern void precalc_hash(dev_blk_ctx *blk, uint32_t *state, uint32_t *data); diff --git a/mkinstalldirs b/mkinstalldirs deleted file mode 100755 index 55d537f8..00000000 --- a/mkinstalldirs +++ /dev/null @@ -1,162 +0,0 @@ -#! /bin/sh -# mkinstalldirs --- make directory hierarchy - -scriptversion=2009-04-28.21; # UTC - -# Original author: Noah Friedman -# Created: 1993-05-16 -# Public domain. -# -# This file is maintained in Automake, please report -# bugs to or send patches to -# . - -nl=' -' -IFS=" "" $nl" -errstatus=0 -dirmode= - -usage="\ -Usage: mkinstalldirs [-h] [--help] [--version] [-m MODE] DIR ... - -Create each directory DIR (with mode MODE, if specified), including all -leading file name components. - -Report bugs to ." - -# process command line arguments -while test $# -gt 0 ; do - case $1 in - -h | --help | --h*) # -h for help - echo "$usage" - exit $? - ;; - -m) # -m PERM arg - shift - test $# -eq 0 && { echo "$usage" 1>&2; exit 1; } - dirmode=$1 - shift - ;; - --version) - echo "$0 $scriptversion" - exit $? - ;; - --) # stop option processing - shift - break - ;; - -*) # unknown option - echo "$usage" 1>&2 - exit 1 - ;; - *) # first non-opt arg - break - ;; - esac -done - -for file -do - if test -d "$file"; then - shift - else - break - fi -done - -case $# in - 0) exit 0 ;; -esac - -# Solaris 8's mkdir -p isn't thread-safe. If you mkdir -p a/b and -# mkdir -p a/c at the same time, both will detect that a is missing, -# one will create a, then the other will try to create a and die with -# a "File exists" error. This is a problem when calling mkinstalldirs -# from a parallel make. We use --version in the probe to restrict -# ourselves to GNU mkdir, which is thread-safe. -case $dirmode in - '') - if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then - echo "mkdir -p -- $*" - exec mkdir -p -- "$@" - else - # On NextStep and OpenStep, the 'mkdir' command does not - # recognize any option. It will interpret all options as - # directories to create, and then abort because '.' already - # exists. - test -d ./-p && rmdir ./-p - test -d ./--version && rmdir ./--version - fi - ;; - *) - if mkdir -m "$dirmode" -p --version . >/dev/null 2>&1 && - test ! -d ./--version; then - echo "mkdir -m $dirmode -p -- $*" - exec mkdir -m "$dirmode" -p -- "$@" - else - # Clean up after NextStep and OpenStep mkdir. - for d in ./-m ./-p ./--version "./$dirmode"; - do - test -d $d && rmdir $d - done - fi - ;; -esac - -for file -do - case $file in - /*) pathcomp=/ ;; - *) pathcomp= ;; - esac - oIFS=$IFS - IFS=/ - set fnord $file - shift - IFS=$oIFS - - for d - do - test "x$d" = x && continue - - pathcomp=$pathcomp$d - case $pathcomp in - -*) pathcomp=./$pathcomp ;; - esac - - if test ! -d "$pathcomp"; then - echo "mkdir $pathcomp" - - mkdir "$pathcomp" || lasterr=$? - - if test ! -d "$pathcomp"; then - errstatus=$lasterr - else - if test ! -z "$dirmode"; then - echo "chmod $dirmode $pathcomp" - lasterr= - chmod "$dirmode" "$pathcomp" || lasterr=$? - - if test ! -z "$lasterr"; then - errstatus=$lasterr - fi - fi - fi - fi - - pathcomp=$pathcomp/ - done -done - -exit $errstatus - -# Local Variables: -# mode: shell-script -# sh-indentation: 2 -# eval: (add-hook 'write-file-hooks 'time-stamp) -# time-stamp-start: "scriptversion=" -# time-stamp-format: "%:y-%02m-%02d.%02H" -# time-stamp-time-zone: "UTC" -# time-stamp-end: "; # UTC" -# End: diff --git a/phatk120724.cl b/phatk120724.cl index 0f604436..cf5eb09c 100644 --- a/phatk120724.cl +++ b/phatk120724.cl @@ -164,7 +164,7 @@ void search( const uint state0, const uint state1, const uint state2, const uint const uint PreW18, const uint PreW19, const uint PreW31, const uint PreW32, - __global uint * output) + volatile __global uint * output) { @@ -387,31 +387,48 @@ void search( const uint state0, const uint state1, const uint state2, const uint W[117] += W[108] + Vals[3] + Vals[7] + P2(124) + P1(124) + Ch((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64),Vals[1],Vals[2]) - (-(K[60] + H[7]) - S1((Vals[0] + Vals[4]) + (K[59] + W(59+64)) + s1(64+59)+ ch(59+64))); -#define FOUND (0x800) -#define NFLAG (0x7FF) +#define FOUND (0x0F) #ifdef VECTORS4 bool result = W[117].x & W[117].y & W[117].z & W[117].w; if (!result) { - if (!W[117].x) - output[FOUND] = output[NFLAG & W[3].x] = W[3].x; - if (!W[117].y) - output[FOUND] = output[NFLAG & W[3].y] = W[3].y; - if (!W[117].z) - output[FOUND] = output[NFLAG & W[3].z] = W[3].z; - if (!W[117].w) - output[FOUND] = output[NFLAG & W[3].w] = W[3].w; + uint found; + + if (!W[117].x) { + found = atomic_add(&output[FOUND], 1); + output[found] = W[3].x; + } + if (!W[117].y) { + found = atomic_add(&output[FOUND], 1); + output[found] = W[3].y; + } + if (!W[117].z) { + found = atomic_add(&output[FOUND], 1); + output[found] = W[3].z; + } + if (!W[117].w) { + found = atomic_add(&output[FOUND], 1); + output[found] = W[3].w; + } } #elif defined VECTORS2 bool result = W[117].x & W[117].y; if (!result) { - if (!W[117].x) - output[FOUND] = output[NFLAG & W[3].x] = W[3].x; - if (!W[117].y) - output[FOUND] = output[NFLAG & W[3].y] = W[3].y; + uint found; + + if (!W[117].x) { + found = atomic_add(&output[FOUND], 1); + output[found] = W[3].x; + } + if (!W[117].y) { + found = atomic_add(&output[FOUND], 1); + output[found] = W[3].y; + } } #else - if (!W[117]) - output[FOUND] = output[NFLAG & W[3]] = W[3]; + if (!W[117]) { + uint found = atomic_add(&output[FOUND], 1); + output[found] = W[3]; + } #endif } diff --git a/poclbm120724.cl b/poclbm120724.cl index 3e8b9943..a02413bb 100644 --- a/poclbm120724.cl +++ b/poclbm120724.cl @@ -80,7 +80,7 @@ void search(const uint state0, const uint state1, const uint state2, const uint const uint D1A, const uint C1addK5, const uint B1addK6, const uint W16addK16, const uint W17addK17, const uint PreVal4addT1, const uint Preval0, - __global uint * output) + volatile __global uint * output) { u Vals[24]; u *W = &Vals[8]; @@ -1311,43 +1311,46 @@ Vals[1]+=(rotr(W[9],17)^rotr(W[9],19)^(W[9]>>10U)); Vals[1]+=K[59]; Vals[1]+=Vals[5]; -#define FOUND (0x800) -#define NFLAG (0x7FF) +Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); +Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); +Vals[2]+=W[12]; +Vals[2]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U)); +Vals[2]+=W[5]; +Vals[2]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); +Vals[2]+=Vals[0]; +Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); +Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); + +#define FOUND (0x0F) #if defined(VECTORS2) || defined(VECTORS4) - Vals[2]+=Ma(Vals[6],Vals[5],Vals[7]); - Vals[2]+=(rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22)); - Vals[2]+=W[12]; - Vals[2]+=(rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U)); - Vals[2]+=W[5]; - Vals[2]+=(rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U)); - Vals[2]+=Vals[0]; - Vals[2]+=(rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25)); - Vals[2]+=ch(Vals[1],Vals[4],Vals[3]); if (any(Vals[2] == 0x136032edU)) { - if (Vals[2].x == 0x136032edU) - output[FOUND] = output[NFLAG & nonce.x] = nonce.x; - if (Vals[2].y == 0x136032edU) - output[FOUND] = output[NFLAG & nonce.y] = nonce.y; + uint found; + + if (Vals[2].x == 0x136032edU) { + found = atomic_add(&output[FOUND], 1); + output[found] = nonce.x; + } + if (Vals[2].y == 0x136032edU) { + found = atomic_add(&output[FOUND], 1); + output[found] = nonce.y; + } #if defined(VECTORS4) - if (Vals[2].z == 0x136032edU) - output[FOUND] = output[NFLAG & nonce.z] = nonce.z; - if (Vals[2].w == 0x136032edU) - output[FOUND] = output[NFLAG & nonce.w] = nonce.w; + if (Vals[2].z == 0x136032edU) { + found = atomic_add(&output[FOUND], 1); + output[found] = nonce.z; + } + if (Vals[2].w == 0x136032edU) { + found = atomic_add(&output[FOUND], 1); + output[found] = nonce.w; + } #endif } #else - if ((Vals[2]+ - Ma(Vals[6],Vals[5],Vals[7])+ - (rotr(Vals[5],2)^rotr(Vals[5],13)^rotr(Vals[5],22))+ - W[12]+ - (rotr(W[13],7)^rotr(W[13],18)^(W[13]>>3U))+ - W[5]+ - (rotr(W[10],17)^rotr(W[10],19)^(W[10]>>10U))+ - Vals[0]+ - (rotr(Vals[1],6)^rotr(Vals[1],11)^rotr(Vals[1],25))+ - ch(Vals[1],Vals[4],Vals[3])) == 0x136032edU) - output[FOUND] = output[NFLAG & nonce] = nonce; + if (Vals[2] == 0x136032edU) { + uint found = atomic_add(&output[FOUND], 1); + output[found] = nonce; + } #endif } diff --git a/scrypt120724.cl b/scrypt120724.cl index d38f6a54..7390d2cd 100644 --- a/scrypt120724.cl +++ b/scrypt120724.cl @@ -682,12 +682,11 @@ void scrypt_core(uint4 X[8], __global uint4*restrict lookup) unshittify(X); } -#define FOUND (0x800) -#define NFLAG (0x7FF) +#define FOUND (0x0F) __attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) __kernel void search(__global const uint4 * restrict input, -__global uint*restrict output, __global uint4*restrict padcache, +volatile __global uint*restrict output, __global uint4*restrict padcache, const uint4 midstate0, const uint4 midstate16, const uint target) { uint gid = get_global_id(0); @@ -721,9 +720,11 @@ const uint4 midstate0, const uint4 midstate16, const uint target) SHA256_fixed(&tmp0,&tmp1); SHA256(&ostate0,&ostate1, tmp0, tmp1, (uint4)(0x80000000U, 0U, 0U, 0U), (uint4)(0U, 0U, 0U, 0x300U)); - bool found = (EndianSwap(ostate1.w) <= target); - if (found) - output[FOUND] = output[NFLAG & gid] = gid; + bool result = (EndianSwap(ostate1.w) <= target); + if (result) { + uint found = atomic_add(&output[FOUND], 1); + output[found] = gid; + } } /*- From b74b54d95b9ba916dff52a87076279cfbfd9028a Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 23 Aug 2012 11:09:09 +1000 Subject: [PATCH 5/7] Check we haven't staged work while waiting for a curl entry before proceeding. --- cgminer.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cgminer.c b/cgminer.c index 9162e6ac..ef18bdb3 100644 --- a/cgminer.c +++ b/cgminer.c @@ -2421,13 +2421,18 @@ retry: lagging = true; pool = ret_work->pool = select_pool(lagging); + inc_queued(); + if (!ce) ce = pop_curl_entry(pool); - /* Inc queued count after ce is popped in case there're none - * left and we think we've queued work when we're just waiting - * for curls */ - inc_queued(); + /* Check that we haven't staged work via other threads while + * waiting for a curl entry */ + if (total_staged() >= maxq) { + dec_queued(); + free_work(ret_work); + goto out; + } /* obtain new work from bitcoin via JSON-RPC */ if (!get_upstream_work(ret_work, ce->curl)) { From f7f55e777d43f2421b1e8cae79a1d14786fd0339 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 23 Aug 2012 11:19:52 +1000 Subject: [PATCH 6/7] Increase kernel versions signifying changed APIs. --- configure.ac | 10 +++++----- diablo120724.cl => diablo120823.cl | 0 diakgcn120724.cl => diakgcn120823.cl | 0 phatk120724.cl => phatk120823.cl | 0 poclbm120724.cl => poclbm120823.cl | 0 scrypt120724.cl => scrypt120823.cl | 0 6 files changed, 5 insertions(+), 5 deletions(-) rename diablo120724.cl => diablo120823.cl (100%) rename diakgcn120724.cl => diakgcn120823.cl (100%) rename phatk120724.cl => phatk120823.cl (100%) rename poclbm120724.cl => poclbm120823.cl (100%) rename scrypt120724.cl => scrypt120823.cl (100%) diff --git a/configure.ac b/configure.ac index c00800aa..7b1f0e23 100644 --- a/configure.ac +++ b/configure.ac @@ -389,11 +389,11 @@ fi AC_DEFINE_UNQUOTED([CGMINER_PREFIX], ["$prefix/bin"], [Path to cgminer install]) -AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk120724"], [Filename for phatk kernel]) -AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm120724"], [Filename for poclbm kernel]) -AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn120724"], [Filename for diakgcn kernel]) -AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo120724"], [Filename for diablo kernel]) -AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt120724"], [Filename for scrypt kernel]) +AC_DEFINE_UNQUOTED([PHATK_KERNNAME], ["phatk120823"], [Filename for phatk kernel]) +AC_DEFINE_UNQUOTED([POCLBM_KERNNAME], ["poclbm120823"], [Filename for poclbm kernel]) +AC_DEFINE_UNQUOTED([DIAKGCN_KERNNAME], ["diakgcn120823"], [Filename for diakgcn kernel]) +AC_DEFINE_UNQUOTED([DIABLO_KERNNAME], ["diablo120823"], [Filename for diablo kernel]) +AC_DEFINE_UNQUOTED([SCRYPT_KERNNAME], ["scrypt120823"], [Filename for scrypt kernel]) AC_SUBST(OPENCL_LIBS) diff --git a/diablo120724.cl b/diablo120823.cl similarity index 100% rename from diablo120724.cl rename to diablo120823.cl diff --git a/diakgcn120724.cl b/diakgcn120823.cl similarity index 100% rename from diakgcn120724.cl rename to diakgcn120823.cl diff --git a/phatk120724.cl b/phatk120823.cl similarity index 100% rename from phatk120724.cl rename to phatk120823.cl diff --git a/poclbm120724.cl b/poclbm120823.cl similarity index 100% rename from poclbm120724.cl rename to poclbm120823.cl diff --git a/scrypt120724.cl b/scrypt120823.cl similarity index 100% rename from scrypt120724.cl rename to scrypt120823.cl From 0feb679b67a5214a8eceff4e587659a7517db2f8 Mon Sep 17 00:00:00 2001 From: Con Kolivas Date: Thu, 23 Aug 2012 12:19:23 +1000 Subject: [PATCH 7/7] Only keep the last 6 blocks in the uthash database to keep memory usage constant. Storing more is unhelpful anyway. --- cgminer.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cgminer.c b/cgminer.c index ef18bdb3..5b15e859 100644 --- a/cgminer.c +++ b/cgminer.c @@ -2858,6 +2858,20 @@ static void test_work_current(struct work *work) quit (1, "test_work_current OOM"); strcpy(s->hash, hexstr); wr_lock(&blk_lock); + /* Only keep the last 6 blocks in memory since work from blocks + * before this is virtually impossible and we want to prevent + * memory usage from continually rising */ + if (HASH_COUNT(blocks) > 5) { + struct block *blocka, *blockb; + int count = 0; + + HASH_ITER(hh, blocks, blocka, blockb) { + if (count++ < 6) + continue; + HASH_DEL(blocks, blocka); + free(blocka); + } + } HASH_ADD_STR(blocks, hash, s); wr_unlock(&blk_lock); set_curblock(hexstr, work->data);